From 1401fcc4e3da97c44dcc7cbf538c07e24768d791 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 29 Sep 2020 21:24:17 -0700 Subject: [PATCH 01/23] nvme-loop: don't put ctrl on nvme_init_ctrl error The function nvme_init_ctrl() gets the ctrl reference & when it fails it does put the ctrl reference in the error unwind code. When creating loop ctrl in nvme_loop_create_ctrl() if nvme_init_ctrl() returns non zero (i.e. error) value it jumps to the "out_put_ctrl" label which calls nvme_put_ctrl(), that will lead to douple ctrl put in error unwind path. Update nvme_loop_create_ctrl() such that this patch removes the "out_put_ctrl" label, add a new "out" label after nvme_put_ctrl() in error unwind path and jump to newly added label when nvme_init_ctrl() call retuns an error. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 0d6008cf66a2a..f6d81239be215 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -579,7 +579,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops, 0 /* no quirks, we're perfect! */); if (ret) - goto out_put_ctrl; + goto out; if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) WARN_ON_ONCE(1); @@ -635,8 +635,8 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, kfree(ctrl->queues); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); -out_put_ctrl: nvme_put_ctrl(&ctrl->ctrl); +out: if (ret > 0) ret = -EIO; return ERR_PTR(ret); From 6fcd669514794da08ce6bfa272b6ec9b33cb543d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Aug 2020 09:56:58 +0200 Subject: [PATCH 02/23] block: optimize blk_queue_zoned_model for !CONFIG_BLK_DEV_ZONED Always return BLK_ZONED_NONE if zoned device support is not enabled. This allows various compiler optimizations including the dead code elimination that we so like for avoiding ifdefs. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8e77f12de5221..1b81b2766858b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -692,7 +692,9 @@ static inline bool queue_is_mq(struct request_queue *q) static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { - return q->limits.zoned; + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return q->limits.zoned; + return BLK_ZONED_NONE; } static inline bool blk_queue_is_zoned(struct request_queue *q) From 7fad20dd7c0ab1d2c224755a574576be25f13e03 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Aug 2020 09:31:36 +0200 Subject: [PATCH 03/23] nvme: fix initialization of the zone bitmaps The removal of the ->revalidate_disk method broke the initialization of the zone bitmaps, as nvme_revalidate_disk now never gets called during initialization. Move the zone related code from nvme_revalidate_disk into a new helper in zns.c, and call it from nvme_alloc_ns in addition to nvme_validate_ns to ensure the zone bitmaps are initialized during probe. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 28 +++++----------------------- drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/zns.c | 11 +++++++++++ 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0a985c601c62c..400d995f95fe2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2202,28 +2202,6 @@ static int _nvme_revalidate_disk(struct gendisk *disk) return ret; } -static int nvme_revalidate_disk(struct gendisk *disk) -{ - int ret; - - ret = _nvme_revalidate_disk(disk); - if (ret) - return ret; - -#ifdef CONFIG_BLK_DEV_ZONED - if (blk_queue_is_zoned(disk->queue)) { - struct nvme_ns *ns = disk->private_data; - struct nvme_ctrl *ctrl = ns->ctrl; - - ret = blk_revalidate_disk_zones(disk, NULL); - if (!ret) - blk_queue_max_zone_append_sectors(disk->queue, - ctrl->max_zone_append); - } -#endif - return ret; -} - static char nvme_pr_type(enum pr_type type) { switch (type) { @@ -3958,6 +3936,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (__nvme_revalidate_disk(disk, id)) goto out_put_disk; + if (blk_queue_is_zoned(ns->queue) && nvme_revalidate_zones(ns)) + goto out_put_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -4052,7 +4032,9 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) return; } - ret = nvme_revalidate_disk(ns->disk); + ret = _nvme_revalidate_disk(ns->disk); + if (!ret && blk_queue_is_zoned(ns->queue)) + ret = nvme_revalidate_zones(ns); revalidate_disk_size(ns->disk, ret == 0); if (ret) nvme_ns_remove(ns); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1096ef1f6aa2b..6cbbd1597ae6d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -758,6 +758,7 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) } #endif /* CONFIG_NVME_MULTIPATH */ +int nvme_revalidate_zones(struct nvme_ns *ns); #ifdef CONFIG_BLK_DEV_ZONED int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns, unsigned lbaf); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 53efecb678983..56d708017d062 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -7,6 +7,17 @@ #include #include "nvme.h" +int nvme_revalidate_zones(struct nvme_ns *ns) +{ + struct request_queue *q = ns->queue; + int ret; + + ret = blk_revalidate_disk_zones(ns->disk, NULL); + if (!ret) + blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); + return ret; +} + static int nvme_set_max_append(struct nvme_ctrl *ctrl) { struct nvme_command c = { }; From d525c3c023221619748d1e758e5a26daa775f822 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Aug 2020 14:02:18 +0200 Subject: [PATCH 04/23] nvme: remove the disk argument to nvme_update_zone_info The queue can trivially be derived from the nvme_ns structure. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/nvme.h | 8 ++------ drivers/nvme/host/zns.c | 5 ++--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 400d995f95fe2..d4b5032084972 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2095,7 +2095,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) case NVME_CSI_NVM: break; case NVME_CSI_ZNS: - ret = nvme_update_zone_info(disk, ns, lbaf); + ret = nvme_update_zone_info(ns, lbaf); if (ret) { dev_warn(ctrl->device, "failed to add zoned namespace:%u ret:%d\n", diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6cbbd1597ae6d..5667761001267 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -760,9 +760,7 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) int nvme_revalidate_zones(struct nvme_ns *ns); #ifdef CONFIG_BLK_DEV_ZONED -int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns, - unsigned lbaf); - +int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf); int nvme_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); @@ -779,9 +777,7 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, return BLK_STS_NOTSUPP; } -static inline int nvme_update_zone_info(struct gendisk *disk, - struct nvme_ns *ns, - unsigned lbaf) +static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) { dev_warn(ns->ctrl->device, "Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n"); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 56d708017d062..67e87e9f306f1 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -46,11 +46,10 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl) return 0; } -int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns, - unsigned lbaf) +int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) { struct nvme_effects_log *log = ns->head->effects; - struct request_queue *q = disk->queue; + struct request_queue *q = ns->queue; struct nvme_command c = { }; struct nvme_id_ns_zns *id; int status; From eba9bcf7fef0c4a880245b9a261186a879742355 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 09:42:17 +0200 Subject: [PATCH 05/23] nvme: rename nvme_validate_ns to nvme_validate_or_alloc_ns Use a slightly more descriptive name to enable reusing nvme_validate_ns in the next patch for a lower level function. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d4b5032084972..c5f2615bf5837 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4021,7 +4021,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) } } -static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) +static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; int ret; @@ -4083,7 +4083,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) if (!nsid) /* end of the list? */ goto out; - nvme_validate_ns(ctrl, nsid); + nvme_validate_or_alloc_ns(ctrl, nsid); while (++prev < nsid) nvme_ns_remove_by_nsid(ctrl, prev); } @@ -4106,7 +4106,7 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl) kfree(id); for (i = 1; i <= nn; i++) - nvme_validate_ns(ctrl, i); + nvme_validate_or_alloc_ns(ctrl, i); nvme_remove_invalid_namespaces(ctrl, nn); } From 2124f096fb4521d8efdf2412e9102d475ff5cd36 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 10:25:54 +0200 Subject: [PATCH 06/23] nvme: rename _nvme_revalidate_disk Rename _nvme_revalidate_disk to nvme_validate_ns to better describe what the function does, and pass the struct nvme_ns instead of the gendisk to better match the call chain. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c5f2615bf5837..c04043a94e649 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -89,7 +89,7 @@ static dev_t nvme_chr_devt; static struct class *nvme_class; static struct class *nvme_subsys_class; -static int _nvme_revalidate_disk(struct gendisk *disk); +static int nvme_validate_ns(struct nvme_ns *ns); static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); @@ -1026,7 +1026,7 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) - if (_nvme_revalidate_disk(ns->disk)) + if (nvme_validate_ns(ns)) nvme_set_queue_dying(ns); else if (blk_queue_is_zoned(ns->disk->queue)) { /* @@ -2154,16 +2154,15 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) return 0; } -static int _nvme_revalidate_disk(struct gendisk *disk) +static int nvme_validate_ns(struct nvme_ns *ns) { - struct nvme_ns *ns = disk->private_data; struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_id_ns *id; struct nvme_ns_ids ids; int ret = 0; if (test_bit(NVME_NS_DEAD, &ns->flags)) { - set_capacity(disk, 0); + set_capacity(ns->disk, 0); return -ENODEV; } @@ -2187,7 +2186,7 @@ static int _nvme_revalidate_disk(struct gendisk *disk) goto free_id; } - ret = __nvme_revalidate_disk(disk, id); + ret = __nvme_revalidate_disk(ns->disk, id); free_id: kfree(id); out: @@ -4032,7 +4031,7 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) return; } - ret = _nvme_revalidate_disk(ns->disk); + ret = nvme_validate_ns(ns); if (!ret && blk_queue_is_zoned(ns->queue)) ret = nvme_revalidate_zones(ns); revalidate_disk_size(ns->disk, ret == 0); From 81382f1730d24a60e2b0499592e64e6e640b1871 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:14:20 +0200 Subject: [PATCH 07/23] nvme: rename __nvme_revalidate_disk Rename __nvme_revalidate_disk to nvme_update_ns_info and pass a namespace instead of the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c04043a94e649..fede487f6e043 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2076,10 +2076,9 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) blk_queue_chunk_sectors(ns->queue, iob); } -static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) +static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) { unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; - struct nvme_ns *ns = disk->private_data; struct nvme_ctrl *ctrl = ns->ctrl; int ret; @@ -2141,7 +2140,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) } nvme_set_chunk_sectors(ns, id); - nvme_update_disk_info(disk, ns, id); + nvme_update_disk_info(ns->disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); @@ -2186,7 +2185,7 @@ static int nvme_validate_ns(struct nvme_ns *ns) goto free_id; } - ret = __nvme_revalidate_disk(ns->disk, id); + ret = nvme_update_ns_info(ns, id); free_id: kfree(id); out: @@ -3933,7 +3932,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; - if (__nvme_revalidate_disk(disk, id)) + if (nvme_update_ns_info(ns, id)) goto out_put_disk; if (blk_queue_is_zoned(ns->queue) && nvme_revalidate_zones(ns)) goto out_put_disk; From b8b8cd013327327e757529a0725a8c754c7890e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:33:19 +0200 Subject: [PATCH 08/23] nvme: lift the check for an unallocated namespace into nvme_identify_ns Move the check from the two callers into the common helper. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fede487f6e043..7b1423c7e7fc5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1381,9 +1381,16 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); if (error) { dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); - kfree(*id); + goto out_free_id; } + error = -ENODEV; + if ((*id)->ncap == 0) /* namespace not allocated or attached */ + goto out_free_id; + return 0; + +out_free_id: + kfree(*id); return error; } @@ -2169,11 +2176,6 @@ static int nvme_validate_ns(struct nvme_ns *ns) if (ret) goto out; - if (id->ncap == 0) { - ret = -ENODEV; - goto free_id; - } - ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); if (ret) goto free_id; @@ -3913,9 +3915,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (ret) goto out_free_queue; - if (id->ncap == 0) /* no namespace (legacy quirk) */ - goto out_free_id; - ret = nvme_init_ns_head(ns, nsid, id); if (ret) goto out_free_id; From fab72f5a046883ae5851c95b7c6e7a08f984e391 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:34:04 +0200 Subject: [PATCH 09/23] nvme: call nvme_identify_ns as the first thing in nvme_alloc_ns_block Check if the namespace actually exists as the very first thing and don't bother with any extra work if not. This should speed up and simplify the sequential scanning for NVMe 1.0 devices. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7b1423c7e7fc5..4a5c4d45755b5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3887,9 +3887,12 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) char disk_name[DISK_NAME_LEN]; int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT, ret; + if (nvme_identify_ns(ctrl, nsid, &id)) + return; + ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) - return; + goto out_free_id; ns->queue = blk_mq_init_queue(ctrl->tagset); if (IS_ERR(ns->queue)) @@ -3911,13 +3914,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); - ret = nvme_identify_ns(ctrl, nsid, &id); - if (ret) - goto out_free_queue; - ret = nvme_init_ns_head(ns, nsid, id); if (ret) - goto out_free_id; + goto out_free_queue; nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); @@ -3968,12 +3967,12 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) list_del_init(&ns->head->entry); mutex_unlock(&ctrl->subsys->lock); nvme_put_ns_head(ns->head); - out_free_id: - kfree(id); out_free_queue: blk_cleanup_queue(ns->queue); out_free_ns: kfree(ns); + out_free_id: + kfree(id); } static void nvme_ns_remove(struct nvme_ns *ns) From d4609ea8b3d3fb3423f35805843a82774cb4ef2f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 07:19:13 +0200 Subject: [PATCH 10/23] nvme: factor out a nvme_configure_metadata helper Factor out a helper from nvme_update_ns_info that configures the per-namespaces metadata and PI settings. Also make sure the helpers clear the flags explicitly instead of all of ->features to allow for potentially reusing ->features for future non-metadata flags. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 78 ++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4a5c4d45755b5..443aba9c9fdd1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1966,6 +1966,50 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return 0; } +static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + + /* + * The PI implementation requires the metadata size to be equal to the + * t10 pi tuple size. + */ + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); + if (ns->ms == sizeof(struct t10_pi_tuple)) + ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; + else + ns->pi_type = 0; + + ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) + return 0; + if (ctrl->ops->flags & NVME_F_FABRICS) { + /* + * The NVMe over Fabrics specification only supports metadata as + * part of the extended data LBA. We rely on HCA/HBA support to + * remap the separate metadata buffer from the block layer. + */ + if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) + return -EINVAL; + if (ctrl->max_integrity_segments) + ns->features |= + (NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + } else { + /* + * For PCIe controllers, we can't easily remap the separate + * metadata buffer from the block layer and thus require a + * separate metadata buffer for block layer metadata/PI support. + * We allow extended LBAs for the passthrough interface, though. + */ + if (id->flbas & NVME_NS_FLBAS_META_EXT) + ns->features |= NVME_NS_EXT_LBAS; + else + ns->features |= NVME_NS_METADATA_SUPPORTED; + } + + return 0; +} + static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { @@ -2115,37 +2159,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) return -ENODEV; } - ns->features = 0; - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); - /* the PI implementation requires metadata equal t10 pi tuple size */ - if (ns->ms == sizeof(struct t10_pi_tuple)) - ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; - else - ns->pi_type = 0; - - if (ns->ms) { - /* - * For PCIe only the separate metadata pointer is supported, - * as the block layer supplies metadata in a separate bio_vec - * chain. For Fabrics, only metadata as part of extended data - * LBA is supported on the wire per the Fabrics specification, - * but the HBA/HCA will do the remapping from the separate - * metadata buffers for us. - */ - if (id->flbas & NVME_NS_FLBAS_META_EXT) { - ns->features |= NVME_NS_EXT_LBAS; - if ((ctrl->ops->flags & NVME_F_FABRICS) && - (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) && - ctrl->max_integrity_segments) - ns->features |= NVME_NS_METADATA_SUPPORTED; - } else { - if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS)) - return -EINVAL; - if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) - ns->features |= NVME_NS_METADATA_SUPPORTED; - } - } - + ret = nvme_configure_metadata(ns, id); + if (ret) + return ret; nvme_set_chunk_sectors(ns, id); nvme_update_disk_info(ns->disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH From f9d5f4579feafa721dba2f350fc064a1852c6f8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:11:42 +0200 Subject: [PATCH 11/23] nvme: freeze the queue over ->lba_shift updates Ensure that there can't be any I/O in flight went we change the disk geometry in nvme_update_ns_info, most notable the LBA size by lifting the queue free from nvme_update_disk_info into the caller Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 443aba9c9fdd1..82cd03c0ba21b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2021,7 +2021,7 @@ static void nvme_update_disk_info(struct gendisk *disk, /* unsupported block size, set capacity to 0 later */ bs = (1 << 9); } - blk_mq_freeze_queue(disk->queue); + blk_integrity_unregister(disk); atomic_bs = phys_bs = bs; @@ -2086,8 +2086,6 @@ static void nvme_update_disk_info(struct gendisk *disk, set_disk_ro(disk, true); else set_disk_ro(disk, false); - - blk_mq_unfreeze_queue(disk->queue); } static inline bool nvme_first_scan(struct gendisk *disk) @@ -2133,6 +2131,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) struct nvme_ctrl *ctrl = ns->ctrl; int ret; + blk_mq_freeze_queue(ns->disk->queue); /* * If identify namespace failed, use default 512 byte block size so * block layer can use before failing read/write for 0 capacity. @@ -2150,30 +2149,39 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) dev_warn(ctrl->device, "failed to add zoned namespace:%u ret:%d\n", ns->head->ns_id, ret); - return ret; + goto out_unfreeze; } break; default: dev_warn(ctrl->device, "unknown csi:%u ns:%u\n", ns->head->ids.csi, ns->head->ns_id); - return -ENODEV; + ret = -ENODEV; + goto out_unfreeze; } ret = nvme_configure_metadata(ns, id); if (ret) - return ret; + goto out_unfreeze; nvme_set_chunk_sectors(ns, id); nvme_update_disk_info(ns->disk, ns, id); + blk_mq_unfreeze_queue(ns->disk->queue); + #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { + blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); blk_queue_update_readahead(ns->head->disk->queue); nvme_update_bdev_size(ns->head->disk); + blk_mq_unfreeze_queue(ns->head->disk->queue); } #endif return 0; + +out_unfreeze: + blk_mq_unfreeze_queue(ns->disk->queue); + return ret; } static int nvme_validate_ns(struct nvme_ns *ns) From 13f0b26bbf0a8660d23cad22ed0c4b58b5675dc4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:03:13 +0200 Subject: [PATCH 12/23] nvme: clean up the check for too large logic block sizes Use a single statement to set both the capacity and fake block size instead of two. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 82cd03c0ba21b..0114fe47de357 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2017,8 +2017,12 @@ static void nvme_update_disk_info(struct gendisk *disk, unsigned short bs = 1 << ns->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; + /* + * The block layer can't support LBA sizes larger than the page size + * yet, so catch this early and don't allow block I/O. + */ if (ns->lba_shift > PAGE_SHIFT) { - /* unsupported block size, set capacity to 0 later */ + capacity = 0; bs = (1 << 9); } @@ -2055,13 +2059,6 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - /* - * The block layer can't support LBA sizes larger than the page size - * yet, so catch this early and don't allow block I/O. - */ - if (ns->lba_shift > PAGE_SHIFT) - capacity = 0; - /* * Register a metadata profile for PI, or the plain non-integrity NVMe * metadata masquerading as Type 0 if supported, otherwise reject block From 310b30e575b1e2b9a569c3582062b79c5a562fb7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:12:02 +0200 Subject: [PATCH 13/23] nvme: remove the 0 lba_shift check in nvme_update_ns_info We can no longer reach this code if Identify Namespace failed. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0114fe47de357..910198c3e0bbd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2129,13 +2129,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) int ret; blk_mq_freeze_queue(ns->disk->queue); - /* - * If identify namespace failed, use default 512 byte block size so - * block layer can use before failing read/write for 0 capacity. - */ ns->lba_shift = id->lbaf[lbaf].ds; - if (ns->lba_shift == 0) - ns->lba_shift = 9; switch (ns->head->ids.csi) { case NVME_CSI_NVM: From 658d9f7c2c7044f9978623e7f429b85bbb7553a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:05:28 +0200 Subject: [PATCH 14/23] nvme: set the queue limits in nvme_update_ns_info Only set the queue limits once we have the real block size. This also updates the limits on a rescan if needed. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 46 ++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 910198c3e0bbd..bb630d5fcb964 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2010,6 +2010,26 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) return 0; } +static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, + struct request_queue *q) +{ + bool vwc = false; + + if (ctrl->max_hw_sectors) { + u32 max_segments = + (ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1; + + max_segments = min_not_zero(max_segments, ctrl->max_segments); + blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); + blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); + } + blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); + blk_queue_dma_alignment(q, 7); + if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) + vwc = true; + blk_queue_write_cache(q, vwc, vwc); +} + static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { @@ -2130,6 +2150,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) blk_mq_freeze_queue(ns->disk->queue); ns->lba_shift = id->lbaf[lbaf].ds; + nvme_set_queue_limits(ctrl, ns->queue); switch (ns->head->ids.csi) { case NVME_CSI_NVM: @@ -2495,26 +2516,6 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl); -static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, - struct request_queue *q) -{ - bool vwc = false; - - if (ctrl->max_hw_sectors) { - u32 max_segments = - (ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1; - - max_segments = min_not_zero(max_segments, ctrl->max_segments); - blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); - blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); - } - blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); - blk_queue_dma_alignment(q, 7); - if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) - vwc = true; - blk_queue_write_cache(q, vwc, vwc); -} - static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) { __le64 ts; @@ -3922,12 +3923,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->queue->queuedata = ns; ns->ctrl = ctrl; - kref_init(&ns->kref); - ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ - - blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); - nvme_set_queue_limits(ctrl, ns->queue); ret = nvme_init_ns_head(ns, nsid, id); if (ret) From 75eb779ee0d343d1eb6d8c19205bc76a1d3ba2ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 08:34:43 +0200 Subject: [PATCH 15/23] nvme: update the known admin effects A Format NVM command can change the capabilities of namespaces, while Sanitize does change the Logical Block Content and must be serialized. Also remove CSUPP bit for Format - it is not a mandatory command, and we don't check for the bit anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bb630d5fcb964..4737591c1143a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -968,10 +968,10 @@ static u32 nvme_known_admin_effects(u8 opcode) { switch (opcode) { case nvme_admin_format_nvm: - return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | + return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_NCC | NVME_CMD_EFFECTS_CSE_MASK; case nvme_admin_sanitize_nvm: - return NVME_CMD_EFFECTS_CSE_MASK; + return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK; default: break; } From af0f446d2cad06bd678e9b912f7653b63d87fd6b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 11:10:36 +0200 Subject: [PATCH 16/23] nvme: remove nvme_update_formats Now that the queue is frozen before updating ->lba_shift we can't hit the invalid references mentioned in the comment any more. More importantly this code would not have helped us if the format was changed by another controller or through implementation defined back channels. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4737591c1143a..f19f6c7c5b124 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -89,7 +89,6 @@ static dev_t nvme_chr_devt; static struct class *nvme_class; static struct class *nvme_subsys_class; -static int nvme_validate_ns(struct nvme_ns *ns); static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); @@ -1009,7 +1008,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, * For simplicity, IO to all namespaces is quiesced even if the command * effects say only one namespace is affected. */ - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + if (effects & NVME_CMD_EFFECTS_CSE_MASK) { mutex_lock(&ctrl->scan_lock); mutex_lock(&ctrl->subsys->lock); nvme_mpath_start_freeze(ctrl->subsys); @@ -1020,36 +1019,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects) -{ - struct nvme_ns *ns; - - down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - if (nvme_validate_ns(ns)) - nvme_set_queue_dying(ns); - else if (blk_queue_is_zoned(ns->disk->queue)) { - /* - * IO commands are required to fully revalidate a zoned - * device. Force the command effects to trigger rescan - * work so report zones can run in a context with - * unfrozen IO queues. - */ - *effects |= NVME_CMD_EFFECTS_NCC; - } - up_read(&ctrl->namespaces_rwsem); -} - static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) { - /* - * Revalidate LBA changes prior to unfreezing. This is necessary to - * prevent memory corruption if a logical block size was changed by - * this command. - */ - if (effects & NVME_CMD_EFFECTS_LBCC) - nvme_update_formats(ctrl, &effects); - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + if (effects & NVME_CMD_EFFECTS_CSE_MASK) { nvme_unfreeze(ctrl); nvme_mpath_unfreeze(ctrl->subsys); mutex_unlock(&ctrl->subsys->lock); From 3a9967ba7ace91153f9caa8e60a55c7668c7b946 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 12:30:16 +0200 Subject: [PATCH 17/23] nvme: revalidate zone bitmaps in nvme_update_ns_info Consolidate the two calls into a single place. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f19f6c7c5b124..9c137d8819f75 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2150,6 +2150,12 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_update_disk_info(ns->disk, ns, id); blk_mq_unfreeze_queue(ns->disk->queue); + if (blk_queue_is_zoned(ns->queue)) { + ret = nvme_revalidate_zones(ns); + if (ret) + return ret; + } + #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { blk_mq_freeze_queue(ns->head->disk->queue); @@ -3915,8 +3921,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_update_ns_info(ns, id)) goto out_put_disk; - if (blk_queue_is_zoned(ns->queue) && nvme_revalidate_zones(ns)) - goto out_put_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -4012,8 +4016,6 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) } ret = nvme_validate_ns(ns); - if (!ret && blk_queue_is_zoned(ns->queue)) - ret = nvme_revalidate_zones(ns); revalidate_disk_size(ns->disk, ret == 0); if (ret) nvme_ns_remove(ns); From 8b7c0ff2d46dad4974e84f2363d7e0ddefaf0677 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 14:07:56 +0200 Subject: [PATCH 18/23] nvme: query namespace identifiers before adding the namespace Check the namespace identifier list first thing when scanning namespaces. This keeps the code to query the CSI common between the alloc and validate path, and helps to structure the code better for multiple command set support. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 116 ++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 63 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9c137d8819f75..ad18c32b36e7b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1281,6 +1281,8 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, int status, pos, len; void *data; + if (ctrl->vs < NVME_VS(1, 3, 0) && !nvme_multi_css(ctrl)) + return 0; if (ctrl->quirks & NVME_QUIRK_NO_NS_DESC_LIST) return 0; @@ -1335,8 +1337,8 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n NVME_IDENTIFY_DATA_SIZE); } -static int nvme_identify_ns(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_id_ns **id) +static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, + struct nvme_ns_ids *ids, struct nvme_id_ns **id) { struct nvme_command c = { }; int error; @@ -1359,6 +1361,14 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, error = -ENODEV; if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; + + if (ctrl->vs >= NVME_VS(1, 1, 0) && + !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0) && + !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + return 0; out_free_id: @@ -1884,20 +1894,6 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) nvme_lba_to_sect(ns, max_blocks)); } -static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, - struct nvme_id_ns *id, struct nvme_ns_ids *ids) -{ - memset(ids, 0, sizeof(*ids)); - - if (ctrl->vs >= NVME_VS(1, 1, 0)) - memcpy(ids->eui64, id->eui64, sizeof(id->eui64)); - if (ctrl->vs >= NVME_VS(1, 2, 0)) - memcpy(ids->nguid, id->nguid, sizeof(id->nguid)); - if (ctrl->vs >= NVME_VS(1, 3, 0) || nvme_multi_css(ctrl)) - return nvme_identify_ns_descs(ctrl, nsid, ids); - return 0; -} - static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) { return !uuid_is_null(&ids->uuid) || @@ -2117,30 +2113,16 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) { unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; - struct nvme_ctrl *ctrl = ns->ctrl; int ret; blk_mq_freeze_queue(ns->disk->queue); ns->lba_shift = id->lbaf[lbaf].ds; - nvme_set_queue_limits(ctrl, ns->queue); + nvme_set_queue_limits(ns->ctrl, ns->queue); - switch (ns->head->ids.csi) { - case NVME_CSI_NVM: - break; - case NVME_CSI_ZNS: + if (ns->head->ids.csi == NVME_CSI_ZNS) { ret = nvme_update_zone_info(ns, lbaf); - if (ret) { - dev_warn(ctrl->device, - "failed to add zoned namespace:%u ret:%d\n", - ns->head->ns_id, ret); + if (ret) goto out_unfreeze; - } - break; - default: - dev_warn(ctrl->device, "unknown csi:%u ns:%u\n", - ns->head->ids.csi, ns->head->ns_id); - ret = -ENODEV; - goto out_unfreeze; } ret = nvme_configure_metadata(ns, id); @@ -2174,11 +2156,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) return ret; } -static int nvme_validate_ns(struct nvme_ns *ns) +static int nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) { struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_id_ns *id; - struct nvme_ns_ids ids; int ret = 0; if (test_bit(NVME_NS_DEAD, &ns->flags)) { @@ -2186,15 +2167,11 @@ static int nvme_validate_ns(struct nvme_ns *ns) return -ENODEV; } - ret = nvme_identify_ns(ctrl, ns->head->ns_id, &id); + ret = nvme_identify_ns(ctrl, ns->head->ns_id, ids, &id); if (ret) goto out; - ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); - if (ret) - goto free_id; - - if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) { + if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); ret = -ENODEV; @@ -3794,25 +3771,16 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, } static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, - struct nvme_id_ns *id) + struct nvme_ns_ids *ids, bool is_shared) { struct nvme_ctrl *ctrl = ns->ctrl; - bool is_shared = id->nmic & NVME_NS_NMIC_SHARED; struct nvme_ns_head *head = NULL; - struct nvme_ns_ids ids; int ret = 0; - ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); - if (ret) { - if (ret < 0) - return ret; - return blk_status_to_errno(nvme_error_status(ret)); - } - mutex_lock(&ctrl->subsys->lock); head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { - head = nvme_alloc_ns_head(ctrl, nsid, &ids); + head = nvme_alloc_ns_head(ctrl, nsid, ids); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; @@ -3825,7 +3793,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, "Duplicate unshared namespace %d\n", nsid); goto out_put_ns_head; } - if (!nvme_ns_ids_equal(&head->ids, &ids)) { + if (!nvme_ns_ids_equal(&head->ids, ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", nsid); @@ -3873,7 +3841,8 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) } EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); -static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) +static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, + struct nvme_ns_ids *ids) { struct nvme_ns *ns; struct gendisk *disk; @@ -3881,7 +3850,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) char disk_name[DISK_NAME_LEN]; int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT, ret; - if (nvme_identify_ns(ctrl, nsid, &id)) + if (nvme_identify_ns(ctrl, nsid, ids, &id)) return; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); @@ -3903,7 +3872,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->ctrl = ctrl; kref_init(&ns->kref); - ret = nvme_init_ns_head(ns, nsid, id); + ret = nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED); if (ret) goto out_free_queue; nvme_set_disk_name(disk_name, ns, ctrl, &flags); @@ -4006,20 +3975,41 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { + struct nvme_ns_ids ids = { }; struct nvme_ns *ns; int ret; + if (nvme_identify_ns_descs(ctrl, nsid, &ids)) + return; + ns = nvme_find_get_ns(ctrl, nsid); - if (!ns) { - nvme_alloc_ns(ctrl, nsid); + if (ns) { + ret = nvme_validate_ns(ns, &ids); + revalidate_disk_size(ns->disk, ret == 0); + if (ret) + nvme_ns_remove(ns); + nvme_put_ns(ns); return; } - ret = nvme_validate_ns(ns); - revalidate_disk_size(ns->disk, ret == 0); - if (ret) - nvme_ns_remove(ns); - nvme_put_ns(ns); + switch (ids.csi) { + case NVME_CSI_NVM: + nvme_alloc_ns(ctrl, nsid, &ids); + break; + case NVME_CSI_ZNS: + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + dev_warn(ctrl->device, + "nsid %u not supported without CONFIG_BLK_DEV_ZONED\n", + nsid); + break; + } + nvme_alloc_ns(ctrl, nsid, &ids); + break; + default: + dev_warn(ctrl->device, "unknown csi %u for nsid %u\n", + ids.csi, nsid); + break; + } } static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, From b2dc748a70c65a1b4eb1b9fceab57662cfd83e41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 13:55:22 +0200 Subject: [PATCH 19/23] nvme: move nvme_validate_ns Move nvme_validate_ns just above its only remaining caller. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 74 ++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ad18c32b36e7b..07309f6c14faa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2156,43 +2156,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) return ret; } -static int nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) -{ - struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_id_ns *id; - int ret = 0; - - if (test_bit(NVME_NS_DEAD, &ns->flags)) { - set_capacity(ns->disk, 0); - return -ENODEV; - } - - ret = nvme_identify_ns(ctrl, ns->head->ns_id, ids, &id); - if (ret) - goto out; - - if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { - dev_err(ctrl->device, - "identifiers changed for nsid %d\n", ns->head->ns_id); - ret = -ENODEV; - goto free_id; - } - - ret = nvme_update_ns_info(ns, id); -free_id: - kfree(id); -out: - /* - * Only fail the function if we got a fatal error back from the - * device, otherwise ignore the error and just move on. - */ - if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR))) - ret = 0; - else if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); - return ret; -} - static char nvme_pr_type(enum pr_type type) { switch (type) { @@ -3973,6 +3936,43 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) } } +static int nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_id_ns *id; + int ret = 0; + + if (test_bit(NVME_NS_DEAD, &ns->flags)) { + set_capacity(ns->disk, 0); + return -ENODEV; + } + + ret = nvme_identify_ns(ctrl, ns->head->ns_id, ids, &id); + if (ret) + goto out; + + if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { + dev_err(ctrl->device, + "identifiers changed for nsid %d\n", ns->head->ns_id); + ret = -ENODEV; + goto free_id; + } + + ret = nvme_update_ns_info(ns, id); +free_id: + kfree(id); +out: + /* + * Only fail the function if we got a fatal error back from the + * device, otherwise ignore the error and just move on. + */ + if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR))) + ret = 0; + else if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); + return ret; +} + static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_ids ids = { }; From 0a05226a3a2038b28d78101239196222d375124a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 13:59:06 +0200 Subject: [PATCH 20/23] nvme: refactor nvme_validate_ns Move the logic to revalidate the block_device size or remove the namespace from the caller into nvme_validate_ns. This removes the return value and thus the status code translation. Additionally it also catches non-permanent errors from nvme_update_ns_info using the existing logic. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Damien Le Moal --- drivers/nvme/host/core.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 07309f6c14faa..0b88a377a47f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3899,6 +3899,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; + set_capacity(ns->disk, 0); nvme_fault_inject_fini(&ns->fault_inject); mutex_lock(&ns->ctrl->subsys->lock); @@ -3936,58 +3937,54 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) } } -static int nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) +static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) { struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_id_ns *id; - int ret = 0; + int ret = -ENODEV; - if (test_bit(NVME_NS_DEAD, &ns->flags)) { - set_capacity(ns->disk, 0); - return -ENODEV; - } + if (test_bit(NVME_NS_DEAD, &ns->flags)) + goto out; ret = nvme_identify_ns(ctrl, ns->head->ns_id, ids, &id); if (ret) goto out; + ret = -ENODEV; if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); - ret = -ENODEV; - goto free_id; + goto out_free_id; } ret = nvme_update_ns_info(ns, id); -free_id: + +out_free_id: kfree(id); out: /* - * Only fail the function if we got a fatal error back from the + * Only remove the namespace if we got a fatal error back from the * device, otherwise ignore the error and just move on. + * + * TODO: we should probably schedule a delayed retry here. */ - if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR))) - ret = 0; - else if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); - return ret; + if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) + nvme_ns_remove(ns); + else + revalidate_disk_size(ns->disk, true); } static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_ids ids = { }; struct nvme_ns *ns; - int ret; if (nvme_identify_ns_descs(ctrl, nsid, &ids)) return; ns = nvme_find_get_ns(ctrl, nsid); if (ns) { - ret = nvme_validate_ns(ns, &ids); - revalidate_disk_size(ns->disk, ret == 0); - if (ret) - nvme_ns_remove(ns); + nvme_validate_ns(ns, &ids); nvme_put_ns(ns); return; } From 7b15336257ed5623cddefa8d33069777494c98e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Sep 2020 14:08:28 +0200 Subject: [PATCH 21/23] nvme: remove nvme_identify_ns_list Just fold it into the only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0b88a377a47f6..385b103178737 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1326,17 +1326,6 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, return status; } -static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list) -{ - struct nvme_command c = { }; - - c.identify.opcode = nvme_admin_identify; - c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST; - c.identify.nsid = cpu_to_le32(nsid); - return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, - NVME_IDENTIFY_DATA_SIZE); -} - static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids, struct nvme_id_ns **id) { @@ -4042,7 +4031,14 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) return -ENOMEM; for (;;) { - ret = nvme_identify_ns_list(ctrl, prev, ns_list); + struct nvme_command cmd = { + .identify.opcode = nvme_admin_identify, + .identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST, + .identify.nsid = cpu_to_le32(prev), + }; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &cmd, ns_list, + NVME_IDENTIFY_DATA_SIZE); if (ret) goto free; From af5d6f7ba5f99f8316473557240ae9acdd20a6bd Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 1 Oct 2020 11:54:31 -0700 Subject: [PATCH 22/23] nvme-core: remove extra variable In nvme_validate_ns() the exra variable ctrl is used only twice. Using ns->ctrl directly still maintains the redability and original length of the lines in the code. Get rid of the extra variable ctrl & use ns->ctrl directly. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 385b103178737..2e505cdf051ec 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3928,20 +3928,19 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) { - struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_id_ns *id; int ret = -ENODEV; if (test_bit(NVME_NS_DEAD, &ns->flags)) goto out; - ret = nvme_identify_ns(ctrl, ns->head->ns_id, ids, &id); + ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, ids, &id); if (ret) goto out; ret = -ENODEV; if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { - dev_err(ctrl->device, + dev_err(ns->ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); goto out_free_id; } From c4485252cf36ae62c8bf12c4aede72345cad0d2b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 1 Oct 2020 11:54:32 -0700 Subject: [PATCH 23/23] nvme-core: remove extra condition for vwc In nvme_set_queue_limits() we initialize vwc to false and later add a condition to set vwc true. The value of the vwc can be declare initialized which makes all the blk_queue_XXX() calls uniform. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2e505cdf051ec..e85f6304efd72 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1970,7 +1970,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, struct request_queue *q) { - bool vwc = false; + bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT; if (ctrl->max_hw_sectors) { u32 max_segments = @@ -1982,8 +1982,6 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, } blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); blk_queue_dma_alignment(q, 7); - if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) - vwc = true; blk_queue_write_cache(q, vwc, vwc); }