Skip to content

Commit

Permalink
nvme: support for zoned namespaces
Browse files Browse the repository at this point in the history
Add support for NVM Express Zoned Namespaces (ZNS) Command Set defined
in NVM Express TP4053. Zoned namespaces are discovered based on their
Command Set Identifier reported in the namespaces Namespace
Identification Descriptor list. A successfully discovered Zoned
Namespace will be registered with the block layer as a host managed
zoned block device with Zone Append command support. A namespace that
does not support append is not supported by the driver.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Javier González <javier.gonz@samsung.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Signed-off-by: Ajay Joshi <ajay.joshi@wdc.com>
Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com>
Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Signed-off-by: Matias Bjørling <matias.bjorling@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Keith Busch <keith.busch@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
  • Loading branch information
Keith Busch authored and Christoph Hellwig committed Jul 8, 2020
1 parent be93e87 commit 240e6ee
Show file tree
Hide file tree
Showing 6 changed files with 492 additions and 15 deletions.
5 changes: 3 additions & 2 deletions block/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,10 @@ config BLK_DEV_ZONED
select MQ_IOSCHED_DEADLINE
help
Block layer zoned block device support. This option enables
support for ZAC/ZBC host-managed and host-aware zoned block devices.
support for ZAC/ZBC/ZNS host-managed and host-aware zoned block
devices.

Say yes here if you have a ZAC or ZBC storage device.
Say yes here if you have a ZAC, ZBC, or ZNS storage device.

config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
Expand Down
1 change: 1 addition & 0 deletions drivers/nvme/host/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ nvme-core-y := core.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o

Expand Down
97 changes: 84 additions & 13 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ static dev_t nvme_chr_devt;
static struct class *nvme_class;
static struct class *nvme_subsys_class;

static int nvme_revalidate_disk(struct gendisk *disk);
static int _nvme_revalidate_disk(struct gendisk *disk);
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
Expand Down Expand Up @@ -287,6 +287,10 @@ void nvme_complete_rq(struct request *req)
nvme_retry_req(req);
return;
}
} else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
req_op(req) == REQ_OP_ZONE_APPEND) {
req->__sector = nvme_lba_to_sect(req->q->queuedata,
le64_to_cpu(nvme_req(req)->result.u64));
}

nvme_trace_bio_complete(req, status);
Expand Down Expand Up @@ -673,7 +677,8 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
}

static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd)
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
{
struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
Expand All @@ -687,7 +692,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;

cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
cmnd->rw.opcode = op;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
Expand Down Expand Up @@ -716,6 +721,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
case NVME_NS_DPS_PI_TYPE2:
control |= NVME_RW_PRINFO_PRCHK_GUARD |
NVME_RW_PRINFO_PRCHK_REF;
if (op == nvme_cmd_zone_append)
control |= NVME_RW_APPEND_PIREMAP;
cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
break;
}
Expand Down Expand Up @@ -756,15 +763,33 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd);
break;
case REQ_OP_ZONE_RESET_ALL:
case REQ_OP_ZONE_RESET:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_RESET);
break;
case REQ_OP_ZONE_OPEN:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_OPEN);
break;
case REQ_OP_ZONE_CLOSE:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_CLOSE);
break;
case REQ_OP_ZONE_FINISH:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_FINISH);
break;
case REQ_OP_WRITE_ZEROES:
ret = nvme_setup_write_zeroes(ns, req, cmd);
break;
case REQ_OP_DISCARD:
ret = nvme_setup_discard(ns, req, cmd);
break;
case REQ_OP_READ:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
break;
case REQ_OP_WRITE:
ret = nvme_setup_rw(ns, req, cmd);
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
break;
case REQ_OP_ZONE_APPEND:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
break;
default:
WARN_ON_ONCE(1);
Expand Down Expand Up @@ -1398,14 +1423,23 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return effects;
}

static void nvme_update_formats(struct nvme_ctrl *ctrl)
static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
{
struct nvme_ns *ns;

down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
if (ns->disk && nvme_revalidate_disk(ns->disk))
if (ns->disk && _nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
else if (blk_queue_is_zoned(ns->disk->queue)) {
/*
* IO commands are required to fully revalidate a zoned
* device. Force the command effects to trigger rescan
* work so report zones can run in a context with
* unfrozen IO queues.
*/
*effects |= NVME_CMD_EFFECTS_NCC;
}
up_read(&ctrl->namespaces_rwsem);
}

Expand All @@ -1417,7 +1451,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
* this command.
*/
if (effects & NVME_CMD_EFFECTS_LBCC)
nvme_update_formats(ctrl);
nvme_update_formats(ctrl, &effects);
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
Expand Down Expand Up @@ -1532,7 +1566,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
* Issue ioctl requests on the first available path. Note that unlike normal
* block layer requests we will not retry failed request on another controller.
*/
static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns_head **head, int *srcu_idx)
{
#ifdef CONFIG_NVME_MULTIPATH
Expand All @@ -1552,7 +1586,7 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
return disk->private_data;
}

static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
{
if (head)
srcu_read_unlock(&head->srcu, idx);
Expand Down Expand Up @@ -1945,23 +1979,34 @@ static void nvme_update_disk_info(struct gendisk *disk,

static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
u32 iob;

/*
* If identify namespace failed, use default 512 byte block size so
* block layer can use before failing read/write for 0 capacity.
*/
ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
ns->lba_shift = id->lbaf[lbaf].ds;
if (ns->lba_shift == 0)
ns->lba_shift = 9;

switch (ns->head->ids.csi) {
case NVME_CSI_NVM:
break;
case NVME_CSI_ZNS:
ret = nvme_update_zone_info(disk, ns, lbaf);
if (ret) {
dev_warn(ctrl->device,
"failed to add zoned namespace:%u ret:%d\n",
ns->head->ns_id, ret);
return ret;
}
break;
default:
dev_warn(ctrl->device, "unknown csi:%d ns:%d\n",
dev_warn(ctrl->device, "unknown csi:%u ns:%u\n",
ns->head->ids.csi, ns->head->ns_id);
return -ENODEV;
}
Expand All @@ -1973,7 +2018,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
Expand Down Expand Up @@ -2015,7 +2060,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
return 0;
}

static int nvme_revalidate_disk(struct gendisk *disk)
static int _nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
Expand Down Expand Up @@ -2063,6 +2108,28 @@ static int nvme_revalidate_disk(struct gendisk *disk)
return ret;
}

static int nvme_revalidate_disk(struct gendisk *disk)
{
int ret;

ret = _nvme_revalidate_disk(disk);
if (ret)
return ret;

#ifdef CONFIG_BLK_DEV_ZONED
if (blk_queue_is_zoned(disk->queue)) {
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;

ret = blk_revalidate_disk_zones(disk, NULL);
if (!ret)
blk_queue_max_zone_append_sectors(disk->queue,
ctrl->max_zone_append);
}
#endif
return ret;
}

static char nvme_pr_type(enum pr_type type)
{
switch (type) {
Expand Down Expand Up @@ -2193,6 +2260,7 @@ static const struct block_device_operations nvme_fops = {
.release = nvme_release,
.getgeo = nvme_getgeo,
.revalidate_disk= nvme_revalidate_disk,
.report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};

Expand All @@ -2219,6 +2287,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.ioctl = nvme_ioctl,
.compat_ioctl = nvme_compat_ioctl,
.getgeo = nvme_getgeo,
.report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};
#endif /* CONFIG_NVME_MULTIPATH */
Expand Down Expand Up @@ -4446,6 +4515,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
Expand Down
39 changes: 39 additions & 0 deletions drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@ struct nvme_ctrl {
u32 max_hw_sectors;
u32 max_segments;
u32 max_integrity_segments;
#ifdef CONFIG_BLK_DEV_ZONED
u32 max_zone_append;
#endif
u16 crdt[3];
u16 oncs;
u16 oacs;
Expand Down Expand Up @@ -404,6 +407,9 @@ struct nvme_ns {
u16 sgs;
u32 sws;
u8 pi_type;
#ifdef CONFIG_BLK_DEV_ZONED
u64 zsze;
#endif
unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
Expand Down Expand Up @@ -571,6 +577,9 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl);

int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
void *log, size_t size, u64 offset);
struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns_head **head, int *srcu_idx);
void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);

extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct block_device_operations nvme_ns_head_ops;
Expand Down Expand Up @@ -693,6 +702,36 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
}
#endif /* CONFIG_NVME_MULTIPATH */

#ifdef CONFIG_BLK_DEV_ZONED
int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
unsigned lbaf);

int nvme_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);

blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd,
enum nvme_zone_mgmt_action action);
#else
#define nvme_report_zones NULL

static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd,
enum nvme_zone_mgmt_action action)
{
return BLK_STS_NOTSUPP;
}

static inline int nvme_update_zone_info(struct gendisk *disk,
struct nvme_ns *ns,
unsigned lbaf)
{
dev_warn(ns->ctrl->device,
"Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
return -EPROTONOSUPPORT;
}
#endif

#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
Expand Down
Loading

0 comments on commit 240e6ee

Please sign in to comment.