Skip to content

Commit

Permalink
NVMe: Metadata handling through submit io ioctl
Browse files Browse the repository at this point in the history
This adds support for the extended metadata formats through the submit
IO ioctl, and simplifies the remaining code path that handles metadata passed in a separate buffer.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
  • Loading branch information
Keith Busch authored and Jens Axboe committed Apr 8, 2015
1 parent 7f749d9 commit a67a951
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 76 deletions.
123 changes: 49 additions & 74 deletions drivers/block/nvme-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1745,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
struct nvme_dev *dev = ns->dev;
struct nvme_user_io io;
struct nvme_command c;
unsigned length, meta_len;
int status, i;
struct nvme_iod *iod, *meta_iod = NULL;
dma_addr_t meta_dma_addr;
void *meta, *uninitialized_var(meta_mem);
unsigned length, meta_len, prp_len;
int status, write;
struct nvme_iod *iod;
dma_addr_t meta_dma = 0;
void *meta = NULL;

if (copy_from_user(&io, uio, sizeof(io)))
return -EFAULT;
length = (io.nblocks + 1) << ns->lba_shift;
meta_len = (io.nblocks + 1) * ns->ms;

if (meta_len && ((io.metadata & 3) || !io.metadata))
if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
return -EINVAL;
else if (meta_len && ns->ext) {
length += meta_len;
meta_len = 0;
}

write = io.opcode & 1;

switch (io.opcode) {
case nvme_cmd_write:
case nvme_cmd_read:
case nvme_cmd_compare:
iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
iod = nvme_map_user_pages(dev, write, io.addr, length);
break;
default:
return -EINVAL;
Expand All @@ -1772,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
if (IS_ERR(iod))
return PTR_ERR(iod);

prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
if (length != prp_len) {
status = -ENOMEM;
goto unmap;
}
if (meta_len) {
meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
&meta_dma, GFP_KERNEL);
if (!meta) {
status = -ENOMEM;
goto unmap;
}
if (write) {
if (copy_from_user(meta, (void __user *)io.metadata,
meta_len)) {
status = -EFAULT;
goto unmap;
}
}
}

memset(&c, 0, sizeof(c));
c.rw.opcode = io.opcode;
c.rw.flags = io.flags;
Expand All @@ -1783,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.reftag = cpu_to_le32(io.reftag);
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);

if (meta_len) {
meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
meta_len);
if (IS_ERR(meta_iod)) {
status = PTR_ERR(meta_iod);
meta_iod = NULL;
goto unmap;
}

meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
&meta_dma_addr, GFP_KERNEL);
if (!meta_mem) {
status = -ENOMEM;
goto unmap;
}

if (io.opcode & 1) {
int meta_offset = 0;

for (i = 0; i < meta_iod->nents; i++) {
meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
meta_iod->sg[i].offset;
memcpy(meta_mem + meta_offset, meta,
meta_iod->sg[i].length);
kunmap_atomic(meta);
meta_offset += meta_iod->sg[i].length;
}
}

c.rw.metadata = cpu_to_le64(meta_dma_addr);
}

length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
c.rw.prp2 = cpu_to_le64(iod->first_dma);

if (length != (io.nblocks + 1) << ns->lba_shift)
status = -ENOMEM;
else
status = nvme_submit_io_cmd(dev, ns, &c, NULL);

if (meta_len) {
if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
int meta_offset = 0;

for (i = 0; i < meta_iod->nents; i++) {
meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
meta_iod->sg[i].offset;
memcpy(meta, meta_mem + meta_offset,
meta_iod->sg[i].length);
kunmap_atomic(meta);
meta_offset += meta_iod->sg[i].length;
}
}

dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
meta_dma_addr);
}

c.rw.metadata = cpu_to_le64(meta_dma);
status = nvme_submit_io_cmd(dev, ns, &c, NULL);
unmap:
nvme_unmap_user_pages(dev, io.opcode & 1, iod);
nvme_unmap_user_pages(dev, write, iod);
nvme_free_iod(dev, iod);

if (meta_iod) {
nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
nvme_free_iod(dev, meta_iod);
if (meta) {
if (status == NVME_SC_SUCCESS && !write) {
if (copy_to_user((void __user *)io.metadata, meta,
meta_len))
status = -EFAULT;
}
dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
}

return status;
}

Expand Down Expand Up @@ -2014,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id;
dma_addr_t dma_addr;
int lbaf, pi_type, old_ms;
u8 lbaf, pi_type;
u16 old_ms;
unsigned short bs;

id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
Expand All @@ -2035,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

/*
* If identify namespace failed, use default 512 byte block size so
Expand All @@ -2051,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
(ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
(ns->ms && ns->ext)))
blk_integrity_unregister(disk);

ns->pi_type = pi_type;
blk_queue_logical_block_size(ns->queue, bs);

if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) &&
!(id->flbas & NVME_NS_FLBAS_META_EXT))
!ns->ext)
nvme_init_integrity(ns);

if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
Expand Down
5 changes: 3 additions & 2 deletions include/linux/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,9 @@ struct nvme_ns {

unsigned ns_id;
int lba_shift;
int ms;
int pi_type;
u16 ms;
bool ext;
u8 pi_type;
u64 mode_select_num_blocks;
u32 mode_select_block_len;
};
Expand Down

0 comments on commit a67a951

Please sign in to comment.