From b925a2dc165e5ec2330ca1256704faef8ed96913 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 28 Aug 2017 12:52:27 +0300 Subject: [PATCH 1/3] nvme-rdma: default MR page size to 4k Due to various page sizes in the system (IOMMU/device/kernel), we set the fabrics controller page size to 4k and block layer boundaries accordinglly. In architectures that uses different kernel page size we'll have a mismatch to the MR page size that may cause a mapping error. Update the MR page size to correspond to the core ctrl settings. Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index da04df1af2317..a03299d779229 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -920,7 +920,11 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; int nr; - nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, PAGE_SIZE); + /* + * Align the MR to a 4K page size to match the ctrl page size and + * the block virtual boundary. + */ + nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); if (nr < count) { if (nr < 0) return nr; @@ -1583,7 +1587,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) goto out_cleanup_queue; ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9); + (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); error = nvme_init_identify(&ctrl->ctrl); if (error) From 4033f35d174af4804a79fd5731d9e6be976f9f28 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Aug 2017 10:47:18 +0200 Subject: [PATCH 2/3] nvme-pci: use dma memory for the host memory buffer descriptors The NVMe 1.3 specification says in section 5.21.1.13: "After a successful completion of a Set Features enabling the host memory buffer, the host shall not write to the associated host memory region, buffer size, or descriptor list until the host memory buffer has been disabled." While this doesn't state that the descriptor list must remain accessible to the device it certainly implies it must remaing readable by the device. So switch to a dma coherent allocation for the descriptor list just to be safe - it's not like the cost for it matters compared to the actual memory buffers. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Fixes: 87ad72a59a38 ("nvme-pci: implement host memory buffer support") --- drivers/nvme/host/pci.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 925467b31a333..ea892e732268f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -109,6 +109,7 @@ struct nvme_dev { /* host memory buffer support: */ u64 host_mem_size; u32 nr_host_mem_descs; + dma_addr_t host_mem_descs_dma; struct nvme_host_mem_buf_desc *host_mem_descs; void **host_mem_desc_bufs; }; @@ -1565,16 +1566,10 @@ static inline void nvme_release_cmb(struct nvme_dev *dev) static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) { - size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs); + u64 dma_addr = dev->host_mem_descs_dma; struct nvme_command c; - u64 dma_addr; int ret; - dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len, - DMA_TO_DEVICE); - if (dma_mapping_error(dev->dev, dma_addr)) - return -ENOMEM; - memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF); @@ -1591,7 +1586,6 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) "failed to set host mem (err %d, flags %#x).\n", ret, bits); } - dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE); return ret; } @@ -1609,7 +1603,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev) kfree(dev->host_mem_desc_bufs); dev->host_mem_desc_bufs = NULL; - kfree(dev->host_mem_descs); + dma_free_coherent(dev->dev, + dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), + dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; } @@ -1617,6 +1613,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) { struct nvme_host_mem_buf_desc *descs; u32 chunk_size, max_entries, len; + dma_addr_t descs_dma; int i = 0; void **bufs; u64 size = 0, tmp; @@ -1627,7 +1624,8 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) tmp = (preferred + chunk_size - 1); do_div(tmp, chunk_size); max_entries = tmp; - descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL); + descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs), + &descs_dma, GFP_KERNEL); if (!descs) goto out; @@ -1661,6 +1659,7 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) dev->nr_host_mem_descs = i; dev->host_mem_size = size; dev->host_mem_descs = descs; + dev->host_mem_descs_dma = descs_dma; dev->host_mem_desc_bufs = bufs; return 0; @@ -1674,7 +1673,8 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) kfree(bufs); out_free_descs: - kfree(descs); + dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, + descs_dma); out: /* try a smaller chunk size if we failed early */ if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) { From 223694b9ae8bfba99f3528d49d07a740af6ff95a Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Thu, 31 Aug 2017 11:22:49 +0800 Subject: [PATCH 3/3] nvme: fix the definition of the doorbell buffer config support bit NVMe 1.3 specification defines the Optional Admin Command Support feature flags, bit 8 set to '1' then the controller supports the Doorbell Buffer Config command. Bit 7 is used for Virtualization Mangement command. Signed-off-by: Changpeng Liu Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Fixes: f9f38e33 ("nvme: improve performance for virtual NVMe devices") Cc: stable@vger.kernel.org --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 25d8225dbd046..8efff888bd9bc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -254,7 +254,7 @@ enum { NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, - NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, }; struct nvme_lbaf {