Merge tag 'nvme-6.15-2025-04-02' of git://git.infradead.org/nvme into block-6.15

Pull final NVMe updates from Keith:

"nvme updates for Linux 6.15

 - PCI endpoint target cleanup (Damien)
 - Early import for uring_cmd fixed buffer (Caleb)
 - Multipath documentation and notification improvements (John)
 - Invalid pci sq doorbell write fix (Maurizio)"

* tag 'nvme-6.15-2025-04-02' of git://git.infradead.org/nvme:
  nvme-pci: skip nvme_write_sq_db on empty rqlist
  nvme-multipath: change the NVME_MULTIPATH config option
  nvme: update the multipath warning in nvme_init_ns_head
  nvme/ioctl: move fixed buffer lookup to nvme_uring_cmd_io()
  nvme/ioctl: move blk_mq_free_request() out of nvme_map_user_request()
  nvme/ioctl: don't warn on vectorized uring_cmd with fixed buffer
  nvmet: pci-epf: Keep completion queues mapped
Jens Axboe committed Apr 2, 2025
2 parents e3e6831 + 288ff0d commit fb58555
Showing 5 changed files with 75 additions and 74 deletions.
13 changes: 9 additions & 4 deletions drivers/nvme/host/Kconfig
@@ -18,10 +18,15 @@ config NVME_MULTIPATH
bool "NVMe multipath support"
depends on NVME_CORE
help
This option enables support for multipath access to NVMe
subsystems. If this option is enabled only a single
/dev/nvmeXnY device will show up for each NVMe namespace,
even if it is accessible through multiple controllers.
This option controls support for multipath access to NVMe
subsystems. If this option is enabled support for NVMe multipath
access is included in the kernel. If this option is disabled support
for NVMe multipath access is excluded from the kernel. When this
option is disabled each controller/namespace receives its
own /dev/nvmeXnY device entry and NVMe multipath access is
not supported.

If unsure, say Y.

config NVME_VERBOSE_ERRORS
bool "NVMe verbose error reporting"
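
The reworded help text spells out the user-visible effect of CONFIG_NVME_MULTIPATH: with multipath support built in, a shared namespace shows up as a single /dev/nvmeXnY node even when it is reachable through several controllers, while without it every controller/namespace pair gets its own node. As a quick illustration (not part of the patch; the glob pattern is only an assumption about the usual namespace device naming and will also match partitions), a small userspace program can list the nodes to see which situation a system is in:

/*
 * Illustrative only: list NVMe namespace block nodes. With multipath
 * enabled a shared namespace appears once; with it disabled, each
 * controller path surfaces its own /dev/nvmeXnY node.
 */
#include <glob.h>
#include <stdio.h>

int main(void)
{
        glob_t g;

        /* /dev/nvme<ctrl-or-subsys>n<nsid> block device nodes */
        if (glob("/dev/nvme*n*", 0, NULL, &g) != 0) {
                fprintf(stderr, "no NVMe namespace nodes found\n");
                return 1;
        }
        for (size_t i = 0; i < g.gl_pathc; i++)
                printf("%s\n", g.gl_pathv[i]);
        globfree(&g);
        return 0;
}
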
2 changes: 1 addition & 1 deletion drivers/nvme/host/core.c
@@ -3822,7 +3822,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
"Found shared namespace %d, but multipathing not supported.\n",
info->nsid);
dev_warn_once(ctrl->device,
"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
"Shared namespace support requires core_nvme.multipath=Y.\n");
}
}

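
The updated warning drops the stale "will be removed in Linux 6.0" deprecation notice and instead points users at the multipath module parameter. A minimal userspace sketch for checking that parameter, assuming it is exported read-only at the usual sysfs location for a bool module_param of the nvme_core module:

/*
 * Illustrative only: read the nvme_core "multipath" parameter referenced
 * by the updated warning. Assumes /sys/module/nvme_core/parameters/multipath
 * exists, which is the conventional sysfs path for a readable bool param.
 */
#include <stdio.h>

int main(void)
{
        char val = '?';
        FILE *f = fopen("/sys/module/nvme_core/parameters/multipath", "r");

        if (!f) {
                perror("multipath parameter not available");
                return 1;
        }
        if (fscanf(f, " %c", &val) != 1)
                val = '?';
        fclose(f);
        printf("nvme_core.multipath=%c\n", val);
        return 0;
}
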
68 changes: 37 additions & 31 deletions drivers/nvme/host/ioctl.c
@@ -114,8 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,

static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
struct io_uring_cmd *ioucmd, unsigned int flags,
unsigned int iou_issue_flags)
struct iov_iter *iter, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
@@ -129,37 +128,23 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
if (!nvme_ctrl_sgl_supported(ctrl))
dev_warn_once(ctrl->device, "using unchecked data buffer\n");
if (has_metadata) {
if (!supports_metadata) {
ret = -EINVAL;
goto out;
}
if (!supports_metadata)
return -EINVAL;

if (!nvme_ctrl_meta_sgl_supported(ctrl))
dev_warn_once(ctrl->device,
"using unchecked metadata buffer\n");
}

if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
struct iov_iter iter;

/* fixedbufs is only for non-vectored io */
if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
ret = -EINVAL;
goto out;
}
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
rq_data_dir(req), &iter, ioucmd,
iou_issue_flags);
if (ret < 0)
goto out;
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
} else {
if (iter)
ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
else
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
0, rq_data_dir(req));
}

if (ret)
goto out;
return ret;

bio = req->bio;
if (bdev)
@@ -176,8 +161,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
out_unmap:
if (bio)
blk_rq_unmap_user(bio);
out:
blk_mq_free_request(req);
return ret;
}

@@ -200,9 +183,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
meta_len, NULL, flags, 0);
meta_len, NULL, flags);
if (ret)
return ret;
goto out_free_req;
}

bio = req->bio;
@@ -218,7 +201,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,

if (effects)
nvme_passthru_end(ctrl, ns, effects, cmd, ret);
return ret;

out_free_req:
blk_mq_free_request(req);
return ret;
}

@@ -469,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
struct iov_iter iter;
struct iov_iter *map_iter = NULL;
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
@@ -504,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
d.metadata_len = READ_ONCE(cmd->metadata_len);
d.timeout_ms = READ_ONCE(cmd->timeout_ms);

if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
/* fixedbufs is only for non-vectored io */
if (vec)
return -EINVAL;

ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
issue_flags);
if (ret < 0)
return ret;

map_iter = &iter;
}

if (issue_flags & IO_URING_F_NONBLOCK) {
rq_flags |= REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
@@ -517,11 +519,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;

if (d.data_len) {
ret = nvme_map_user_request(req, d.addr,
d.data_len, nvme_to_user_ptr(d.metadata),
d.metadata_len, ioucmd, vec, issue_flags);
ret = nvme_map_user_request(req, d.addr, d.data_len,
nvme_to_user_ptr(d.metadata), d.metadata_len,
map_iter, vec);
if (ret)
return ret;
goto out_free_req;
}

/* to free bio on completion, as req->bio will be null at that time */
@@ -531,6 +533,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false);
return -EIOCBQUEUED;

out_free_req:
blk_mq_free_request(req);
return ret;
}

static bool is_ctrl_ioctl(unsigned int cmd)
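
Two related cleanups shape these hunks: the fixed-buffer iov_iter for IORING_URING_CMD_FIXED is now imported in nvme_uring_cmd_io() before a request is allocated and passed to nvme_map_user_request() as an optional iterator, and blk_mq_free_request() moves out of nvme_map_user_request() so the function that allocated the request is also the one that frees it on error (the new out_free_req labels). A toy sketch of that ownership pattern, with all names invented for illustration:

/*
 * Illustrative only: the error-handling shape the ioctl.c refactor moves
 * to. The mapping helper reports failure but never frees the request;
 * the caller that allocated it releases it on every error path.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_request { void *payload; };

static struct toy_request *toy_alloc(void)
{
        return calloc(1, sizeof(struct toy_request));
}

/* Maps a user buffer into the request; on error it only reports, never frees. */
static int toy_map(struct toy_request *req, const void *buf, size_t len)
{
        if (!buf || !len)
                return -EINVAL;
        req->payload = malloc(len);
        return req->payload ? 0 : -ENOMEM;
}

static void toy_free(struct toy_request *req)
{
        free(req->payload);
        free(req);
}

int submit(const void *buf, size_t len)
{
        struct toy_request *req = toy_alloc();
        int ret;

        if (!req)
                return -ENOMEM;

        ret = toy_map(req, buf, len);
        if (ret)
                goto out_free_req;      /* caller-side cleanup, as in the patch */

        /* ... hand the request off to the lower layer here ... */
        return 0;

out_free_req:
        toy_free(req);
        return ret;
}

int main(void)
{
        printf("submit(NULL, 0) -> %d\n", submit(NULL, 0));
        return 0;
}

Keeping allocation and release in the same function makes the error paths symmetric and lets the mapping helper be reused by callers that manage request lifetimes differently.
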
3 changes: 3 additions & 0 deletions drivers/nvme/host/pci.c
@@ -986,6 +986,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
{
struct request *req;

if (rq_list_empty(rqlist))
return;

spin_lock(&nvmeq->sq_lock);
while ((req = rq_list_pop(rqlist))) {
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
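
The guard added to nvme_submit_cmds() returns before taking sq_lock when the batch list is empty, so nvme_write_sq_db() is never reached without at least one new submission queue entry; ringing the doorbell with an unchanged tail is what the fix title calls an invalid doorbell write. A toy version of the same pattern (all names invented for the sketch; build with -pthread):

/*
 * Illustrative only: the empty-batch guard in toy form. Flushing an
 * empty batch must not take the lock or ring the doorbell, otherwise
 * the device sees a doorbell write that adds no new entries.
 */
#include <pthread.h>
#include <stdio.h>

struct toy_queue {
        pthread_mutex_t lock;
        unsigned int tail;              /* next free SQ slot */
        unsigned int doorbell;          /* last value written to the device */
};

struct toy_batch {
        unsigned int nr_cmds;
};

static void toy_write_doorbell(struct toy_queue *q)
{
        q->doorbell = q->tail;          /* stands in for the MMIO write */
}

static void toy_submit_batch(struct toy_queue *q, struct toy_batch *batch)
{
        if (batch->nr_cmds == 0)
                return;                 /* the fix: nothing queued, nothing rung */

        pthread_mutex_lock(&q->lock);
        q->tail += batch->nr_cmds;      /* copy commands into the SQ here */
        toy_write_doorbell(q);
        pthread_mutex_unlock(&q->lock);
        batch->nr_cmds = 0;
}

int main(void)
{
        struct toy_queue q = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
        struct toy_batch empty = { 0 }, two = { 2 };

        toy_submit_batch(&q, &empty);   /* no-op, doorbell untouched */
        toy_submit_batch(&q, &two);
        printf("tail=%u doorbell=%u\n", q.tail, q.doorbell);
        return 0;
}
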
63 changes: 25 additions & 38 deletions drivers/nvme/target/pci-epf.c
@@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
u16 status;
int ret;

if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@@ -1298,13 +1299,35 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
if (status != NVME_SC_SUCCESS)
goto err;

/*
* Map the CQ PCI address space and since PCI endpoint controllers may
* return a partial mapping, check that the mapping is large enough.
*/
ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
&cq->pci_map);
if (ret) {
dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n",
cq->qid, ret);
goto err_internal;
}

if (cq->pci_map.pci_size < cq->pci_size) {
dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
cq->qid);
goto err_unmap_queue;
}

set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);

dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
cqid, qsize, cq->qes, cq->vector);

return NVME_SC_SUCCESS;

err_unmap_queue:
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
err_internal:
status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
err:
if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
@@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
cancel_delayed_work_sync(&cq->work);
nvmet_pci_epf_drain_queue(cq);
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);

return NVME_SC_SUCCESS;
}
@@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
ctrl->cq = NULL;
}

static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
struct nvmet_pci_epf_queue *queue)
{
struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
int ret;

ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
queue->pci_size, &queue->pci_map);
if (ret) {
dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
queue->qid, ret);
return ret;
}

if (queue->pci_map.pci_size < queue->pci_size) {
dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
queue->qid);
nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
return -ENOMEM;
}

return 0;
}

static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
struct nvmet_pci_epf_queue *queue)
{
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
}

static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
{
struct nvmet_pci_epf_iod *iod =
@@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
struct nvme_completion *cqe;
struct nvmet_pci_epf_iod *iod;
unsigned long flags;
int ret, n = 0;

ret = nvmet_pci_epf_map_queue(ctrl, cq);
if (ret)
goto again;
int ret = 0, n = 0;

while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) {

@@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
n++;
}

nvmet_pci_epf_unmap_queue(ctrl, cq);

/*
* We do not support precise IRQ coalescing time (100ns units as per
* NVMe specifications). So if we have posted completion entries without
@@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
if (n)
nvmet_pci_epf_raise_irq(ctrl, cq, true);

again:
if (ret < 0)
queue_delayed_work(system_highpri_wq, &cq->work,
NVMET_PCI_EPF_CQ_RETRY_INTERVAL);
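
The endpoint target now maps a completion queue's PCI address range once in nvmet_pci_epf_create_cq() (rejecting partial mappings) and unmaps it in nvmet_pci_epf_delete_cq(), retiring the per-poll nvmet_pci_epf_map_queue()/nvmet_pci_epf_unmap_queue() helpers that nvmet_pci_epf_cq_work() used to call on every pass. A toy sketch of that lifetime change (names invented for illustration):

/*
 * Illustrative only: the mapping is established once when the queue is
 * created and torn down when it is deleted, rather than around every
 * polling pass over the completion queue.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_cq {
        void *map;              /* stands in for the mapped PCI window */
        size_t size;
};

static int toy_create_cq(struct toy_cq *cq, size_t size)
{
        cq->map = malloc(size);         /* map once, at creation */
        if (!cq->map)
                return -ENOMEM;
        cq->size = size;
        return 0;
}

/* The hot path can now assume the window is already mapped. */
static void toy_poll_cq(struct toy_cq *cq)
{
        memset(cq->map, 0, cq->size);   /* post completion entries here */
}

static void toy_delete_cq(struct toy_cq *cq)
{
        free(cq->map);                  /* unmap once, at deletion */
        cq->map = NULL;
}

int main(void)
{
        struct toy_cq cq;

        if (toy_create_cq(&cq, 4096))
                return 1;
        for (int i = 0; i < 3; i++)
                toy_poll_cq(&cq);       /* no map/unmap per pass anymore */
        toy_delete_cq(&cq);
        puts("done");
        return 0;
}

Tying the mapping to the queue's lifetime removes repeated map/unmap work from the completion path and lets the polling loop assume the window is available for as long as the queue is live.
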
