Merge tag 'block-6.15-20250403' of git://git.kernel.dk/linux
Pull more block updates from Jens Axboe:

 - NVMe pull request via Keith:
      - PCI endpoint target cleanup (Damien)
      - Early import for uring_cmd fixed buffer (Caleb)
      - Multipath documentation and notification improvements (John)
      - Invalid pci sq doorbell write fix (Maurizio)

 - Queue init locking fix

 - Remove dead nsegs parameter from blk_mq_get_new_requests()

* tag 'block-6.15-20250403' of git://git.kernel.dk/linux:
  block: don't grab elevator lock during queue initialization
  nvme-pci: skip nvme_write_sq_db on empty rqlist
  nvme-multipath: change the NVME_MULTIPATH config option
  nvme: update the multipath warning in nvme_init_ns_head
  nvme/ioctl: move fixed buffer lookup to nvme_uring_cmd_io()
  nvme/ioctl: move blk_mq_free_request() out of nvme_map_user_request()
  nvme/ioctl: don't warn on vectorized uring_cmd with fixed buffer
  nvmet: pci-epf: Keep completion queues mapped
  block: remove unused nseg parameter
Linus Torvalds committed Apr 3, 2025
2 parents 7930edc + 01b91bf commit 949dd32
Showing 6 changed files with 94 additions and 84 deletions.
29 changes: 19 additions & 10 deletions block/blk-mq.c
@@ -2965,8 +2965,7 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
 
 static struct request *blk_mq_get_new_requests(struct request_queue *q,
 					       struct blk_plug *plug,
-					       struct bio *bio,
-					       unsigned int nsegs)
+					       struct bio *bio)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
@@ -3125,7 +3124,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	if (rq) {
 		blk_mq_use_cached_rq(rq, plug, bio);
 	} else {
-		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+		rq = blk_mq_get_new_requests(q, plug, bio);
 		if (unlikely(!rq)) {
 			if (bio->bi_opf & REQ_NOWAIT)
 				bio_wouldblock_error(bio);
@@ -4465,14 +4464,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 	return NULL;
 }
 
-static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
-				   struct request_queue *q)
+static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+				     struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i, j;
 
-	/* protect against switching io scheduler */
-	mutex_lock(&q->elevator_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int old_node;
 		int node = blk_mq_get_hctx_node(set, i);
@@ -4505,7 +4502,19 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
 	xa_for_each_start(&q->hctx_table, j, hctx, j)
 		blk_mq_exit_hctx(q, set, hctx, j);
-	mutex_unlock(&q->elevator_lock);
+}
+
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+				   struct request_queue *q, bool lock)
+{
+	if (lock) {
+		/* protect against switching io scheduler */
+		mutex_lock(&q->elevator_lock);
+		__blk_mq_realloc_hw_ctxs(set, q);
+		mutex_unlock(&q->elevator_lock);
+	} else {
+		__blk_mq_realloc_hw_ctxs(set, q);
+	}
 
 	/* unregister cpuhp callbacks for exited hctxs */
 	blk_mq_remove_hw_queues_cpuhp(q);
@@ -4537,7 +4546,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	xa_init(&q->hctx_table);
 
-	blk_mq_realloc_hw_ctxs(set, q);
+	blk_mq_realloc_hw_ctxs(set, q, false);
 	if (!q->nr_hw_queues)
 		goto err_hctxs;
 
@@ -5033,7 +5042,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 fallback:
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_realloc_hw_ctxs(set, q);
+		blk_mq_realloc_hw_ctxs(set, q, true);
 
 		if (q->nr_hw_queues != set->nr_hw_queues) {
 			int i = prev_nr_hw_queues;
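For readers outside the kernel tree, the blk-mq.c hunks above boil down to a conditional-locking split: the reallocation work moves into __blk_mq_realloc_hw_ctxs(), and a wrapper takes q->elevator_lock only for runtime nr_hw_queues updates, not during queue initialization. Below is a minimal userspace sketch of that pattern, assuming a pthread mutex as a stand-in for q->elevator_lock; fake_queue, realloc_hw_ctxs and __realloc_hw_ctxs are illustrative names, not block-layer APIs.

/*
 * Minimal userspace sketch of the conditional-locking split above; a
 * pthread mutex stands in for q->elevator_lock, all names are invented.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_queue {
	pthread_mutex_t elevator_lock;
	int nr_hw_queues;
};

/* The worker: does the reallocation, leaves locking policy to callers. */
static void __realloc_hw_ctxs(struct fake_queue *q, int nr)
{
	q->nr_hw_queues = nr;
}

static void realloc_hw_ctxs(struct fake_queue *q, int nr, bool lock)
{
	if (lock) {
		/* runtime update: serialize against an elevator switch */
		pthread_mutex_lock(&q->elevator_lock);
		__realloc_hw_ctxs(q, nr);
		pthread_mutex_unlock(&q->elevator_lock);
	} else {
		/* queue initialization: nobody else can see the queue yet */
		__realloc_hw_ctxs(q, nr);
	}
}

int main(void)
{
	struct fake_queue q = { .elevator_lock = PTHREAD_MUTEX_INITIALIZER };

	realloc_hw_ctxs(&q, 4, false);	/* blk_mq_init_allocated_queue path */
	realloc_hw_ctxs(&q, 8, true);	/* __blk_mq_update_nr_hw_queues path */
	printf("nr_hw_queues = %d\n", q.nr_hw_queues);
	return 0;
}

The split keeps a single reallocation implementation while letting the initialization path, where the queue is not yet visible to an elevator switch, skip the lock entirely.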
13 changes: 9 additions & 4 deletions drivers/nvme/host/Kconfig
@@ -18,10 +18,15 @@ config NVME_MULTIPATH
 	bool "NVMe multipath support"
 	depends on NVME_CORE
 	help
-	  This option enables support for multipath access to NVMe
-	  subsystems. If this option is enabled only a single
-	  /dev/nvmeXnY device will show up for each NVMe namespace,
-	  even if it is accessible through multiple controllers.
+	  This option controls support for multipath access to NVMe
+	  subsystems. If this option is enabled support for NVMe multipath
+	  access is included in the kernel. If this option is disabled support
+	  for NVMe multipath access is excluded from the kernel. When this
+	  option is disabled each controller/namespace receives its
+	  own /dev/nvmeXnY device entry and NVMe multipath access is
+	  not supported.
+
+	  If unsure, say Y.
 
 config NVME_VERBOSE_ERRORS
 	bool "NVMe verbose error reporting"
2 changes: 1 addition & 1 deletion drivers/nvme/host/core.c
@@ -3822,7 +3822,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
 			"Found shared namespace %d, but multipathing not supported.\n",
 			info->nsid);
 		dev_warn_once(ctrl->device,
-			"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
+			"Shared namespace support requires core_nvme.multipath=Y.\n");
 	}
 }

68 changes: 37 additions & 31 deletions drivers/nvme/host/ioctl.c
@@ -114,8 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
 
 static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-		struct io_uring_cmd *ioucmd, unsigned int flags,
-		unsigned int iou_issue_flags)
+		struct iov_iter *iter, unsigned int flags)
 {
 	struct request_queue *q = req->q;
 	struct nvme_ns *ns = q->queuedata;
@@ -129,37 +128,23 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 	if (!nvme_ctrl_sgl_supported(ctrl))
 		dev_warn_once(ctrl->device, "using unchecked data buffer\n");
 	if (has_metadata) {
-		if (!supports_metadata) {
-			ret = -EINVAL;
-			goto out;
-		}
+		if (!supports_metadata)
+			return -EINVAL;
+
 		if (!nvme_ctrl_meta_sgl_supported(ctrl))
 			dev_warn_once(ctrl->device,
 				      "using unchecked metadata buffer\n");
 	}
 
-	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
-		struct iov_iter iter;
-
-		/* fixedbufs is only for non-vectored io */
-		if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
-			ret = -EINVAL;
-			goto out;
-		}
-		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
-				rq_data_dir(req), &iter, ioucmd,
-				iou_issue_flags);
-		if (ret < 0)
-			goto out;
-		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
-	} else {
+	if (iter)
+		ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
+	else
 		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
 				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
 				0, rq_data_dir(req));
-	}
 
 	if (ret)
-		goto out;
+		return ret;
 
 	bio = req->bio;
 	if (bdev)
@@ -176,8 +161,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 out_unmap:
 	if (bio)
 		blk_rq_unmap_user(bio);
-out:
-	blk_mq_free_request(req);
 	return ret;
 }
 
@@ -200,9 +183,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	req->timeout = timeout;
 	if (ubuffer && bufflen) {
 		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
-				meta_len, NULL, flags, 0);
+				meta_len, NULL, flags);
 		if (ret)
-			return ret;
+			goto out_free_req;
 	}
 
 	bio = req->bio;
@@ -218,7 +201,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 
 	if (effects)
 		nvme_passthru_end(ctrl, ns, effects, cmd, ret);
+	return ret;
+
+out_free_req:
+	blk_mq_free_request(req);
 	return ret;
 }
 
@@ -469,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
 	struct nvme_uring_data d;
 	struct nvme_command c;
+	struct iov_iter iter;
+	struct iov_iter *map_iter = NULL;
 	struct request *req;
 	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
 	blk_mq_req_flags_t blk_flags = 0;
@@ -504,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	d.metadata_len = READ_ONCE(cmd->metadata_len);
 	d.timeout_ms = READ_ONCE(cmd->timeout_ms);
 
+	if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
+		/* fixedbufs is only for non-vectored io */
+		if (vec)
+			return -EINVAL;
+
+		ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
+			nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
+			issue_flags);
+		if (ret < 0)
+			return ret;
+
+		map_iter = &iter;
+	}
+
 	if (issue_flags & IO_URING_F_NONBLOCK) {
 		rq_flags |= REQ_NOWAIT;
 		blk_flags = BLK_MQ_REQ_NOWAIT;
@@ -517,11 +519,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
 
 	if (d.data_len) {
-		ret = nvme_map_user_request(req, d.addr,
-			d.data_len, nvme_to_user_ptr(d.metadata),
-			d.metadata_len, ioucmd, vec, issue_flags);
+		ret = nvme_map_user_request(req, d.addr, d.data_len,
+			nvme_to_user_ptr(d.metadata), d.metadata_len,
+			map_iter, vec);
 		if (ret)
-			return ret;
+			goto out_free_req;
 	}
 
 	/* to free bio on completion, as req->bio will be null at that time */
@@ -531,6 +533,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	req->end_io = nvme_uring_cmd_end_io;
 	blk_execute_rq_nowait(req, false);
 	return -EIOCBQUEUED;
+
+out_free_req:
+	blk_mq_free_request(req);
+	return ret;
 }
 
 static bool is_ctrl_ioctl(unsigned int cmd)
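Taken together, the ioctl.c hunks above change ordering and ownership: the fixed buffer is imported in nvme_uring_cmd_io() before any request exists, a vectored fixed-buffer command is rejected with -EINVAL instead of triggering a WARN, and blk_mq_free_request() is now the caller's job on error paths. A compressed userspace sketch of that shape follows; import_fixed_buffer, submit_io and fake_req are hypothetical names, not the driver's API.

/* Hypothetical names throughout; only the ordering mirrors the patch. */
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct buf_iter { void *base; size_t len; };
struct fake_req { struct buf_iter *iter; };

/* Stand-in for the fixed-buffer lookup: it fails without side effects. */
static int import_fixed_buffer(void *addr, size_t len, struct buf_iter *it)
{
	if (!addr || !len)
		return -EINVAL;
	it->base = addr;
	it->len = len;
	return 0;
}

static int submit_io(void *addr, size_t len, int fixed, int vectored)
{
	struct buf_iter iter, *map_iter = NULL;
	struct fake_req *req;

	/* 1. Resolve the buffer first; a failure here has nothing to undo. */
	if (fixed) {
		if (vectored)
			return -EINVAL;	/* fixed buffers are non-vectored */
		int ret = import_fixed_buffer(addr, len, &iter);
		if (ret < 0)
			return ret;
		map_iter = &iter;
	}

	/* 2. Only then allocate the request; the caller frees it on error. */
	req = calloc(1, sizeof(*req));
	if (!req)
		return -ENOMEM;
	req->iter = map_iter;	/* map either the iterator or the raw pointer */

	/* ... mapping and submission would go here ... */
	free(req);
	return 0;
}

int main(void)
{
	char buf[4096];

	printf("fixed: %d\n", submit_io(buf, sizeof(buf), 1, 0));
	printf("vectored fixed rejected: %d\n", submit_io(buf, sizeof(buf), 1, 1));
	return 0;
}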
3 changes: 3 additions & 0 deletions drivers/nvme/host/pci.c
@@ -986,6 +986,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
 {
 	struct request *req;
 
+	if (rq_list_empty(rqlist))
+		return;
+
 	spin_lock(&nvmeq->sq_lock);
 	while ((req = rq_list_pop(rqlist))) {
 		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
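The pci.c hunk is a guard for the batched submission path: if the passed request list is empty, return before taking sq_lock, so nvme_write_sq_db() is never reached with an unchanged tail. A small userspace sketch of the same shape, with invented names (fake_sq, submit_cmds) rather than driver code:

/*
 * Userspace sketch of the guard above: bail out before the queue lock
 * when nothing was queued, so the doorbell is never written with an
 * unchanged tail. fake_sq and friends are made-up names.
 */
#include <pthread.h>
#include <stdio.h>

struct fake_sq {
	pthread_mutex_t sq_lock;
	unsigned int tail;
};

static void write_sq_doorbell(struct fake_sq *sq)
{
	printf("doorbell <- tail %u\n", sq->tail);
}

static void submit_cmds(struct fake_sq *sq, int nr_reqs)
{
	if (nr_reqs == 0)
		return;		/* empty list: no lock, no doorbell write */

	pthread_mutex_lock(&sq->sq_lock);
	while (nr_reqs--)
		sq->tail++;	/* copy each command into the queue */
	write_sq_doorbell(sq);	/* ring once, with a tail that actually moved */
	pthread_mutex_unlock(&sq->sq_lock);
}

int main(void)
{
	struct fake_sq sq = { .sq_lock = PTHREAD_MUTEX_INITIALIZER };

	submit_cmds(&sq, 0);	/* previously still reached the doorbell path */
	submit_cmds(&sq, 2);
	return 0;
}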
63 changes: 25 additions & 38 deletions drivers/nvme/target/pci-epf.c
@@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 	struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
 	struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
 	u16 status;
+	int ret;
 
 	if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
 		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@@ -1298,13 +1299,35 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 	if (status != NVME_SC_SUCCESS)
 		goto err;
 
+	/*
+	 * Map the CQ PCI address space and since PCI endpoint controllers may
+	 * return a partial mapping, check that the mapping is large enough.
+	 */
+	ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
+				    &cq->pci_map);
+	if (ret) {
+		dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n",
+			cq->qid, ret);
+		goto err_internal;
+	}
+
+	if (cq->pci_map.pci_size < cq->pci_size) {
+		dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
+			cq->qid);
+		goto err_unmap_queue;
+	}
+
 	set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
 
 	dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
 		cqid, qsize, cq->qes, cq->vector);
 
 	return NVME_SC_SUCCESS;
 
+err_unmap_queue:
+	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
+err_internal:
+	status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
 err:
 	if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
 		nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
@@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
 	cancel_delayed_work_sync(&cq->work);
 	nvmet_pci_epf_drain_queue(cq);
 	nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
+	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
 
 	return NVME_SC_SUCCESS;
 }
@@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
 	ctrl->cq = NULL;
 }
 
-static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
-				   struct nvmet_pci_epf_queue *queue)
-{
-	struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
-	int ret;
-
-	ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
-				    queue->pci_size, &queue->pci_map);
-	if (ret) {
-		dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
-			queue->qid, ret);
-		return ret;
-	}
-
-	if (queue->pci_map.pci_size < queue->pci_size) {
-		dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
-			queue->qid);
-		nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
-					     struct nvmet_pci_epf_queue *queue)
-{
-	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
-}
-
 static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
 {
 	struct nvmet_pci_epf_iod *iod =
@@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 	struct nvme_completion *cqe;
 	struct nvmet_pci_epf_iod *iod;
 	unsigned long flags;
-	int ret, n = 0;
-
-	ret = nvmet_pci_epf_map_queue(ctrl, cq);
-	if (ret)
-		goto again;
+	int ret = 0, n = 0;
 
 	while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) {
 
@@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 		n++;
 	}
 
-	nvmet_pci_epf_unmap_queue(ctrl, cq);
-
 	/*
 	 * We do not support precise IRQ coalescing time (100ns units as per
 	 * NVMe specifications). So if we have posted completion entries without
@@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 	if (n)
 		nvmet_pci_epf_raise_irq(ctrl, cq, true);
 
-again:
 	if (ret < 0)
 		queue_delayed_work(system_highpri_wq, &cq->work,
 				   NVMET_PCI_EPF_CQ_RETRY_INTERVAL);
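The pci-epf.c hunks move the CQ PCI mapping from a per-work map/unmap cycle to a mapping that is established in nvmet_pci_epf_create_cq() (where a partial mapping can now fail the command) and released in nvmet_pci_epf_delete_cq(). A hedged userspace sketch of that lifecycle, using invented names (fake_cq, mem_map, cq_work) rather than the nvmet_pci_epf_* helpers:

/*
 * Userspace sketch of the new mapping lifetime; fake_cq, mem_map and
 * friends are invented stand-ins, not the endpoint-function API.
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cq_map { bool mapped; size_t size; };
struct fake_cq { struct cq_map map; size_t pci_size; bool live; };

static int mem_map(struct fake_cq *cq)
{
	cq->map.mapped = true;
	cq->map.size = cq->pci_size;	/* a controller may map less than asked */
	return 0;
}

static void mem_unmap(struct fake_cq *cq)
{
	cq->map.mapped = false;
}

/* Create: map once and verify the mapping covers the whole queue. */
static int create_cq(struct fake_cq *cq)
{
	int ret = mem_map(cq);

	if (ret)
		return ret;
	if (cq->map.size < cq->pci_size) {
		mem_unmap(cq);
		return -ENOMEM;
	}
	cq->live = true;
	return 0;
}

/* Completion work: the mapping is already live, nothing to set up here. */
static void cq_work(struct fake_cq *cq)
{
	if (cq->live && cq->map.mapped)
		printf("post completions through the persistent mapping\n");
}

/* Delete: the only place the mapping is torn down. */
static void delete_cq(struct fake_cq *cq)
{
	cq->live = false;
	mem_unmap(cq);
}

int main(void)
{
	struct fake_cq cq = { .pci_size = 4096 };

	if (!create_cq(&cq)) {
		cq_work(&cq);
		delete_cq(&cq);
	}
	return 0;
}

Keeping the mapping alive for the lifetime of the queue also removes the failure path that previously rescheduled the completion work just to retry the mapping.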
