Merge tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux
Pull passthrough updates from Jens Axboe:
 "With these changes, passthrough NVMe support over io_uring now
  performs at the same level as block device O_DIRECT, and in many cases
  6-8% better.

  This contains:

   - Add support for fixed buffers for passthrough (Anuj, Kanchan)

   - Enable batched allocations and freeing on passthrough, similarly to
     what we support on the normal storage path (me)

   - Fix from Geert for an issue with !CONFIG_IO_URING"

* tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux:
  io_uring: Add missing inline to io_uring_cmd_import_fixed() dummy
  nvme: wire up fixed buffer support for nvme passthrough
  nvme: pass ubuffer as an integer
  block: extend functionality to map bvec iterator
  block: factor out blk_rq_map_bio_alloc helper
  block: rename bio_map_put to blk_mq_map_bio_put
  nvme: refactor nvme_alloc_request
  nvme: refactor nvme_add_user_metadata
  nvme: Use blk_rq_map_user_io helper
  scsi: Use blk_rq_map_user_io helper
  block: add blk_rq_map_user_io
  io_uring: introduce fixed buffer support for io_uring_cmd
  io_uring: add io_uring_cmd_import_fixed
  nvme: enable batched completions of passthrough IO
  nvme: split out metadata vs non metadata end_io uring_cmd completions
  block: allow end_io based requests in the completion batch handling
  block: change request end_io handler to pass back a return value
  block: enable batched allocation for blk_mq_alloc_request()
  block: kill deprecated BUG_ON() in the flush handling
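
The fixed-buffer passthrough path above is driven from userspace. As a rough
illustration (not part of this commit), the sketch below shows how an NVMe
read could be issued over io_uring passthrough with a pre-registered buffer,
using liburing and the uapi additions from this series (IORING_URING_CMD_FIXED
and sqe->buf_index for uring_cmd). The device path /dev/ng0n1, the 4 KiB
transfer size, the 512-byte LBA assumption and the lack of error handling are
all illustrative.

	/*
	 * Hedged userspace sketch: NVMe uring passthrough read into a
	 * registered (fixed) buffer. Not from this commit; error handling
	 * and LBA-size discovery are omitted.
	 */
	#include <liburing.h>
	#include <linux/nvme_ioctl.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/uio.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		struct nvme_uring_cmd *cmd;
		struct iovec iov;
		void *buf;
		int fd;

		/* NVMe passthrough needs the big SQE/CQE formats */
		io_uring_queue_init(8, &ring,
				    IORING_SETUP_SQE128 | IORING_SETUP_CQE32);

		posix_memalign(&buf, 4096, 4096);
		iov.iov_base = buf;
		iov.iov_len = 4096;
		/* pin the buffer once up front instead of on every IO */
		io_uring_register_buffers(&ring, &iov, 1);

		fd = open("/dev/ng0n1", O_RDONLY);	/* NVMe generic char device */

		sqe = io_uring_get_sqe(&ring);
		memset(sqe, 0, 128);			/* SQE128 slots are 128 bytes */
		sqe->opcode = IORING_OP_URING_CMD;
		sqe->fd = fd;
		sqe->cmd_op = NVME_URING_CMD_IO;
		/* new in this series: consume registered buffer 0 for this command */
		sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
		sqe->buf_index = 0;

		cmd = (struct nvme_uring_cmd *)sqe->cmd;
		cmd->opcode = 0x02;			/* NVMe read */
		cmd->nsid = 1;
		cmd->addr = (__u64)(uintptr_t)buf;
		cmd->data_len = 4096;
		cmd->cdw10 = 0;				/* starting LBA (low 32 bits) */
		cmd->cdw12 = (4096 / 512) - 1;		/* NLB, zero-based; assumes 512B LBAs */

		io_uring_submit(&ring);
		io_uring_wait_cqe(&ring, &cqe);
		io_uring_cqe_seen(&ring, cqe);
		return 0;
	}

Without IORING_URING_CMD_FIXED the same command still works, but the buffer
pages are pinned on every submission rather than once at registration time,
which is where the fixed-buffer patches in this series claw back the remaining
gap to block-device O_DIRECT.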
Linus Torvalds committed Oct 7, 2022
2 parents 5133898 + 0e0abad commit 7c989b1
Showing 18 changed files with 476 additions and 181 deletions.
11 changes: 7 additions & 4 deletions block/blk-flush.c
@@ -205,7 +205,6 @@ static void blk_flush_complete_seq(struct request *rq,
* flush data request completion path. Restore @rq for
* normal completion and end it.
*/
BUG_ON(!list_empty(&rq->queuelist));
list_del_init(&rq->flush.list);
blk_flush_restore_request(rq);
blk_mq_end_request(rq, error);
@@ -218,7 +217,8 @@ static void blk_flush_complete_seq(struct request *rq,
blk_kick_flush(q, fq, cmd_flags);
}

static void flush_end_io(struct request *flush_rq, blk_status_t error)
static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
blk_status_t error)
{
struct request_queue *q = flush_rq->q;
struct list_head *running;
@@ -232,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
if (!req_ref_put_and_test(flush_rq)) {
fq->rq_status = error;
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
return;
return RQ_END_IO_NONE;
}

blk_account_io_flush(flush_rq);
@@ -269,6 +269,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
}

spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
return RQ_END_IO_NONE;
}

bool is_flush_rq(struct request *rq)
@@ -354,7 +355,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
blk_flush_queue_rq(flush_rq, false);
}

static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
blk_status_t error)
{
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
@@ -376,6 +378,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);

blk_mq_sched_restart(hctx);
return RQ_END_IO_NONE;
}

/**
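
The flush_end_io() and mq_flush_data_end_io() changes above are fallout from
"block: change request end_io handler to pass back a return value" in the
shortlog: an end_io handler now returns enum rq_end_io_ret, telling blk-mq
whether it keeps ownership of the request (RQ_END_IO_NONE) or wants the block
layer to free it (RQ_END_IO_FREE), which is what lets end_io based
passthrough requests take part in batched completion. A minimal sketch of a
driver-side handler under the new interface follows; my_passthrough_end_io()
and my_cmd_ctx are hypothetical names, not code from this pull.

	static enum rq_end_io_ret my_passthrough_end_io(struct request *req,
							blk_status_t err)
	{
		/* hypothetical per-command context stashed at submit time */
		struct my_cmd_ctx *ctx = req->end_io_data;

		ctx->status = err;
		complete(&ctx->done);

		/*
		 * Let blk-mq free the request (possibly as part of a
		 * completion batch) instead of calling blk_mq_free_request()
		 * here.
		 */
		return RQ_END_IO_FREE;
	}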
150 changes: 132 additions & 18 deletions block/blk-map.c
@@ -231,7 +231,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
return ret;
}

static void bio_map_put(struct bio *bio)
static void blk_mq_map_bio_put(struct bio *bio)
{
if (bio->bi_opf & REQ_ALLOC_CACHE) {
bio_put(bio);
@@ -241,31 +241,42 @@ static void bio_map_put(struct bio *bio)
}
}

static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
static struct bio *blk_rq_map_bio_alloc(struct request *rq,
unsigned int nr_vecs, gfp_t gfp_mask)
{
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio;
int ret;
int j;

if (!iov_iter_count(iter))
return -EINVAL;

if (rq->cmd_flags & REQ_POLLED) {
blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE;

bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask,
&fs_bio_set);
if (!bio)
return -ENOMEM;
return NULL;
} else {
bio = bio_kmalloc(nr_vecs, gfp_mask);
if (!bio)
return -ENOMEM;
return NULL;
bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
}
return bio;
}

static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
{
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio;
int ret;
int j;

if (!iov_iter_count(iter))
return -EINVAL;

bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
if (bio == NULL)
return -ENOMEM;

while (iov_iter_count(iter)) {
struct page **pages, *stack_pages[UIO_FASTIOV];
@@ -331,7 +342,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,

out_unmap:
bio_release_pages(bio, false);
bio_map_put(bio);
blk_mq_map_bio_put(bio);
return ret;
}

@@ -537,6 +548,62 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(blk_rq_append_bio);

/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
struct request_queue *q = rq->q;
size_t nr_iter = iov_iter_count(iter);
size_t nr_segs = iter->nr_segs;
struct bio_vec *bvecs, *bvprvp = NULL;
struct queue_limits *lim = &q->limits;
unsigned int nsegs = 0, bytes = 0;
struct bio *bio;
size_t i;

if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
return -EINVAL;
if (nr_segs > queue_max_segments(q))
return -EINVAL;

/* no iovecs to alloc, as we already have a BVEC iterator */
bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
if (bio == NULL)
return -ENOMEM;

bio_iov_bvec_set(bio, (struct iov_iter *)iter);
blk_rq_bio_prep(rq, bio, nr_segs);

/* loop to perform a bunch of sanity checks */
bvecs = (struct bio_vec *)iter->bvec;
for (i = 0; i < nr_segs; i++) {
struct bio_vec *bv = &bvecs[i];

/*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, fallback to copy.
*/
if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
blk_mq_map_bio_put(bio);
return -EREMOTEIO;
}
/* check full condition */
if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
goto put_bio;
if (bytes + bv->bv_len > nr_iter)
goto put_bio;
if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
goto put_bio;

nsegs++;
bytes += bv->bv_len;
bvprvp = bv;
}
return 0;
put_bio:
blk_mq_map_bio_put(bio);
return -EINVAL;
}

/**
* blk_rq_map_user_iov - map user data to a request, for passthrough requests
* @q: request queue where request should be inserted
@@ -556,24 +623,35 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
struct rq_map_data *map_data,
const struct iov_iter *iter, gfp_t gfp_mask)
{
bool copy = false;
bool copy = false, map_bvec = false;
unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
struct bio *bio = NULL;
struct iov_iter i;
int ret = -EINVAL;

if (!iter_is_iovec(iter))
goto fail;

if (map_data)
copy = true;
else if (blk_queue_may_bounce(q))
copy = true;
else if (iov_iter_alignment(iter) & align)
copy = true;
else if (iov_iter_is_bvec(iter))
map_bvec = true;
else if (!iter_is_iovec(iter))
copy = true;
else if (queue_virt_boundary(q))
copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

if (map_bvec) {
ret = blk_rq_map_user_bvec(rq, iter);
if (!ret)
return 0;
if (ret != -EREMOTEIO)
goto fail;
/* fall back to copying the data on limits mismatches */
copy = true;
}

i = *iter;
do {
if (copy)
@@ -611,6 +689,42 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
}
EXPORT_SYMBOL(blk_rq_map_user);

int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
bool vec, int iov_count, bool check_iter_count, int rw)
{
int ret = 0;

if (vec) {
struct iovec fast_iov[UIO_FASTIOV];
struct iovec *iov = fast_iov;
struct iov_iter iter;

ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
UIO_FASTIOV, &iov, &iter);
if (ret < 0)
return ret;

if (iov_count) {
/* SG_IO howto says that the shorter of the two wins */
iov_iter_truncate(&iter, buf_len);
if (check_iter_count && !iov_iter_count(&iter)) {
kfree(iov);
return -EINVAL;
}
}

ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
gfp_mask);
kfree(iov);
} else if (buf_len) {
ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
gfp_mask);
}
return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_io);

/**
* blk_rq_unmap_user - unmap a request with user data
* @bio: start of bio list
@@ -636,7 +750,7 @@ int blk_rq_unmap_user(struct bio *bio)

next_bio = bio;
bio = bio->bi_next;
bio_map_put(next_bio);
blk_mq_map_bio_put(next_bio);
}

return ret;
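
Two new mapping entry points come out of this file: blk_rq_map_user_bvec(),
reached through blk_rq_map_user_iov() whenever the iterator is ITER_BVEC, and
the blk_rq_map_user_io() helper that folds together the user-pointer/iovec
handling shared by the nvme and scsi conversions in the shortlog. A rough
driver-side sketch of how the two paths fit together is below;
my_map_user_request() and its fixed/vec parameters are hypothetical, but the
calls mirror what the nvme passthrough conversion in this pull does.

	static int my_map_user_request(struct request *req, u64 ubuffer,
				       unsigned int bufflen, void *ioucmd,
				       bool fixed, bool vec)
	{
		int ret;

		if (fixed) {
			/*
			 * Pre-registered (fixed) buffer: materialize it as an
			 * ITER_BVEC iterator so blk_rq_map_user_iov() takes
			 * the new zero-copy blk_rq_map_user_bvec() path.
			 */
			struct iov_iter iter;

			ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
							rq_data_dir(req),
							&iter, ioucmd);
			if (ret < 0)
				return ret;
			return blk_rq_map_user_iov(req->q, req, NULL, &iter,
						   GFP_KERNEL);
		}

		/* Plain user pointer or user iovec array: one helper covers both. */
		return blk_rq_map_user_io(req, NULL, u64_to_user_ptr(ubuffer),
					  bufflen, GFP_KERNEL, vec, 0, 0,
					  rq_data_dir(req));
	}

If the bvec path hits a queue limit it cannot satisfy zero-copy (for example a
virt-boundary gap), blk_rq_map_user_iov() falls back to the copying path
rather than failing the IO, as the -EREMOTEIO handling above shows.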
(Diffs for the remaining 16 changed files are not shown here.)
