blk-mq: always free hctx after request queue is freed
In the normal queue cleanup path, the hctxs are released after the request
queue is freed; see blk_mq_release().

However, in __blk_mq_update_nr_hw_queues(), an hctx may be freed earlier,
when the number of hw queues shrinks. This can easily cause a use-after-free,
because of one implicit rule: it is safe to call almost all block layer APIs
as long as the request queue is alive. An hctx retrieved through one of those
APIs can then be freed by blk_mq_update_nr_hw_queues(), and any later access
to it is a use-after-free.
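
For illustration only, a minimal sketch of the usage pattern that the
implicit rule permits (the helper below is hypothetical, not part of this
patch):

#include <linux/blk-mq.h>

/*
 * Hypothetical caller: the request queue is alive, so walking its hw
 * queues looks safe. Before this patch, a concurrent
 * blk_mq_update_nr_hw_queues() could drop the last reference on a
 * removed hctx right away, turning the access below into a
 * use-after-free.
 */
static void show_hctx_nodes(struct request_queue *q)
{
    struct blk_mq_hw_ctx *hctx;
    unsigned int i;

    queue_for_each_hw_ctx(q, hctx, i)
        pr_info("hctx %u: numa node %u\n", i, hctx->numa_node);
}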

Fix this issue by always freeing the hctx after the request queue is
released. If some hctxs are removed in blk_mq_update_nr_hw_queues(),
introduce a per-queue list to hold them, then try to reuse these hctxs
when the NUMA node matches.
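
Condensed from the hunks below (a sketch of the new lifecycle, not the
literal patch): exit parks the hctx on the per-queue list, (re)allocation
prefers a parked hctx on the same NUMA node, and only queue release finally
frees whatever is still parked:

    /* blk_mq_exit_hctx(): park instead of free */
    spin_lock(&q->unused_hctx_lock);
    list_add(&hctx->hctx_list, &q->unused_hctx_list);
    spin_unlock(&q->unused_hctx_lock);

    /* blk_mq_alloc_and_init_hctx(): reuse a parked hctx if the node matches */
    list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list)
        if (tmp->numa_node == node) {
            hctx = tmp;
            break;
        }

    /* blk_mq_release(): the queue is gone, now drop the parked hctxs */
    list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
        list_del_init(&hctx->hctx_list);
        kobject_put(&hctx->kobj);
    }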

Cc: Dongli Zhang <dongli.zhang@oracle.com>
Cc: James Smart <james.smart@broadcom.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: linux-scsi@vger.kernel.org
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: James E. J. Bottomley <jejb@linux.vnet.ibm.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Tested-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Ming Lei authored and Jens Axboe committed May 4, 2019
1 parent 7c6c5b7 commit 2f8f133
Showing 3 changed files with 42 additions and 13 deletions.
46 changes: 33 additions & 13 deletions block/blk-mq.c
@@ -2269,6 +2269,10 @@ static void blk_mq_exit_hctx(struct request_queue *q,
         set->ops->exit_hctx(hctx, hctx_idx);
 
     blk_mq_remove_cpuhp(hctx);
+
+    spin_lock(&q->unused_hctx_lock);
+    list_add(&hctx->hctx_list, &q->unused_hctx_list);
+    spin_unlock(&q->unused_hctx_lock);
 }
 
 static void blk_mq_exit_hw_queues(struct request_queue *q,
@@ -2351,6 +2355,8 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
     hctx->queue = q;
     hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
+    INIT_LIST_HEAD(&hctx->hctx_list);
+
     /*
      * Allocate space for all possible cpus to avoid allocation at
      * runtime
@@ -2664,15 +2670,17 @@ static int blk_mq_alloc_ctxs(struct request_queue *q)
  */
 void blk_mq_release(struct request_queue *q)
 {
-    struct blk_mq_hw_ctx *hctx;
-    unsigned int i;
+    struct blk_mq_hw_ctx *hctx, *next;
+    int i;
 
     cancel_delayed_work_sync(&q->requeue_work);
 
-    /* hctx kobj stays in hctx */
-    queue_for_each_hw_ctx(q, hctx, i) {
-        if (!hctx)
-            continue;
+    queue_for_each_hw_ctx(q, hctx, i)
+        WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
+
+    /* all hctx are in .unused_hctx_list now */
+    list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
+        list_del_init(&hctx->hctx_list);
         kobject_put(&hctx->kobj);
     }
 
@@ -2739,9 +2747,22 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
         struct blk_mq_tag_set *set, struct request_queue *q,
         int hctx_idx, int node)
 {
-    struct blk_mq_hw_ctx *hctx;
+    struct blk_mq_hw_ctx *hctx = NULL, *tmp;
 
-    hctx = blk_mq_alloc_hctx(q, set, node);
+    /* reuse dead hctx first */
+    spin_lock(&q->unused_hctx_lock);
+    list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
+        if (tmp->numa_node == node) {
+            hctx = tmp;
+            break;
+        }
+    }
+    if (hctx)
+        list_del_init(&hctx->hctx_list);
+    spin_unlock(&q->unused_hctx_lock);
+
+    if (!hctx)
+        hctx = blk_mq_alloc_hctx(q, set, node);
     if (!hctx)
         goto fail;
 
@@ -2779,10 +2800,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
         hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
         if (hctx) {
-            if (hctxs[i]) {
+            if (hctxs[i])
                 blk_mq_exit_hctx(q, set, hctxs[i], i);
-                kobject_put(&hctxs[i]->kobj);
-            }
             hctxs[i] = hctx;
         } else {
             if (hctxs[i])
@@ -2813,9 +2832,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
             if (hctx->tags)
                 blk_mq_free_map_and_requests(set, j);
             blk_mq_exit_hctx(q, set, hctx, j);
-            kobject_put(&hctx->kobj);
             hctxs[j] = NULL;
-
         }
     }
     mutex_unlock(&q->sysfs_lock);
@@ -2858,6 +2875,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
     if (!q->queue_hw_ctx)
         goto err_sys_init;
 
+    INIT_LIST_HEAD(&q->unused_hctx_list);
+    spin_lock_init(&q->unused_hctx_lock);
+
     blk_mq_realloc_hw_ctxs(set, q);
     if (!q->nr_hw_queues)
         goto err_hctxs;
2 changes: 2 additions & 0 deletions include/linux/blk-mq.h
@@ -70,6 +70,8 @@ struct blk_mq_hw_ctx {
     struct dentry *sched_debugfs_dir;
 #endif
 
+    struct list_head hctx_list;
+
     /* Must be the last member - see also blk_mq_hw_ctx_size(). */
     struct srcu_struct srcu[0];
 };
7 changes: 7 additions & 0 deletions include/linux/blkdev.h
@@ -535,6 +535,13 @@ struct request_queue {
 
     struct mutex sysfs_lock;
 
+    /*
+     * for reusing dead hctx instance in case of updating
+     * nr_hw_queues
+     */
+    struct list_head unused_hctx_list;
+    spinlock_t unused_hctx_lock;
+
     atomic_t mq_freeze_depth;
 
 #if defined(CONFIG_BLK_DEV_BSG)
