From 78c271344b6f64ce24c845e54903e09928cf2061 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 May 2025 22:18:03 +0800 Subject: [PATCH] block: move wbt_enable_default() out of queue freezing from sched ->exit() scheduler's ->exit() is called with queue frozen and elevator lock is held, and wbt_enable_default() can't be called with queue frozen, otherwise the following lockdep warning is triggered: #6 (&q->rq_qos_mutex){+.+.}-{4:4}: #5 (&eq->sysfs_lock){+.+.}-{4:4}: #4 (&q->elevator_lock){+.+.}-{4:4}: #3 (&q->q_usage_counter(io)#3){++++}-{0:0}: #2 (fs_reclaim){+.+.}-{0:0}: #1 (&sb->s_type->i_mutex_key#3){+.+.}-{4:4}: #0 (&q->debugfs_mutex){+.+.}-{4:4}: Fix the issue by moving wbt_enable_default() out of bfq's exit(), and call it from elevator_change_done(). Meantime add disk->rqos_state_mutex for covering wbt state change, which matches the purpose more than ->elevator_lock. Reviewed-by: Hannes Reinecke Reviewed-by: Nilay Shroff Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250505141805.2751237-26-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- block/blk-sysfs.c | 10 ++++------ block/blk-wbt.c | 6 ++++++ block/elevator.c | 5 +++++ block/elevator.h | 1 + block/genhd.c | 1 + include/linux/blkdev.h | 2 ++ 7 files changed, 20 insertions(+), 7 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index cc6f59836dcd..0cb1e9873aab 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7211,7 +7211,7 @@ static void bfq_exit_queue(struct elevator_queue *e) blk_stat_disable_accounting(bfqd->queue); blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue); - wbt_enable_default(bfqd->queue->disk); + set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags); kfree(bfqd); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 741e607dfab6..01e0ead13278 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -560,7 +560,7 @@ static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) ssize_t ret; struct request_queue *q = disk->queue; - mutex_lock(&q->elevator_lock); + mutex_lock(&disk->rqos_state_mutex); if (!wbt_rq_qos(q)) { ret = -EINVAL; goto out; @@ -573,7 +573,7 @@ static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000)); out: - mutex_unlock(&q->elevator_lock); + mutex_unlock(&disk->rqos_state_mutex); return ret; } @@ -593,7 +593,6 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, return -EINVAL; memflags = blk_mq_freeze_queue(q); - mutex_lock(&q->elevator_lock); rqos = wbt_rq_qos(q); if (!rqos) { @@ -618,11 +617,12 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, */ blk_mq_quiesce_queue(q); + mutex_lock(&disk->rqos_state_mutex); wbt_set_min_lat(q, val); + mutex_unlock(&disk->rqos_state_mutex); blk_mq_unquiesce_queue(q); out: - mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); return ret; @@ -871,9 +871,7 @@ int blk_register_queue(struct gendisk *disk) if (queue_is_mq(q)) elevator_set_default(q); - mutex_lock(&q->elevator_lock); wbt_enable_default(disk); - mutex_unlock(&q->elevator_lock); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 29cd2e33666f..74ae7131ada9 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -704,6 +704,8 @@ void wbt_enable_default(struct gendisk *disk) struct rq_qos *rqos; bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ); + mutex_lock(&disk->rqos_state_mutex); + if (blk_queue_disable_wbt(q)) enable = false; @@ -712,8 +714,10 @@ void wbt_enable_default(struct gendisk *disk) if (rqos) { if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; + mutex_unlock(&disk->rqos_state_mutex); return; } + mutex_unlock(&disk->rqos_state_mutex); /* Queue not registered? Maybe shutting down... */ if (!blk_queue_registered(q)) @@ -773,11 +777,13 @@ void wbt_disable_default(struct gendisk *disk) struct rq_wb *rwb; if (!rqos) return; + mutex_lock(&disk->rqos_state_mutex); rwb = RQWB(rqos); if (rwb->enable_state == WBT_STATE_ON_DEFAULT) { blk_stat_deactivate(rwb->cb); rwb->enable_state = WBT_STATE_OFF_DEFAULT; } + mutex_unlock(&disk->rqos_state_mutex); } EXPORT_SYMBOL_GPL(wbt_disable_default); diff --git a/block/elevator.c b/block/elevator.c index 8578b969e173..f8d72bd20610 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -637,8 +637,13 @@ static int elevator_change_done(struct request_queue *q, int ret = 0; if (ctx->old) { + bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, + &ctx->old->flags); + elv_unregister_queue(q, ctx->old); kobject_put(&ctx->old->kobj); + if (enable_wbt) + wbt_enable_default(q->disk); } if (ctx->new) { ret = elv_register_queue(q, ctx->new, !ctx->no_uevent); diff --git a/block/elevator.h b/block/elevator.h index 76a90a1b7ed6..a07ce773a38f 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -122,6 +122,7 @@ struct elevator_queue #define ELEVATOR_FLAG_REGISTERED 0 #define ELEVATOR_FLAG_DYING 1 +#define ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT 2 /* * block elevator interface diff --git a/block/genhd.c b/block/genhd.c index a8cb5607b6e3..9c7c657380db 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1470,6 +1470,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED INIT_LIST_HEAD(&disk->slave_bdevs); #endif + mutex_init(&disk->rqos_state_mutex); return disk; out_erase_part0: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3aa1fd637d57..94323f303b37 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -218,6 +218,8 @@ struct gendisk { * devices that do not have multiple independent access ranges. */ struct blk_independent_access_ranges *ia_ranges; + + struct mutex rqos_state_mutex; /* rqos state change mutex */ }; /**