block: lift setting the readahead size into the block layer
Drivers shouldn't really mess with the readahead size, as that is a VM
concept.  Instead set it based on the optimal I/O size by lifting the
algorithm from the md driver when registering the disk.  Also set
bdi->io_pages there, applying the same scheme based on max_sectors.
To ensure the limits work well for stacking drivers, a new helper is
added to update the readahead limits from the block limits; it is
also called from disk_stack_limits.
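
A worked example of the new sizing rule (editorial illustration, not
part of the patch), assuming 4 KiB pages, where VM_READAHEAD_PAGES is
128 KiB worth of pages (32), and a hypothetical device reporting an
optimal I/O size of 1 MiB and max_sectors of 1280:

	/* ra_pages: at least twice the optimal I/O size, never below the VM default */
	ra_pages = max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
	         /* = max(2 * 1048576 / 4096, 32) = 512 pages = 2 MiB */
	io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
	         /* = 1280 >> (12 - 9) = 160 pages = 640 KiB */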

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Christoph Hellwig authored and Jens Axboe committed Sep 24, 2020
1 parent 16ef510 commit c2e4cd5
Showing 11 changed files with 24 additions and 68 deletions.
18 changes: 16 additions & 2 deletions block/blk-settings.c
@@ -372,6 +372,19 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
 }
 EXPORT_SYMBOL(blk_queue_alignment_offset);
 
+void blk_queue_update_readahead(struct request_queue *q)
+{
+	/*
+	 * For read-ahead of large files to be effective, we need to read ahead
+	 * at least twice the optimal I/O size.
+	 */
+	q->backing_dev_info->ra_pages =
+		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
+	q->backing_dev_info->io_pages =
+		queue_max_sectors(q) >> (PAGE_SHIFT - 9);
+}
+EXPORT_SYMBOL_GPL(blk_queue_update_readahead);
+
 /**
  * blk_limits_io_min - set minimum request size for a device
  * @limits: the queue limits
@@ -450,6 +463,8 @@ EXPORT_SYMBOL(blk_limits_io_opt);
 void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 {
 	blk_limits_io_opt(&q->limits, opt);
+	q->backing_dev_info->ra_pages =
+		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
@@ -631,8 +646,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 		       top, bottom);
 	}
 
-	t->backing_dev_info->io_pages =
-		t->limits.max_sectors >> (PAGE_SHIFT - 9);
+	blk_queue_update_readahead(disk->queue);
 }
 EXPORT_SYMBOL(disk_stack_limits);
 
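The intended pattern for stacking drivers, sketched below with
hypothetical placeholders (my_disk, member_bdev and member_q are not
from this patch): disk_stack_limits() now refreshes the readahead
state itself, while a caller stacking raw queue_limits through
blk_stack_limits() must invoke the new helper explicitly, as the nvme
hunk further down does:

	/* Per member device: stack limits and refresh bdi state in one go. */
	disk_stack_limits(my_disk, member_bdev, 0);

	/* Or, when only queue_limits are at hand: stack, then update by hand. */
	blk_stack_limits(&my_disk->queue->limits, &member_q->limits, 0);
	blk_queue_update_readahead(my_disk->queue);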
2 changes: 2 additions & 0 deletions block/blk-sysfs.c
@@ -854,6 +854,8 @@ int blk_register_queue(struct gendisk *disk)
 		percpu_ref_switch_to_percpu(&q->q_usage_counter);
 	}
 
+	blk_queue_update_readahead(q);
+
 	ret = blk_trace_init_sysfs(dev);
 	if (ret)
 		return ret;
1 change: 0 additions & 1 deletion drivers/block/aoe/aoeblk.c
@@ -406,7 +406,6 @@ aoeblk_gdalloc(void *vp)
 	WARN_ON(d->gd);
 	WARN_ON(d->flags & DEVFL_UP);
 	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
-	q->backing_dev_info->ra_pages = SZ_2M / PAGE_SIZE;
 	blk_queue_io_opt(q, SZ_2M);
 	d->bufpool = mp;
 	d->blkq = gd->queue = q;
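Editorial note on the net effect here: the driver used to pin
ra_pages at SZ_2M / PAGE_SIZE = 512 pages (2 MiB with 4 KiB pages);
with io_opt left at SZ_2M, the generic helper now computes

	ra_pages = max(2 * SZ_2M / PAGE_SIZE, VM_READAHEAD_PAGES) = 1024 pages

so the default aoe readahead window effectively doubles to 4 MiB.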
10 changes: 1 addition & 9 deletions drivers/block/drbd/drbd_nl.c
@@ -1362,15 +1362,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
 
 	if (b) {
 		blk_stack_limits(&q->limits, &b->limits, 0);
-
-		if (q->backing_dev_info->ra_pages !=
-		    b->backing_dev_info->ra_pages) {
-			drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
-				  q->backing_dev_info->ra_pages,
-				  b->backing_dev_info->ra_pages);
-			q->backing_dev_info->ra_pages =
-				b->backing_dev_info->ra_pages;
-		}
+		blk_queue_update_readahead(q);
 	}
 	fixup_discard_if_not_supported(q);
 	fixup_write_zeroes(device, q);
3 changes: 0 additions & 3 deletions drivers/md/bcache/super.c
@@ -1427,9 +1427,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
 	if (ret)
 		return ret;
 
-	dc->disk.disk->queue->backing_dev_info->ra_pages =
-		max(dc->disk.disk->queue->backing_dev_info->ra_pages,
-		    q->backing_dev_info->ra_pages);
 	blk_queue_io_opt(dc->disk.disk->queue,
 		max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
 
3 changes: 1 addition & 2 deletions drivers/md/dm-table.c
@@ -1925,8 +1925,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 #endif
 
-	/* Allow reads to exceed readahead limits */
-	q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
+	blk_queue_update_readahead(q);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
16 changes: 0 additions & 16 deletions drivers/md/raid0.c
@@ -410,22 +410,6 @@ static int raid0_run(struct mddev *mddev)
 	       mdname(mddev),
 	       (unsigned long long)mddev->array_sectors);
 
-	if (mddev->queue) {
-		/* calculate the max read-ahead size.
-		 * For read-ahead of large files to be effective, we need to
-		 * readahead at least twice a whole stripe. i.e. number of devices
-		 * multiplied by chunk size times 2.
-		 * If an individual device has an ra_pages greater than the
-		 * chunk size, then we will not drive that device as hard as it
-		 * wants.  We consider this a configuration error: a larger
-		 * chunksize should be used in that case.
-		 */
-		int stripe = mddev->raid_disks *
-			(mddev->chunk_sectors << 9) / PAGE_SIZE;
-		if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
-			mddev->queue->backing_dev_info->ra_pages = 2* stripe;
-	}
-
 	dump_zones(mddev);
 
 	ret = md_integrity_register(mddev);
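The deleted block is subsumed by the generic helper: raid0_run()
already advertises io_opt as raid_disks times the chunk size through
blk_queue_io_opt(), so blk_queue_update_readahead() reproduces the old
arithmetic. A worked check (editorial, assuming 4 KiB pages and a
hypothetical 4-disk array with 512 KiB chunks):

	old: stripe = 4 * (512 KiB / 4 KiB) = 512 pages;  ra_pages = 2 * 512 = 1024 pages
	new: io_opt = 4 * 512 KiB = 2 MiB;  ra_pages = max(2 * 2 MiB / 4 KiB, 32) = 1024 pages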
24 changes: 1 addition & 23 deletions drivers/md/raid10.c
@@ -3873,19 +3873,6 @@ static int raid10_run(struct mddev *mddev)
 	mddev->resync_max_sectors = size;
 	set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
 
-	if (mddev->queue) {
-		int stripe = conf->geo.raid_disks *
-			((mddev->chunk_sectors << 9) / PAGE_SIZE);
-
-		/* Calculate max read-ahead size.
-		 * We need to readahead at least twice a whole stripe....
-		 * maybe...
-		 */
-		stripe /= conf->geo.near_copies;
-		if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
-			mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
-	}
-
 	if (md_integrity_register(mddev))
 		goto out_free_conf;
 
@@ -4723,17 +4710,8 @@ static void end_reshape(struct r10conf *conf)
 	conf->reshape_safe = MaxSector;
 	spin_unlock_irq(&conf->device_lock);
 
-	/* read-ahead size must cover two whole stripes, which is
-	 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
-	 */
-	if (conf->mddev->queue) {
-		int stripe = conf->geo.raid_disks *
-			((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
-		stripe /= conf->geo.near_copies;
-		if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
-			conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
+	if (conf->mddev->queue)
 		raid10_set_io_opt(conf);
-	}
 	conf->fullsync = 0;
 }
 
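Presumably the same equivalence holds for raid10: raid10_set_io_opt()
(added by the parent commit, not shown here) folds near_copies into
io_opt, matching the removed stripe /= conf->geo.near_copies step. A
hypothetical check, assuming 4 KiB pages, 4 disks, 512 KiB chunks and
near_copies = 2:

	io_opt   = (4 / 2) * 512 KiB = 1 MiB
	ra_pages = max(2 * 1 MiB / 4 KiB, 32) = 512 pages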
13 changes: 1 addition & 12 deletions drivers/md/raid5.c
@@ -7522,8 +7522,6 @@ static int raid5_run(struct mddev *mddev)
 		int data_disks = conf->previous_raid_disks - conf->max_degraded;
 		int stripe = data_disks *
 			((mddev->chunk_sectors << 9) / PAGE_SIZE);
-		if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
-			mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
 
 		chunk_size = mddev->chunk_sectors << 9;
 		blk_queue_io_min(mddev->queue, chunk_size);
@@ -8111,17 +8109,8 @@ static void end_reshape(struct r5conf *conf)
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_for_overlap);
 
-		/* read-ahead size must cover two whole stripes, which is
-		 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
-		 */
-		if (conf->mddev->queue) {
-			int data_disks = conf->raid_disks - conf->max_degraded;
-			int stripe = data_disks * ((conf->chunk_sectors << 9)
-						   / PAGE_SIZE);
-			if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
-				conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
+		if (conf->mddev->queue)
 			raid5_set_io_opt(conf);
-		}
 	}
 }
 
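For raid5 the removed computation counted data disks only
(raid_disks - max_degraded), which is what raid5_set_io_opt()
(also from the parent commit) feeds into io_opt. A hypothetical check,
assuming 4 KiB pages and a 4-disk RAID5 (max_degraded = 1) with
512 KiB chunks:

	io_opt   = 3 * 512 KiB = 1536 KiB
	ra_pages = max(2 * 1536 KiB / 4 KiB, 32) = 768 pages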
1 change: 1 addition & 0 deletions drivers/nvme/host/core.c
@@ -2147,6 +2147,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 		nvme_update_disk_info(ns->head->disk, ns, id);
 		blk_stack_limits(&ns->head->disk->queue->limits,
 				 &ns->queue->limits, 0);
+		blk_queue_update_readahead(ns->head->disk->queue);
 		nvme_update_bdev_size(ns->head->disk);
 	}
 #endif
1 change: 1 addition & 0 deletions include/linux/blkdev.h
@@ -1140,6 +1140,7 @@ extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
 extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
 extern void blk_queue_alignment_offset(struct request_queue *q,
 				       unsigned int alignment);
+void blk_queue_update_readahead(struct request_queue *q);
 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
