Skip to content

Commit

Permalink
block: rework zone reporting
Browse files Browse the repository at this point in the history
Avoid the need to allocate a potentially large array of struct blk_zone
in the block layer by switching the ->report_zones method interface to
a callback model. Now the caller simply supplies a callback that is
executed on each reported zone, and private data for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
Christoph Hellwig authored and Jens Axboe committed Nov 13, 2019
1 parent 23a5086 commit d410035
Show file tree
Hide file tree
Showing 12 changed files with 329 additions and 447 deletions.
253 changes: 97 additions & 156 deletions block/blk-zoned.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,44 +101,35 @@ EXPORT_SYMBOL_GPL(blkdev_nr_zones);
* blkdev_report_zones - Get zones information
* @bdev: Target block device
* @sector: Sector from which to report zones
* @zones: Array of zone structures where to return the zones information
* @nr_zones: Number of zone structures in the zone array
* @nr_zones: Maximum number of zones to report
* @cb: Callback function called for each reported zone
* @data: Private data for the callback
*
* Description:
* Get zone information starting from the zone containing @sector.
* The number of zone information reported may be less than the number
* requested by @nr_zones. The number of zones actually reported is
* returned in @nr_zones.
* The caller must use memalloc_noXX_save/restore() calls to control
* memory allocations done within this function (zone array and command
* buffer allocation by the device driver).
* Get zone information starting from the zone containing @sector for at most
* @nr_zones, and call @cb for each zone reported by the device.
* To report all zones in a device starting from @sector, the BLK_ALL_ZONES
* constant can be passed to @nr_zones.
* Returns the number of zones reported by the device, or a negative errno
* value in case of failure.
*
* Note: The caller must use memalloc_noXX_save/restore() calls to control
* memory allocations done within this function.
*/
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
struct blk_zone *zones, unsigned int *nr_zones)
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct request_queue *q = bdev_get_queue(bdev);
struct gendisk *disk = bdev->bd_disk;
sector_t capacity = get_capacity(disk);

if (!blk_queue_is_zoned(q))
return -EOPNOTSUPP;

/*
* A block device that advertized itself as zoned must have a
* report_zones method. If it does not have one defined, the device
* driver has a bug. So warn about that.
*/
if (WARN_ON_ONCE(!disk->fops->report_zones))
if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
WARN_ON_ONCE(!disk->fops->report_zones))
return -EOPNOTSUPP;

if (!*nr_zones || sector >= capacity) {
*nr_zones = 0;
if (!nr_zones || sector >= capacity)
return 0;
}

*nr_zones = min(*nr_zones, __blkdev_nr_zones(q, capacity - sector));

return disk->fops->report_zones(disk, sector, zones, nr_zones);
return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);

Expand Down Expand Up @@ -232,6 +223,20 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
}
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);

/*
 * Private data handed to blkdev_copy_zone_to_user() through the
 * report_zones callback's @data pointer by blkdev_report_zones_ioctl().
 */
struct zone_report_args {
/* User-space array of struct blk_zone; the callback writes entry [idx]. */
struct blk_zone __user *zones;
};

static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
void *data)
{
struct zone_report_args *args = data;

if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
return -EFAULT;
return 0;
}

/*
* BLKREPORTZONE ioctl processing.
* Called from blkdev_ioctl.
Expand All @@ -240,9 +245,9 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct zone_report_args args;
struct request_queue *q;
struct blk_zone_report rep;
struct blk_zone *zones;
int ret;

if (!argp)
Expand All @@ -264,32 +269,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (!rep.nr_zones)
return -EINVAL;

rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);

zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
GFP_KERNEL | __GFP_ZERO);
if (!zones)
return -ENOMEM;

ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
if (ret)
goto out;

if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
ret = -EFAULT;
goto out;
}

if (rep.nr_zones) {
if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
sizeof(struct blk_zone) * rep.nr_zones))
ret = -EFAULT;
}
args.zones = argp + sizeof(struct blk_zone_report);
ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
blkdev_copy_zone_to_user, &args);
if (ret < 0)
return ret;

out:
kvfree(zones);

return ret;
rep.nr_zones = ret;
if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
return -EFAULT;
return 0;
}

/*
Expand Down Expand Up @@ -351,31 +340,6 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
GFP_NOIO, node);
}

/*
 * Allocate an array of struct blk_zone used to fetch zone information.
 *
 * On entry *nr_zones is the number of zones wanted. The array is capped at
 * BLK_ZONED_REPORT_MAX_ZONES entries, so it may be smaller than requested;
 * on return *nr_zones holds the number of entries actually allocated, or 0
 * if the allocation failed (in which case NULL is returned).
 */
static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
{
	size_t count = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
	struct blk_zone *array;

	/*
	 * GFP_KERNEL here is meaningless as the caller task context has
	 * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
	 * with memalloc_noio_save().
	 */
	array = kvcalloc(count, sizeof(struct blk_zone), GFP_KERNEL);

	/* Report back how many entries the caller really got. */
	*nr_zones = array ? count : 0;

	return array;
}

void blk_queue_free_zone_bitmaps(struct request_queue *q)
{
kfree(q->seq_zones_bitmap);
Expand All @@ -384,12 +348,21 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q)
q->seq_zones_wlock = NULL;
}

/*
 * State shared between blk_revalidate_disk_zones() and the per-zone
 * callback blk_revalidate_zone_cb() while the zones of a disk are checked.
 */
struct blk_revalidate_zone_args {
/* Disk whose zones are being revalidated. */
struct gendisk *disk;
/* New bitmap; bit idx is set for every zone that is not conventional. */
unsigned long *seq_zones_bitmap;
/* New bitmap, swapped into q->seq_zones_wlock on success. */
unsigned long *seq_zones_wlock;
/* Start sector expected for the next reported zone (detects gaps). */
sector_t sector;
};

/*
* Helper function to check the validity of zones of a zoned block device.
*/
static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone,
sector_t *sector)
static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
struct blk_revalidate_zone_args *args = data;
struct gendisk *disk = args->disk;
struct request_queue *q = disk->queue;
sector_t zone_sectors = blk_queue_zone_sectors(q);
sector_t capacity = get_capacity(disk);
Expand All @@ -409,14 +382,14 @@ static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone,
zone->len > zone_sectors) {
pr_warn("%s: Invalid zoned device with larger last zone size\n",
disk->disk_name);
return false;
return -ENODEV;
}

/* Check for holes in the zone report */
if (zone->start != *sector) {
if (zone->start != args->sector) {
pr_warn("%s: Zone gap at sectors %llu..%llu\n",
disk->disk_name, *sector, zone->start);
return false;
disk->disk_name, args->sector, zone->start);
return -ENODEV;
}

/* Check zone type */
Expand All @@ -428,12 +401,38 @@ static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone,
default:
pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
disk->disk_name, (int)zone->type, zone->start);
return false;
return -ENODEV;
}

*sector += zone->len;
if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
set_bit(idx, args->seq_zones_bitmap);

return true;
args->sector += zone->len;
return 0;
}

/*
 * Allocate fresh zone bitmaps for @disk and fill them from a zone report.
 *
 * @disk:     disk being revalidated
 * @nr_zones: number of zones to size the bitmaps for and to report
 * @args:     revalidation state; receives the newly allocated
 *            seq_zones_wlock and seq_zones_bitmap pointers
 *
 * The report is started at sector 0 and blk_revalidate_zone_cb() is run on
 * every reported zone with @args as its private data.
 *
 * Returns the value of the driver's ->report_zones() method, or -ENOMEM if
 * a bitmap could not be allocated. Bitmaps stored in @args are not freed
 * here, including on failure; the caller releases whatever is left in
 * @args.
 */
static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones,
				struct blk_revalidate_zone_args *args)
{
	/*
	 * Ensure that all memory allocations in this context are done as
	 * if GFP_NOIO was specified.
	 */
	unsigned int noio_flag = memalloc_noio_save();
	struct request_queue *q = disk->queue;
	int ret = -ENOMEM;

	args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
	if (!args->seq_zones_wlock)
		goto out;
	args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
	if (!args->seq_zones_bitmap)
		goto out;

	ret = disk->fops->report_zones(disk, 0, nr_zones,
				       blk_revalidate_zone_cb, args);
out:
	/*
	 * Every exit must undo memalloc_noio_save(); returning early on an
	 * allocation failure would leave the NOIO scope set on the task.
	 */
	memalloc_noio_restore(noio_flag);
	return ret;
}

/**
Expand All @@ -449,11 +448,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
unsigned int i, rep_nr_zones = 0, z = 0, nrz;
struct blk_zone *zones = NULL;
unsigned int noio_flag;
sector_t sector = 0;
struct blk_revalidate_zone_args args = { .disk = disk };
int ret = 0;

if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
Expand All @@ -468,82 +463,28 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
return 0;
}

/*
* Ensure that all memory allocations in this context are done as
* if GFP_NOIO was specified.
*/
noio_flag = memalloc_noio_save();

if (!nr_zones)
goto update;

/* Allocate bitmaps */
ret = -ENOMEM;
seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
if (!seq_zones_wlock)
goto out;
seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
if (!seq_zones_bitmap)
goto out;

/*
* Get zone information to check the zones and initialize
* seq_zones_bitmap.
*/
rep_nr_zones = nr_zones;
zones = blk_alloc_zones(&rep_nr_zones);
if (!zones)
goto out;

while (z < nr_zones) {
nrz = min(nr_zones - z, rep_nr_zones);
ret = disk->fops->report_zones(disk, sector, zones, &nrz);
if (ret)
goto out;
if (!nrz)
break;
for (i = 0; i < nrz; i++) {
if (!blk_zone_valid(disk, &zones[i], &sector)) {
ret = -ENODEV;
goto out;
}
if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
set_bit(z, seq_zones_bitmap);
z++;
}
}

if (WARN_ON(z != nr_zones)) {
ret = -EIO;
goto out;
}
if (nr_zones)
ret = blk_update_zone_info(disk, nr_zones, &args);

update:
/*
* Install the new bitmaps, making sure the queue is stopped and
* all I/Os are completed (i.e. a scheduler is not referencing the
* bitmaps).
*/
blk_mq_freeze_queue(q);
q->nr_zones = nr_zones;
swap(q->seq_zones_wlock, seq_zones_wlock);
swap(q->seq_zones_bitmap, seq_zones_bitmap);
blk_mq_unfreeze_queue(q);

out:
memalloc_noio_restore(noio_flag);

kvfree(zones);
kfree(seq_zones_wlock);
kfree(seq_zones_bitmap);

if (ret) {
if (ret >= 0) {
q->nr_zones = nr_zones;
swap(q->seq_zones_wlock, args.seq_zones_wlock);
swap(q->seq_zones_bitmap, args.seq_zones_bitmap);
ret = 0;
} else {
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
blk_mq_freeze_queue(q);
blk_queue_free_zone_bitmaps(q);
blk_mq_unfreeze_queue(q);
}
blk_mq_unfreeze_queue(q);

kfree(args.seq_zones_wlock);
kfree(args.seq_zones_bitmap);
return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
Expand Down
2 changes: 1 addition & 1 deletion drivers/block/null_blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ struct nullb {
int null_zone_init(struct nullb_device *dev);
void null_zone_exit(struct nullb_device *dev);
int null_report_zones(struct gendisk *disk, sector_t sector,
struct blk_zone *zones, unsigned int *nr_zones);
unsigned int nr_zones, report_zones_cb cb, void *data);
blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
sector_t nr_sectors);
Expand Down
Loading

0 comments on commit d410035

Please sign in to comment.