Skip to content

Commit

Permalink
Merge tag 'for-5.18/dm-changes' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Significant refactoring and fixing of how DM core does bio-based IO
   accounting with focus on fixing wildly inaccurate IO stats for
   dm-crypt (and other DM targets that defer bio submission in their own
   workqueues). End result is proper IO accounting, made possible by
   targets being updated to use the new dm_submit_bio_remap() interface.

 - Add hipri bio polling support (REQ_POLLED) to bio-based DM.

 - Reduce dm_io and dm_target_io structs so that a single dm_io (which
   contains dm_target_io and first clone bio) weighs in at 256 bytes.
   For reference the bio struct is 128 bytes.

 - Various other small cleanups, fixes or improvements in DM core and
   targets.

 - Update MAINTAINERS with my kernel.org email address to allow
   distinction between my "upstream" and "Red" Hats.

* tag 'for-5.18/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (46 commits)
  dm: consolidate spinlocks in dm_io struct
  dm: reduce size of dm_io and dm_target_io structs
  dm: switch dm_target_io booleans over to proper flags
  dm: switch dm_io booleans over to proper flags
  dm: update email address in MAINTAINERS
  dm: return void from __send_empty_flush
  dm: factor out dm_io_complete
  dm cache: use dm_submit_bio_remap
  dm: simplify dm_submit_bio_remap interface
  dm thin: use dm_submit_bio_remap
  dm: add WARN_ON_ONCE to dm_submit_bio_remap
  dm: support bio polling
  block: add ->poll_bio to block_device_operations
  dm mpath: use DMINFO instead of printk with KERN_INFO
  dm: stop using bdevname
  dm-zoned: remove the ->name field in struct dmz_dev
  dm: remove unnecessary local variables in __bind
  dm: requeue IO if mapping table not yet available
  dm io: remove stale comment block for dm_io()
  dm thin metadata: remove unused dm_thin_remove_block and __remove
  ...
  • Loading branch information
Linus Torvalds committed Mar 25, 2022
2 parents 2dacc1e + 4d7bca1 commit b1f8ccd
Show file tree
Hide file tree
Showing 27 changed files with 685 additions and 361 deletions.
2 changes: 1 addition & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -5605,7 +5605,7 @@ F: include/linux/devm-helpers.h

DEVICE-MAPPER (LVM)
M: Alasdair Kergon <agk@redhat.com>
M: Mike Snitzer <snitzer@redhat.com>
M: Mike Snitzer <snitzer@kernel.org>
M: dm-devel@redhat.com
L: dm-devel@redhat.com
S: Maintained
Expand Down
14 changes: 9 additions & 5 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ static void __submit_bio(struct bio *bio)
*
* bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
* bio_list_on_stack[1] contains bios that were submitted before the current
* ->submit_bio_bio, but that haven't been processed yet.
* ->submit_bio, but that haven't been processed yet.
*/
static void __submit_bio_noacct(struct bio *bio)
{
Expand Down Expand Up @@ -955,7 +955,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
int ret;
int ret = 0;

if (cookie == BLK_QC_T_NONE ||
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
Expand All @@ -965,10 +965,14 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)

if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
return 0;
if (WARN_ON_ONCE(!queue_is_mq(q)))
ret = 0; /* not yet implemented, should not happen */
else
if (queue_is_mq(q)) {
ret = blk_mq_poll(q, cookie, iob, flags);
} else {
struct gendisk *disk = q->disk;

if (disk && disk->fops->poll_bio)
ret = disk->fops->poll_bio(bio, iob, flags);
}
blk_queue_exit(q);
return ret;
}
Expand Down
4 changes: 4 additions & 0 deletions block/genhd.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,10 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
struct device *ddev = disk_to_dev(disk);
int ret;

/* Only makes sense for bio-based to set ->poll_bio */
if (queue_is_mq(disk->queue) && disk->fops->poll_bio)
return -EINVAL;

/*
* The disk queue should now be all set with enough information about
* the device for the elevator code to pick an adequate default
Expand Down
4 changes: 3 additions & 1 deletion drivers/md/dm-cache-policy-smq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,9 @@ static unsigned default_promote_level(struct smq_policy *mq)
* This scheme reminds me of a graph of entropy vs probability of a
* binary variable.
*/
static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};
static const unsigned int table[] = {
1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1
};

unsigned hits = mq->cache_stats.hits;
unsigned misses = mq->cache_stats.misses;
Expand Down
17 changes: 8 additions & 9 deletions drivers/md/dm-cache-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
static void accounted_request(struct cache *cache, struct bio *bio)
{
accounted_begin(cache, bio);
submit_bio_noacct(bio);
dm_submit_bio_remap(bio, NULL);
}

static void issue_op(struct bio *bio, void *context)
Expand Down Expand Up @@ -1708,7 +1708,7 @@ static bool process_bio(struct cache *cache, struct bio *bio)
bool commit_needed;

if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
submit_bio_noacct(bio);
dm_submit_bio_remap(bio, NULL);

return commit_needed;
}
Expand Down Expand Up @@ -1774,7 +1774,7 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio)

if (cache->features.discard_passdown) {
remap_to_origin(cache, bio);
submit_bio_noacct(bio);
dm_submit_bio_remap(bio, NULL);
} else
bio_endio(bio);

Expand Down Expand Up @@ -2015,7 +2015,6 @@ static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
{
int r;
sector_t metadata_dev_size;
char b[BDEVNAME_SIZE];

if (!at_least_one_arg(as, error))
return -EINVAL;
Expand All @@ -2029,8 +2028,8 @@ static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,

metadata_dev_size = get_dev_size(ca->metadata_dev);
if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
ca->metadata_dev->bdev, THIN_METADATA_MAX_SECTORS);

return 0;
}
Expand Down Expand Up @@ -2357,6 +2356,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)

cache->ti = ca->ti;
ti->private = cache;
ti->accounts_remapped_io = true;
ti->num_flush_bios = 2;
ti->flush_supported = true;

Expand Down Expand Up @@ -3345,7 +3345,6 @@ static void disable_passdown_if_not_supported(struct cache *cache)
struct block_device *origin_bdev = cache->origin_dev->bdev;
struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
const char *reason = NULL;
char buf[BDEVNAME_SIZE];

if (!cache->features.discard_passdown)
return;
Expand All @@ -3357,8 +3356,8 @@ static void disable_passdown_if_not_supported(struct cache *cache)
reason = "max discard sectors smaller than a block";

if (reason) {
DMWARN("Origin device (%s) %s: Disabling discard passdown.",
bdevname(origin_bdev, buf), reason);
DMWARN("Origin device (%pg) %s: Disabling discard passdown.",
origin_bdev, reason);
cache->features.discard_passdown = false;
}
}
Expand Down
10 changes: 4 additions & 6 deletions drivers/md/dm-clone-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -1682,7 +1682,6 @@ static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char *
{
int r;
sector_t metadata_dev_size;
char b[BDEVNAME_SIZE];

r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
&clone->metadata_dev);
Expand All @@ -1693,8 +1692,8 @@ static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char *

metadata_dev_size = get_dev_size(clone->metadata_dev);
if (metadata_dev_size > DM_CLONE_METADATA_MAX_SECTORS_WARNING)
DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
bdevname(clone->metadata_dev->bdev, b), DM_CLONE_METADATA_MAX_SECTORS);
DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
clone->metadata_dev->bdev, DM_CLONE_METADATA_MAX_SECTORS);

return 0;
}
Expand Down Expand Up @@ -2033,7 +2032,6 @@ static void disable_passdown_if_not_supported(struct clone *clone)
struct block_device *dest_dev = clone->dest_dev->bdev;
struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits;
const char *reason = NULL;
char buf[BDEVNAME_SIZE];

if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
return;
Expand All @@ -2044,8 +2042,8 @@ static void disable_passdown_if_not_supported(struct clone *clone)
reason = "max discard sectors smaller than a region";

if (reason) {
DMWARN("Destination device (%s) %s: Disabling discard passdown.",
bdevname(dest_dev, buf), reason);
DMWARN("Destination device (%pg) %s: Disabling discard passdown.",
dest_dev, reason);
clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
}
}
Expand Down
99 changes: 71 additions & 28 deletions drivers/md/dm-core.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,21 @@ struct mapped_device {
struct gendisk *disk;
struct dax_device *dax_dev;

wait_queue_head_t wait;
unsigned long __percpu *pending_io;

/* forced geometry settings */
struct hd_geometry geometry;

/*
* Processing queue (flush)
*/
struct workqueue_struct *wq;

/*
* A list of ios that arrived while we were suspended.
*/
struct work_struct work;
wait_queue_head_t wait;
spinlock_t deferred_lock;
struct bio_list deferred;

Expand All @@ -83,36 +93,28 @@ struct mapped_device {
struct list_head uevent_list;
spinlock_t uevent_lock; /* Protect access to uevent_list */

/* for blk-mq request-based DM support */
bool init_tio_pdu:1;
struct blk_mq_tag_set *tag_set;

struct dm_stats stats;

/* the number of internal suspends */
unsigned internal_suspend_count;

int swap_bios;
struct semaphore swap_bios_semaphore;
struct mutex swap_bios_lock;

/*
* io objects are allocated from here.
*/
struct bio_set io_bs;
struct bio_set bs;

/*
* Processing queue (flush)
*/
struct workqueue_struct *wq;

/* forced geometry settings */
struct hd_geometry geometry;

/* kobject and completion */
struct dm_kobject_holder kobj_holder;

int swap_bios;
struct semaphore swap_bios_semaphore;
struct mutex swap_bios_lock;

struct dm_stats stats;

/* for blk-mq request-based DM support */
struct blk_mq_tag_set *tag_set;
bool init_tio_pdu:1;

struct srcu_struct io_barrier;

#ifdef CONFIG_BLK_DEV_ZONED
Expand Down Expand Up @@ -206,35 +208,76 @@ struct dm_table {
/*
* One of these is allocated per clone bio.
*/
#define DM_TIO_MAGIC 7282014
#define DM_TIO_MAGIC 28714
struct dm_target_io {
unsigned int magic;
unsigned short magic;
unsigned short flags;
unsigned int target_bio_nr;
struct dm_io *io;
struct dm_target *ti;
unsigned int target_bio_nr;
unsigned int *len_ptr;
bool inside_dm_io;
sector_t old_sector;
struct bio clone;
};

/*
 * dm_target_io flags
 *
 * The enumerators are consecutive small integers starting at 0, i.e. bit
 * positions rather than masks.  Presumably they are meant to be passed as
 * the 'bit' argument of dm_tio_flagged() / dm_tio_set_flag(), which shift
 * 1U by that amount -- confirm against callers.  Any new value must still
 * fit in the 'unsigned short flags' member of struct dm_target_io.
 */
enum {
DM_TIO_INSIDE_DM_IO,
DM_TIO_IS_DUPLICATE_BIO
};

/*
 * Test a single flag bit of a dm_target_io.
 *
 * @tio: target io whose 'flags' member is inspected.
 * @bit: bit position (not a mask) to test.
 *
 * Returns true when that bit is set in tio->flags, false otherwise.
 */
static inline bool dm_tio_flagged(struct dm_target_io *tio, unsigned int bit)
{
	const unsigned int mask = 1U << bit;

	return (tio->flags & mask) != 0;
}

/*
 * Set a single flag bit of a dm_target_io.
 *
 * @tio: target io whose 'flags' member is updated.
 * @bit: bit position (not a mask) to set.
 *
 * Plain read-modify-write of tio->flags; other bits are left untouched.
 */
static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit)
{
	const unsigned int mask = 1U << bit;

	tio->flags |= mask;
}

/*
* One of these is allocated per original bio.
* It contains the first clone used for that original.
*/
#define DM_IO_MAGIC 5191977
#define DM_IO_MAGIC 19577
struct dm_io {
unsigned int magic;
struct mapped_device *md;
blk_status_t status;
unsigned short magic;
unsigned short flags;
atomic_t io_count;
struct mapped_device *md;
struct bio *orig_bio;
blk_status_t status;
spinlock_t lock;
unsigned long start_time;
spinlock_t endio_lock;
void *data;
struct hlist_node node;
struct task_struct *map_task;
struct dm_stats_aux stats_aux;
/* last member of dm_target_io is 'struct bio' */
struct dm_target_io tio;
};

/*
 * dm_io flags
 *
 * The enumerators are consecutive small integers starting at 0, i.e. bit
 * positions rather than masks.  Presumably they are meant to be passed as
 * the 'bit' argument of dm_io_flagged() / dm_io_set_flag(), which shift
 * 1U by that amount -- confirm against callers.  Any new value must still
 * fit in the 'unsigned short flags' member of struct dm_io.
 */
enum {
DM_IO_START_ACCT,
DM_IO_ACCOUNTED
};

/*
 * Test a single flag bit of a dm_io.
 *
 * @io:  io whose 'flags' member is inspected.
 * @bit: bit position (not a mask) to test.
 *
 * Returns true when that bit is set in io->flags, false otherwise.
 */
static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
{
	const unsigned int mask = 1U << bit;

	return (io->flags & mask) != 0;
}

/*
 * Set a single flag bit of a dm_io.
 *
 * @io:  io whose 'flags' member is updated.
 * @bit: bit position (not a mask) to set.
 *
 * Plain read-modify-write of io->flags; other bits are left untouched.
 */
static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit)
{
	const unsigned int mask = 1U << bit;

	io->flags |= mask;
}

static inline void dm_io_inc_pending(struct dm_io *io)
{
atomic_inc(&io->io_count);
Expand Down
Loading

0 comments on commit b1f8ccd

Please sign in to comment.