Skip to content

Commit

Permalink
Merge tag 'for-5.19/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Browse files Browse the repository at this point in the history

Pull device mapper updates from Mike Snitzer:

 - Enable DM core bioset's per-cpu bio cache if QUEUE_FLAG_POLL is set.
   This change improves DM's hipri bio polling (REQ_POLLED) performance
   by 7 - 20% depending on the system.

 - Update DM core to use jump_labels to further reduce cost of unlikely
   branches for zoned block devices, dm-stats and swap_bios throttling.

 - Various DM core changes to reduce bio-based DM overhead and simplify
   IO accounting.

 - Fundamental DM core improvements to dm_io reference counting and the
   elimination of using bio_split()+bio_chain() -- instead DM's
   bio-based IO accounting is updated to account that a split occurred.

 - Improve DM core's abnormal bio processing to do less work.

 - Improve DM core's hipri polling support to use a single list rather
   than an hlist.

 - Update DM core to pass NULL bdev to bio_alloc_clone() so that
   initialization that isn't useful for DM can be elided.

 - Add cond_resched to DM stats' various loops that loop over all
   entries.

 - Fix incorrect error code return from DM integrity's constructor.

 - Make DM crypt's printing of the key constant-time.

 - Update bio-based DM multipath to provide high-resolution timer to the
   Historical Service Time (HST) path selector.

* tag 'for-5.19/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (26 commits)
  dm: pass NULL bdev to bio_alloc_clone
  dm cache metadata: remove unnecessary variable in __dump_mapping
  dm mpath: provide high-resolution timer to HST for bio-based
  dm crypt: make printing of the key constant-time
  dm integrity: fix error code in dm_integrity_ctr()
  dm stats: add cond_resched when looping over entries
  dm: improve abnormal bio processing
  dm: simplify bio-based IO accounting further
  dm: put all polled dm_io instances into a single list
  dm: improve dm_io reference counting
  dm: don't grab target io reference in dm_zone_map_bio
  dm: improve bio splitting and associated IO accounting
  dm: switch to bdev based IO accounting interfaces
  dm: pass dm_io instance to dm_io_acct directly
  dm: don't pass bio to __dm_start_io_acct and dm_end_io_acct
  dm: use bio_sectors in dm_aceept_partial_bio
  dm: simplify basic targets
  dm: conditionally enable branching for less used features
  dm: introduce dm_{get,put}_live_table_bio called from dm_submit_bio
  dm: move hot dm_io members to same cacheline as dm_target_io
  ...
  • Loading branch information
Linus Torvalds committed May 27, 2022
2 parents 780d8ce + ca52248 commit 7e28407
Show file tree
Hide file tree
Showing 15 changed files with 409 additions and 287 deletions.
3 changes: 1 addition & 2 deletions drivers/md/dm-cache-metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1509,15 +1509,14 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd,

/*
 * Decode a single mapping leaf.
 *
 * Copies the packed 64-bit value out of @leaf and decodes it into an origin
 * block and flags with unpack_value().  The decoded values are not used any
 * further here, and neither @context nor @cblock is referenced.
 *
 * NOTE(review): presumably invoked once per mapping from __dump_mappings();
 * confirm against the caller.
 *
 * Always returns 0.
 */
static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
{
	__le64 value;
	dm_oblock_t oblock;
	unsigned flags;

	/* Copy the packed value out of the leaf before decoding it. */
	memcpy(&value, leaf, sizeof(value));
	unpack_value(value, &oblock, &flags);

	return 0;
}

static int __dump_mappings(struct dm_cache_metadata *cmd)
Expand Down
38 changes: 23 additions & 15 deletions drivers/md/dm-core.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/ktime.h>
#include <linux/blk-mq.h>
#include <linux/blk-crypto-profile.h>
#include <linux/jump_label.h>

#include <trace/events/block.h>

Expand Down Expand Up @@ -154,6 +155,10 @@ static inline struct dm_stats *dm_get_stats(struct mapped_device *md)
return &md->stats;
}

/*
 * Static keys, declared default-false, that gate less frequently used
 * features.  Each is switched on with static_branch_enable() the first time
 * the feature is needed:
 *  - stats_enabled:     in dm_stats_create()
 *  - swap_bios_enabled: in dm_table_add_target(), when the target sets
 *                       limit_swap_bios
 *  - zoned_enabled:     in dm_table_set_restrictions()
 */
DECLARE_STATIC_KEY_FALSE(stats_enabled);
DECLARE_STATIC_KEY_FALSE(swap_bios_enabled);
DECLARE_STATIC_KEY_FALSE(zoned_enabled);

static inline bool dm_emulate_zone_append(struct mapped_device *md)
{
if (blk_queue_is_zoned(md->queue))
Expand Down Expand Up @@ -237,6 +242,12 @@ static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit)
tio->flags |= (1U << bit);
}

/*
 * Report whether @tio is a "normal" target io: one that carries the
 * DM_TIO_INSIDE_DM_IO flag and does not carry DM_TIO_IS_DUPLICATE_BIO.
 */
static inline bool dm_tio_is_normal(struct dm_target_io *tio)
{
	/* Must be flagged as living inside a dm_io ... */
	if (!dm_tio_flagged(tio, DM_TIO_INSIDE_DM_IO))
		return false;

	/* ... and must not be flagged as a duplicate bio. */
	return !dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO);
}

/*
* One of these is allocated per original bio.
* It contains the first clone used for that original.
Expand All @@ -245,16 +256,20 @@ static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit)
struct dm_io {
unsigned short magic;
blk_short_t flags;
atomic_t io_count;
struct mapped_device *md;
struct bio *orig_bio;
blk_status_t status;
spinlock_t lock;
unsigned long start_time;
void *data;
struct hlist_node node;
struct task_struct *map_task;
struct dm_io *next;
struct dm_stats_aux stats_aux;
blk_status_t status;
atomic_t io_count;
struct mapped_device *md;

/* The three fields represent mapped part of original bio */
struct bio *orig_bio;
unsigned int sector_offset; /* offset to end of orig_bio */
unsigned int sectors;

/* last member of dm_target_io is 'struct bio' */
struct dm_target_io tio;
};
Expand All @@ -263,8 +278,8 @@ struct dm_io {
* dm_io flags
*/
enum {
DM_IO_START_ACCT,
DM_IO_ACCOUNTED
DM_IO_ACCOUNTED,
DM_IO_WAS_SPLIT
};

static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
Expand All @@ -277,13 +292,6 @@ static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit)
io->flags |= (1U << bit);
}

/*
 * Take one more reference on @io by atomically incrementing io->io_count.
 */
static inline void dm_io_inc_pending(struct dm_io *io)
{
atomic_inc(&io->io_count);
}

void dm_io_dec_pending(struct dm_io *io, blk_status_t error);

static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
{
return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
Expand Down
14 changes: 11 additions & 3 deletions drivers/md/dm-crypt.c
Original file line number Diff line number Diff line change
Expand Up @@ -3439,6 +3439,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED;
}

/*
 * Convert a nibble (0..15) to its lowercase ASCII hex digit without any
 * data-dependent branch or table lookup.
 *
 * For c <= 9 the difference (9 - c) is non-negative and below 16, so the
 * shift yields 0 and the result is c + '0'.  For c in 10..15 the difference
 * is negative; converted to unsigned and shifted right by 4 it still has all
 * of the low bits set, so masking with 0x27 adds 39, i.e. the distance from
 * '0' + 10 to 'a'.
 */
static char hex2asc(unsigned char c)
{
	unsigned int above_nine = (unsigned)(9 - c) >> 4;
	unsigned int alpha_shift = above_nine & 0x27;

	return c + '0' + alpha_shift;
}

static void crypt_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
Expand All @@ -3457,9 +3462,12 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
if (cc->key_size > 0) {
if (cc->key_string)
DMEMIT(":%u:%s", cc->key_size, cc->key_string);
else
for (i = 0; i < cc->key_size; i++)
DMEMIT("%02x", cc->key[i]);
else {
for (i = 0; i < cc->key_size; i++) {
DMEMIT("%c%c", hex2asc(cc->key[i] >> 4),
hex2asc(cc->key[i] & 0xf));
}
}
} else
DMEMIT("-");

Expand Down
3 changes: 1 addition & 2 deletions drivers/md/dm-delay.c
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
}
delayed->class = c;
bio_set_dev(bio, c->dev->bdev);
if (bio_sectors(bio))
bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

return delay_bio(dc, c, bio);
}
Expand Down
4 changes: 1 addition & 3 deletions drivers/md/dm-flakey.c
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
struct flakey_c *fc = ti->private;

bio_set_dev(bio, fc->dev->bdev);
if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
bio->bi_iter.bi_sector =
flakey_map_sector(ti, bio->bi_iter.bi_sector);
bio->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector);
}

static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
Expand Down
2 changes: 0 additions & 2 deletions drivers/md/dm-integrity.c
Original file line number Diff line number Diff line change
Expand Up @@ -4494,8 +4494,6 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}

if (should_write_sb) {
int r;

init_journal(ic, 0, ic->journal_sections, 0);
r = dm_integrity_failed(ic);
if (unlikely(r)) {
Expand Down
11 changes: 2 additions & 9 deletions drivers/md/dm-linear.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,12 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
return lc->start + dm_target_offset(ti, bi_sector);
}

static void linear_map_bio(struct dm_target *ti, struct bio *bio)
static int linear_map(struct dm_target *ti, struct bio *bio)
{
struct linear_c *lc = ti->private;

bio_set_dev(bio, lc->dev->bdev);
if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
bio->bi_iter.bi_sector =
linear_map_sector(ti, bio->bi_iter.bi_sector);
}

static int linear_map(struct dm_target *ti, struct bio *bio)
{
linear_map_bio(ti, bio);
bio->bi_iter.bi_sector = linear_map_sector(ti, bio->bi_iter.bi_sector);

return DM_MAPIO_REMAPPED;
}
Expand Down
8 changes: 7 additions & 1 deletion drivers/md/dm-mpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ struct multipath {
/*
 * Per-bio multipath state, initialised by multipath_init_per_bio_data().
 */
struct dm_mpath_io {
/* Path the bio was mapped to; NULL after init until __multipath_map_bio() sets it. */
struct pgpath *pgpath;
/* Size of the bio in bytes (bio->bi_iter.bi_size) captured at init time. */
size_t nr_bytes;
/*
 * Start timestamp from ktime_get_ns(), recorded in __multipath_map_bio()
 * only when the path selector type sets DM_PS_USE_HR_TIMER; otherwise 0,
 * in which case multipath_end_io_bio() falls back to
 * dm_start_time_ns_from_clone().
 */
u64 start_time_ns;
};

typedef int (*action_fn) (struct pgpath *pgpath);
Expand Down Expand Up @@ -295,6 +296,7 @@ static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mp

mpio->nr_bytes = bio->bi_iter.bi_size;
mpio->pgpath = NULL;
mpio->start_time_ns = 0;
*mpio_p = mpio;

dm_bio_record(bio_details, bio);
Expand Down Expand Up @@ -647,6 +649,9 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,

mpio->pgpath = pgpath;

if (dm_ps_use_hr_timer(pgpath->pg->ps.type))
mpio->start_time_ns = ktime_get_ns();

bio->bi_status = 0;
bio_set_dev(bio, pgpath->path.dev->bdev);
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
Expand Down Expand Up @@ -1713,7 +1718,8 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,

if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
dm_start_time_ns_from_clone(clone));
(mpio->start_time_ns ?:
dm_start_time_ns_from_clone(clone)));
}

return r;
Expand Down
15 changes: 15 additions & 0 deletions drivers/md/dm-path-selector.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,26 @@ struct path_selector {
void *context;
};

/*
* If a path selector uses this flag, a high resolution timer is used
* (via ktime_get_ns) to account for IO start time in BIO-based mpath.
* This improves performance of some path selectors (e.g. HST), in
* exchange for slightly higher overhead when submitting the BIO.
* The extra cost is usually offset by improved path selection for
* some benchmarks.
*
* This has no effect for request-based mpath, since it already uses a
* higher precision timer by default.
*/
#define DM_PS_USE_HR_TIMER 0x00000001
#define dm_ps_use_hr_timer(type) ((type)->features & DM_PS_USE_HR_TIMER)

/* Information about a path selector type */
struct path_selector_type {
char *name;
struct module *module;

unsigned int features;
unsigned int table_args;
unsigned int info_args;

Expand Down
1 change: 1 addition & 0 deletions drivers/md/dm-ps-historical-service-time.c
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ static int hst_end_io(struct path_selector *ps, struct dm_path *path,
static struct path_selector_type hst_ps = {
.name = "historical-service-time",
.module = THIS_MODULE,
.features = DM_PS_USE_HR_TIMER,
.table_args = 1,
.info_args = 3,
.create = hst_create,
Expand Down
11 changes: 11 additions & 0 deletions drivers/md/dm-stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ void dm_stats_cleanup(struct dm_stats *stats)
atomic_read(&shared->in_flight[READ]),
atomic_read(&shared->in_flight[WRITE]));
}
cond_resched();
}
dm_stat_free(&s->rcu_head);
}
Expand Down Expand Up @@ -330,6 +331,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
for (ni = 0; ni < n_entries; ni++) {
atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
cond_resched();
}

if (s->n_histogram_entries) {
Expand All @@ -342,6 +344,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
for (ni = 0; ni < n_entries; ni++) {
s->stat_shared[ni].tmp.histogram = hi;
hi += s->n_histogram_entries + 1;
cond_resched();
}
}

Expand All @@ -362,6 +365,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
for (ni = 0; ni < n_entries; ni++) {
p[ni].histogram = hi;
hi += s->n_histogram_entries + 1;
cond_resched();
}
}
}
Expand Down Expand Up @@ -396,6 +400,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,

dm_stats_recalc_precise_timestamps(stats);

if (!static_key_enabled(&stats_enabled.key))
static_branch_enable(&stats_enabled);

mutex_unlock(&stats->mutex);

resume_callback(md);
Expand Down Expand Up @@ -497,6 +504,7 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
}
DMEMIT("\n");
}
cond_resched();
}
mutex_unlock(&stats->mutex);

Expand Down Expand Up @@ -774,6 +782,7 @@ static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
local_irq_enable();
}
}
cond_resched();
}
}

Expand Down Expand Up @@ -889,6 +898,8 @@ static int dm_stats_print(struct dm_stats *stats, int id,

if (unlikely(sz + 1 >= maxlen))
goto buffer_overflow;

cond_resched();
}

if (clear)
Expand Down
16 changes: 13 additions & 3 deletions drivers/md/dm-table.c
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
dm_device_name(t->md), type);

if (tgt->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
static_branch_enable(&swap_bios_enabled);

return 0;

bad:
Expand Down Expand Up @@ -1002,28 +1005,33 @@ bool dm_table_request_based(struct dm_table *t)
return __table_type_request_based(dm_table_get_type(t));
}

static int dm_table_supports_poll(struct dm_table *t);

static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
unsigned per_io_data_size = 0;
unsigned min_pool_size = 0;
struct dm_target *ti;
unsigned i;
bool poll_supported = false;

if (unlikely(type == DM_TYPE_NONE)) {
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}

if (__table_type_bio_based(type))
if (__table_type_bio_based(type)) {
for (i = 0; i < t->num_targets; i++) {
ti = t->targets + i;
per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
min_pool_size = max(min_pool_size, ti->num_flush_bios);
}
poll_supported = !!dm_table_supports_poll(t);
}

t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported,
per_io_data_size, min_pool_size);
t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size,
t->integrity_supported, poll_supported);
if (!t->mempools)
return -ENOMEM;

Expand Down Expand Up @@ -2035,6 +2043,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
r = dm_set_zones_restrictions(t, q);
if (r)
return r;
if (!static_key_enabled(&zoned_enabled.key))
static_branch_enable(&zoned_enabled);
}

dm_update_crypto_profile(q, t);
Expand Down
10 changes: 0 additions & 10 deletions drivers/md/dm-zone.c
Original file line number Diff line number Diff line change
Expand Up @@ -550,13 +550,6 @@ int dm_zone_map_bio(struct dm_target_io *tio)
return DM_MAPIO_KILL;
}

/*
* The target map function may issue and complete the IO quickly.
* Take an extra reference on the IO to make sure it does not disappear
* until we run dm_zone_map_bio_end().
*/
dm_io_inc_pending(io);

/* Let the target do its work */
r = ti->type->map(ti, clone);
switch (r) {
Expand Down Expand Up @@ -587,9 +580,6 @@ int dm_zone_map_bio(struct dm_target_io *tio)
break;
}

/* Drop the extra reference on the IO */
dm_io_dec_pending(io, sts);

if (sts != BLK_STS_OK)
return DM_MAPIO_KILL;

Expand Down
Loading

0 comments on commit 7e28407

Please sign in to comment.