From c40819f267f76e69418d3bc9fbb57962a6845673 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 10 Feb 2021 21:06:15 +0100 Subject: [PATCH 01/26] dm writecache: fix flexible_array.cocci warnings Zero-length and one-element arrays are deprecated, see Documentation/process/deprecated.rst Flexible-array members should be used instead. Generated by: scripts/coccinelle/misc/flexible_array.cocci CC: Denis Efremov Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Julia Lawall Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 4f72b6f66c3ae..aecc246ade263 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -73,7 +73,7 @@ struct wc_memory_superblock { }; __le64 padding[8]; }; - struct wc_memory_entry entries[0]; + struct wc_memory_entry entries[]; }; struct wc_entry { From 8615cb65bd638ba5f9ebe71115cc5956eb1713d0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 1 Mar 2021 04:58:43 -0500 Subject: [PATCH 02/26] dm: remove useless loop in __split_and_process_bio Remove useless "while" loop. If the condition ci.sector_count && !error is true, we go to a branch that ends with "break". If this condition is false, the "while" loop will not be executed again. So, the loop can't be executed more than once. 
Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 61 +++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3f3be9408afa7..1dac75cb55abc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1641,38 +1641,35 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, } else { ci.bio = bio; ci.sector_count = bio_sectors(bio); - while (ci.sector_count && !error) { - error = __split_and_process_non_flush(&ci); - if (ci.sector_count && !error) { - /* - * Remainder must be passed to submit_bio_noacct() - * so that it gets handled *after* bios already submitted - * have been completely processed. - * We take a clone of the original to store in - * ci.io->orig_bio to be used by end_io_acct() and - * for dec_pending to use for completion handling. - */ - struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, - GFP_NOIO, &md->queue->bio_split); - ci.io->orig_bio = b; - - /* - * Adjust IO stats for each split, otherwise upon queue - * reentry there will be redundant IO accounting. - * NOTE: this is a stop-gap fix, a proper fix involves - * significant refactoring of DM core's bio splitting - * (by eliminating DM's splitting and just using bio_split) - */ - part_stat_lock(); - __dm_part_stat_sub(dm_disk(md)->part0, - sectors[op_stat_group(bio_op(bio))], ci.sector_count); - part_stat_unlock(); - - bio_chain(b, bio); - trace_block_split(b, bio->bi_iter.bi_sector); - ret = submit_bio_noacct(bio); - break; - } + error = __split_and_process_non_flush(&ci); + if (ci.sector_count && !error) { + /* + * Remainder must be passed to submit_bio_noacct() + * so that it gets handled *after* bios already submitted + * have been completely processed. + * We take a clone of the original to store in + * ci.io->orig_bio to be used by end_io_acct() and + * for dec_pending to use for completion handling. 
+ */ + struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, + GFP_NOIO, &md->queue->bio_split); + ci.io->orig_bio = b; + + /* + * Adjust IO stats for each split, otherwise upon queue + * reentry there will be redundant IO accounting. + * NOTE: this is a stop-gap fix, a proper fix involves + * significant refactoring of DM core's bio splitting + * (by eliminating DM's splitting and just using bio_split) + */ + part_stat_lock(); + __dm_part_stat_sub(dm_disk(md)->part0, + sectors[op_stat_group(bio_op(bio))], ci.sector_count); + part_stat_unlock(); + + bio_chain(b, bio); + trace_block_split(b, bio->bi_iter.bi_sector); + ret = submit_bio_noacct(bio); } } From 219a9b5e738b75a6a5e9effe1d72f60037a2f131 Mon Sep 17 00:00:00 2001 From: JeongHyeon Lee Date: Thu, 11 Mar 2021 21:10:51 +0900 Subject: [PATCH 03/26] dm verity: allow only one error handling mode If more than one error handling mode is requested during DM verity table load, the last requested mode will be used. Change this to impose stricter checking so that the table load will fail if more than one error handling mode is requested. 
Signed-off-by: JeongHyeon Lee Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 40 +++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 808a98ef624c3..d3e76aefc1a6c 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -893,6 +893,28 @@ static int verity_alloc_zero_digest(struct dm_verity *v) return r; } +static inline bool verity_is_verity_mode(const char *arg_name) +{ + return (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING) || + !strcasecmp(arg_name, DM_VERITY_OPT_RESTART) || + !strcasecmp(arg_name, DM_VERITY_OPT_PANIC)); +} + +static int verity_parse_verity_mode(struct dm_verity *v, const char *arg_name) +{ + if (v->mode) + return -EINVAL; + + if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) + v->mode = DM_VERITY_MODE_LOGGING; + else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) + v->mode = DM_VERITY_MODE_RESTART; + else if (!strcasecmp(arg_name, DM_VERITY_OPT_PANIC)) + v->mode = DM_VERITY_MODE_PANIC; + + return 0; +} + static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, struct dm_verity_sig_opts *verify_args) { @@ -916,16 +938,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, arg_name = dm_shift_arg(as); argc--; - if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) { - v->mode = DM_VERITY_MODE_LOGGING; - continue; - - } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { - v->mode = DM_VERITY_MODE_RESTART; - continue; - - } else if (!strcasecmp(arg_name, DM_VERITY_OPT_PANIC)) { - v->mode = DM_VERITY_MODE_PANIC; + if (verity_is_verity_mode(arg_name)) { + r = verity_parse_verity_mode(v, arg_name); + if (r) { + ti->error = "Conflicting error handling parameters"; + return r; + } continue; } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) { @@ -1242,7 +1260,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) static 
struct target_type verity_target = { .name = "verity", - .version = {1, 7, 0}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, From b82096afc8afdc55408efb54ede2ec55c1f3f8c8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 10 Mar 2021 05:18:03 -0500 Subject: [PATCH 04/26] dm ioctl: replace device hash with red-black tree For high numbers of DM devices the 64-entry hash table has non-trivial overhead. Fix this by replacing the hash table with a red-black tree. Reported-by: Zdenek Kabelac Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 254 +++++++++++++++++++++++------------------- 1 file changed, 142 insertions(+), 112 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1ca65b434f1fa..272de8772d52a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -36,8 +37,10 @@ struct dm_file { * name or uuid. 
*---------------------------------------------------------------*/ struct hash_cell { - struct list_head name_list; - struct list_head uuid_list; + struct rb_node name_node; + struct rb_node uuid_node; + bool name_set; + bool uuid_set; char *name; char *uuid; @@ -53,10 +56,8 @@ struct vers_iter { }; -#define NUM_BUCKETS 64 -#define MASK_BUCKETS (NUM_BUCKETS - 1) -static struct list_head _name_buckets[NUM_BUCKETS]; -static struct list_head _uuid_buckets[NUM_BUCKETS]; +static struct rb_root name_rb_tree = RB_ROOT; +static struct rb_root uuid_rb_tree = RB_ROOT; static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred); @@ -70,73 +71,110 @@ static DECLARE_RWSEM(_hash_lock); */ static DEFINE_MUTEX(dm_hash_cells_mutex); -static void init_buckets(struct list_head *buckets) -{ - unsigned int i; - - for (i = 0; i < NUM_BUCKETS; i++) - INIT_LIST_HEAD(buckets + i); -} - -static int dm_hash_init(void) -{ - init_buckets(_name_buckets); - init_buckets(_uuid_buckets); - return 0; -} - static void dm_hash_exit(void) { dm_hash_remove_all(false, false, false); } -/*----------------------------------------------------------------- - * Hash function: - * We're not really concerned with the str hash function being - * fast since it's only used by the ioctl interface. 
- *---------------------------------------------------------------*/ -static unsigned int hash_str(const char *str) -{ - const unsigned int hash_mult = 2654435387U; - unsigned int h = 0; - - while (*str) - h = (h + (unsigned int) *str++) * hash_mult; - - return h & MASK_BUCKETS; -} - /*----------------------------------------------------------------- * Code for looking up a device by name *---------------------------------------------------------------*/ static struct hash_cell *__get_name_cell(const char *str) { - struct hash_cell *hc; - unsigned int h = hash_str(str); + struct rb_node *n = name_rb_tree.rb_node; - list_for_each_entry (hc, _name_buckets + h, name_list) - if (!strcmp(hc->name, str)) { + while (n) { + struct hash_cell *hc = container_of(n, struct hash_cell, name_node); + int c = strcmp(hc->name, str); + if (!c) { dm_get(hc->md); return hc; } + n = c >= 0 ? n->rb_left : n->rb_right; + } return NULL; } static struct hash_cell *__get_uuid_cell(const char *str) { - struct hash_cell *hc; - unsigned int h = hash_str(str); + struct rb_node *n = uuid_rb_tree.rb_node; - list_for_each_entry (hc, _uuid_buckets + h, uuid_list) - if (!strcmp(hc->uuid, str)) { + while (n) { + struct hash_cell *hc = container_of(n, struct hash_cell, uuid_node); + int c = strcmp(hc->uuid, str); + if (!c) { dm_get(hc->md); return hc; } + n = c >= 0 ? 
n->rb_left : n->rb_right; + } return NULL; } +static void __unlink_name(struct hash_cell *hc) +{ + if (hc->name_set) { + hc->name_set = false; + rb_erase(&hc->name_node, &name_rb_tree); + } +} + +static void __unlink_uuid(struct hash_cell *hc) +{ + if (hc->uuid_set) { + hc->uuid_set = false; + rb_erase(&hc->uuid_node, &uuid_rb_tree); + } +} + +static void __link_name(struct hash_cell *new_hc) +{ + struct rb_node **n, *parent; + + __unlink_name(new_hc); + + new_hc->name_set = true; + + n = &name_rb_tree.rb_node; + parent = NULL; + + while (*n) { + struct hash_cell *hc = container_of(*n, struct hash_cell, name_node); + int c = strcmp(hc->name, new_hc->name); + BUG_ON(!c); + parent = *n; + n = c >= 0 ? &hc->name_node.rb_left : &hc->name_node.rb_right; + } + + rb_link_node(&new_hc->name_node, parent, n); + rb_insert_color(&new_hc->name_node, &name_rb_tree); +} + +static void __link_uuid(struct hash_cell *new_hc) +{ + struct rb_node **n, *parent; + + __unlink_uuid(new_hc); + + new_hc->uuid_set = true; + + n = &uuid_rb_tree.rb_node; + parent = NULL; + + while (*n) { + struct hash_cell *hc = container_of(*n, struct hash_cell, uuid_node); + int c = strcmp(hc->uuid, new_hc->uuid); + BUG_ON(!c); + parent = *n; + n = c > 0 ? 
&hc->uuid_node.rb_left : &hc->uuid_node.rb_right; + } + + rb_link_node(&new_hc->uuid_node, parent, n); + rb_insert_color(&new_hc->uuid_node, &uuid_rb_tree); +} + static struct hash_cell *__get_dev_cell(uint64_t dev) { struct mapped_device *md; @@ -185,8 +223,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid, } } - INIT_LIST_HEAD(&hc->name_list); - INIT_LIST_HEAD(&hc->uuid_list); + hc->name_set = hc->uuid_set = false; hc->md = md; hc->new_map = NULL; return hc; @@ -226,16 +263,16 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi goto bad; } - list_add(&cell->name_list, _name_buckets + hash_str(name)); + __link_name(cell); if (uuid) { hc = __get_uuid_cell(uuid); if (hc) { - list_del(&cell->name_list); + __unlink_name(cell); dm_put(hc->md); goto bad; } - list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); + __link_uuid(cell); } dm_get(md); mutex_lock(&dm_hash_cells_mutex); @@ -256,9 +293,9 @@ static struct dm_table *__hash_remove(struct hash_cell *hc) struct dm_table *table; int srcu_idx; - /* remove from the dev hash */ - list_del(&hc->uuid_list); - list_del(&hc->name_list); + /* remove from the dev trees */ + __unlink_name(hc); + __unlink_uuid(hc); mutex_lock(&dm_hash_cells_mutex); dm_set_mdptr(hc->md, NULL); mutex_unlock(&dm_hash_cells_mutex); @@ -279,7 +316,8 @@ static struct dm_table *__hash_remove(struct hash_cell *hc) static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred) { - int i, dev_skipped; + int dev_skipped; + struct rb_node *n; struct hash_cell *hc; struct mapped_device *md; struct dm_table *t; @@ -289,40 +327,39 @@ static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool down_write(&_hash_lock); - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry(hc, _name_buckets + i, name_list) { - md = hc->md; - dm_get(md); + for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { + hc = container_of(n, struct hash_cell, 
name_node); + md = hc->md; + dm_get(md); - if (keep_open_devices && - dm_lock_for_deletion(md, mark_deferred, only_deferred)) { - dm_put(md); - dev_skipped++; - continue; - } + if (keep_open_devices && + dm_lock_for_deletion(md, mark_deferred, only_deferred)) { + dm_put(md); + dev_skipped++; + continue; + } - t = __hash_remove(hc); + t = __hash_remove(hc); - up_write(&_hash_lock); + up_write(&_hash_lock); - if (t) { - dm_sync_table(md); - dm_table_destroy(t); - } - dm_put(md); - if (likely(keep_open_devices)) - dm_destroy(md); - else - dm_destroy_immediate(md); - - /* - * Some mapped devices may be using other mapped - * devices, so repeat until we make no further - * progress. If a new mapped device is created - * here it will also get removed. - */ - goto retry; + if (t) { + dm_sync_table(md); + dm_table_destroy(t); } + dm_put(md); + if (likely(keep_open_devices)) + dm_destroy(md); + else + dm_destroy_immediate(md); + + /* + * Some mapped devices may be using other mapped + * devices, so repeat until we make no further + * progress. If a new mapped device is created + * here it will also get removed. + */ + goto retry; } up_write(&_hash_lock); @@ -340,7 +377,7 @@ static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid) hc->uuid = new_uuid; mutex_unlock(&dm_hash_cells_mutex); - list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid)); + __link_uuid(hc); } /* @@ -354,14 +391,14 @@ static char *__change_cell_name(struct hash_cell *hc, char *new_name) /* * Rename and move the name cell. 
*/ - list_del(&hc->name_list); + __unlink_name(hc); old_name = hc->name; mutex_lock(&dm_hash_cells_mutex); hc->name = new_name; mutex_unlock(&dm_hash_cells_mutex); - list_add(&hc->name_list, _name_buckets + hash_str(new_name)); + __link_name(hc); return old_name; } @@ -505,7 +542,7 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size, static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size) { - unsigned int i; + struct rb_node *n; struct hash_cell *hc; size_t len, needed = 0; struct gendisk *disk; @@ -518,11 +555,10 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ * Loop through all the devices working out how much * space we need. */ - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry (hc, _name_buckets + i, name_list) { - needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); - needed += align_val(sizeof(uint32_t)); - } + for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { + hc = container_of(n, struct hash_cell, name_node); + needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); + needed += align_val(sizeof(uint32_t)); } /* @@ -540,21 +576,20 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ /* * Now loop through filling out the names. 
*/ - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry (hc, _name_buckets + i, name_list) { - if (old_nl) - old_nl->next = (uint32_t) ((void *) nl - - (void *) old_nl); - disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(disk_devt(disk)); - nl->next = 0; - strcpy(nl->name, hc->name); - - old_nl = nl; - event_nr = align_ptr(nl->name + strlen(hc->name) + 1); - *event_nr = dm_get_event_nr(hc->md); - nl = align_ptr(event_nr + 1); - } + for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { + hc = container_of(n, struct hash_cell, name_node); + if (old_nl) + old_nl->next = (uint32_t) ((void *) nl - + (void *) old_nl); + disk = dm_disk(hc->md); + nl->dev = huge_encode_dev(disk_devt(disk)); + nl->next = 0; + strcpy(nl->name, hc->name); + + old_nl = nl; + event_nr = align_ptr(nl->name + strlen(hc->name) + 1); + *event_nr = dm_get_event_nr(hc->md); + nl = align_ptr(event_nr + 1); } /* * If mismatch happens, security may be compromised due to buffer @@ -1991,14 +2026,9 @@ int __init dm_interface_init(void) { int r; - r = dm_hash_init(); - if (r) - return r; - r = misc_register(&_dm_misc); if (r) { DMERR("misc_register failed for control device"); - dm_hash_exit(); return r; } From 8b638081bd4520f63db1defc660666ec5f65bc15 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 12 Mar 2021 09:07:30 -0500 Subject: [PATCH 05/26] dm ioctl: return UUID in DM_LIST_DEVICES_CMD result When LVM needs to find a device with a particular UUID it needs to ask for UUID for each device. This patch returns UUID directly in the list of devices, so that LVM doesn't have to query all the devices with an ioctl. The UUID is returned if the flag DM_UUID_FLAG is set in the parameters. Returning UUID is done in backward-compatible way. There's one unused 32-bit word value after the event number. 
This patch sets the bit DM_NAME_LIST_FLAG_HAS_UUID if a UUID is present and DM_NAME_LIST_FLAG_DOESNT_HAVE_UUID if it isn't (if none of these bits is set, then we have an old kernel that doesn't support returning UUIDs). The UUID is stored after this word. The 'next' value is updated to point after the UUID, so that old versions of libdevmapper will skip the UUID without attempting to interpret it. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 20 +++++++++++++++++--- include/uapi/linux/dm-ioctl.h | 18 ++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 272de8772d52a..0812ac6e9d70a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -558,7 +558,9 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { hc = container_of(n, struct hash_cell, name_node); needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); - needed += align_val(sizeof(uint32_t)); + needed += align_val(sizeof(uint32_t) * 2); + if (param->flags & DM_UUID_FLAG && hc->uuid) + needed += align_val(strlen(hc->uuid) + 1); } /* @@ -577,6 +579,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ * Now loop through filling out the names. 
*/ for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { + void *uuid_ptr; hc = container_of(n, struct hash_cell, name_node); if (old_nl) old_nl->next = (uint32_t) ((void *) nl - @@ -588,8 +591,19 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ old_nl = nl; event_nr = align_ptr(nl->name + strlen(hc->name) + 1); - *event_nr = dm_get_event_nr(hc->md); - nl = align_ptr(event_nr + 1); + event_nr[0] = dm_get_event_nr(hc->md); + event_nr[1] = 0; + uuid_ptr = align_ptr(event_nr + 2); + if (param->flags & DM_UUID_FLAG) { + if (hc->uuid) { + event_nr[1] |= DM_NAME_LIST_FLAG_HAS_UUID; + strcpy(uuid_ptr, hc->uuid); + uuid_ptr = align_ptr(uuid_ptr + strlen(hc->uuid) + 1); + } else { + event_nr[1] |= DM_NAME_LIST_FLAG_DOESNT_HAVE_UUID; + } + } + nl = uuid_ptr; } /* * If mismatch happens, security may be compromised due to buffer diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index fcff6669137b5..e5c6e458bdf73 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -193,8 +193,22 @@ struct dm_name_list { __u32 next; /* offset to the next record from the _start_ of this */ char name[0]; + + /* + * The following members can be accessed by taking a pointer that + * points immediately after the terminating zero character in "name" + * and aligning this pointer to next 8-byte boundary. + * Uuid is present if the flag DM_NAME_LIST_FLAG_HAS_UUID is set. 
+ * + * __u32 event_nr; + * __u32 flags; + * char uuid[0]; + */ }; +#define DM_NAME_LIST_FLAG_HAS_UUID 1 +#define DM_NAME_LIST_FLAG_DOESNT_HAVE_UUID 2 + /* * Used to retrieve the target versions */ @@ -272,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 44 +#define DM_VERSION_MINOR 45 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2021-02-01)" +#define DM_VERSION_EXTRA "-ioctl (2021-03-22)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ From c909085bb319c97b7eccbce4dcbd47a32016e0f7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 11 Mar 2021 13:27:29 -0500 Subject: [PATCH 06/26] dm ioctl: filter the returned values according to name or uuid prefix If we set non-empty param->name or param->uuid on the DM_LIST_DEVICES_CMD ioctl, the set values are considered filter prefixes. The ioctl will only return entries with matching name or uuid prefix. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 0812ac6e9d70a..2209cbcd84dbf 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -540,6 +540,30 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size, return ((void *) param) + param->data_start; } +static bool filter_device(struct hash_cell *hc, const char *pfx_name, const char *pfx_uuid) +{ + const char *val; + size_t val_len, pfx_len; + + val = hc->name; + val_len = strlen(val); + pfx_len = strnlen(pfx_name, DM_NAME_LEN); + if (pfx_len > val_len) + return false; + if (memcmp(val, pfx_name, pfx_len)) + return false; + + val = hc->uuid ? 
hc->uuid : ""; + val_len = strlen(val); + pfx_len = strnlen(pfx_uuid, DM_UUID_LEN); + if (pfx_len > val_len) + return false; + if (memcmp(val, pfx_uuid, pfx_len)) + return false; + + return true; +} + static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size) { struct rb_node *n; @@ -557,6 +581,8 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ */ for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { hc = container_of(n, struct hash_cell, name_node); + if (!filter_device(hc, param->name, param->uuid)) + continue; needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); needed += align_val(sizeof(uint32_t) * 2); if (param->flags & DM_UUID_FLAG && hc->uuid) @@ -581,6 +607,8 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) { void *uuid_ptr; hc = container_of(n, struct hash_cell, name_node); + if (!filter_device(hc, param->name, param->uuid)) + continue; if (old_nl) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); From 1c72e02306b34e65ce1cd53d50b9190a19581086 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Fri, 19 Mar 2021 06:47:50 +0530 Subject: [PATCH 07/26] dm ebs: fix a few typos s/retrievd/retrieved/ s/misalignement/misalignment/ s/funtion/function/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Mike Snitzer --- drivers/md/dm-ebs-target.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 55bcfb74f51fa..71475a2410bea 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -28,7 +28,7 @@ struct ebs_c { spinlock_t lock; /* Guard bios input list above. */ sector_t start; /* table line argument, see ebs_ctr below. */ unsigned int e_bs; /* Emulated block size in sectors exposed to upper layer. 
*/ - unsigned int u_bs; /* Underlying block size in sectors retrievd from/set on lower layer device. */ + unsigned int u_bs; /* Underlying block size in sectors retrieved from/set on lower layer device. */ unsigned char block_shift; /* bitshift sectors -> blocks used in dm-bufio API. */ bool u_bs_set:1; /* Flag to indicate underlying block size is set on table line. */ }; @@ -43,7 +43,7 @@ static inline sector_t __block_mod(sector_t sector, unsigned int bs) return sector & (bs - 1); } -/* Return number of blocks for a bio, accounting for misalignement of start and end sectors. */ +/* Return number of blocks for a bio, accounting for misalignment of start and end sectors. */ static inline unsigned int __nr_blocks(struct ebs_c *ec, struct bio *bio) { sector_t end_sector = __block_mod(bio->bi_iter.bi_sector, ec->u_bs) + bio_sectors(bio); @@ -171,7 +171,7 @@ static void __ebs_forget_bio(struct ebs_c *ec, struct bio *bio) dm_bufio_forget_buffers(ec->bufio, __sector_to_block(ec, sector), blocks); } -/* Worker funtion to process incoming bios. */ +/* Worker function to process incoming bios. */ static void __ebs_process_bios(struct work_struct *ws) { int r; From e30de3a803448c65433d49847f5db23a221c3ee1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Mar 2021 07:36:08 +0100 Subject: [PATCH 08/26] dm: unexport dm_{get,put}_table_device These are only used by DM core, DM target modules should only use dm_{get,put}_device. 
Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1dac75cb55abc..ca2aedd8ee7d1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -840,7 +840,6 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, *result = &td->dm_dev; return 0; } -EXPORT_SYMBOL_GPL(dm_get_table_device); void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) { @@ -854,7 +853,6 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) } mutex_unlock(&md->table_devices_lock); } -EXPORT_SYMBOL(dm_put_table_device); static void free_table_devices(struct list_head *devices) { From 695902bb2e17baf10a5a312ef048b71f738ddbe8 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 19 Mar 2021 08:11:13 +0000 Subject: [PATCH 09/26] dm thin: remove needless request_queue NULL pointer check Since commit ff9ea323816d ("block, bdi: an active gendisk always has a request_queue associated with it") the request_queue pointer returned from bdev_get_queue() shall never be NULL. 
Signed-off-by: Xu Wang Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index fff4c50df74db..985baee3a678e 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2816,7 +2816,7 @@ static bool data_dev_supports_discard(struct pool_c *pt) { struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - return q && blk_queue_discard(q); + return blk_queue_discard(q); } static bool is_factor(sector_t block_size, uint32_t n) From 63508e38c1081c96abb315df1fd1acb77befa42f Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 19 Mar 2021 08:16:28 +0000 Subject: [PATCH 10/26] dm cache: remove needless request_queue NULL pointer checks Since commit ff9ea323816d ("block, bdi: an active gendisk always has a request_queue associated with it") the request_queue pointer returned from bdev_get_queue() shall never be NULL. Signed-off-by: Xu Wang Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 541c45027cc85..6ab01ff257470 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3387,7 +3387,7 @@ static bool origin_dev_supports_discard(struct block_device *origin_bdev) { struct request_queue *q = bdev_get_queue(origin_bdev); - return q && blk_queue_discard(q); + return blk_queue_discard(q); } /* From ece2577388334dd5e8d59cb46895f9573bc4b808 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 19 Mar 2021 16:19:30 +0800 Subject: [PATCH 11/26] dm persistent data: remove unused return from exit_shadow_spine() Fix the following coccicheck warnings: ./drivers/md/persistent-data/dm-btree-spine.c:188:5-6: Unneeded variable: "r". Return "0" on line 194. 
Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree-internal.h | 2 +- drivers/md/persistent-data/dm-btree-spine.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index fe073d92f01e6..d0c55916daa9a 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -83,7 +83,7 @@ struct shadow_spine { }; void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info); -int exit_shadow_spine(struct shadow_spine *s); +void exit_shadow_spine(struct shadow_spine *s); int shadow_step(struct shadow_spine *s, dm_block_t b, struct dm_btree_value_type *vt); diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index 8a2bfbfb218b4..dd2ff3ca9bc65 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -183,15 +183,13 @@ void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info) s->count = 0; } -int exit_shadow_spine(struct shadow_spine *s) +void exit_shadow_spine(struct shadow_spine *s) { - int r = 0, i; + int i; for (i = 0; i < s->count; i++) { unlock_block(s->info, s->nodes[i]); } - - return r; } int shadow_step(struct shadow_spine *s, dm_block_t b, From db7b93e38106e58ac2ea24eaaea7ed760a319120 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 23 Mar 2021 10:59:45 -0400 Subject: [PATCH 12/26] dm integrity: add the "reset_recalculate" feature flag Add a new flag "reset_recalculate" that will restart recalculating from the beginning of the device. It can be used if we want to change the hash function. 
Example: dmsetup remove_all rmmod brd set -e modprobe brd rd_size=1048576 dmsetup create in --table '0 2000000 integrity /dev/ram0 0 16 J 2 internal_hash:sha256 recalculate' sleep 10 dmsetup status dmsetup remove in dmsetup create in --table '0 2000000 integrity /dev/ram0 0 16 J 2 internal_hash:sha3-256 reset_recalculate' Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 46b5d542b8fe6..fed8a7ccd7f91 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -262,6 +262,7 @@ struct dm_integrity_c { bool journal_uptodate; bool just_formatted; bool recalculate_flag; + bool reset_recalculate_flag; bool discard; bool fix_padding; bool fix_hmac; @@ -3134,7 +3135,8 @@ static void dm_integrity_resume(struct dm_target *ti) rw_journal_sectors(ic, REQ_OP_READ, 0, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); if (ic->mode == 'B') { - if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { + if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && + !ic->reset_recalculate_flag) { block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal); block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal); if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, @@ -3156,7 +3158,8 @@ static void dm_integrity_resume(struct dm_target *ti) } } else { if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && - block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR))) { + block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) || + ic->reset_recalculate_flag) { ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); ic->sb->recalc_sector = cpu_to_le64(0); } @@ -3169,6 +3172,10 @@ static void dm_integrity_resume(struct dm_target *ti) 
dm_integrity_io_error(ic, "writing superblock", r); } else { replay_journal(ic); + if (ic->reset_recalculate_flag) { + ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); + ic->sb->recalc_sector = cpu_to_le64(0); + } if (ic->mode == 'B') { ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; @@ -3242,6 +3249,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, arg_count += !!ic->meta_dev; arg_count += ic->sectors_per_block != 1; arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); + arg_count += ic->reset_recalculate_flag; arg_count += ic->discard; arg_count += ic->mode == 'J'; arg_count += ic->mode == 'J'; @@ -3261,6 +3269,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) DMEMIT(" recalculate"); + if (ic->reset_recalculate_flag) + DMEMIT(" reset_recalculate"); if (ic->discard) DMEMIT(" allow_discards"); DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); @@ -3914,7 +3924,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned extra_args; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 17, "Invalid number of feature args"}, + {0, 18, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; @@ -4058,6 +4068,9 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } else if (!strcmp(opt_string, "recalculate")) { ic->recalculate_flag = true; + } else if (!strcmp(opt_string, "reset_recalculate")) { + ic->recalculate_flag = true; + ic->reset_recalculate_flag = true; } else if (!strcmp(opt_string, "allow_discards")) { ic->discard = true; } else if (!strcmp(opt_string, "fix_padding")) { @@ -4554,7 +4567,7 @@ static void 
dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 7, 0}, + .version = {1, 8, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, From d6db294fd8194e05a82f368ad97f4efafbd04f2a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 9 Mar 2021 14:56:59 +0000 Subject: [PATCH 13/26] dm space map disk: remove redundant calls to sm_disk_get_nr_free() Both sm_disk_new_block and sm_disk_commit are needlessly calling sm_disk_get_nr_free(). Looks like old queries used for some debugging. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-space-map-disk.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index bf4c5e2ccb6ff..61f56909e00be 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -187,13 +187,8 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) static int sm_disk_commit(struct dm_space_map *sm) { int r; - dm_block_t nr_free; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - r = sm_disk_get_nr_free(sm, &nr_free); - if (r) - return r; - r = sm_ll_commit(&smd->ll); if (r) return r; @@ -202,10 +197,6 @@ static int sm_disk_commit(struct dm_space_map *sm) smd->begin = 0; smd->nr_allocated_this_transaction = 0; - r = sm_disk_get_nr_free(sm, &nr_free); - if (r) - return r; - return 0; } From f73e2e70ec48c9a9d45494c4866230a5059062ad Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 29 Mar 2021 16:34:03 +0100 Subject: [PATCH 14/26] dm btree spine: remove paranoid node_check call in node_prepare_for_write() Remove this extra BUG_ON() that calls node_check() -- removing it avoids extra crc checking.
Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree-spine.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index dd2ff3ca9bc65..2061ab8655677 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -30,8 +30,6 @@ static void node_prepare_for_write(struct dm_block_validator *v, h->csum = cpu_to_le32(dm_bm_checksum(&h->flags, block_size - sizeof(__le32), BTREE_CSUM_XOR)); - - BUG_ON(node_check(v, b, 4096)); } static int node_check(struct dm_block_validator *v, From a88b2358f1da2c9f9fcc432f2e0a79617fea397c Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 29 Mar 2021 16:34:57 +0100 Subject: [PATCH 15/26] dm persistent data: packed struct should have an aligned() attribute too Otherwise most non-x86 architectures (e.g. riscv, arm) will resort to byte-by-byte access. Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree-internal.h | 4 ++-- drivers/md/persistent-data/dm-space-map-common.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index d0c55916daa9a..b1788853a3552 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -34,12 +34,12 @@ struct node_header { __le32 max_entries; __le32 value_size; __le32 padding; -} __packed; +} __attribute__((packed, aligned(8))); struct btree_node { struct node_header header; __le64 keys[]; -} __packed; +} __attribute__((packed, aligned(8))); /* diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 8de63ce39bdd5..87e17909ef521 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ 
b/drivers/md/persistent-data/dm-space-map-common.h @@ -33,7 +33,7 @@ struct disk_index_entry { __le64 blocknr; __le32 nr_free; __le32 none_free_before; -} __packed; +} __attribute__ ((packed, aligned(8))); #define MAX_METADATA_BITMAPS 255 @@ -43,7 +43,7 @@ struct disk_metadata_index { __le64 blocknr; struct disk_index_entry index[MAX_METADATA_BITMAPS]; -} __packed; +} __attribute__ ((packed, aligned(8))); struct ll_disk; @@ -86,7 +86,7 @@ struct disk_sm_root { __le64 nr_allocated; __le64 bitmap_root; __le64 ref_count_root; -} __packed; +} __attribute__ ((packed, aligned(8))); #define ENTRIES_PER_BYTE 4 @@ -94,7 +94,7 @@ struct disk_bitmap_header { __le32 csum; __le32 not_used; __le64 blocknr; -} __packed; +} __attribute__ ((packed, aligned(8))); enum allocation_event { SM_NONE, From 5208692e80a1f3c8ce2063a22b675dd5589d1d80 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 13 Apr 2021 09:11:53 +0100 Subject: [PATCH 16/26] dm space map common: fix division bug in sm_ll_find_free_block() This division bug meant the search for free metadata space could skip the final allocation bitmap's worth of entries. Fix affects DM thinp, cache and era targets. 
Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Tested-by: Ming-Hung Tsai Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-space-map-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index d8b4125e338ca..a213bf11738fb 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -339,6 +339,8 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, */ begin = do_div(index_begin, ll->entries_per_block); end = do_div(end, ll->entries_per_block); + if (end == 0) + end = ll->entries_per_block; for (i = index_begin; i < index_end; i++, begin = 0) { struct dm_block *blk; From 7a35693adcd38664b852ad10e3742782b3e87987 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 7 Apr 2021 14:25:22 +0100 Subject: [PATCH 17/26] dm: replace dm_vcalloc() Use kvcalloc or kvmalloc_array instead (depending whether zeroing is useful). 
Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Mike Snitzer --- drivers/md/dm-snap-persistent.c | 6 +++--- drivers/md/dm-snap.c | 5 +++-- drivers/md/dm-table.c | 30 ++++++------------------------ include/linux/device-mapper.h | 5 ----- 4 files changed, 12 insertions(+), 34 deletions(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 8e329c3f3a78e..9ab4bf651ca93 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -596,7 +596,7 @@ static void persistent_dtr(struct dm_exception_store *store) free_area(ps); /* Allocated in persistent_read_metadata */ - vfree(ps->callbacks); + kvfree(ps->callbacks); kfree(ps); } @@ -621,8 +621,8 @@ static int persistent_read_metadata(struct dm_exception_store *store, */ ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / sizeof(struct disk_exception); - ps->callbacks = dm_vcalloc(ps->exceptions_per_area, - sizeof(*ps->callbacks)); + ps->callbacks = kvcalloc(ps->exceptions_per_area, + sizeof(*ps->callbacks), GFP_KERNEL); if (!ps->callbacks) return -ENOMEM; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 11890db71f3fe..a2acb014c13ae 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -663,7 +663,8 @@ static int dm_exception_table_init(struct dm_exception_table *et, et->hash_shift = hash_shift; et->hash_mask = size - 1; - et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head)); + et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head), + GFP_KERNEL); if (!et->table) return -ENOMEM; @@ -689,7 +690,7 @@ static void dm_exception_table_exit(struct dm_exception_table *et, kmem_cache_free(mem, ex); } - vfree(et->table); + kvfree(et->table); } static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e5f0f1703c5dc..ee47a332b4628 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -94,24 +94,6 @@ static int 
setup_btree_index(unsigned int l, struct dm_table *t) return 0; } -void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) -{ - unsigned long size; - void *addr; - - /* - * Check that we're not going to overflow. - */ - if (nmemb > (ULONG_MAX / elem_size)) - return NULL; - - size = nmemb * elem_size; - addr = vzalloc(size); - - return addr; -} -EXPORT_SYMBOL(dm_vcalloc); - /* * highs, and targets are managed as dynamic arrays during a * table load. @@ -124,15 +106,15 @@ static int alloc_targets(struct dm_table *t, unsigned int num) /* * Allocate both the target array and offset array at once. */ - n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) + - sizeof(sector_t)); + n_highs = kvcalloc(num, sizeof(struct dm_target) + sizeof(sector_t), + GFP_KERNEL); if (!n_highs) return -ENOMEM; n_targets = (struct dm_target *) (n_highs + num); memset(n_highs, -1, sizeof(*n_highs) * num); - vfree(t->highs); + kvfree(t->highs); t->num_allocated = num; t->highs = n_highs; @@ -198,7 +180,7 @@ void dm_table_destroy(struct dm_table *t) /* free the indexes */ if (t->depth >= 2) - vfree(t->index[t->depth - 2]); + kvfree(t->index[t->depth - 2]); /* free the targets */ for (i = 0; i < t->num_targets; i++) { @@ -210,7 +192,7 @@ void dm_table_destroy(struct dm_table *t) dm_put_target_type(tgt->type); } - vfree(t->highs); + kvfree(t->highs); /* free the device list */ free_devices(&t->devices, t->md); @@ -1077,7 +1059,7 @@ static int setup_indexes(struct dm_table *t) total += t->counts[i]; } - indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE); + indexes = kvcalloc(total, NODE_SIZE, GFP_KERNEL); if (!indexes) return -ENOMEM; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 5c641f930cafb..ff700fb6ce1db 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -574,11 +574,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, */ void dm_destroy_keyslot_manager(struct blk_keyslot_manager 
*ksm); -/* - * A wrapper around vmalloc. - */ -void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); - /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ From 17e9e134a8efabbbf689a0904eee92bb5a868172 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 14 Apr 2021 09:43:44 +0800 Subject: [PATCH 18/26] dm integrity: fix missing goto in bitmap_flush_interval error handling Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode") Cc: stable@vger.kernel.org Signed-off-by: Tian Tao Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index fed8a7ccd7f91..6977422454a49 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4049,6 +4049,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { r = -EINVAL; ti->error = "Invalid bitmap_flush_interval argument"; + goto bad; } ic->bitmap_flush_interval = msecs_to_jiffies(val); } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { From 87d5742b73f24ca389cd832fa088170ca5d3d093 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Apr 2021 14:32:56 +0800 Subject: [PATCH 19/26] dm clone metadata: remove unused function Fix the following clang warning: drivers/md/dm-clone-metadata.c:279:19: warning: unused function 'superblock_write_lock' [-Wunused-function]. 
Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Mike Snitzer --- drivers/md/dm-clone-metadata.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c index 17712456fa634..c43d55672bce0 100644 --- a/drivers/md/dm-clone-metadata.c +++ b/drivers/md/dm-clone-metadata.c @@ -276,12 +276,6 @@ static inline int superblock_read_lock(struct dm_clone_metadata *cmd, return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); } -static inline int superblock_write_lock(struct dm_clone_metadata *cmd, - struct dm_block **sblock) -{ - return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); -} - static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd, struct dm_block **sblock) { From be962b2f077e96533ed3080127fcbe60b224638a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Apr 2021 18:09:02 -0400 Subject: [PATCH 20/26] dm raid: fix fall-through warning in rs_check_takeover() for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by explicitly adding a break statement instead of letting the code fall through to the next case. Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index cab12b2251bac..59d2150e130da 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1853,6 +1853,7 @@ static int rs_check_takeover(struct raid_set *rs) ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) || __within_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC, ALGORITHM_RIGHT_SYMMETRIC))) return 0; + break; default: break; From f99a8e4373eeacb279bc9696937a55adbff7a28a Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 21 Apr 2021 23:32:36 +0200 Subject: [PATCH 21/26] dm raid: fix inconclusive reshape layout on fast raid4/5/6 table reload sequences If fast table reloads occur during an ongoing reshape of raid4/5/6 devices the target may race reading a superblock vs the MD resync thread; causing an inconclusive reshape state to be read in its constructor. lvm2 test lvconvert-raid-reshape-stripes-load-reload.sh can cause BUG_ON() to trigger in md_run(), e.g.: "kernel BUG at drivers/md/raid5.c:7567!". Scenario triggering the bug: 1. the MD sync thread calls end_reshape() from raid5_sync_request() when done reshaping. However end_reshape() _only_ updates the reshape position to MaxSector keeping the changed layout configuration though (i.e. any delta disks, chunk sector or RAID algorithm changes). That inconclusive configuration is stored in the superblock. 2. dm-raid constructs a mapping, loading named inconsistent superblock as of step 1 before step 3 is able to finish resetting the reshape state completely, and calls md_run() which leads to mentioned bug in raid5.c. 3. the MD RAID personality's finish_reshape() is called; which resets the reshape information on chunk sectors, delta disks, etc.
This explains why the bug is rarely seen on multi-core machines, as MD's finish_reshape() superblock update races with the dm-raid constructor's superblock load in step 2. Fix identifies inconclusive superblock content in the dm-raid constructor and resets it before calling md_run(), factoring out identifying checks into rs_is_layout_change() to share in existing rs_reshape_requested() and new rs_reset_inconclusive_reshape(). Also enhance a comment and remove an empty line. Cc: stable@vger.kernel.org Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 59d2150e130da..67372e1a167d7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1869,6 +1869,14 @@ static bool rs_takeover_requested(struct raid_set *rs) return rs->md.new_level != rs->md.level; } +/* True if layout is set to reshape. */ +static bool rs_is_layout_change(struct raid_set *rs, bool use_mddev) +{ + return (use_mddev ?
rs->md.delta_disks : rs->delta_disks) || + rs->md.new_layout != rs->md.layout || + rs->md.new_chunk_sectors != rs->md.chunk_sectors; +} + /* True if @rs is requested to reshape by ctr */ static bool rs_reshape_requested(struct raid_set *rs) { @@ -1881,9 +1889,7 @@ static bool rs_reshape_requested(struct raid_set *rs) if (rs_is_raid0(rs)) return false; - change = mddev->new_layout != mddev->layout || - mddev->new_chunk_sectors != mddev->chunk_sectors || - rs->delta_disks; + change = rs_is_layout_change(rs, false); /* Historical case to support raid1 reshape without delta disks */ if (rs_is_raid1(rs)) { @@ -2818,7 +2824,7 @@ static sector_t _get_reshape_sectors(struct raid_set *rs) } /* - * + * Reshape: * - change raid layout * - change chunk size * - add disks @@ -2927,6 +2933,20 @@ static int rs_setup_reshape(struct raid_set *rs) return r; } +/* + * If the md resync thread has updated superblock with max reshape position + * at the end of a reshape but not (yet) reset the layout configuration + * changes -> reset the latter. + */ +static void rs_reset_inconclusive_reshape(struct raid_set *rs) +{ + if (!rs_is_reshaping(rs) && rs_is_layout_change(rs, true)) { + rs_set_cur(rs); + rs->md.delta_disks = 0; + rs->md.reshape_backwards = 0; + } +} + /* * Enable/disable discard support on RAID set depending on * RAID level and discard properties of underlying RAID members. @@ -3213,11 +3233,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad; + /* Catch any inconclusive reshape superblock content. */ + rs_reset_inconclusive_reshape(rs); + /* Start raid set read-only and assumed clean to change in raid_resume() */ rs->md.ro = 1; rs->md.in_sync = 1; - /* Keep array frozen */ + /* Keep array frozen until resume. 
*/ set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); /* Has to be held on running the array */ @@ -3231,7 +3254,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) } r = md_start(&rs->md); - if (r) { ti->error = "Failed to start raid array"; mddev_unlock(&rs->md); From a9c0fda4c08292399e08db0a4b70bc161cd247b9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 27 Apr 2021 11:57:06 -0400 Subject: [PATCH 22/26] dm integrity: don't re-write metadata if discarding same blocks If we discard already discarded blocks we do not need to write discard pattern to the metadata, because it is already there. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 6977422454a49..3b54b7f8c9b9b 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1429,8 +1429,10 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se if (op == TAG_READ) { memcpy(tag, dp, to_copy); } else if (op == TAG_WRITE) { - memcpy(dp, tag, to_copy); - dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); + if (memcmp(dp, tag, to_copy)) { + memcpy(dp, tag, to_copy); + dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy); + } } else { /* e.g.: op == TAG_CMP */ From b1a2b9332050c7ae32a22c2c74bc443e39f37b23 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 27 Apr 2021 11:57:43 -0400 Subject: [PATCH 23/26] dm integrity: increase RECALC_SECTORS to improve recalculate speed Increase RECALC_SECTORS because it improves recalculate speed slightly (from 390kiB/s to 410kiB/s). 
Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3b54b7f8c9b9b..45658162e2c92 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -35,7 +35,7 @@ #define MIN_LOG2_INTERLEAVE_SECTORS 3 #define MAX_LOG2_INTERLEAVE_SECTORS 31 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 -#define RECALC_SECTORS 8192 +#define RECALC_SECTORS 32768 #define RECALC_WRITE_SUPER 16 #define BITMAP_BLOCK_SIZE 4096 /* don't change it */ #define BITMAP_FLUSH_INTERVAL (10 * HZ) From 7a5b96b4784454ba258e83dc7469ddbacd3aaac3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 28 Apr 2021 17:00:23 -0400 Subject: [PATCH 24/26] dm integrity: use discard support when recalculating If we have discard support we don't have to recalculate hash - we can just fill the metadata with the discard pattern. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 57 ++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 45658162e2c92..781942aeddd15 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2689,26 +2689,30 @@ static void integrity_recalc(struct work_struct *w) if (unlikely(dm_integrity_failed(ic))) goto err; - io_req.bi_op = REQ_OP_READ; - io_req.bi_op_flags = 0; - io_req.mem.type = DM_IO_VMA; - io_req.mem.ptr.addr = ic->recalc_buffer; - io_req.notify.fn = NULL; - io_req.client = ic->io; - io_loc.bdev = ic->dev->bdev; - io_loc.sector = get_data_sector(ic, area, offset); - io_loc.count = n_sectors; + if (!ic->discard) { + io_req.bi_op = REQ_OP_READ; + io_req.bi_op_flags = 0; + io_req.mem.type = DM_IO_VMA; + io_req.mem.ptr.addr = ic->recalc_buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = 
get_data_sector(ic, area, offset); + io_loc.count = n_sectors; - r = dm_io(&io_req, 1, &io_loc, NULL); - if (unlikely(r)) { - dm_integrity_io_error(ic, "reading data", r); - goto err; - } + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dm_integrity_io_error(ic, "reading data", r); + goto err; + } - t = ic->recalc_tags; - for (i = 0; i < n_sectors; i += ic->sectors_per_block) { - integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); - t += ic->tag_size; + t = ic->recalc_tags; + for (i = 0; i < n_sectors; i += ic->sectors_per_block) { + integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); + t += ic->tag_size; + } + } else { + t = ic->recalc_tags + (n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; } metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); @@ -4364,11 +4368,13 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } INIT_WORK(&ic->recalc_work, integrity_recalc); - ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); - if (!ic->recalc_buffer) { - ti->error = "Cannot allocate buffer for recalculating"; - r = -ENOMEM; - goto bad; + if (!ic->discard) { + ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); + if (!ic->recalc_buffer) { + ti->error = "Cannot allocate buffer for recalculating"; + r = -ENOMEM; + goto bad; + } } ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, ic->tag_size, GFP_KERNEL); @@ -4377,6 +4383,9 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -ENOMEM; goto bad; } + if (ic->discard) + memset(ic->recalc_tags, DISCARD_FILLER, + (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size); } else { if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { ti->error = "Recalculate can only be specified with internal_hash"; @@ -4570,7 +4579,7 @@ static void 
dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 8, 0}, + .version = {1, 9, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, From 8e947c8f4a5620df77e43c9c75310dc510250166 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 29 Apr 2021 23:37:00 +0200 Subject: [PATCH 25/26] dm rq: fix double free of blk_mq_tag_set in dev remove after table load fails When loading a device-mapper table for a request-based mapped device, and the allocation/initialization of the blk_mq_tag_set for the device fails, a following device remove will cause a double free. E.g. (dmesg): device-mapper: core: Cannot initialize queue for request-based dm-mq mapped device device-mapper: ioctl: unable to set up device queue for new table. Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0305e098835de000 TEID: 0305e098835de803 Fault in home space mode while using kernel ASCE. AS:000000025efe0007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... lots of modules ... 
Supported: Yes, External CPU: 0 PID: 7348 Comm: multipathd Kdump: loaded Tainted: G W X 5.3.18-53-default #1 SLE15-SP3 Hardware name: IBM 8561 T01 7I2 (LPAR) Krnl PSW : 0704e00180000000 000000025e368eca (kfree+0x42/0x330) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 000000000000004a 000000025efe5230 c1773200d779968d 0000000000000000 000000025e520270 000000025e8d1b40 0000000000000003 00000007aae10000 000000025e5202a2 0000000000000001 c1773200d779968d 0305e098835de640 00000007a8170000 000003ff80138650 000000025e5202a2 000003e00396faa8 Krnl Code: 000000025e368eb8: c4180041e100 lgrl %r1,25eba50b8 000000025e368ebe: ecba06b93a55 risbg %r11,%r10,6,185,58 #000000025e368ec4: e3b010000008 ag %r11,0(%r1) >000000025e368eca: e310b0080004 lg %r1,8(%r11) 000000025e368ed0: a7110001 tmll %r1,1 000000025e368ed4: a7740129 brc 7,25e369126 000000025e368ed8: e320b0080004 lg %r2,8(%r11) 000000025e368ede: b904001b lgr %r1,%r11 Call Trace: [<000000025e368eca>] kfree+0x42/0x330 [<000000025e5202a2>] blk_mq_free_tag_set+0x72/0xb8 [<000003ff801316a8>] dm_mq_cleanup_mapped_device+0x38/0x50 [dm_mod] [<000003ff80120082>] free_dev+0x52/0xd0 [dm_mod] [<000003ff801233f0>] __dm_destroy+0x150/0x1d0 [dm_mod] [<000003ff8012bb9a>] dev_remove+0x162/0x1c0 [dm_mod] [<000003ff8012a988>] ctl_ioctl+0x198/0x478 [dm_mod] [<000003ff8012ac8a>] dm_ctl_ioctl+0x22/0x38 [dm_mod] [<000000025e3b11ee>] ksys_ioctl+0xbe/0xe0 [<000000025e3b127a>] __s390x_sys_ioctl+0x2a/0x40 [<000000025e8c15ac>] system_call+0xd8/0x2c8 Last Breaking-Event-Address: [<000000025e52029c>] blk_mq_free_tag_set+0x6c/0xb8 Kernel panic - not syncing: Fatal exception: panic_on_oops When allocation/initialization of the blk_mq_tag_set fails in dm_mq_init_request_queue(), it is uninitialized/freed, but the pointer is not reset to NULL; so when dev_remove() later gets into dm_mq_cleanup_mapped_device() it sees the pointer and tries to uninitialize and free it again. 
Fix this by setting the pointer to NULL in dm_mq_init_request_queue() error-handling. Also set it to NULL in dm_mq_cleanup_mapped_device(). Cc: stable@vger.kernel.org # 4.6+ Fixes: 1c357a1e86a4 ("dm: allocate blk_mq_tag_set rather than embed in mapped_device") Signed-off-by: Benjamin Block Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 13b4385f4d5a9..9c3bc3711b335 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -569,6 +569,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) blk_mq_free_tag_set(md->tag_set); out_kfree_tag_set: kfree(md->tag_set); + md->tag_set = NULL; return err; } @@ -578,6 +579,7 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md) if (md->tag_set) { blk_mq_free_tag_set(md->tag_set); kfree(md->tag_set); + md->tag_set = NULL; } } From ca4a4e9a55beeb138bb06e3867f5e486da896d44 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 30 Apr 2021 14:38:37 -0400 Subject: [PATCH 26/26] dm raid: remove unnecessary discard limits for raid0 and raid10 Commit 29efc390b946 ("md/md0: optimize raid0 discard handling") and commit d30588b2731f ("md/raid10: improve raid10 discard request") remove MD raid0's and raid10's inability to properly handle large discards. So eliminate associated constraints from dm-raid's support.
Depends-on: 29efc390b946 ("md/md0: optimize raid0 discard handling") Depends-on: d30588b2731f ("md/raid10: improve raid10 discard request") Reported-by: Matthew Ruffell Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 67372e1a167d7..bf4a467fc73a4 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3750,15 +3750,6 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, chunk_size_bytes); blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); - - /* - * RAID0 and RAID10 personalities require bio splitting, - * RAID1/4/5/6 don't and process large discard bios properly. - */ - if (rs_is_raid0(rs) || rs_is_raid10(rs)) { - limits->discard_granularity = chunk_size_bytes; - limits->max_discard_sectors = rs->md.chunk_sectors; - } } static void raid_postsuspend(struct dm_target *ti)