Skip to content

Commit

Permalink
md: make ->congested robust against personality changes.
Browse files Browse the repository at this point in the history
There is currently no locking around calls to the 'congested'
bdi function.  If called at an awkward time while an array is
being converted from one level (or personality) to another, there
is a tiny chance of running code in an unreferenced module etc.

So add a 'congested' function to the md_personality operations
structure, and call it with appropriate locking from a central
'mddev_congested'.

When the array personality is changing the array will be 'suspended'
so no IO is processed.
If mddev_congested detects this, it simply reports that the
array is congested, which is a safe guess.
As mddev_suspend calls synchronize_rcu(), mddev_congested can
avoid races by included the whole call inside an rcu_read_lock()
region.
This require that the congested functions for all subordinate devices
can be run under rcu_lock.  Fortunately this is the case.

Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
NeilBrown committed Feb 3, 2015
1 parent 85572d7 commit 5c675f8
Show file tree
Hide file tree
Showing 12 changed files with 38 additions and 77 deletions.
8 changes: 1 addition & 7 deletions drivers/md/dm-raid.c
Original file line number Diff line number Diff line change
Expand Up @@ -746,13 +746,7 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
{
struct raid_set *rs = container_of(cb, struct raid_set, callbacks);

if (rs->raid_type->level == 1)
return md_raid1_congested(&rs->md, bits);

if (rs->raid_type->level == 10)
return md_raid10_congested(&rs->md, bits);

return md_raid5_congested(&rs->md, bits);
return mddev_congested(&rs->md, bits);
}

/*
Expand Down
9 changes: 2 additions & 7 deletions drivers/md/linear.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,11 @@ static int linear_mergeable_bvec(struct request_queue *q,
return maxsectors << 9;
}

static int linear_congested(void *data, int bits)
static int linear_congested(struct mddev *mddev, int bits)
{
struct mddev *mddev = data;
struct linear_conf *conf;
int i, ret = 0;

if (mddev_congested(mddev, bits))
return 1;

rcu_read_lock();
conf = rcu_dereference(mddev->private);

Expand Down Expand Up @@ -218,8 +214,6 @@ static int linear_run (struct mddev *mddev)
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));

blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev;

ret = md_integrity_register(mddev);
if (ret) {
Expand Down Expand Up @@ -366,6 +360,7 @@ static struct md_personality linear_personality =
.status = linear_status,
.hot_add_disk = linear_add,
.size = linear_size,
.congested = linear_congested,
};

static int __init linear_init (void)
Expand Down
22 changes: 20 additions & 2 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,23 @@ EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
return mddev->suspended;
struct md_personality *pers = mddev->pers;
int ret = 0;

rcu_read_lock();
if (mddev->suspended)
ret = 1;
else if (pers && pers->congested)
ret = pers->congested(mddev, bits);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
struct mddev *mddev = data;
return mddev_congested(mddev, bits);
}
EXPORT_SYMBOL(mddev_congested);

/*
* Generic flush handling for md
Expand Down Expand Up @@ -4908,6 +4922,10 @@ int md_run(struct mddev *mddev)
bitmap_destroy(mddev);
return err;
}
if (mddev->queue) {
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = md_congested;
}
if (mddev->pers->sync_request) {
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_redundancy_group))
Expand Down
3 changes: 3 additions & 0 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,9 @@ struct md_personality
* array.
*/
void *(*takeover) (struct mddev *mddev);
/* congested implements bdi.congested_fn().
* Will not be called while array is 'suspended' */
int (*congested)(struct mddev *mddev, int bits);
};

struct md_sysfs_entry {
Expand Down
10 changes: 2 additions & 8 deletions drivers/md/multipath.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,11 @@ static void multipath_status (struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "]");
}

static int multipath_congested(void *data, int bits)
static int multipath_congested(struct mddev *mddev, int bits)
{
struct mddev *mddev = data;
struct mpconf *conf = mddev->private;
int i, ret = 0;

if (mddev_congested(mddev, bits))
return 1;

rcu_read_lock();
for (i = 0; i < mddev->raid_disks ; i++) {
struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
Expand Down Expand Up @@ -489,9 +485,6 @@ static int multipath_run (struct mddev *mddev)
*/
md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));

mddev->queue->backing_dev_info.congested_fn = multipath_congested;
mddev->queue->backing_dev_info.congested_data = mddev;

if (md_integrity_register(mddev))
goto out_free_conf;

Expand Down Expand Up @@ -533,6 +526,7 @@ static struct md_personality multipath_personality =
.hot_add_disk = multipath_add_disk,
.hot_remove_disk= multipath_remove_disk,
.size = multipath_size,
.congested = multipath_congested,
};

static int __init multipath_init (void)
Expand Down
9 changes: 2 additions & 7 deletions drivers/md/raid0.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@
#include "raid0.h"
#include "raid5.h"

static int raid0_congested(void *data, int bits)
static int raid0_congested(struct mddev *mddev, int bits)
{
struct mddev *mddev = data;
struct r0conf *conf = mddev->private;
struct md_rdev **devlist = conf->devlist;
int raid_disks = conf->strip_zone[0].nb_dev;
int i, ret = 0;

if (mddev_congested(mddev, bits))
return 1;

for (i = 0; i < raid_disks && !ret ; i++) {
struct request_queue *q = bdev_get_queue(devlist[i]->bdev);

Expand Down Expand Up @@ -263,8 +259,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
mdname(mddev),
(unsigned long long)smallest->sectors);
}
mddev->queue->backing_dev_info.congested_fn = raid0_congested;
mddev->queue->backing_dev_info.congested_data = mddev;

/*
* now since we have the hard sector sizes, we can make sure
Expand Down Expand Up @@ -729,6 +723,7 @@ static struct md_personality raid0_personality=
.size = raid0_size,
.takeover = raid0_takeover,
.quiesce = raid0_quiesce,
.congested = raid0_congested,
};

static int __init raid0_init (void)
Expand Down
14 changes: 2 additions & 12 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ static int raid1_mergeable_bvec(struct request_queue *q,

}

int md_raid1_congested(struct mddev *mddev, int bits)
static int raid1_congested(struct mddev *mddev, int bits)
{
struct r1conf *conf = mddev->private;
int i, ret = 0;
Expand Down Expand Up @@ -763,15 +763,6 @@ int md_raid1_congested(struct mddev *mddev, int bits)
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(md_raid1_congested);

static int raid1_congested(void *data, int bits)
{
struct mddev *mddev = data;

return mddev_congested(mddev, bits) ||
md_raid1_congested(mddev, bits);
}

static void flush_pending_writes(struct r1conf *conf)
{
Expand Down Expand Up @@ -2955,8 +2946,6 @@ static int run(struct mddev *mddev)
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));

if (mddev->queue) {
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);

if (discard_supported)
Expand Down Expand Up @@ -3193,6 +3182,7 @@ static struct md_personality raid1_personality =
.check_reshape = raid1_reshape,
.quiesce = raid1_quiesce,
.takeover = raid1_takeover,
.congested = raid1_congested,
};

static int __init raid_init(void)
Expand Down
3 changes: 0 additions & 3 deletions drivers/md/raid1.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,4 @@ struct r1bio {
*/
#define R1BIO_MadeGood 7
#define R1BIO_WriteError 8

extern int md_raid1_congested(struct mddev *mddev, int bits);

#endif
14 changes: 2 additions & 12 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
return rdev;
}

int md_raid10_congested(struct mddev *mddev, int bits)
static int raid10_congested(struct mddev *mddev, int bits)
{
struct r10conf *conf = mddev->private;
int i, ret = 0;
Expand All @@ -934,15 +934,6 @@ int md_raid10_congested(struct mddev *mddev, int bits)
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(md_raid10_congested);

static int raid10_congested(void *data, int bits)
{
struct mddev *mddev = data;

return mddev_congested(mddev, bits) ||
md_raid10_congested(mddev, bits);
}

static void flush_pending_writes(struct r10conf *conf)
{
Expand Down Expand Up @@ -3757,8 +3748,6 @@ static int run(struct mddev *mddev)
if (mddev->queue) {
int stripe = conf->geo.raid_disks *
((mddev->chunk_sectors << 9) / PAGE_SIZE);
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
mddev->queue->backing_dev_info.congested_data = mddev;

/* Calculate max read-ahead size.
* We need to readahead at least twice a whole stripe....
Expand Down Expand Up @@ -4727,6 +4716,7 @@ static struct md_personality raid10_personality =
.check_reshape = raid10_check_reshape,
.start_reshape = raid10_start_reshape,
.finish_reshape = raid10_finish_reshape,
.congested = raid10_congested,
};

static int __init raid_init(void)
Expand Down
3 changes: 0 additions & 3 deletions drivers/md/raid10.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,4 @@ enum r10bio_state {
*/
R10BIO_Previous,
};

extern int md_raid10_congested(struct mddev *mddev, int bits);

#endif
19 changes: 4 additions & 15 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -4149,7 +4149,7 @@ static void activate_bit_delay(struct r5conf *conf,
}
}

int md_raid5_congested(struct mddev *mddev, int bits)
static int raid5_congested(struct mddev *mddev, int bits)
{
struct r5conf *conf = mddev->private;

Expand All @@ -4166,15 +4166,6 @@ int md_raid5_congested(struct mddev *mddev, int bits)

return 0;
}
EXPORT_SYMBOL_GPL(md_raid5_congested);

static int raid5_congested(void *data, int bits)
{
struct mddev *mddev = data;

return mddev_congested(mddev, bits) ||
md_raid5_congested(mddev, bits);
}

/* We want read requests to align with chunks where possible,
* but write requests don't need to.
Expand Down Expand Up @@ -6248,9 +6239,6 @@ static int run(struct mddev *mddev)

blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);

mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;

chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
blk_queue_io_opt(mddev->queue, chunk_size *
Expand Down Expand Up @@ -6333,8 +6321,6 @@ static int stop(struct mddev *mddev)
struct r5conf *conf = mddev->private;

md_unregister_thread(&mddev->thread);
if (mddev->queue)
mddev->queue->backing_dev_info.congested_fn = NULL;
free_conf(conf);
mddev->private = NULL;
mddev->to_remove = &raid5_attrs_group;
Expand Down Expand Up @@ -7126,6 +7112,7 @@ static struct md_personality raid6_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid6_takeover,
.congested = raid5_congested,
};
static struct md_personality raid5_personality =
{
Expand All @@ -7148,6 +7135,7 @@ static struct md_personality raid5_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid5_takeover,
.congested = raid5_congested,
};

static struct md_personality raid4_personality =
Expand All @@ -7171,6 +7159,7 @@ static struct md_personality raid4_personality =
.finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
.takeover = raid4_takeover,
.congested = raid5_congested,
};

static int __init raid5_init(void)
Expand Down
1 change: 0 additions & 1 deletion drivers/md/raid5.h
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,6 @@ static inline int algorithm_is_DDF(int layout)
return layout >= 8 && layout <= 10;
}

extern int md_raid5_congested(struct mddev *mddev, int bits);
extern void md_raid5_kick_device(struct r5conf *conf);
extern int raid5_set_cache_size(struct mddev *mddev, int size);
#endif

0 comments on commit 5c675f8

Please sign in to comment.