Skip to content

Commit

Permalink
Merge branch 'bcache' (bcache fixes from Kent Overstreet)
Browse files Browse the repository at this point in the history
Merge bcache fixes from Kent Overstreet:
 "There's fixes for _three_ different data corruption bugs, all of which
  were found by users hitting them in the wild.

  The first one isn't bcache specific - in 3.11 bcache was switched to
  the bio_copy_data in fs/bio.c, and that's when the bug in that code
  was discovered, but it's also used by raid1 and pktcdvd.  (That was my
  code too, so the bug's doubly embarrassing given that it was or
  should've been just a cut and paste from bcache code.  Dunno what
  happened there).

  Most of these (all the non data corruption bugs, actually) were ready
  before the merge window and have been sitting in Jens' tree, but I
  don't know what's been up with him lately..."

* emailed patches from Kent Overstreet <kmo@daterainc.com>:
  bcache: Fix flushes in writeback mode
  bcache: Fix for handling overlapping extents when reading in a btree node
  bcache: Fix a shrinker deadlock
  bcache: Fix a dumb CPU spinning bug in writeback
  bcache: Fix a flush/fua performance bug
  bcache: Fix a writeback performance regression
  bcache: Correct printf()-style format length modifier
  bcache: Fix for when no journal entries are found
  bcache: Strip endline when writing the label through sysfs
  bcache: Fix a dumb journal discard bug
  block: Fix bio_copy_data()
  • Loading branch information
Linus Torvalds committed Sep 24, 2013
2 parents db6aaf4 + c0f04d8 commit e288e93
Show file tree
Hide file tree
Showing 10 changed files with 110 additions and 66 deletions.
7 changes: 3 additions & 4 deletions drivers/md/bcache/bcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ struct cached_dev {
*/
atomic_t has_dirty;

struct ratelimit writeback_rate;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;

/*
Expand All @@ -507,10 +507,9 @@ struct cached_dev {
*/
sector_t last_read;

/* Number of writeback bios in flight */
atomic_t in_flight;
/* Limit number of writeback bios in flight */
struct semaphore in_flight;
struct closure_with_timer writeback;
struct closure_waitlist writeback_wait;

struct keybuf writeback_keys;

Expand Down
39 changes: 28 additions & 11 deletions drivers/md/bcache/bset.c
Original file line number Diff line number Diff line change
Expand Up @@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)

/* Mergesort */

/*
 * Step iterator set @i forward to the key after i->k.  If that leaves the
 * set exhausted (i->k == i->end), overwrite it with the last entry of
 * iter->data and shrink iter->used by one.
 */
static void sort_key_next(struct btree_iter *iter,
			  struct btree_iter_set *i)
{
	struct bkey *next = bkey_next(i->k);

	i->k = next;

	if (next == i->end)
		*i = iter->data[--iter->used];
}

/*
 * NOTE(review): this span is a GitHub diff render with the +/- markers
 * stripped, so pre-patch and post-patch lines appear interleaved (e.g.
 * the duplicated conditionals below: raw pointer compares such as
 * "top->k > i->k" next to bkey_cmp()-based versions).  As printed it is
 * NOT a coherent function body — consult kernel commit e288e93
 * ("bcache: Fix for handling overlapping extents when reading in a
 * btree node") for the real before/after versions.
 *
 * Visible intent: while more than one iterator set remains, compare the
 * heap-top set against the runner-up and trim overlapping extents with
 * __bch_cut_front()/bch_cut_back(), re-sifting the heap after each
 * adjustment.
 */
static void btree_sort_fixup(struct btree_iter *iter)
{
while (iter->used > 1) {
struct btree_iter_set *top = iter->data, *i = top + 1;
struct bkey *k;

if (iter->used > 2 &&
btree_iter_cmp(i[0], i[1]))
i++;

/* NOTE(review): loop body mixes diff versions of the trimming logic */
for (k = i->k;
k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
k = bkey_next(k))
if (top->k > i->k)
__bch_cut_front(top->k, k);
else if (KEY_SIZE(k))
bch_cut_back(&START_KEY(k), top->k);

/* NOTE(review): two stacked exit conditions — old and new diff lines */
if (top->k < i->k || k == i->k)
if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
break;

heap_sift(iter, i - top, btree_iter_cmp);
/* drop zero-size keys: advance the set, then restore heap order */
if (!KEY_SIZE(i->k)) {
sort_key_next(iter, i);
heap_sift(iter, i - top, btree_iter_cmp);
continue;
}

/* NOTE(review): condition below also appears in both diff variants */
if (top->k > i->k) {
if (bkey_cmp(top->k, i->k) >= 0)
sort_key_next(iter, i);
else
bch_cut_front(top->k, i->k);

heap_sift(iter, i - top, btree_iter_cmp);
} else {
/* can't happen because of comparison func */
BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
bch_cut_back(&START_KEY(i->k), top->k);
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions drivers/md/bcache/btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)

return;
err:
bch_cache_set_error(b->c, "io error reading bucket %lu",
bch_cache_set_error(b->c, "io error reading bucket %zu",
PTR_BUCKET_NR(b->c, &b->key, 0));
}

Expand Down Expand Up @@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
return SHRINK_STOP;

/* Return -1 if we can't do anything right now */
if (sc->gfp_mask & __GFP_WAIT)
if (sc->gfp_mask & __GFP_IO)
mutex_lock(&c->bucket_lock);
else if (!mutex_trylock(&c->bucket_lock))
return -1;
Expand Down
33 changes: 20 additions & 13 deletions drivers/md/bcache/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
pr_debug("%u journal buckets", ca->sb.njournal_buckets);

/* Read journal buckets ordered by golden ratio hash to quickly
/*
* Read journal buckets ordered by golden ratio hash to quickly
* find a sequence of buckets with valid journal entries
*/
for (i = 0; i < ca->sb.njournal_buckets; i++) {
Expand All @@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
goto bsearch;
}

/* If that fails, check all the buckets we haven't checked
/*
* If that fails, check all the buckets we haven't checked
* already
*/
pr_debug("falling back to linear search");

for (l = 0; l < ca->sb.njournal_buckets; l++) {
if (test_bit(l, bitmap))
continue;

for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
l < ca->sb.njournal_buckets;
l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
if (read_bucket(l))
goto bsearch;
}

if (list_empty(list))
continue;
bsearch:
/* Binary search */
m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
Expand All @@ -197,10 +200,12 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
r = m;
}

/* Read buckets in reverse order until we stop finding more
/*
* Read buckets in reverse order until we stop finding more
* journal entries
*/
pr_debug("finishing up");
pr_debug("finishing up: m %u njournal_buckets %u",
m, ca->sb.njournal_buckets);
l = m;

while (1) {
Expand Down Expand Up @@ -228,9 +233,10 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
}
}

c->journal.seq = list_entry(list->prev,
struct journal_replay,
list)->j.seq;
if (!list_empty(list))
c->journal.seq = list_entry(list->prev,
struct journal_replay,
list)->j.seq;

return 0;
#undef read_bucket
Expand Down Expand Up @@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
return;
}

switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) {
switch (atomic_read(&ja->discard_in_flight)) {
case DISCARD_IN_FLIGHT:
return;

Expand Down Expand Up @@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
if (cl)
BUG_ON(!closure_wait(&w->wait, cl));

closure_flush(&c->journal.io);
__journal_try_write(c, true);
}
}
Expand Down
15 changes: 9 additions & 6 deletions drivers/md/bcache/request.c
Original file line number Diff line number Diff line change
Expand Up @@ -997,14 +997,17 @@ static void request_write(struct cached_dev *dc, struct search *s)
} else {
bch_writeback_add(dc);

if (s->op.flush_journal) {
if (bio->bi_rw & REQ_FLUSH) {
/* Also need to send a flush to the backing device */
s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
dc->disk.bio_split);
struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
dc->disk.bio_split);

bio->bi_size = 0;
bio->bi_vcnt = 0;
closure_bio_submit(bio, cl, s->d);
flush->bi_rw = WRITE_FLUSH;
flush->bi_bdev = bio->bi_bdev;
flush->bi_end_io = request_endio;
flush->bi_private = cl;

closure_bio_submit(flush, cl, s->d);
} else {
s->op.cache_bio = bio;
}
Expand Down
9 changes: 7 additions & 2 deletions drivers/md/bcache/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,13 @@ STORE(__cached_dev)
}

if (attr == &sysfs_label) {
/* note: endlines are preserved */
memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
if (size > SB_LABEL_SIZE)
return -EINVAL;
memcpy(dc->sb.label, buf, size);
if (size < SB_LABEL_SIZE)
dc->sb.label[size] = '\0';
if (size && dc->sb.label[size - 1] == '\n')
dc->sb.label[size - 1] = '\0';
bch_write_bdev_super(dc, NULL);
if (dc->disk.c) {
memcpy(dc->disk.c->uuids[dc->disk.id].label,
Expand Down
11 changes: 10 additions & 1 deletion drivers/md/bcache/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
stats->last = now ?: 1;
}

unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
/**
* bch_next_delay() - increment @d by the amount of work done, and return how
* long to delay until the next time to do some work.
*
* @d - the struct bch_ratelimit to update
* @done - the amount of work done, in arbitrary units
*
* Returns the amount of time to delay by, in jiffies
*/
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();

Expand Down
12 changes: 9 additions & 3 deletions drivers/md/bcache/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
(ewma) >> factor; \
})

struct ratelimit {
struct bch_ratelimit {
/* Next time we want to do some work, in nanoseconds */
uint64_t next;

/*
* Rate at which we want to do work, in units per nanosecond
* The units here correspond to the units passed to bch_next_delay()
*/
unsigned rate;
};

static inline void ratelimit_reset(struct ratelimit *d)
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
{
d->next = local_clock();
}

unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);

#define __DIV_SAFE(n, d, zero) \
({ \
Expand Down
42 changes: 20 additions & 22 deletions drivers/md/bcache/writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)

static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
{
uint64_t ret;

if (atomic_read(&dc->disk.detaching) ||
!dc->writeback_percent)
return 0;

return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);

return min_t(uint64_t, ret, HZ);
}

/* Background writeback */
Expand Down Expand Up @@ -208,7 +212,7 @@ static void refill_dirty(struct closure *cl)

up_write(&dc->writeback_lock);

ratelimit_reset(&dc->writeback_rate);
bch_ratelimit_reset(&dc->writeback_rate);

/* Punt to workqueue only so we don't recurse and blow the stack */
continue_at(cl, read_dirty, dirty_wq);
Expand Down Expand Up @@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
}

bch_keybuf_del(&dc->writeback_keys, w);
atomic_dec_bug(&dc->in_flight);

closure_wake_up(&dc->writeback_wait);
up(&dc->in_flight);

closure_return_with_destructor(cl, dirty_io_destructor);
}
Expand Down Expand Up @@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)

closure_bio_submit(&io->bio, cl, &io->dc->disk);

continue_at(cl, write_dirty_finish, dirty_wq);
continue_at(cl, write_dirty_finish, system_wq);
}

static void read_dirty_endio(struct bio *bio, int error)
Expand All @@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)

closure_bio_submit(&io->bio, cl, &io->dc->disk);

continue_at(cl, write_dirty, dirty_wq);
continue_at(cl, write_dirty, system_wq);
}

static void read_dirty(struct closure *cl)
Expand All @@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)

if (delay > 0 &&
(KEY_START(&w->key) != dc->last_read ||
jiffies_to_msecs(delay) > 50)) {
w->private = NULL;

closure_delay(&dc->writeback, delay);
continue_at(cl, read_dirty, dirty_wq);
}
jiffies_to_msecs(delay) > 50))
delay = schedule_timeout_uninterruptible(delay);

dc->last_read = KEY_OFFSET(&w->key);

Expand All @@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)

trace_bcache_writeback(&w->key);

closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
down(&dc->in_flight);
closure_call(&io->cl, read_dirty_submit, NULL, cl);

delay = writeback_delay(dc, KEY_SIZE(&w->key));

atomic_inc(&dc->in_flight);

if (!closure_wait_event(&dc->writeback_wait, cl,
atomic_read(&dc->in_flight) < 64))
continue_at(cl, read_dirty, dirty_wq);
}

if (0) {
Expand All @@ -442,7 +435,11 @@ static void read_dirty(struct closure *cl)
bch_keybuf_del(&dc->writeback_keys, w);
}

refill_dirty(cl);
/*
* Wait for outstanding writeback IOs to finish (and keybuf slots to be
* freed) before refilling again
*/
continue_at(cl, refill_dirty, dirty_wq);
}

/* Init */
Expand Down Expand Up @@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)

void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
sema_init(&dc->in_flight, 64);
closure_init_unlocked(&dc->writeback);
init_rwsem(&dc->writeback_lock);

Expand Down Expand Up @@ -513,7 +511,7 @@ void bch_writeback_exit(void)

int __init bch_writeback_init(void)
{
dirty_wq = create_singlethread_workqueue("bcache_writeback");
dirty_wq = create_workqueue("bcache_writeback");
if (!dirty_wq)
return -ENOMEM;

Expand Down
4 changes: 2 additions & 2 deletions fs/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
src_p = kmap_atomic(src_bv->bv_page);
dst_p = kmap_atomic(dst_bv->bv_page);

memcpy(dst_p + dst_bv->bv_offset,
src_p + src_bv->bv_offset,
memcpy(dst_p + dst_offset,
src_p + src_offset,
bytes);

kunmap_atomic(dst_p);
Expand Down

0 comments on commit e288e93

Please sign in to comment.