Skip to content

Commit

Permalink
dm cache: improve discard support
Browse files Browse the repository at this point in the history
Safely allow the discard blocksize to be larger than the cache blocksize
by using the bio prison's range locking support.  This also improves
discard performance considerably because larger discards are issued to the
dm-cache device.  The discard blocksize was always intended to be
greater than the cache blocksize.  But until now it wasn't implemented
safely.

Also, by safely restoring the ability to have discard blocksize larger
than cache blocksize we're able to significantly reduce the memory used
for the cache's discard bitset.  Before, with a small discard blocksize,
the discard bitset could get quite large because its size is a function
of the discard blocksize and the origin device's size.  For example,
previously, using a 32KB cache blocksize with a 40TB origin resulted in
1280MB of incore memory use for the discard bitset!  Now, the discard
blocksize is scaled up accordingly to ensure the discard bitset is
capped at 2**14 bits, or 16KB.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
  • Loading branch information
Joe Thornber authored and Mike Snitzer committed Nov 10, 2014
1 parent 08b1845 commit 7ae34e7
Showing 1 changed file with 121 additions and 45 deletions.
166 changes: 121 additions & 45 deletions drivers/md/dm-cache-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ struct dm_cache_migration {
dm_cblock_t cblock;

bool err:1;
bool discard:1;
bool writeback:1;
bool demote:1;
bool promote:1;
Expand Down Expand Up @@ -433,12 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel

/*----------------------------------------------------------------*/

/*
 * Build a bio prison key covering the half-open origin-block range
 * [begin, end).  The range form lets a single cell lock the many cache
 * blocks spanned by one large discard.
 */
static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}

/*
Expand All @@ -448,22 +449,32 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
*/
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

/*
 * Detain @bio in a cell covering the origin-block range
 * [oblock_begin, oblock_end).  On failure (another holder owns an
 * overlapping cell, or dm_bio_detain errors) the preallocated cell is
 * returned via @free_fn so the caller's prealloc pool stays balanced.
 *
 * Returns 0 on success; > 0 when the bio was deferred behind an existing
 * cell; < 0 on error.
 */
static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
			    struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
			    cell_free_fn free_fn, void *free_context,
			    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock_begin, oblock_end, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		/* detain failed or deferred; give the unused cell back */
		free_fn(free_context, cell_prealloc);

	return r;
}

/*
 * Single-block convenience wrapper: detain @bio for just the one origin
 * block [oblock, oblock + 1).
 */
static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	return bio_detain_range(cache, oblock,
				to_oblock(from_oblock(oblock) + 1ULL),
				bio, cell_prealloc, free_fn, free_context,
				cell_result);
}

static int get_cell(struct cache *cache,
dm_oblock_t oblock,
struct prealloc *structs,
Expand All @@ -475,7 +486,7 @@ static int get_cell(struct cache *cache,

cell_prealloc = prealloc_get_cell(structs);

build_key(oblock, &key);
build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
if (r)
prealloc_put_cell(structs, cell_prealloc);
Expand Down Expand Up @@ -525,25 +536,34 @@ static dm_block_t block_div(dm_block_t b, uint32_t n)
return b;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
/*
 * Number of origin (cache-sized) blocks covered by one discard bitset
 * block.  discard_block_size is held in sectors, so divide it down by the
 * cache block size — by shift when that size is a power of two, otherwise
 * with block_div().
 */
static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

/* Map an origin block to the discard bitset block containing it. */
static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	dm_block_t b = block_div(from_oblock(oblock), oblocks_per_dblock(cache));

	return to_dblock(b);
}

/*
 * Map a discard bitset block back to the first origin block it covers
 * (inverse of oblock_to_dblock for range starts).
 */
static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
{
	return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
}

static void set_discard(struct cache *cache, dm_dblock_t b)
{
unsigned long flags;

BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
atomic_inc(&cache->stats.discard_count);

spin_lock_irqsave(&cache->lock, flags);
Expand Down Expand Up @@ -995,7 +1015,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
wake_worker(cache);
}

static void issue_copy_real(struct dm_cache_migration *mg)
static void issue_copy(struct dm_cache_migration *mg)
{
int r;
struct dm_io_region o_region, c_region;
Expand Down Expand Up @@ -1074,11 +1094,46 @@ static void avoid_copy(struct dm_cache_migration *mg)
migration_success_pre_commit(mg);
}

/*
 * Convert a discard bio's sector range into the half-open range of
 * discard bitset blocks [*b, *e) that lie ENTIRELY within it.  The start
 * is rounded up and the end rounded down, so a partially covered discard
 * block is never marked discarded.  A bio too small to cover one whole
 * block yields *b == *e (empty range).
 */
static void calc_discard_block_range(struct cache *cache, struct bio *bio,
				     dm_dblock_t *b, dm_dblock_t *e)
{
	sector_t sb = bio->bi_iter.bi_sector;
	sector_t se = bio_end_sector(bio);

	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));

	if (se - sb < cache->discard_block_size)
		*e = *b;	/* bio spans less than one discard block */
	else
		*e = to_dblock(block_div(se, cache->discard_block_size));
}

/*
 * Complete a discard "migration": mark every wholly-covered discard
 * bitset block, then end the bio and release its cell.  No data is
 * copied and no passdown to the origin occurs.
 */
static void issue_discard(struct dm_cache_migration *mg)
{
	dm_dblock_t b, e;
	struct bio *bio = mg->new_ocell->holder;

	calc_discard_block_range(mg->cache, bio, &b, &e);
	for (; b != e; b = to_dblock(from_dblock(b) + 1))
		set_discard(mg->cache, b);

	bio_endio(bio, 0);
	cell_defer(mg->cache, mg->new_ocell, false);
	free_migration(mg);
}

static void issue_copy_or_discard(struct dm_cache_migration *mg)
{
bool avoid;
struct cache *cache = mg->cache;

if (mg->discard) {
issue_discard(mg);
return;
}

if (mg->writeback || mg->demote)
avoid = !is_dirty(cache, mg->cblock) ||
is_discarded_oblock(cache, mg->old_oblock);
Expand All @@ -1093,7 +1148,7 @@ static void issue_copy(struct dm_cache_migration *mg)
}
}

avoid ? avoid_copy(mg) : issue_copy_real(mg);
avoid ? avoid_copy(mg) : issue_copy(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
Expand Down Expand Up @@ -1178,6 +1233,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);

mg->err = false;
mg->discard = false;
mg->writeback = false;
mg->demote = false;
mg->promote = true;
Expand All @@ -1201,6 +1257,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);

mg->err = false;
mg->discard = false;
mg->writeback = true;
mg->demote = false;
mg->promote = false;
Expand All @@ -1226,6 +1283,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);

mg->err = false;
mg->discard = false;
mg->writeback = false;
mg->demote = true;
mg->promote = true;
Expand Down Expand Up @@ -1254,6 +1312,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
struct dm_cache_migration *mg = prealloc_get_migration(structs);

mg->err = false;
mg->discard = false;
mg->writeback = false;
mg->demote = true;
mg->promote = false;
Expand All @@ -1270,6 +1329,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
quiesce_migration(mg);
}

/*
 * Queue a discard as a migration so it is quiesced like any other
 * cell-holding operation.  The cell @cell already holds the discard bio
 * (its range was detained by the caller).
 */
static void discard(struct cache *cache, struct prealloc *structs,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->cache = cache;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	/* discard-only migration: no copy, writeback, demote or promote */
	mg->discard = true;
	mg->err = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = false;
	mg->invalidate = false;

	quiesce_migration(mg);
}

/*----------------------------------------------------------------
* bio processing
*--------------------------------------------------------------*/
Expand Down Expand Up @@ -1303,31 +1382,27 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
issue(cache, bio);
}

/*
* People generally discard large parts of a device, eg, the whole device
* when formatting. Splitting these large discards up into cache block
sized ios and then quiescing (always necessary for discard) takes too
* long.
*
* We keep it simple, and allow any size of discard to come in, and just
* mark off blocks on the discard bitset. No passdown occurs!
*
* To implement passdown we need to change the bio_prison such that a cell
* can have a key that spans many blocks.
*/
static void process_discard_bio(struct cache *cache, struct bio *bio)
static void process_discard_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
cache->discard_block_size);
dm_block_t end_block = bio_end_sector(bio);
dm_block_t b;
int r;
dm_dblock_t b, e;
struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

end_block = block_div(end_block, cache->discard_block_size);
calc_discard_block_range(cache, bio, &b, &e);
if (b == e) {
bio_endio(bio, 0);
return;
}

for (b = start_block; b < end_block; b++)
set_discard(cache, to_dblock(b));
cell_prealloc = prealloc_get_cell(structs);
r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
(cell_free_fn) prealloc_put_cell,
structs, &new_ocell);
if (r > 0)
return;

bio_endio(bio, 0);
discard(cache, structs, new_ocell);
}

static bool spare_migration_bandwidth(struct cache *cache)
Expand Down Expand Up @@ -1517,7 +1592,7 @@ static void process_deferred_bios(struct cache *cache)
if (bio->bi_rw & REQ_FLUSH)
process_flush_bio(cache, bio);
else if (bio->bi_rw & REQ_DISCARD)
process_discard_bio(cache, bio);
process_discard_bio(cache, &structs, bio);
else
process_bio(cache, &structs, bio);
}
Expand Down Expand Up @@ -1732,7 +1807,7 @@ static void do_worker(struct work_struct *ws)
process_invalidation_requests(cache);
}

process_migrations(cache, &cache->quiesced_migrations, issue_copy);
process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
process_migrations(cache, &cache->completed_migrations, complete_migration);

if (commit_if_needed(cache)) {
Expand Down Expand Up @@ -3130,7 +3205,8 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
/*
* FIXME: these limits may be incompatible with the cache device
*/
limits->max_discard_sectors = cache->discard_block_size * 1024;
limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
cache->origin_sectors);
limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
}

Expand All @@ -3155,7 +3231,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)

static struct target_type cache_target = {
.name = "cache",
.version = {1, 5, 0},
.version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
Expand Down

0 comments on commit 7ae34e7

Please sign in to comment.