dm snapshot: rework writing to origin
To track the completion of exceptions relating to the same location on
the device, the current code selects one exception as primary_pe, links
the other exceptions to it and uses reference counting to wait until all
the reallocations are complete.

It is considered too complicated to extend this code to handle the new
snapshot-merge target, where sets of non-overlapping chunks would also
need to become linked.

Instead, a simpler (but less efficient) approach is taken.  Bios are
linked to one exception.  When it completes, bios are simply retried,
and if other related exceptions are still outstanding, they'll get
queued again to wait for another one.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
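
The flow described in the commit message can be illustrated with a small stand-alone sketch (plain C; pending_exception, submit_write() and complete_exception() are hypothetical simplified stand-ins for illustration, not the dm-snap.c API). A write queues behind exactly one outstanding exception; each time that exception completes, the write is resubmitted, and if another related exception is still pending it simply queues again, until none remain and the write is finally remapped to the origin.

/*
 * Simplified model of the retry approach: each origin write is queued
 * against exactly one pending exception.  When that exception completes,
 * the write is resubmitted; if another exception is still outstanding,
 * the write just queues behind it and waits again.
 */
#include <stdio.h>
#include <stdbool.h>

#define NR_EXCEPTIONS 3

struct pending_exception {
	bool outstanding;	/* copy to the COW device not finished yet */
	int queued_write;	/* -1 if no write is waiting on this one */
};

static struct pending_exception pe[NR_EXCEPTIONS];

/* Find any exception that is still outstanding, or -1 if none. */
static int first_outstanding(void)
{
	for (int i = 0; i < NR_EXCEPTIONS; i++)
		if (pe[i].outstanding)
			return i;
	return -1;
}

/* Submit a write: either pass it through or queue it on ONE exception. */
static void submit_write(int write_id)
{
	int i = first_outstanding();

	if (i < 0) {
		printf("write %d: no exceptions pending, remapped to origin\n",
		       write_id);
		return;
	}
	pe[i].queued_write = write_id;
	printf("write %d: queued behind exception %d\n", write_id, i);
}

/* An exception finished copying: retry whatever write was waiting on it. */
static void complete_exception(int i)
{
	int write_id = pe[i].queued_write;

	pe[i].outstanding = false;
	pe[i].queued_write = -1;
	printf("exception %d complete\n", i);

	if (write_id >= 0)
		submit_write(write_id);	/* may queue again on another pe */
}

int main(void)
{
	for (int i = 0; i < NR_EXCEPTIONS; i++) {
		pe[i].outstanding = true;
		pe[i].queued_write = -1;
	}

	submit_write(42);		/* waits on exception 0 */
	for (int i = 0; i < NR_EXCEPTIONS; i++)
		complete_exception(i);	/* write 42 is retried each time */

	return 0;
}

This models the trade-off the patch accepts: a write may be requeued once per related exception instead of waiting on a single reference count, but no cross-linking of pending exceptions is needed.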
Mikulas Patocka authored and Alasdair G Kergon committed Dec 10, 2009
1 parent: d698aa4 · commit: 515ad66
Showing 1 changed file with 49 additions and 106 deletions.

drivers/md/dm-snap.c (49 additions, 106 deletions)
@@ -142,28 +142,6 @@ struct dm_snap_pending_exception {
struct bio_list origin_bios;
struct bio_list snapshot_bios;

/*
* Short-term queue of pending exceptions prior to submission.
*/
struct list_head list;

/*
* The primary pending_exception is the one that holds
* the ref_count and the list of origin_bios for a
* group of pending_exceptions. It is always last to get freed.
* These fields get set up when writing to the origin.
*/
struct dm_snap_pending_exception *primary_pe;

/*
* Number of pending_exceptions processing this chunk.
* When this drops to zero we must complete the origin bios.
* If incrementing or decrementing this, hold pe->snap->lock for
* the sibling concerned and not pe->primary_pe->snap->lock unless
* they are the same.
*/
atomic_t ref_count;

/* Pointer back to snapshot context */
struct dm_snapshot *snap;

@@ -1019,6 +997,26 @@ static void flush_queued_bios(struct work_struct *work)
flush_bios(queued_bios);
}

static int do_origin(struct dm_dev *origin, struct bio *bio);

/*
* Flush a list of buffers.
*/
static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
{
struct bio *n;
int r;

while (bio) {
n = bio->bi_next;
bio->bi_next = NULL;
r = do_origin(s->origin, bio);
if (r == DM_MAPIO_REMAPPED)
generic_make_request(bio);
bio = n;
}
}

/*
* Error a list of buffers.
*/
@@ -1052,39 +1050,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
dm_table_event(s->ti->table);
}

static void get_pending_exception(struct dm_snap_pending_exception *pe)
{
atomic_inc(&pe->ref_count);
}

static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
{
struct dm_snap_pending_exception *primary_pe;
struct bio *origin_bios = NULL;

primary_pe = pe->primary_pe;

/*
* If this pe is involved in a write to the origin and
* it is the last sibling to complete then release
* the bios for the original write to the origin.
*/
if (primary_pe &&
atomic_dec_and_test(&primary_pe->ref_count)) {
origin_bios = bio_list_get(&primary_pe->origin_bios);
free_pending_exception(primary_pe);
}

/*
* Free the pe if it's not linked to an origin write or if
* it's not itself a primary pe.
*/
if (!primary_pe || primary_pe != pe)
free_pending_exception(pe);

return origin_bios;
}

static void pending_complete(struct dm_snap_pending_exception *pe, int success)
{
struct dm_exception *e;
@@ -1129,7 +1094,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
out:
dm_remove_exception(&pe->e);
snapshot_bios = bio_list_get(&pe->snapshot_bios);
origin_bios = put_pending_exception(pe);
origin_bios = bio_list_get(&pe->origin_bios);
free_pending_exception(pe);

up_write(&s->lock);

@@ -1139,7 +1105,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
else
flush_bios(snapshot_bios);

flush_bios(origin_bios);
retry_origin_bios(s, origin_bios);
}

static void commit_callback(void *context, int success)
@@ -1226,16 +1192,13 @@ __find_pending_exception(struct dm_snapshot *s,
pe->e.old_chunk = chunk;
bio_list_init(&pe->origin_bios);
bio_list_init(&pe->snapshot_bios);
pe->primary_pe = NULL;
atomic_set(&pe->ref_count, 0);
pe->started = 0;

if (s->store->type->prepare_exception(s->store, &pe->e)) {
free_pending_exception(pe);
return NULL;
}

get_pending_exception(pe);
dm_insert_exception(&s->pending, &pe->e);

return pe;
@@ -1492,16 +1455,16 @@ static int snapshot_iterate_devices(struct dm_target *ti,
static int __origin_write(struct list_head *snapshots, sector_t sector,
struct bio *bio)
{
int r = DM_MAPIO_REMAPPED, first = 0;
int r = DM_MAPIO_REMAPPED;
struct dm_snapshot *snap;
struct dm_exception *e;
struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
struct dm_snap_pending_exception *pe;
struct dm_snap_pending_exception *pe_to_start_now = NULL;
struct dm_snap_pending_exception *pe_to_start_last = NULL;
chunk_t chunk;
LIST_HEAD(pe_queue);

/* Do all the snapshots on this origin */
list_for_each_entry (snap, snapshots, list) {

down_write(&snap->lock);

/* Only deal with valid and active snapshots */
@@ -1522,9 +1485,6 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
* Check exception table to see if block
* is already remapped in this snapshot
* and trigger an exception if not.
*
* ref_count is initialised to 1 so pending_complete()
* won't destroy the primary_pe while we're inside this loop.
*/
e = dm_lookup_exception(&snap->complete, chunk);
if (e)
@@ -1554,60 +1514,43 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
}
}

if (!primary_pe) {
/*
* Either every pe here has same
* primary_pe or none has one yet.
*/
if (pe->primary_pe)
primary_pe = pe->primary_pe;
else {
primary_pe = pe;
first = 1;
}

if (bio)
bio_list_add(&primary_pe->origin_bios, bio);
r = DM_MAPIO_SUBMITTED;

r = DM_MAPIO_SUBMITTED;
}
/*
* If an origin bio was supplied, queue it to wait for the
* completion of this exception, and start this one last,
* at the end of the function.
*/
if (bio) {
bio_list_add(&pe->origin_bios, bio);
bio = NULL;

if (!pe->primary_pe) {
pe->primary_pe = primary_pe;
get_pending_exception(primary_pe);
if (!pe->started) {
pe->started = 1;
pe_to_start_last = pe;
}
}

if (!pe->started) {
pe->started = 1;
list_add_tail(&pe->list, &pe_queue);
pe_to_start_now = pe;
}

next_snapshot:
up_write(&snap->lock);
}

if (!primary_pe)
return r;

/*
* If this is the first time we're processing this chunk and
* ref_count is now 1 it means all the pending exceptions
* got completed while we were in the loop above, so it falls to
* us here to remove the primary_pe and submit any origin_bios.
*/

if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
flush_bios(bio_list_get(&primary_pe->origin_bios));
free_pending_exception(primary_pe);
/* If we got here, pe_queue is necessarily empty. */
return r;
if (pe_to_start_now) {
start_copy(pe_to_start_now);
pe_to_start_now = NULL;
}
}

/*
* Now that we have a complete pe list we can start the copying.
* Submit the exception against which the bio is queued last,
* to give the other exceptions a head start.
*/
list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
start_copy(pe);
if (pe_to_start_last)
start_copy(pe_to_start_last);

return r;
}