Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 176550
b: refs/heads/master
c: d0bcb87
h: refs/heads/master
v: v3
  • Loading branch information
Kiyoshi Ueda authored and Alasdair G Kergon committed Dec 10, 2009
1 parent 2364a40 commit 2ff52b2
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 19 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 980691e5f3a1b5ebbb2d34014e028fd7f1c6e4fb
refs/heads/master: d0bcb8786532b01206f04258eb6b7d4ac858436a
214 changes: 196 additions & 18 deletions trunk/drivers/md/dm.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,20 @@ struct mapped_device {
*/
int barrier_error;

/*
* Protect barrier_error from concurrent endio processing
* in request-based dm.
*/
spinlock_t barrier_error_lock;

/*
* Processing queue (flush/barriers)
*/
struct workqueue_struct *wq;
struct work_struct barrier_work;

/* Pointer to the pre/post flush request that is currently being processed */
struct request *flush_request;

/*
* The current mapping.
Expand Down Expand Up @@ -722,6 +732,23 @@ static void end_clone_bio(struct bio *clone, int error)
blk_update_request(tio->orig, 0, nr_bytes);
}

static void store_barrier_error(struct mapped_device *md, int error)
{
unsigned long flags;

spin_lock_irqsave(&md->barrier_error_lock, flags);
/*
* Basically, the first error is taken, but:
* -EOPNOTSUPP supersedes any I/O error.
* Requeue request supersedes any I/O error but -EOPNOTSUPP.
*/
if (!md->barrier_error || error == -EOPNOTSUPP ||
(md->barrier_error != -EOPNOTSUPP &&
error == DM_ENDIO_REQUEUE))
md->barrier_error = error;
spin_unlock_irqrestore(&md->barrier_error_lock, flags);
}

/*
* Don't touch any member of the md after calling this function because
* the md may be freed in dm_put() at the end of this function.
Expand Down Expand Up @@ -759,11 +786,13 @@ static void free_rq_clone(struct request *clone)
static void dm_end_request(struct request *clone, int error)
{
int rw = rq_data_dir(clone);
int run_queue = 1;
bool is_barrier = blk_barrier_rq(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;

if (blk_pc_request(rq)) {
if (blk_pc_request(rq) && !is_barrier) {
rq->errors = clone->errors;
rq->resid_len = clone->resid_len;

Expand All @@ -778,9 +807,14 @@ static void dm_end_request(struct request *clone, int error)

free_rq_clone(clone);

blk_end_request_all(rq, error);
if (unlikely(is_barrier)) {
if (unlikely(error))
store_barrier_error(md, error);
run_queue = 0;
} else
blk_end_request_all(rq, error);

rq_completed(md, rw, 1);
rq_completed(md, rw, run_queue);
}

static void dm_unprep_request(struct request *rq)
Expand All @@ -805,6 +839,16 @@ void dm_requeue_unmapped_request(struct request *clone)
struct request_queue *q = rq->q;
unsigned long flags;

if (unlikely(blk_barrier_rq(clone))) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
* case.
*/
dm_end_request(clone, DM_ENDIO_REQUEUE);
return;
}

dm_unprep_request(rq);

spin_lock_irqsave(q->queue_lock, flags);
Expand Down Expand Up @@ -894,6 +938,19 @@ static void dm_complete_request(struct request *clone, int error)
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;

if (unlikely(blk_barrier_rq(clone))) {
/*
* Barrier clones share an original request. So can't use
* softirq_done with the original.
* Pass the clone to dm_done() directly in this special case.
* It is safe (even if clone->q->queue_lock is held here)
* because there is no I/O dispatching during the completion
* of barrier clone.
*/
dm_done(clone, error, true);
return;
}

tio->error = error;
rq->completion_data = clone;
blk_complete_request(rq);
Expand All @@ -910,6 +967,17 @@ void dm_kill_unmapped_request(struct request *clone, int error)
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;

if (unlikely(blk_barrier_rq(clone))) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
* case.
*/
BUG_ON(error > 0);
dm_end_request(clone, error);
return;
}

rq->cmd_flags |= REQ_FAILED;
dm_complete_request(clone, error);
}
Expand Down Expand Up @@ -1364,11 +1432,6 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
{
struct mapped_device *md = q->queuedata;

if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}

return md->saved_make_request_fn(q, bio); /* call __make_request() */
}

Expand All @@ -1387,6 +1450,25 @@ static int dm_request(struct request_queue *q, struct bio *bio)
return _dm_request(q, bio);
}

/*
 * Tag rq as a flush request by setting its command type and its first
 * command byte, so that dm_rq_is_flush_request() matches it later on.
 * The queue argument q is not used.
 */
static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
{
	rq->cmd[0] = REQ_LB_OP_FLUSH;
	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
}

/*
 * Report whether rq carries the flush marker: command type
 * REQ_TYPE_LINUX_BLOCK with REQ_LB_OP_FLUSH in the first command byte.
 */
static bool dm_rq_is_flush_request(struct request *rq)
{
	return rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
	       rq->cmd[0] == REQ_LB_OP_FLUSH;
}

void dm_dispatch_request(struct request *rq)
{
int r;
Expand Down Expand Up @@ -1432,16 +1514,24 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
static int setup_clone(struct request *clone, struct request *rq,
struct dm_rq_target_io *tio)
{
int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
dm_rq_bio_constructor, tio);
int r;

if (r)
return r;
if (dm_rq_is_flush_request(rq)) {
blk_rq_init(NULL, clone);
clone->cmd_type = REQ_TYPE_FS;
clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
} else {
r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
dm_rq_bio_constructor, tio);
if (r)
return r;

clone->cmd = rq->cmd;
clone->cmd_len = rq->cmd_len;
clone->sense = rq->sense;
clone->buffer = rq->buffer;
}

clone->cmd = rq->cmd;
clone->cmd_len = rq->cmd_len;
clone->sense = rq->sense;
clone->buffer = rq->buffer;
clone->end_io = end_clone_request;
clone->end_io_data = tio;

Expand Down Expand Up @@ -1482,6 +1572,9 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
struct mapped_device *md = q->queuedata;
struct request *clone;

if (unlikely(dm_rq_is_flush_request(rq)))
return BLKPREP_OK;

if (unlikely(rq->special)) {
DMWARN("Already has something in rq->special.");
return BLKPREP_KILL;
Expand Down Expand Up @@ -1560,6 +1653,14 @@ static void dm_request_fn(struct request_queue *q)
if (!rq)
goto plug_and_out;

if (unlikely(dm_rq_is_flush_request(rq))) {
BUG_ON(md->flush_request);
md->flush_request = rq;
blk_start_request(rq);
queue_work(md->wq, &md->barrier_work);
goto out;
}

ti = dm_table_find_target(map, blk_rq_pos(rq));
if (ti->type->busy && ti->type->busy(ti))
goto plug_and_out;
Expand Down Expand Up @@ -1726,6 +1827,7 @@ static int next_free_minor(int *minor)
static const struct block_device_operations dm_blk_dops;

static void dm_wq_work(struct work_struct *work);
static void dm_rq_barrier_work(struct work_struct *work);

/*
* Allocate and initialise a blank device with a given minor.
Expand Down Expand Up @@ -1755,6 +1857,7 @@ static struct mapped_device *alloc_dev(int minor)
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
spin_lock_init(&md->deferred_lock);
spin_lock_init(&md->barrier_error_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
Expand Down Expand Up @@ -1789,6 +1892,8 @@ static struct mapped_device *alloc_dev(int minor)
blk_queue_softirq_done(md->queue, dm_softirq_done);
blk_queue_prep_rq(md->queue, dm_prep_fn);
blk_queue_lld_busy(md->queue, dm_lld_busy);
blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
dm_rq_prepare_flush);

md->disk = alloc_disk(1);
if (!md->disk)
Expand All @@ -1798,6 +1903,7 @@ static struct mapped_device *alloc_dev(int minor)
atomic_set(&md->pending[1], 0);
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
init_waitqueue_head(&md->eventq);

md->disk->major = _major;
Expand Down Expand Up @@ -2185,6 +2291,73 @@ static void dm_queue_flush(struct mapped_device *md)
queue_work(md->wq, &md->work);
}

/*
 * Store flush_nr in the info.flush_request field of the dm_rq_target_io
 * that clone->end_io_data points to.
 */
static void dm_rq_set_flush_nr(struct request *clone, unsigned flush_nr)
{
	struct dm_rq_target_io *clone_tio = clone->end_io_data;

	clone_tio->info.flush_request = flush_nr;
}

/*
 * Issue barrier requests to targets and wait for their completion.
 *
 * For every target in the current table, one clone of md->flush_request is
 * created and mapped per flush request the target asks for
 * (ti->num_flush_requests).  Each clone is counted in md->pending before it
 * is mapped.
 *
 * md->barrier_error is reset to 0 before any clone is issued; the value it
 * holds after dm_wait_for_completion() has returned is the return value.
 */
static int dm_rq_barrier(struct mapped_device *md)
{
	/*
	 * Unsigned, like the counts they are compared against, so the loop
	 * conditions do not mix signed and unsigned operands.
	 */
	unsigned i, j;
	struct dm_table *map = dm_get_table(md);
	unsigned num_targets = dm_table_get_num_targets(map);
	struct dm_target *ti;
	struct request *clone;

	md->barrier_error = 0;

	for (i = 0; i < num_targets; i++) {
		ti = dm_table_get_target(map, i);
		for (j = 0; j < ti->num_flush_requests; j++) {
			clone = clone_rq(md->flush_request, md, GFP_NOIO);
			/* Tell the target which of its flush requests this is. */
			dm_rq_set_flush_nr(clone, j);
			atomic_inc(&md->pending[rq_data_dir(clone)]);
			map_request(ti, clone, md);
		}
	}

	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
	dm_table_put(map);

	return md->barrier_error;
}

/*
 * Work handler for md->barrier_work: run the barrier for the request held
 * in md->flush_request, then either requeue that request or complete it
 * with the resulting status, and finally run the queue.
 */
static void dm_rq_barrier_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						barrier_work);
	struct request_queue *queue = md->queue;
	struct request *flush_rq;
	unsigned long irq_flags;
	int result;

	/*
	 * Take a reference on the md for the duration of this handler and
	 * drop it only at the very end, so that the md can't be deleted by
	 * a device opener when the barrier request completes.
	 */
	dm_get(md);

	result = dm_rq_barrier(md);

	/* Detach the flush request from the md before finishing it. */
	flush_rq = md->flush_request;
	md->flush_request = NULL;

	if (result != DM_ENDIO_REQUEUE) {
		blk_end_request_all(flush_rq, result);
	} else {
		spin_lock_irqsave(queue->queue_lock, irq_flags);
		blk_requeue_request(queue, flush_rq);
		spin_unlock_irqrestore(queue->queue_lock, irq_flags);
	}

	blk_run_queue(queue);

	dm_put(md);
}

/*
* Swap in a new table (destroying old one).
*/
Expand Down Expand Up @@ -2325,11 +2498,16 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
up_write(&md->io_lock);

flush_workqueue(md->wq);

/*
* Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
* can be kicked until md->queue is stopped. So stop md->queue before
* flushing md->wq.
*/
if (dm_request_based(md))
stop_queue(md->queue);

flush_workqueue(md->wq);

/*
* At this point no more requests are entering target request routines.
* We call dm_wait_for_completion to wait for all existing requests
Expand Down

0 comments on commit 2ff52b2

Please sign in to comment.