From 45ec9bd0fd7abf8705e7cf12205ff69fe9d51181 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 10 Dec 2014 17:06:57 +0000 Subject: [PATCH 1/4] dm thin: fix inability to discard blocks when in out-of-data-space mode When the pool was in PM_OUT_OF_SPACE mode its process_prepared_discard function pointer was incorrectly being set to process_prepared_discard_passdown rather than process_prepared_discard. This incorrect function pointer meant the discard was being passed down, but not effecting the mapping. As such any discard that was issued, in an attempt to reclaim blocks, would not successfully free data space. Reported-by: Eric Sandeen Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 8735543eacdb9..14b51a4fdf6b5 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2155,7 +2155,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_cell = process_cell_read_only; pool->process_discard_cell = process_discard_cell; pool->process_prepared_mapping = process_prepared_mapping; - pool->process_prepared_discard = process_prepared_discard_passdown; + pool->process_prepared_discard = process_prepared_discard; if (!pool->pf.error_if_no_space && no_space_timeout) queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout); From 2c43fd26e46734430122b8d2ad3024bb532df3ef Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 11 Dec 2014 11:12:19 +0000 Subject: [PATCH 2/4] dm thin: fix missing out-of-data-space to write mode transition if blocks are released Discard bios and thin device deletion have the potential to release data blocks. If the thin-pool is in out-of-data-space mode, and blocks were released, transition the thin-pool back to full write mode. The correct time to do this is just after the thin-pool metadata commit. It cannot be done before the commit because the space maps will not allow immediate reuse of the data blocks in case there's a rollback following power failure. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-thin.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 14b51a4fdf6b5..922aa553e9e0c 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1127,6 +1127,24 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, schedule_zero(tc, virt_block, data_dest, cell, bio); } +static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); + +static void check_for_space(struct pool *pool) +{ + int r; + dm_block_t nr_free; + + if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE) + return; + + r = dm_pool_get_free_block_count(pool->pmd, &nr_free); + if (r) + return; + + if (nr_free) + set_pool_mode(pool, PM_WRITE); +} + /* * A non-zero return indicates read_only or fail_io mode. * Many callers don't care about the return value. @@ -1141,6 +1159,8 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); + else + check_for_space(pool); return r; } @@ -1159,8 +1179,6 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks) } } -static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); - static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; From 2b94e8960cc3f225dec058f27570505351f4bc13 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 17 Dec 2014 07:59:59 -0500 Subject: [PATCH 3/4] dm thin: fix crash by initializing thin device's refcount and completion earlier Commit 80e96c5484be ("dm thin: do not allow thin device activation while pool is suspended") delayed the initialization of a new thin device's refcount and completion until after this new thin was added to the pool's active_thins list and the pool lock is released. This opens a race with a worker thread that walks the list and calls thin_get/put, noticing that the refcount goes to 0 and calling complete, freezing up the system and giving the oops below: kernel: BUG: unable to handle kernel NULL pointer dereference at (null) kernel: IP: [] __wake_up_common+0x2b/0x90 kernel: Call Trace: kernel: [] __wake_up_locked+0x13/0x20 kernel: [] complete+0x37/0x50 kernel: [] thin_put+0x20/0x30 [dm_thin_pool] kernel: [] do_worker+0x667/0x870 [dm_thin_pool] kernel: [] ? __schedule+0x3ac/0x9a0 kernel: [] process_one_work+0x14f/0x400 kernel: [] worker_thread+0x6b/0x490 kernel: [] ? rescuer_thread+0x260/0x260 kernel: [] kthread+0xdb/0x100 kernel: [] ? kthread_create_on_node+0x170/0x170 kernel: [] ret_from_fork+0x7c/0xb0 kernel: [] ? kthread_create_on_node+0x170/0x170 Set the thin device's initial refcount and initialize the completion before adding it to the pool's active_thins list in thin_ctr(). Signed-off-by: Marc Dionne Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 922aa553e9e0c..493478989dbd4 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3832,6 +3832,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -EINVAL; goto bad; } + atomic_set(&tc->refcount, 1); + init_completion(&tc->can_destroy); list_add_tail_rcu(&tc->list, &tc->pool->active_thins); spin_unlock_irqrestore(&tc->pool->lock, flags); /* @@ -3844,9 +3846,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) dm_put(pool_md); - atomic_set(&tc->refcount, 1); - init_completion(&tc->can_destroy); - return 0; bad: From 5164bece1673cdf04782f8ed3fba70743700f5da Mon Sep 17 00:00:00 2001 From: zhendong chen Date: Wed, 17 Dec 2014 14:37:04 +0800 Subject: [PATCH 4/4] dm: fix missed error code if .end_io isn't implemented by target_type In bio-based DM's clone_endio(), when target_type doesn't implement .end_io (e.g. linear) r will be always be initialized 0. So if a WRITE SAME bio fails WRITE SAME will not be disabled as intended. Fix this by initializing r to error, rather than 0, in clone_endio(). Signed-off-by: Alex Chen Signed-off-by: Mike Snitzer Fixes: 7eee4ae2db ("dm: disable WRITE SAME if it fails") Cc: stable@vger.kernel.org --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4c06585bf1657..b98cd9d84435f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -899,7 +899,7 @@ static void disable_write_same(struct mapped_device *md) static void clone_endio(struct bio *bio, int error) { - int r = 0; + int r = error; struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md;