Skip to content

Commit

Permalink
Btrfs: wait on caching if we're loading the free space cache
Browse files Browse the repository at this point in the history
We've been hitting panics when running xfstest 13 in a loop for long periods of
time.  This problem has actually always existed, so we've been hitting these
panics randomly for a while.  Basically what happens is that one thread comes
into the allocator, reads the space cache off of disk, and adds the entries to
the free space cache as it goes.  Then another thread comes in and tries to
allocate from that block group.  Since block_group->cached != BTRFS_CACHE_NO,
it goes ahead and tries to do the allocation.  We allow this because if we're
doing the old slow way of caching we don't want to hold people up waiting for
everything to finish.  The problem is that we could end up discarding the space
cache at some arbitrary point in the future, which means we could very well end
up allocating space that is bad, or, when the real caching happens, it could
think the space isn't in use when it really is and cause all sorts of other
problems.

The solution is to add a new flag to indicate that we are loading the free space
cache from disk, and to always try to cache the block group if cache->cached !=
BTRFS_CACHE_FINISHED.  That way, if we are loading the space cache, anybody else
who tries to allocate from the block group will have to wait until the load has
finished, to make sure it completes successfully.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
  • Loading branch information
Josef Bacik authored and Chris Mason committed Nov 20, 2011
1 parent 5bb1468 commit 291c7d2
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 41 deletions.
3 changes: 2 additions & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,8 @@ struct btrfs_free_cluster {
enum btrfs_caching_type {
BTRFS_CACHE_NO = 0,
BTRFS_CACHE_STARTED = 1,
BTRFS_CACHE_FINISHED = 2,
BTRFS_CACHE_FAST = 2,
BTRFS_CACHE_FINISHED = 3,
};

enum btrfs_disk_cache_state {
Expand Down
119 changes: 79 additions & 40 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
struct btrfs_root *root,
int load_cache_only)
{
DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
int ret = 0;

smp_mb();
if (cache->cached != BTRFS_CACHE_NO)
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
BUG_ON(!caching_ctl);

INIT_LIST_HEAD(&caching_ctl->list);
mutex_init(&caching_ctl->mutex);
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->key.objectid;
atomic_set(&caching_ctl->count, 1);
caching_ctl->work.func = caching_thread;

spin_lock(&cache->lock);
/*
* This should be a rare occasion, but this could happen I think in the
* case where one thread starts to load the space cache info, and then
* some other thread starts a transaction commit which tries to do an
* allocation while the other thread is still loading the space cache
* info. The previous loop should have kept us from choosing this block
* group, but if we've moved to the state where we will wait on caching
* block groups we need to first check if we're doing a fast load here,
* so we can wait for it to finish, otherwise we could end up allocating
* from a block group whose cache gets evicted for one reason or
* another.
*/
while (cache->cached == BTRFS_CACHE_FAST) {
struct btrfs_caching_control *ctl;

ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&cache->lock);

schedule();

finish_wait(&ctl->wait, &wait);
put_caching_control(ctl);
spin_lock(&cache->lock);
}

if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
kfree(caching_ctl);
return 0;
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_FAST;
spin_unlock(&cache->lock);

/*
* We can't do the read from on-disk cache during a commit since we need
Expand All @@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
if (trans && (!trans->transaction->in_commit) &&
(root && root != root->fs_info->tree_root) &&
btrfs_test_opt(root, SPACE_CACHE)) {
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
return 0;
}
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);

ret = load_free_space_cache(fs_info, cache);

spin_lock(&cache->lock);
if (ret == 1) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_FINISHED;
cache->last_byte_to_unpin = (u64)-1;
} else {
cache->cached = BTRFS_CACHE_NO;
if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
}
}
spin_unlock(&cache->lock);
wake_up(&caching_ctl->wait);
if (ret == 1) {
put_caching_control(caching_ctl);
free_excluded_extents(fs_info->extent_root, cache);
return 0;
}
} else {
/*
* We are not going to do the fast caching, set cached to the
* appropriate value and wakeup any waiters.
*/
spin_lock(&cache->lock);
if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
}
spin_unlock(&cache->lock);
wake_up(&caching_ctl->wait);
}

if (load_cache_only)
return 0;

caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
BUG_ON(!caching_ctl);

INIT_LIST_HEAD(&caching_ctl->list);
mutex_init(&caching_ctl->mutex);
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->key.objectid;
/* one for caching kthread, one for caching block group list */
atomic_set(&caching_ctl->count, 2);
caching_ctl->work.func = caching_thread;

spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
kfree(caching_ctl);
if (load_cache_only) {
put_caching_control(caching_ctl);
return 0;
}
cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);

down_write(&fs_info->extent_commit_sem);
atomic_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem);

Expand Down Expand Up @@ -5177,13 +5218,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
}

have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
cached = block_group_cache_done(block_group);
if (unlikely(!cached)) {
u64 free_percent;

found_uncached_bg = true;
ret = cache_block_group(block_group, trans,
orig_root, 1);
if (block_group->cached == BTRFS_CACHE_FINISHED)
goto have_block_group;
goto alloc;

free_percent = btrfs_block_group_used(&block_group->item);
free_percent *= 100;
Expand All @@ -5205,7 +5248,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
orig_root, 0);
BUG_ON(ret);
}
found_uncached_bg = true;

/*
* If loop is set for cached only, try the next block
Expand All @@ -5215,10 +5257,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
goto loop;
}

cached = block_group_cache_done(block_group);
if (unlikely(!cached))
found_uncached_bg = true;

alloc:
if (unlikely(block_group->ro))
goto loop;

Expand Down

0 comments on commit 291c7d2

Please sign in to comment.