Skip to content

Commit

Permalink
Btrfs: improve the noflush reservation
Browse files Browse the repository at this point in the history
In some places(such as: evicting inode), we just can not flush the reserved
space of delalloc, flushing the delayed directory index and delayed inode
is OK, but we don't try to flush those things and just go back when there is
no enough space to be reserved. This patch fixes this problem.

We defined 3 types of the flush operations: NO_FLUSH, FLUSH_LIMIT and FLUSH_ALL.
If we can in the transaction, we should not flush anything, or the deadlock
would happen, so use NO_FLUSH. If we flushing the reserved space of delalloc
would cause deadlock, use FLUSH_LIMIT. In the other cases, FLUSH_ALL is used,
and we will flush all things.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
  • Loading branch information
Miao Xie authored and Josef Bacik committed Dec 11, 2012
1 parent 561c294 commit 08e007d
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 86 deletions.
26 changes: 16 additions & 10 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2900,6 +2900,18 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);

enum btrfs_reserve_flush_enum {
/* If we are in the transaction, we can't flush anything.*/
BTRFS_RESERVE_NO_FLUSH,
/*
* Flushing delalloc may cause deadlock somewhere, in this
* case, use FLUSH LIMIT
*/
BTRFS_RESERVE_FLUSH_LIMIT,
BTRFS_RESERVE_FLUSH_ALL,
};

int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
Expand All @@ -2919,19 +2931,13 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush);
int btrfs_block_rsv_check(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int min_factor);
int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved);
int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved);
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
enum btrfs_reserve_flush_enum flush);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
Expand Down
6 changes: 4 additions & 2 deletions fs/btrfs/delayed-inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,8 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
/*
* Since we're under a transaction reserve_metadata_bytes could
* try to commit the transaction which will make it return
Expand Down Expand Up @@ -686,7 +687,8 @@ static int btrfs_delayed_inode_reserve_metadata(
* reserve something strictly for us. If not be a pain and try
* to steal from the delalloc block rsv.
*/
ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
if (!ret)
goto out;

Expand Down
97 changes: 48 additions & 49 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3644,7 +3644,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,

static int can_overcommit(struct btrfs_root *root,
struct btrfs_space_info *space_info, u64 bytes,
int flush)
enum btrfs_reserve_flush_enum flush)
{
u64 profile = btrfs_get_alloc_profile(root, 0);
u64 avail;
Expand Down Expand Up @@ -3672,7 +3672,7 @@ static int can_overcommit(struct btrfs_root *root,
* 1/2th of the space. If we can flush, don't let us overcommit
* too much, let it overcommit up to 1/8 of the space.
*/
if (flush)
if (flush == BTRFS_RESERVE_FLUSH_ALL)
avail >>= 3;
else
avail >>= 1;
Expand All @@ -3696,6 +3696,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
long time_left;
unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
enum btrfs_reserve_flush_enum flush;

trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
Expand Down Expand Up @@ -3723,8 +3724,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
wait_event(root->fs_info->async_submit_wait,
!atomic_read(&root->fs_info->async_delalloc_pages));

if (!trans)
flush = BTRFS_RESERVE_FLUSH_ALL;
else
flush = BTRFS_RESERVE_NO_FLUSH;
spin_lock(&space_info->lock);
if (can_overcommit(root, space_info, orig, !trans)) {
if (can_overcommit(root, space_info, orig, flush)) {
spin_unlock(&space_info->lock);
break;
}
Expand Down Expand Up @@ -3882,7 +3887,8 @@ static int flush_space(struct btrfs_root *root,
*/
static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 orig_bytes, int flush)
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
u64 used;
Expand All @@ -3895,10 +3901,11 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
ret = 0;
spin_lock(&space_info->lock);
/*
* We only want to wait if somebody other than us is flushing and we are
* actually alloed to flush.
* We only want to wait if somebody other than us is flushing and we
* are actually allowed to flush all things.
*/
while (flush && !flushing && space_info->flush) {
while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
space_info->flush) {
spin_unlock(&space_info->lock);
/*
* If we have a trans handle we can't wait because the flusher
Expand Down Expand Up @@ -3964,23 +3971,40 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
* Couldn't make our reservation, save our place so while we're trying
* to reclaim space we can actually use it instead of somebody else
* stealing it from us.
*
* We make the other tasks wait for the flush only when we can flush
* all things.
*/
if (ret && flush) {
if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) {
flushing = true;
space_info->flush = 1;
}

spin_unlock(&space_info->lock);

if (!ret || !flush)
if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
goto out;

ret = flush_space(root, space_info, num_bytes, orig_bytes,
flush_state);
flush_state++;

/*
* If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
* would happen. So skip delalloc flush.
*/
if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
(flush_state == FLUSH_DELALLOC ||
flush_state == FLUSH_DELALLOC_WAIT))
flush_state = ALLOC_CHUNK;

if (!ret)
goto again;
else if (flush_state <= COMMIT_TRANS)
else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
flush_state < COMMIT_TRANS)
goto again;
else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
flush_state <= COMMIT_TRANS)
goto again;

out:
Expand Down Expand Up @@ -4131,9 +4155,9 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
kfree(rsv);
}

static inline int __block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int flush)
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush)
{
int ret;

Expand All @@ -4149,20 +4173,6 @@ static inline int __block_rsv_add(struct btrfs_root *root,
return ret;
}

int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
return __block_rsv_add(root, block_rsv, num_bytes, 1);
}

int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
return __block_rsv_add(root, block_rsv, num_bytes, 0);
}

int btrfs_block_rsv_check(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int min_factor)
{
Expand All @@ -4181,9 +4191,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root,
return ret;
}

static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int flush)
int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
enum btrfs_reserve_flush_enum flush)
{
u64 num_bytes = 0;
int ret = -ENOSPC;
Expand Down Expand Up @@ -4211,20 +4221,6 @@ static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}

int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved)
{
return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
}

int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved)
{
return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
}

int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes)
Expand Down Expand Up @@ -4515,14 +4511,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
u64 csum_bytes;
unsigned nr_extents = 0;
int extra_reserve = 0;
int flush = 1;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret;

/* Need to be holding the i_mutex here if we aren't free space cache */
if (btrfs_is_free_space_inode(inode))
flush = 0;
flush = BTRFS_RESERVE_NO_FLUSH;

if (flush && btrfs_transaction_in_commit(root->fs_info))
if (flush != BTRFS_RESERVE_NO_FLUSH &&
btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);

mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
Expand Down Expand Up @@ -6252,7 +6249,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);

if (block_rsv->size == 0) {
ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
ret = reserve_metadata_bytes(root, block_rsv, blocksize,
BTRFS_RESERVE_NO_FLUSH);
/*
* If we couldn't reserve metadata bytes try and use some from
* the global reserve.
Expand All @@ -6279,7 +6277,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
WARN_ON(1);
}
ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
ret = reserve_metadata_bytes(root, block_rsv, blocksize,
BTRFS_RESERVE_NO_FLUSH);
if (!ret) {
return block_rsv;
} else if (ret && block_rsv != global_rsv) {
Expand Down
5 changes: 3 additions & 2 deletions fs/btrfs/inode-map.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,8 +434,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
* 3 items for pre-allocation
*/
trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv,
trans->bytes_reserved);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
trans->bytes_reserved,
BTRFS_RESERVE_NO_FLUSH);
if (ret)
goto out;
trace_btrfs_space_reservation(root->fs_info, "ino_cache",
Expand Down
5 changes: 3 additions & 2 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -3829,7 +3829,8 @@ void btrfs_evict_inode(struct inode *inode)
* inode item when doing the truncate.
*/
while (1) {
ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
ret = btrfs_block_rsv_refill(root, rsv, min_size,
BTRFS_RESERVE_FLUSH_LIMIT);

/*
* Try and steal from the global reserve since we will
Expand All @@ -3847,7 +3848,7 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}

trans = btrfs_start_transaction_noflush(root, 1);
trans = btrfs_start_transaction_lflush(root, 1);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
Expand Down
12 changes: 8 additions & 4 deletions fs/btrfs/relocation.c
Original file line number Diff line number Diff line change
Expand Up @@ -2074,7 +2074,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
BUG_ON(IS_ERR(trans));
trans->block_rsv = rc->block_rsv;

ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved);
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
BTRFS_RESERVE_FLUSH_ALL);
if (ret) {
BUG_ON(ret != -EAGAIN);
ret = btrfs_commit_transaction(trans, root);
Expand Down Expand Up @@ -2184,7 +2185,8 @@ int prepare_to_merge(struct reloc_control *rc, int err)
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
BTRFS_RESERVE_FLUSH_ALL);
if (ret)
err = ret;
}
Expand Down Expand Up @@ -2459,7 +2461,8 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
num_bytes = calcu_metadata_size(rc, node, 1) * 2;

trans->block_rsv = rc->block_rsv;
ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
BTRFS_RESERVE_FLUSH_ALL);
if (ret) {
if (ret == -EAGAIN)
rc->commit_transaction = 1;
Expand Down Expand Up @@ -3685,7 +3688,8 @@ int prepare_to_relocate(struct reloc_control *rc)
* is no reservation in transaction handle.
*/
ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
rc->extent_root->nodesize * 256);
rc->extent_root->nodesize * 256,
BTRFS_RESERVE_FLUSH_ALL);
if (ret)
return ret;

Expand Down
Loading

0 comments on commit 08e007d

Please sign in to comment.