Skip to content

Commit

Permalink
btrfs: make the delalloc block rsv per inode
Browse files Browse the repository at this point in the history
The way we handle delalloc metadata reservations has gotten
progressively more complicated over the years.  There is so much cruft
and weirdness around keeping the reserved count and outstanding counters
consistent and handling the error cases that it's impossible to
understand.

Fix this by making the delalloc block rsv per-inode.  This way we can
calculate the actual size of the outstanding metadata reservations every
time we make a change, and then reserve the delta based on that amount.
This greatly simplifies the code everywhere, and makes the error
handling in btrfs_delalloc_reserve_metadata far less terrifying.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
Josef Bacik authored and David Sterba committed Nov 1, 2017
1 parent dd48d40 commit 69fe2d7
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 292 deletions.
26 changes: 9 additions & 17 deletions fs/btrfs/btrfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,13 @@
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
#define BTRFS_INODE_HAS_PROPS 11
#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
#define BTRFS_INODE_NEEDS_FULL_SYNC 6
#define BTRFS_INODE_COPY_EVERYTHING 7
#define BTRFS_INODE_IN_DELALLOC_LIST 8
#define BTRFS_INODE_READDIO_NEED_LOCK 9
#define BTRFS_INODE_HAS_PROPS 10

/* in memory btrfs inode */
struct btrfs_inode {
Expand Down Expand Up @@ -176,7 +175,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
unsigned outstanding_extents;
unsigned reserved_extents;

struct btrfs_block_rsv block_rsv;

/*
* Cached values of inode properties
Expand Down Expand Up @@ -278,14 +278,6 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
mod);
}

static inline void btrfs_mod_reserved_extents(struct btrfs_inode *inode, int mod)
{
lockdep_assert_held(&inode->lock);
inode->reserved_extents += mod;
if (btrfs_is_free_space_inode(inode))
return;
}

static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
Expand Down
5 changes: 3 additions & 2 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,6 @@ struct btrfs_fs_info {
* delayed dir index item
*/
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
Expand Down Expand Up @@ -2757,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv,
unsigned short type);
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
Expand Down
46 changes: 1 addition & 45 deletions fs/btrfs/delayed-inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -581,44 +581,20 @@ static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_block_rsv *dst_rsv;
u64 num_bytes;
int ret;
bool release = false;

src_rsv = trans->block_rsv;
dst_rsv = &fs_info->delayed_block_rsv;

num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);

/*
* If our block_rsv is the delalloc block reserve then check and see if
* we have our extra reservation for updating the inode. If not fall
* through and try to reserve space quickly.
*
* We used to try and steal from the delalloc block rsv or the global
* reserve, but we'd steal a full reservation, which isn't kind. We are
* here through delalloc which means we've likely just cowed down close
* to the leaf that contains the inode, so we would steal less just
* doing the fallback inode update, so if we do end up having to steal
* from the global block rsv we hopefully only steal one or two blocks
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&inode->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&inode->runtime_flags))
release = true;
else
src_rsv = NULL;
spin_unlock(&inode->lock);
}

/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
* still need to reserve space for this update, so try to reserve the
* space.
*
* Now if src_rsv == delalloc_block_rsv we'll let it just steal since
* we're accounted for.
* we always reserve enough to update the inode item.
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
Expand All @@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
}

ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);

/*
* Migrate only takes a reservation, it doesn't touch the size of the
* block_rsv. This is to simplify people who don't normally have things
* migrated from their block rsv. If they go to release their
* reservation, that will decrease the size as well, so if migrate
* reduced size we'd end up with a negative size. But for the
* delalloc_meta_reserved stuff we will only know to drop 1 reservation,
* but we could in fact do this reserve/migrate dance several times
* between the time we did the original reservation and we'd clean it
* up. So to take care of this, release the space for the meta
* reservation here. I think it may be time for a documentation page on
* how block rsvs. work.
*/
if (!ret) {
trace_btrfs_space_reservation(fs_info, "delayed_inode",
btrfs_ino(inode), num_bytes, 1);
node->bytes_reserved = num_bytes;
}

if (release) {
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), num_bytes, 0);
btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
}

return ret;
}

Expand Down
18 changes: 8 additions & 10 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -2447,14 +2447,6 @@ int open_ctree(struct super_block *sb,
goto fail_delalloc_bytes;
}

fs_info->btree_inode = new_inode(sb);
if (!fs_info->btree_inode) {
err = -ENOMEM;
goto fail_bio_counter;
}

mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);

INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
Expand Down Expand Up @@ -2487,8 +2479,6 @@ int open_ctree(struct super_block *sb,
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv,
BTRFS_BLOCK_RSV_GLOBAL);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
BTRFS_BLOCK_RSV_DELALLOC);
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
Expand Down Expand Up @@ -2517,6 +2507,14 @@ int open_ctree(struct super_block *sb,

INIT_LIST_HEAD(&fs_info->ordered_roots);
spin_lock_init(&fs_info->ordered_root_lock);

fs_info->btree_inode = new_inode(sb);
if (!fs_info->btree_inode) {
err = -ENOMEM;
goto fail_bio_counter;
}
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);

fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
GFP_KERNEL);
if (!fs_info->delayed_root) {
Expand Down
Loading

0 comments on commit 69fe2d7

Please sign in to comment.