btrfs: qgroup: account shared subtrees during snapshot delete
During its tree walk, btrfs_drop_snapshot() will skip any shared
subtrees it encounters. This is incorrect when we have qgroups
turned on as those subtrees need to have their contents
accounted. In particular, the case we're concerned with is when
removing our snapshot root leaves the subtree with only one root
reference.

In those cases we need to find the last remaining root and add
each extent in the subtree to the corresponding qgroup exclusive
counts.
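
As a rough illustration of the arithmetic involved, here is a small
user-space C model of that exclusive-count update. It is a sketch only:
the struct and function names are invented for the example and do not
correspond to the kernel's qgroup code. The point is simply that once an
extent is left with a single root reference, its bytes are added to that
root's qgroup exclusive count (they were already part of the referenced
count).

/* Hypothetical user-space model of the qgroup math described above;
 * not the kernel implementation. */
#include <stdint.h>
#include <stdio.h>

struct qgroup_counts {
	uint64_t referenced;	/* bytes referenced by this root */
	uint64_t exclusive;	/* bytes referenced by this root only */
};

/*
 * An extent of num_bytes in the shared subtree is found to have a
 * single remaining root reference: its bytes now count as exclusive
 * for that root.  They were already included in the referenced count
 * when the extent was first shared.
 */
static void account_subtree_extent(struct qgroup_counts *qg,
				   uint64_t num_bytes,
				   unsigned int remaining_refs)
{
	if (remaining_refs == 1)
		qg->exclusive += num_bytes;
}

int main(void)
{
	struct qgroup_counts qg = { .referenced = 1024 * 1024, .exclusive = 0 };

	/* A 16KiB metadata block is left with one root reference. */
	account_subtree_extent(&qg, 16 * 1024, 1);
	printf("referenced=%llu exclusive=%llu\n",
	       (unsigned long long)qg.referenced,
	       (unsigned long long)qg.exclusive);
	return 0;
}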

This patch implements the shared subtree walk and a new qgroup
operation, BTRFS_QGROUP_OPER_SUB_SUBTREE. When an operation of
this type is encountered during qgroup accounting, we search for
any root references to that extent and in the case that we find
only one reference left, we go ahead and do the math on its
exclusive counts.
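
To make the walk itself easier to follow, below is a rough,
self-contained user-space C sketch of the traversal pattern that
account_shared_subtree() and adjust_slots_upwards() implement: walk down
to a leaf along a slots array, account the leaf, then bump slots upward
until some node still has an unvisited child, and repeat until the root
is exhausted. The node layout, fan-out and names are invented for the
example; it is not the btrfs code and omits locking, readahead and error
handling.

/* Hypothetical sketch of the walk-down / adjust-slots-upwards pattern;
 * node layout and names are invented, not btrfs structures. */
#include <stdio.h>

#define SKETCH_MAX_LEVEL 8

struct node {
	int nritems;
	struct node *child[16];		/* unused (NULL) at level 0 */
};

static void account_leaf(struct node *leaf)
{
	/* stand-in for account_leaf_items(): record the leaf's extents */
	printf("accounting leaf with %d items\n", leaf->nritems);
}

/* Advance slots from the bottom up; return 1 once the root is exhausted. */
static int adjust_slots_up(struct node **nodes, int *slots, int root_level)
{
	int level;

	if (root_level == 0)
		return 1;	/* a lone leaf has no slots to advance */

	for (level = 0; level <= root_level; level++) {
		slots[level]++;
		if (level != 0 && slots[level] < nodes[level]->nritems)
			break;	/* valid slot to walk back down from */
		if (level != root_level) {
			/* this node is finished; forget it (never the root) */
			nodes[level] = NULL;
			slots[level] = 0;
		}
	}
	return slots[root_level] >= nodes[root_level]->nritems;
}

static void walk_subtree(struct node *root, int root_level)
{
	struct node *nodes[SKETCH_MAX_LEVEL] = { 0 };
	int slots[SKETCH_MAX_LEVEL] = { 0 };
	int level;

	nodes[root_level] = root;
	for (;;) {
		/* Walk down, filling in missing nodes from the parent slot. */
		for (level = root_level; level > 0; level--) {
			if (!nodes[level - 1]) {
				nodes[level - 1] =
					nodes[level]->child[slots[level]];
				slots[level - 1] = 0;
			}
		}
		account_leaf(nodes[0]);
		if (adjust_slots_up(nodes, slots, root_level))
			break;		/* search complete */
	}
}

int main(void)
{
	struct node leaf_a = { .nritems = 3 };
	struct node leaf_b = { .nritems = 2 };
	struct node root = { .nritems = 2, .child = { &leaf_a, &leaf_b } };

	walk_subtree(&root, 1);
	return 0;
}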

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
Mark Fasheh authored and Chris Mason committed Aug 15, 2014
1 parent 6f7ff6d commit 1152651
Showing 3 changed files with 426 additions and 0 deletions.
fs/btrfs/extent-tree.c: 261 additions and 0 deletions
@@ -7478,6 +7478,220 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
wc->reada_slot = slot;
}

static int account_leaf_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *eb)
{
int nr = btrfs_header_nritems(eb);
int i, extent_type, ret;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u64 bytenr, num_bytes;

for (i = 0; i < nr; i++) {
btrfs_item_key_to_cpu(eb, &key, i);

if (key.type != BTRFS_EXTENT_DATA_KEY)
continue;

fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* filter out non qgroup-accountable extents */
extent_type = btrfs_file_extent_type(eb, fi);

if (extent_type == BTRFS_FILE_EXTENT_INLINE)
continue;

bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
if (!bytenr)
continue;

num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);

ret = btrfs_qgroup_record_ref(trans, root->fs_info,
root->objectid,
bytenr, num_bytes,
BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
if (ret)
return ret;
}
return 0;
}

/*
* Walk up the tree from the bottom, freeing leaves and any interior
* nodes which have had all slots visited. If a node (leaf or
interior) is freed, the node above it will have its slot
* incremented. The root node will never be freed.
*
* At the end of this function, we should have a path which has all
* slots incremented to the next position for a search. If we need to
* read a new node it will be NULL and the node above it will have the
* correct slot selected for a later read.
*
If we increment the root node's slot counter past the number of
* elements, 1 is returned to signal completion of the search.
*/
static int adjust_slots_upwards(struct btrfs_root *root,
struct btrfs_path *path, int root_level)
{
int level = 0;
int nr, slot;
struct extent_buffer *eb;

if (root_level == 0)
return 1;

while (level <= root_level) {
eb = path->nodes[level];
nr = btrfs_header_nritems(eb);
path->slots[level]++;
slot = path->slots[level];
if (slot >= nr || level == 0) {
/*
* Don't free the root - we will detect this
* condition after our loop and return a
* positive value for caller to stop walking the tree.
*/
if (level != root_level) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;

free_extent_buffer(eb);
path->nodes[level] = NULL;
path->slots[level] = 0;
}
} else {
/*
* We have a valid slot to walk back down
* from. Stop here so caller can process these
* new nodes.
*/
break;
}

level++;
}

eb = path->nodes[root_level];
if (path->slots[root_level] >= btrfs_header_nritems(eb))
return 1;

return 0;
}

/*
* root_eb is the subtree root and is locked before this function is called.
*/
static int account_shared_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *root_eb,
u64 root_gen,
int root_level)
{
int ret = 0;
int level;
struct extent_buffer *eb = root_eb;
struct btrfs_path *path = NULL;

BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
BUG_ON(root_eb == NULL);

if (!root->fs_info->quota_enabled)
return 0;

if (!extent_buffer_uptodate(root_eb)) {
ret = btrfs_read_buffer(root_eb, root_gen);
if (ret)
goto out;
}

if (root_level == 0) {
ret = account_leaf_items(trans, root, root_eb);
goto out;
}

path = btrfs_alloc_path();
if (!path)
return -ENOMEM;

/*
* Walk down the tree. Missing extent blocks are filled in as
* we go. Metadata is accounted every time we read a new
* extent block.
*
* When we reach a leaf, we account for file extent items in it,
* walk back up the tree (adjusting slot pointers as we go)
* and restart the search process.
*/
extent_buffer_get(root_eb); /* For path */
path->nodes[root_level] = root_eb;
path->slots[root_level] = 0;
path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
walk_down:
level = root_level;
while (level >= 0) {
if (path->nodes[level] == NULL) {
int child_bsize = root->nodesize;
int parent_slot;
u64 child_gen;
u64 child_bytenr;

/* We need to get child blockptr/gen from
* parent before we can read it. */
eb = path->nodes[level + 1];
parent_slot = path->slots[level + 1];
child_bytenr = btrfs_node_blockptr(eb, parent_slot);
child_gen = btrfs_node_ptr_generation(eb, parent_slot);

eb = read_tree_block(root, child_bytenr, child_bsize,
child_gen);
if (!eb || !extent_buffer_uptodate(eb)) {
ret = -EIO;
goto out;
}

path->nodes[level] = eb;
path->slots[level] = 0;

btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

ret = btrfs_qgroup_record_ref(trans, root->fs_info,
root->objectid,
child_bytenr,
child_bsize,
BTRFS_QGROUP_OPER_SUB_SUBTREE,
0);
if (ret)
goto out;

}

if (level == 0) {
ret = account_leaf_items(trans, root, path->nodes[level]);
if (ret)
goto out;

/* Nonzero return here means we completed our search */
ret = adjust_slots_upwards(root, path, root_level);
if (ret)
break;

/* Restart search with new slots */
goto walk_down;
}

level--;
}

ret = 0;
out:
btrfs_free_path(path);

return ret;
}

/*
* helper to process tree block while walking down the tree.
*
@@ -7581,6 +7795,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
int level = wc->level;
int reada = 0;
int ret = 0;
bool need_account = false;

generation = btrfs_node_ptr_generation(path->nodes[level],
path->slots[level]);
@@ -7626,6 +7841,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,

if (wc->stage == DROP_REFERENCE) {
if (wc->refs[level - 1] > 1) {
need_account = true;
if (level == 1 &&
(wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
goto skip;
@@ -7689,6 +7905,16 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
parent = 0;
}

if (need_account) {
ret = account_shared_subtree(trans, root, next,
generation, level - 1);
if (ret) {
printk_ratelimited(KERN_ERR "BTRFS: %s Error "
"%d accounting shared subtree. Quota "
"is out of sync, rescan required.\n",
root->fs_info->sb->s_id, ret);
}
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0, 0);
BUG_ON(ret); /* -ENOMEM */
@@ -7773,6 +7999,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
else
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
ret = account_leaf_items(trans, root, eb);
if (ret) {
printk_ratelimited(KERN_ERR "BTRFS: %s Error "
"%d accounting leaf items. Quota "
"is out of sync, rescan required.\n",
root->fs_info->sb->s_id, ret);
}
}
/* make block locked assertion in clean_tree_block happy */
if (!path->locks[level] &&
@@ -7898,6 +8131,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
int level;
bool root_dropped = false;

btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);

path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
@@ -8023,6 +8258,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}

/*
* Qgroup update accounting is run from
* delayed ref handling. This usually works
* out because delayed refs are normally the
* only way qgroup updates are added. However,
* we may have added updates during our tree
* walk so run qgroups here to make sure we
* don't lose any updates.
*/
ret = btrfs_delayed_qgroup_accounting(trans,
root->fs_info);
if (ret)
printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
"running qgroup updates "
"during snapshot delete. "
"Quota is out of sync, "
"rescan required.\n", ret);

btrfs_end_transaction_throttle(trans, tree_root);
if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8076,6 +8329,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
root_dropped = true;
out_end_trans:
ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
if (ret)
printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
"running qgroup updates "
"during snapshot delete. "
"Quota is out of sync, "
"rescan required.\n", ret);

btrfs_end_transaction_throttle(trans, tree_root);
out_free:
kfree(wc);