Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Browse files Browse the repository at this point in the history

Pull btrfs updates from Chris Mason:
 "I held off on my rc5 pull because I hit an oops during log recovery
  after a crash.  I wanted to make sure it wasn't a regression because
  we have some logging fixes in here.

  It turns out that a commit during the merge window just made it much
  more likely to trigger directory logging instead of full commits,
  which exposed an old bug.

  The new backref walking code got some additional fixes.  This should
  be the final set of them.

  Josef fixed up a corner case where our O_DIRECT writes and buffered reads
  could expose old file contents (not stale, just not the most recent).
  He and Liu Bo fixed crashes during tree log recovery as well.

  Ilya fixed errors while we resume disk balancing operations on
  readonly mounts."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: run delayed directory updates during log replay
  Btrfs: hold a ref on the inode during writepages
  Btrfs: fix tree log remove space corner case
  Btrfs: fix wrong check during log recovery
  Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS
  Btrfs: resume balance on rw (re)mounts properly
  Btrfs: restore restriper state on all mounts
  Btrfs: fix dio write vs buffered read race
  Btrfs: don't count I/O statistic read errors for missing devices
  Btrfs: resolve tree mod log locking issue in btrfs_next_leaf
  Btrfs: fix tree mod log rewind of ADD operations
  Btrfs: leave critical region in btrfs_find_all_roots as soon as possible
  Btrfs: always put insert_ptr modifications into the tree mod log
  Btrfs: fix tree mod log for root replacements at leaf level
  Btrfs: support root level changes in __resolve_indirect_ref
  Btrfs: avoid waiting for delayed refs when we must not
  • Loading branch information
Linus Torvalds committed Jul 5, 2012
2 parents 62ad644 + b630556 commit 5eecb9c
Show file tree
Hide file tree
Showing 13 changed files with 258 additions and 201 deletions.
15 changes: 9 additions & 6 deletions fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;

eb = path->nodes[level];
if (!eb) {
WARN_ON(1);
ret = 1;
goto out;
while (!eb) {
if (!level) {
WARN_ON(1);
ret = 1;
goto out;
}
level--;
eb = path->nodes[level];
}

ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
Expand Down Expand Up @@ -835,6 +839,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
}
ret = __add_delayed_refs(head, delayed_ref_seq,
&prefs_delayed);
mutex_unlock(&head->mutex);
if (ret) {
spin_unlock(&delayed_refs->lock);
goto out;
Expand Down Expand Up @@ -928,8 +933,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
}

out:
if (head)
mutex_unlock(&head->mutex);
btrfs_free_path(path);
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
Expand Down
60 changes: 35 additions & 25 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
if (!looped && !tm)
return 0;
/*
* we must have key remove operations in the log before the
* replace operation.
* if there are no tree operation for the oldest root, we simply
* return it. this should only happen if that (old) root is at
* level 0.
*/
BUG_ON(!tm);
if (!tm)
break;

/*
* if there's an operation that's not a root replacement, we
* found the oldest version of our root. normally, we'll find a
* MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
*/
if (tm->op != MOD_LOG_ROOT_REPLACE)
break;

Expand Down Expand Up @@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
tm->generation);
break;
case MOD_LOG_KEY_ADD:
if (tm->slot != n - 1) {
o_dst = btrfs_node_key_ptr_offset(tm->slot);
o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
memmove_extent_buffer(eb, o_dst, o_src, p_size);
}
/* if a move operation is needed it's in the log */
n--;
break;
case MOD_LOG_MOVE_KEYS:
Expand Down Expand Up @@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
}

tm = tree_mod_log_search(root->fs_info, logical, time_seq);
/*
* there was an item in the log when __tree_mod_log_oldest_root
* returned. this one must not go away, because the time_seq passed to
* us must be blocking its removal.
*/
BUG_ON(!tm);

if (old_root)
eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
root->nodesize);
eb = alloc_dummy_extent_buffer(logical, root->nodesize);
else
eb = btrfs_clone_extent_buffer(root->node);
btrfs_tree_read_unlock(root->node);
Expand All @@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
__tree_mod_log_rewind(eb, time_seq, tm);
if (tm)
__tree_mod_log_rewind(eb, time_seq, tm);
else
WARN_ON(btrfs_header_level(eb) != 0);
extent_buffer_get(eb);

return eb;
Expand Down Expand Up @@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
static void insert_ptr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key *key, u64 bytenr,
int slot, int level, int tree_mod_log)
int slot, int level)
{
struct extent_buffer *lower;
int nritems;
Expand All @@ -3008,15 +3006,15 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
BUG_ON(slot > nritems);
BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
if (slot != nritems) {
if (tree_mod_log && level)
if (level)
tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
slot, nritems - slot);
memmove_extent_buffer(lower,
btrfs_node_key_ptr_offset(slot + 1),
btrfs_node_key_ptr_offset(slot),
(nritems - slot) * sizeof(struct btrfs_key_ptr));
}
if (tree_mod_log && level) {
if (level) {
ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
MOD_LOG_KEY_ADD);
BUG_ON(ret < 0);
Expand Down Expand Up @@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(split);

insert_ptr(trans, root, path, &disk_key, split->start,
path->slots[level + 1] + 1, level + 1, 1);
path->slots[level + 1] + 1, level + 1);

if (path->slots[level] >= mid) {
path->slots[level] -= mid;
Expand Down Expand Up @@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(l, mid);
btrfs_item_key(right, &disk_key, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
path->slots[1] + 1, 1, 0);
path->slots[1] + 1, 1);

btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
Expand Down Expand Up @@ -3848,7 +3846,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
path->slots[1] + 1, 1, 0);
path->slots[1] + 1, 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
Expand All @@ -3857,7 +3855,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
} else {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
path->slots[1], 1, 0);
path->slots[1], 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
Expand Down Expand Up @@ -5121,6 +5119,18 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,

if (!path->skip_locking) {
ret = btrfs_try_tree_read_lock(next);
if (!ret && time_seq) {
/*
* If we don't get the lock, we may be racing
* with push_leaf_left, holding that lock while
* itself waiting for the leaf we've currently
* locked. To solve this situation, we give up
* on our lock and cycle.
*/
btrfs_release_path(path);
cond_resched();
goto again;
}
if (!ret) {
btrfs_set_path_blocking(path);
btrfs_tree_read_lock(next);
Expand Down
34 changes: 21 additions & 13 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -2354,12 +2354,17 @@ int open_ctree(struct super_block *sb,
BTRFS_CSUM_TREE_OBJECTID, csum_root);
if (ret)
goto recovery_tree_root;

csum_root->track_dirty = 1;

fs_info->generation = generation;
fs_info->last_trans_committed = generation;

ret = btrfs_recover_balance(fs_info);
if (ret) {
printk(KERN_WARNING "btrfs: failed to recover balance\n");
goto fail_block_groups;
}

ret = btrfs_init_dev_stats(fs_info);
if (ret) {
printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
Expand Down Expand Up @@ -2485,20 +2490,23 @@ int open_ctree(struct super_block *sb,
goto fail_trans_kthread;
}

if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
err = btrfs_orphan_cleanup(fs_info->fs_root);
if (!err)
err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
if (sb->s_flags & MS_RDONLY)
return 0;

if (!err)
err = btrfs_recover_balance(fs_info->tree_root);
down_read(&fs_info->cleanup_work_sem);
if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
(ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
up_read(&fs_info->cleanup_work_sem);
close_ctree(tree_root);
return ret;
}
up_read(&fs_info->cleanup_work_sem);

if (err) {
close_ctree(tree_root);
return err;
}
ret = btrfs_resume_balance_async(fs_info);
if (ret) {
printk(KERN_WARNING "btrfs: failed to resume balance\n");
close_ctree(tree_root);
return ret;
}

return 0;
Expand Down
11 changes: 6 additions & 5 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2347,12 +2347,10 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
return count;
}


static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
unsigned long num_refs)
unsigned long num_refs,
struct list_head *first_seq)
{
struct list_head *first_seq = delayed_refs->seq_head.next;

spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
Expand Down Expand Up @@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
struct list_head *first_seq = NULL;
int ret;
u64 delayed_start;
int run_all = count == (unsigned long)-1;
Expand Down Expand Up @@ -2436,8 +2435,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
*/
consider_waiting = 1;
num_refs = delayed_refs->num_entries;
first_seq = root->fs_info->tree_mod_seq_list.next;
} else {
wait_for_more_refs(delayed_refs, num_refs);
wait_for_more_refs(delayed_refs,
num_refs, first_seq);
/*
* after waiting, things have changed. we
* dropped the lock and someone else might have
Expand Down
14 changes: 14 additions & 0 deletions fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
{
struct inode *inode = mapping->host;
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
Expand All @@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
int scanned = 0;
int tag;

/*
* We have to hold onto the inode so that ordered extents can do their
* work when the IO finishes. The alternative to this is failing to add
* an ordered extent if the igrab() fails there and that is a huge pain
* to deal with, so instead just hold onto the inode throughout the
* writepages operation. If it fails here we are freeing up the inode
* anyway and we'd rather not waste our time writing out stuff that is
* going to be truncated anyway.
*/
if (!igrab(inode))
return 0;

pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
Expand Down Expand Up @@ -3428,6 +3441,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
index = 0;
goto retry;
}
btrfs_add_delayed_iput(inode);
return ret;
}

Expand Down
13 changes: 0 additions & 13 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
loff_t *ppos, size_t count, size_t ocount)
{
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct iov_iter i;
ssize_t written;
ssize_t written_buffered;
Expand All @@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
count, ocount);

/*
* the generic O_DIRECT will update in-memory i_size after the
* DIOs are done. But our endio handlers that update the on
* disk i_size never update past the in memory i_size. So we
* need one more update here to catch any additions to the
* file
*/
if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
mark_inode_dirty(inode);
}

if (written < 0 || written == count)
return written;

Expand Down
Loading

0 comments on commit 5eecb9c

Please sign in to comment.