Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
Browse files Browse the repository at this point in the history
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: prevent RAID level downgrades when space is low
  Btrfs: account for missing devices in RAID allocation profiles
  Btrfs: EIO when we fail to read tree roots
  Btrfs: fix compiler warnings
  Btrfs: Make async snapshot ioctl more generic
  Btrfs: pwrite blocked when writing from the mmaped buffer of the same page
  Btrfs: Fix a crash when mounting a subvolume
  Btrfs: fix sync subvol/snapshot creation
  Btrfs: Fix page leak in compressed writeback path
  Btrfs: do not BUG if we fail to remove the orphan item for dead snapshots
  Btrfs: fixup return code for btrfs_del_orphan_item
  Btrfs: do not do fast caching if we are allocating blocks for tree_root
  Btrfs: deal with space cache errors better
  Btrfs: fix use after free in O_DIRECT
  • Loading branch information
Linus Torvalds committed Dec 14, 2010
2 parents 073f21a + 83a50de commit e13cf63
Show file tree
Hide file tree
Showing 11 changed files with 207 additions and 94 deletions.
11 changes: 6 additions & 5 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
__btree_submit_bio_done);
}

#ifdef CONFIG_MIGRATION
static int btree_migratepage(struct address_space *mapping,
struct page *newpage, struct page *page)
{
Expand All @@ -712,12 +713,9 @@ static int btree_migratepage(struct address_space *mapping,
if (page_has_private(page) &&
!try_to_release_page(page, GFP_KERNEL))
return -EAGAIN;
#ifdef CONFIG_MIGRATION
return migrate_page(mapping, newpage, page);
#else
return -ENOSYS;
#endif
}
#endif

static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
Expand Down Expand Up @@ -1009,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
BUG_ON(!root->node);
if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
free_extent_buffer(root->node);
return -EIO;
}
root->commit_root = btrfs_root_node(root);
return 0;
}
Expand Down
75 changes: 59 additions & 16 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ static int caching_kthread(void *data)

static int cache_block_group(struct btrfs_block_group_cache *cache,
struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int load_cache_only)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
Expand All @@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,

/*
* We can't do the read from on-disk cache during a commit since we need
* to have the normal tree locking.
* to have the normal tree locking. Also if we are currently trying to
* allocate blocks for the tree root we can't do the fast caching since
* we likely hold important locks.
*/
if (!trans->transaction->in_commit) {
if (!trans->transaction->in_commit &&
(root && root != root->fs_info->tree_root)) {
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
Expand Down Expand Up @@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
struct btrfs_root *root = block_group->fs_info->tree_root;
struct inode *inode = NULL;
u64 alloc_hint = 0;
int dcs = BTRFS_DC_ERROR;
int num_pages = 0;
int retries = 0;
int ret = 0;
Expand Down Expand Up @@ -2795,6 +2800,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,

spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED) {
/* We're not cached, don't bother trying to write stuff out */
dcs = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
goto out_put;
}
Expand All @@ -2821,17 +2828,16 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
num_pages, num_pages,
&alloc_hint);
if (!ret)
dcs = BTRFS_DC_SETUP;
btrfs_free_reserved_data_space(inode, num_pages);
out_put:
iput(inode);
out_free:
btrfs_release_path(root, path);
out:
spin_lock(&block_group->lock);
if (ret)
block_group->disk_cache_state = BTRFS_DC_ERROR;
else
block_group->disk_cache_state = BTRFS_DC_SETUP;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);

return ret;
Expand Down Expand Up @@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)

u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
u64 num_devices = root->fs_info->fs_devices->rw_devices;
/*
* we add in the count of missing devices because we want
* to make sure that any RAID levels on a degraded FS
* continue to be honored.
*/
u64 num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;

if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
Expand Down Expand Up @@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
* space back to the block group, otherwise we will leak space.
*/
if (!alloc && cache->cached == BTRFS_CACHE_NO)
cache_block_group(cache, trans, 1);
cache_block_group(cache, trans, NULL, 1);

byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
Expand Down Expand Up @@ -4930,11 +4942,31 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
btrfs_get_block_group(block_group);
search_start = block_group->key.objectid;

/*
* this can happen if we end up cycling through all the
* raid types, but we want to make sure we only allocate
* for the proper type.
*/
if (!block_group_bits(block_group, data)) {
u64 extra = BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10;

/*
* if they asked for extra copies and this block group
* doesn't provide them, bail. This does allow us to
* fill raid0 from raid1.
*/
if ((data & extra) && !(block_group->flags & extra))
goto loop;
}

have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
u64 free_percent;

ret = cache_block_group(block_group, trans, 1);
ret = cache_block_group(block_group, trans,
orig_root, 1);
if (block_group->cached == BTRFS_CACHE_FINISHED)
goto have_block_group;

Expand All @@ -4958,7 +4990,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (loop > LOOP_CACHING_NOWAIT ||
(loop > LOOP_FIND_IDEAL &&
atomic_read(&space_info->caching_threads) < 2)) {
ret = cache_block_group(block_group, trans, 0);
ret = cache_block_group(block_group, trans,
orig_root, 0);
BUG_ON(ret);
}
found_uncached_bg = true;
Expand Down Expand Up @@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
u64 num_bytes = ins->offset;

block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
cache_block_group(block_group, trans, 0);
cache_block_group(block_group, trans, NULL, 0);
caching_ctl = get_caching_control(block_group);

if (!caching_ctl) {
Expand Down Expand Up @@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
NULL, NULL);
BUG_ON(ret < 0);
if (ret > 0) {
ret = btrfs_del_orphan_item(trans, tree_root,
root->root_key.objectid);
BUG_ON(ret);
/* if we fail to delete the orphan item this time
* around, it'll get picked up the next time.
*
* The most common failure here is just -ENOENT.
*/
btrfs_del_orphan_item(trans, tree_root,
root->root_key.objectid);
}
}

Expand Down Expand Up @@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;

num_devices = root->fs_info->fs_devices->rw_devices;
/*
* we add in the count of missing devices because we want
* to make sure that any RAID levels on a degraded FS
* continue to be honored.
*/
num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;

if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
Expand Down Expand Up @@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
break;
if (ret != 0)
goto error;

leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
cache = kzalloc(sizeof(*cache), GFP_NOFS);
Expand Down
92 changes: 60 additions & 32 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
struct page **prepared_pages,
struct iov_iter *i)
{
size_t copied;
size_t copied = 0;
int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1);
int total_copied = 0;

while (write_bytes > 0) {
size_t count = min_t(size_t,
PAGE_CACHE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[pg];
again:
if (unlikely(iov_iter_fault_in_readable(i, count)))
return -EFAULT;

/* Copy data from userspace to the current page */
copied = iov_iter_copy_from_user(page, i, offset, count);
/*
* Copy data from userspace to the current page
*
* Disable pagefault to avoid recursive lock since
* the pages are already locked
*/
pagefault_disable();
copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
pagefault_enable();

/* Flush processor's dcache for this page */
flush_dcache_page(page);
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;

/* Return to btrfs_file_aio_write to fault page */
if (unlikely(copied == 0)) {
count = min_t(size_t, PAGE_CACHE_SIZE - offset,
iov_iter_single_seg_count(i));
goto again;
break;
}

if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
Expand All @@ -81,7 +85,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
offset = 0;
}
}
return 0;
return total_copied;
}

/*
Expand Down Expand Up @@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
unsigned long last_index;
int will_write;
int buffered = 0;
int copied = 0;
int dirty_pages = 0;

will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
Expand Down Expand Up @@ -970,45 +976,67 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);

ret = btrfs_delalloc_reserve_space(inode, write_bytes);
/*
* Fault pages before locking them in prepare_pages
* to avoid recursive lock
*/
if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
ret = -EFAULT;
goto out;
}

ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
if (ret)
goto out;

ret = prepare_pages(root, file, pages, num_pages,
pos, first_index, last_index,
write_bytes);
if (ret) {
btrfs_delalloc_release_space(inode, write_bytes);
btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
goto out;
}

ret = btrfs_copy_from_user(pos, num_pages,
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
if (ret == 0) {
dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;

if (num_pages > dirty_pages) {
if (copied > 0)
atomic_inc(
&BTRFS_I(inode)->outstanding_extents);
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
}

if (copied > 0) {
dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
dirty_pages, pos, copied);
}

btrfs_drop_pages(pages, num_pages);
if (ret) {
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}

if (will_write) {
filemap_fdatawrite_range(inode->i_mapping, pos,
pos + write_bytes - 1);
} else {
balance_dirty_pages_ratelimited_nr(inode->i_mapping,
num_pages);
if (num_pages <
(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
btrfs_throttle(root);
if (copied > 0) {
if (will_write) {
filemap_fdatawrite_range(inode->i_mapping, pos,
pos + copied - 1);
} else {
balance_dirty_pages_ratelimited_nr(
inode->i_mapping,
dirty_pages);
if (dirty_pages <
(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
btrfs_throttle(root);
}
}

pos += write_bytes;
num_written += write_bytes;
pos += copied;
num_written += copied;

cond_resched();
}
Expand Down
12 changes: 7 additions & 5 deletions fs/btrfs/free-space-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
(unsigned long long)BTRFS_I(inode)->generation,
(unsigned long long)generation,
(unsigned long long)block_group->key.objectid);
goto out;
goto free_cache;
}

if (!num_entries)
Expand Down Expand Up @@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
return 0;
}

node = rb_first(&block_group->free_space_offset);
if (!node) {
iput(inode);
return 0;
}

last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
filemap_write_and_wait(inode->i_mapping);
btrfs_wait_ordered_range(inode, inode->i_size &
Expand All @@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
*/
first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);

node = rb_first(&block_group->free_space_offset);
if (!node)
goto out_free;

/*
* Lock all pages first so we can lock the extent safely.
*
Expand Down
Loading

0 comments on commit e13cf63

Please sign in to comment.