Skip to content

Commit

Permalink
Btrfs: Don't pin pages in ram until the entire ordered extent is on disk.
Browse files Browse the repository at this point in the history

Checksum items are not inserted until the entire ordered extent is on disk,
but individual pages might be clean and available for reclaim long before
the whole extent is on disk.

In order to allow those pages to be freed, we need to be able to search
the list of ordered extents to find the checksum that is going to be inserted
in the tree.  This way if the page needs to be read back in before
the checksums are in the btree, we'll be able to verify the checksum on
the page.

This commit adds the ability to search the pending ordered extents for
a given offset in the file, and changes btrfs_releasepage to allow
ordered pages to be freed.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Chris Mason committed Sep 25, 2008
1 parent f929574 commit ba1da2f
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 19 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
end_of_last_block = start_pos + num_bytes - 1;

lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
trans = btrfs_start_transaction(root, 1);
if (!trans) {
err = -ENOMEM;
goto out_unlock;
Expand Down
37 changes: 21 additions & 16 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,23 +382,20 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
return btrfs_map_bio(root, rw, bio, mirror_num, 0);
}

static int add_pending_csums(struct btrfs_trans_handle *trans,
static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
struct inode *inode, u64 file_offset,
struct list_head *list)
{
struct list_head *cur;
struct btrfs_ordered_sum *sum;

btrfs_set_trans_block_group(trans, inode);
while(!list_empty(list)) {
cur = list->next;
list_for_each(cur, list) {
sum = list_entry(cur, struct btrfs_ordered_sum, list);
mutex_lock(&BTRFS_I(inode)->csum_mutex);
btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
inode, sum);
mutex_unlock(&BTRFS_I(inode)->csum_mutex);
list_del(&sum->list);
kfree(sum);
}
return 0;
}
Expand Down Expand Up @@ -498,9 +495,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
int ret;

ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
if (!ret) {
if (!ret)
return 0;
}

trans = btrfs_join_transaction(root, 1);

Expand Down Expand Up @@ -571,6 +567,18 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
path = btrfs_alloc_path();
item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
if (IS_ERR(item)) {
/*
* It is possible there is an ordered extent that has
* not yet finished for this range in the file. If so,
* that extent will have a csum cached, and it will insert
* the sum after all the blocks in the extent are fully
* on disk. So, look for an ordered extent and use the
* sum if found.
*/
ret = btrfs_find_ordered_sum(inode, start, &csum);
if (ret == 0)
goto found;

ret = PTR_ERR(item);
/* a csum that isn't present is a preallocated region. */
if (ret == -ENOENT || ret == -EFBIG)
Expand All @@ -582,6 +590,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
}
read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
BTRFS_CRC32_SIZE);
found:
set_state_private(io_tree, start, csum);
out:
if (path)
Expand Down Expand Up @@ -888,7 +897,7 @@ static void fill_inode_item(struct extent_buffer *leaf,
BTRFS_I(inode)->block_group->key.objectid);
}

int btrfs_update_inode(struct btrfs_trans_handle *trans,
int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode)
{
Expand Down Expand Up @@ -1567,6 +1576,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
inode->i_mapping, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
inode->i_mapping, GFP_NOFS);
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
mutex_init(&BTRFS_I(inode)->csum_mutex);
return 0;
}
Expand Down Expand Up @@ -1868,6 +1878,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_mapping, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
inode->i_mapping, GFP_NOFS);
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
mutex_init(&BTRFS_I(inode)->csum_mutex);
BTRFS_I(inode)->delalloc_bytes = 0;
BTRFS_I(inode)->disk_i_size = 0;
Expand Down Expand Up @@ -2097,6 +2108,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
BTRFS_I(inode)->delalloc_bytes = 0;
BTRFS_I(inode)->disk_i_size = 0;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
}
dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
Expand Down Expand Up @@ -2618,14 +2630,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)

static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
struct btrfs_ordered_extent *ordered;

ordered = btrfs_lookup_ordered_extent(page->mapping->host,
page_offset(page));
if (ordered) {
btrfs_put_ordered_extent(ordered);
return 0;
}
return __btrfs_releasepage(page, gfp_flags);
}

Expand Down Expand Up @@ -3078,6 +3082,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
BTRFS_I(inode)->delalloc_bytes = 0;
BTRFS_I(inode)->disk_i_size = 0;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
}
dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
Expand Down
48 changes: 46 additions & 2 deletions fs/btrfs/ordered-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,18 @@ printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file

int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{
if (atomic_dec_and_test(&entry->refs))
struct list_head *cur;
struct btrfs_ordered_sum *sum;

if (atomic_dec_and_test(&entry->refs)) {
while(!list_empty(&entry->list)) {
cur = entry->list.next;
sum = list_entry(cur, struct btrfs_ordered_sum, list);
list_del(&sum->list);
kfree(sum);
}
kfree(entry);
}
return 0;
}

Expand Down Expand Up @@ -444,8 +454,9 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* if we find an ordered extent then we can't update disk i_size
* yet
*/
node = &ordered->rb_node;
while(1) {
node = rb_prev(&ordered->rb_node);
node = rb_prev(node);
if (!node)
break;
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
Expand Down Expand Up @@ -495,3 +506,36 @@ int btrfs_ordered_update_i_size(struct inode *inode,
mutex_unlock(&tree->mutex);
return 0;
}

/*
 * Search the pending ordered extent covering 'offset' in 'inode' for a
 * cached checksum.
 *
 * The ordered extent is looked up with btrfs_lookup_ordered_extent(), and
 * its list of btrfs_ordered_sum entries is walked from the tail towards the
 * head while holding the inode's ordered tree mutex.  The first entry whose
 * [file_offset, file_offset + len) range contains 'offset' supplies the sum.
 *
 * Returns 0 and stores the checksum in '*sum' when one is found.  Returns 1
 * (leaving '*sum' untouched) when there is no ordered extent for 'offset'
 * or none of its cached sums cover it.
 */
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
{
	struct btrfs_ordered_sum *ordered_sum;
	struct btrfs_sector_sum *sector_sums;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
	struct list_head *cur;
	int ret = 1;
	int index;

	ordered = btrfs_lookup_ordered_extent(inode, offset);
	if (!ordered)
		return 1;

	mutex_lock(&tree->mutex);
	list_for_each_prev(cur, &ordered->list) {
		ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
		if (offset >= ordered_sum->file_offset &&
		    offset < ordered_sum->file_offset + ordered_sum->len) {
			/* one sum per sector, indexed from the start of this entry */
			index = (offset - ordered_sum->file_offset) /
				BTRFS_I(inode)->root->sectorsize;
			/*
			 * NOTE(review): this indexes past the address of the
			 * 'sums' member, so it presumably relies on the
			 * per-sector sums being laid out contiguously starting
			 * there -- confirm against the struct definition.
			 */
			sector_sums = &ordered_sum->sums;
			*sum = sector_sums[index].sum;
			ret = 0;
			break;
		}
	}
	mutex_unlock(&tree->mutex);

	/*
	 * Drop the reference the lookup handed us; without this the ordered
	 * extent (and the sums hanging off it) would never be freed.
	 */
	btrfs_put_ordered_extent(ordered);
	return ret;
}

1 change: 1 addition & 0 deletions fs/btrfs/ordered-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,5 @@ int btrfs_add_ordered_pending(struct inode *inode,
u64 start, u64 len);
int btrfs_ordered_update_i_size(struct inode *inode,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
#endif

0 comments on commit ba1da2f

Please sign in to comment.