
---
r: 139473
b: refs/heads/master
c: 5a3f23d
h: refs/heads/master
i:
  139471: 7fa730f
v: v3
Chris Mason committed Mar 31, 2009
1 parent d2e6617 commit b0ab4f5
Showing 9 changed files with 289 additions and 8 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 1a81af4d1d9c60d4313309f937a1fc5567205a87
refs/heads/master: 5a3f23d515a2ebf0c750db80579ca57b28cbce6d
18 changes: 18 additions & 0 deletions trunk/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,12 @@ struct btrfs_inode {
*/
struct list_head delalloc_inodes;

/*
* list for tracking inodes that must be sent to disk before a
* rename or truncate commit
*/
struct list_head ordered_operations;

/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;

@@ -122,6 +128,18 @@ struct btrfs_inode {
*/
u64 last_unlink_trans;

/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*
* yes, it's silly to have a single bitflag, but we might grow more
* of these.
*/
unsigned ordered_data_close:1;

struct inode vfs_inode;
};

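The list head added above is populated by btrfs_add_ordered_operation(), which this commit calls from file.c and inode.c further down; the helper itself lives in fs/btrfs/ordered-data.c, which is not among the hunks shown on this page. A minimal sketch of what such a helper needs to do, assuming only the fields introduced in this diff (the per-inode ordered_operations list head, and the per-fs ordered_extent_lock and ordered_operations list), might look like this — the function name here is invented for illustration:

/*
 * sketch only -- the real btrfs_add_ordered_operation() is in
 * fs/btrfs/ordered-data.c, which is not shown in this diff
 */
static void add_ordered_operation_sketch(struct btrfs_root *root,
					 struct inode *inode)
{
	struct btrfs_inode *bi = BTRFS_I(inode);

	/* the per-fs list reuses the existing ordered_extent_lock */
	spin_lock(&root->fs_info->ordered_extent_lock);

	/* only link the inode once; destroy_inode unlinks it again */
	if (list_empty(&bi->ordered_operations))
		list_add_tail(&bi->ordered_operations,
			      &root->fs_info->ordered_operations);

	spin_unlock(&root->fs_info->ordered_extent_lock);
}
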
35 changes: 35 additions & 0 deletions trunk/fs/btrfs/ctree.h
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum;

#define BTRFS_MAX_LEVEL 8

/*
* files bigger than this get some pre-flushing when they are added
* to the ordered operations list. That way we limit the total
* work done by the commit
*/
#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)

/* holds pointers to all of the tree roots */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL

@@ -727,6 +734,15 @@ struct btrfs_fs_info {
struct mutex volume_mutex;
struct mutex tree_reloc_mutex;

/*
* this protects the ordered operations list only while we are
* processing all of the entries on it. This way we make
* sure the commit code doesn't find the list temporarily empty
* because another function happens to be doing non-waiting preflush
* before jumping into the main commit.
*/
struct mutex ordered_operations_mutex;

struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
@@ -741,9 +757,28 @@ struct btrfs_fs_info {
* ordered extents
*/
spinlock_t ordered_extent_lock;

/*
* all of the data=ordered extents pending writeback.
* These can span multiple transactions and basically include
* every dirty data page that isn't from nodatacow
*/
struct list_head ordered_extents;

/*
* all of the inodes that have delalloc bytes. It is possible for
* this list to be empty even when there is still dirty data=ordered
* extents waiting to finish IO.
*/
struct list_head delalloc_inodes;

/*
* special rename and truncate targets that must be on disk before
* we're allowed to commit. This is basically the ext3 style
* data=ordered list.
*/
struct list_head ordered_operations;

/*
* there is a pool of worker threads for checksumming during writes
* and a pool for checksumming after reads. This is because readers
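
The drain that the ordered_operations_mutex comment above describes happens at commit time in fs/btrfs/ordered-data.c (btrfs_run_ordered_operations()), which is behind the collapsed part of this diff. A rough sketch of the pattern the comment relies on, with inode reference counting (igrab/iput) and the re-queueing of busy inodes deliberately omitted, and with the function name chosen here only for illustration:

/* sketch of the commit-time drain protected by ordered_operations_mutex */
static void run_ordered_operations_sketch(struct btrfs_root *root, int wait)
{
	struct btrfs_inode *btrfs_inode;
	struct inode *inode;
	LIST_HEAD(splice);

	mutex_lock(&root->fs_info->ordered_operations_mutex);
	spin_lock(&root->fs_info->ordered_extent_lock);

	/* take the whole list so new entries can keep arriving behind us */
	list_splice_init(&root->fs_info->ordered_operations, &splice);

	while (!list_empty(&splice)) {
		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
					 ordered_operations);
		inode = &btrfs_inode->vfs_inode;
		list_del_init(&btrfs_inode->ordered_operations);

		/* drop the spinlock while we touch the page cache */
		spin_unlock(&root->fs_info->ordered_extent_lock);
		if (wait)
			filemap_write_and_wait(inode->i_mapping);
		else
			filemap_flush(inode->i_mapping);
		spin_lock(&root->fs_info->ordered_extent_lock);
	}

	spin_unlock(&root->fs_info->ordered_extent_lock);
	mutex_unlock(&root->fs_info->ordered_operations_mutex);
}

Holding ordered_operations_mutex across the whole pass is what keeps a non-waiting preflush from making the list look empty to the commit, exactly as the ctree.h comment explains.
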
2 changes: 2 additions & 0 deletions trunk/fs/btrfs/disk-io.c
@@ -1572,6 +1572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
@@ -1643,6 +1644,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
insert_inode_hash(fs_info->btree_inode);

mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->drop_mutex);
mutex_init(&fs_info->pinned_mutex);
26 changes: 26 additions & 0 deletions trunk/fs/btrfs/file.c
@@ -1161,6 +1161,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
page_cache_release(pinned[1]);
*ppos = pos;

/*
* we want to make sure fsync finds this change
* but we haven't joined a transaction running right now.
*
* Later on, someone is sure to update the inode and get the
* real transid recorded.
*
* We set last_trans now to the fs_info generation + 1,
* this will either be one more than the running transaction
* or the generation used for the next transaction if there isn't
* one running right now.
*/
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;

if (num_written > 0 && will_write) {
struct btrfs_trans_handle *trans;

@@ -1194,6 +1208,18 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,

int btrfs_release_file(struct inode *inode, struct file *filp)
{
/*
* ordered_data_close is set by setattr when we are about to truncate
* a file from a non-zero size to a zero size. This tries to
* flush down new bytes that may have been written if the
* application were using truncate to replace a file in place.
*/
if (BTRFS_I(inode)->ordered_data_close) {
BTRFS_I(inode)->ordered_data_close = 0;
btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(inode->i_mapping);
}
if (filp->private_data)
btrfs_ioctl_trans_end(filp);
return 0;
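
The last_trans = generation + 1 assignment added to btrfs_file_write() above only pays off because fsync compares the inode's last_trans with the most recently committed transaction before deciding it has nothing to log. That comparison sits elsewhere in file.c and is not part of the hunks shown here; a hedged sketch of the test the comment relies on (last_trans_committed is the fs_info field used for this in the surrounding kernel sources, and the function name below is made up):

/*
 * sketch: fsync can skip logging only if the inode was last changed in a
 * transaction that has already committed.  Writing generation + 1 into
 * last_trans makes this test fail until the next commit really happens.
 */
static int fsync_can_skip_sketch(struct inode *inode, struct btrfs_root *root)
{
	return BTRFS_I(inode)->last_trans <=
	       root->fs_info->last_trans_committed;
}
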
81 changes: 74 additions & 7 deletions trunk/fs/btrfs/inode.c
@@ -2907,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
return err;

if (S_ISREG(inode->i_mode) &&
attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
err = btrfs_cont_expand(inode, attr->ia_size);
if (err)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
if (attr->ia_size > inode->i_size) {
err = btrfs_cont_expand(inode, attr->ia_size);
if (err)
return err;
} else if (inode->i_size > 0 &&
attr->ia_size == 0) {

/* we're truncating a file that used to have good
* data down to zero. Make sure it gets into
* the ordered flush list so that any new writes
* get down to disk quickly.
*/
BTRFS_I(inode)->ordered_data_close = 1;
}
}

err = inode_setattr(inode, attr);
@@ -3050,6 +3060,7 @@ static noinline void init_btrfs_i(struct inode *inode)
extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
inode->i_mapping, GFP_NOFS);
INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
mutex_init(&BTRFS_I(inode)->extent_mutex);
mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -4419,6 +4430,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
}
ClearPageChecked(page);
set_page_dirty(page);

BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);

out_unlock:
@@ -4444,6 +4457,27 @@ static void btrfs_truncate(struct inode *inode)
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);

trans = btrfs_start_transaction(root, 1);

/*
* setattr is responsible for setting the ordered_data_close flag,
* but that is only tested during the last file release. That
* could happen well after the next commit, leaving a great big
* window where new writes may get lost if someone chooses to write
* to this file after truncating to zero.
*
* The inode doesn't have any dirty data here, and so if we commit
* this is a noop. If someone immediately starts writing to the inode
* it is very likely we'll catch some of their writes in this
* transaction, and the commit will find this file on the ordered
* data list with good things to send down.
*
* This is a best-effort solution; there is still a window where
* using truncate to replace the contents of the file will
* end up with a zero length file after a crash.
*/
if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
btrfs_add_ordered_operation(trans, root, inode);

btrfs_set_trans_block_group(trans, inode);
btrfs_i_size_write(inode, inode->i_size);

@@ -4520,12 +4554,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->i_acl = BTRFS_ACL_NOT_CACHED;
ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
INIT_LIST_HEAD(&ei->i_orphan);
INIT_LIST_HEAD(&ei->ordered_operations);
return &ei->vfs_inode;
}

void btrfs_destroy_inode(struct inode *inode)
{
struct btrfs_ordered_extent *ordered;
struct btrfs_root *root = BTRFS_I(inode)->root;

WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);

@@ -4536,13 +4573,24 @@ void btrfs_destroy_inode(struct inode *inode)
BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
posix_acl_release(BTRFS_I(inode)->i_default_acl);

spin_lock(&BTRFS_I(inode)->root->list_lock);
/*
* Make sure we're properly removed from the ordered operation
* lists.
*/
smp_mb();
if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&BTRFS_I(inode)->ordered_operations);
spin_unlock(&root->fs_info->ordered_extent_lock);
}

spin_lock(&root->list_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
" list\n", inode->i_ino);
dump_stack();
}
spin_unlock(&BTRFS_I(inode)->root->list_lock);
spin_unlock(&root->list_lock);

while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -4667,8 +4715,27 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret)
goto out_unlock;

/*
* we're using rename to replace one file with another, and the
* replacement file is large. Start IO on it now so
* we don't add too much work to the end of the transaction
*/
if (new_inode && old_inode && S_ISREG(old_inode->i_mode) &&
new_inode->i_size &&
old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(old_inode->i_mapping);

trans = btrfs_start_transaction(root, 1);

/*
* make sure the inode gets flushed if it is replacing
* something.
*/
if (new_inode && new_inode->i_size &&
old_inode && S_ISREG(old_inode->i_mode)) {
btrfs_add_ordered_operation(trans, root, old_inode);
}

/*
* this is an ugly little race, but the rename is required to make
* sure that if we crash, the inode is either at the old name
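
The consumer of all of this is the transaction commit in fs/btrfs/transaction.c, one of the files hidden behind the collapsed remainder of the diff. Assuming a drain helper shaped like the sketch given after the ctree.h hunks, the commit path would use it twice, roughly like this (function names are placeholders, not the real call sites):

/*
 * sketch: how the commit might drive the ordered operations list.  The
 * real calls are made from btrfs_commit_transaction(), not shown here.
 */
static void commit_ordered_flush_sketch(struct btrfs_root *root)
{
	/* cheap, non-waiting preflush before the commit takes its locks */
	run_ordered_operations_sketch(root, 0);

	/* ... the rest of the commit preparation runs here ... */

	/* final pass: rename/truncate targets must be fully on disk */
	run_ordered_operations_sketch(root, 1);
}
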
(diffs for the remaining changed files in this commit are not shown here)
