Skip to content

Commit

Permalink
Btrfs: wait on ordered extents at the last possible moment
Browse files Browse the repository at this point in the history
Since we don't actually copy the extent information from the source tree in
the fast case we don't need to wait for ordered io to be completed in order
to fsync, we just need to wait for the io to be completed.  So when we're
logging our file just attach all of the ordered extents to the log, and then
when the log syncs just wait for IO_DONE on the ordered extents and then
write the super.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
  • Loading branch information
Josef Bacik committed Feb 20, 2013
1 parent dfd7982 commit 2ab28f3
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 9 deletions.
3 changes: 3 additions & 0 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,9 @@ struct btrfs_root {

struct list_head root_list;

spinlock_t log_extents_lock[2];
struct list_head logged_list[2];

spinlock_t orphan_lock;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
Expand Down
4 changes: 4 additions & 0 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1178,9 +1178,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,

INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->root_list);
INIT_LIST_HEAD(&root->logged_list[0]);
INIT_LIST_HEAD(&root->logged_list[1]);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->accounting_lock);
spin_lock_init(&root->log_extents_lock[0]);
spin_lock_init(&root->log_extents_lock[1]);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
Expand Down
30 changes: 25 additions & 5 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1655,16 +1655,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
struct btrfs_trans_handle *trans;
bool full_sync = 0;

trace_btrfs_sync_file(file, datasync);

/*
* We write the dirty pages in the range and wait until they complete
* out of the ->i_mutex. If so, we can flush the dirty pages by
* multi-task, and make the performance up.
* multi-task, and make the performance up. See
* btrfs_wait_ordered_range for an explanation of the ASYNC check.
*/
atomic_inc(&BTRFS_I(inode)->sync_writers);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags))
ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
atomic_dec(&BTRFS_I(inode)->sync_writers);
if (ret)
return ret;
Expand All @@ -1676,7 +1681,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* range being left.
*/
atomic_inc(&root->log_batch);
btrfs_wait_ordered_range(inode, start, end - start + 1);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
if (full_sync)
btrfs_wait_ordered_range(inode, start, end - start + 1);
atomic_inc(&root->log_batch);

/*
Expand Down Expand Up @@ -1743,13 +1751,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)

if (ret != BTRFS_NO_LOG_SYNC) {
if (ret > 0) {
/*
* If we didn't already wait for ordered extents we need
* to do that now.
*/
if (!full_sync)
btrfs_wait_ordered_range(inode, start,
end - start + 1);
ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_sync_log(trans, root);
if (ret == 0)
if (ret == 0) {
ret = btrfs_end_transaction(trans, root);
else
} else {
if (!full_sync)
btrfs_wait_ordered_range(inode, start,
end -
start + 1);
ret = btrfs_commit_transaction(trans, root);
}
}
} else {
ret = btrfs_end_transaction(trans, root);
Expand Down
8 changes: 8 additions & 0 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,8 @@ static noinline int submit_compressed_extents(struct inode *inode,
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
em->mod_start = em->start;
em->mod_len = em->len;

em->block_start = ins.objectid;
em->block_len = ins.offset;
Expand Down Expand Up @@ -892,6 +894,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
em->orig_start = em->start;
ram_size = ins.offset;
em->len = ins.offset;
em->mod_start = em->start;
em->mod_len = em->len;

em->block_start = ins.objectid;
em->block_len = ins.offset;
Expand Down Expand Up @@ -1338,6 +1342,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
em->block_start = disk_bytenr;
em->orig_block_len = disk_num_bytes;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->mod_start = em->start;
em->mod_len = em->len;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_FILLING, &em->flags);
em->generation = -1;
Expand Down Expand Up @@ -5966,6 +5972,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,

em->start = start;
em->orig_start = orig_start;
em->mod_start = start;
em->mod_len = len;
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
Expand Down
68 changes: 68 additions & 0 deletions fs/btrfs/ordered-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->file_offset = file_offset;
entry->start = start;
entry->len = len;
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
!(type == BTRFS_ORDERED_NOCOW))
entry->csum_bytes_left = disk_len;
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = igrab(inode);
Expand All @@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
INIT_LIST_HEAD(&entry->root_extent_list);
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
INIT_LIST_HEAD(&entry->log_list);

trace_btrfs_ordered_extent_add(inode, entry);

Expand Down Expand Up @@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode,
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
list_add_tail(&sum->list, &entry->list);
WARN_ON(entry->csum_bytes_left < sum->len);
entry->csum_bytes_left -= sum->len;
if (entry->csum_bytes_left == 0)
wake_up(&entry->wait);
spin_unlock_irq(&tree->lock);
}

Expand Down Expand Up @@ -405,6 +413,66 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
return ret == 0;
}

/* Needs to either be called under a log transaction or the log_mutex */
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode)
{
struct btrfs_ordered_inode_tree *tree;
struct btrfs_ordered_extent *ordered;
struct rb_node *n;
int index = log->log_transid % 2;

tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
spin_lock(&log->log_extents_lock[index]);
if (list_empty(&ordered->log_list)) {
list_add_tail(&ordered->log_list, &log->logged_list[index]);
atomic_inc(&ordered->refs);
}
spin_unlock(&log->log_extents_lock[index]);
}
spin_unlock_irq(&tree->lock);
}

void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
{
struct btrfs_ordered_extent *ordered;
int index = transid % 2;

spin_lock_irq(&log->log_extents_lock[index]);
while (!list_empty(&log->logged_list[index])) {
ordered = list_first_entry(&log->logged_list[index],
struct btrfs_ordered_extent,
log_list);
list_del_init(&ordered->log_list);
spin_unlock_irq(&log->log_extents_lock[index]);
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
btrfs_put_ordered_extent(ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
}

void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
{
struct btrfs_ordered_extent *ordered;
int index = transid % 2;

spin_lock_irq(&log->log_extents_lock[index]);
while (!list_empty(&log->logged_list[index])) {
ordered = list_first_entry(&log->logged_list[index],
struct btrfs_ordered_extent,
log_list);
list_del_init(&ordered->log_list);
spin_unlock_irq(&log->log_extents_lock[index]);
btrfs_put_ordered_extent(ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
}

/*
* used to drop a reference on an ordered extent. This will free
* the extent if the last reference is dropped
Expand Down
11 changes: 11 additions & 0 deletions fs/btrfs/ordered-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent
* has done its due diligence in updating
* the isize. */
#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
ordered extent */

struct btrfs_ordered_extent {
/* logical offset in the file */
Expand All @@ -96,6 +98,9 @@ struct btrfs_ordered_extent {
/* number of bytes that still need writing */
u64 bytes_left;

/* number of bytes that still need csumming */
u64 csum_bytes_left;

/*
* the end of the ordered extent which is behind it but
* didn't update disk_i_size. Please see the comment of
Expand All @@ -118,6 +123,9 @@ struct btrfs_ordered_extent {
/* list of checksums for insertion when the extent io is done */
struct list_head list;

/* If we need to wait on this to be done */
struct list_head log_list;

/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
wait_queue_head_t wait;

Expand Down Expand Up @@ -194,6 +202,9 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
int __init ordered_data_init(void);
void ordered_data_exit(void);
#endif
Loading

0 comments on commit 2ab28f3

Please sign in to comment.