Skip to content

Commit

Permalink
ext4: Add ordered mode support for delalloc
Browse files Browse the repository at this point in the history
This provides a new ordered mode implementation which gets rid of using
buffer heads to enforce the ordering between a metadata change and the
related data change.  Instead, in the new ordering mode, it keeps track
of all of the inodes touched by each transaction on a list, and when
that transaction is committed, it flushes all of the dirty pages for
those inodes.  In addition, the new ordered mode reverses the lock
ordering of the page lock and transaction lock, which provides easier
support for delayed allocation.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
  • Loading branch information
Aneesh Kumar K.V authored and Theodore Ts'o committed Jul 11, 2008
1 parent 61628a3 commit cd1aac3
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 10 deletions.
30 changes: 25 additions & 5 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2043,11 +2043,12 @@ static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
return !buffer_mapped(bh) || buffer_delay(bh);
}

/* FIXME!! only support data=writeback mode */
/*
* Gets called via ext4_da_writepages after taking the page lock.
* We may end up doing block allocation here in case
* mpage_da_map_blocks failed to allocate blocks.
*
* We also get called via journal_submit_inode_data_buffers
*/
static int ext4_da_writepage(struct page *page,
struct writeback_control *wbc)
Expand All @@ -2066,6 +2067,7 @@ static int ext4_da_writepage(struct page *page,
* ext4_da_writepages() but directly (shrink_page_list).
* We cannot easily start a transaction here so we just skip
* writing the page in case we would have to do so.
* We reach here also via journal_submit_inode_data_buffers
*/
size = i_size_read(inode);

Expand All @@ -2081,8 +2083,11 @@ static int ext4_da_writepage(struct page *page,
* We can't do block allocation under
* page lock without a handle . So redirty
* the page and return
* We may reach here when we do a journal commit
* via journal_submit_inode_data_buffers.
* If we don't have mapping block we just ignore
* them
*/
BUG_ON(wbc->sync_mode != WB_SYNC_NONE);
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
Expand All @@ -2097,7 +2102,6 @@ static int ext4_da_writepage(struct page *page,
return ret;
}


/*
* For now just follow the DIO way to estimate the max credits
* needed to write out EXT4_MAX_WRITEBACK_PAGES.
Expand Down Expand Up @@ -2130,7 +2134,7 @@ static int ext4_da_writepages(struct address_space *mapping,
return 0;

/*
* Estimate the worse case needed credits to write out
* Estimate the worse case needed credits to write out
* EXT4_MAX_BUF_BLOCKS pages
*/
needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
Expand All @@ -2152,6 +2156,19 @@ static int ext4_da_writepages(struct address_space *mapping,
ret = PTR_ERR(handle);
goto out_writepages;
}
if (ext4_should_order_data(inode)) {
/*
* With ordered mode we need to add
* the inode to the journal handle
* when we do block allocation.
*/
ret = ext4_jbd2_file_inode(handle, inode);
if (ret) {
ext4_journal_stop(handle);
goto out_writepages;
}

}
/*
* set the max number of dirty pages that can be written at a time
* to fit into the reserved transaction credits
Expand Down Expand Up @@ -2735,7 +2752,10 @@ static const struct address_space_operations ext4_da_aops = {

void ext4_set_aops(struct inode *inode)
{
if (ext4_should_order_data(inode))
if (ext4_should_order_data(inode) &&
test_opt(inode->i_sb, DELALLOC))
inode->i_mapping->a_ops = &ext4_da_aops;
else if (ext4_should_order_data(inode))
inode->i_mapping->a_ops = &ext4_ordered_aops;
else if (ext4_should_writeback_data(inode) &&
test_opt(inode->i_sb, DELALLOC))
Expand Down
38 changes: 33 additions & 5 deletions fs/jbd2/commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/crc32.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>

/*
* Default IO end handler for temporary BJ_IO buffer_heads.
Expand Down Expand Up @@ -184,6 +186,27 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
return ret;
}

/*
* write the filemap data using writepage() address_space_operations.
* We don't do block allocation here even for delalloc. We don't
* use writepages() because with dealyed allocation we may be doing
* block allocation in writepages().
*/
static int journal_submit_inode_data_buffers(struct address_space *mapping)
{
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
.range_start = 0,
.range_end = i_size_read(mapping->host),
.for_writepages = 1,
};

ret = generic_writepages(mapping, &wbc);
return ret;
}

/*
* Submit all the data buffers of inode associated with the transaction to
* disk.
Expand All @@ -192,7 +215,7 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
* our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
* operate on from being released while we write out pages.
*/
static int journal_submit_inode_data_buffers(journal_t *journal,
static int journal_submit_data_buffers(journal_t *journal,
transaction_t *commit_transaction)
{
struct jbd2_inode *jinode;
Expand All @@ -204,8 +227,13 @@ static int journal_submit_inode_data_buffers(journal_t *journal,
mapping = jinode->i_vfs_inode->i_mapping;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawrite_range(mapping, 0,
i_size_read(jinode->i_vfs_inode));
/*
* submit the inode data buffers. We use writepage
* instead of writepages. Because writepages can do
* block allocation with delalloc. We need to write
* only allocated blocks here.
*/
err = journal_submit_inode_data_buffers(mapping);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
Expand All @@ -228,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
struct jbd2_inode *jinode, *next_i;
int err, ret = 0;

/* For locking, see the comment in journal_submit_inode_data_buffers() */
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
jinode->i_flags |= JI_COMMIT_RUNNING;
Expand Down Expand Up @@ -431,7 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
err = journal_submit_inode_data_buffers(journal, commit_transaction);
err = journal_submit_data_buffers(journal, commit_transaction);
if (err)
jbd2_journal_abort(journal, err);

Expand Down

0 comments on commit cd1aac3

Please sign in to comment.