Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 166588
b: refs/heads/master
c: 55138e0
h: refs/heads/master
v: v3
  • Loading branch information
Theodore Ts'o committed Sep 29, 2009
1 parent 3efe971 commit 4285723
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 17 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 71780577306fd1e76c7a92e3b308db624d03adb9
refs/heads/master: 55138e0bc29c0751e2152df9ad35deea542f29b3
1 change: 1 addition & 0 deletions trunk/fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,7 @@ struct ext4_sb_info {
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
unsigned int s_max_writeback_mb_bump;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
Expand Down
105 changes: 93 additions & 12 deletions trunk/fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,64 @@ static int check_block_validity(struct inode *inode, const char *msg,
return 0;
}

/*
* Return the number of dirty pages in the given inode starting at
* page frame idx.
*/
static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
unsigned int max_pages)
{
struct address_space *mapping = inode->i_mapping;
pgoff_t index;
struct pagevec pvec;
pgoff_t num = 0;
int i, nr_pages, done = 0;

if (max_pages == 0)
return 0;
pagevec_init(&pvec, 0);
while (!done) {
index = idx;
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
(pgoff_t)PAGEVEC_SIZE);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
struct buffer_head *bh, *head;

lock_page(page);
if (unlikely(page->mapping != mapping) ||
!PageDirty(page) ||
PageWriteback(page) ||
page->index != idx) {
done = 1;
unlock_page(page);
break;
}
head = page_buffers(page);
bh = head;
do {
if (!buffer_delay(bh) &&
!buffer_unwritten(bh)) {
done = 1;
break;
}
} while ((bh = bh->b_this_page) != head);
unlock_page(page);
if (done)
break;
idx++;
num++;
if (num >= max_pages)
break;
}
pagevec_release(&pvec);
}
return num;
}

/*
* The ext4_get_blocks() function tries to look up the requested blocks,
* and returns if the blocks are already mapped.
Expand Down Expand Up @@ -2743,8 +2801,10 @@ static int ext4_da_writepages(struct address_space *mapping,
int no_nrwrite_index_update;
int pages_written = 0;
long pages_skipped;
unsigned int max_pages;
int range_cyclic, cycled = 1, io_done = 0;
int needed_blocks, ret = 0, nr_to_writebump = 0;
int needed_blocks, ret = 0;
long desired_nr_to_write, nr_to_writebump = 0;
loff_t range_start = wbc->range_start;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);

Expand All @@ -2771,16 +2831,6 @@ static int ext4_da_writepages(struct address_space *mapping,
if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
return -EROFS;

/*
* Make sure nr_to_write is >= sbi->s_mb_stream_request
* This make sure small files blocks are allocated in
* single attempt. This ensure that small files
* get less fragmented.
*/
if (wbc->nr_to_write < sbi->s_mb_stream_request) {
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
wbc->nr_to_write = sbi->s_mb_stream_request;
}
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;

Expand All @@ -2795,6 +2845,36 @@ static int ext4_da_writepages(struct address_space *mapping,
} else
index = wbc->range_start >> PAGE_CACHE_SHIFT;

/*
* This works around two forms of stupidity. The first is in
* the writeback code, which caps the maximum number of pages
* written to be 1024 pages. This is wrong on multiple
* levels; different architectues have a different page size,
* which changes the maximum amount of data which gets
* written. Secondly, 4 megabytes is way too small. XFS
* forces this value to be 16 megabytes by multiplying
* nr_to_write parameter by four, and then relies on its
* allocator to allocate larger extents to make them
* contiguous. Unfortunately this brings us to the second
* stupidity, which is that ext4's mballoc code only allocates
* at most 2048 blocks. So we force contiguous writes up to
* the number of dirty blocks in the inode, or
* sbi->max_writeback_mb_bump whichever is smaller.
*/
max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
if (!range_cyclic && range_whole)
desired_nr_to_write = wbc->nr_to_write * 8;
else
desired_nr_to_write = ext4_num_dirty_pages(inode, index,
max_pages);
if (desired_nr_to_write > max_pages)
desired_nr_to_write = max_pages;

if (wbc->nr_to_write < desired_nr_to_write) {
nr_to_writebump = desired_nr_to_write - wbc->nr_to_write;
wbc->nr_to_write = desired_nr_to_write;
}

mpd.wbc = wbc;
mpd.inode = mapping->host;

Expand Down Expand Up @@ -2914,7 +2994,8 @@ static int ext4_da_writepages(struct address_space *mapping,
out_writepages:
if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0;
wbc->nr_to_write -= nr_to_writebump;
if (wbc->nr_to_write > nr_to_writebump)
wbc->nr_to_write -= nr_to_writebump;
wbc->range_start = range_start;
trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
return ret;
Expand Down
3 changes: 3 additions & 0 deletions trunk/fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -2197,6 +2197,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);

static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
Expand All @@ -2210,6 +2211,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_order2_req),
ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc),
ATTR_LIST(max_writeback_mb_bump),
NULL,
};

Expand Down Expand Up @@ -2679,6 +2681,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}

sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_max_writeback_mb_bump = 128;

/*
* set up enough so that it can read an inode
Expand Down
14 changes: 10 additions & 4 deletions trunk/include/trace/events/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ TRACE_EVENT(ext4_da_writepages,
__field( char, for_kupdate )
__field( char, for_reclaim )
__field( char, range_cyclic )
__field( pgoff_t, writeback_index )
),

TP_fast_assign(
Expand All @@ -249,15 +250,17 @@ TRACE_EVENT(ext4_da_writepages,
__entry->for_kupdate = wbc->for_kupdate;
__entry->for_reclaim = wbc->for_reclaim;
__entry->range_cyclic = wbc->range_cyclic;
__entry->writeback_index = inode->i_mapping->writeback_index;
),

TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d",
TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu",
jbd2_dev_to_name(__entry->dev),
(unsigned long) __entry->ino, __entry->nr_to_write,
__entry->pages_skipped, __entry->range_start,
__entry->range_end, __entry->nonblocking,
__entry->for_kupdate, __entry->for_reclaim,
__entry->range_cyclic)
__entry->range_cyclic,
(unsigned long) __entry->writeback_index)
);

TRACE_EVENT(ext4_da_write_pages,
Expand Down Expand Up @@ -309,6 +312,7 @@ TRACE_EVENT(ext4_da_writepages_result,
__field( char, encountered_congestion )
__field( char, more_io )
__field( char, no_nrwrite_index_update )
__field( pgoff_t, writeback_index )
),

TP_fast_assign(
Expand All @@ -320,14 +324,16 @@ TRACE_EVENT(ext4_da_writepages_result,
__entry->encountered_congestion = wbc->encountered_congestion;
__entry->more_io = wbc->more_io;
__entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update;
__entry->writeback_index = inode->i_mapping->writeback_index;
),

TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d",
TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d writeback_index %lu",
jbd2_dev_to_name(__entry->dev),
(unsigned long) __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped,
__entry->encountered_congestion, __entry->more_io,
__entry->no_nrwrite_index_update)
__entry->no_nrwrite_index_update,
(unsigned long) __entry->writeback_index)
);

TRACE_EVENT(ext4_da_write_begin,
Expand Down

0 comments on commit 4285723

Please sign in to comment.