Skip to content

Commit

Permalink
ext4: Fix small file fragmentation
Browse files Browse the repository at this point in the history
For small file block allocations, mballoc uses per cpu prealloc
space.  Use goal block when searching for the right prealloc
space.  Also make sure ext4_da_writepages tries to write
all the pages for small files in a single attempt

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
  • Loading branch information
Aneesh Kumar K.V authored and Theodore Ts'o committed Aug 18, 2008
1 parent 91246c0 commit 5e745b0
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 13 deletions.
21 changes: 15 additions & 6 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
static int ext4_da_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
handle_t *handle = NULL;
int needed_blocks;
int ret = 0;
long to_write;
loff_t range_start = 0;
long pages_skipped = 0;
struct inode *inode = mapping->host;
int needed_blocks, ret = 0, nr_to_writebump = 0;
long to_write, pages_skipped = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);

/*
* No pages to write? This is mainly a kludge to avoid starting
Expand All @@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping,
*/
if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0;
/*
* Make sure nr_to_write is >= sbi->s_mb_stream_request
 * This makes sure small file blocks are allocated in a
 * single attempt. This ensures that small files
 * get less fragmented.
*/
if (wbc->nr_to_write < sbi->s_mb_stream_request) {
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
wbc->nr_to_write = sbi->s_mb_stream_request;
}

if (!wbc->range_cyclic)
/*
Expand Down Expand Up @@ -2377,7 +2386,7 @@ static int ext4_da_writepages(struct address_space *mapping,
}

out_writepages:
wbc->nr_to_write = to_write;
wbc->nr_to_write = to_write - nr_to_writebump;
wbc->range_start = range_start;
return ret;
}
Expand Down
53 changes: 46 additions & 7 deletions fs/ext4/mballoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3281,6 +3281,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
}

/*
 * Return the preallocation space that has the minimal distance from the
 * goal block.  @cpa is the preallocation space currently known to be
 * closest to the goal block; NULL means no candidate has been found yet.
 *
 * The returned pa carries an elevated pa_count reference; when @cpa is
 * rejected in favour of @pa, the reference previously taken on @cpa is
 * dropped here.
 */
static struct ext4_prealloc_space *
ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
			struct ext4_prealloc_space *pa,
			struct ext4_prealloc_space *cpa)
{
	ext4_fsblk_t cur_distance, new_distance;

	/* first candidate: take a reference and adopt it */
	if (cpa == NULL) {
		atomic_inc(&pa->pa_count);
		return pa;
	}

	/*
	 * ext4_fsblk_t is unsigned and may be 64 bits wide while abs()
	 * operates on int, so abs(a - b) can truncate and pick the wrong
	 * pa.  Compute the absolute distances explicitly instead.
	 */
	cur_distance = (goal_block >= cpa->pa_pstart) ?
			goal_block - cpa->pa_pstart :
			cpa->pa_pstart - goal_block;
	new_distance = (goal_block >= pa->pa_pstart) ?
			goal_block - pa->pa_pstart :
			pa->pa_pstart - goal_block;

	/*
	 * Keep the current best on a tie (<=) to avoid a pointless
	 * atomic reference swap between two equally distant pa's.
	 */
	if (cur_distance <= new_distance)
		return cpa;

	/* drop the reference taken on the previously chosen pa */
	atomic_dec(&cpa->pa_count);
	atomic_inc(&pa->pa_count);
	return pa;
}

/*
* search goal blocks in preallocated space
*/
Expand All @@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
struct ext4_prealloc_space *pa, *cpa = NULL;
ext4_fsblk_t goal_block;

/* only data can be preallocated */
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
Expand Down Expand Up @@ -3333,24 +3363,33 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
/* The max size of hash table is PREALLOC_TB_SIZE */
order = PREALLOC_TB_SIZE - 1;

goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
ac->ac_g_ex.fe_start +
le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
/*
* search for the prealloc space that is having
* minimal distance from the goal block.
*/
for (i = order; i < PREALLOC_TB_SIZE; i++) {
rcu_read_lock();
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
pa_inode_list) {
spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0 &&
pa->pa_free >= ac->ac_o_ex.fe_len) {
atomic_inc(&pa->pa_count);
ext4_mb_use_group_pa(ac, pa);
spin_unlock(&pa->pa_lock);
ac->ac_criteria = 20;
rcu_read_unlock();
return 1;

cpa = ext4_mb_check_group_pa(goal_block,
pa, cpa);
}
spin_unlock(&pa->pa_lock);
}
rcu_read_unlock();
}
if (cpa) {
ext4_mb_use_group_pa(ac, cpa);
ac->ac_criteria = 20;
return 1;
}
return 0;
}

Expand Down

0 comments on commit 5e745b0

Please sign in to comment.