Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 139483
b: refs/heads/master
c: a491212
h: refs/heads/master
i:
  139481: 0538e0e
  139479: c29dbd5
v: v3
  • Loading branch information
Theodore Ts'o committed Mar 12, 2009
1 parent 79587cf commit 326258f
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 59 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 2dc6b0d48ca0599837df21b14bb8393d0804af57
refs/heads/master: a4912123b688e057084e6557cef8924f7ae5bbde
6 changes: 6 additions & 0 deletions trunk/fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_DEF_MIN_BATCH_TIME 0
#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */

/*
* Minimum number of groups in a flexgroup before we separate out
* directories into the first block group of a flexgroup
*/
#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4

/*
* Structure of a directory entry
*/
Expand Down
3 changes: 3 additions & 0 deletions trunk/fs/ext4/ext4_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ struct ext4_inode_info {
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;

/* ialloc */
ext4_group_t i_last_alloc_group;

/* allocation reservation info for delalloc */
unsigned int i_reserved_data_blocks;
unsigned int i_reserved_meta_blocks;
Expand Down
25 changes: 24 additions & 1 deletion trunk/fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
ext4_fsblk_t bg_start;
ext4_fsblk_t last_block;
ext4_grpblk_t colour;
ext4_group_t block_group;
int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
int depth;

if (path) {
Expand All @@ -170,10 +172,31 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
}

/* OK. use inode's group */
bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
block_group = ei->i_block_group;
if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
/*
* If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
* block groups per flexgroup, reserve the first block
* group for directories and special files. Regular
* files will start at the second block group. This
* tends to speed up directory access and improves
* fsck times.
*/
block_group &= ~(flex_size-1);
if (S_ISREG(inode->i_mode))
block_group++;
}
bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;

/*
* If we are doing delayed allocation, we don't need take
* colour into account.
*/
if (test_opt(inode->i_sb, DELALLOC))
return bg_start;

if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
colour = (current->pid % 16) *
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
Expand Down
215 changes: 159 additions & 56 deletions trunk/fs/ext4/ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,43 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
return 0;
}

struct orlov_stats {
__u32 free_inodes;
__u32 free_blocks;
__u32 used_dirs;
};

/*
* Helper function for Orlov's allocator; returns critical information
* for a particular block group or flex_bg. If flex_size is 1, then g
* is a block group number; otherwise it is flex_bg number.
*/
void get_orlov_stats(struct super_block *sb, ext4_group_t g,
int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
int i;

stats->free_inodes = 0;
stats->free_blocks = 0;
stats->used_dirs = 0;

g *= flex_size;

for (i = 0; i < flex_size; i++) {
if (g >= ngroups)
break;
desc = ext4_get_group_desc(sb, g++, NULL);
if (!desc)
continue;

stats->free_inodes += ext4_free_inodes_count(sb, desc);
stats->free_blocks += ext4_free_blks_count(sb, desc);
stats->used_dirs += ext4_used_dirs_count(sb, desc);
}
}

/*
* Orlov's allocator for directories.
*
Expand All @@ -425,35 +462,34 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
* it has too many directories already (max_dirs) or
* it has too few free inodes left (min_inodes) or
* it has too few free blocks left (min_blocks) or
* it's already running too large debt (max_debt).
* Parent's group is preferred, if it doesn't satisfy these
* conditions we search cyclically through the rest. If none
* of the groups look good we just look for a group with more
* free inodes than average (starting at parent's group).
*
* Debt is incremented each time we allocate a directory and decremented
* when we allocate an inode, within 0--255.
*/

#define INODE_COST 64
#define BLOCK_COST 256

static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t *group)
ext4_group_t *group, int mode)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
ext4_group_t ngroups = sbi->s_groups_count;
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei;
ext4_fsblk_t freeb, avefreeb;
ext4_fsblk_t blocks_per_dir;
unsigned int ndirs;
int max_debt, max_dirs, min_inodes;
int max_dirs, min_inodes;
ext4_grpblk_t min_blocks;
ext4_group_t i;
ext4_group_t i, grp, g;
struct ext4_group_desc *desc;
struct orlov_stats stats;
int flex_size = ext4_flex_bg_size(sbi);

if (flex_size > 1) {
ngroups = (ngroups + flex_size - 1) >>
sbi->s_log_groups_per_flex;
parent_group >>= sbi->s_log_groups_per_flex;
}

freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
avefreei = freei / ngroups;
Expand All @@ -462,71 +498,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
do_div(avefreeb, ngroups);
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);

if ((parent == sb->s_root->d_inode) ||
(EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
if (S_ISDIR(mode) &&
((parent == sb->s_root->d_inode) ||
(EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
int best_ndir = inodes_per_group;
ext4_group_t grp;
int ret = -1;

get_random_bytes(&grp, sizeof(grp));
parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) {
grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL);
if (!desc || !ext4_free_inodes_count(sb, desc))
g = (parent_group + i) % ngroups;
get_orlov_stats(sb, g, flex_size, &stats);
if (!stats.free_inodes)
continue;
if (ext4_used_dirs_count(sb, desc) >= best_ndir)
if (stats.used_dirs >= best_ndir)
continue;
if (ext4_free_inodes_count(sb, desc) < avefreei)
if (stats.free_inodes < avefreei)
continue;
if (ext4_free_blks_count(sb, desc) < avefreeb)
if (stats.free_blocks < avefreeb)
continue;
*group = grp;
grp = g;
ret = 0;
best_ndir = ext4_used_dirs_count(sb, desc);
best_ndir = stats.used_dirs;
}
if (ret)
goto fallback;
found_flex_bg:
if (flex_size == 1) {
*group = grp;
return 0;
}

/*
* We pack inodes at the beginning of the flexgroup's
* inode tables. Block allocation decisions will do
* something similar, although regular files will
* start at 2nd block group of the flexgroup. See
* ext4_ext_find_goal() and ext4_find_near().
*/
grp *= flex_size;
for (i = 0; i < flex_size; i++) {
if (grp+i >= sbi->s_groups_count)
break;
desc = ext4_get_group_desc(sb, grp+i, NULL);
if (desc && ext4_free_inodes_count(sb, desc)) {
*group = grp+i;
return 0;
}
}
if (ret == 0)
return ret;
goto fallback;
}

blocks_per_dir = ext4_blocks_count(es) - freeb;
do_div(blocks_per_dir, ndirs);

max_dirs = ndirs / ngroups + inodes_per_group / 16;
min_inodes = avefreei - inodes_per_group / 4;
min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;

max_debt = EXT4_BLOCKS_PER_GROUP(sb);
max_debt /= max_t(int, blocks_per_dir, BLOCK_COST);
if (max_debt * INODE_COST > inodes_per_group)
max_debt = inodes_per_group / INODE_COST;
if (max_debt > 255)
max_debt = 255;
if (max_debt == 0)
max_debt = 1;
min_inodes = avefreei - inodes_per_group*flex_size / 4;
if (min_inodes < 1)
min_inodes = 1;
min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;

/*
* Start looking in the flex group where we last allocated an
* inode for this parent directory
*/
if (EXT4_I(parent)->i_last_alloc_group != ~0) {
parent_group = EXT4_I(parent)->i_last_alloc_group;
if (flex_size > 1)
parent_group >>= sbi->s_log_groups_per_flex;
}

for (i = 0; i < ngroups; i++) {
*group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
if (!desc || !ext4_free_inodes_count(sb, desc))
continue;
if (ext4_used_dirs_count(sb, desc) >= max_dirs)
grp = (parent_group + i) % ngroups;
get_orlov_stats(sb, grp, flex_size, &stats);
if (stats.used_dirs >= max_dirs)
continue;
if (ext4_free_inodes_count(sb, desc) < min_inodes)
if (stats.free_inodes < min_inodes)
continue;
if (ext4_free_blks_count(sb, desc) < min_blocks)
if (stats.free_blocks < min_blocks)
continue;
return 0;
goto found_flex_bg;
}

fallback:
ngroups = sbi->s_groups_count;
avefreei = freei / ngroups;
parent_group = EXT4_I(parent)->i_block_group;
for (i = 0; i < ngroups; i++) {
*group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL);
if (desc && ext4_free_inodes_count(sb, desc) &&
ext4_free_inodes_count(sb, desc) >= avefreei)
ext4_free_inodes_count(sb, desc) >= avefreei) {
*group = grp;
return 0;
}
}

if (avefreei) {
Expand All @@ -542,12 +604,51 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
}

static int find_group_other(struct super_block *sb, struct inode *parent,
ext4_group_t *group)
ext4_group_t *group, int mode)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
struct ext4_group_desc *desc;
ext4_group_t i;
ext4_group_t i, last;
int flex_size = ext4_flex_bg_size(EXT4_SB(sb));

/*
* Try to place the inode is the same flex group as its
* parent. If we can't find space, use the Orlov algorithm to
* find another flex group, and store that information in the
* parent directory's inode information so that use that flex
* group for future allocations.
*/
if (flex_size > 1) {
int retry = 0;

try_again:
parent_group &= ~(flex_size-1);
last = parent_group + flex_size;
if (last > ngroups)
last = ngroups;
for (i = parent_group; i < last; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
if (desc && ext4_free_inodes_count(sb, desc)) {
*group = i;
return 0;
}
}
if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
retry = 1;
parent_group = EXT4_I(parent)->i_last_alloc_group;
goto try_again;
}
/*
* If this didn't work, use the Orlov search algorithm
* to find a new flex group; we pass in the mode to
* avoid the topdir algorithms.
*/
*group = parent_group + flex_size;
if (*group > ngroups)
*group = 0;
return find_group_orlov(sb, parent, group, mode);
}

/*
* Try to place the inode in its parent directory
Expand Down Expand Up @@ -716,10 +817,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
sbi = EXT4_SB(sb);
es = sbi->s_es;

if (sbi->s_log_groups_per_flex) {
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
ret2 = find_group_other(sb, dir, &group);
ret2 = find_group_other(sb, dir, &group, mode);
if (ret2 == 0 && once)
once = 0;
printk(KERN_NOTICE "ext4: find_group_flex "
Expand All @@ -733,11 +834,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (test_opt(sb, OLDALLOC))
ret2 = find_group_dir(sb, dir, &group);
else
ret2 = find_group_orlov(sb, dir, &group);
ret2 = find_group_orlov(sb, dir, &group, mode);
} else
ret2 = find_group_other(sb, dir, &group);
ret2 = find_group_other(sb, dir, &group, mode);

got_group:
EXT4_I(dir)->i_last_alloc_group = group;
err = -ENOSPC;
if (ret2 == -1)
goto out;
Expand Down Expand Up @@ -894,6 +996,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
ei->i_file_acl = 0;
ei->i_dtime = 0;
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;

ext4_set_inode_flags(inode);
if (IS_DIRSYNC(inode))
Expand Down
Loading

0 comments on commit 326258f

Please sign in to comment.