Skip to content

Commit

Permalink
ext4: Avoid races caused by on-line resizing and SMP memory reordering
Browse files Browse the repository at this point in the history
Ext4's on-line resizing adds a new block group and then, only at the
last step adjusts s_groups_count.  However, it's possible on SMP
systems that another CPU could see the updated the s_group_count and
not see the newly initialized data structures for the just-added block
group.  For this reason, it's important to insert a SMP read barrier
after reading s_groups_count and before reading any (for example) the
new block group descriptors allowed by the increased value of
s_groups_count.

Unfortunately, we rather blatently violate this locking protocol
documented in fs/ext4/resize.c.  Fortunately, (1) on-line resizes
happen relatively rarely, and (2) it seems rare that the filesystem
code will immediately try to use just-added block group before any
memory ordering issues resolve themselves.  So apparently problems
here are relatively hard to hit, since ext3 has been vulnerable to the
same issue for years with no one apparently complaining.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
  • Loading branch information
Theodore Ts'o committed May 1, 2009
1 parent 9ca9238 commit 8df9675
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 55 deletions.
15 changes: 7 additions & 8 deletions fs/ext4/balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group, struct ext4_group_desc *gdp)
{
int bit, bit_max;
ext4_group_t ngroups = ext4_get_groups_count(sb);
unsigned free_blocks, group_blocks;
struct ext4_sb_info *sbi = EXT4_SB(sb);

Expand Down Expand Up @@ -123,15 +124,15 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
bit_max += ext4_bg_num_gdb(sb, block_group);
}

if (block_group == sbi->s_groups_count - 1) {
if (block_group == ngroups - 1) {
/*
* Even though mke2fs always initialize first and last group
* if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
* to make sure we calculate the right free blocks
*/
group_blocks = ext4_blocks_count(sbi->s_es) -
le32_to_cpu(sbi->s_es->s_first_data_block) -
(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
} else {
group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
}
Expand Down Expand Up @@ -205,18 +206,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
{
unsigned int group_desc;
unsigned int offset;
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);

if (block_group >= sbi->s_groups_count) {
if (block_group >= ngroups) {
ext4_error(sb, "ext4_get_group_desc",
"block_group >= groups_count - "
"block_group = %u, groups_count = %u",
block_group, sbi->s_groups_count);
block_group, ngroups);

return NULL;
}
smp_rmb();

group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
Expand Down Expand Up @@ -665,7 +666,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
ext4_fsblk_t desc_count;
struct ext4_group_desc *gdp;
ext4_group_t i;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t ngroups = ext4_get_groups_count(sb);
#ifdef EXT4FS_DEBUG
struct ext4_super_block *es;
ext4_fsblk_t bitmap_count;
Expand All @@ -677,7 +678,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
bitmap_count = 0;
gdp = NULL;

smp_rmb();
for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
Expand All @@ -700,7 +700,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
return bitmap_count;
#else
desc_count = 0;
smp_rmb();
for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
Expand Down
12 changes: 12 additions & 0 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -1228,6 +1228,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
return grp_info[indexv][indexh];
}

/*
* Reading s_groups_count requires using smp_rmb() afterwards. See
* the locking protocol documented in the comments of ext4_group_add()
* in resize.c
*/
static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
{
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;

smp_rmb();
return ngroups;
}

static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
ext4_group_t block_group)
Expand Down
40 changes: 19 additions & 21 deletions fs/ext4/ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
static int find_group_dir(struct super_block *sb, struct inode *parent,
ext4_group_t *best_group)
{
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t ngroups = ext4_get_groups_count(sb);
unsigned int freei, avefreei;
struct ext4_group_desc *desc, *best_desc = NULL;
ext4_group_t group;
Expand Down Expand Up @@ -353,7 +353,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
struct flex_groups *flex_group = sbi->s_flex_groups;
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
ext4_group_t ngroups = sbi->s_groups_count;
ext4_group_t ngroups = ext4_get_groups_count(sb);
int flex_size = ext4_flex_bg_size(sbi);
ext4_group_t best_flex = parent_fbg_group;
int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
Expand All @@ -362,7 +362,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
ext4_group_t n_fbg_groups;
ext4_group_t i;

n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
n_fbg_groups = (ngroups + flex_size - 1) >>
sbi->s_log_groups_per_flex;

find_close_to_parent:
Expand Down Expand Up @@ -478,20 +478,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t ngroups = sbi->s_groups_count;
ext4_group_t real_ngroups = ext4_get_groups_count(sb);
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei;
ext4_fsblk_t freeb, avefreeb;
unsigned int ndirs;
int max_dirs, min_inodes;
ext4_grpblk_t min_blocks;
ext4_group_t i, grp, g;
ext4_group_t i, grp, g, ngroups;
struct ext4_group_desc *desc;
struct orlov_stats stats;
int flex_size = ext4_flex_bg_size(sbi);

ngroups = real_ngroups;
if (flex_size > 1) {
ngroups = (ngroups + flex_size - 1) >>
ngroups = (real_ngroups + flex_size - 1) >>
sbi->s_log_groups_per_flex;
parent_group >>= sbi->s_log_groups_per_flex;
}
Expand Down Expand Up @@ -543,7 +544,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
*/
grp *= flex_size;
for (i = 0; i < flex_size; i++) {
if (grp+i >= sbi->s_groups_count)
if (grp+i >= real_ngroups)
break;
desc = ext4_get_group_desc(sb, grp+i, NULL);
if (desc && ext4_free_inodes_count(sb, desc)) {
Expand Down Expand Up @@ -583,7 +584,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
}

fallback:
ngroups = sbi->s_groups_count;
ngroups = real_ngroups;
avefreei = freei / ngroups;
fallback_retry:
parent_group = EXT4_I(parent)->i_block_group;
Expand Down Expand Up @@ -613,9 +614,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
ext4_group_t *group, int mode)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
ext4_group_t i, last;
int flex_size = ext4_flex_bg_size(EXT4_SB(sb));

/*
Expand Down Expand Up @@ -799,11 +799,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
struct buffer_head *group_desc_bh;
ext4_group_t group = 0;
ext4_group_t ngroups, group = 0;
unsigned long ino = 0;
struct inode *inode;
struct ext4_group_desc *gdp = NULL;
struct ext4_super_block *es;
struct ext4_inode_info *ei;
struct ext4_sb_info *sbi;
int ret2, err = 0;
Expand All @@ -818,15 +817,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
return ERR_PTR(-EPERM);

sb = dir->i_sb;
ngroups = ext4_get_groups_count(sb);
trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
dir->i_ino, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
ei = EXT4_I(inode);

sbi = EXT4_SB(sb);
es = sbi->s_es;

if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
ret2 = find_group_flex(sb, dir, &group);
Expand Down Expand Up @@ -856,7 +854,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (ret2 == -1)
goto out;

for (i = 0; i < sbi->s_groups_count; i++) {
for (i = 0; i < ngroups; i++) {
err = -EIO;

gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
Expand Down Expand Up @@ -917,7 +915,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
* group descriptor metadata has not yet been updated.
* So we just go onto the next blockgroup.
*/
if (++group == sbi->s_groups_count)
if (++group == ngroups)
group = 0;
}
err = -ENOSPC;
Expand Down Expand Up @@ -1158,7 +1156,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
{
unsigned long desc_count;
struct ext4_group_desc *gdp;
ext4_group_t i;
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
#ifdef EXT4FS_DEBUG
struct ext4_super_block *es;
unsigned long bitmap_count, x;
Expand All @@ -1168,7 +1166,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
desc_count = 0;
bitmap_count = 0;
gdp = NULL;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
Expand All @@ -1190,7 +1188,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
return desc_count;
#else
desc_count = 0;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
Expand All @@ -1205,9 +1203,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
unsigned long ext4_count_dirs(struct super_block * sb)
{
unsigned long count = 0;
ext4_group_t i;
ext4_group_t i, ngroups = ext4_get_groups_count(sb);

for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
for (i = 0; i < ngroups; i++) {
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
Expand Down
7 changes: 4 additions & 3 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -4965,7 +4965,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
*/
int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
int groups, gdpblocks;
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks;
int idxblocks;
int ret = 0;

Expand All @@ -4992,8 +4993,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
groups += nrblocks;

gdpblocks = groups;
if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
groups = EXT4_SB(inode->i_sb)->s_groups_count;
if (groups > ngroups)
groups = ngroups;
if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;

Expand Down
Loading

0 comments on commit 8df9675

Please sign in to comment.