Skip to content

Commit

Permalink
ext4: fix potential race between online resizing and write operations
Browse files Browse the repository at this point in the history
During an online resize an array of pointers to buffer heads gets
replaced so it can get enlarged.  If there is a racing block
allocation or deallocation which uses the old array, and the old array
has gotten reused this can lead to a GPF or some other random kernel
memory getting modified.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443
Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.edu
Reported-by: Suraj Jitindar Singh <surajjs@amazon.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
  • Loading branch information
Theodore Ts'o committed Feb 21, 2020
1 parent 9424ef5 commit 1d0c392
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 25 deletions.
14 changes: 11 additions & 3 deletions fs/ext4/balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct buffer_head *bh_p;

if (block_group >= ngroups) {
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
Expand All @@ -280,18 +281,25 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,

group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
/*
* sbi_array_rcu_deref returns with rcu unlocked, this is ok since
* the pointer being dereferenced won't be dereferenced again. By
* looking at the usage in add_new_gdb() the value isn't modified,
* just the pointer, and so it remains valid.
*/
if (!bh_p) {
ext4_error(sb, "Group descriptor not loaded - "
"block_group = %u, group_desc = %u, desc = %u",
block_group, group_desc, offset);
return NULL;
}

desc = (struct ext4_group_desc *)(
(__u8 *)sbi->s_group_desc[group_desc]->b_data +
(__u8 *)bh_p->b_data +
offset * EXT4_DESC_SIZE(sb));
if (bh)
*bh = sbi->s_group_desc[group_desc];
*bh = bh_p;
return desc;
}

Expand Down
20 changes: 19 additions & 1 deletion fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,7 @@ struct ext4_sb_info {
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
Expand Down Expand Up @@ -1576,6 +1576,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
}

/*
* Returns: sbi->field[index]
* Used to access an array element from the following sbi fields which require
* rcu protection to avoid dereferencing an invalid pointer due to reassignment
* - s_group_desc
* - s_group_info
* - s_flex_group
*/
#define sbi_array_rcu_deref(sbi, field, index) \
({ \
typeof(*((sbi)->field)) _v; \
rcu_read_lock(); \
_v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
rcu_read_unlock(); \
_v; \
})

/*
* Simulate_fail codes
*/
Expand Down Expand Up @@ -2730,6 +2747,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
extern bool ext4_empty_dir(struct inode *inode);

/* resize.c */
extern void ext4_kvfree_array_rcu(void *to_free);
extern int ext4_group_add(struct super_block *sb,
struct ext4_new_group_data *input);
extern int ext4_group_extend(struct super_block *sb,
Expand Down
55 changes: 44 additions & 11 deletions fs/ext4/resize.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,33 @@

#include "ext4_jbd2.h"

struct ext4_rcu_ptr {
struct rcu_head rcu;
void *ptr;
};

static void ext4_rcu_ptr_callback(struct rcu_head *head)
{
struct ext4_rcu_ptr *ptr;

ptr = container_of(head, struct ext4_rcu_ptr, rcu);
kvfree(ptr->ptr);
kfree(ptr);
}

void ext4_kvfree_array_rcu(void *to_free)
{
struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);

if (ptr) {
ptr->ptr = to_free;
call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
return;
}
synchronize_rcu();
kvfree(to_free);
}

int ext4_resize_begin(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
Expand Down Expand Up @@ -542,8 +569,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
brelse(gdb);
goto out;
}
memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
gdb->b_size);
memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
s_group_desc, j)->b_data, gdb->b_size);
set_buffer_uptodate(gdb);

err = ext4_handle_dirty_metadata(handle, NULL, gdb);
Expand Down Expand Up @@ -860,13 +887,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
}
brelse(dind);

o_group_desc = EXT4_SB(sb)->s_group_desc;
rcu_read_lock();
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
ext4_kvfree_array_rcu(o_group_desc);

le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
Expand Down Expand Up @@ -909,9 +938,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}

o_group_desc = EXT4_SB(sb)->s_group_desc;
rcu_read_lock();
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;

BUFFER_TRACE(gdb_bh, "get_write_access");
Expand All @@ -922,9 +953,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}

EXT4_SB(sb)->s_group_desc = n_group_desc;
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
ext4_kvfree_array_rcu(o_group_desc);
return err;
}

Expand Down Expand Up @@ -1188,7 +1219,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
* use non-sparse filesystems anymore. This is already checked above.
*/
if (gdb_off) {
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
gdb_num);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);

Expand Down Expand Up @@ -1270,7 +1302,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
/*
* get_write_access() has been called on gdb_bh by ext4_add_new_desc().
*/
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
Expand Down Expand Up @@ -1497,7 +1529,8 @@ static int ext4_flex_group_add(struct super_block *sb,
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;

gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
Expand Down
33 changes: 23 additions & 10 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct buffer_head **group_desc;
int aborted = 0;
int i, err;

Expand Down Expand Up @@ -1046,9 +1047,12 @@ static void ext4_put_super(struct super_block *sb)
if (!sb_rdonly(sb))
ext4_commit_super(sb, 1);

rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kvfree(sbi->s_group_desc);
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
Expand Down Expand Up @@ -3634,7 +3638,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
char *orig_data = kstrdup(data, GFP_KERNEL);
struct buffer_head *bh;
struct buffer_head *bh, **group_desc;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
ext4_fsblk_t block;
Expand Down Expand Up @@ -4290,9 +4294,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
}
sbi->s_group_desc = kvmalloc_array(db_count,
sizeof(struct buffer_head *),
GFP_KERNEL);
rcu_assign_pointer(sbi->s_group_desc,
kvmalloc_array(db_count,
sizeof(struct buffer_head *),
GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
ret = -ENOMEM;
Expand All @@ -4308,14 +4313,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}

for (i = 0; i < db_count; i++) {
struct buffer_head *bh;

block = descriptor_loc(sb, logical_sb_block, i);
sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
if (!sbi->s_group_desc[i]) {
bh = sb_bread_unmovable(sb, block);
if (!bh) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
db_count = i;
goto failed_mount2;
}
rcu_read_lock();
rcu_dereference(sbi->s_group_desc)[i] = bh;
rcu_read_unlock();
}
sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
Expand Down Expand Up @@ -4717,9 +4727,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kvfree(sbi->s_group_desc);
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
Expand Down

0 comments on commit 1d0c392

Please sign in to comment.