Skip to content

Commit

Permalink
ext4: fix data corruption with EXT4_GET_BLOCKS_ZERO
Browse files Browse the repository at this point in the history
When ext4_map_blocks() is called with EXT4_GET_BLOCKS_ZERO to zero-out
allocated blocks and these blocks are actually converted from unwritten
extent the following race can happen:

CPU0					CPU1

page fault				page fault
...					...
ext4_map_blocks()
  ext4_ext_map_blocks()
    ext4_ext_handle_unwritten_extents()
      ext4_ext_convert_to_initialized()
	- zero out converted extent
	ext4_zeroout_es()
	  - inserts extent as initialized in status tree

					ext4_map_blocks()
					  ext4_es_lookup_extent()
					    - finds initialized extent
					write data
  ext4_issue_zeroout()
    - zeroes out new extent overwriting data

This problem can be reproduced by generic/340 for the fallocated case
for the last block in the file.

Fix the problem by avoiding zeroing out the area we are mapping with
ext4_map_blocks() in ext4_ext_convert_to_initialized(). It is pointless
to zero out this area in the first place as the caller asked us to
convert the area to initialized because he is just going to write data
there before the transaction finishes. To achieve this we delete the
special case of zeroing out full extent as that will be handled by the
cases below zeroing only the part of the extent that needs it. We also
instruct ext4_split_extent() that the middle of extent being split
contains data so that ext4_split_extent_at() cannot zero out full extent
in case of ENOSPC.

CC: stable@vger.kernel.org
Fixes: 12735f8
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
  • Loading branch information
Jan Kara authored and Theodore Ts'o committed May 26, 2017
1 parent b8cb5a5 commit 4f8caa6
Showing 1 changed file with 37 additions and 43 deletions.
80 changes: 37 additions & 43 deletions fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -3413,13 +3413,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
struct ext4_extent zero_ex;
struct ext4_extent zero_ex1, zero_ex2;
struct ext4_extent *ex, *abut_ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth, map_len = map->m_len;
int allocated = 0, max_zeroout = 0;
int err = 0;
int split_flag = 0;
int split_flag = EXT4_EXT_DATA_VALID2;

ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
Expand All @@ -3436,7 +3436,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
zero_ex.ee_len = 0;
zero_ex1.ee_len = 0;
zero_ex2.ee_len = 0;

trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);

Expand Down Expand Up @@ -3576,62 +3577,52 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (ext4_encrypted_inode(inode))
max_zeroout = 0;

/* If extent is less than s_max_zeroout_kb, zeroout directly */
if (max_zeroout && (ee_len <= max_zeroout)) {
err = ext4_ext_zeroout(inode, ex);
if (err)
goto out;
zero_ex.ee_block = ex->ee_block;
zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex));
ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));

err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
ext4_ext_mark_initialized(ex);
ext4_ext_try_to_merge(handle, inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
}

/*
* four cases:
* five cases:
* 1. split the extent into three extents.
* 2. split the extent into two extents, zeroout the first half.
* 3. split the extent into two extents, zeroout the second half.
* 2. split the extent into two extents, zeroout the head of the first
* extent.
* 3. split the extent into two extents, zeroout the tail of the second
* extent.
* 4. split the extent into two extents with out zeroout.
* 5. no splitting needed, just possibly zeroout the head and / or the
* tail of the extent.
*/
split_map.m_lblk = map->m_lblk;
split_map.m_len = map->m_len;

if (max_zeroout && (allocated > map->m_len)) {
if (max_zeroout && (allocated > split_map.m_len)) {
if (allocated <= max_zeroout) {
/* case 3 */
zero_ex.ee_block =
cpu_to_le32(map->m_lblk);
zero_ex.ee_len = cpu_to_le16(allocated);
ext4_ext_store_pblock(&zero_ex,
ext4_ext_pblock(ex) + map->m_lblk - ee_block);
err = ext4_ext_zeroout(inode, &zero_ex);
/* case 3 or 5 */
zero_ex1.ee_block =
cpu_to_le32(split_map.m_lblk +
split_map.m_len);
zero_ex1.ee_len =
cpu_to_le16(allocated - split_map.m_len);
ext4_ext_store_pblock(&zero_ex1,
ext4_ext_pblock(ex) + split_map.m_lblk +
split_map.m_len - ee_block);
err = ext4_ext_zeroout(inode, &zero_ex1);
if (err)
goto out;
split_map.m_lblk = map->m_lblk;
split_map.m_len = allocated;
} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
/* case 2 */
if (map->m_lblk != ee_block) {
zero_ex.ee_block = ex->ee_block;
zero_ex.ee_len = cpu_to_le16(map->m_lblk -
}
if (split_map.m_lblk - ee_block + split_map.m_len <
max_zeroout) {
/* case 2 or 5 */
if (split_map.m_lblk != ee_block) {
zero_ex2.ee_block = ex->ee_block;
zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
ee_block);
ext4_ext_store_pblock(&zero_ex,
ext4_ext_store_pblock(&zero_ex2,
ext4_ext_pblock(ex));
err = ext4_ext_zeroout(inode, &zero_ex);
err = ext4_ext_zeroout(inode, &zero_ex2);
if (err)
goto out;
}

split_map.m_len += split_map.m_lblk - ee_block;
split_map.m_lblk = ee_block;
split_map.m_len = map->m_lblk - ee_block + map->m_len;
allocated = map->m_len;
}
}
Expand All @@ -3642,8 +3633,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = 0;
out:
/* If we have gotten a failure, don't zero out status tree */
if (!err)
err = ext4_zeroout_es(inode, &zero_ex);
if (!err) {
err = ext4_zeroout_es(inode, &zero_ex1);
if (!err)
err = ext4_zeroout_es(inode, &zero_ex2);
}
return err ? err : allocated;
}

Expand Down

0 comments on commit 4f8caa6

Please sign in to comment.