Skip to content

Commit

Permalink
ext4: fast commit recovery path
Browse files Browse the repository at this point in the history
This patch adds fast commit recovery path support for the Ext4 file
system. We add several helper functions that are similar in spirit to
the e2fsprogs journal recovery path handlers. Examples of such functions
include a simple block allocator, an idempotent block bitmap update
function, etc. Using these routines and the fast commit log in the fast
commit area, the recovery path (ext4_fc_replay()) performs fast commit
log recovery.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20201015203802.3597742-8-harshadshirwadkar@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
  • Loading branch information
Harshad Shirwadkar authored and Theodore Ts'o committed Oct 22, 2020
1 parent 5b849b5 commit 8016e29
Show file tree
Hide file tree
Showing 14 changed files with 1,821 additions and 131 deletions.
7 changes: 6 additions & 1 deletion fs/ext4/balloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,12 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
struct buffer_head *bh)
{
ext4_fsblk_t blk;
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
struct ext4_group_info *grp;

if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;

grp = ext4_get_group_info(sb, block_group);

if (buffer_verified(bh))
return 0;
Expand Down
26 changes: 26 additions & 0 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,7 @@ struct ext4_inode_info {
#define EXT4_FC_COMMITTING 0x0010 /* File system undergoing a fast
* commit.
*/
#define EXT4_FC_REPLAY 0x0020 /* Fast commit replay ongoing */

/*
* Misc. filesystem flags
Expand Down Expand Up @@ -1666,6 +1667,10 @@ struct ext4_sb_info {
struct buffer_head *s_fc_bh;
struct ext4_fc_stats s_fc_stats;
u64 s_fc_avg_commit_time;
#ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay;
#endif
struct ext4_fc_replay_state s_fc_replay_state;
};

static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
Expand Down Expand Up @@ -2708,6 +2713,7 @@ extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
struct dx_hash_info *hinfo);

/* ialloc.c */
extern int ext4_mark_inode_used(struct super_block *sb, int ino);
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
uid_t *owner, __u32 i_flags,
Expand Down Expand Up @@ -2749,6 +2755,8 @@ void ext4_fc_stop_ineligible(struct super_block *sb);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode);
bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
void ext4_fc_replay_cleanup(struct super_block *sb);
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
int __init ext4_fc_init_dentry_cache(void);

Expand Down Expand Up @@ -2781,8 +2789,12 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
int len, int state);

/* inode.c */
void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei);
int ext4_inode_is_fast_symlink(struct inode *inode);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
Expand Down Expand Up @@ -2829,6 +2841,8 @@ extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino,
struct ext4_iloc *iloc);
extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
Expand Down Expand Up @@ -2862,12 +2876,15 @@ extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
extern void ext4_reset_inode_seed(struct inode *inode);

/* migrate.c */
extern int ext4_ext_migrate(struct inode *);
extern int ext4_ind_migrate(struct inode *inode);

/* namei.c */
extern int ext4_init_new_dir(handle_t *handle, struct inode *dir,
struct inode *inode);
extern int ext4_dirblock_csum_verify(struct inode *inode,
struct buffer_head *bh);
extern int ext4_orphan_add(handle_t *, struct inode *);
Expand Down Expand Up @@ -3447,6 +3464,10 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
extern int ext4_ci_compare(const struct inode *parent,
const struct qstr *fname,
const struct qstr *entry, bool quick);
extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
struct inode *inode);
extern int __ext4_link(struct inode *dir, struct inode *inode,
struct dentry *dentry);

#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
Expand Down Expand Up @@ -3547,6 +3568,11 @@ extern int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu);
extern int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
int check_cred, int restart_cred,
int revoke_cred);
extern void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end);
extern int ext4_ext_replay_set_iblocks(struct inode *inode);
extern int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
int len, int unwritten, ext4_fsblk_t pblk);
extern int ext4_ext_clear_bb(struct inode *inode);


/* move_extent.c */
Expand Down
2 changes: 1 addition & 1 deletion fs/ext4/ext4_jbd2.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
return ERR_PTR(err);

journal = EXT4_SB(sb)->s_journal;
if (!journal)
if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return ext4_get_nojournal();
return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds,
GFP_NOFS, type, line);
Expand Down
261 changes: 261 additions & 0 deletions fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -5804,3 +5804,264 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)

return err ? err : mapped;
}

/*
 * Updates physical block address and unwritten status of extent
 * starting at lblk start and of len. If such an extent doesn't exist,
 * this function splits the extent tree appropriately to create an
 * extent like this. This function is called in the fast commit
 * replay path.
 *
 * @inode:     inode whose extent tree is updated
 * @start:     first logical block of the target extent
 * @len:       length of the target extent, in blocks
 * @unwritten: non-zero to mark the extent unwritten, zero to mark it
 *             initialized
 * @pblk:      physical block to store in the extent
 *
 * Returns 0 on success and a negative error on failure. The inode is
 * marked dirty on every path that reaches the common exit, including
 * failures; a failed re-lookup after a split returns directly.
 */
int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
			      int len, int unwritten, ext4_fsblk_t pblk)
{
	struct ext4_ext_path *path;
	struct ext4_extent *ex;
	int ret;

	/* ext4_find_extent() reports failure with ERR_PTR(), not NULL. */
	path = ext4_find_extent(inode, start, NULL, 0);
	if (IS_ERR(path))
		return PTR_ERR(path);
	ex = path[path->p_depth].p_ext;
	if (!ex) {
		ret = -EFSCORRUPTED;
		goto out;
	}

	if (le32_to_cpu(ex->ee_block) != start ||
	    ext4_ext_get_actual_len(ex) != len) {
		/*
		 * We need to split this extent to match our extent first.
		 * The split helper takes the path by reference, so hand it
		 * our own pointer: whatever it leaves there is what gets
		 * released below, never a stale copy.
		 */
		down_write(&EXT4_I(inode)->i_data_sem);
		ret = ext4_force_split_extent_at(NULL, inode, &path, start, 1);
		up_write(&EXT4_I(inode)->i_data_sem);
		if (ret)
			goto out;

		/* Release buffer references before freeing the old path. */
		ext4_ext_drop_refs(path);
		kfree(path);
		path = ext4_find_extent(inode, start, NULL, 0);
		if (IS_ERR(path))
			return PTR_ERR(path);
		ex = path[path->p_depth].p_ext;
		if (!ex) {
			ret = -EFSCORRUPTED;
			goto out;
		}
		WARN_ON(le32_to_cpu(ex->ee_block) != start);

		if (ext4_ext_get_actual_len(ex) != len) {
			/* Extent still too long: split off the tail too. */
			down_write(&EXT4_I(inode)->i_data_sem);
			ret = ext4_force_split_extent_at(NULL, inode, &path,
							 start + len, 1);
			up_write(&EXT4_I(inode)->i_data_sem);
			if (ret)
				goto out;

			ext4_ext_drop_refs(path);
			kfree(path);
			path = ext4_find_extent(inode, start, NULL, 0);
			if (IS_ERR(path))
				return PTR_ERR(path);
			ex = path[path->p_depth].p_ext;
			if (!ex) {
				ret = -EFSCORRUPTED;
				goto out;
			}
		}
	}

	if (unwritten)
		ext4_ext_mark_unwritten(ex);
	else
		ext4_ext_mark_initialized(ex);
	ext4_ext_store_pblock(ex, pblk);

	down_write(&EXT4_I(inode)->i_data_sem);
	ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
	up_write(&EXT4_I(inode)->i_data_sem);
out:
	ext4_ext_drop_refs(path);
	kfree(path);
	ext4_mark_inode_dirty(NULL, inode);
	return ret;
}

/*
 * Try to shrink the extent tree.
 *
 * Looks up the extent at successive logical blocks below @end, calls
 * ext4_ext_try_to_merge() on each one, then marks the leaf and the inode
 * dirty. The walk stops silently when a lookup fails or yields no extent.
 */
void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
{
	struct ext4_ext_path *path;
	struct ext4_extent *ex;
	ext4_lblk_t lblk, next;

	for (lblk = 0; lblk < end; lblk = next) {
		path = ext4_find_extent(inode, lblk, NULL, 0);
		if (IS_ERR(path))
			return;

		ex = path[path->p_depth].p_ext;
		if (!ex) {
			/* No extent here: release the path and give up. */
			ext4_ext_drop_refs(path);
			kfree(path);
			ext4_mark_inode_dirty(NULL, inode);
			return;
		}

		/*
		 * Resume just past this extent, but always move forward by
		 * at least one block so the loop terminates.
		 */
		next = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
		if (next <= lblk)
			next = lblk + 1;

		ext4_ext_try_to_merge(NULL, inode, path, ex);

		/* The leaf slot is indexed only after the merge attempt. */
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
		up_write(&EXT4_I(inode)->i_data_sem);
		ext4_mark_inode_dirty(NULL, inode);

		ext4_ext_drop_refs(path);
		kfree(path);
	}
}

/*
 * Check if *cur is a hole and if it is, skip it.
 *
 * Maps from *cur up to the block count derived from i_size. When
 * ext4_map_blocks() returns 0, *cur is advanced by the length it left
 * in the map; for any other return value *cur is left untouched.
 */
static void skip_hole(struct inode *inode, ext4_lblk_t *cur)
{
	struct ext4_map_blocks map;
	ext4_lblk_t lblk = *cur;

	map.m_lblk = lblk;
	map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - lblk;

	if (ext4_map_blocks(NULL, inode, &map, 0) == 0)
		*cur = lblk + map.m_len;
}

/*
 * Count number of blocks used by this inode and update i_blocks.
 *
 * The total is the number of mapped data blocks plus an estimate of the
 * extent tree blocks, converted to 512-byte units when stored in
 * i_blocks. The inode is marked dirty after the update.
 *
 * Returns the lookup error if the initial extent lookup fails, and 0
 * otherwise. Later mapping/lookup failures only end the respective walk
 * early; i_blocks is then set from whatever was counted so far.
 */
int ext4_ext_replay_set_iblocks(struct inode *inode)
{
	struct ext4_ext_path *path = NULL, *path2 = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t cur = 0, end;
	/*
	 * Wide unsigned counter: a plain int could overflow, both while
	 * accumulating and in the shift to 512-byte units below.
	 */
	unsigned long long numblks = 0;
	int i, ret = 0;
	ext4_fsblk_t cmp1, cmp2;
	struct ext4_map_blocks map;

	/* Determine the size of the file first */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
				EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return PTR_ERR(path);
	ex = path[path->p_depth].p_ext;
	if (!ex) {
		/* No extents at all: i_blocks becomes 0. */
		ext4_ext_drop_refs(path);
		kfree(path);
		goto out;
	}
	end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
	ext4_ext_drop_refs(path);
	kfree(path);

	/* Count the number of data blocks */
	cur = 0;
	while (cur < end) {
		map.m_lblk = cur;
		map.m_len = end - cur;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			break;
		if (ret > 0)
			numblks += ret;
		cur = cur + map.m_len;
	}

	/*
	 * Count the number of extent tree blocks. We do it by looking up
	 * two successive extents and determining the difference between
	 * their paths. When path is different for 2 successive extents
	 * we compare the blocks in the path at each level and increment
	 * iblocks by total number of differences found.
	 */
	cur = 0;
	skip_hole(inode, &cur);
	path = ext4_find_extent(inode, cur, NULL, 0);
	if (IS_ERR(path))
		goto out;
	/* The first path contributes one tree block per level of depth. */
	numblks += path->p_depth;
	ext4_ext_drop_refs(path);
	kfree(path);
	while (cur < end) {
		path = ext4_find_extent(inode, cur, NULL, 0);
		if (IS_ERR(path))
			break;
		ex = path[path->p_depth].p_ext;
		if (!ex) {
			/*
			 * NOTE(review): this exit returns without updating
			 * i_blocks, unlike the other early exits - confirm
			 * this is intended.
			 */
			ext4_ext_drop_refs(path);
			kfree(path);
			return 0;
		}
		/* Step past this extent, advancing by at least one block. */
		cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
					ext4_ext_get_actual_len(ex));
		skip_hole(inode, &cur);

		path2 = ext4_find_extent(inode, cur, NULL, 0);
		if (IS_ERR(path2)) {
			ext4_ext_drop_refs(path);
			kfree(path);
			break;
		}
		for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
			cmp1 = cmp2 = 0;
			if (i <= path->p_depth)
				cmp1 = path[i].p_bh ?
					path[i].p_bh->b_blocknr : 0;
			if (i <= path2->p_depth)
				cmp2 = path2[i].p_bh ?
					path2[i].p_bh->b_blocknr : 0;
			/* A different, non-zero block at this level is new. */
			if (cmp1 != cmp2 && cmp2 != 0)
				numblks++;
		}
		ext4_ext_drop_refs(path);
		ext4_ext_drop_refs(path2);
		kfree(path);
		kfree(path2);
	}

out:
	/* Convert filesystem blocks to 512-byte units. */
	inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
	ext4_mark_inode_dirty(NULL, inode);
	return 0;
}

/*
 * Walk every mapped range of @inode and call ext4_mb_mark_bb() with
 * state 0 on its physical blocks, as well as on the p_block recorded at
 * path levels 0 .. p_depth - 1 of the extent path leading to the range.
 *
 * Returns the lookup error if the initial extent lookup fails and 0
 * otherwise; a mapping error merely ends the walk.
 */
int ext4_ext_clear_bb(struct inode *inode)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *last_ex;
	struct ext4_map_blocks map;
	ext4_lblk_t lblk, end;
	int level, ret = 0;

	/* Determine the size of the file first */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
				EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return PTR_ERR(path);

	last_ex = path[path->p_depth].p_ext;
	if (last_ex)
		end = le32_to_cpu(last_ex->ee_block) +
			ext4_ext_get_actual_len(last_ex);
	ext4_ext_drop_refs(path);
	kfree(path);
	if (!last_ex)
		return 0;

	for (lblk = 0; lblk < end; lblk = lblk + map.m_len) {
		map.m_lblk = lblk;
		map.m_len = end - lblk;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			break;
		if (ret == 0)
			continue;	/* nothing mapped, just step over it */

		/* First the blocks recorded along the extent path ... */
		path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
		if (!IS_ERR_OR_NULL(path)) {
			for (level = 0; level < path->p_depth; level++)
				ext4_mb_mark_bb(inode->i_sb,
						path[level].p_block, 1, 0);
			ext4_ext_drop_refs(path);
			kfree(path);
		}

		/* ... then the mapped data blocks themselves. */
		ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
	}

	return 0;
}
Loading

0 comments on commit 8016e29

Please sign in to comment.