From 787c7b8cb3c5196f77e4682e0b1c71375e74822c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Oct 2015 09:13:04 +0800 Subject: [PATCH 01/87] f2fs: report error of f2fs_create_root_stats f2fs_create_root_stats can fail due to no memory, report it to user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++++-- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/super.c | 6 +++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 478e5d54154f5..b0966f3b1c9a8 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -406,20 +406,23 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) kfree(si); } -void __init f2fs_create_root_stats(void) +int __init f2fs_create_root_stats(void) { struct dentry *file; f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); if (!f2fs_debugfs_root) - return; + return -ENOMEM; file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, &stat_fops); if (!file) { debugfs_remove(f2fs_debugfs_root); f2fs_debugfs_root = NULL; + return -ENOMEM; } + + return 0; } void f2fs_destroy_root_stats(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9db5500d63d98..3f1570c4fcf05 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1987,7 +1987,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) int f2fs_build_stats(struct f2fs_sb_info *); void f2fs_destroy_stats(struct f2fs_sb_info *); -void __init f2fs_create_root_stats(void); +int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_cp_count(si) @@ -2015,7 +2015,7 @@ void f2fs_destroy_root_stats(void); static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -static inline void __init f2fs_create_root_stats(void) { } +static inline int __init f2fs_create_root_stats(void) { return 0; } static inline void f2fs_destroy_root_stats(void) { } #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3a65e01323528..67864ab376c89 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1478,10 +1478,14 @@ static int __init init_f2fs_fs(void) err = register_filesystem(&f2fs_fs_type); if (err) goto free_shrinker; - f2fs_create_root_stats(); + err = f2fs_create_root_stats(); + if (err) + goto free_filesystem; f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); return 0; +free_filesystem: + unregister_filesystem(&f2fs_fs_type); free_shrinker: unregister_shrinker(&f2fs_shrinker_info); free_crypto: From 2da3e027461ab0148384b02bd5905f1a7b335dff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Oct 2015 17:56:14 +0800 Subject: [PATCH 02/87] f2fs: commit atomic written page in LFS mode We should always commit atomic written pages in LFS mode, otherwise data will become corrupted if we encounter suddent power cut after partial pages committed in IPU mode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 972eab7ac0719..0cc8de2839c41 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1083,6 +1083,7 @@ int do_write_data_page(struct f2fs_io_info *fio) */ if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && + !IS_ATOMIC_WRITTEN_PAGE(page) && need_inplace_update(inode))) { rewrite_data_page(fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); From d323d005ac4a2d413128267af76bb9d71f7303da Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 27 Oct 2015 09:53:45 +0800 Subject: [PATCH 03/87] f2fs: support file defragment This patch introduces a new ioctl F2FS_IOC_DEFRAGMENT to support file defragment in a specified range of regular file. This ioctl can be used in very limited workload: if user expects high sequential read performance in randomly written file, this interface can be used for defragmentation, after that file can be written as continuous as possible in the device. Meanwhile, it has side-effect, it will make holes in segments where blocks located originally, so it's better to trigger GC to eliminate fragment in segments. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +- fs/f2fs/f2fs.h | 8 ++ fs/f2fs/file.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0cc8de2839c41..c3e1ffa0c8d64 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -566,7 +566,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, * b. do not use extent cache for better performance * c. give the block addresses to blockdev */ -static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, +int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag) { unsigned int maxblocks = map->m_len; @@ -1355,6 +1355,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; + /* skip writing during file defragment */ + if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG)) + goto skip_write; + /* during POR, we don't need to trigger writepage at all. */ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f1570c4fcf05..b01ad514fbd87 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -234,6 +234,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) +#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) #define F2FS_IOC_SET_ENCRYPTION_POLICY \ _IOR('f', 19, struct f2fs_encryption_policy) @@ -260,6 +261,11 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS #endif +struct f2fs_defragment { + u64 start; + u64 len; +}; + /* * For INODE and NODE manager */ @@ -1416,6 +1422,7 @@ enum { FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ + FI_DO_DEFRAG, /* indicate defragment is running */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1847,6 +1854,7 @@ struct page *find_data_page(struct inode *, pgoff_t); struct page *get_lock_data_page(struct inode *, pgoff_t, bool); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct f2fs_io_info *); +int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); int f2fs_release_page(struct page *, gfp_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a197215ad52bf..2f392982c5970 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1646,6 +1646,199 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) return 0; } +static int f2fs_defragment_range(struct f2fs_sb_info *sbi, + struct file *filp, + struct f2fs_defragment *range) +{ + struct inode *inode = file_inode(filp); + struct f2fs_map_blocks map; + struct extent_info ei; + pgoff_t pg_start, pg_end; + unsigned int blk_per_seg = 1 << sbi->log_blocks_per_seg; + unsigned int total = 0, sec_num; + unsigned int pages_per_sec = sbi->segs_per_sec * + (1 << sbi->log_blocks_per_seg); + block_t blk_end = 0; + bool fragmented = false; + int err; + + /* if in-place-update policy is enabled, don't waste time here */ + if (need_inplace_update(inode)) + return -EINVAL; + + pg_start = range->start >> PAGE_CACHE_SHIFT; + pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT; + + f2fs_balance_fs(sbi); + + mutex_lock(&inode->i_mutex); + + /* writeback all dirty pages in the range */ + err = filemap_write_and_wait_range(inode->i_mapping, range->start, + range->start + range->len); + if (err) + goto out; + + /* + * lookup mapping info in extent cache, skip defragmenting if physical + * block addresses are continuous. + */ + if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) { + if (ei.fofs + ei.len >= pg_end) + goto out; + } + + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + + /* + * lookup mapping info in dnode page cache, skip defragmenting if all + * physical block addresses are continuous even if there are hole(s) + * in logical blocks. + */ + while (map.m_lblk < pg_end) { + map.m_flags = 0; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + if (err) + goto out; + + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + map.m_lblk++; + map.m_len--; + continue; + } + + if (blk_end && blk_end != map.m_pblk) { + fragmented = true; + break; + } + blk_end = map.m_pblk + map.m_len; + + map.m_lblk += map.m_len; + map.m_len = pg_end - map.m_lblk; + } + + if (!fragmented) + goto out; + + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + + sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; + + /* + * make sure there are enough free section for LFS allocation, this can + * avoid defragment running in SSR mode when free section are allocated + * intensively + */ + if (has_not_enough_free_secs(sbi, sec_num)) { + err = -EAGAIN; + goto out; + } + + while (map.m_lblk < pg_end) { + pgoff_t idx; + int cnt = 0; + +do_map: + map.m_flags = 0; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); + if (err) + goto clear_out; + + if (!(map.m_flags & F2FS_MAP_FLAGS)) { + map.m_lblk++; + continue; + } + + set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + + idx = map.m_lblk; + while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { + struct page *page; + + page = get_lock_data_page(inode, idx, true); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto clear_out; + } + + set_page_dirty(page); + f2fs_put_page(page, 1); + + idx++; + cnt++; + total++; + } + + map.m_lblk = idx; + map.m_len = pg_end - idx; + + if (idx < pg_end && cnt < blk_per_seg) + goto do_map; + + clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + + err = filemap_fdatawrite(inode->i_mapping); + if (err) + goto out; + } +clear_out: + clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); +out: + mutex_unlock(&inode->i_mutex); + if (!err) + range->len = (u64)total << PAGE_CACHE_SHIFT; + return err; +} + +static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_defragment range; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) + return err; + + if (f2fs_readonly(sbi->sb)) { + err = -EROFS; + goto out; + } + + if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, + sizeof(range))) { + err = -EFAULT; + goto out; + } + + /* verify alignment of offset & size */ + if (range.start & (F2FS_BLKSIZE - 1) || + range.len & (F2FS_BLKSIZE - 1)) { + err = -EINVAL; + goto out; + } + + err = f2fs_defragment_range(sbi, filp, &range); + if (err < 0) + goto out; + + if (copy_to_user((struct f2fs_defragment __user *)arg, &range, + sizeof(range))) + err = -EFAULT; +out: + mnt_drop_write_file(filp); + return err; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -1679,6 +1872,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_gc(filp, arg); case F2FS_IOC_WRITE_CHECKPOINT: return f2fs_ioc_write_checkpoint(filp, arg); + case F2FS_IOC_DEFRAGMENT: + return f2fs_ioc_defragment(filp, arg); default: return -ENOTTY; } From 4bb9998d388b48dc0a4128bd1f4d85f09ec3b705 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 16 Nov 2015 20:46:28 +0900 Subject: [PATCH 04/87] Doc: f2fs: Fix typos in Documentation/filesystems/f2fs.txt This patch fix some typos in Documentation/filesystems/f2fs.txt Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b102b436563eb..ad10494aa224f 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -102,7 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage collection, triggered in background when I/O subsystem is idle. If background_gc=on, it will turn on the garbage collection and if background_gc=off, garbage collection - will be truned off. If background_gc=sync, it will turn + will be turned off. If background_gc=sync, it will turn on synchronous garbage collection running in background. Default value for this option is on. So garbage collection is on by default. @@ -145,7 +145,7 @@ extent_cache Enable an extent cache based on rb-tree, it can cache as many as extent which map between contiguous logical address and physical address per inode, resulting in increasing the cache hit ratio. Set by default. -noextent_cache Diable an extent cache based on rb-tree explicitly, see +noextent_cache Disable an extent cache based on rb-tree explicitly, see the above extent_cache mount option. noinline_data Disable the inline data feature, inline data feature is enabled by default. @@ -192,7 +192,7 @@ Files in /sys/fs/f2fs/ policy for garbage collection. Setting gc_idle = 0 (default) will disable this option. Setting gc_idle = 1 will select the Cost Benefit approach - & setting gc_idle = 2 will select the greedy aproach. + & setting gc_idle = 2 will select the greedy approach. reclaim_segments This parameter controls the number of prefree segments to be reclaimed. If the number of prefree @@ -298,7 +298,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to file. Each file is dump_ssa and dump_sit. The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. -It shows on-disk inode information reconized by a given inode number, and is +It shows on-disk inode information recognized by a given inode number, and is able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and ./dump_sit respectively. From 29ba108d9ba44600961418a352871eb967d68c20 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Nov 2015 20:38:25 +0800 Subject: [PATCH 05/87] f2fs: fix memory leak of kobject in error path of fill_super f2fs_sb_info::s_kobj should be released in error path of fill_super, otherwise it will lead to memory leak. This bug was found by kmemleak: dmesg: kmemleak: 2 new suspected memory leaks (see /sys/kernel/debug/kmemleak) cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800838dc358 (size 8): comm "mount", pid 4154, jiffies 4297482839 (age 1911.412s) hex dump (first 8 bytes): 7a 72 61 6d 31 00 ff ff zram1... backtrace: [] kmemleak_alloc+0x28/0x50 [] __kmalloc_track_caller+0xef/0x1c0 [] kstrdup+0x45/0x80 [] kstrdup_const+0x28/0x30 [] kvasprintf_const+0x63/0xa0 [] kobject_set_name_vargs+0x3c/0xa0 [] kobject_add_varg+0x25/0x60 [] kobject_init_and_add+0x53/0x70 [] f2fs_fill_super+0x9d9/0xc40 [f2fs] [] mount_bdev+0x192/0x1d0 [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x43/0x170 [] vfs_kern_mount+0x76/0x160 [] do_mount+0x258/0xdc0 [] SyS_mount+0x7b/0xc0 [] entry_SYSCALL_64_fastpath+0x12/0x6f ... Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 67864ab376c89..bd7e9c6c42c80 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1361,6 +1361,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_kobj: kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); free_proc: if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); From 04ef4b626c324a7529c80ffc45787b274a6fa68a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Nov 2015 18:44:20 +0800 Subject: [PATCH 06/87] f2fs: fix to enable missing ioctl interfaces in ->compat_ioctl In 64-bit kernel f2fs can supports 32-bit ioctl system call by identifying encoded code which is converted from 32-bit one to 64-bit one in ->compat_ioctl. When we introduced new interfaces in ->ioctl, we forgot to enable them in ->compat_ioctl, so enable them for fixing. Signed-off-by: Chao Yu [Jaegeuk Kim: fix wrongly added spaces together] Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 +++-- fs/f2fs/file.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b01ad514fbd87..904384427ca6f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -257,8 +257,9 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * ioctl commands in 32 bit emulation */ -#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION #endif struct f2fs_defragment { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2f392982c5970..3f1026caf8077 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1901,6 +1901,22 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC32_SETFLAGS: cmd = F2FS_IOC_SETFLAGS; break; + case F2FS_IOC32_GETVERSION: + cmd = F2FS_IOC_GETVERSION; + break; + case F2FS_IOC_START_ATOMIC_WRITE: + case F2FS_IOC_COMMIT_ATOMIC_WRITE: + case F2FS_IOC_START_VOLATILE_WRITE: + case F2FS_IOC_RELEASE_VOLATILE_WRITE: + case F2FS_IOC_ABORT_VOLATILE_WRITE: + case F2FS_IOC_SHUTDOWN: + case F2FS_IOC_SET_ENCRYPTION_POLICY: + case F2FS_IOC_GET_ENCRYPTION_PWSALT: + case F2FS_IOC_GET_ENCRYPTION_POLICY: + case F2FS_IOC_GARBAGE_COLLECT: + case F2FS_IOC_WRITE_CHECKPOINT: + case F2FS_IOC_DEFRAGMENT: + break; default: return -ENOIOCTLCMD; } From eb7e813cc791735f2428202d5249a8fd769df1f3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Nov 2015 18:45:07 +0800 Subject: [PATCH 07/87] f2fs: fix to remove directory inode from dirty list If last dirty dentry page was writebacked in reclaim path, we should remove its directory inode from global dirty list to avoid unnecessary flush for this inode when doing checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c3e1ffa0c8d64..6c689e9a86a38 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1182,8 +1182,10 @@ static int f2fs_write_data_page(struct page *page, unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); - if (wbc->for_reclaim) + if (wbc->for_reclaim) { f2fs_submit_merged_bio(sbi, DATA, WRITE); + remove_dirty_dir_inode(inode); + } return 0; redirty_out: From 692223d132067ef2c392adec6f1324d581496212 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Thu, 12 Nov 2015 08:43:04 +0800 Subject: [PATCH 08/87] f2fs: optimize __find_rev_next_bit 1. Skip __reverse_ulong if the bitmap is empty. 2. Reduce branches and codes. According to my test, the performance of this new version is 5% higher on an empty bitmap of 64bytes, and remains about the same in the worst scenario. Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 46 ++++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f77b3258454a6..efbf6b5f1dc3a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -86,6 +86,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) /* * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because * f2fs_set_bit makes MSB and LSB reversed in a byte. + * @size must be integral times of unsigned long. * Example: * MSB <--> LSB * f2fs_set_bit(0, bitmap) => 1000 0000 @@ -95,47 +96,36 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long result = size; unsigned long tmp; if (offset >= size) return size; - size -= result; + size -= (offset & ~(BITS_PER_LONG - 1)); offset %= BITS_PER_LONG; - if (!offset) - goto aligned; - tmp = __reverse_ulong((unsigned char *)p); - tmp &= ~0UL >> offset; - - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; + while (1) { + if (*p == 0) + goto pass; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - p++; -aligned: - while (size & ~(BITS_PER_LONG-1)) { tmp = __reverse_ulong((unsigned char *)p); + + tmp &= ~0UL >> offset; + if (size < BITS_PER_LONG) + tmp &= (~0UL << (BITS_PER_LONG - size)); if (tmp) - goto found_middle; - result += BITS_PER_LONG; + goto found; +pass: + if (size <= BITS_PER_LONG) + break; size -= BITS_PER_LONG; + offset = 0; p++; } - if (!size) - return result; - - tmp = __reverse_ulong((unsigned char *)p); -found_first: - tmp &= (~0UL << (BITS_PER_LONG - size)); - if (!tmp) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __reverse_ffs(tmp); + return result; +found: + return result - size + __reverse_ffs(tmp); } static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, From f478f43fa0d8f38537848d298980955244afdaee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 13 Nov 2015 18:27:35 +0800 Subject: [PATCH 09/87] f2fs: clear page uptodate when dropping cache for atomic write We should clear uptodate flag for all pages atomic written when we drop them, otherwise before these cached pages were reclaimed or invalidated eventually, we will see invalid data when hitting them again. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index efbf6b5f1dc3a..ed2c5dec75267 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -247,6 +247,7 @@ int commit_inmem_pages(struct inode *inode, bool abort) submit_bio = true; } } else { + ClearPageUptodate(cur->page); trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); } set_page_private(cur->page, 0); From 57b62d29ad5b384775974973087d47755a8c6fcc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Nov 2015 16:09:07 +0800 Subject: [PATCH 10/87] f2fs: fix to report error in f2fs_readdir get_lock_data_page in f2fs_readdir can fail due to a lot of reasons (i.e. no memory or IO error...), it's better to report this kind of error to user rather than ignoring it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7c1678ba8f926..9de898d2ddff3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -855,8 +855,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n, false); - if (IS_ERR(dentry_page)) - continue; + if (IS_ERR(dentry_page)) { + err = PTR_ERR(dentry_page); + if (err == -ENOENT) + continue; + else + goto out; + } dentry_blk = kmap(dentry_page); From 760de7914e27781abb44564449c761ea4440f982 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Nov 2015 16:26:44 -0800 Subject: [PATCH 11/87] f2fs: avoid deadlock in f2fs_shrink_extent_tree While handling extent trees, we can enter into a reclaiming path anytime. If it tries to release some extent nodes in the same extent tree, write_lock(&et->lock) would be hanged. In order to avoid the deadlock, we can just skip it. Note that, if it is an unreferenced tree, we should get write_lock(&et->lock) successfully and release all of therein nodes. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 7ddba812e11b7..de063f2b23847 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -615,9 +615,10 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) for (i = 0; i < found; i++) { struct extent_tree *et = treevec[i]; - write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et, false); - write_unlock(&et->lock); + if (write_trylock(&et->lock)) { + node_cnt += __free_extent_tree(sbi, et, false); + write_unlock(&et->lock); + } if (node_cnt + tree_cnt >= nr_shrink) goto unlock_out; From 807b1e1c8e08452948495b1a9985ab46d329e5c2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Dec 2015 14:14:40 -0800 Subject: [PATCH 12/87] f2fs: do not recover from previous remained wrong dnodes If device does not support discard, some obsolete dnodes can be recovered by roll-forward. This patch enhances the recovery flow. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cbf74f47cce8a..fad010faa8591 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -168,6 +168,32 @@ static void recover_inode(struct inode *inode, struct page *page) ino_of_node(page), name); } +static bool is_same_inode(struct inode *inode, struct page *ipage) +{ + struct f2fs_inode *ri = F2FS_INODE(ipage); + struct timespec disk; + + if (!IS_INODE(ipage)) + return true; + + disk.tv_sec = le64_to_cpu(ri->i_ctime); + disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); + if (timespec_compare(&inode->i_ctime, &disk) > 0) + return false; + + disk.tv_sec = le64_to_cpu(ri->i_atime); + disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec); + if (timespec_compare(&inode->i_atime, &disk) > 0) + return false; + + disk.tv_sec = le64_to_cpu(ri->i_mtime); + disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); + if (timespec_compare(&inode->i_mtime, &disk) > 0) + return false; + + return true; +} + static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) { unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); @@ -197,7 +223,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) goto next; entry = get_fsync_inode(head, ino_of_node(page)); - if (!entry) { + if (entry) { + if (!is_same_inode(entry->inode, page)) + goto next; + } else { if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) From e9837bc2a4a407ee366143cf721ee77154ac051e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:41:50 +0800 Subject: [PATCH 13/87] f2fs: clean up error path in f2fs_readdir No logic changes, just clean up the error path. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9de898d2ddff3..6554fd5fce88f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -867,18 +867,15 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); - if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) - goto stop; + if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) { + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + break; + } ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); - dentry_page = NULL; - } -stop: - if (dentry_page && !IS_ERR(dentry_page)) { - kunmap(dentry_page); - f2fs_put_page(dentry_page, 1); } out: f2fs_fname_crypto_free_buffer(&fstr); From 855639decaa7ba5f356d6928c744a0ae1977c134 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:42:54 +0800 Subject: [PATCH 14/87] f2fs: clean up code with __has_cursum_space Clean up codes in lookup_journal_in_cursum() with __has_cursum_space(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ed2c5dec75267..74c474821e5a1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1740,13 +1740,13 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, if (le32_to_cpu(nid_in_journal(sum, i)) == val) return i; } - if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) + if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL)) return update_nats_in_cursum(sum, 1); } else if (type == SIT_JOURNAL) { for (i = 0; i < sits_in_cursum(sum); i++) if (le32_to_cpu(segno_in_journal(sum, i)) == val) return i; - if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) + if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL)) return update_sits_in_cursum(sum, 1); } return -1; From b7973f2378c619d0e17a075f13350bd58a9ebe3d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:43:59 +0800 Subject: [PATCH 15/87] f2fs: clean up argument of recover_data In recover_data, value of argument 'type' will be CURSEG_WARM_NODE all the time, remove it for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fad010faa8591..7fcb6e49defff 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -488,8 +488,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, return err; } -static int recover_data(struct f2fs_sb_info *sbi, - struct list_head *head, int type) +static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head) { unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; @@ -498,7 +497,7 @@ static int recover_data(struct f2fs_sb_info *sbi, block_t blkaddr; /* get node pages in the current segment */ - curseg = CURSEG_I(sbi, type); + curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); while (1) { @@ -585,7 +584,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) need_writecp = true; /* step #2: recover data */ - err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + err = recover_data(sbi, &inode_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); out: From 9006f2c93fe5cc450bc0d3a4924b46393f165b4a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:47:33 +0800 Subject: [PATCH 16/87] f2fs: kill f2fs_drop_largest_extent For direct IO, f2fs only allocate new address for the block which is not exist in the disk before, its mapping info should not exist in extent cache previously, so here we do not need to call f2fs_drop_largest_extent to drop related cache. Due to no more callers for f2fs_drop_largest_extent now, kill it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ---- fs/f2fs/extent_cache.c | 8 -------- fs/f2fs/f2fs.h | 1 - 3 files changed, 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6c689e9a86a38..90a2ffea875bf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -494,10 +494,6 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)); - - /* direct IO doesn't use extent cache to maximize the performance */ - f2fs_drop_largest_extent(dn->inode, fofs); - return 0; } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index de063f2b23847..e86e9f1e0733a 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -164,14 +164,6 @@ static void __drop_largest_extent(struct inode *inode, largest->len = 0; } -void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs) -{ - if (!f2fs_may_extent_tree(inode)) - return; - - __drop_largest_extent(inode, fofs, 1); -} - void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 904384427ca6f..0831db2f4b3ae 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2078,7 +2078,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *); * extent_cache.c */ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); -void f2fs_drop_largest_extent(struct inode *, pgoff_t); void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); unsigned int f2fs_destroy_extent_node(struct inode *); void f2fs_destroy_extent_tree(struct inode *); From 3519e3f992995d46c200134cfbf84c61b7a01f4c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:56:52 +0800 Subject: [PATCH 17/87] f2fs: use sbi->blocks_per_seg to avoid unnecessary calculation Use sbi->blocks_per_seg directly to avoid unnecessary calculation when using 1 << sbi->log_blocks_per_seg. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/file.c | 5 ++--- fs/f2fs/gc.c | 4 ++-- fs/f2fs/node.h | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b0966f3b1c9a8..8ce2fe3f65ab5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -105,7 +105,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); + blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0831db2f4b3ae..0052ae8bea3f6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1099,8 +1099,7 @@ static inline int get_dirty_pages(struct inode *inode) static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) { - unsigned int pages_per_sec = sbi->segs_per_sec * - (1 << sbi->log_blocks_per_seg); + unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; return ((get_pages(sbi, block_type) + pages_per_sec - 1) >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3f1026caf8077..96e4e048a8a1a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1654,10 +1654,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map; struct extent_info ei; pgoff_t pg_start, pg_end; - unsigned int blk_per_seg = 1 << sbi->log_blocks_per_seg; + unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; - unsigned int pages_per_sec = sbi->segs_per_sec * - (1 << sbi->log_blocks_per_seg); + unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; block_t blk_end = 0; bool fragmented = false; int err; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index fedbf67a08427..ce350c44b5cf4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -173,9 +173,9 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, { /* SSR allocates in a segment unit */ if (p->alloc_mode == SSR) - return 1 << sbi->log_blocks_per_seg; + return sbi->blocks_per_seg; if (p->gc_mode == GC_GREEDY) - return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; + return sbi->blocks_per_seg * p->ofs_unit; else if (p->gc_mode == GC_CB) return UINT_MAX; else /* No other gc_mode */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e4fffd2d98c4b..2de759a7746f5 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -183,7 +183,7 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) block_addr = (pgoff_t)(nm_i->nat_blkaddr + (seg_off << sbi->log_blocks_per_seg << 1) + - (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + (block_off & (sbi->blocks_per_seg - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) block_addr += sbi->blocks_per_seg; From 0cab80ee0c9e17337468c4c0f96786ccbca693d9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 1 Dec 2015 11:36:16 +0800 Subject: [PATCH 18/87] f2fs: fix to convert inline inode in ->setattr In commit 3c4541452748 ("f2fs: do not trim preallocated blocks when truncating after i_size"), in order to follow the regulation: "truncate(x) where x > i_size will not trim all blocks past i_size." like other file systems, in ->setattr we invoked truncate_setsize instead of f2fs_truncate to avoid unneeded block trimming in such case, but forgot to call f2fs_convert_inline_inode keep consistency of inline data conversion rule. This patch fixes to convert inline data if necessary. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 96e4e048a8a1a..a018ed327713b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -686,6 +686,14 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) * larger than i_size. */ truncate_setsize(inode, attr->ia_size); + + /* should convert inline inode here */ + if (f2fs_has_inline_data(inode) && + !f2fs_may_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } inode->i_mtime = inode->i_ctime = CURRENT_TIME; } } From 80609448cd63b700a37427e423c201fc5e16e95a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 4 Dec 2015 16:51:13 -0800 Subject: [PATCH 19/87] f2fs: enhance the bit operation for SSR This patch enhances the existing bit operation when f2fs allocates SSR blocks. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 50 +++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 74c474821e5a1..5fa519f028602 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -132,47 +132,37 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long result = size; unsigned long tmp; if (offset >= size) return size; - size -= result; + size -= (offset & ~(BITS_PER_LONG - 1)); offset %= BITS_PER_LONG; - if (!offset) - goto aligned; - - tmp = __reverse_ulong((unsigned char *)p); - tmp |= ~((~0UL << offset) >> offset); - - if (size < BITS_PER_LONG) - goto found_first; - if (tmp != ~0UL) - goto found_middle; - - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - p++; -aligned: - while (size & ~(BITS_PER_LONG - 1)) { + + while (1) { + if (*p == ~0UL) + goto pass; + tmp = __reverse_ulong((unsigned char *)p); + + if (offset) + tmp |= ~0UL << (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + tmp |= ~0UL >> size; if (tmp != ~0UL) - goto found_middle; - result += BITS_PER_LONG; + goto found; +pass: + if (size <= BITS_PER_LONG) + break; size -= BITS_PER_LONG; + offset = 0; p++; } - if (!size) - return result; - - tmp = __reverse_ulong((unsigned char *)p); -found_first: - tmp |= ~(~0UL << (BITS_PER_LONG - size)); - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + __reverse_ffz(tmp); + return result; +found: + return result - size + __reverse_ffz(tmp); } void register_inmem_page(struct inode *inode, struct page *page) From 5d909cdbbba244ecb4c2dfc4dc3e3bc29529fb05 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 7 Dec 2015 10:16:58 -0800 Subject: [PATCH 20/87] f2fs: refactor f2fs_commit_super Previously, f2fs_commit_super hacks the bh->blocknr to write the broken alternate superblock. Instead of it, we should use the correct logic to retrieve its buffer head with locking it appropriately. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bd7e9c6c42c80..dbf16ade0e9a7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1099,27 +1099,35 @@ static int read_raw_super_block(struct super_block *sb, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *sbh = sbi->raw_super_buf; - sector_t block = sbh->b_blocknr; + struct buffer_head *bh; int err; /* write back-up superblock first */ - sbh->b_blocknr = block ? 0 : 1; - mark_buffer_dirty(sbh); - err = sync_dirty_buffer(sbh); + bh = sb_getblk(sbi->sb, sbh->b_blocknr ? 0 : 1); + if (!bh) + return -EIO; - sbh->b_blocknr = block; + lock_buffer(bh); + memcpy(bh->b_data, sbh->b_data, sbh->b_size); + WARN_ON(sbh->b_size != F2FS_BLKSIZE); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + unlock_buffer(bh); + + /* it's rare case, we can do fua all the time */ + err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); + brelse(bh); /* if we are in recovery path, skip writing valid superblock */ if (recover || err) - goto out; + return err; /* write current valid superblock */ - mark_buffer_dirty(sbh); - err = sync_dirty_buffer(sbh); -out: - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - return err; + lock_buffer(sbh); + set_buffer_dirty(sbh); + unlock_buffer(sbh); + + return __sync_dirty_buffer(sbh, WRITE_FLUSH_FUA); } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) From ea212a4a7a432e0ecd0f0f53971b70172b3e7f96 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 7 Dec 2015 10:18:54 -0800 Subject: [PATCH 21/87] f2fs: use lock_buffer when changing superblock When modifying sb contents, we need to use lock its buffer. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a018ed327713b..294e71576cec9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1591,14 +1591,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) return err; /* update superblock with uuid */ + lock_buffer(sbi->raw_super_buf); generate_random_uuid(sbi->raw_super->encrypt_pw_salt); + unlock_buffer(sbi->raw_super_buf); err = f2fs_commit_super(sbi, false); mnt_drop_write_file(filp); if (err) { /* undo new data */ + lock_buffer(sbi->raw_super_buf); memset(sbi->raw_super->encrypt_pw_salt, 0, 16); + unlock_buffer(sbi->raw_super_buf); return err; } got_it: From e1c51b9f1df2f9efc2ec11488717e40cd12015f9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 11 Dec 2015 16:08:22 +0800 Subject: [PATCH 22/87] f2fs: clean up node page updating flow If read_node_page return LOCKED_PAGE, in its caller it's better a) skip unneeded 'Update' flag and mapping info verfication; b) check nid value stored in footer structure of node page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7bcbc6e9c40d4..d842b199cd024 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1063,12 +1063,13 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) if (err < 0) { f2fs_put_page(page, 1); return ERR_PTR(err); - } else if (err != LOCKED_PAGE) { - lock_page(page); + } else if (err == LOCKED_PAGE) { + goto page_hit; } - if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { - ClearPageUptodate(page); + lock_page(page); + + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -1076,6 +1077,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) f2fs_put_page(page, 1); goto repeat; } +page_hit: + f2fs_bug_on(sbi, nid != nid_of_node(page)); return page; } @@ -1114,24 +1117,25 @@ struct page *get_node_page_ra(struct page *parent, int start) end = start + MAX_RA_NODE; end = min(end, NIDS_PER_BLOCK); for (i = start + 1; i < end; i++) { - nid = get_nid(parent, i, false); - if (!nid) + nid_t tnid = get_nid(parent, i, false); + if (!tnid) continue; - ra_node_page(sbi, nid); + ra_node_page(sbi, tnid); } blk_finish_plug(&plug); lock_page(page); + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } page_hit: - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } + f2fs_bug_on(sbi, nid != nid_of_node(page)); return page; } From d8fe4f0e74cb27e79b2b500ca6eae9f9b02748b4 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Mon, 14 Dec 2015 13:34:00 +0800 Subject: [PATCH 23/87] f2fs: write only the pages in range during defragment @lend of filemap_write_and_wait_range is supposed to be a "offset in bytes where the range ends (inclusive)". Subtract 1 to avoid writing an extra page. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 294e71576cec9..5fac4f2599651 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1686,7 +1686,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, - range->start + range->len); + range->start + range->len - 1); if (err) goto out; From e8931582bdaf4d4a6ee7e520ef0ccd481713a42a Mon Sep 17 00:00:00 2001 From: Fan Li Date: Mon, 14 Dec 2015 15:26:04 +0800 Subject: [PATCH 24/87] f2fs: fix to update variable correctly when skip a unmapped block map.m_len should be reduced after skip a block Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5fac4f2599651..9949d0f332c2a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1759,6 +1759,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; + map.m_len--; continue; } From 9a59b62fd88196844cee5fff851bee2cfd7afb6e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 09:58:18 +0800 Subject: [PATCH 25/87] f2fs: do more integrity verification for superblock Do more sanity check for superblock during ->mount. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dbf16ade0e9a7..a6f2beda22b9f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -918,6 +918,79 @@ static loff_t max_file_size(unsigned bits) return result; } +static inline bool sanity_check_area_boundary(struct super_block *sb, + struct f2fs_super_block *raw_super) +{ + u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); + u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); + u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr); + u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr); + u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); + u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt); + u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit); + u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat); + u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa); + u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main); + u32 segment_count = le32_to_cpu(raw_super->segment_count); + u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + + if (segment0_blkaddr != cp_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Mismatch start address, segment0(%u) cp_blkaddr(%u)", + segment0_blkaddr, cp_blkaddr); + return true; + } + + if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) != + sit_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong CP boundary, start(%u) end(%u) blocks(%u)", + cp_blkaddr, sit_blkaddr, + segment_count_ckpt << log_blocks_per_seg); + return true; + } + + if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) != + nat_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", + sit_blkaddr, nat_blkaddr, + segment_count_sit << log_blocks_per_seg); + return true; + } + + if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) != + ssa_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", + nat_blkaddr, ssa_blkaddr, + segment_count_nat << log_blocks_per_seg); + return true; + } + + if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) != + main_blkaddr) { + f2fs_msg(sb, KERN_INFO, + "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", + ssa_blkaddr, main_blkaddr, + segment_count_ssa << log_blocks_per_seg); + return true; + } + + if (main_blkaddr + (segment_count_main << log_blocks_per_seg) != + segment0_blkaddr + (segment_count << log_blocks_per_seg)) { + f2fs_msg(sb, KERN_INFO, + "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)", + main_blkaddr, + segment0_blkaddr + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); + return true; + } + + return false; +} + static int sanity_check_raw_super(struct super_block *sb, struct f2fs_super_block *raw_super) { @@ -947,6 +1020,14 @@ static int sanity_check_raw_super(struct super_block *sb, return 1; } + /* check log blocks per segment */ + if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { + f2fs_msg(sb, KERN_INFO, + "Invalid log blocks per segment (%u)\n", + le32_to_cpu(raw_super->log_blocks_per_seg)); + return 1; + } + /* Currently, support 512/1024/2048/4096 bytes sector size */ if (le32_to_cpu(raw_super->log_sectorsize) > F2FS_MAX_LOG_SECTOR_SIZE || @@ -965,6 +1046,23 @@ static int sanity_check_raw_super(struct super_block *sb, le32_to_cpu(raw_super->log_sectorsize)); return 1; } + + /* check reserved ino info */ + if (le32_to_cpu(raw_super->node_ino) != 1 || + le32_to_cpu(raw_super->meta_ino) != 2 || + le32_to_cpu(raw_super->root_ino) != 3) { + f2fs_msg(sb, KERN_INFO, + "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", + le32_to_cpu(raw_super->node_ino), + le32_to_cpu(raw_super->meta_ino), + le32_to_cpu(raw_super->root_ino)); + return 1; + } + + /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ + if (sanity_check_area_boundary(sb, raw_super)) + return 1; + return 0; } From a11fac3776fc4db6a9fa1104b1d0477809c677e0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 14 Dec 2015 18:58:42 -0800 Subject: [PATCH 26/87] f2fs: add symbol to avoid any confusion with tools This patch adds MAX_VOLUME_NAME to sync with f2fs-tools. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 25c6324a0dd04..e59c3be921069 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -51,6 +51,7 @@ #define MAX_ACTIVE_DATA_LOGS 8 #define VERSION_LEN 256 +#define MAX_VOLUME_NAME 512 /* * For superblock @@ -84,7 +85,7 @@ struct f2fs_super_block { __le32 node_ino; /* node inode number */ __le32 meta_ino; /* meta inode number */ __u8 uuid[16]; /* 128-bit uuid for volume */ - __le16 volume_name[512]; /* volume name */ + __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */ __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ __le32 cp_payload; From a49324f127dec918f5a3b3f145d0bf2fb81f4588 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 13:29:47 +0800 Subject: [PATCH 27/87] f2fs: rename {add,remove,release}_dirty_inode to {add,remove,release}_ino_entry remove_dirty_dir_inode will be renamed to remove_dirty_inode as a generic function in following patch for removing directory/regular/symlink inode in global dirty list. Here rename ino management related functions for readability, also in order to avoid name conflict. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++---- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/file.c | 4 ++-- fs/f2fs/inode.c | 4 ++-- fs/f2fs/super.c | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f661d80474be7..b839f5f3385c4 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -410,13 +410,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) spin_unlock(&im->ino_lock); } -void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* add new dirty ino entry into list */ __add_ino_entry(sbi, ino, type); } -void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* remove dirty ino entry from list */ __remove_ino_entry(sbi, ino, type); @@ -434,7 +434,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? true : false; } -void release_dirty_inode(struct f2fs_sb_info *sbi) +void release_ino_entry(struct f2fs_sb_info *sbi) { struct ino_entry *e, *tmp; int i; @@ -1081,7 +1081,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, discard_blk); - release_dirty_inode(sbi); + release_ino_entry(sbi); if (unlikely(f2fs_cp_error(sbi))) return; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0052ae8bea3f6..ee8bcbf34f449 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1820,9 +1820,9 @@ bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); -void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); -void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); -void release_dirty_inode(struct f2fs_sb_info *); +void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); +void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); +void release_ino_entry(struct f2fs_sb_info *); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9949d0f332c2a..c6d909e9661e9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -275,10 +275,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); + remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); + remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); ret = f2fs_issue_flush(sbi); out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 97e20decacb4e..3d2fe595d0782 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -357,9 +357,9 @@ void f2fs_evict_inode(struct inode *inode) if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); if (is_inode_flag_set(fi, FI_APPEND_WRITE)) - add_dirty_inode(sbi, inode->i_ino, APPEND_INO); + add_ino_entry(sbi, inode->i_ino, APPEND_INO); if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) - add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); + add_ino_entry(sbi, inode->i_ino, UPDATE_INO); if (is_inode_flag_set(fi, FI_FREE_NID)) { if (err && err != -ENOENT) alloc_nid_done(sbi, inode->i_ino); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6f2beda22b9f..9ee7144c53673 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -548,7 +548,7 @@ static void f2fs_put_super(struct super_block *sb) * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. */ - release_dirty_inode(sbi); + release_ino_entry(sbi); release_discard_addrs(sbi); f2fs_leave_shrinker(sbi); From 2710fd7e00b4f77dbe807efaf546bed00b62e65e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 13:30:45 +0800 Subject: [PATCH 28/87] f2fs: introduce dirty list node in inode info Add a new dirt list node member in inode info for linking the inode to global dirty list in superblock, instead of old implementation which allocate slab cache memory as an entry to inode. It avoids memory pressure due to slab cache allocation, and also makes codes more clean. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 54 ++++++++++++++------------------------------ fs/f2fs/debug.c | 1 - fs/f2fs/f2fs.h | 10 ++------ fs/f2fs/super.c | 1 + 4 files changed, 20 insertions(+), 46 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b839f5f3385c4..1aca402cab9ce 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -722,25 +722,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) return -EINVAL; } -static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) +static void __add_dirty_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); - if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) - return -EEXIST; + if (is_inode_flag_set(fi, FI_DIRTY_DIR)) + return; - set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); - F2FS_I(inode)->dirty_dir = new; - list_add_tail(&new->list, &sbi->dir_inode_list); + set_inode_flag(fi, FI_DIRTY_DIR); + list_add_tail(&fi->dirty_list, &sbi->dir_inode_list); stat_inc_dirty_dir(sbi); - return 0; + return; } void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode_entry *new; - int ret = 0; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) @@ -751,17 +749,11 @@ void update_dirty_page(struct inode *inode, struct page *page) goto out; } - new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - new->inode = inode; - INIT_LIST_HEAD(&new->list); - spin_lock(&sbi->dir_inode_lock); - ret = __add_dirty_inode(inode, new); + __add_dirty_inode(inode); inode_inc_dirty_pages(inode); spin_unlock(&sbi->dir_inode_lock); - if (ret) - kmem_cache_free(inode_entry_slab, new); out: SetPagePrivate(page); f2fs_trace_pid(page); @@ -770,25 +762,16 @@ void update_dirty_page(struct inode *inode, struct page *page) void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode_entry *new = - f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - int ret = 0; - - new->inode = inode; - INIT_LIST_HEAD(&new->list); spin_lock(&sbi->dir_inode_lock); - ret = __add_dirty_inode(inode, new); + __add_dirty_inode(inode); spin_unlock(&sbi->dir_inode_lock); - - if (ret) - kmem_cache_free(inode_entry_slab, new); } void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode_entry *entry; + struct f2fs_inode_info *fi = F2FS_I(inode); if (!S_ISDIR(inode->i_mode)) return; @@ -800,17 +783,14 @@ void remove_dirty_dir_inode(struct inode *inode) return; } - entry = F2FS_I(inode)->dirty_dir; - list_del(&entry->list); - F2FS_I(inode)->dirty_dir = NULL; - clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + list_del_init(&fi->dirty_list); + clear_inode_flag(fi, FI_DIRTY_DIR); stat_dec_dirty_dir(sbi); spin_unlock(&sbi->dir_inode_lock); - kmem_cache_free(inode_entry_slab, entry); /* Only from the recovery routine */ - if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { - clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); + if (is_inode_flag_set(fi, FI_DELAY_IPUT)) { + clear_inode_flag(fi, FI_DELAY_IPUT); iput(inode); } } @@ -818,8 +798,8 @@ void remove_dirty_dir_inode(struct inode *inode) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; - struct inode_entry *entry; struct inode *inode; + struct f2fs_inode_info *fi; retry: if (unlikely(f2fs_cp_error(sbi))) return; @@ -831,8 +811,8 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) spin_unlock(&sbi->dir_inode_lock); return; } - entry = list_entry(head->next, struct inode_entry, list); - inode = igrab(entry->inode); + fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); + inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->dir_inode_lock); if (inode) { filemap_fdatawrite(inode->i_mapping); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8ce2fe3f65ab5..f4a7b9e9416d3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -189,7 +189,6 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->cache_mem += NM_I(sbi)->dirty_nat_cnt * sizeof(struct nat_entry_set); si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); - si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ee8bcbf34f449..21048edb72cb3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -158,13 +158,7 @@ struct ino_entry { nid_t ino; /* inode number */ }; -/* - * for the list of directory inodes or gc inodes. - * NOTE: there are two slab users for this structure, if we add/modify/delete - * fields in structure for one of slab users, it may affect fields or size of - * other one, in this condition, it's better to split both of slab and related - * data structure. - */ +/* for the list of inodes to be GCed */ struct inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ @@ -441,8 +435,8 @@ struct f2fs_inode_info { unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ - struct inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct list_head dirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9ee7144c53673..4031f8ed9d240 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -432,6 +432,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_current_depth = 1; fi->i_advise = 0; init_rwsem(&fi->i_sem); + INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); From 6ad7609a183a250f1a346c7edfcbeaa30a29cfcc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 13:31:40 +0800 Subject: [PATCH 29/87] f2fs: introduce __remove_dirty_inode Introduce __remove_dirty_inode to clean up codes in remove_dirty_dir_inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1aca402cab9ce..a4392f06f7339 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -736,6 +736,20 @@ static void __add_dirty_inode(struct inode *inode) return; } +static void __remove_dirty_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + + if (get_dirty_pages(inode) || + !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) + return; + + list_del_init(&fi->dirty_list); + clear_inode_flag(fi, FI_DIRTY_DIR); + stat_dec_dirty_dir(sbi); +} + void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -777,15 +791,7 @@ void remove_dirty_dir_inode(struct inode *inode) return; spin_lock(&sbi->dir_inode_lock); - if (get_dirty_pages(inode) || - !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { - spin_unlock(&sbi->dir_inode_lock); - return; - } - - list_del_init(&fi->dirty_list); - clear_inode_flag(fi, FI_DIRTY_DIR); - stat_dec_dirty_dir(sbi); + __remove_dirty_inode(inode); spin_unlock(&sbi->dir_inode_lock); /* Only from the recovery routine */ From a1c1e9b74ff380176bf3862c764061e0a7efd9bb Mon Sep 17 00:00:00 2001 From: Fan Li Date: Tue, 15 Dec 2015 17:02:41 +0800 Subject: [PATCH 30/87] f2fs: fix to reset variable correctlly f2fs_map_blocks will set m_flags and m_len to 0, so we don't need to reset m_flags ourselves, but have to reset m_len to correct value before use it again. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c6d909e9661e9..1f5892f380189 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1700,7 +1700,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } map.m_lblk = pg_start; - map.m_len = pg_end - pg_start; /* * lookup mapping info in dnode page cache, skip defragmenting if all @@ -1708,14 +1707,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * in logical blocks. */ while (map.m_lblk < pg_end) { - map.m_flags = 0; + map.m_len = pg_end - map.m_lblk; err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); if (err) goto out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; - map.m_len--; continue; } @@ -1726,7 +1724,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, blk_end = map.m_pblk + map.m_len; map.m_lblk += map.m_len; - map.m_len = pg_end - map.m_lblk; } if (!fragmented) @@ -1752,14 +1749,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int cnt = 0; do_map: - map.m_flags = 0; + map.m_len = pg_end - map.m_lblk; err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); if (err) goto clear_out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; - map.m_len--; continue; } @@ -1784,7 +1780,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } map.m_lblk = idx; - map.m_len = pg_end - idx; if (idx < pg_end && cnt < blk_per_seg) goto do_map; From b39f0de23d8f22253d441b3b68414e9a1d027cf6 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 15 Dec 2015 17:17:20 +0800 Subject: [PATCH 31/87] f2fs: backup raw_super in sbi f2fs use fields of f2fs_super_block struct directly in a grabbed buffer. Once the buffer happen to be destroyed (e.g. through dd), it may bring in unpredictable effect on f2fs. This patch fixes to allocate additional buffer to store datas of super block rather than using grabbed block buffer directly. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4031f8ed9d240..6dfe0d32ad885 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -568,6 +568,7 @@ static void f2fs_put_super(struct super_block *sb) sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); + kfree(sbi->raw_super); kfree(sbi); } @@ -1139,6 +1140,9 @@ static int read_raw_super_block(struct super_block *sb, struct f2fs_super_block *super; int err = 0; + super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL); + if (!super) + return -ENOMEM; retry: buffer = sb_bread(sb, block); if (!buffer) { @@ -1154,8 +1158,7 @@ static int read_raw_super_block(struct super_block *sb, } } - super = (struct f2fs_super_block *) - ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET); + memcpy(super, buffer->b_data + F2FS_SUPER_OFFSET, sizeof(*super)); /* sanity checking of raw super */ if (sanity_check_raw_super(sb, super)) { @@ -1189,14 +1192,17 @@ static int read_raw_super_block(struct super_block *sb, out: /* No valid superblock */ - if (!*raw_super) + if (!*raw_super) { + kfree(super); return err; + } return 0; } int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { + struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); struct buffer_head *sbh = sbi->raw_super_buf; struct buffer_head *bh; int err; @@ -1207,7 +1213,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return -EIO; lock_buffer(bh); - memcpy(bh->b_data, sbh->b_data, sbh->b_size); + memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); WARN_ON(sbh->b_size != F2FS_BLKSIZE); set_buffer_uptodate(bh); set_buffer_dirty(bh); @@ -1223,6 +1229,10 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) /* write current valid superblock */ lock_buffer(sbh); + if (memcmp(sbh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super))) { + f2fs_msg(sbi->sb, KERN_INFO, "Write modified valid superblock"); + memcpy(sbh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); + } set_buffer_dirty(sbh); unlock_buffer(sbh); @@ -1497,6 +1507,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) kfree(options); free_sb_buf: brelse(raw_super_buf); + kfree(raw_super); free_sbi: kfree(sbi); From e8240f656d4d5d718ce8cf6b4ea266d6719ef547 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 15 Dec 2015 17:19:26 +0800 Subject: [PATCH 32/87] f2fs: don't grab super block buffer header all the time We have already got one copy of valid super block in memory, do not grab buffer header of super block all the time. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 8 ++--- fs/f2fs/super.c | 81 +++++++++++++++++++++---------------------------- 3 files changed, 38 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 21048edb72cb3..37cf04b3ff379 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -715,8 +715,8 @@ enum { struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ - struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + int valid_super_block; /* valid super block no */ int s_flag; /* flags for sbi */ /* for node-related operations */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1f5892f380189..7f8ca47be0afd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1591,20 +1591,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) return err; /* update superblock with uuid */ - lock_buffer(sbi->raw_super_buf); generate_random_uuid(sbi->raw_super->encrypt_pw_salt); - unlock_buffer(sbi->raw_super_buf); err = f2fs_commit_super(sbi, false); - - mnt_drop_write_file(filp); if (err) { /* undo new data */ - lock_buffer(sbi->raw_super_buf); memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - unlock_buffer(sbi->raw_super_buf); + mnt_drop_write_file(filp); return err; } + mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6dfe0d32ad885..8c8b4673a5db3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -567,7 +567,6 @@ static void f2fs_put_super(struct super_block *sb) wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; - brelse(sbi->raw_super_buf); kfree(sbi->raw_super); kfree(sbi); } @@ -1132,65 +1131,53 @@ static void init_sb_info(struct f2fs_sb_info *sbi) */ static int read_raw_super_block(struct super_block *sb, struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf, - int *recovery) + int *valid_super_block, int *recovery) { int block = 0; - struct buffer_head *buffer; - struct f2fs_super_block *super; + struct buffer_head *bh; + struct f2fs_super_block *super, *buf; int err = 0; super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL); if (!super) return -ENOMEM; retry: - buffer = sb_bread(sb, block); - if (!buffer) { + bh = sb_bread(sb, block); + if (!bh) { *recovery = 1; f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", block + 1); - if (block == 0) { - block++; - goto retry; - } else { - err = -EIO; - goto out; - } + err = -EIO; + goto next; } - memcpy(super, buffer->b_data + F2FS_SUPER_OFFSET, sizeof(*super)); + buf = (struct f2fs_super_block *)(bh->b_data + F2FS_SUPER_OFFSET); /* sanity checking of raw super */ - if (sanity_check_raw_super(sb, super)) { - brelse(buffer); + if (sanity_check_raw_super(sb, buf)) { + brelse(bh); *recovery = 1; f2fs_msg(sb, KERN_ERR, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - if (block == 0) { - block++; - goto retry; - } else { - err = -EINVAL; - goto out; - } + err = -EINVAL; + goto next; } if (!*raw_super) { - *raw_super_buf = buffer; + memcpy(super, buf, sizeof(*super)); + *valid_super_block = block; *raw_super = super; - } else { - /* already have a valid superblock */ - brelse(buffer); } + brelse(bh); +next: /* check the validity of the second superblock */ if (block == 0) { block++; goto retry; } -out: /* No valid superblock */ if (!*raw_super) { kfree(super); @@ -1203,18 +1190,16 @@ static int read_raw_super_block(struct super_block *sb, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); - struct buffer_head *sbh = sbi->raw_super_buf; struct buffer_head *bh; int err; /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbh->b_blocknr ? 0 : 1); + bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) return -EIO; lock_buffer(bh); memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - WARN_ON(sbh->b_size != F2FS_BLKSIZE); set_buffer_uptodate(bh); set_buffer_dirty(bh); unlock_buffer(bh); @@ -1227,33 +1212,37 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) if (recover || err) return err; + bh = sb_getblk(sbi->sb, sbi->valid_super_block); + if (!bh) + return -EIO; + /* write current valid superblock */ - lock_buffer(sbh); - if (memcmp(sbh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super))) { - f2fs_msg(sbi->sb, KERN_INFO, "Write modified valid superblock"); - memcpy(sbh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - } - set_buffer_dirty(sbh); - unlock_buffer(sbh); + lock_buffer(bh); + memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + unlock_buffer(bh); - return __sync_dirty_buffer(sbh, WRITE_FLUSH_FUA); + err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); + brelse(bh); + + return err; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; struct f2fs_super_block *raw_super; - struct buffer_head *raw_super_buf; struct inode *root; long err; bool retry = true, need_fsck = false; char *options = NULL; - int recovery, i; + int recovery, i, valid_super_block; try_onemore: err = -EINVAL; raw_super = NULL; - raw_super_buf = NULL; + valid_super_block = -1; recovery = 0; /* allocate memory for f2fs-specific super block info */ @@ -1267,7 +1256,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery); + err = read_raw_super_block(sb, &raw_super, &valid_super_block, + &recovery); if (err) goto free_sbi; @@ -1300,7 +1290,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* init f2fs-specific super block info */ sbi->sb = sb; sbi->raw_super = raw_super; - sbi->raw_super_buf = raw_super_buf; + sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); @@ -1506,7 +1496,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_options: kfree(options); free_sb_buf: - brelse(raw_super_buf); kfree(raw_super); free_sbi: kfree(sbi); From 55d1cdb25a815ba92a917ae579c27cc3ffb9a57d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Dec 2015 16:07:14 -0800 Subject: [PATCH 33/87] f2fs: relocate tracepoint of write_checkpoint It needs to relocate its location to see exact trace logs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a4392f06f7339..5008b872f3165 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1130,9 +1130,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* do checkpoint periodically */ sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: mutex_unlock(&sbi->cp_mutex); - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); } void init_ino_entry_info(struct f2fs_sb_info *sbi) From b3980910f746d885111db7252f664600de2a5ea3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Dec 2015 13:19:35 +0800 Subject: [PATCH 34/87] f2fs: introduce __f2fs_commit_super Introduce __f2fs_commit_super to include duplicated codes in f2fs_commit_super for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c8b4673a5db3..bc1a8cd38bc8c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1187,14 +1187,13 @@ static int read_raw_super_block(struct super_block *sb, return 0; } -int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) +int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block) { struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); struct buffer_head *bh; int err; - /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0 : 1); + bh = sb_getblk(sbi->sb, block); if (!bh) return -EIO; @@ -1208,25 +1207,22 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); brelse(bh); + return err; +} + +int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) +{ + int err; + + /* write back-up superblock first */ + err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1); + /* if we are in recovery path, skip writing valid superblock */ if (recover || err) return err; - bh = sb_getblk(sbi->sb, sbi->valid_super_block); - if (!bh) - return -EIO; - /* write current valid superblock */ - lock_buffer(bh); - memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_uptodate(bh); - set_buffer_dirty(bh); - unlock_buffer(bh); - - err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); - brelse(bh); - - return err; + return __f2fs_commit_super(sbi, sbi->valid_super_block); } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) From c227f912732f204c0ec4a577ba812401ac4672af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Dec 2015 13:09:20 +0800 Subject: [PATCH 35/87] f2fs: record dirty status of regular/symlink inode Maintain regular/symlink inode which has dirty pages in global dirty list and record their total dirty pages count like the way of handling directory inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 66 ++++++++++++++++++++++---------------------- fs/f2fs/data.c | 4 +-- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 27 +++++++++++------- fs/f2fs/inode.c | 2 +- fs/f2fs/super.c | 6 ++-- 6 files changed, 58 insertions(+), 49 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5008b872f3165..a037bbd89dc64 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -722,53 +722,51 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) return -EINVAL; } -static void __add_dirty_inode(struct inode *inode) +static void __add_dirty_inode(struct inode *inode, enum inode_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (is_inode_flag_set(fi, FI_DIRTY_DIR)) + if (is_inode_flag_set(fi, flag)) return; - set_inode_flag(fi, FI_DIRTY_DIR); - list_add_tail(&fi->dirty_list, &sbi->dir_inode_list); - stat_inc_dirty_dir(sbi); - return; + set_inode_flag(fi, flag); + list_add_tail(&fi->dirty_list, &sbi->inode_list[type]); + if (type == DIR_INODE) + stat_inc_dirty_dir(sbi); } -static void __remove_dirty_inode(struct inode *inode) +static void __remove_dirty_inode(struct inode *inode, enum inode_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; if (get_dirty_pages(inode) || - !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) + !is_inode_flag_set(F2FS_I(inode), flag)) return; list_del_init(&fi->dirty_list); - clear_inode_flag(fi, FI_DIRTY_DIR); - stat_dec_dirty_dir(sbi); + clear_inode_flag(fi, flag); + if (type == DIR_INODE) + stat_dec_dirty_dir(sbi); } void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return; - if (!S_ISDIR(inode->i_mode)) { - inode_inc_dirty_pages(inode); - goto out; - } - - spin_lock(&sbi->dir_inode_lock); - __add_dirty_inode(inode); + spin_lock(&sbi->inode_lock[type]); + __add_dirty_inode(inode, type); inode_inc_dirty_pages(inode); - spin_unlock(&sbi->dir_inode_lock); + spin_unlock(&sbi->inode_lock[type]); -out: SetPagePrivate(page); f2fs_trace_pid(page); } @@ -777,22 +775,24 @@ void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - spin_lock(&sbi->dir_inode_lock); - __add_dirty_inode(inode); - spin_unlock(&sbi->dir_inode_lock); + spin_lock(&sbi->inode_lock[DIR_INODE]); + __add_dirty_inode(inode, DIR_INODE); + spin_unlock(&sbi->inode_lock[DIR_INODE]); } -void remove_dirty_dir_inode(struct inode *inode) +void remove_dirty_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && + !S_ISLNK(inode->i_mode)) return; - spin_lock(&sbi->dir_inode_lock); - __remove_dirty_inode(inode); - spin_unlock(&sbi->dir_inode_lock); + spin_lock(&sbi->inode_lock[type]); + __remove_dirty_inode(inode, type); + spin_unlock(&sbi->inode_lock[type]); /* Only from the recovery routine */ if (is_inode_flag_set(fi, FI_DELAY_IPUT)) { @@ -801,7 +801,7 @@ void remove_dirty_dir_inode(struct inode *inode) } } -void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) +void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) { struct list_head *head; struct inode *inode; @@ -810,16 +810,16 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) if (unlikely(f2fs_cp_error(sbi))) return; - spin_lock(&sbi->dir_inode_lock); + spin_lock(&sbi->inode_lock[type]); - head = &sbi->dir_inode_list; + head = &sbi->inode_list[type]; if (list_empty(head)) { - spin_unlock(&sbi->dir_inode_lock); + spin_unlock(&sbi->inode_lock[type]); return; } fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); inode = igrab(&fi->vfs_inode); - spin_unlock(&sbi->dir_inode_lock); + spin_unlock(&sbi->inode_lock[type]); if (inode) { filemap_fdatawrite(inode->i_mapping); iput(inode); @@ -854,7 +854,7 @@ static int block_operations(struct f2fs_sb_info *sbi) /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - sync_dirty_dir_inodes(sbi); + sync_dirty_inodes(sbi, DIR_INODE); if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto out; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 90a2ffea875bf..292a06cbea072 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1180,7 +1180,7 @@ static int f2fs_write_data_page(struct page *page, f2fs_balance_fs(sbi); if (wbc->for_reclaim) { f2fs_submit_merged_bio(sbi, DATA, WRITE); - remove_dirty_dir_inode(inode); + remove_dirty_inode(inode); } return 0; @@ -1372,7 +1372,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, if (locked) mutex_unlock(&sbi->writepages); - remove_dirty_dir_inode(inode); + remove_dirty_inode(inode); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return ret; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6554fd5fce88f..3da58265c0d40 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -444,7 +444,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ truncate_inode_pages(&inode->i_data, 0); truncate_blocks(inode, 0, false); - remove_dirty_dir_inode(inode); + remove_dirty_inode(inode); remove_inode_page(inode); return ERR_PTR(err); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 37cf04b3ff379..03a2b86a28ba4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -648,6 +648,7 @@ struct f2fs_sm_info { enum count_type { F2FS_WRITEBACK, F2FS_DIRTY_DENTS, + F2FS_DIRTY_DATA, F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, @@ -696,6 +697,12 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +enum inode_type { + DIR_INODE, /* for dirty dir inode */ + FILE_INODE, /* for dirty regular/symlink inode */ + NR_INODE_TYPE, +}; + /* for inner inode cache management */ struct inode_management { struct radix_tree_root ino_root; /* ino entry array */ @@ -745,9 +752,9 @@ struct f2fs_sb_info { /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ - /* for directory inode management */ - struct list_head dir_inode_list; /* dir inode list */ - spinlock_t dir_inode_lock; /* for dir inode list lock */ + /* for inode management */ + struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */ + spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */ /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ @@ -1060,8 +1067,8 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) static inline void inode_inc_dirty_pages(struct inode *inode) { atomic_inc(&F2FS_I(inode)->dirty_pages); - if (S_ISDIR(inode->i_mode)) - inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); + inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1076,9 +1083,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode) return; atomic_dec(&F2FS_I(inode)->dirty_pages); - - if (S_ISDIR(inode->i_mode)) - dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); + dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) @@ -1417,6 +1423,7 @@ enum { FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_DO_DEFRAG, /* indicate defragment is running */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1826,8 +1833,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void update_dirty_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); -void remove_dirty_dir_inode(struct inode *); -void sync_dirty_dir_inodes(struct f2fs_sb_info *); +void remove_dirty_inode(struct inode *); +void sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 3d2fe595d0782..ec3fb32c47262 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -327,7 +327,7 @@ void f2fs_evict_inode(struct inode *inode) goto out_clear; f2fs_bug_on(sbi, get_dirty_pages(inode)); - remove_dirty_dir_inode(inode); + remove_dirty_inode(inode); f2fs_destroy_extent_tree(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bc1a8cd38bc8c..5b596d6a8d248 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1339,8 +1339,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; sbi->alloc_valid_block_count = 0; - INIT_LIST_HEAD(&sbi->dir_inode_list); - spin_lock_init(&sbi->dir_inode_lock); + for (i = 0; i < NR_INODE_TYPE; i++) { + INIT_LIST_HEAD(&sbi->inode_list[i]); + spin_lock_init(&sbi->inode_lock[i]); + } init_extent_cache_info(sbi); From 343f40f0a70eb7cee9cc8d6fcfbb3917252a5245 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Dec 2015 13:12:16 +0800 Subject: [PATCH 36/87] f2fs: introduce new option for controlling data flush Add a new option 'data_flush' to enable data flush functionality. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 7 +++++++ 3 files changed, 10 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index ad10494aa224f..e1c9f0849da6d 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -149,6 +149,8 @@ noextent_cache Disable an extent cache based on rb-tree explicitly, see the above extent_cache mount option. noinline_data Disable the inline data feature, inline data feature is enabled by default. +data_flush Enable data flushing before checkpoint in order to + persist data of regular and symlink. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 03a2b86a28ba4..b1fb8f73fe427 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -54,6 +54,7 @@ #define F2FS_MOUNT_FASTBOOT 0x00001000 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 +#define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5b596d6a8d248..c3070c149c0ec 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -67,6 +67,7 @@ enum { Opt_extent_cache, Opt_noextent_cache, Opt_noinline_data, + Opt_data_flush, Opt_err, }; @@ -91,6 +92,7 @@ static match_table_t f2fs_tokens = { {Opt_extent_cache, "extent_cache"}, {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, + {Opt_data_flush, "data_flush"}, {Opt_err, NULL}, }; @@ -406,6 +408,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noinline_data: clear_opt(sbi, INLINE_DATA); break; + case Opt_data_flush: + set_opt(sbi, DATA_FLUSH); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -687,6 +692,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",extent_cache"); else seq_puts(seq, ",noextent_cache"); + if (test_opt(sbi, DATA_FLUSH)) + seq_puts(seq, ",data_flush"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; From 33fbd5100de6d0a87f354ecf5ec0486ba01a6da7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Dec 2015 17:14:44 +0800 Subject: [PATCH 37/87] f2fs: stat dirty regular/symlink inodes Add to stat dirty regular and symlink inode for showing in debugfs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++----- fs/f2fs/debug.c | 6 +++++- fs/f2fs/f2fs.h | 13 +++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a037bbd89dc64..53044ea8bb5d8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -733,13 +733,11 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type) set_inode_flag(fi, flag); list_add_tail(&fi->dirty_list, &sbi->inode_list[type]); - if (type == DIR_INODE) - stat_inc_dirty_dir(sbi); + stat_inc_dirty_inode(sbi, type); } static void __remove_dirty_inode(struct inode *inode, enum inode_type type) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; @@ -749,8 +747,7 @@ static void __remove_dirty_inode(struct inode *inode, enum inode_type type) list_del_init(&fi->dirty_list); clear_inode_flag(fi, flag); - if (type == DIR_INODE) - stat_dec_dirty_dir(sbi); + stat_dec_dirty_inode(F2FS_I_SB(inode), type); } void update_dirty_page(struct inode *inode, struct page *page) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f4a7b9e9416d3..bb307e642fdde 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -42,8 +42,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); - si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); + si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; + si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; @@ -298,6 +300,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - datas: %4d in files:%4d\n", + si->ndirty_data, si->ndirty_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b1fb8f73fe427..19beabefd8395 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -818,7 +818,7 @@ struct f2fs_sb_info { atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ - unsigned int n_dirty_dirs; /* # of dir inodes */ + unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ @@ -1888,7 +1888,8 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, ext_node; - int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int ndirty_node, ndirty_meta; + int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; int bg_gc, inmem_pages, wb_pages; @@ -1921,8 +1922,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_count(si) ((si)->cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) -#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) -#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) +#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) +#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) #define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) #define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) @@ -2003,8 +2004,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_cp_count(si) #define stat_inc_call_count(si) #define stat_inc_bggc_count(si) -#define stat_inc_dirty_dir(sbi) -#define stat_dec_dirty_dir(sbi) +#define stat_inc_dirty_inode(sbi, type) +#define stat_dec_dirty_inode(sbi, type) #define stat_inc_total_hit(sb) #define stat_inc_rbtree_node_hit(sb) #define stat_inc_largest_node_hit(sbi) From 36b35a0dbe904a06e94154f29db0d0e218420c98 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Dec 2015 17:13:28 +0800 Subject: [PATCH 38/87] f2fs: support data flush in background Previously, when finishing a checkpoint, we have persisted all fs meta info including meta inode, node inode, dentry page of directory inode, so, after a sudden power cut, f2fs can recover from last checkpoint with full directory structure. But during checkpoint, we didn't flush dirty pages of regular and symlink inode, so such dirty datas still in memory will be lost in that moment of power off. In order to reduce the chance of lost data, this patch enables f2fs_balance_fs_bg with the ability of data flushing. It will try to flush user data before starting a checkpoint. So user's data written after last checkpoint which may not be fsynced could be saved. When we mount with data_flush option, after every period of cp_interval (could be configured in sysfs: /sys/fs/f2fs/device/cp_interval) seconds user data could be flushed into device once f2fs_balance_fs_bg was called in kworker thread or gc thread. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5fa519f028602..c2474509e5de5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -291,8 +291,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, NAT_ENTRIES) || excess_prefree_segs(sbi) || !available_free_memory(sbi, INO_ENTRIES) || - jiffies > sbi->cp_expires) + jiffies > sbi->cp_expires) { + if (test_opt(sbi, DATA_FLUSH)) + sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); + } } static int issue_flush_thread(void *data) From 7df3a4318d07ba520b4a8eddad29e9ac748b0a19 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Thu, 17 Dec 2015 13:20:59 +0800 Subject: [PATCH 39/87] f2fs: optimize the flow of f2fs_map_blocks check map->m_len right after it changes to avoid excess call to update dnode_of_data. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 69 ++++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 292a06cbea072..e34b1bdfc9955 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -573,6 +573,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int err = 0, ofs = 1; struct extent_info ei; bool allocated = false; + block_t blkaddr; map->m_len = 0; map->m_flags = 0; @@ -636,6 +637,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, pgofs++; get_next: + if (map->m_len >= maxblocks) + goto sync_out; + if (dn.ofs_in_node >= end_offset) { if (allocated) sync_inode_page(&dn); @@ -653,44 +657,43 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } - if (maxblocks > map->m_len) { - block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { - if (create) { - if (unlikely(f2fs_cp_error(sbi))) { - err = -EIO; - goto sync_out; - } - err = __allocate_data_block(&dn); - if (err) - goto sync_out; - allocated = true; - map->m_flags |= F2FS_MAP_NEW; - blkaddr = dn.data_blkaddr; - } else { - /* - * we only merge preallocated unwritten blocks - * for fiemap. - */ - if (flag != F2FS_GET_BLOCK_FIEMAP || - blkaddr != NEW_ADDR) - goto sync_out; + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { + if (create) { + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto sync_out; } + err = __allocate_data_block(&dn); + if (err) + goto sync_out; + allocated = true; + map->m_flags |= F2FS_MAP_NEW; + blkaddr = dn.data_blkaddr; + } else { + /* + * we only merge preallocated unwritten blocks + * for fiemap. + */ + if (flag != F2FS_GET_BLOCK_FIEMAP || + blkaddr != NEW_ADDR) + goto sync_out; } + } - /* Give more consecutive addresses for the readahead */ - if ((map->m_pblk != NEW_ADDR && - blkaddr == (map->m_pblk + ofs)) || - (map->m_pblk == NEW_ADDR && - blkaddr == NEW_ADDR)) { - ofs++; - dn.ofs_in_node++; - pgofs++; - map->m_len++; - goto get_next; - } + /* Give more consecutive addresses for the readahead */ + if ((map->m_pblk != NEW_ADDR && + blkaddr == (map->m_pblk + ofs)) || + (map->m_pblk == NEW_ADDR && + blkaddr == NEW_ADDR)) { + ofs++; + dn.ofs_in_node++; + pgofs++; + map->m_len++; + goto get_next; } + sync_out: if (allocated) sync_inode_page(&dn); From 4cf185379b7504d640c9dd72f959f081b25f6ea2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Dec 2015 17:17:16 +0800 Subject: [PATCH 40/87] f2fs: add a tracepoint for sync_dirty_inodes This patch adds a tracepoint for sync_dirty_inodes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++++ include/trace/events/f2fs.h | 38 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 53044ea8bb5d8..fdd43f71d2c6c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -803,6 +803,11 @@ void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) struct list_head *head; struct inode *inode; struct f2fs_inode_info *fi; + bool is_dir = (type == DIR_INODE); + + trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir, + get_pages(sbi, is_dir ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); retry: if (unlikely(f2fs_cp_error(sbi))) return; @@ -812,6 +817,9 @@ void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) head = &sbi->inode_list[type]; if (list_empty(head)) { spin_unlock(&sbi->inode_lock[type]); + trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, + get_pages(sbi, is_dir ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return; } fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 00b4a6308249e..a1b488809f069 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1265,6 +1265,44 @@ TRACE_EVENT(f2fs_destroy_extent_tree, __entry->node_cnt) ); +DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, + + TP_PROTO(struct super_block *sb, int type, int count), + + TP_ARGS(sb, type, count), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, count) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->type = type; + __entry->count = count; + ), + + TP_printk("dev = (%d,%d), %s, dirty count = %d", + show_dev(__entry), + show_file_type(__entry->type), + __entry->count) +); + +DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter, + + TP_PROTO(struct super_block *sb, int type, int count), + + TP_ARGS(sb, type, count) +); + +DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit, + + TP_PROTO(struct super_block *sb, int type, int count), + + TP_ARGS(sb, type, count) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ From 7441ccef339f87abc27afc4ccfc24c014d7360c9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 21 Dec 2015 19:20:15 -0800 Subject: [PATCH 41/87] f2fs: use atomic variable for total_extent_tree It would be better to use atomic variable for total_extent_tree. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/extent_cache.c | 8 ++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 3 ++- fs/f2fs/shrinker.c | 3 ++- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index bb307e642fdde..ed5dfcc8886f0 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -38,7 +38,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree); si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; si->total_ext = atomic64_read(&sbi->total_hit_ext); - si->ext_tree = sbi->total_ext_tree; + si->ext_tree = atomic_read(&sbi->total_ext_tree); si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); @@ -193,7 +193,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); - si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); + si->cache_mem += atomic_read(&sbi->total_ext_tree) * + sizeof(struct extent_tree); si->cache_mem += atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e86e9f1e0733a..0e97d6af98858 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -70,7 +70,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) rwlock_init(&et->lock); atomic_set(&et->refcount, 0); et->count = 0; - sbi->total_ext_tree++; + atomic_inc(&sbi->total_ext_tree); } atomic_inc(&et->refcount); up_write(&sbi->extent_tree_lock); @@ -570,7 +570,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) radix_tree_delete(root, et->ino); kmem_cache_free(extent_tree_slab, et); - sbi->total_ext_tree--; + atomic_dec(&sbi->total_ext_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) @@ -663,7 +663,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); - sbi->total_ext_tree--; + atomic_dec(&sbi->total_ext_tree); up_write(&sbi->extent_tree_lock); F2FS_I(inode)->extent_tree = NULL; @@ -715,7 +715,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) init_rwsem(&sbi->extent_tree_lock); INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); - sbi->total_ext_tree = 0; + atomic_set(&sbi->total_ext_tree, 0); atomic_set(&sbi->total_ext_node, 0); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 19beabefd8395..a7f619182cece 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -762,7 +762,7 @@ struct f2fs_sb_info { struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ - int total_ext_tree; /* extent tree count */ + atomic_t total_ext_tree; /* extent tree count */ atomic_t total_ext_node; /* extent info count */ /* basic filesystem units */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d842b199cd024..6cc8ac7e185a2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -65,7 +65,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == EXTENT_CACHE) { - mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) + + mem_size = (atomic_read(&sbi->total_ext_tree) * + sizeof(struct extent_tree) + atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index da0d8e0b55a5d..a11e099cbddc6 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -32,7 +32,8 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) { - return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node); + return atomic_read(&sbi->total_ext_tree) + + atomic_read(&sbi->total_ext_node); } unsigned long f2fs_shrink_count(struct shrinker *shrink, From 74fd8d9927ef08db30a85f131a124152aeba66c7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 21 Dec 2015 19:25:50 -0800 Subject: [PATCH 42/87] f2fs: speed up shrinking extent tree entries If there is no candidates for shrinking slab entries, we don't need to traverse any trees at all. Reviewed-by: Chao Yu [Jaegeuk Kim: fix missing initialization reported by Yunlei He] Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 14 ++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/shrinker.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0e97d6af98858..5305a29f91a33 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -71,6 +71,8 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) atomic_set(&et->refcount, 0); et->count = 0; atomic_inc(&sbi->total_ext_tree); + } else { + atomic_dec(&sbi->total_zombie_tree); } atomic_inc(&et->refcount); up_write(&sbi->extent_tree_lock); @@ -547,10 +549,14 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) unsigned int found; unsigned int node_cnt = 0, tree_cnt = 0; int remained; + bool do_free = false; if (!test_opt(sbi, EXTENT_CACHE)) return 0; + if (!atomic_read(&sbi->total_zombie_tree)) + goto free_node; + if (!down_write_trylock(&sbi->extent_tree_lock)) goto out; @@ -571,6 +577,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) radix_tree_delete(root, et->ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); + atomic_dec(&sbi->total_zombie_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) @@ -580,6 +587,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) } up_write(&sbi->extent_tree_lock); +free_node: /* 2. remove LRU extent entries */ if (!down_write_trylock(&sbi->extent_tree_lock)) goto out; @@ -591,9 +599,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) if (!remained--) break; list_del_init(&en->list); + do_free = true; } spin_unlock(&sbi->extent_lock); + if (do_free == false) + goto unlock_out; + /* * reset ino for searching victims from beginning of global extent tree. */ @@ -651,6 +663,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (inode->i_nlink && !is_bad_inode(inode) && et->count) { atomic_dec(&et->refcount); + atomic_inc(&sbi->total_zombie_tree); return; } @@ -716,6 +729,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); atomic_set(&sbi->total_ext_tree, 0); + atomic_set(&sbi->total_zombie_tree, 0); atomic_set(&sbi->total_ext_node, 0); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a7f619182cece..90fb970e2b98d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -763,6 +763,7 @@ struct f2fs_sb_info { struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ atomic_t total_ext_tree; /* extent tree count */ + atomic_t total_zombie_tree; /* extent zombie tree count */ atomic_t total_ext_node; /* extent info count */ /* basic filesystem units */ diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index a11e099cbddc6..93606f281bf9c 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -32,7 +32,7 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) { - return atomic_read(&sbi->total_ext_tree) + + return atomic_read(&sbi->total_zombie_tree) + atomic_read(&sbi->total_ext_node); } From b9d777b85ff1ff79a1173190317b25bebc404ab4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 11:09:35 -0800 Subject: [PATCH 43/87] f2fs: check inline_data flag at converting time We can check inode's inline_data flag when calling to convert it. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 +++---- fs/f2fs/file.c | 58 +++++++++++++++++++----------------------------- fs/f2fs/inline.c | 3 +++ 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e34b1bdfc9955..cf0c9dda0365c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1573,11 +1573,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int err; /* we don't need to use inline_data strictly */ - if (f2fs_has_inline_data(inode)) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } + err = f2fs_convert_inline_inode(inode); + if (err) + return err; if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7f8ca47be0afd..f2effe18d3c5b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -418,19 +418,18 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); + int err; if (f2fs_encrypted_inode(inode)) { - int err = f2fs_get_encryption_info(inode); + err = f2fs_get_encryption_info(inode); if (err) return 0; } /* we don't need to use inline_data strictly */ - if (f2fs_has_inline_data(inode)) { - int err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } + err = f2fs_convert_inline_inode(inode); + if (err) + return err; file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; @@ -604,7 +603,7 @@ int f2fs_truncate(struct inode *inode, bool lock) trace_f2fs_truncate(inode); /* we should check inline_data size */ - if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { + if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); if (err) return err; @@ -688,8 +687,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) truncate_setsize(inode, attr->ia_size); /* should convert inline inode here */ - if (f2fs_has_inline_data(inode) && - !f2fs_may_inline_data(inode)) { + if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); if (err) return err; @@ -786,13 +784,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) { pgoff_t pg_start, pg_end; loff_t off_start, off_end; - int ret = 0; + int ret; - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -951,11 +947,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(F2FS_I_SB(inode)); - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; pg_start = offset >> PAGE_CACHE_SHIFT; pg_end = (offset + len) >> PAGE_CACHE_SHIFT; @@ -1001,11 +995,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, f2fs_balance_fs(sbi); - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); if (ret) @@ -1114,11 +1106,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi); - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) @@ -1168,11 +1158,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - if (f2fs_has_inline_data(inode)) { - ret = f2fs_convert_inline_inode(inode); - if (ret) - return ret; - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bda7126466c09..8090854dd29ce 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -177,6 +177,9 @@ int f2fs_convert_inline_inode(struct inode *inode) struct page *ipage, *page; int err = 0; + if (!f2fs_has_inline_data(inode)) + return 0; + page = grab_cache_page(inode->i_mapping, 0); if (!page) return -ENOMEM; From 00623e6bcf40b03b39f612cef9a744453cf3e2a8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 11:56:08 -0800 Subject: [PATCH 44/87] f2fs: avoid unnecessary f2fs_gc for dir operations The f2fs_balance_fs doesn't need to cover f2fs_new_inode or f2fs_find_entry works. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2c32110f9fc08..4e27c5c4b05cb 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -128,8 +128,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -142,6 +140,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -288,12 +288,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) int err = -ENOENT; trace_f2fs_unlink_enter(dir, dentry); - f2fs_balance_fs(sbi); de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) goto fail; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) { @@ -341,8 +342,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (len > dir->i_sb->s_blocksize) return -ENAMETOOLONG; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -353,6 +352,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -433,8 +434,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -444,6 +443,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + f2fs_balance_fs(sbi); + set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -481,8 +482,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -490,6 +489,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -516,9 +517,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; - if (!whiteout) - f2fs_balance_fs(sbi); - inode = f2fs_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -532,6 +530,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) @@ -604,8 +604,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } - f2fs_balance_fs(sbi); - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) goto out; @@ -635,6 +633,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_whiteout; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); @@ -666,6 +666,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(old_inode); update_inode_page(new_inode); } else { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = f2fs_add_link(new_dentry, old_inode); @@ -763,8 +765,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode))) return -EPERM; - f2fs_balance_fs(sbi); - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) goto out; @@ -807,6 +807,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_new_dir; } + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); From 93bae099eaa0ae784fbe4d9eddcdc54fb5812466 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 12:59:54 -0800 Subject: [PATCH 45/87] f2fs: record node block allocation in dnode_of_data This patch introduces recording node block allocation in dnode_of_data. This information helps to figure out whether any node block is allocated during specific file operations. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cf0c9dda0365c..a7a9a05d012a6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -226,6 +226,7 @@ void set_data_blkaddr(struct dnode_of_data *dn) addr_array = blkaddr_in_node(rn); addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); set_page_dirty(node_page); + dn->node_changed = true; } int reserve_new_block(struct dnode_of_data *dn) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 90fb970e2b98d..3e4a60da408f5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -546,6 +546,7 @@ struct dnode_of_data { nid_t nid; /* node id of the direct node block */ unsigned int ofs_in_node; /* data offset in the node page */ bool inode_page_locked; /* inode page is locked or not */ + bool node_changed; /* is node block changed */ block_t data_blkaddr; /* block address of the node block */ }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6cc8ac7e185a2..341de5d2353b3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -542,6 +542,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) set_nid(parent, offset[i - 1], nids[i], i == 1); alloc_nid_done(sbi, nids[i]); + dn->node_changed = true; done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); @@ -678,6 +679,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (ret < 0) goto out_err; set_nid(page, i, 0, false); + dn->node_changed = true; } } else { child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; @@ -691,6 +693,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); if (ret == (NIDS_PER_BLOCK + 1)) { set_nid(page, i, 0, false); + dn->node_changed = true; child_nofs += ret; } else if (ret < 0 && ret != -ENOENT) { goto out_err; @@ -752,6 +755,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, if (err < 0) goto fail; set_nid(pages[idx], i, 0, false); + dn->node_changed = true; } if (offset[idx + 1] == 0) { @@ -1153,6 +1157,7 @@ void sync_inode_page(struct dnode_of_data *dn) } else { update_inode_page(dn->inode); } + dn->node_changed = true; } int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, From 3104af35eb6a2452ccc9912997e7728777100de2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Dec 2015 17:11:43 +0800 Subject: [PATCH 46/87] f2fs: reduce covered region of sbi->cp_rwsem in f2fs_map_blocks Only cover sbi->cp_rwsem on one dnode page's allocation and modification instead of multiple's in f2fs_map_blocks, it can reduce the covered region of cp_rwsem, then we can avoid potential long time delay for concurrent checkpointer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a7a9a05d012a6..82ecaa30fd772 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -590,7 +590,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } if (create) - f2fs_lock_op(F2FS_I_SB(inode)); + f2fs_lock_op(sbi); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -647,6 +647,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, allocated = false; f2fs_put_dnode(&dn); + if (create) { + f2fs_unlock_op(sbi); + f2fs_lock_op(sbi); + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, mode); if (err) { @@ -702,7 +707,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, f2fs_put_dnode(&dn); unlock_out: if (create) - f2fs_unlock_op(F2FS_I_SB(inode)); + f2fs_unlock_op(sbi); out: trace_f2fs_map_blocks(inode, map, err); return err; From 2a3407607028f7c780f1c20faa4e922bf631d340 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Dec 2015 13:23:35 -0800 Subject: [PATCH 47/87] f2fs: call f2fs_balance_fs only when node was changed If user tries to update or read data, we don't need to call f2fs_balance_fs which triggers f2fs_gc, which increases unnecessary long latency. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 26 ++++++++++++++++++++++---- fs/f2fs/file.c | 26 +++++++++----------------- fs/f2fs/inline.c | 4 ++++ 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 82ecaa30fd772..958d8261b258a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -509,7 +509,6 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, u64 end_offset; while (len) { - f2fs_balance_fs(sbi); f2fs_lock_op(sbi); /* When reading holes, we need its node page */ @@ -542,6 +541,9 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + + if (dn.node_changed) + f2fs_balance_fs(sbi); } return; @@ -551,6 +553,8 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); return; } @@ -649,6 +653,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (create) { f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); f2fs_lock_op(sbi); } @@ -706,8 +712,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, put_out: f2fs_put_dnode(&dn); unlock_out: - if (create) + if (create) { f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); + } out: trace_f2fs_map_blocks(inode, map, err); return err; @@ -1415,8 +1424,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); - f2fs_balance_fs(sbi); - /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. The locking rule for inline_data conversion should be: @@ -1466,6 +1473,17 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + if (dn.node_changed && has_not_enough_free_secs(sbi, 0)) { + unlock_page(page); + f2fs_balance_fs(sbi); + lock_page(page); + if (page->mapping != mapping) { + /* The page got truncated from under us */ + f2fs_put_page(page, 1); + goto repeat; + } + } + f2fs_wait_on_page_writeback(page, DATA); /* wait for GCed encrypted page writeback */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f2effe18d3c5b..888ce47657790 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -40,8 +40,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct dnode_of_data dn; int err; - f2fs_balance_fs(sbi); - sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -57,6 +55,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); + if (dn.node_changed) + f2fs_balance_fs(sbi); + file_update_time(vma->vm_file); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || @@ -233,9 +234,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } go_write: - /* guarantee free sections for fsync */ - f2fs_balance_fs(sbi); - /* * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. @@ -267,6 +265,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); + + f2fs_balance_fs(sbi); goto sync_nodes; } @@ -945,8 +945,6 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode)); - ret = f2fs_convert_inline_inode(inode); if (ret) return ret; @@ -993,8 +991,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - f2fs_balance_fs(sbi); - ret = f2fs_convert_inline_inode(inode); if (ret) return ret; @@ -1104,12 +1100,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) return -EINVAL; - f2fs_balance_fs(sbi); - ret = f2fs_convert_inline_inode(inode); if (ret) return ret; + f2fs_balance_fs(sbi); + ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) return ret; @@ -1152,8 +1148,6 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t off_start, off_end; int ret = 0; - f2fs_balance_fs(sbi); - ret = inode_newsize_ok(inode, (len + offset)); if (ret) return ret; @@ -1162,6 +1156,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; + f2fs_balance_fs(sbi); + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -1349,8 +1345,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; - f2fs_balance_fs(F2FS_I_SB(inode)); - if (f2fs_is_atomic_file(inode)) return 0; @@ -1437,8 +1431,6 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) if (ret) return ret; - f2fs_balance_fs(F2FS_I_SB(inode)); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); commit_inmem_pages(inode, true); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8090854dd29ce..c24e5d93720d4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -202,6 +202,10 @@ int f2fs_convert_inline_inode(struct inode *inode) f2fs_unlock_op(sbi); f2fs_put_page(page, 1); + + if (dn.node_changed) + f2fs_balance_fs(sbi); + return err; } From c34f42e2cb2d27650549306de5ff36839e9177d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Dec 2015 17:50:30 +0800 Subject: [PATCH 48/87] f2fs: report error of do_checkpoint do_checkpoint and write_checkpoint can fail due to reasons like triggering in a readonly fs or encountering IO error of storage device. So it's better to report such error info to user, let user be aware of failure of doing checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 29 +++++++++++++++++++---------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 5 +++-- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 5 +++-- fs/f2fs/super.c | 5 +++-- 6 files changed, 30 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fdd43f71d2c6c..9cdb161973518 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -910,7 +910,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) finish_wait(&sbi->cp_wait, &wait); } -static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -936,7 +936,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) while (get_pages(sbi, F2FS_DIRTY_META)) { sync_meta_pages(sbi, META, LONG_MAX); if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; } next_free_nid(sbi, &last_nid); @@ -1021,7 +1021,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* need to wait for end_io results */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); @@ -1049,7 +1049,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); @@ -1075,19 +1075,22 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) release_ino_entry(sbi); if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); + + return 0; } /* * We guarantee that this checkpoint procedure will not fail. */ -void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; + int err = 0; mutex_lock(&sbi->cp_mutex); @@ -1095,14 +1098,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || (cpc->reason == CP_DISCARD && !sbi->discard_blks))) goto out; - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; goto out; - if (f2fs_readonly(sbi->sb)) + } + if (f2fs_readonly(sbi->sb)) { + err = -EROFS; goto out; + } trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); - if (block_operations(sbi)) + err = block_operations(sbi); + if (err) goto out; trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); @@ -1124,7 +1132,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ - do_checkpoint(sbi, cpc); + err = do_checkpoint(sbi, cpc); unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); @@ -1138,6 +1146,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: mutex_unlock(&sbi->cp_mutex); + return err; } void init_ino_entry_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3e4a60da408f5..79345e7ce6bb8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1838,7 +1838,7 @@ void update_dirty_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_inode(struct inode *); void sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); -void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); +int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); void destroy_checkpoint_caches(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 888ce47657790..780db8bd2451e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1618,6 +1618,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct cp_control cpc; + int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1628,10 +1629,10 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) cpc.reason = __get_cp_reason(sbi); mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); - return 0; + return err; } static int f2fs_defragment_range(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7fcb6e49defff..589b20b8677b8 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -623,7 +623,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) .reason = CP_RECOVERY, }; mutex_unlock(&sbi->cp_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); } else { mutex_unlock(&sbi->cp_mutex); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c2474509e5de5..a3474bad57702 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1118,6 +1118,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; struct cp_control cpc; + int err = 0; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -1148,12 +1149,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) sbi->segs_per_sec) - 1, end_segno); mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); - return 0; + return err; } static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c3070c149c0ec..597b533634e00 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -579,6 +579,7 @@ static void f2fs_put_super(struct super_block *sb) int f2fs_sync_fs(struct super_block *sb, int sync) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err = 0; trace_f2fs_sync_fs(sb, sync); @@ -588,14 +589,14 @@ int f2fs_sync_fs(struct super_block *sb, int sync) cpc.reason = __get_cp_reason(sbi); mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); + err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } else { f2fs_balance_fs(sbi); } f2fs_trace_ios(NULL, 1); - return 0; + return err; } static int f2fs_freeze(struct super_block *sb) From fba48a8b14f405afc5c80a93ed64a12607dd52c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Dec 2015 17:51:35 +0800 Subject: [PATCH 49/87] f2fs: don't convert inline inode when inline_data option is disable If inline_data option is disable, when truncating an inline inode with size which is not exceed maxinum inline size, we should not convert inline inode to regular one to avoid the overhead of synchronizing conversion. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 3 --- fs/f2fs/namei.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c24e5d93720d4..5ffbd169b719a 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,9 +16,6 @@ bool f2fs_may_inline_data(struct inode *inode) { - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) - return false; - if (f2fs_is_atomic_file(inode)) return false; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4e27c5c4b05cb..e439f32d31e6e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -60,7 +60,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); - if (f2fs_may_inline_data(inode)) + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); From 2aadac085cf0ca3e0295988d4d1dbdeafc15a9f6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 11:55:18 -0800 Subject: [PATCH 50/87] f2fs: introduce prepare_write_begin to clean up This patch adds prepare_write_begin to clean f2fs_write_begin. The major role of this function is to convert any inline_data and allocate or find block address. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 92 +++++++++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 958d8261b258a..d4839fc2b4ca3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1410,6 +1410,51 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) } } +static int prepare_write_begin(struct f2fs_sb_info *sbi, + struct page *page, loff_t pos, unsigned len, + block_t *blk_addr, bool *node_changed) +{ + struct inode *inode = page->mapping->host; + pgoff_t index = page->index; + struct dnode_of_data dn; + struct page *ipage; + int err = 0; + + f2fs_lock_op(sbi); + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto unlock_out; + } + + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) { + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto done; + } else { + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto err_out; + } + } + err = f2fs_get_block(&dn, index); +done: + /* convert_inline_page can make node_changed */ + *blk_addr = dn.data_blkaddr; + *node_changed = dn.node_changed; +err_out: + f2fs_put_dnode(&dn); +unlock_out: + f2fs_unlock_op(sbi); + return err; +} + static int f2fs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1417,9 +1462,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page = NULL; - struct page *ipage; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; - struct dnode_of_data dn; + bool need_balance = false; + block_t blkaddr = NULL_ADDR; int err = 0; trace_f2fs_write_begin(inode, pos, len, flags); @@ -1443,37 +1488,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, *pagep = page; - f2fs_lock_op(sbi); - - /* check inline_data */ - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto unlock_fail; - } - - set_new_dnode(&dn, inode, ipage, ipage, 0); - - if (f2fs_has_inline_data(inode)) { - if (pos + len <= MAX_INLINE_DATA) { - read_inline_data(page, ipage); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); - sync_inode_page(&dn); - goto put_next; - } - err = f2fs_convert_inline_page(&dn, page); - if (err) - goto put_fail; - } - - err = f2fs_get_block(&dn, index); + err = prepare_write_begin(sbi, page, pos, len, + &blkaddr, &need_balance); if (err) - goto put_fail; -put_next: - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + goto fail; - if (dn.node_changed && has_not_enough_free_secs(sbi, 0)) { + if (need_balance && has_not_enough_free_secs(sbi, 0)) { unlock_page(page); f2fs_balance_fs(sbi); lock_page(page); @@ -1488,7 +1508,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, /* wait for GCed encrypted page writeback */ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) - f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); + f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); if (len == PAGE_CACHE_SIZE) goto out_update; @@ -1504,14 +1524,14 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, goto out_update; } - if (dn.data_blkaddr == NEW_ADDR) { + if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { struct f2fs_io_info fio = { .sbi = sbi, .type = DATA, .rw = READ_SYNC, - .blk_addr = dn.data_blkaddr, + .blk_addr = blkaddr, .page = page, .encrypted_page = NULL, }; @@ -1542,10 +1562,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, clear_cold_data(page); return 0; -put_fail: - f2fs_put_dnode(&dn); -unlock_fail: - f2fs_unlock_op(sbi); fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); From 4aa69d5667914dd0844d98ad84804b79a4845fa3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 14:17:47 -0800 Subject: [PATCH 51/87] f2fs: return early when trying to read null nid If get_node_page() gets zero nid, we can return early without getting a wrong page. For example, get_dnode_of_data() can try to do that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 341de5d2353b3..929265d20c327 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1059,6 +1059,10 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { struct page *page; int err; + + if (!nid) + return ERR_PTR(-ENOENT); + f2fs_bug_on(sbi, check_nid_range(sbi, nid)); repeat: page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) From b4d07a3e1a6e783132be7506aeb171dc5728f077 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 13:48:58 -0800 Subject: [PATCH 52/87] f2fs: avoid f2fs_lock_op in f2fs_write_begin If f2fs_write_begin is to update data, we can bypass calling f2fs_lock_op() in order to avoid the checkpoint latency in the write syscall. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d4839fc2b4ca3..f2a023edfc1d2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1418,10 +1418,16 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, pgoff_t index = page->index; struct dnode_of_data dn; struct page *ipage; + bool locked = false; + struct extent_info ei; int err = 0; - f2fs_lock_op(sbi); - + if (f2fs_has_inline_data(inode) || + (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + f2fs_lock_op(sbi); + locked = true; + } +restart: /* check inline_data */ ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { @@ -1436,22 +1442,42 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, read_inline_data(page, ipage); set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); sync_inode_page(&dn); - goto done; } else { err = f2fs_convert_inline_page(&dn, page); if (err) - goto err_out; + goto out; + if (dn.data_blkaddr == NULL_ADDR) + err = f2fs_get_block(&dn, index); + } + } else if (locked) { + err = f2fs_get_block(&dn, index); + } else { + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + } else { + bool restart = false; + + /* hole case */ + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (err || (!err && dn.data_blkaddr == NULL_ADDR)) + restart = true; + if (restart) { + f2fs_put_dnode(&dn); + f2fs_lock_op(sbi); + locked = true; + goto restart; + } } } - err = f2fs_get_block(&dn, index); -done: + /* convert_inline_page can make node_changed */ *blk_addr = dn.data_blkaddr; *node_changed = dn.node_changed; -err_out: +out: f2fs_put_dnode(&dn); unlock_out: - f2fs_unlock_op(sbi); + if (locked) + f2fs_unlock_op(sbi); return err; } From 06d6f2263913029ccd6199fd10e7dca525d348b1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Dec 2015 14:56:09 -0800 Subject: [PATCH 53/87] f2fs: declare static function The __f2fs_commit_super is static. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 597b533634e00..75704d9caae2c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1195,7 +1195,7 @@ static int read_raw_super_block(struct super_block *sb, return 0; } -int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block) +static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block) { struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi); struct buffer_head *bh; From d53841740fd7feec170339203b198020ff100c58 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Dec 2015 18:03:29 +0800 Subject: [PATCH 54/87] f2fs: add missing f2fs_balance_fs in __recover_dot_dentries __recover_do_dentries will try to grab free space in storage, so fix to add missing f2fs_balance_fs here. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e439f32d31e6e..fb41c80826964 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -214,6 +214,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) struct page *page; int err = 0; + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); de = f2fs_find_entry(dir, &dot, &page); From 6d5a1495eebd441216dc96913a4270100b26e104 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Dec 2015 18:04:56 +0800 Subject: [PATCH 55/87] f2fs: let user being aware of IO error Sometimes we keep dumb when IO error occur in lower layer device, so user will not receive any error return value for some operation, but actually, the operation did not succeed. This sould be avoided, so this patch reports such kind of error to user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 17 +++++++---------- fs/f2fs/data.c | 23 +++++++++++++---------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 4 +++- fs/f2fs/gc.c | 4 +++- fs/f2fs/node.c | 5 +++++ 6 files changed, 32 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9cdb161973518..6b89ac69b7e47 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -798,7 +798,7 @@ void remove_dirty_inode(struct inode *inode) } } -void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) +int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) { struct list_head *head; struct inode *inode; @@ -810,7 +810,7 @@ void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); retry: if (unlikely(f2fs_cp_error(sbi))) - return; + return -EIO; spin_lock(&sbi->inode_lock[type]); @@ -820,7 +820,7 @@ void sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, get_pages(sbi, is_dir ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); - return; + return 0; } fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); inode = igrab(&fi->vfs_inode); @@ -859,11 +859,9 @@ static int block_operations(struct f2fs_sb_info *sbi) /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - sync_dirty_inodes(sbi, DIR_INODE); - if (unlikely(f2fs_cp_error(sbi))) { - err = -EIO; + err = sync_dirty_inodes(sbi, DIR_INODE); + if (err) goto out; - } goto retry_flush_dents; } @@ -876,10 +874,9 @@ static int block_operations(struct f2fs_sb_info *sbi) if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - sync_node_pages(sbi, 0, &wbc); - if (unlikely(f2fs_cp_error(sbi))) { + err = sync_node_pages(sbi, 0, &wbc); + if (err) { f2fs_unlock_all(sbi); - err = -EIO; goto out; } goto retry_flush_nodes; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f2a023edfc1d2..5c43b2d606ec7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -498,7 +498,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) return 0; } -static void __allocate_data_blocks(struct inode *inode, loff_t offset, +static int __allocate_data_blocks(struct inode *inode, loff_t offset, size_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -507,13 +507,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, u64 len = F2FS_BYTES_TO_BLK(count); bool allocated; u64 end_offset; + int err = 0; while (len) { f2fs_lock_op(sbi); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, start, ALLOC_NODE)) + err = get_dnode_of_data(&dn, start, ALLOC_NODE); + if (err) goto out; allocated = false; @@ -522,12 +524,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, while (dn.ofs_in_node < end_offset && len) { block_t blkaddr; - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; goto sync_out; + } blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { - if (__allocate_data_block(&dn)) + err = __allocate_data_block(&dn); + if (err) goto sync_out; allocated = true; } @@ -545,7 +550,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, if (dn.node_changed) f2fs_balance_fs(sbi); } - return; + return err; sync_out: if (allocated) @@ -555,7 +560,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, f2fs_unlock_op(sbi); if (dn.node_changed) f2fs_balance_fs(sbi); - return; + return err; } /* @@ -1653,11 +1658,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == WRITE) { - __allocate_data_blocks(inode, offset, count); - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { - err = -EIO; + err = __allocate_data_blocks(inode, offset, count); + if (err) goto out; - } } err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 79345e7ce6bb8..3406e9966064b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1837,7 +1837,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *); void update_dirty_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_inode(struct inode *); -void sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); +int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 780db8bd2451e..2d87a3cf6768f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -259,8 +259,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) sync_node_pages(sbi, ino, &wbc); /* if cp_error was enabled, we should avoid infinite loop */ - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + ret = -EIO; goto out; + } if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ce350c44b5cf4..c09be339569cf 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -832,8 +832,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync) if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + ret = -EIO; goto stop; + } if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) { gc_type = FG_GC; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 929265d20c327..94d9753f8c532 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1189,6 +1189,11 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + if (unlikely(f2fs_cp_error(sbi))) { + pagevec_release(&pvec); + return -EIO; + } + /* * flushing sequence with step: * 0. indirect nodes From 9a950d52b7f0e1c64c2cc70d350562fb18c8b451 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Sat, 26 Dec 2015 18:07:41 +0800 Subject: [PATCH 56/87] f2fs: fix bugs and simplify codes of f2fs_fiemap fix bugs: 1. len could be updated incorrectly when start+len is beyond isize. 2. If there is a hole consisting of more than two blocks, it could fail to add FIEMAP_EXTENT_LAST flag for the last extent. 3. If there is an extent beyond isize, when we search extents in a range that ends at isize, it will also return the extent beyond isize, which is outside the range. Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 80 +++++++++++++++++--------------------------------- 1 file changed, 27 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5c43b2d606ec7..d67c599510d9b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -783,7 +783,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t isize = i_size_read(inode); u64 logical = 0, phys = 0, size = 0; u32 flags = 0; - bool past_eof = false, whole_file = false; int ret = 0; ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); @@ -797,17 +796,18 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } mutex_lock(&inode->i_mutex); + if (start >= isize) + goto out; - if (len >= isize) { - whole_file = true; - len = isize; - } + if (start + len > isize) + len = isize - start; if (logical_to_blk(inode, len) == 0) len = blk_to_logical(inode, 1); start_blk = logical_to_blk(inode, start); last_blk = logical_to_blk(inode, start + len - 1); + next: memset(&map_bh, 0, sizeof(struct buffer_head)); map_bh.b_size = len; @@ -819,59 +819,33 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* HOLE */ if (!buffer_mapped(&map_bh)) { - start_blk++; - - if (!past_eof && blk_to_logical(inode, start_blk) >= isize) - past_eof = 1; - - if (past_eof && size) { - flags |= FIEMAP_EXTENT_LAST; - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - } else if (size) { - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - size = 0; - } + /* Go through holes util pass the EOF */ + if (blk_to_logical(inode, start_blk++) < isize) + goto prep_next; + /* Found a hole beyond isize means no more extents. + * Note that the premise is that filesystems don't + * punch holes beyond isize and keep size unchanged. + */ + flags |= FIEMAP_EXTENT_LAST; + } - /* if we have holes up to/past EOF then we're done */ - if (start_blk > last_blk || past_eof || ret) - goto out; - } else { - if (start_blk > last_blk && !whole_file) { - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - goto out; - } + if (size) + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); - /* - * if size != 0 then we know we already have an extent - * to add, so add it. - */ - if (size) { - ret = fiemap_fill_next_extent(fieinfo, logical, - phys, size, flags); - if (ret) - goto out; - } + if (start_blk > last_blk || ret) + goto out; - logical = blk_to_logical(inode, start_blk); - phys = blk_to_logical(inode, map_bh.b_blocknr); - size = map_bh.b_size; - flags = 0; - if (buffer_unwritten(&map_bh)) - flags = FIEMAP_EXTENT_UNWRITTEN; + logical = blk_to_logical(inode, start_blk); + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = map_bh.b_size; + flags = 0; + if (buffer_unwritten(&map_bh)) + flags = FIEMAP_EXTENT_UNWRITTEN; - start_blk += logical_to_blk(inode, size); + start_blk += logical_to_blk(inode, size); - /* - * If we are past the EOF, then we need to make sure as - * soon as we find a hole that the last extent we found - * is marked with FIEMAP_EXTENT_LAST - */ - if (!past_eof && logical + size >= isize) - past_eof = true; - } +prep_next: cond_resched(); if (fatal_signal_pending(current)) ret = -EINTR; From 179448bfe4cd201e98e728391c6b01b25c849fe8 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 28 Dec 2015 21:48:32 +0800 Subject: [PATCH 57/87] f2fs: add a max block check for get_data_block_bmap This patch adds a max block check for get_data_block_bmap. Trinity test program will send a block number as parameter into ioctl_fibmap, which will be used in get_node_path(), when the block number large than f2fs max blocks, it will trigger kernel bug. Signed-off-by: Yunlei He Signed-off-by: Xue Liu [Jaegeuk Kim: fix missing condition, pointed by Chao Yu] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d67c599510d9b..6fbfc70ac8a03 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -761,6 +761,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, static int get_data_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { + /* Block number less than F2FS MAX BLOCKS */ + if (unlikely(iblock >= max_file_size(0))) + return -EFBIG; + return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_BMAP); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3406e9966064b..e04b2be6cd64d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1726,6 +1726,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) * super.c */ int f2fs_commit_super(struct f2fs_sb_info *, bool); +loff_t max_file_size(unsigned bits); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 75704d9caae2c..a2e3a8f893edd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -907,7 +907,7 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static loff_t max_file_size(unsigned bits) +loff_t max_file_size(unsigned bits) { loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; From e96248bb45d42375b23e1c083ec5a55151503e82 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 24 Dec 2015 18:11:32 +0800 Subject: [PATCH 58/87] f2fs: clean up f2fs_ioc_write_checkpoint Use f2fs_sync_fs to clean up codes in f2fs_ioc_write_checkpoint. Signed-off-by: Chao Yu [Jaegeuk Kim: remove unused err variable] Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2d87a3cf6768f..91f576a7903cf 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1619,8 +1619,6 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct cp_control cpc; - int err; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1628,13 +1626,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; - cpc.reason = __get_cp_reason(sbi); - - mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); - - return err; + return f2fs_sync_fs(sbi->sb, 1); } static int f2fs_defragment_range(struct f2fs_sb_info *sbi, From 8dc0d6a11e7d985dd466ce0a8c71eaea50dd7cc6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Dec 2015 16:13:09 -0800 Subject: [PATCH 59/87] f2fs: early check broken symlink length in the encrypted case If link is broken, its len is zero, and we don't need to move forward. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index fb41c80826964..6c4a94310b541 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -931,7 +931,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook { struct page *cpage = NULL; char *caddr, *paddr = NULL; - struct f2fs_str cstr; + struct f2fs_str cstr = FSTR_INIT(NULL, 0); struct f2fs_str pstr = FSTR_INIT(NULL, 0); struct inode *inode = d_inode(dentry); struct f2fs_encrypted_symlink_data *sd; @@ -952,6 +952,12 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook /* Symlink is encrypted */ sd = (struct f2fs_encrypted_symlink_data *)caddr; cstr.len = le16_to_cpu(sd->len); + + /* this is broken symlink case */ + if (unlikely(cstr.len == 0)) { + res = -ENOENT; + goto errout; + } cstr.name = kmalloc(cstr.len, GFP_NOFS); if (!cstr.name) { res = -ENOMEM; @@ -960,7 +966,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook memcpy(cstr.name, sd->encrypted_path, cstr.len); /* this is broken symlink case */ - if (cstr.name[0] == 0 && cstr.len == 0) { + if (unlikely(cstr.name[0] == 0)) { res = -ENOENT; goto errout; } From 819d9153d4c87329910a4cb01198610cd24ec62d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Dec 2015 13:48:11 -0800 Subject: [PATCH 60/87] f2fs: use i_size_read to get i_size We need to use i_size_read() to get inode->i_size. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6fbfc70ac8a03..14b40a9db5b39 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1386,10 +1386,11 @@ static int f2fs_write_data_pages(struct address_space *mapping, static void f2fs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; + loff_t i_size = i_size_read(inode); - if (to > inode->i_size) { - truncate_pagecache(inode, inode->i_size); - truncate_blocks(inode, inode->i_size, true); + if (to > i_size) { + truncate_pagecache(inode, i_size); + truncate_blocks(inode, i_size, true); } } From ed3d12561a731b99b58c6c95151291cebf0b3feb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Dec 2015 11:39:06 -0800 Subject: [PATCH 61/87] f2fs: load largest extent all the time Otherwise, we can get mismatched largest extent information. One example is: 1. mount f2fs w/ extent_cache 2. make a small extent 3. umount 4. mount f2fs w/o extent_cache 5. update the largest extent 6. umount 7. mount f2fs w/ extent_cache 8. get the old extent made by #2 Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 18 +++++++++++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 3 ++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5305a29f91a33..b37184f720e89 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -166,20 +166,27 @@ static void __drop_largest_extent(struct inode *inode, largest->len = 0; } -void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +/* return true, if inode page is changed */ +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; struct extent_node *en; struct extent_info ei; - if (!f2fs_may_extent_tree(inode)) - return; + if (!f2fs_may_extent_tree(inode)) { + /* drop largest extent */ + if (i_ext && i_ext->len) { + i_ext->len = 0; + return true; + } + return false; + } et = __grab_extent_tree(inode); - if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) - return; + if (!i_ext || !i_ext->len) + return false; set_extent_info(&ei, le32_to_cpu(i_ext->fofs), le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); @@ -196,6 +203,7 @@ void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) } out: write_unlock(&et->lock); + return false; } static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e04b2be6cd64d..a3395088e0f0e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2083,7 +2083,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *); * extent_cache.c */ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); -void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); +bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); unsigned int f2fs_destroy_extent_node(struct inode *); void f2fs_destroy_extent_tree(struct inode *); bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ec3fb32c47262..e95500802daa6 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -138,7 +138,8 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - f2fs_init_extent_tree(inode, &ri->i_ext); + if (f2fs_init_extent_tree(inode, &ri->i_ext)) + set_page_dirty(node_page); get_inline_info(fi, ri); From 4e0d836d5fb26d2cdbb75b0d16d98bef6b798490 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 30 Dec 2015 17:40:31 +0800 Subject: [PATCH 62/87] f2fs: fix to skip recovering dot dentries in a readonly fs If filesystem is readonly, leave user message info instead of recovering inline dot inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6c4a94310b541..a629af5cb0ce6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -214,6 +214,13 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) struct page *page; int err = 0; + if (f2fs_readonly(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_INFO, + "skip recovering inline_dots inode (ino:%lu, pino:%u) " + "in readonly mountpoint", dir->i_ino, pino); + return 0; + } + f2fs_balance_fs(sbi); f2fs_lock_op(sbi); From 732d56489f21c04f7bf60c675f7d152c9239a09c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 29 Dec 2015 15:46:33 -0800 Subject: [PATCH 63/87] f2fs: fix f2fs_ioc_abort_volatile_write There are two rules to handle aborting volatile or atomic writes. 1. drop atomic writes - we don't need to keep any stale db data. 2. write journal data - we should keep the journal data with fsync for db recovery. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 91f576a7903cf..b04ab40ddc737 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1433,9 +1433,14 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) if (ret) return ret; - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - commit_inmem_pages(inode, true); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + commit_inmem_pages(inode, true); + } + if (f2fs_is_volatile_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); + } mnt_drop_write_file(filp); return ret; From 1f6fa26199bb164157fbf81f850df1991d10c959 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 10:28:52 -0800 Subject: [PATCH 64/87] f2fs: remove f2fs_bug_on in terms of max_depth There is no report on this bug_on case, but if malicious attacker changed this field intentionally, we can just reset it as a MAX value. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3da58265c0d40..29bb8dd76a46e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, namehash = f2fs_dentry_hash(&name); - f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); - nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); @@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, goto out; max_depth = F2FS_I(dir)->i_current_depth; + if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { + f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING, + "Corrupted max_depth of %lu: %u", + dir->i_ino, max_depth); + max_depth = MAX_DIR_HASH_DEPTH; + F2FS_I(dir)->i_current_depth = max_depth; + mark_inode_dirty(dir); + } for (level = 0; level < max_depth; level++) { de = find_in_level(dir, level, &fname, res_page); From 8d4ea29b6426470456ee9daee64bac55a3b13289 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 13:08:02 -0800 Subject: [PATCH 65/87] f2fs: write pending bios when cp_error is set When testing ioc_shutdown, put_super is able to be hanged by waiting for writebacking pages as follows. INFO: task umount:2723 blocked for more than 120 seconds. Tainted: G O 4.4.0-rc3+ #8 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. umount D ffff88000859f9d8 0 2723 2110 0x00000000 ffff88000859f9d8 0000000000000000 0000000000000000 ffffffff81e11540 ffff880078c225c0 ffff8800085a0000 ffff88007fc17440 7fffffffffffffff ffffffff818239f0 ffff88000859fb48 ffff88000859f9f0 ffffffff8182310c Call Trace: [] ? bit_wait+0x50/0x50 [] schedule+0x3c/0x90 [] schedule_timeout+0x2d9/0x430 [] ? mark_held_locks+0x6f/0xa0 [] ? ktime_get+0x7d/0x140 [] ? bit_wait+0x50/0x50 [] ? kvm_clock_get_cycles+0x25/0x30 [] ? ktime_get+0xac/0x140 [] ? bit_wait+0x50/0x50 [] io_schedule_timeout+0xa4/0x110 [] bit_wait_io+0x35/0x50 [] __wait_on_bit+0x5d/0x90 [] wait_on_page_bit+0xcb/0xf0 [] ? autoremove_wake_function+0x40/0x40 [] truncate_inode_pages_range+0x4bc/0x840 [] truncate_inode_pages_final+0x4d/0x60 [] f2fs_evict_inode+0x75/0x400 [f2fs] [] evict+0xbc/0x190 [] iput+0x229/0x2c0 [] f2fs_put_super+0x105/0x1a0 [f2fs] [] generic_shutdown_super+0x6a/0xf0 [] kill_block_super+0x27/0x70 [] kill_f2fs_super+0x20/0x30 [f2fs] [] deactivate_locked_super+0x43/0x70 [] deactivate_super+0x5c/0x60 [] cleanup_mnt+0x3f/0x90 [] __cleanup_mnt+0x12/0x20 [] task_work_run+0x73/0xa0 [] exit_to_usermode_loop+0xcc/0xd0 [] syscall_return_slowpath+0xcc/0xe0 [] int_ret_from_sys_call+0x25/0x9f Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6b89ac69b7e47..5dbafd5e83d9e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); - if (wbc->for_reclaim) + if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_bio(sbi, META, WRITE); return 0; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 14b40a9db5b39..4851e84d02837 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1179,7 +1179,7 @@ static int f2fs_write_data_page(struct page *page, unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); - if (wbc->for_reclaim) { + if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94d9753f8c532..669c44ef93034 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1368,7 +1368,7 @@ static int f2fs_write_node_page(struct page *page, up_read(&sbi->node_write); unlock_page(page); - if (wbc->for_reclaim) + if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_bio(sbi, NODE, WRITE); return 0; From c46a155bdf3c8877719aa63d1bf1d6e79e2a9764 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 13:49:17 -0800 Subject: [PATCH 66/87] f2fs: use IPU for fdatasync This patch fixes missing IPU condition when fdatasync is called. With this patch, fdatasync is able to avoid additional node writes for recovery. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b04ab40ddc737..e3d32f6b4b4f5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -202,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_f2fs_sync_file_enter(inode); /* if fdatasync is triggered, let's do in-place-update */ - if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) + if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(fi, FI_NEED_IPU); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); clear_inode_flag(fi, FI_NEED_IPU); From c00ba5548500a6f5dfd3c0e0300b338b584018ba Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 15:24:14 -0800 Subject: [PATCH 67/87] f2fs: monitor zombie_tree count This patch adds an entry to show the number of zombie extent_tree. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/f2fs.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ed5dfcc8886f0..b73e8e133c8b7 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,6 +39,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; si->total_ext = atomic64_read(&sbi->total_hit_ext); si->ext_tree = atomic_read(&sbi->total_ext_tree); + si->zombie_tree = atomic_read(&sbi->total_zombie_tree); si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); @@ -292,8 +293,8 @@ static int stat_show(struct seq_file *s, void *v) !si->total_ext ? 0 : div64_u64(si->hit_total * 100, si->total_ext), si->hit_total, si->total_ext); - seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n", - si->ext_tree, si->ext_node); + seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", + si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - inmem: %4d, wb: %4d\n", si->inmem_pages, si->wb_pages); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a3395088e0f0e..d81bf5a437147 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1890,7 +1890,7 @@ struct f2fs_stat_info { int main_area_segs, main_area_sections, main_area_zones; unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; - int ext_tree, ext_node; + int ext_tree, zombie_tree, ext_node; int ndirty_node, ndirty_meta; int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; int nats, dirty_nats, sits, dirty_sits, fnids; From 137d09f002df7d4e52513d75f8910945a6c1bb08 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 31 Dec 2015 15:02:16 -0800 Subject: [PATCH 68/87] f2fs: introduce zombie list for fast shrinking extent trees This patch removes refcount, and instead, adds zombie_list to shrink directly without radix tree traverse. Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 49 ++++++++++++++++++------------------------ fs/f2fs/f2fs.h | 3 ++- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index b37184f720e89..4dee2be9a648c 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -68,13 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) et->root = RB_ROOT; et->cached_en = NULL; rwlock_init(&et->lock); - atomic_set(&et->refcount, 0); + INIT_LIST_HEAD(&et->list); et->count = 0; atomic_inc(&sbi->total_ext_tree); } else { atomic_dec(&sbi->total_zombie_tree); + list_del_init(&et->list); } - atomic_inc(&et->refcount); up_write(&sbi->extent_tree_lock); /* never died until evict_inode */ @@ -551,9 +551,9 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; + struct extent_tree *et, *next; struct extent_node *en, *tmp; unsigned long ino = F2FS_ROOT_INO(sbi); - struct radix_tree_root *root = &sbi->extent_tree_root; unsigned int found; unsigned int node_cnt = 0, tree_cnt = 0; int remained; @@ -569,29 +569,20 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) goto out; /* 1. remove unreferenced extent tree */ - while ((found = radix_tree_gang_lookup(root, - (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { - unsigned i; - - ino = treevec[found - 1]->ino + 1; - for (i = 0; i < found; i++) { - struct extent_tree *et = treevec[i]; - - if (!atomic_read(&et->refcount)) { - write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et, true); - write_unlock(&et->lock); + list_for_each_entry_safe(et, next, &sbi->zombie_list, list) { + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, true); + write_unlock(&et->lock); - radix_tree_delete(root, et->ino); - kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - atomic_dec(&sbi->total_zombie_tree); - tree_cnt++; + list_del_init(&et->list); + radix_tree_delete(&sbi->extent_tree_root, et->ino); + kmem_cache_free(extent_tree_slab, et); + atomic_dec(&sbi->total_ext_tree); + atomic_dec(&sbi->total_zombie_tree); + tree_cnt++; - if (node_cnt + tree_cnt >= nr_shrink) - goto unlock_out; - } - } + if (node_cnt + tree_cnt >= nr_shrink) + goto unlock_out; } up_write(&sbi->extent_tree_lock); @@ -619,7 +610,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) */ ino = F2FS_ROOT_INO(sbi); - while ((found = radix_tree_gang_lookup(root, + while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { unsigned i; @@ -670,8 +661,10 @@ void f2fs_destroy_extent_tree(struct inode *inode) return; if (inode->i_nlink && !is_bad_inode(inode) && et->count) { - atomic_dec(&et->refcount); + down_write(&sbi->extent_tree_lock); + list_add_tail(&et->list, &sbi->zombie_list); atomic_inc(&sbi->total_zombie_tree); + up_write(&sbi->extent_tree_lock); return; } @@ -680,8 +673,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) /* delete extent tree entry in radix tree */ down_write(&sbi->extent_tree_lock); - atomic_dec(&et->refcount); - f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); + f2fs_bug_on(sbi, et->count); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); @@ -737,6 +729,7 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); atomic_set(&sbi->total_ext_tree, 0); + INIT_LIST_HEAD(&sbi->zombie_list); atomic_set(&sbi->total_zombie_tree, 0); atomic_set(&sbi->total_ext_node, 0); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d81bf5a437147..e2990c9786614 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -359,8 +359,8 @@ struct extent_tree { struct rb_root root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ struct extent_info largest; /* largested extent info */ + struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ - atomic_t refcount; /* reference count of rb-tree */ unsigned int count; /* # of extent node in rb-tree*/ }; @@ -764,6 +764,7 @@ struct f2fs_sb_info { struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ atomic_t total_ext_tree; /* extent tree count */ + struct list_head zombie_list; /* extent zombie tree list */ atomic_t total_zombie_tree; /* extent zombie tree count */ atomic_t total_ext_node; /* extent info count */ From 3a9e6433a367211a172cb7b4d5b727c720bd0de0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Dec 2015 18:20:10 +0800 Subject: [PATCH 69/87] f2fs crypto: check CONFIG_F2FS_FS_XATTR for encrypted symlink Add missed CONFIG_F2FS_FS_XATTR for encrypted symlink inode in order to avoid unneeded registry of ->{get,set,remove}xattr. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a629af5cb0ce6..0d61a6864ab10 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1016,10 +1016,12 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = { .put_link = kfree_put_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, +#ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = f2fs_listxattr, .removexattr = generic_removexattr, +#endif }; #endif From e0afc4d6d0d3e7e5a99f691bc64ae7c74bea790e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Dec 2015 14:35:37 +0800 Subject: [PATCH 70/87] f2fs: introduce max_file_blocks in sbi Introduce max_file_blocks in sbi to store max block index of file in f2fs, it could be used to avoid unneeded calculation of max block index in runtime. Signed-off-by: Chao Yu [Jaegeuk Kim: fix overflow of sbi->max_file_blocks] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4851e84d02837..89a978c57da92 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -762,7 +762,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { /* Block number less than F2FS MAX BLOCKS */ - if (unlikely(iblock >= max_file_size(0))) + if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks)) return -EFBIG; return __get_data_block(inode, iblock, bh_result, create, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e2990c9786614..882babaa678e1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -783,6 +783,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ unsigned int total_valid_inode_count; /* valid inode count */ + loff_t max_file_blocks; /* max block index of file */ int active_logs; /* # of active logs */ int dir_level; /* directory level */ @@ -1727,7 +1728,6 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) * super.c */ int f2fs_commit_super(struct f2fs_sb_info *, bool); -loff_t max_file_size(unsigned bits); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) void f2fs_msg(struct super_block *, const char *, const char *, ...); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a2e3a8f893edd..0bbd756821a7d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -907,7 +907,7 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -loff_t max_file_size(unsigned bits) +static loff_t max_file_blocks(void) { loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; @@ -923,7 +923,6 @@ loff_t max_file_size(unsigned bits) leaf_count *= NIDS_PER_BLOCK; result += leaf_count; - result <<= bits; return result; } @@ -1278,7 +1277,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_options; - sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + sbi->max_file_blocks = max_file_blocks(); + sb->s_maxbytes = sbi->max_file_blocks << + le32_to_cpu(raw_super->log_blocksize); sb->s_max_links = F2FS_LINK_MAX; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); From a51311938e14c17f5a94d30baac9d7bec71f5858 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 2 Jan 2016 09:19:41 -0800 Subject: [PATCH 71/87] f2fs: cover more area with nat_tree_lock There was a subtle bug on nat cache management which incurs wrong nid allocation or wrong block addresses when try_to_free_nats is triggered heavily. This patch enlarges the previous coverage of nat_tree_lock to avoid data race. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 669c44ef93034..4dab09f141b79 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, { struct nat_entry *e; - down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&e->ni, ne); } - up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + down_write(&nm_i->nat_tree_lock); + /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); @@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) cache: /* cache nat entry */ cache_nat_entry(NM_I(sbi), nid, &ne); + up_write(&nm_i->nat_tree_lock); } /* @@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); - if (ne && - (!get_nat_flag(ne, IS_CHECKPOINTED) || + if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi) ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); + down_read(&nm_i->nat_tree_lock); + while (1) { struct page *page = get_current_nat_page(sbi, nid); @@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) remove_free_nid(nm_i, nid); } mutex_unlock(&curseg->curseg_mutex); + up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) raw_ne = nat_in_journal(sum, i); - down_write(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (!ne) { ne = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&ne->ni, &raw_ne); } __set_nat_cache_dirty(nm_i, ne); - up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; struct page *page = NULL; - struct f2fs_nm_info *nm_i = NM_I(sbi); /* * there are two steps to flush nat entries: @@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, raw_ne = &nat_blk->entries[nid - start_nid]; } raw_nat_from_node_info(raw_ne, &ne->ni); - - down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - up_write(&NM_I(sbi)->nat_tree_lock); - if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); } @@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, set->entry_cnt); - down_write(&nm_i->nat_tree_lock); radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - up_write(&nm_i->nat_tree_lock); kmem_cache_free(nat_entry_set_slab, set); } @@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!nm_i->dirty_nat_cnt) return; + + down_write(&nm_i->nat_tree_lock); + /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_set(nm_i, set_idx, SETVEC_SIZE, setvec))) { unsigned idx; @@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) __adjust_nat_entry_set(setvec[idx], &sets, MAX_NAT_JENTRIES(sum)); } - up_write(&nm_i->nat_tree_lock); /* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set); + up_write(&nm_i->nat_tree_lock); + f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } From 957efb0c2144cc5ff1795f43bf2d2ca430eaa227 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 2 Jan 2016 09:23:27 -0800 Subject: [PATCH 72/87] Revert "f2fs: check the node block address of newly allocated nid" Original issue is fixed by: f2fs: cover more area with nat_tree_lock This reverts commit 24928634f81b1592e83b37dcd89ed45c28f12feb. --- fs/f2fs/node.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4dab09f141b79..6d5f548d20909 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1602,8 +1602,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) /* We should not use stale free nids created by build_free_nids */ if (nm_i->fcnt && !on_build_free_nids(nm_i)) { - struct node_info ni; - f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); list_for_each_entry(i, &nm_i->free_nid_list, list) if (i->state == NID_NEW) @@ -1614,13 +1612,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) i->state = NID_ALLOC; nm_i->fcnt--; spin_unlock(&nm_i->free_nid_list_lock); - - /* check nid is allocated already */ - get_node_info(sbi, *nid, &ni); - if (ni.blk_addr != NULL_ADDR) { - alloc_nid_done(sbi, *nid); - goto retry; - } return true; } spin_unlock(&nm_i->free_nid_list_lock); From de1475cc53b2d6442443dcf5d66ed0fc50ed3c7e Mon Sep 17 00:00:00 2001 From: Fan Li Date: Mon, 4 Jan 2016 15:56:50 +0800 Subject: [PATCH 73/87] f2fs: read isize while holding i_mutex in fiemap make sure the isize we read doesn't change during the process. Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 89a978c57da92..ac5bea0f5f09a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -784,7 +784,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, { struct buffer_head map_bh; sector_t start_blk, last_blk; - loff_t isize = i_size_read(inode); + loff_t isize; u64 logical = 0, phys = 0, size = 0; u32 flags = 0; int ret = 0; @@ -800,6 +800,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); if (start >= isize) goto out; From e84587250ab7e38b7d85e93a8c317e065e5c0a1f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:13:37 +0800 Subject: [PATCH 74/87] f2fs: check node id earily when readaheading node page Add node id check in ra_node_page and get_node_page_ra like get_node_page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6d5f548d20909..c1ddf3d88dd99 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1041,6 +1041,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) struct page *apage; int err; + if (!nid) + return; + f2fs_bug_on(sbi, check_nid_range(sbi, nid)); + apage = find_get_page(NODE_MAPPING(sbi), nid); if (apage && PageUptodate(apage)) { f2fs_put_page(apage, 0); @@ -1108,6 +1112,7 @@ struct page *get_node_page_ra(struct page *parent, int start) nid = get_nid(parent, start, false); if (!nid) return ERR_PTR(-ENOENT); + f2fs_bug_on(sbi, check_nid_range(sbi, nid)); repeat: page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) @@ -1127,9 +1132,9 @@ struct page *get_node_page_ra(struct page *parent, int start) end = start + MAX_RA_NODE; end = min(end, NIDS_PER_BLOCK); for (i = start + 1; i < end; i++) { - nid_t tnid = get_nid(parent, i, false); - if (!tnid) - continue; + nid_t tnid; + + tnid = get_nid(parent, i, false); ra_node_page(sbi, tnid); } From 0e022ea8fc49ed9c72ab9dcd9ca96414dc026184 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 5 Jan 2016 16:52:46 +0800 Subject: [PATCH 75/87] f2fs: introduce __get_node_page to reuse common code There are duplicated code in between get_node_page and get_node_page_ra, introduce __get_node_page to includes common parts of these two, and export get_node_page and get_node_page_ra by reusing __get_node_page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 88 ++++++++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c1ddf3d88dd99..5a2d800f4abca 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1060,56 +1060,35 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) f2fs_put_page(apage, err ? 1 : 0); } -struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +/* + * readahead MAX_RA_NODE number of node pages. + */ +void ra_node_pages(struct page *parent, int start) { - struct page *page; - int err; + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + struct blk_plug plug; + int i, end; + nid_t nid; - if (!nid) - return ERR_PTR(-ENOENT); - f2fs_bug_on(sbi, check_nid_range(sbi, nid)); -repeat: - page = grab_cache_page(NODE_MAPPING(sbi), nid); - if (!page) - return ERR_PTR(-ENOMEM); + blk_start_plug(&plug); - err = read_node_page(page, READ_SYNC); - if (err < 0) { - f2fs_put_page(page, 1); - return ERR_PTR(err); - } else if (err == LOCKED_PAGE) { - goto page_hit; + /* Then, try readahead for siblings of the desired node */ + end = start + MAX_RA_NODE; + end = min(end, NIDS_PER_BLOCK); + for (i = start; i < end; i++) { + nid = get_nid(parent, i, false); + ra_node_page(sbi, nid); } - lock_page(page); - - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { - f2fs_put_page(page, 1); - goto repeat; - } -page_hit: - f2fs_bug_on(sbi, nid != nid_of_node(page)); - return page; + blk_finish_plug(&plug); } -/* - * Return a locked page for the desired node page. - * And, readahead MAX_RA_NODE number of node pages. - */ -struct page *get_node_page_ra(struct page *parent, int start) +struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, + struct page *parent, int start) { - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); - struct blk_plug plug; struct page *page; - int err, i, end; - nid_t nid; + int err; - /* First, try getting the desired direct node. */ - nid = get_nid(parent, start, false); if (!nid) return ERR_PTR(-ENOENT); f2fs_bug_on(sbi, check_nid_range(sbi, nid)); @@ -1126,21 +1105,11 @@ struct page *get_node_page_ra(struct page *parent, int start) goto page_hit; } - blk_start_plug(&plug); - - /* Then, try readahead for siblings of the desired node */ - end = start + MAX_RA_NODE; - end = min(end, NIDS_PER_BLOCK); - for (i = start + 1; i < end; i++) { - nid_t tnid; - - tnid = get_nid(parent, i, false); - ra_node_page(sbi, tnid); - } - - blk_finish_plug(&plug); + if (parent) + ra_node_pages(parent, start + 1); lock_page(page); + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); @@ -1154,6 +1123,19 @@ struct page *get_node_page_ra(struct page *parent, int start) return page; } +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +{ + return __get_node_page(sbi, nid, NULL, 0); +} + +struct page *get_node_page_ra(struct page *parent, int start) +{ + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + nid_t nid = get_nid(parent, start, false); + + return __get_node_page(sbi, nid, parent, start); +} + void sync_inode_page(struct dnode_of_data *dn) { if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { From 7612118ae8cdd36cbd74d873855d70252d2d49e3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 1 Jan 2016 22:03:47 -0800 Subject: [PATCH 76/87] f2fs: check the page status filled from disk After reading a page, we need to check whether there is any error. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ac5bea0f5f09a..77c3bbb9bee05 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -413,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode, struct page *page; struct dnode_of_data dn; int err; -repeat: + page = f2fs_grab_cache_page(mapping, index, true); if (!page) { /* @@ -442,12 +442,11 @@ struct page *get_new_data_page(struct inode *inode, } else { f2fs_put_page(page, 1); - page = get_read_data_page(inode, index, READ_SYNC, true); + /* if ipage exists, blkaddr should be NEW_ADDR */ + f2fs_bug_on(F2FS_I_SB(inode), ipage); + page = get_lock_data_page(inode, index, true); if (IS_ERR(page)) - goto repeat; - - /* wait for read completion */ - lock_page(page); + return page; } got_it: if (new_i_size && i_size_read(inode) < From 12719ae14e57980ebf0a7baa63bc80494c76b192 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2016 13:23:12 -0800 Subject: [PATCH 77/87] f2fs: avoid unnecessary f2fs_balance_fs calls Only when node page is newly dirtied, it needs to check whether we need to do f2fs_gc. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/inode.c | 19 ++++++++++--------- fs/f2fs/node.c | 26 ++++++++++++++------------ fs/f2fs/node.h | 4 ++-- fs/f2fs/super.c | 2 -- 6 files changed, 30 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 77c3bbb9bee05..3cf86fda8138f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -225,8 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn) /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); - set_page_dirty(node_page); - dn->node_changed = true; + if (set_page_dirty(node_page)) + dn->node_changed = true; } int reserve_new_block(struct dnode_of_data *dn) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 882babaa678e1..461b32923c147 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1674,8 +1674,8 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); void f2fs_set_inode_flags(struct inode *); struct inode *f2fs_iget(struct super_block *, unsigned long); int try_to_free_nats(struct f2fs_sb_info *, int); -void update_inode(struct inode *, struct page *); -void update_inode_page(struct inode *); +int update_inode(struct inode *, struct page *); +int update_inode_page(struct inode *); int f2fs_write_inode(struct inode *, struct writeback_control *); void f2fs_evict_inode(struct inode *); void handle_failed_inode(struct inode *); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e95500802daa6..cabc1ff108a13 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -222,7 +222,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) return ERR_PTR(ret); } -void update_inode(struct inode *inode, struct page *node_page) +int update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; @@ -260,15 +260,16 @@ void update_inode(struct inode *inode, struct page *node_page) __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); - set_page_dirty(node_page); - clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + + return set_page_dirty(node_page); } -void update_inode_page(struct inode *inode) +int update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *node_page; + int ret = 0; retry: node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) { @@ -279,10 +280,11 @@ void update_inode_page(struct inode *inode) } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi); } - return; + return 0; } - update_inode(inode, node_page); + ret = update_inode(inode, node_page); f2fs_put_page(node_page, 1); + return ret; } int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -300,9 +302,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. */ - update_inode_page(inode); - - f2fs_balance_fs(sbi); + if (update_inode_page(inode)) + f2fs_balance_fs(sbi); return 0; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5a2d800f4abca..c091b757bda60 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -543,7 +543,6 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) set_nid(parent, offset[i - 1], nids[i], i == 1); alloc_nid_done(sbi, nids[i]); - dn->node_changed = true; done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); @@ -679,8 +678,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, ret = truncate_dnode(&rdn); if (ret < 0) goto out_err; - set_nid(page, i, 0, false); - dn->node_changed = true; + if (set_nid(page, i, 0, false)) + dn->node_changed = true; } } else { child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; @@ -693,8 +692,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, rdn.nid = child_nid; ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); if (ret == (NIDS_PER_BLOCK + 1)) { - set_nid(page, i, 0, false); - dn->node_changed = true; + if (set_nid(page, i, 0, false)) + dn->node_changed = true; child_nofs += ret; } else if (ret < 0 && ret != -ENOENT) { goto out_err; @@ -755,8 +754,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, err = truncate_dnode(dn); if (err < 0) goto fail; - set_nid(pages[idx], i, 0, false); - dn->node_changed = true; + if (set_nid(pages[idx], i, 0, false)) + dn->node_changed = true; } if (offset[idx + 1] == 0) { @@ -981,7 +980,8 @@ struct page *new_node_page(struct dnode_of_data *dn, fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); SetPageUptodate(page); - set_page_dirty(page); + if (set_page_dirty(page)) + dn->node_changed = true; if (f2fs_has_xattr_block(ofs)) F2FS_I(dn->inode)->i_xattr_nid = dn->nid; @@ -1138,18 +1138,20 @@ struct page *get_node_page_ra(struct page *parent, int start) void sync_inode_page(struct dnode_of_data *dn) { + int ret = 0; + if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { - update_inode(dn->inode, dn->node_page); + ret = update_inode(dn->inode, dn->node_page); } else if (dn->inode_page) { if (!dn->inode_page_locked) lock_page(dn->inode_page); - update_inode(dn->inode, dn->inode_page); + ret = update_inode(dn->inode, dn->inode_page); if (!dn->inode_page_locked) unlock_page(dn->inode_page); } else { - update_inode_page(dn->inode); + ret = update_inode_page(dn->inode); } - dn->node_changed = true; + dn->node_changed = ret ? true: false; } int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 2de759a7746f5..d4d1f636fe1c3 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -317,7 +317,7 @@ static inline bool IS_DNODE(struct page *node_page) return true; } -static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); @@ -327,7 +327,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); else rn->in.nid[off] = cpu_to_le32(nid); - set_page_dirty(p); + return set_page_dirty(p); } static inline nid_t get_nid(struct page *p, int off, bool i) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0bbd756821a7d..f5cc790646e29 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -591,8 +591,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync) mutex_lock(&sbi->gc_mutex); err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); - } else { - f2fs_balance_fs(sbi); } f2fs_trace_ios(NULL, 1); From 2a4b8e9fab9cea45d90179d9ee8e718c5ed26457 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2016 13:52:34 -0800 Subject: [PATCH 78/87] f2fs: remove redundant calls This patch removes redundant calls. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e3d32f6b4b4f5..69bc65fd862c4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -267,8 +267,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); - - f2fs_balance_fs(sbi); goto sync_nodes; } @@ -484,7 +482,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) F2FS_I(dn->inode)) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); - set_page_dirty(dn->node_page); sync_inode_page(dn); } dn->ofs_in_node = ofs; From 2c4db1a6f6b42e2a9fb611cbbeb71a3a9a358ee0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2016 14:15:04 -0800 Subject: [PATCH 79/87] f2fs: clean up f2fs_balance_fs This patch adds one parameter to clean up all the callers of f2fs_balance_fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 ++++++----------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 17 ++++++++--------- fs/f2fs/inline.c | 3 +-- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 22 +++++++++++----------- fs/f2fs/segment.c | 6 ++++-- fs/f2fs/xattr.c | 2 +- 8 files changed, 33 insertions(+), 38 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3cf86fda8138f..6fae75ddae6d1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -546,8 +546,7 @@ static int __allocate_data_blocks(struct inode *inode, loff_t offset, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); } return err; @@ -557,8 +556,7 @@ static int __allocate_data_blocks(struct inode *inode, loff_t offset, f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); return err; } @@ -657,8 +655,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (create) { f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); f2fs_lock_op(sbi); } @@ -718,8 +715,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unlock_out: if (create) { f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); } out: trace_f2fs_map_blocks(inode, map, err); @@ -1178,8 +1174,7 @@ static int f2fs_write_data_page(struct page *page, if (err) ClearPageUptodate(page); unlock_page(page); - if (need_balance_fs) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, need_balance_fs); if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); @@ -1506,7 +1501,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (need_balance && has_not_enough_free_secs(sbi, 0)) { unlock_page(page); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); lock_page(page); if (page->mapping != mapping) { /* The page got truncated from under us */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 461b32923c147..412865482a0bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1780,7 +1780,7 @@ void destroy_node_manager_caches(void); */ void register_inmem_page(struct inode *, struct page *); int commit_inmem_pages(struct inode *, bool); -void f2fs_balance_fs(struct f2fs_sb_info *); +void f2fs_balance_fs(struct f2fs_sb_info *, bool); void f2fs_balance_fs_bg(struct f2fs_sb_info *); int f2fs_issue_flush(struct f2fs_sb_info *); int create_flush_cmd_control(struct f2fs_sb_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 69bc65fd862c4..ff06827aa3695 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -55,8 +55,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); file_update_time(vma->vm_file); lock_page(page); @@ -677,7 +676,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) err = f2fs_truncate(inode, true); if (err) return err; - f2fs_balance_fs(F2FS_I_SB(inode)); + f2fs_balance_fs(F2FS_I_SB(inode), true); } else { /* * do not trim all blocks after i_size if target size is @@ -732,7 +731,7 @@ static int fill_zero(struct inode *inode, pgoff_t index, if (!len) return 0; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); page = get_new_data_page(inode, NULL, index, false); @@ -818,7 +817,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT; blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT; @@ -921,7 +920,7 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) int ret = 0; for (; end < nrpages; start++, end++) { - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); ret = __exchange_data_block(inode, end, start, true); f2fs_unlock_op(sbi); @@ -1103,7 +1102,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) @@ -1155,7 +1154,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -1653,7 +1652,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, pg_start = range->start >> PAGE_CACHE_SHIFT; pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); mutex_lock(&inode->i_mutex); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5ffbd169b719a..c3f0b7d4cfca1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -200,8 +200,7 @@ int f2fs_convert_inline_inode(struct inode *inode) f2fs_put_page(page, 1); - if (dn.node_changed) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, dn.node_changed); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cabc1ff108a13..2ac4b780e8b43 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -303,7 +303,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * during the urgent cleaning time when runing out of free sections. */ if (update_inode_page(inode)) - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); return 0; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d61a6864ab10..53d6227f5581f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -140,7 +140,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, !f2fs_is_child_context_consistent_with_parent(dir, inode)) return -EPERM; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); inode->i_ctime = CURRENT_TIME; ihold(inode); @@ -221,7 +221,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) return 0; } - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); @@ -302,7 +302,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (!de) goto fail; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); @@ -361,7 +361,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -452,7 +452,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); @@ -498,7 +498,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -539,7 +539,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); @@ -642,7 +642,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_whiteout; - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); @@ -675,7 +675,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(old_inode); update_inode_page(new_inode); } else { - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); @@ -816,7 +816,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_new_dir; } - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a3474bad57702..c7bbc915d9627 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -213,7 +213,7 @@ int commit_inmem_pages(struct inode *inode, bool abort) * inode becomes free by iget_locked in f2fs_iget. */ if (!abort) { - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); } @@ -262,8 +262,10 @@ int commit_inmem_pages(struct inode *inode, bool abort) * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. */ -void f2fs_balance_fs(struct f2fs_sb_info *sbi) +void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { + if (!need) + return; /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 862368a32e535..822a8af89c122 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -609,7 +609,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, if (ipage) return __f2fs_setxattr(inode, index, name, value, size, ipage, flags); - f2fs_balance_fs(sbi); + f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); /* protect xattr_ver */ From da5af127a1a17bac121c6889c88cc90f8a278a84 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:19:27 +0800 Subject: [PATCH 80/87] f2fs: recognize encrypted data in f2fs_fiemap This patch fixes to teach f2fs_fiemap to recognize encrypted data. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6fae75ddae6d1..a3bce12b0cce5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -830,9 +830,13 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= FIEMAP_EXTENT_LAST; } - if (size) + if (size) { + if (f2fs_encrypted_inode(inode)) + flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; + ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); + } if (start_blk > last_blk || ret) goto out; From 68e353851002dc07555b067a0baff1cc2f709c04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:22:52 +0800 Subject: [PATCH 81/87] f2fs: use atomic type for node count in extent tree 1. rename field in struct extent_tree from count to node_cnt for readability. 2. alter to use atomic type for node_cnt. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 17 +++++++++-------- fs/f2fs/f2fs.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 4dee2be9a648c..9febbc622bf53 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -36,7 +36,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, rb_link_node(&en->rb_node, parent, p); rb_insert_color(&en->rb_node, &et->root); - et->count++; + atomic_inc(&et->node_cnt); atomic_inc(&sbi->total_ext_node); return en; } @@ -45,7 +45,7 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { rb_erase(&en->rb_node, &et->root); - et->count--; + atomic_dec(&et->node_cnt); atomic_dec(&sbi->total_ext_node); if (et->cached_en == en) @@ -69,7 +69,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); - et->count = 0; + atomic_set(&et->node_cnt, 0); atomic_inc(&sbi->total_ext_tree); } else { atomic_dec(&sbi->total_zombie_tree); @@ -133,7 +133,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, { struct rb_node *node, *next; struct extent_node *en; - unsigned int count = et->count; + unsigned int count = atomic_read(&et->node_cnt); node = rb_first(&et->root); while (node) { @@ -154,7 +154,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, node = next; } - return count - et->count; + return count - atomic_read(&et->node_cnt); } static void __drop_largest_extent(struct inode *inode, @@ -192,7 +192,7 @@ bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); write_lock(&et->lock); - if (et->count) + if (atomic_read(&et->node_cnt)) goto out; en = __init_extent_tree(sbi, et, &ei); @@ -660,7 +660,8 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (!et) return; - if (inode->i_nlink && !is_bad_inode(inode) && et->count) { + if (inode->i_nlink && !is_bad_inode(inode) && + atomic_read(&et->node_cnt)) { down_write(&sbi->extent_tree_lock); list_add_tail(&et->list, &sbi->zombie_list); atomic_inc(&sbi->total_zombie_tree); @@ -673,7 +674,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) /* delete extent tree entry in radix tree */ down_write(&sbi->extent_tree_lock); - f2fs_bug_on(sbi, et->count); + f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 412865482a0bc..ae0007df6c2cd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -361,7 +361,7 @@ struct extent_tree { struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ - unsigned int count; /* # of extent node in rb-tree*/ + atomic_t node_cnt; /* # of extent node in rb-tree*/ }; /* From 9b72a388f5867f4a31113a41d24bbf1026611d7b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Jan 2016 20:24:00 +0800 Subject: [PATCH 82/87] f2fs: skip releasing nodes in chindless extent tree If there are no nodes in extent tree, let's skip releasing step to avoid any overhead of grabbing/releasing extent tree lock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 9febbc622bf53..ccd5c636d3fe0 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -570,9 +570,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) /* 1. remove unreferenced extent tree */ list_for_each_entry_safe(et, next, &sbi->zombie_list, list) { - write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et, true); - write_unlock(&et->lock); + if (atomic_read(&et->node_cnt)) { + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, true); + write_unlock(&et->lock); + } list_del_init(&et->list); radix_tree_delete(&sbi->extent_tree_root, et->ino); @@ -618,6 +620,9 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) for (i = 0; i < found; i++) { struct extent_tree *et = treevec[i]; + if (!atomic_read(&et->node_cnt)) + continue; + if (write_trylock(&et->lock)) { node_cnt += __free_extent_tree(sbi, et, false); write_unlock(&et->lock); @@ -641,7 +646,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode) struct extent_tree *et = F2FS_I(inode)->extent_tree; unsigned int node_cnt = 0; - if (!et) + if (!et || !atomic_read(&et->node_cnt)) return 0; write_lock(&et->lock); From 6beceb5427aa8731f958d2484e0fd8ff21d604dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Jan 2016 15:51:50 -0800 Subject: [PATCH 83/87] f2fs: introduce time and interval facility This patch adds time and interval arrays to store some timing variables. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 21 ++++++++++++++++++++- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 7 +++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5dbafd5e83d9e..3842af954cd5b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1139,7 +1139,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) "checkpoint: version = %llx", ckpt_ver); /* do checkpoint periodically */ - sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); + f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: mutex_unlock(&sbi->cp_mutex); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ae0007df6c2cd..5bbb6a407e79f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -721,6 +721,11 @@ enum { SBI_POR_DOING, /* recovery is doing or not */ }; +enum { + CP_TIME, + MAX_TIME, +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -747,7 +752,8 @@ struct f2fs_sb_info { struct rw_semaphore node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; - long cp_expires, cp_interval; /* next expected periodic cp */ + unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ + long interval_time[MAX_TIME]; /* to store thresholds */ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ @@ -837,6 +843,19 @@ struct f2fs_sb_info { unsigned int shrinker_run_no; }; +static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) +{ + sbi->last_time[type] = jiffies; +} + +static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) +{ + struct timespec ts = {sbi->interval_time[type], 0}; + unsigned long interval = timespec_to_jiffies(&ts); + + return time_after(jiffies, sbi->last_time[type] + interval); +} + /* * Inline functions */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c7bbc915d9627..fed23d5a7b343 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -293,7 +293,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, NAT_ENTRIES) || excess_prefree_segs(sbi) || !available_free_memory(sbi, INO_ENTRIES) || - jiffies > sbi->cp_expires) { + f2fs_time_over(sbi, CP_TIME)) { if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f5cc790646e29..787047f59c007 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -218,7 +218,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -1122,7 +1122,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; - sbi->cp_interval = DEF_CP_INTERVAL; + sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); INIT_LIST_HEAD(&sbi->s_list); @@ -1467,8 +1467,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_commit_super(sbi, true); } - sbi->cp_expires = round_jiffies_up(jiffies); - + f2fs_update_time(sbi, CP_TIME); return 0; free_kobj: From d0239e1bf5204d602281f93c01d46bcf3531098d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Jan 2016 16:57:48 -0800 Subject: [PATCH 84/87] f2fs: detect idle time depending on user behavior This patch adds last time that user requested filesystem operations. This information is used to detect whether system is idle or not later. Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/data.c | 1 + fs/f2fs/dir.c | 4 ++++ fs/f2fs/f2fs.h | 15 +++++++++++++++ fs/f2fs/file.c | 12 ++++++++++++ fs/f2fs/gc.c | 1 - fs/f2fs/gc.h | 8 -------- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 4 ++++ fs/f2fs/xattr.c | 1 + 10 files changed, 44 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 0345f2d1c7278..e5200f354abfe 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -87,6 +87,12 @@ Contact: "Jaegeuk Kim" Description: Controls the checkpoint timing. +What: /sys/fs/f2fs//idle_interval +Date: January 2016 +Contact: "Jaegeuk Kim" +Description: + Controls the idle timing. + What: /sys/fs/f2fs//ra_nid_pages Date: October 2015 Contact: "Chao Yu" diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a3bce12b0cce5..ac9e7c6aac74d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1596,6 +1596,7 @@ static int f2fs_write_end(struct file *file, } f2fs_put_page(page, 1); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 29bb8dd76a46e..faa7495e2d7e6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -636,6 +636,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, f2fs_put_page(dentry_page, 1); out: f2fs_fname_free_filename(&fname); + f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; } @@ -657,6 +658,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); fail: up_write(&F2FS_I(inode)->i_sem); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } @@ -701,6 +703,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); + if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5bbb6a407e79f..4331b9fe6f27f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) @@ -126,6 +127,7 @@ enum { #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define DEF_CP_INTERVAL 60 /* 60 secs */ +#define DEF_IDLE_INTERVAL 120 /* 2 mins */ struct cp_control { int reason; @@ -723,6 +725,7 @@ enum { enum { CP_TIME, + REQ_TIME, MAX_TIME, }; @@ -856,6 +859,18 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) return time_after(jiffies, sbi->last_time[type] + interval); } +static inline bool is_idle(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct request_list *rl = &q->root_rl; + + if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) + return 0; + + return f2fs_time_over(sbi, REQ_TIME); +} + /* * Inline functions */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ff06827aa3695..3d43857e98923 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -96,6 +96,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, clear_cold_data(page); out: sb_end_pagefault(inode->i_sb); + f2fs_update_time(sbi, REQ_TIME); return block_page_mkwrite_return(err); } @@ -280,6 +281,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); ret = f2fs_issue_flush(sbi); + f2fs_update_time(sbi, REQ_TIME); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); f2fs_trace_ios(NULL, 1); @@ -485,6 +487,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) } dn->ofs_in_node = ofs; + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, dn->ofs_in_node, nr_free); return nr_free; @@ -1236,6 +1239,7 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } out: @@ -1351,6 +1355,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) return ret; set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return 0; } @@ -1398,6 +1404,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) return ret; set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return 0; } @@ -1439,6 +1446,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) } mnt_drop_write_file(filp); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return ret; } @@ -1478,6 +1486,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) default: return -EINVAL; } + f2fs_update_time(sbi, REQ_TIME); return 0; } @@ -1508,6 +1517,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return 0; } @@ -1531,6 +1541,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) sizeof(policy))) return -EFAULT; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return f2fs_process_policy(&policy, inode); #else return -EOPNOTSUPP; @@ -1807,6 +1818,7 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) } err = f2fs_defragment_range(sbi, filp, &range); + f2fs_update_time(sbi, REQ_TIME); if (err < 0) goto out; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c09be339569cf..f610c2a9bdde9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "f2fs.h" #include "node.h" diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index b4a65be9f7d3f..a993967dcdb97 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -100,11 +100,3 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) return true; return false; } - -static inline int is_idle(struct f2fs_sb_info *sbi) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; - return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); -} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fed23d5a7b343..d8ad1abfa4fdb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -293,7 +293,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, NAT_ENTRIES) || excess_prefree_segs(sbi) || !available_free_memory(sbi, INO_ENTRIES) || - f2fs_time_over(sbi, CP_TIME)) { + (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 787047f59c007..3bf990b800262 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -219,6 +219,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -237,6 +238,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(cp_interval), + ATTR_LIST(idle_interval), NULL, }; @@ -1123,6 +1125,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; + sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); INIT_LIST_HEAD(&sbi->s_list); @@ -1468,6 +1471,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } f2fs_update_time(sbi, CP_TIME); + f2fs_update_time(sbi, REQ_TIME); return 0; free_kobj: diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 822a8af89c122..0108f487cc8e4 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -618,5 +618,6 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); + f2fs_update_time(sbi, REQ_TIME); return err; } From 42190d2a8663f3e181894dc4e37a1af06aab2cbb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Jan 2016 13:45:17 -0800 Subject: [PATCH 85/87] f2fs: monitor the number of background checkpoint This patch adds to show the number of background checkpoint. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 ++- fs/f2fs/f2fs.h | 4 +++- fs/f2fs/segment.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b73e8e133c8b7..48f2ae9452ef4 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -272,7 +272,8 @@ static int stat_show(struct seq_file *s, void *v) si->dirty_count); seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", si->prefree_count, si->free_segs, si->free_secs); - seq_printf(s, "CP calls: %d\n", si->cp_count); + seq_printf(s, "CP calls: %d (BG: %d)\n", + si->cp_count, si->bg_cp_count); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4331b9fe6f27f..2c0e478cefb48 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1937,7 +1937,7 @@ struct f2fs_stat_info { int util_free, util_valid, util_invalid; int rsvd_segs, overp_segs; int dirty_count, node_pages, meta_pages; - int prefree_count, call_count, cp_count; + int prefree_count, call_count, cp_count, bg_cp_count; int tot_segs, node_segs, data_segs, free_segs, free_secs; int bg_node_segs, bg_data_segs; int tot_blks, data_blks, node_blks; @@ -1958,6 +1958,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) } #define stat_inc_cp_count(si) ((si)->cp_count++) +#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -2040,6 +2041,7 @@ int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_cp_count(si) +#define stat_inc_bg_cp_count(si) #define stat_inc_call_count(si) #define stat_inc_bggc_count(si) #define stat_inc_dirty_inode(sbi, type) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d8ad1abfa4fdb..5904a411c86fe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -297,6 +297,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (test_opt(sbi, DATA_FLUSH)) sync_dirty_inodes(sbi, FILE_INODE); f2fs_sync_fs(sbi->sb, true); + stat_inc_bg_cp_count(sbi->stat_info); } } From 1663cae48ce3ce991c0e3f1a6fbdbd57f3dce9af Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Jan 2016 16:14:08 -0800 Subject: [PATCH 86/87] f2fs: fix wrong memory condition check This patch fixes wrong decision for avaliable_free_memory. The return valus is already set as false, so we should consider true condition below only. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c091b757bda60..342597a5897f0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -71,8 +71,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else { - if (sbi->sb->s_bdi->wb.dirty_exceeded) - return false; + if (!sbi->sb->s_bdi->wb.dirty_exceeded) + return true; } return res; } From 447135a86659c646017b8e707c1243c186bf2dff Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Jan 2016 17:08:38 -0800 Subject: [PATCH 87/87] f2fs: should unset atomic flag after successful commit If there is an error during commit, we should keep the flag in order to abort it. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3d43857e98923..18ddb1e5182a1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1378,8 +1378,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (f2fs_is_atomic_file(inode)) { clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); ret = commit_inmem_pages(inode, false); - if (ret) + if (ret) { + set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); goto err_out; + } } ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);