Skip to content

Commit

Permalink
f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc
Browse files Browse the repository at this point in the history
f2fs_gc(), which is called by f2fs_balance_fs(), must be called without
holding fi->i_gc_rwsem[WRITE], since f2fs_gc() can try to grab that lock in a loop.

If it hits the maximum number of retries in GC, let's give it a chance to
release gc_mutex for a short time so that we do not go into a livelock in the
worst case.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
  • Loading branch information
Jaegeuk Kim committed Aug 21, 2018
1 parent 853137c commit 6f8d445
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 67 deletions.
4 changes: 2 additions & 2 deletions fs/f2fs/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -2217,14 +2217,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode);

if (to > i_size) {
down_write(&F2FS_I(inode)->i_mmap_sem);
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);

truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);

up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}

Expand Down
1 change: 1 addition & 0 deletions fs/f2fs/f2fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,7 @@ struct f2fs_sb_info {
unsigned int gc_mode; /* current GC state */
/* for skip statistic */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */

/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
Expand Down
119 changes: 62 additions & 57 deletions fs/f2fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -797,8 +797,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_SIZE) {
bool to_smaller = (attr->ia_size <= i_size_read(inode));

down_write(&F2FS_I(inode)->i_mmap_sem);
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);

truncate_setsize(inode, attr->ia_size);

Expand All @@ -808,8 +808,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

if (err)
return err;
Expand Down Expand Up @@ -962,8 +962,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;

down_write(&F2FS_I(inode)->i_mmap_sem);
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);

truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
Expand All @@ -972,8 +972,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);

up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}

Expand Down Expand Up @@ -1188,25 +1188,33 @@ static int __exchange_data_block(struct inode *src_inode,
return ret;
}

static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;

f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);

f2fs_drop_extent_tree(inode);
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);

f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
truncate_pagecache(inode, offset);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);

up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}

static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t pg_start, pg_end;
loff_t new_size;
int ret;

Expand All @@ -1221,37 +1229,27 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;

pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;

/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
goto out_unlock;

truncate_pagecache(inode, offset);
return ret;

ret = f2fs_do_collapse(inode, pg_start, pg_end);
ret = f2fs_do_collapse(inode, offset, len);
if (ret)
goto out_unlock;
return ret;

/* write out all moved pages, if possible */
down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);

new_size = i_size_read(inode) - len;
truncate_pagecache(inode, new_size);

ret = f2fs_truncate_blocks(inode, new_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
out_unlock:
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}

Expand Down Expand Up @@ -1317,10 +1315,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;

down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
goto out_sem;
return ret;

pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
Expand All @@ -1332,15 +1329,15 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
goto out_sem;
return ret;

new_size = max_t(loff_t, new_size, offset + len);
} else {
if (off_start) {
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
goto out_sem;
return ret;

new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
Expand All @@ -1352,6 +1349,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
pgoff_t end;

down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);

truncate_pagecache_range(inode,
(loff_t)index << PAGE_SHIFT,
Expand All @@ -1363,6 +1361,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
Expand All @@ -1374,6 +1373,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
f2fs_put_dnode(&dn);

f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

f2fs_balance_fs(sbi, dn.node_changed);
Expand Down Expand Up @@ -1402,9 +1402,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
else
f2fs_i_size_write(inode, new_size);
}
out_sem:
up_write(&F2FS_I(inode)->i_mmap_sem);

return ret;
}

Expand Down Expand Up @@ -1433,26 +1430,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)

f2fs_balance_fs(sbi, true);

/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

down_write(&F2FS_I(inode)->i_mmap_sem);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
goto out;
return ret;

/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
goto out;

truncate_pagecache(inode, offset);
return ret;

pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;

/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, offset);

while (!ret && idx > pg_start) {
nr = idx - pg_start;
if (nr > delta)
Expand All @@ -1466,16 +1464,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

/* write out all moved pages, if possible */
down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
up_write(&F2FS_I(inode)->i_mmap_sem);

if (!ret)
f2fs_i_size_write(inode, new_size);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}

Expand Down Expand Up @@ -1722,8 +1721,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)

inode_lock(inode);

down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

if (f2fs_is_atomic_file(inode)) {
if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
ret = -EINVAL;
Expand All @@ -1734,25 +1731,29 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (ret)
goto out;

down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

if (!get_dirty_pages(inode))
goto skip_flush;

f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
if (ret) {
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
skip_flush:
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
Expand All @@ -1770,9 +1771,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (ret)
return ret;

inode_lock(inode);
f2fs_balance_fs(F2FS_I_SB(inode), true);

down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_lock(inode);

if (f2fs_is_volatile_file(inode)) {
ret = -EINVAL;
Expand All @@ -1798,7 +1799,6 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
ret = -EINVAL;
}
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
Expand Down Expand Up @@ -2394,15 +2394,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}

inode_lock(src);
down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!inode_trylock(dst))
goto out;
if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) {
inode_unlock(dst);
goto out;
}
}

ret = -EINVAL;
Expand Down Expand Up @@ -2447,6 +2442,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
goto out_unlock;

f2fs_balance_fs(sbi, true);

down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
goto out_src;
}

f2fs_lock_op(sbi);
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
pos_out >> F2FS_BLKSIZE_BITS,
Expand All @@ -2459,13 +2462,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_i_size_write(dst, dst_osize);
}
f2fs_unlock_op(sbi);
out_unlock:
if (src != dst) {

if (src != dst)
up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
out_unlock:
if (src != dst)
inode_unlock(dst);
}
out:
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
inode_unlock(src);
return ret;
}
Expand Down
Loading

0 comments on commit 6f8d445

Please sign in to comment.