Skip to content

Commit

Permalink
f2fs: move f2fs to use reader-unfair rwsems
Browse files Browse the repository at this point in the history
f2fs rw_semaphores work better if writers can starve readers,
especially for the checkpoint thread, because writers are strictly
more important than reader threads. This prevents significant priority
inversion between low-priority readers that are blocked while trying to
acquire the read lock and a second acquisition of the write lock that
might be blocking high-priority work.

Signed-off-by: Tim Murray <timmurray@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
  • Loading branch information
Tim Murray authored and Jaegeuk Kim committed Jan 25, 2022
1 parent dd81e1c commit e4544b6
Show file tree
Hide file tree
Showing 16 changed files with 342 additions and 274 deletions.
34 changes: 17 additions & 17 deletions fs/f2fs/checkpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -351,13 +351,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;

/* if locking failed, cp will flush dirty pages instead */
if (!down_write_trylock(&sbi->cp_global_sem))
if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
goto skip_write;

trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->cp_global_sem);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;

Expand Down Expand Up @@ -1159,7 +1159,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
if (!is_journalled_quota(sbi))
return false;

if (!down_write_trylock(&sbi->quota_sem))
if (!f2fs_down_write_trylock(&sbi->quota_sem))
return true;
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
ret = false;
Expand All @@ -1171,7 +1171,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
} else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
ret = true;
}
up_write(&sbi->quota_sem);
f2fs_up_write(&sbi->quota_sem);
return ret;
}

Expand Down Expand Up @@ -1228,10 +1228,10 @@ static int block_operations(struct f2fs_sb_info *sbi)
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush. inode->i_blocks can be updated.
*/
down_write(&sbi->node_change);
f2fs_down_write(&sbi->node_change);

if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
up_write(&sbi->node_change);
f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
Expand All @@ -1241,15 +1241,15 @@ static int block_operations(struct f2fs_sb_info *sbi)
}

retry_flush_nodes:
down_write(&sbi->node_write);
f2fs_down_write(&sbi->node_write);

if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
f2fs_up_write(&sbi->node_write);
atomic_inc(&sbi->wb_sync_req[NODE]);
err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (err) {
up_write(&sbi->node_change);
f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
return err;
}
Expand All @@ -1262,13 +1262,13 @@ static int block_operations(struct f2fs_sb_info *sbi)
* dirty node blocks and some checkpoint values by block allocation.
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
f2fs_up_write(&sbi->node_change);
return err;
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
f2fs_up_write(&sbi->node_write);
f2fs_unlock_all(sbi);
}

Expand Down Expand Up @@ -1612,7 +1612,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_warn(sbi, "Start checkpoint disabled!");
}
if (cpc->reason != CP_RESIZE)
down_write(&sbi->cp_global_sem);
f2fs_down_write(&sbi->cp_global_sem);

if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
Expand Down Expand Up @@ -1693,7 +1693,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
if (cpc->reason != CP_RESIZE)
up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->cp_global_sem);
return err;
}

Expand Down Expand Up @@ -1741,9 +1741,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
struct cp_control cpc = { .reason = CP_SYNC, };
int err;

down_write(&sbi->gc_lock);
f2fs_down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
up_write(&sbi->gc_lock);
f2fs_up_write(&sbi->gc_lock);

return err;
}
Expand Down Expand Up @@ -1831,9 +1831,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) {
int ret;

down_write(&sbi->gc_lock);
f2fs_down_write(&sbi->gc_lock);
ret = f2fs_write_checkpoint(sbi, &cpc);
up_write(&sbi->gc_lock);
f2fs_up_write(&sbi->gc_lock);

return ret;
}
Expand Down
6 changes: 3 additions & 3 deletions fs/f2fs/compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
* checkpoint. This can only happen to quota writes which can cause
* the below discard race condition.
*/
down_read(&sbi->node_write);
f2fs_down_read(&sbi->node_write);
} else if (!f2fs_trylock_op(sbi)) {
goto out_free;
}
Expand Down Expand Up @@ -1384,7 +1384,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,

f2fs_put_dnode(&dn);
if (IS_NOQUOTA(inode))
up_read(&sbi->node_write);
f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);

Expand All @@ -1410,7 +1410,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
f2fs_put_dnode(&dn);
out_unlock_op:
if (IS_NOQUOTA(inode))
up_read(&sbi->node_write);
f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
out_free:
Expand Down
50 changes: 25 additions & 25 deletions fs/f2fs/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

down_write(&io->io_rwsem);
f2fs_down_write(&io->io_rwsem);

/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
Expand All @@ -601,7 +601,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
}
__submit_merged_bio(io);
up_write(&io->io_rwsem);
f2fs_up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
Expand All @@ -616,9 +616,9 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

down_read(&io->io_rwsem);
f2fs_down_read(&io->io_rwsem);
ret = __has_merged_page(io->bio, inode, page, ino);
up_read(&io->io_rwsem);
f2fs_up_read(&io->io_rwsem);
}
if (ret)
__f2fs_submit_merged_write(sbi, type, temp);
Expand Down Expand Up @@ -742,9 +742,9 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
f2fs_bug_on(sbi, 1);

down_write(&io->bio_list_lock);
f2fs_down_write(&io->bio_list_lock);
list_add_tail(&be->list, &io->bio_list);
up_write(&io->bio_list_lock);
f2fs_up_write(&io->bio_list_lock);
}

static void del_bio_entry(struct bio_entry *be)
Expand All @@ -766,7 +766,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
struct list_head *head = &io->bio_list;
struct bio_entry *be;

down_write(&io->bio_list_lock);
f2fs_down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (be->bio != *bio)
continue;
Expand All @@ -790,7 +790,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
__submit_bio(sbi, *bio, DATA);
break;
}
up_write(&io->bio_list_lock);
f2fs_up_write(&io->bio_list_lock);
}

if (ret) {
Expand All @@ -816,7 +816,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
if (list_empty(head))
continue;

down_read(&io->bio_list_lock);
f2fs_down_read(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
Expand All @@ -826,14 +826,14 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
if (found)
break;
}
up_read(&io->bio_list_lock);
f2fs_up_read(&io->bio_list_lock);

if (!found)
continue;

found = false;

down_write(&io->bio_list_lock);
f2fs_down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
Expand All @@ -846,7 +846,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
break;
}
}
up_write(&io->bio_list_lock);
f2fs_up_write(&io->bio_list_lock);
}

if (found)
Expand Down Expand Up @@ -906,7 +906,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)

f2fs_bug_on(sbi, is_read_io(fio->op));

down_write(&io->io_rwsem);
f2fs_down_write(&io->io_rwsem);
next:
if (fio->in_list) {
spin_lock(&io->io_lock);
Expand Down Expand Up @@ -973,7 +973,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
up_write(&io->io_rwsem);
f2fs_up_write(&io->io_rwsem);
}

static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
Expand Down Expand Up @@ -1383,9 +1383,9 @@ void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
down_read(&sbi->node_change);
f2fs_down_read(&sbi->node_change);
else
up_read(&sbi->node_change);
f2fs_up_read(&sbi->node_change);
} else {
if (lock)
f2fs_lock_op(sbi);
Expand Down Expand Up @@ -2749,13 +2749,13 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
* the below discard race condition.
*/
if (IS_NOQUOTA(inode))
down_read(&sbi->node_write);
f2fs_down_read(&sbi->node_write);

fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);

if (IS_NOQUOTA(inode))
up_read(&sbi->node_write);
f2fs_up_read(&sbi->node_write);

goto done;
}
Expand Down Expand Up @@ -3213,14 +3213,14 @@ void f2fs_write_failed(struct inode *inode, loff_t to)

/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);

truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);

filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}

Expand Down Expand Up @@ -3721,13 +3721,13 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
unsigned int end_sec = secidx + blkcnt / blk_per_sec;
int ret = 0;

down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);

set_inode_flag(inode, FI_ALIGNED_WRITE);

for (; secidx < end_sec; secidx++) {
down_write(&sbi->pin_sem);
f2fs_down_write(&sbi->pin_sem);

f2fs_lock_op(sbi);
f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
Expand All @@ -3741,7 +3741,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,

page = f2fs_get_lock_data_page(inode, blkidx, true);
if (IS_ERR(page)) {
up_write(&sbi->pin_sem);
f2fs_up_write(&sbi->pin_sem);
ret = PTR_ERR(page);
goto done;
}
Expand All @@ -3754,7 +3754,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,

ret = filemap_fdatawrite(inode->i_mapping);

up_write(&sbi->pin_sem);
f2fs_up_write(&sbi->pin_sem);

if (ret)
break;
Expand All @@ -3765,7 +3765,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
clear_inode_flag(inode, FI_ALIGNED_WRITE);

filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

return ret;
}
Expand Down
Loading

0 comments on commit e4544b6

Please sign in to comment.