Skip to content

Commit

Permalink
f2fs: refactor flush_nat_entries to remove costly reorganizing ops
Browse files Browse the repository at this point in the history
Previously, f2fs tries to reorganize the dirty nat entries into multiple sets
according to its nid ranges. This can improve the flushing nat pages, however,
if there are a lot of cached nat entries, it becomes a bottleneck.

This patch introduces a new set management flow by removing dirty nat list and
adding a series of set operations when the nat entry becomes dirty.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
  • Loading branch information
Jaegeuk Kim committed Sep 30, 2014
1 parent 4b2fecc commit 309cc2b
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 159 deletions.
13 changes: 7 additions & 6 deletions fs/f2fs/f2fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ struct fsync_inode_entry {
#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se)
#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno)

#define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
#define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))

static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
{
int before = nats_in_cursum(rs);
Expand All @@ -182,9 +185,8 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
int type)
{
if (type == NAT_JOURNAL)
return nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES;

return sits_in_cursum(sum) + size <= SIT_JOURNAL_ENTRIES;
return size <= MAX_NAT_JENTRIES(sum);
return size <= MAX_SIT_JENTRIES(sum);
}

/*
Expand Down Expand Up @@ -292,11 +294,10 @@ struct f2fs_nm_info {

/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
rwlock_t nat_tree_lock; /* protect nat_tree_lock */
unsigned int nat_cnt; /* the # of cached nat entries */
struct list_head nat_entries; /* cached nat entry list (clean) */
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
struct list_head nat_entry_set; /* nat entry set list */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */

/* free node ids management */
Expand Down
299 changes: 152 additions & 147 deletions fs/f2fs/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,57 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
kmem_cache_free(nat_entry_slab, e);
}

static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry *ne)
{
nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
struct nat_entry_set *head;

if (get_nat_flag(ne, IS_DIRTY))
return;
retry:
head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (!head) {
head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);

INIT_LIST_HEAD(&head->entry_list);
INIT_LIST_HEAD(&head->set_list);
head->set = set;
head->entry_cnt = 0;

if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
cond_resched();
goto retry;
}
}
list_move_tail(&ne->list, &head->entry_list);
nm_i->dirty_nat_cnt++;
head->entry_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
}

static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry *ne)
{
nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
struct nat_entry_set *head;

head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (head) {
list_move_tail(&ne->list, &nm_i->nat_entries);
set_nat_flag(ne, IS_DIRTY, false);
head->entry_cnt--;
nm_i->dirty_nat_cnt--;
}
}

static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
nid_t start, unsigned int nr, struct nat_entry_set **ep)
{
return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
start, nr);
}

bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
Expand Down Expand Up @@ -1739,79 +1790,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
return err;
}

static struct nat_entry_set *grab_nat_entry_set(void)
{
struct nat_entry_set *nes =
f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);

nes->entry_cnt = 0;
INIT_LIST_HEAD(&nes->set_list);
INIT_LIST_HEAD(&nes->entry_list);
return nes;
}

static void release_nat_entry_set(struct nat_entry_set *nes,
struct f2fs_nm_info *nm_i)
{
nm_i->dirty_nat_cnt -= nes->entry_cnt;
list_del(&nes->set_list);
kmem_cache_free(nat_entry_set_slab, nes);
}

static void adjust_nat_entry_set(struct nat_entry_set *nes,
struct list_head *head)
{
struct nat_entry_set *next = nes;

if (list_is_last(&nes->set_list, head))
return;

list_for_each_entry_continue(next, head, set_list)
if (nes->entry_cnt <= next->entry_cnt)
break;

list_move_tail(&nes->set_list, &next->set_list);
}

static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
{
struct nat_entry_set *nes;
nid_t start_nid = START_NID(ne->ni.nid);

list_for_each_entry(nes, head, set_list) {
if (nes->start_nid == start_nid) {
list_move_tail(&ne->list, &nes->entry_list);
nes->entry_cnt++;
adjust_nat_entry_set(nes, head);
return;
}
}

nes = grab_nat_entry_set();

nes->start_nid = start_nid;
list_move_tail(&ne->list, &nes->entry_list);
nes->entry_cnt++;
list_add(&nes->set_list, head);
}

static void merge_nats_in_set(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct list_head *dirty_list = &nm_i->dirty_nat_entries;
struct list_head *set_list = &nm_i->nat_entry_set;
struct nat_entry *ne, *tmp;

write_lock(&nm_i->nat_tree_lock);
list_for_each_entry_safe(ne, tmp, dirty_list, list) {
if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;
add_nat_entry(ne, set_list);
nm_i->dirty_nat_cnt++;
}
write_unlock(&nm_i->nat_tree_lock);
}

static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
Expand Down Expand Up @@ -1846,101 +1824,129 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
mutex_unlock(&curseg->curseg_mutex);
}

/*
* This function is called during the checkpointing process.
*/
void flush_nat_entries(struct f2fs_sb_info *sbi)
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
struct list_head *head, int max)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
struct nat_entry_set *nes, *tmp;
struct list_head *head = &nm_i->nat_entry_set;
bool to_journal = true;
struct nat_entry_set *cur;

/* merge nat entries of dirty list to nat entry set temporarily */
merge_nats_in_set(sbi);
if (nes->entry_cnt >= max)
goto add_out;

/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) {
remove_nats_in_journal(sbi);

/*
* merge nat entries of dirty list to nat entry set temporarily
*/
merge_nats_in_set(sbi);
list_for_each_entry(cur, head, set_list) {
if (cur->entry_cnt >= nes->entry_cnt) {
list_add(&nes->set_list, cur->set_list.prev);
return;
}
}
add_out:
list_add_tail(&nes->set_list, head);
}

if (!nm_i->dirty_nat_cnt)
return;
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct nat_entry_set *set)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
bool to_journal = true;
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
struct page *page = NULL;

/*
* there are two steps to flush nat entries:
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
list_for_each_entry_safe(nes, tmp, head, set_list) {
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
struct page *page;
nid_t start_nid = nes->start_nid;
if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
to_journal = false;

if (to_journal) {
mutex_lock(&curseg->curseg_mutex);
} else {
page = get_next_nat_page(sbi, start_nid);
nat_blk = page_address(page);
f2fs_bug_on(sbi, !nat_blk);
}

if (to_journal &&
!__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL))
to_journal = false;
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
struct f2fs_nat_entry *raw_ne;
nid_t nid = nat_get_nid(ne);
int offset;

if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;

if (to_journal) {
mutex_lock(&curseg->curseg_mutex);
offset = lookup_journal_in_cursum(sum,
NAT_JOURNAL, nid, 1);
f2fs_bug_on(sbi, offset < 0);
raw_ne = &nat_in_journal(sum, offset);
nid_in_journal(sum, offset) = cpu_to_le32(nid);
} else {
page = get_next_nat_page(sbi, start_nid);
nat_blk = page_address(page);
f2fs_bug_on(sbi, !nat_blk);
raw_ne = &nat_blk->entries[nid - start_nid];
}
raw_nat_from_node_info(raw_ne, &ne->ni);

/* flush dirty nats in nat entry set */
list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
struct f2fs_nat_entry *raw_ne;
nid_t nid = nat_get_nid(ne);
int offset;
write_lock(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
write_unlock(&NM_I(sbi)->nat_tree_lock);

if (to_journal) {
offset = lookup_journal_in_cursum(sum,
NAT_JOURNAL, nid, 1);
f2fs_bug_on(sbi, offset < 0);
raw_ne = &nat_in_journal(sum, offset);
nid_in_journal(sum, offset) = cpu_to_le32(nid);
} else {
raw_ne = &nat_blk->entries[nid - start_nid];
}
raw_nat_from_node_info(raw_ne, &ne->ni);
if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false);
}

if (nat_get_blkaddr(ne) == NULL_ADDR &&
add_free_nid(sbi, nid, false) <= 0) {
write_lock(&nm_i->nat_tree_lock);
__del_from_nat_cache(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
} else {
write_lock(&nm_i->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
}
if (to_journal)
mutex_unlock(&curseg->curseg_mutex);
else
f2fs_put_page(page, 1);

if (to_journal)
mutex_unlock(&curseg->curseg_mutex);
else
f2fs_put_page(page, 1);
if (!set->entry_cnt) {
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
kmem_cache_free(nat_entry_set_slab, set);
}
}

/*
* This function is called during the checkpointing process.
*/
void flush_nat_entries(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
struct nat_entry_set *setvec[NATVEC_SIZE];
struct nat_entry_set *set, *tmp;
unsigned int found;
nid_t set_idx = 0;
LIST_HEAD(sets);

f2fs_bug_on(sbi, !list_empty(&nes->entry_list));
release_nat_entry_set(nes, nm_i);
/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);

if (!nm_i->dirty_nat_cnt)
return;

while ((found = __gang_lookup_nat_set(nm_i,
set_idx, NATVEC_SIZE, setvec))) {
unsigned idx;
set_idx = setvec[found - 1]->set + 1;
for (idx = 0; idx < found; idx++)
__adjust_nat_entry_set(setvec[idx], &sets,
MAX_NAT_JENTRIES(sum));
}

f2fs_bug_on(sbi, !list_empty(head));
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);

f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}

Expand Down Expand Up @@ -1968,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
INIT_LIST_HEAD(&nm_i->nat_entry_set);

mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
Expand Down
Loading

0 comments on commit 309cc2b

Please sign in to comment.