Skip to content

Commit

Permalink
ext4: Convert pa->pa_inode_list and pa->pa_obj_lock into a union
Browse files Browse the repository at this point in the history
** Splitting pa->pa_inode_list **

Currently, we use the same pa->pa_inode_list to add a pa to either
the inode preallocation list or the locality group preallocation list.
For better clarity, split this list into a union of 2 list_heads and use
one of them based on the type of the pa.

** Splitting pa->pa_obj_lock **

Currently, pa->pa_obj_lock is either assigned &ei->i_prealloc_lock for
inode PAs or lg_prealloc_lock for lg PAs, and is then used to lock the
lists containing these PAs. Make the distinction between the 2 PA types
clear by changing this lock to a union of 2 locks. Explicitly use
pa_node_lock.inode_lock for inode PAs and pa_node_lock.lg_lock for lg
PAs.

This patch is required so that the locality group preallocation code
can remain unchanged when upcoming patches move the inode preallocation
code from a list based to an rbtree based implementation. This patch
also makes those upcoming patches easier to review.

There are no functional changes in this patch.

Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/1d7ac0557e998c3fc7eef422b52e4bc67bdef2b0.1679731817.git.ojaswin@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
  • Loading branch information
Ojaswin Mujoo authored and Theodore Ts'o committed Apr 6, 2023
1 parent 93cdf49 commit a8e38fd
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 34 deletions.
76 changes: 44 additions & 32 deletions fs/ext4/mballoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3988,7 +3988,7 @@ ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac,
ext4_lblk_t tmp_pa_start, tmp_pa_end;

rcu_read_lock();
list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_inode_list) {
list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_node.inode_list) {
spin_lock(&tmp_pa->pa_lock);
if (tmp_pa->pa_deleted == 0) {
tmp_pa_start = tmp_pa->pa_lstart;
Expand Down Expand Up @@ -4026,7 +4026,7 @@ ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac,

/* check we don't cross already preallocated blocks */
rcu_read_lock();
list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_inode_list) {
list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_node.inode_list) {
if (tmp_pa->pa_deleted)
continue;
spin_lock(&tmp_pa->pa_lock);
Expand Down Expand Up @@ -4409,7 +4409,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)

/* first, try per-file preallocation */
rcu_read_lock();
list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_inode_list) {
list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_node.inode_list) {

/* all fields in this condition don't change,
* so we can skip locking for them */
Expand Down Expand Up @@ -4466,7 +4466,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
for (i = order; i < PREALLOC_TB_SIZE; i++) {
rcu_read_lock();
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[i],
pa_inode_list) {
pa_node.lg_list) {
spin_lock(&tmp_pa->pa_lock);
if (tmp_pa->pa_deleted == 0 &&
tmp_pa->pa_free >= ac->ac_o_ex.fe_len) {
Expand Down Expand Up @@ -4640,9 +4640,15 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
list_del(&pa->pa_group_list);
ext4_unlock_group(sb, grp);

spin_lock(pa->pa_obj_lock);
list_del_rcu(&pa->pa_inode_list);
spin_unlock(pa->pa_obj_lock);
if (pa->pa_type == MB_INODE_PA) {
spin_lock(pa->pa_node_lock.inode_lock);
list_del_rcu(&pa->pa_node.inode_list);
spin_unlock(pa->pa_node_lock.inode_lock);
} else {
spin_lock(pa->pa_node_lock.lg_lock);
list_del_rcu(&pa->pa_node.lg_list);
spin_unlock(pa->pa_node_lock.lg_lock);
}

call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
Expand Down Expand Up @@ -4718,7 +4724,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa->pa_len = ac->ac_b_ex.fe_len;
pa->pa_free = pa->pa_len;
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_node.inode_list);
INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0;
pa->pa_type = MB_INODE_PA;
Expand All @@ -4733,14 +4739,14 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
ei = EXT4_I(ac->ac_inode);
grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);

pa->pa_obj_lock = &ei->i_prealloc_lock;
pa->pa_node_lock.inode_lock = &ei->i_prealloc_lock;
pa->pa_inode = ac->ac_inode;

list_add(&pa->pa_group_list, &grp->bb_prealloc_list);

spin_lock(pa->pa_obj_lock);
list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
spin_unlock(pa->pa_obj_lock);
spin_lock(pa->pa_node_lock.inode_lock);
list_add_rcu(&pa->pa_node.inode_list, &ei->i_prealloc_list);
spin_unlock(pa->pa_node_lock.inode_lock);
atomic_inc(&ei->i_prealloc_active);
}

Expand Down Expand Up @@ -4768,7 +4774,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_len = ac->ac_b_ex.fe_len;
pa->pa_free = pa->pa_len;
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_node.lg_list);
INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0;
pa->pa_type = MB_GROUP_PA;
Expand All @@ -4784,7 +4790,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
lg = ac->ac_lg;
BUG_ON(lg == NULL);

pa->pa_obj_lock = &lg->lg_prealloc_lock;
pa->pa_node_lock.lg_lock = &lg->lg_prealloc_lock;
pa->pa_inode = NULL;

list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
Expand Down Expand Up @@ -4960,9 +4966,15 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {

/* remove from object (inode or locality group) */
spin_lock(pa->pa_obj_lock);
list_del_rcu(&pa->pa_inode_list);
spin_unlock(pa->pa_obj_lock);
if (pa->pa_type == MB_GROUP_PA) {
spin_lock(pa->pa_node_lock.lg_lock);
list_del_rcu(&pa->pa_node.lg_list);
spin_unlock(pa->pa_node_lock.lg_lock);
} else {
spin_lock(pa->pa_node_lock.inode_lock);
list_del_rcu(&pa->pa_node.inode_list);
spin_unlock(pa->pa_node_lock.inode_lock);
}

if (pa->pa_type == MB_GROUP_PA)
ext4_mb_release_group_pa(&e4b, pa);
Expand Down Expand Up @@ -5024,8 +5036,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
spin_lock(&ei->i_prealloc_lock);
while (!list_empty(&ei->i_prealloc_list) && needed) {
pa = list_entry(ei->i_prealloc_list.prev,
struct ext4_prealloc_space, pa_inode_list);
BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
struct ext4_prealloc_space, pa_node.inode_list);
BUG_ON(pa->pa_node_lock.inode_lock != &ei->i_prealloc_lock);
spin_lock(&pa->pa_lock);
if (atomic_read(&pa->pa_count)) {
/* this shouldn't happen often - nobody should
Expand All @@ -5042,7 +5054,7 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
if (pa->pa_deleted == 0) {
ext4_mb_mark_pa_deleted(sb, pa);
spin_unlock(&pa->pa_lock);
list_del_rcu(&pa->pa_inode_list);
list_del_rcu(&pa->pa_node.inode_list);
list_add(&pa->u.pa_tmp_list, &list);
needed--;
continue;
Expand Down Expand Up @@ -5332,7 +5344,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,

spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
pa_inode_list,
pa_node.lg_list,
lockdep_is_held(&lg->lg_prealloc_lock)) {
spin_lock(&pa->pa_lock);
if (atomic_read(&pa->pa_count)) {
Expand All @@ -5355,7 +5367,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
ext4_mb_mark_pa_deleted(sb, pa);
spin_unlock(&pa->pa_lock);

list_del_rcu(&pa->pa_inode_list);
list_del_rcu(&pa->pa_node.lg_list);
list_add(&pa->u.pa_tmp_list, &discard_list);

total_entries--;
Expand Down Expand Up @@ -5416,7 +5428,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
/* Add the prealloc space to lg */
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
pa_inode_list,
pa_node.lg_list,
lockdep_is_held(&lg->lg_prealloc_lock)) {
spin_lock(&tmp_pa->pa_lock);
if (tmp_pa->pa_deleted) {
Expand All @@ -5425,8 +5437,8 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
}
if (!added && pa->pa_free < tmp_pa->pa_free) {
/* Add to the tail of the previous entry */
list_add_tail_rcu(&pa->pa_inode_list,
&tmp_pa->pa_inode_list);
list_add_tail_rcu(&pa->pa_node.lg_list,
&tmp_pa->pa_node.lg_list);
added = 1;
/*
* we want to count the total
Expand All @@ -5437,7 +5449,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
lg_prealloc_count++;
}
if (!added)
list_add_tail_rcu(&pa->pa_inode_list,
list_add_tail_rcu(&pa->pa_node.lg_list,
&lg->lg_prealloc_list[order]);
spin_unlock(&lg->lg_prealloc_lock);

Expand Down Expand Up @@ -5493,9 +5505,9 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
* doesn't grow big.
*/
if (likely(pa->pa_free)) {
spin_lock(pa->pa_obj_lock);
list_del_rcu(&pa->pa_inode_list);
spin_unlock(pa->pa_obj_lock);
spin_lock(pa->pa_node_lock.lg_lock);
list_del_rcu(&pa->pa_node.lg_list);
spin_unlock(pa->pa_node_lock.lg_lock);
ext4_mb_add_n_trim(ac);
}
}
Expand All @@ -5505,9 +5517,9 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
* treat per-inode prealloc list as a lru list, then try
* to trim the least recently used PA.
*/
spin_lock(pa->pa_obj_lock);
list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
spin_unlock(pa->pa_obj_lock);
spin_lock(pa->pa_node_lock.inode_lock);
list_move(&pa->pa_node.inode_list, &ei->i_prealloc_list);
spin_unlock(pa->pa_node_lock.inode_lock);
}

ext4_mb_put_pa(ac, ac->ac_sb, pa);
Expand Down
10 changes: 8 additions & 2 deletions fs/ext4/mballoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ struct ext4_free_data {
};

struct ext4_prealloc_space {
struct list_head pa_inode_list;
union {
struct list_head inode_list; /* for inode PAs */
struct list_head lg_list; /* for lg PAs */
} pa_node;
struct list_head pa_group_list;
union {
struct list_head pa_tmp_list;
Expand All @@ -128,7 +131,10 @@ struct ext4_prealloc_space {
ext4_grpblk_t pa_len; /* len of preallocated chunk */
ext4_grpblk_t pa_free; /* how many blocks are free */
unsigned short pa_type; /* pa type. inode or group */
spinlock_t *pa_obj_lock;
union {
spinlock_t *inode_lock; /* locks the inode list holding this PA */
spinlock_t *lg_lock; /* locks the lg list holding this PA */
} pa_node_lock;
struct inode *pa_inode; /* hack, for history only */
};

Expand Down

0 comments on commit a8e38fd

Please sign in to comment.