Merge tag 'ext4_for_linus-6.5-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Bug and regression fixes for 6.5-rc3 for ext4's mballoc and jbd2's
  checkpoint code"

* tag 'ext4_for_linus-6.5-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix rbtree traversal bug in ext4_mb_use_preallocated
  ext4: fix off by one issue in ext4_mb_choose_next_group_best_avail()
  ext4: correct inline offset when handling xattrs in inode body
  jbd2: remove __journal_try_to_free_buffer()
  jbd2: fix a race when checking checkpoint buffer busy
  jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint
  jbd2: remove journal_clean_one_cp_list()
  jbd2: remove t_checkpoint_io_list
  jbd2: recheck chechpointing non-dirty buffer
Linus Torvalds committed Jul 23, 2023
2 parents 8266f53 + 9d3de7e commit 15b593b
Showing 7 changed files with 262 additions and 263 deletions.
172 changes: 140 additions & 32 deletions fs/ext4/mballoc.c
@@ -1006,24 +1006,28 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
* fls() instead since we need to know the actual length while modifying
* goal length.
*/
order = fls(ac->ac_g_ex.fe_len);
order = fls(ac->ac_g_ex.fe_len) - 1;
min_order = order - sbi->s_mb_best_avail_max_trim_order;
if (min_order < 0)
min_order = 0;

if (1 << min_order < ac->ac_o_ex.fe_len)
min_order = fls(ac->ac_o_ex.fe_len) + 1;

if (sbi->s_stripe > 0) {
/*
* We are assuming that stripe size is always a multiple of
* cluster ratio otherwise __ext4_fill_super exits early.
*/
num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe);
if (1 << min_order < num_stripe_clusters)
min_order = fls(num_stripe_clusters);
/*
* We consider 1 order less because later we round
* up the goal len to num_stripe_clusters
*/
min_order = fls(num_stripe_clusters) - 1;
}

if (1 << min_order < ac->ac_o_ex.fe_len)
min_order = fls(ac->ac_o_ex.fe_len);

for (i = order; i >= min_order; i--) {
int frag_order;
/*
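
A note on the fls() arithmetic in the hunk above: fls(n) returns the 1-based position of the highest set bit, so 1 << (fls(n) - 1) is the largest power of two not exceeding n. Treating fls(n) itself as the buddy order therefore overshoots by one (for n = 8, fls is 4, but order 3 already spans 8 clusters), which is the off-by-one the change corrects. A minimal userspace sketch, assuming a stand-in fls_demo() rather than the kernel's fls():

#include <stdio.h>

static int fls_demo(unsigned int x)	/* stand-in for the kernel's fls() */
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	unsigned int lens[] = { 1, 7, 8, 9, 16 };

	for (unsigned int i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		unsigned int len = lens[i];
		int order = fls_demo(len) - 1;	/* largest order with 2^order <= len */

		printf("len=%2u  fls=%d  order=%d  2^order=%u\n",
		       len, fls_demo(len), order, 1u << order);
	}
	return 0;
}
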
@@ -4761,56 +4765,160 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *tmp_pa, *cpa = NULL;
ext4_lblk_t tmp_pa_start, tmp_pa_end;
struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
loff_t tmp_pa_end;
struct rb_node *iter;
ext4_fsblk_t goal_block;

/* only data can be preallocated */
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return false;

/* first, try per-file preallocation */
/*
* first, try per-file preallocation by searching the inode pa rbtree.
*
* Here, we can't do a direct traversal of the tree because
* ext4_mb_discard_group_preallocation() can concurrently mark the pa
* deleted and that can cause direct traversal to skip some entries.
*/
read_lock(&ei->i_prealloc_lock);

if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) {
goto try_group_pa;
}

/*
* Step 1: Find a pa with logical start immediately adjacent to the
* original logical start. This could be on the left or right.
*
* (tmp_pa->pa_lstart never changes so we can skip locking for it).
*/
for (iter = ei->i_prealloc_node.rb_node; iter;
iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
tmp_pa_start, iter)) {
tmp_pa->pa_lstart, iter)) {
tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
}

/* all fields in this condition don't change,
* so we can skip locking for them */
tmp_pa_start = tmp_pa->pa_lstart;
tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);

/* original request start doesn't lie in this PA */
if (ac->ac_o_ex.fe_logical < tmp_pa_start ||
ac->ac_o_ex.fe_logical >= tmp_pa_end)
continue;
/*
* Step 2: The adjacent pa might be to the right of logical start, find
* the left adjacent pa. After this step we'd have a valid tmp_pa whose
* logical start is towards the left of original request's logical start
*/
if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) {
struct rb_node *tmp;
tmp = rb_prev(&tmp_pa->pa_node.inode_node);

/* non-extent files can't have physical blocks past 2^32 */
if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
(tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
EXT4_MAX_BLOCK_FILE_PHYS)) {
if (tmp) {
tmp_pa = rb_entry(tmp, struct ext4_prealloc_space,
pa_node.inode_node);
} else {
/*
* Since PAs don't overlap, we won't find any
* other PA to satisfy this.
* If there is no adjacent pa to the left then finding
* an overlapping pa is not possible hence stop searching
* inode pa tree
*/
break;
goto try_group_pa;
}
}

BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));

/* found preallocated blocks, use them */
/*
* Step 3: If the left adjacent pa is deleted, keep moving left to find
* the first non deleted adjacent pa. After this step we should have a
* valid tmp_pa which is guaranteed to be non deleted.
*/
for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) {
if (!iter) {
/*
* no non deleted left adjacent pa, so stop searching
* inode pa tree
*/
goto try_group_pa;
}
tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
spin_lock(&tmp_pa->pa_lock);
if (tmp_pa->pa_deleted == 0 && tmp_pa->pa_free &&
likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
atomic_inc(&tmp_pa->pa_count);
ext4_mb_use_inode_pa(ac, tmp_pa);
if (tmp_pa->pa_deleted == 0) {
/*
* We will keep holding the pa_lock from
* this point on because we don't want group discard
* to delete this pa underneath us. Since group
* discard is anyways an ENOSPC operation it
* should be okay for it to wait a few more cycles.
*/
break;
} else {
spin_unlock(&tmp_pa->pa_lock);
read_unlock(&ei->i_prealloc_lock);
return true;
}
}

BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
BUG_ON(tmp_pa->pa_deleted == 1);

/*
* Step 4: We now have the non deleted left adjacent pa. Only this
* pa can possibly satisfy the request hence check if it overlaps
* original logical start and stop searching if it doesn't.
*/
tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);

if (ac->ac_o_ex.fe_logical >= tmp_pa_end) {
spin_unlock(&tmp_pa->pa_lock);
goto try_group_pa;
}

/* non-extent files can't have physical blocks past 2^32 */
if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
(tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
EXT4_MAX_BLOCK_FILE_PHYS)) {
/*
* Since PAs don't overlap, we won't find any other PA to
* satisfy this.
*/
spin_unlock(&tmp_pa->pa_lock);
goto try_group_pa;
}

if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
atomic_inc(&tmp_pa->pa_count);
ext4_mb_use_inode_pa(ac, tmp_pa);
spin_unlock(&tmp_pa->pa_lock);
read_unlock(&ei->i_prealloc_lock);
return true;
} else {
/*
* We found a valid overlapping pa but couldn't use it because
* it had no free blocks. This should ideally never happen
* because:
*
* 1. When a new inode pa is added to rbtree it must have
* pa_free > 0 since otherwise we won't actually need
* preallocation.
*
* 2. An inode pa that is in the rbtree can only have its
* pa_free become zero when another thread calls:
* ext4_mb_new_blocks
* ext4_mb_use_preallocated
* ext4_mb_use_inode_pa
*
* 3. Further, after the above calls make pa_free == 0, we will
* immediately remove it from the rbtree in:
* ext4_mb_new_blocks
* ext4_mb_release_context
* ext4_mb_put_pa
*
* 4. Since pa_free becoming 0 and the pa getting removed from
* the tree both happen in ext4_mb_new_blocks, which is always
* called with i_data_sem held for data allocations, we can be
* sure that another process will never see a pa in rbtree with
* pa_free == 0.
*/
WARN_ON_ONCE(tmp_pa->pa_free == 0);
}
spin_unlock(&tmp_pa->pa_lock);
try_group_pa:
read_unlock(&ei->i_prealloc_lock);

/* can we use group allocation? */
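
The Step 1-4 comments in the hunk above boil down to: walk down the rbtree keeping the last node visited (which ends up adjacent to the request's logical start), step left if that node starts to the right of the request, keep stepping left past PAs already marked deleted, and only then test whether the surviving left neighbour actually covers the request. A rough userspace sketch of that lookup order, assuming a plain sorted array instead of the inode PA rbtree and its locking (struct pa_demo and pa_lookup are invented names for the example):

#include <stdio.h>

struct pa_demo {
	unsigned long start;	/* logical start, like pa_lstart */
	unsigned long len;	/* length in blocks, like pa_len */
	int deleted;		/* like pa_deleted */
};

/* Return the index of a usable PA covering @goal, or -1 if none can. */
static int pa_lookup(struct pa_demo *pa, int n, unsigned long goal)
{
	int i;

	/* Steps 1+2: rightmost entry whose start is <= goal (linear scan here). */
	for (i = n - 1; i >= 0; i--)
		if (pa[i].start <= goal)
			break;
	if (i < 0)
		return -1;		/* nothing to the left, give up */

	/* Step 3: skip entries already marked deleted, moving further left. */
	while (i >= 0 && pa[i].deleted)
		i--;
	if (i < 0)
		return -1;

	/* Step 4: only this PA can possibly cover goal; check it actually does. */
	if (goal >= pa[i].start + pa[i].len)
		return -1;		/* left neighbour ends before goal */

	return i;
}

int main(void)
{
	struct pa_demo pa[] = {
		{ .start = 0,   .len = 16, .deleted = 0 },
		{ .start = 100, .len = 32, .deleted = 1 },	/* being discarded */
		{ .start = 200, .len = 8,  .deleted = 0 },
	};
	unsigned long goals[] = { 10, 110, 204, 300 };

	for (unsigned int k = 0; k < sizeof(goals) / sizeof(goals[0]); k++)
		printf("goal %3lu -> pa index %d\n", goals[k],
		       pa_lookup(pa, 3, goals[k]));
	return 0;
}

The point the fix leans on is that PAs never overlap: once the non-deleted left neighbour is known not to cover the request, no other PA can, so the search can bail out to the group PA path.
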
14 changes: 14 additions & 0 deletions fs/ext4/xattr.c
@@ -1782,6 +1782,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
memmove(here, (void *)here + size,
(void *)last - (void *)here + sizeof(__u32));
memset(last, 0, size);

/*
* Update i_inline_off - moved ibody region might contain
* system.data attribute. Handling a failure here won't
* cause other complications for setting an xattr.
*/
if (!is_block && ext4_has_inline_data(inode)) {
ret = ext4_find_inline_data_nolock(inode);
if (ret) {
ext4_warning_inode(inode,
"unable to update i_inline_off");
goto out;
}
}
} else if (s->not_found) {
/* Insert new name. */
size_t size = EXT4_XATTR_LEN(name_len);
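
The xattr hunk above exists because i_inline_off caches where the system.data entry sits inside the inode body; once the memmove() above shifts that region, the cached offset can point at the wrong bytes, so the fix looks the entry up again via ext4_find_inline_data_nolock(). A toy userspace sketch of the stale-offset problem, assuming invented names (ibody, find_entry) and a much-simplified layout:

#include <stdio.h>
#include <string.h>

static char ibody[64] = "user.a\0.....system.data\0PAYLOAD";

/* re-scan for the entry, loosely analogous to ext4_find_inline_data_nolock() */
static long find_entry(const char *buf, size_t len, const char *name)
{
	size_t n = strlen(name) + 1;	/* match the trailing NUL too */

	for (size_t off = 0; off + n <= len; off++)
		if (!memcmp(buf + off, name, n))
			return (long)off;
	return -1;
}

int main(void)
{
	long cached = find_entry(ibody, sizeof(ibody), "system.data");

	printf("cached offset before move: %ld\n", cached);

	/* drop the first 8 bytes, shifting the rest of the region forward */
	memmove(ibody, ibody + 8, sizeof(ibody) - 8);

	/* the cached offset is now stale and no longer points at the entry */
	printf("bytes at stale offset: %.11s\n", ibody + cached);

	/* so the offset has to be recomputed after the move, as the fix does */
	printf("recomputed offset: %ld\n",
	       find_entry(ibody, sizeof(ibody), "system.data"));
	return 0;
}
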
