Skip to content

Commit

Permalink
Merge tag 'ext4_for_linus-6.8-rc1' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Various ext4 bug fixes and cleanups. The fixes are mostly in the
  fstrim and mballoc code paths.

  Also enable dioread_nolock in the case where the block size is less
  than the page size (dioread_nolock has been default in the bs == ps
  case for quite some time)"

* tag 'ext4_for_linus-6.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix inconsistent between segment fstrim and full fstrim
  ext4: fallback to complex scan if aligned scan doesn't work
  ext4: convert ext4_da_do_write_end() to take a folio
  ext4: allow for the last group to be marked as trimmed
  ext4: move ext4_check_bdev_write_error() into nojournal mode
  jbd2: abort journal when detecting metadata writeback error of fs dev
  jbd2: remove unused 'JBD2_CHECKPOINT_IO_ERROR' and 'j_atomic_flags'
  jbd2: replace journal state flag by checking errseq
  jbd2: add errseq to detect client fs's bdev writeback error
  ext4: improving calculation of 'fe_{len|start}' in mb_find_extent()
  ext4: clarify handling of unwritten bh in __ext4_block_zero_page_range()
  ext4: treat end of range as exclusive in ext4_zero_range()
  ext4: enable dioread_nolock as default for bs < ps case
  ext4: delete redundant calculations in ext4_mb_get_buddy_page_lock()
  ext4: reduce unnecessary memory allocation in alloc_flex_gd()
  ext4: avoid online resizing failures due to oversized flex bg
  ext4: remove unnecessary check from alloc_flex_gd()
  ext4: unify the type of flexbg_size to unsigned int
  • Loading branch information
Linus Torvalds committed Jan 11, 2024
2 parents 6bd593b + 68da4c4 commit 0d19d9e
Show file tree
Hide file tree
Showing 11 changed files with 140 additions and 101 deletions.
5 changes: 2 additions & 3 deletions fs/ext4/ext4_jbd2.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,16 +235,15 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,

might_sleep();

ext4_check_bdev_write_error(sb);

if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
if (err) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
return err;
}
}
} else
ext4_check_bdev_write_error(sb);
if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb))
return 0;
BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT);
Expand Down
6 changes: 4 additions & 2 deletions fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -4523,7 +4523,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* Round up offset. This is not fallocate, we need to zero out
* blocks, so convert interior block aligned part of the range to
* unwritten and possibly manually zero out unaligned parts of the
* range.
* range. Here, start and partial_begin are inclusive, end and
* partial_end are exclusive.
*/
start = round_up(offset, 1 << blkbits);
end = round_down((offset + len), 1 << blkbits);
Expand Down Expand Up @@ -4609,7 +4610,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* disk in case of crash before zeroing trans is committed.
*/
if (ext4_should_journal_data(inode)) {
ret = filemap_write_and_wait_range(mapping, start, end);
ret = filemap_write_and_wait_range(mapping, start,
end - 1);
if (ret) {
filemap_invalidate_unlock(mapping);
goto out_mutex;
Expand Down
25 changes: 16 additions & 9 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2947,7 +2947,7 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,

static int ext4_da_do_write_end(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page)
struct folio *folio)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
Expand All @@ -2958,12 +2958,13 @@ static int ext4_da_do_write_end(struct address_space *mapping,
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
* flag, which is all that's needed to trigger page writeback.
*/
copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
copied = block_write_end(NULL, mapping, pos, len, copied,
&folio->page, NULL);
new_i_size = pos + copied;

/*
* It's important to update i_size while still holding page lock,
* because page writeout could otherwise come in and zero beyond
* It's important to update i_size while still holding folio lock,
* because folio writeout could otherwise come in and zero beyond
* i_size.
*
* Since we are holding inode lock, we are sure i_disksize <=
Expand All @@ -2981,14 +2982,14 @@ static int ext4_da_do_write_end(struct address_space *mapping,

i_size_write(inode, new_i_size);
end = (new_i_size - 1) & (PAGE_SIZE - 1);
if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
if (copied && ext4_da_should_update_i_disksize(folio, end)) {
ext4_update_i_disksize(inode, new_i_size);
disksize_changed = true;
}
}

unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);

if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
Expand Down Expand Up @@ -3027,10 +3028,10 @@ static int ext4_da_write_end(struct file *file,
return ext4_write_inline_data_end(inode, pos, len, copied,
folio);

if (unlikely(copied < len) && !PageUptodate(page))
if (unlikely(copied < len) && !folio_test_uptodate(folio))
copied = 0;

return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
return ext4_da_do_write_end(mapping, pos, len, copied, folio);
}

/*
Expand Down Expand Up @@ -3630,6 +3631,12 @@ void ext4_set_aops(struct inode *inode)
inode->i_mapping->a_ops = &ext4_aops;
}

/*
* Here we can't skip an unwritten buffer even though it usually reads zero
* because it might have data in pagecache (eg, if called from ext4_zero_range,
* ext4_punch_hole, etc) which needs to be properly zeroed out. Otherwise a
* racing writeback can come later and flush the stale pagecache to disk.
*/
static int __ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
{
Expand Down
65 changes: 37 additions & 28 deletions fs/ext4/mballoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1456,9 +1456,8 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
return 0;
}

block++;
pnum = block / blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, gfp);
/* blocks_per_page == 1, hence we need another page for the buddy */
page = find_or_create_page(inode->i_mapping, block + 1, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
Expand Down Expand Up @@ -1958,8 +1957,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
static int mb_find_extent(struct ext4_buddy *e4b, int block,
int needed, struct ext4_free_extent *ex)
{
int next = block;
int max, order;
int max, order, next;
void *buddy;

assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
Expand All @@ -1977,16 +1975,12 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,

/* find actual order */
order = mb_find_order_for_block(e4b, block);
block = block >> order;

ex->fe_len = 1 << order;
ex->fe_start = block << order;
ex->fe_len = (1 << order) - (block & ((1 << order) - 1));
ex->fe_start = block;
ex->fe_group = e4b->bd_group;

/* calc difference from given start */
next = next - ex->fe_start;
ex->fe_len -= next;
ex->fe_start += next;
block = block >> order;

while (needed > ex->fe_len &&
mb_find_buddy(e4b, order, &max)) {
Expand Down Expand Up @@ -2895,14 +2889,19 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ac->ac_groups_scanned++;
if (cr == CR_POWER2_ALIGNED)
ext4_mb_simple_scan_group(ac, &e4b);
else if ((cr == CR_GOAL_LEN_FAST ||
cr == CR_BEST_AVAIL_LEN) &&
sbi->s_stripe &&
!(ac->ac_g_ex.fe_len %
EXT4_B2C(sbi, sbi->s_stripe)))
ext4_mb_scan_aligned(ac, &e4b);
else
ext4_mb_complex_scan_group(ac, &e4b);
else {
bool is_stripe_aligned = sbi->s_stripe &&
!(ac->ac_g_ex.fe_len %
EXT4_B2C(sbi, sbi->s_stripe));

if ((cr == CR_GOAL_LEN_FAST ||
cr == CR_BEST_AVAIL_LEN) &&
is_stripe_aligned)
ext4_mb_scan_aligned(ac, &e4b);

if (ac->ac_status == AC_STATUS_CONTINUE)
ext4_mb_complex_scan_group(ac, &e4b);
}

ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
Expand Down Expand Up @@ -6735,11 +6734,16 @@ __acquires(bitlock)
static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
ext4_group_t grp)
{
if (grp < ext4_get_groups_count(sb))
return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
ext4_group_first_block_no(sb, grp) - 1) >>
EXT4_CLUSTER_BITS(sb);
unsigned long nr_clusters_in_group;

if (grp < (ext4_get_groups_count(sb) - 1))
nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb);
else
nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) -
ext4_group_first_block_no(sb, grp))
>> EXT4_CLUSTER_BITS(sb);

return nr_clusters_in_group - 1;
}

static bool ext4_trim_interrupted(void)
Expand All @@ -6753,13 +6757,15 @@ static int ext4_try_to_trim_range(struct super_block *sb,
__acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
__releases(ext4_group_lock_ptr(sb, e4b->bd_group))
{
ext4_grpblk_t next, count, free_count;
ext4_grpblk_t next, count, free_count, last, origin_start;
bool set_trimmed = false;
void *bitmap;

last = ext4_last_grp_cluster(sb, e4b->bd_group);
bitmap = e4b->bd_bitmap;
if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
if (start == 0 && max >= last)
set_trimmed = true;
origin_start = start;
start = max(e4b->bd_info->bb_first_free, start);
count = 0;
free_count = 0;
Expand All @@ -6768,7 +6774,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
start = mb_find_next_zero_bit(bitmap, max + 1, start);
if (start > max)
break;
next = mb_find_next_bit(bitmap, max + 1, start);

next = mb_find_next_bit(bitmap, last + 1, start);
if (origin_start == 0 && next >= last)
set_trimmed = true;

if ((next - start) >= minblocks) {
int ret = ext4_trim_extent(sb, start, next - start, e4b);
Expand Down
49 changes: 33 additions & 16 deletions fs/ext4/resize.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,35 +218,53 @@ struct ext4_new_flex_group_data {
in the flex group */
__u16 *bg_flags; /* block group flags of groups
in @groups */
ext4_group_t resize_bg; /* number of allocated
new_group_data */
ext4_group_t count; /* number of groups in @groups
*/
};

/*
* Avoid memory allocation failures due to too many groups being added at once.
*/
#define MAX_RESIZE_BG 16384

/*
* alloc_flex_gd() allocates an ext4_new_flex_group_data with size of
* @flexbg_size.
*
* Returns NULL on failure otherwise address of the allocated structure.
*/
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size,
ext4_group_t o_group, ext4_group_t n_group)
{
ext4_group_t last_group;
struct ext4_new_flex_group_data *flex_gd;

flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
if (flex_gd == NULL)
goto out3;

if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
goto out2;
flex_gd->count = flexbg_size;
if (unlikely(flexbg_size > MAX_RESIZE_BG))
flex_gd->resize_bg = MAX_RESIZE_BG;
else
flex_gd->resize_bg = flexbg_size;

/* Avoid allocating large 'groups' array if not needed */
last_group = o_group | (flex_gd->resize_bg - 1);
if (n_group <= last_group)
flex_gd->resize_bg = 1 << fls(n_group - o_group + 1);
else if (n_group - last_group < flex_gd->resize_bg)
flex_gd->resize_bg = 1 << max(fls(last_group - o_group + 1),
fls(n_group - last_group));

flex_gd->groups = kmalloc_array(flexbg_size,
flex_gd->groups = kmalloc_array(flex_gd->resize_bg,
sizeof(struct ext4_new_group_data),
GFP_NOFS);
if (flex_gd->groups == NULL)
goto out2;

flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16),
GFP_NOFS);
if (flex_gd->bg_flags == NULL)
goto out1;
Expand Down Expand Up @@ -283,7 +301,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
*/
static int ext4_alloc_group_tables(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
int flexbg_size)
unsigned int flexbg_size)
{
struct ext4_new_group_data *group_data = flex_gd->groups;
ext4_fsblk_t start_blk;
Expand Down Expand Up @@ -384,12 +402,12 @@ static int ext4_alloc_group_tables(struct super_block *sb,
group = group_data[0].group;

printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
"%d groups, flexbg size is %d:\n", flex_gd->count,
"%u groups, flexbg size is %u:\n", flex_gd->count,
flexbg_size);

for (i = 0; i < flex_gd->count; i++) {
ext4_debug(
"adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
"adding %s group %u: %u blocks (%u free, %u mdata blocks)\n",
ext4_bg_has_super(sb, group + i) ? "normal" :
"no-super", group + i,
group_data[i].blocks_count,
Expand Down Expand Up @@ -1605,8 +1623,7 @@ static int ext4_flex_group_add(struct super_block *sb,

static int ext4_setup_next_flex_gd(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
ext4_fsblk_t n_blocks_count,
unsigned long flexbg_size)
ext4_fsblk_t n_blocks_count)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
Expand All @@ -1630,7 +1647,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
BUG_ON(last);
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);

last_group = group | (flexbg_size - 1);
last_group = group | (flex_gd->resize_bg - 1);
if (last_group > n_group)
last_group = n_group;

Expand Down Expand Up @@ -1990,8 +2007,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_fsblk_t o_blocks_count;
ext4_fsblk_t n_blocks_count_retry = 0;
unsigned long last_update_time = 0;
int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
int err = 0;
int meta_bg;
unsigned int flexbg_size = ext4_flex_bg_size(sbi);

/* See if the device is actually as big as what was requested */
bh = ext4_sb_bread(sb, n_blocks_count - 1, 0);
Expand Down Expand Up @@ -2123,7 +2141,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
if (err)
goto out;

flex_gd = alloc_flex_gd(flexbg_size);
flex_gd = alloc_flex_gd(flexbg_size, o_group, n_group);
if (flex_gd == NULL) {
err = -ENOMEM;
goto out;
Expand All @@ -2132,8 +2150,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
/* Add flex groups. Note that a regular group is a
* flex group with 1 group.
*/
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
flexbg_size)) {
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) {
if (time_is_before_jiffies(last_update_time + HZ * 10)) {
if (last_update_time)
ext4_msg(sb, KERN_INFO,
Expand Down
11 changes: 1 addition & 10 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -2793,15 +2793,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
return -EINVAL;
}

if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
int blocksize =
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if (blocksize < PAGE_SIZE)
ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
"experimental mount option 'dioread_nolock' "
"for blocksize < PAGE_SIZE");
}

err = ext4_check_test_dummy_encryption(fc, sb);
if (err)
return err;
Expand Down Expand Up @@ -4410,7 +4401,7 @@ static void ext4_set_def_opts(struct super_block *sb,
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sb, DELALLOC);

if (sb->s_blocksize == PAGE_SIZE)
if (sb->s_blocksize <= PAGE_SIZE)
set_opt(sb, DIOREAD_NOLOCK);
}

Expand Down
Loading

0 comments on commit 0d19d9e

Please sign in to comment.