From 6abfaaf124a81b7d2ab132cc2c9885baa14171e5 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:45 +0200 Subject: [PATCH 01/48] fs_parse: allow parameter value to be empty Allow parameter value to be empty by specifying fs_param_can_be_empty flag. Signed-off-by: Lukas Czerner Cc: Al Viro Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-2-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/fs_parser.c | 31 +++++++++++++++++++++++-------- include/linux/fs_parser.h | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 3df07c0e32b34..ed40ce5742fda 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -199,6 +199,8 @@ int fs_param_is_bool(struct p_log *log, const struct fs_parameter_spec *p, int b; if (param->type != fs_value_is_string) return fs_param_bad_value(log, param); + if (!*param->string && (p->flags & fs_param_can_be_empty)) + return 0; b = lookup_constant(bool_names, param->string, -1); if (b == -1) return fs_param_bad_value(log, param); @@ -211,8 +213,11 @@ int fs_param_is_u32(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { int base = (unsigned long)p->data; - if (param->type != fs_value_is_string || - kstrtouint(param->string, base, &result->uint_32) < 0) + if (param->type != fs_value_is_string) + return fs_param_bad_value(log, param); + if (!*param->string && (p->flags & fs_param_can_be_empty)) + return 0; + if (kstrtouint(param->string, base, &result->uint_32) < 0) return fs_param_bad_value(log, param); return 0; } @@ -221,8 +226,11 @@ EXPORT_SYMBOL(fs_param_is_u32); int fs_param_is_s32(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { - if (param->type != fs_value_is_string || - kstrtoint(param->string, 0, &result->int_32) < 0) + if (param->type != fs_value_is_string) + return fs_param_bad_value(log, param); + if (!*param->string && (p->flags & fs_param_can_be_empty)) + return 0; + if (kstrtoint(param->string, 0, &result->int_32) < 0) return fs_param_bad_value(log, param); return 0; } @@ -231,8 +239,11 @@ EXPORT_SYMBOL(fs_param_is_s32); int fs_param_is_u64(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { - if (param->type != fs_value_is_string || - kstrtoull(param->string, 0, &result->uint_64) < 0) + if (param->type != fs_value_is_string) + return fs_param_bad_value(log, param); + if (!*param->string && (p->flags & fs_param_can_be_empty)) + return 0; + if (kstrtoull(param->string, 0, &result->uint_64) < 0) return fs_param_bad_value(log, param); return 0; } @@ -244,6 +255,8 @@ int fs_param_is_enum(struct p_log *log, const struct fs_parameter_spec *p, const struct constant_table *c; if (param->type != fs_value_is_string) return fs_param_bad_value(log, param); + if (!*param->string && (p->flags & fs_param_can_be_empty)) + return 0; c = __lookup_constant(p->data, param->string); if (!c) return fs_param_bad_value(log, param); @@ -255,7 +268,8 @@ EXPORT_SYMBOL(fs_param_is_enum); int fs_param_is_string(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { - if (param->type != fs_value_is_string || !*param->string) + if (param->type != fs_value_is_string || + (!*param->string && !(p->flags & fs_param_can_be_empty))) return fs_param_bad_value(log, param); return 0; } @@ -275,7 +289,8 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p, { switch (param->type) { case fs_value_is_string: - if (kstrtouint(param->string, 0, &result->uint_32) < 0) + if ((!*param->string && !(p->flags & fs_param_can_be_empty)) || + kstrtouint(param->string, 0, &result->uint_32) < 0) break; if (result->uint_32 <= INT_MAX) return 0; diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index aab0ffc6bac67..f103c91139d4a 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -42,7 +42,7 @@ struct fs_parameter_spec { u8 opt; /* Option number (returned by fs_parse()) */ unsigned short flags; #define fs_param_neg_with_no 0x0002 /* "noxxx" is negative param */ -#define fs_param_neg_with_empty 0x0004 /* "xxx=" is negative param */ +#define fs_param_can_be_empty 0x0004 /* "xxx=" is allowed */ #define fs_param_deprecated 0x0008 /* The param is deprecated */ const void *data; }; From e5a185c26c11cbd1d386be8ee4c5e57b4f62273a Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:46 +0200 Subject: [PATCH 02/48] ext4: Add fs parameter specifications for mount options Create an array of fs_parameter_spec called ext4_param_specs to hold the mount option specifications we're going to be using with the new mount api. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-3-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4e33b5eca694d..fb596493513d0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include "ext4.h" #include "ext4_extents.h" /* Needed for trace points definition */ @@ -1688,11 +1690,160 @@ enum { Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan, + Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type, #ifdef CONFIG_EXT4_DEBUG Opt_fc_debug_max_replay, Opt_fc_debug_force #endif }; +static const struct constant_table ext4_param_errors[] = { + {"continue", Opt_err_cont}, + {"panic", Opt_err_panic}, + {"remount-ro", Opt_err_ro}, + {} +}; + +static const struct constant_table ext4_param_data[] = { + {"journal", Opt_data_journal}, + {"ordered", Opt_data_ordered}, + {"writeback", Opt_data_writeback}, + {} +}; + +static const struct constant_table ext4_param_data_err[] = { + {"abort", Opt_data_err_abort}, + {"ignore", Opt_data_err_ignore}, + {} +}; + +static const struct constant_table ext4_param_jqfmt[] = { + {"vfsold", Opt_jqfmt_vfsold}, + {"vfsv0", Opt_jqfmt_vfsv0}, + {"vfsv1", Opt_jqfmt_vfsv1}, + {} +}; + +static const struct constant_table ext4_param_dax[] = { + {"always", Opt_dax_always}, + {"inode", Opt_dax_inode}, + {"never", Opt_dax_never}, + {} +}; + +/* String parameter that allows empty argument */ +#define fsparam_string_empty(NAME, OPT) \ + __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) + +/* + * Mount option specification + * We don't use fsparam_flag_no because of the way we set the + * options and the way we show them in _ext4_show_options(). To + * keep the changes to a minimum, let's keep the negative options + * separate for now. + */ +static const struct fs_parameter_spec ext4_param_specs[] = { + fsparam_flag ("bsddf", Opt_bsd_df), + fsparam_flag ("minixdf", Opt_minix_df), + fsparam_flag ("grpid", Opt_grpid), + fsparam_flag ("bsdgroups", Opt_grpid), + fsparam_flag ("nogrpid", Opt_nogrpid), + fsparam_flag ("sysvgroups", Opt_nogrpid), + fsparam_u32 ("resgid", Opt_resgid), + fsparam_u32 ("resuid", Opt_resuid), + fsparam_u32 ("sb", Opt_sb), + fsparam_enum ("errors", Opt_errors, ext4_param_errors), + fsparam_flag ("nouid32", Opt_nouid32), + fsparam_flag ("debug", Opt_debug), + fsparam_flag ("oldalloc", Opt_removed), + fsparam_flag ("orlov", Opt_removed), + fsparam_flag ("user_xattr", Opt_user_xattr), + fsparam_flag ("nouser_xattr", Opt_nouser_xattr), + fsparam_flag ("acl", Opt_acl), + fsparam_flag ("noacl", Opt_noacl), + fsparam_flag ("norecovery", Opt_noload), + fsparam_flag ("noload", Opt_noload), + fsparam_flag ("bh", Opt_removed), + fsparam_flag ("nobh", Opt_removed), + fsparam_u32 ("commit", Opt_commit), + fsparam_u32 ("min_batch_time", Opt_min_batch_time), + fsparam_u32 ("max_batch_time", Opt_max_batch_time), + fsparam_u32 ("journal_dev", Opt_journal_dev), + fsparam_bdev ("journal_path", Opt_journal_path), + fsparam_flag ("journal_checksum", Opt_journal_checksum), + fsparam_flag ("nojournal_checksum", Opt_nojournal_checksum), + fsparam_flag ("journal_async_commit",Opt_journal_async_commit), + fsparam_flag ("abort", Opt_abort), + fsparam_enum ("data", Opt_data, ext4_param_data), + fsparam_enum ("data_err", Opt_data_err, + ext4_param_data_err), + fsparam_string_empty + ("usrjquota", Opt_usrjquota), + fsparam_string_empty + ("grpjquota", Opt_grpjquota), + fsparam_enum ("jqfmt", Opt_jqfmt, ext4_param_jqfmt), + fsparam_flag ("grpquota", Opt_grpquota), + fsparam_flag ("quota", Opt_quota), + fsparam_flag ("noquota", Opt_noquota), + fsparam_flag ("usrquota", Opt_usrquota), + fsparam_flag ("prjquota", Opt_prjquota), + fsparam_flag ("barrier", Opt_barrier), + fsparam_u32 ("barrier", Opt_barrier), + fsparam_flag ("nobarrier", Opt_nobarrier), + fsparam_flag ("i_version", Opt_i_version), + fsparam_flag ("dax", Opt_dax), + fsparam_enum ("dax", Opt_dax_type, ext4_param_dax), + fsparam_u32 ("stripe", Opt_stripe), + fsparam_flag ("delalloc", Opt_delalloc), + fsparam_flag ("nodelalloc", Opt_nodelalloc), + fsparam_flag ("warn_on_error", Opt_warn_on_error), + fsparam_flag ("nowarn_on_error", Opt_nowarn_on_error), + fsparam_flag ("lazytime", Opt_lazytime), + fsparam_flag ("nolazytime", Opt_nolazytime), + fsparam_u32 ("debug_want_extra_isize", + Opt_debug_want_extra_isize), + fsparam_flag ("mblk_io_submit", Opt_removed), + fsparam_flag ("nomblk_io_submit", Opt_removed), + fsparam_flag ("block_validity", Opt_block_validity), + fsparam_flag ("noblock_validity", Opt_noblock_validity), + fsparam_u32 ("inode_readahead_blks", + Opt_inode_readahead_blks), + fsparam_u32 ("journal_ioprio", Opt_journal_ioprio), + fsparam_u32 ("auto_da_alloc", Opt_auto_da_alloc), + fsparam_flag ("auto_da_alloc", Opt_auto_da_alloc), + fsparam_flag ("noauto_da_alloc", Opt_noauto_da_alloc), + fsparam_flag ("dioread_nolock", Opt_dioread_nolock), + fsparam_flag ("nodioread_nolock", Opt_dioread_lock), + fsparam_flag ("dioread_lock", Opt_dioread_lock), + fsparam_flag ("discard", Opt_discard), + fsparam_flag ("nodiscard", Opt_nodiscard), + fsparam_u32 ("init_itable", Opt_init_itable), + fsparam_flag ("init_itable", Opt_init_itable), + fsparam_flag ("noinit_itable", Opt_noinit_itable), +#ifdef CONFIG_EXT4_DEBUG + fsparam_flag ("fc_debug_force", Opt_fc_debug_force), + fsparam_u32 ("fc_debug_max_replay", Opt_fc_debug_max_replay), +#endif + fsparam_u32 ("max_dir_size_kb", Opt_max_dir_size_kb), + fsparam_flag ("test_dummy_encryption", + Opt_test_dummy_encryption), + fsparam_string ("test_dummy_encryption", + Opt_test_dummy_encryption), + fsparam_flag ("inlinecrypt", Opt_inlinecrypt), + fsparam_flag ("nombcache", Opt_nombcache), + fsparam_flag ("no_mbcache", Opt_nombcache), /* for backward compatibility */ + fsparam_flag ("prefetch_block_bitmaps", + Opt_removed), + fsparam_flag ("no_prefetch_block_bitmaps", + Opt_no_prefetch_block_bitmaps), + fsparam_s32 ("mb_optimize_scan", Opt_mb_optimize_scan), + fsparam_string ("check", Opt_removed), /* mount option from ext2/3 */ + fsparam_flag ("nocheck", Opt_removed), /* mount option from ext2/3 */ + fsparam_flag ("reservation", Opt_removed), /* mount option from ext2/3 */ + fsparam_flag ("noreservation", Opt_removed), /* mount option from ext2/3 */ + fsparam_u32 ("journal", Opt_removed), /* mount option from ext2/3 */ + {} +}; + static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, From 4c94bff967d90e91ace38a9886c1c7777a9c6f91 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:47 +0200 Subject: [PATCH 03/48] ext4: move option validation to a separate function Move option validation out of parse_options() into a separate function ext4_validate_options(). Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-4-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb596493513d0..e62187b77b4a5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -88,6 +88,7 @@ static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); static struct inode *ext4_get_journal_inode(struct super_block *sb, unsigned int journal_inum); +static int ext4_validate_options(struct super_block *sb); /* * Lock ordering @@ -2582,10 +2583,9 @@ static int parse_options(char *options, struct super_block *sb, struct ext4_parsed_options *ret_opts, int is_remount) { - struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb); - char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name; substring_t args[MAX_OPT_ARGS]; int token; + char *p; if (!options) return 1; @@ -2603,7 +2603,14 @@ static int parse_options(char *options, struct super_block *sb, is_remount) < 0) return 0; } + return ext4_validate_options(sb); +} + +static int ext4_validate_options(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); #ifdef CONFIG_QUOTA + char *usr_qf_name, *grp_qf_name; /* * We do the test below only for project quotas. 'usrquota' and * 'grpquota' mount options are allowed even without quota feature From 461c3af045d3ab949360fedbfb3ea1dcd9d8b22b Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:48 +0200 Subject: [PATCH 04/48] ext4: Change handle_mount_opt() to use fs_parameter Use the new mount option specifications to parse the options in handle_mount_opt(). However we're still using the old API to get the options string. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-5-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 250 +++++++++++++++++++++++++++--------------------- 1 file changed, 143 insertions(+), 107 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e62187b77b4a5..3d8caf09231b1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1981,7 +1981,8 @@ static const char deprecated_msg[] = "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; #ifdef CONFIG_QUOTA -static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) +static int set_qf_name(struct super_block *sb, int qtype, + struct fs_parameter *param) { struct ext4_sb_info *sbi = EXT4_SB(sb); char *qname, *old_qname = get_qf_name(sb, sbi, qtype); @@ -1998,7 +1999,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "ignored when QUOTA feature is enabled"); return 1; } - qname = match_strdup(args); + qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); if (!qname) { ext4_msg(sb, KERN_ERR, "Not enough memory for storing quotafile name"); @@ -2204,8 +2205,7 @@ static int ext4_sb_read_encoding(const struct ext4_super_block *es, #endif static int ext4_set_test_dummy_encryption(struct super_block *sb, - const char *opt, - const substring_t *arg, + struct fs_parameter *param, bool is_remount) { #ifdef CONFIG_FS_ENCRYPTION @@ -2223,7 +2223,7 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, "Can't set test_dummy_encryption on remount"); return -1; } - err = fscrypt_set_test_dummy_encryption(sb, arg->from, + err = fscrypt_set_test_dummy_encryption(sb, param->string, &sbi->s_dummy_enc_policy); if (err) { if (err == -EEXIST) @@ -2231,11 +2231,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, "Can't change test_dummy_encryption on remount"); else if (err == -EINVAL) ext4_msg(sb, KERN_WARNING, - "Value of option \"%s\" is unrecognized", opt); + "Value of option \"%s\" is unrecognized", + param->key); else ext4_msg(sb, KERN_WARNING, "Error processing option \"%s\" [%d]", - opt, err); + param->key, err); return -1; } ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); @@ -2246,41 +2247,52 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, return 1; } -struct ext4_parsed_options { +struct ext4_fs_context { unsigned long journal_devnum; unsigned int journal_ioprio; int mb_optimize_scan; }; -static int handle_mount_opt(struct super_block *sb, char *opt, int token, - substring_t *args, struct ext4_parsed_options *parsed_opts, - int is_remount) +static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { - struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = fc->s_fs_info; + struct super_block *sb = sbi->s_sb; + struct fs_parse_result result; const struct mount_opts *m; + int is_remount; kuid_t uid; kgid_t gid; - int arg = 0; + int token; + + token = fs_parse(fc, ext4_param_specs, param, &result); + if (token < 0) + return token; + is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; #ifdef CONFIG_QUOTA - if (token == Opt_usrjquota) - return set_qf_name(sb, USRQUOTA, &args[0]); - else if (token == Opt_grpjquota) - return set_qf_name(sb, GRPQUOTA, &args[0]); - else if (token == Opt_offusrjquota) - return clear_qf_name(sb, USRQUOTA); - else if (token == Opt_offgrpjquota) - return clear_qf_name(sb, GRPQUOTA); + if (token == Opt_usrjquota) { + if (!*param->string) + return clear_qf_name(sb, USRQUOTA); + else + return set_qf_name(sb, USRQUOTA, param); + } else if (token == Opt_grpjquota) { + if (!*param->string) + return clear_qf_name(sb, GRPQUOTA); + else + return set_qf_name(sb, GRPQUOTA, param); + } #endif switch (token) { case Opt_noacl: case Opt_nouser_xattr: - ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); + ext4_msg(sb, KERN_WARNING, deprecated_msg, param->key, "3.5"); break; case Opt_sb: return 1; /* handled by get_sb_block() */ case Opt_removed: - ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); + ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", + param->key); return 1; case Opt_abort: ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); @@ -2301,6 +2313,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ext4_msg(sb, KERN_ERR, "inline encryption not supported"); #endif return 1; + case Opt_errors: + case Opt_data: + case Opt_data_err: + case Opt_jqfmt: + case Opt_dax_type: + token = result.uint_32; } for (m = ext4_mount_opts; m->token != Opt_err; m++) @@ -2309,25 +2327,23 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, if (m->token == Opt_err) { ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " - "or missing value", opt); + "or missing value", param->key); return -1; } if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { ext4_msg(sb, KERN_ERR, - "Mount option \"%s\" incompatible with ext2", opt); + "Mount option \"%s\" incompatible with ext2", + param->key); return -1; } if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { ext4_msg(sb, KERN_ERR, - "Mount option \"%s\" incompatible with ext3", opt); + "Mount option \"%s\" incompatible with ext3", + param->key); return -1; } - if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) - return -1; - if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) - return -1; if (m->flags & MOPT_EXPLICIT) { if (m->mount_opt & EXT4_MOUNT_DELALLOC) { set_opt2(sb, EXPLICIT_DELALLOC); @@ -2345,63 +2361,69 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } if (m->flags & MOPT_NOSUPPORT) { - ext4_msg(sb, KERN_ERR, "%s option not supported", opt); + ext4_msg(sb, KERN_ERR, "%s option not supported", + param->key); } else if (token == Opt_commit) { - if (arg == 0) - arg = JBD2_DEFAULT_MAX_COMMIT_AGE; - else if (arg > INT_MAX / HZ) { + if (result.uint_32 == 0) + sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE; + else if (result.uint_32 > INT_MAX / HZ) { ext4_msg(sb, KERN_ERR, "Invalid commit interval %d, " "must be smaller than %d", - arg, INT_MAX / HZ); + result.uint_32, INT_MAX / HZ); return -1; } - sbi->s_commit_interval = HZ * arg; + sbi->s_commit_interval = HZ * result.uint_32; } else if (token == Opt_debug_want_extra_isize) { - if ((arg & 1) || - (arg < 4) || - (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) { + if ((result.uint_32 & 1) || + (result.uint_32 < 4) || + (result.uint_32 > + (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) { ext4_msg(sb, KERN_ERR, - "Invalid want_extra_isize %d", arg); + "Invalid want_extra_isize %d", result.uint_32); return -1; } - sbi->s_want_extra_isize = arg; + sbi->s_want_extra_isize = result.uint_32; } else if (token == Opt_max_batch_time) { - sbi->s_max_batch_time = arg; + sbi->s_max_batch_time = result.uint_32; } else if (token == Opt_min_batch_time) { - sbi->s_min_batch_time = arg; + sbi->s_min_batch_time = result.uint_32; } else if (token == Opt_inode_readahead_blks) { - if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) { + if (result.uint_32 && + (result.uint_32 > (1 << 30) || + !is_power_of_2(result.uint_32))) { ext4_msg(sb, KERN_ERR, "EXT4-fs: inode_readahead_blks must be " "0 or a power of 2 smaller than 2^31"); return -1; } - sbi->s_inode_readahead_blks = arg; + sbi->s_inode_readahead_blks = result.uint_32; } else if (token == Opt_init_itable) { set_opt(sb, INIT_INODE_TABLE); - if (!args->from) - arg = EXT4_DEF_LI_WAIT_MULT; - sbi->s_li_wait_mult = arg; + sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + if (param->type == fs_value_is_string) + sbi->s_li_wait_mult = result.uint_32; } else if (token == Opt_max_dir_size_kb) { - sbi->s_max_dir_size_kb = arg; + sbi->s_max_dir_size_kb = result.uint_32; #ifdef CONFIG_EXT4_DEBUG } else if (token == Opt_fc_debug_max_replay) { - sbi->s_fc_debug_max_replay = arg; + sbi->s_fc_debug_max_replay = result.uint_32; #endif } else if (token == Opt_stripe) { - sbi->s_stripe = arg; + sbi->s_stripe = result.uint_32; } else if (token == Opt_resuid) { - uid = make_kuid(current_user_ns(), arg); + uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) { - ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); + ext4_msg(sb, KERN_ERR, "Invalid uid value %d", + result.uint_32); return -1; } sbi->s_resuid = uid; } else if (token == Opt_resgid) { - gid = make_kgid(current_user_ns(), arg); + gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) { - ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); + ext4_msg(sb, KERN_ERR, "Invalid gid value %d", + result.uint_32); return -1; } sbi->s_resgid = gid; @@ -2411,9 +2433,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, "Cannot specify journal on remount"); return -1; } - parsed_opts->journal_devnum = arg; + ctx->journal_devnum = result.uint_32; } else if (token == Opt_journal_path) { - char *journal_path; struct inode *journal_inode; struct path path; int error; @@ -2423,44 +2444,27 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, "Cannot specify journal on remount"); return -1; } - journal_path = match_strdup(&args[0]); - if (!journal_path) { - ext4_msg(sb, KERN_ERR, "error: could not dup " - "journal device string"); - return -1; - } - error = kern_path(journal_path, LOOKUP_FOLLOW, &path); + error = fs_lookup_param(fc, param, 1, &path); if (error) { ext4_msg(sb, KERN_ERR, "error: could not find " - "journal device path: error %d", error); - kfree(journal_path); + "journal device path"); return -1; } journal_inode = d_inode(path.dentry); - if (!S_ISBLK(journal_inode->i_mode)) { - ext4_msg(sb, KERN_ERR, "error: journal path %s " - "is not a block device", journal_path); - path_put(&path); - kfree(journal_path); - return -1; - } - - parsed_opts->journal_devnum = new_encode_dev(journal_inode->i_rdev); + ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev); path_put(&path); - kfree(journal_path); } else if (token == Opt_journal_ioprio) { - if (arg > 7) { + if (result.uint_32 > 7) { ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" " (must be 0-7)"); return -1; } - parsed_opts->journal_ioprio = - IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); + ctx->journal_ioprio = + IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32); } else if (token == Opt_test_dummy_encryption) { - return ext4_set_test_dummy_encryption(sb, opt, &args[0], - is_remount); + return ext4_set_test_dummy_encryption(sb, param, is_remount); } else if (m->flags & MOPT_DATAJ) { if (is_remount) { if (!sbi->s_journal) @@ -2547,30 +2551,35 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } else if (token == Opt_data_err_ignore) { sbi->s_mount_opt &= ~m->mount_opt; } else if (token == Opt_mb_optimize_scan) { - if (arg != 0 && arg != 1) { + if (result.int_32 != 0 && result.int_32 != 1) { ext4_msg(sb, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); return -1; } - parsed_opts->mb_optimize_scan = arg; + ctx->mb_optimize_scan = result.int_32; } else { - if (!args->from) - arg = 1; + unsigned int set = 0; + + if ((param->type == fs_value_is_flag) || + result.uint_32 > 0) + set = 1; + if (m->flags & MOPT_CLEAR) - arg = !arg; + set = !set; else if (unlikely(!(m->flags & MOPT_SET))) { ext4_msg(sb, KERN_WARNING, - "buggy handling of option %s", opt); + "buggy handling of option %s", + param->key); WARN_ON(1); return -1; } if (m->flags & MOPT_2) { - if (arg != 0) + if (set != 0) sbi->s_mount_opt2 |= m->mount_opt; else sbi->s_mount_opt2 &= ~m->mount_opt; } else { - if (arg != 0) + if (set != 0) sbi->s_mount_opt |= m->mount_opt; else sbi->s_mount_opt &= ~m->mount_opt; @@ -2580,29 +2589,56 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } static int parse_options(char *options, struct super_block *sb, - struct ext4_parsed_options *ret_opts, + struct ext4_fs_context *ret_opts, int is_remount) { - substring_t args[MAX_OPT_ARGS]; - int token; - char *p; + struct fs_parameter param; + struct fs_context fc; + int ret; + char *key; if (!options) return 1; - while ((p = strsep(&options, ",")) != NULL) { - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, tokens, args); - if (handle_mount_opt(sb, p, token, args, ret_opts, - is_remount) < 0) - return 0; + memset(&fc, 0, sizeof(fc)); + fc.fs_private = ret_opts; + fc.s_fs_info = EXT4_SB(sb); + + if (is_remount) + fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; + + while ((key = strsep(&options, ",")) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = strchr(key, '='); + + param.type = fs_value_is_flag; + param.string = NULL; + + if (value) { + if (value == key) + continue; + + *value++ = 0; + v_len = strlen(value); + param.string = kmemdup_nul(value, v_len, + GFP_KERNEL); + if (!param.string) + return 0; + param.type = fs_value_is_string; + } + + param.key = key; + param.size = v_len; + + ret = handle_mount_opt(&fc, ¶m); + if (param.string) + kfree(param.string); + if (ret < 0) + return 0; + } } + return ext4_validate_options(sb); } @@ -4057,7 +4093,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) __u64 blocks_count; int err = 0; ext4_group_t first_not_zeroed; - struct ext4_parsed_options parsed_opts; + struct ext4_fs_context parsed_opts; /* Set defaults for the variables that will be set during parsing */ parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO; @@ -5899,7 +5935,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) char *to_free[EXT4_MAXQUOTAS]; #endif char *orig_data = kstrdup(data, GFP_KERNEL); - struct ext4_parsed_options parsed_opts; + struct ext4_fs_context parsed_opts; parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO; parsed_opts.journal_devnum = 0; From da812f611934bef16fe02d667a76df77ae9cf99a Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:49 +0200 Subject: [PATCH 05/48] ext4: Allow sb to be NULL in ext4_msg() At the parsing phase of mount in the new mount api sb will not be available so allow sb to be NULL in ext4_msg and use that in handle_mount_opt(). Also change return value to appropriate -EINVAL where needed. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-6-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 144 ++++++++++++++++++++++++++---------------------- 1 file changed, 78 insertions(+), 66 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3d8caf09231b1..877eaca993555 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -88,7 +88,7 @@ static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); static struct inode *ext4_get_journal_inode(struct super_block *sb, unsigned int journal_inum); -static int ext4_validate_options(struct super_block *sb); +static int ext4_validate_options(struct fs_context *fc); /* * Lock ordering @@ -915,14 +915,20 @@ void __ext4_msg(struct super_block *sb, struct va_format vaf; va_list args; - atomic_inc(&EXT4_SB(sb)->s_msg_count); - if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) - return; + if (sb) { + atomic_inc(&EXT4_SB(sb)->s_msg_count); + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), + "EXT4-fs")) + return; + } va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); + if (sb) + printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); + else + printk("%sEXT4-fs: %pV\n", prefix, &vaf); va_end(args); } @@ -2286,12 +2292,12 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) switch (token) { case Opt_noacl: case Opt_nouser_xattr: - ext4_msg(sb, KERN_WARNING, deprecated_msg, param->key, "3.5"); + ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5"); break; case Opt_sb: return 1; /* handled by get_sb_block() */ case Opt_removed: - ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", + ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option", param->key); return 1; case Opt_abort: @@ -2310,7 +2316,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT sb->s_flags |= SB_INLINECRYPT; #else - ext4_msg(sb, KERN_ERR, "inline encryption not supported"); + ext4_msg(NULL, KERN_ERR, "inline encryption not supported"); #endif return 1; case Opt_errors: @@ -2326,22 +2332,22 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; if (m->token == Opt_err) { - ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " + ext4_msg(NULL, KERN_ERR, "Unrecognized mount option \"%s\" " "or missing value", param->key); - return -1; + return -EINVAL; } if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Mount option \"%s\" incompatible with ext2", param->key); - return -1; + return -EINVAL; } if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Mount option \"%s\" incompatible with ext3", param->key); - return -1; + return -EINVAL; } if (m->flags & MOPT_EXPLICIT) { @@ -2350,28 +2356,28 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM); } else - return -1; + return -EINVAL; } if (m->flags & MOPT_CLEAR_ERR) clear_opt(sb, ERRORS_MASK); if (token == Opt_noquota && sb_any_quota_loaded(sb)) { - ext4_msg(sb, KERN_ERR, "Cannot change quota " + ext4_msg(NULL, KERN_ERR, "Cannot change quota " "options when quota turned on"); - return -1; + return -EINVAL; } if (m->flags & MOPT_NOSUPPORT) { - ext4_msg(sb, KERN_ERR, "%s option not supported", + ext4_msg(NULL, KERN_ERR, "%s option not supported", param->key); } else if (token == Opt_commit) { if (result.uint_32 == 0) sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE; else if (result.uint_32 > INT_MAX / HZ) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Invalid commit interval %d, " "must be smaller than %d", result.uint_32, INT_MAX / HZ); - return -1; + return -EINVAL; } sbi->s_commit_interval = HZ * result.uint_32; } else if (token == Opt_debug_want_extra_isize) { @@ -2379,9 +2385,9 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) (result.uint_32 < 4) || (result.uint_32 > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Invalid want_extra_isize %d", result.uint_32); - return -1; + return -EINVAL; } sbi->s_want_extra_isize = result.uint_32; } else if (token == Opt_max_batch_time) { @@ -2392,10 +2398,10 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (result.uint_32 && (result.uint_32 > (1 << 30) || !is_power_of_2(result.uint_32))) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "EXT4-fs: inode_readahead_blks must be " "0 or a power of 2 smaller than 2^31"); - return -1; + return -EINVAL; } sbi->s_inode_readahead_blks = result.uint_32; } else if (token == Opt_init_itable) { @@ -2414,24 +2420,24 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } else if (token == Opt_resuid) { uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) { - ext4_msg(sb, KERN_ERR, "Invalid uid value %d", + ext4_msg(NULL, KERN_ERR, "Invalid uid value %d", result.uint_32); - return -1; + return -EINVAL; } sbi->s_resuid = uid; } else if (token == Opt_resgid) { gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) { - ext4_msg(sb, KERN_ERR, "Invalid gid value %d", + ext4_msg(NULL, KERN_ERR, "Invalid gid value %d", result.uint_32); - return -1; + return -EINVAL; } sbi->s_resgid = gid; } else if (token == Opt_journal_dev) { if (is_remount) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Cannot specify journal on remount"); - return -1; + return -EINVAL; } ctx->journal_devnum = result.uint_32; } else if (token == Opt_journal_path) { @@ -2440,16 +2446,16 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) int error; if (is_remount) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Cannot specify journal on remount"); - return -1; + return -EINVAL; } error = fs_lookup_param(fc, param, 1, &path); if (error) { - ext4_msg(sb, KERN_ERR, "error: could not find " + ext4_msg(NULL, KERN_ERR, "error: could not find " "journal device path"); - return -1; + return -EINVAL; } journal_inode = d_inode(path.dentry); @@ -2457,9 +2463,9 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) path_put(&path); } else if (token == Opt_journal_ioprio) { if (result.uint_32 > 7) { - ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" + ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority" " (must be 0-7)"); - return -1; + return -EINVAL; } ctx->journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32); @@ -2468,11 +2474,11 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } else if (m->flags & MOPT_DATAJ) { if (is_remount) { if (!sbi->s_journal) - ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); + ext4_msg(NULL, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) { - ext4_msg(sb, KERN_ERR, + ext4_msg(NULL, KERN_ERR, "Cannot change data mode on remount"); - return -1; + return -EINVAL; } } else { clear_opt(sb, DATA_FLAGS); @@ -2482,12 +2488,12 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } else if (m->flags & MOPT_QFMT) { if (sb_any_quota_loaded(sb) && sbi->s_jquota_fmt != m->mount_opt) { - ext4_msg(sb, KERN_ERR, "Cannot change journaled " + ext4_msg(NULL, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); - return -1; + return -EINVAL; } if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_INFO, + ext4_msg(NULL, KERN_INFO, "Quota format mount options ignored " "when QUOTA feature is enabled"); return 1; @@ -2504,18 +2510,18 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) { fail_dax_change_remount: - ext4_msg(sb, KERN_ERR, "can't change " + ext4_msg(NULL, KERN_ERR, "can't change " "dax mount option while remounting"); - return -1; + return -EINVAL; } if (is_remount && (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { - ext4_msg(sb, KERN_ERR, "can't mount with " + ext4_msg(NULL, KERN_ERR, "can't mount with " "both data=journal and dax"); - return -1; + return -EINVAL; } - ext4_msg(sb, KERN_WARNING, + ext4_msg(NULL, KERN_WARNING, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS; sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER; @@ -2541,10 +2547,10 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) break; } #else - ext4_msg(sb, KERN_INFO, "dax option not supported"); + ext4_msg(NULL, KERN_INFO, "dax option not supported"); sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER; sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; - return -1; + return -EINVAL; #endif } else if (token == Opt_data_err_abort) { sbi->s_mount_opt |= m->mount_opt; @@ -2552,9 +2558,9 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) sbi->s_mount_opt &= ~m->mount_opt; } else if (token == Opt_mb_optimize_scan) { if (result.int_32 != 0 && result.int_32 != 1) { - ext4_msg(sb, KERN_WARNING, + ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); - return -1; + return -EINVAL; } ctx->mb_optimize_scan = result.int_32; } else { @@ -2567,11 +2573,11 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (m->flags & MOPT_CLEAR) set = !set; else if (unlikely(!(m->flags & MOPT_SET))) { - ext4_msg(sb, KERN_WARNING, + ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s", param->key); WARN_ON(1); - return -1; + return -EINVAL; } if (m->flags & MOPT_2) { if (set != 0) @@ -2639,12 +2645,17 @@ static int parse_options(char *options, struct super_block *sb, } } - return ext4_validate_options(sb); + ret = ext4_validate_options(&fc); + if (ret < 0) + return 0; + + return 1; } -static int ext4_validate_options(struct super_block *sb) +static int ext4_validate_options(struct fs_context *fc) { - struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_sb_info *sbi = fc->s_fs_info; + struct super_block *sb = sbi->s_sb; #ifdef CONFIG_QUOTA char *usr_qf_name, *grp_qf_name; /* @@ -2653,9 +2664,9 @@ static int ext4_validate_options(struct super_block *sb) * to support legacy quotas in quota files. */ if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) { - ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. " + ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. " "Cannot enable project quota enforcement."); - return 0; + return -EINVAL; } usr_qf_name = get_qf_name(sb, sbi, USRQUOTA); grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA); @@ -2667,15 +2678,15 @@ static int ext4_validate_options(struct super_block *sb) clear_opt(sb, GRPQUOTA); if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { - ext4_msg(sb, KERN_ERR, "old and new quota " + ext4_msg(NULL, KERN_ERR, "old and new quota " "format mixing"); - return 0; + return -EINVAL; } if (!sbi->s_jquota_fmt) { - ext4_msg(sb, KERN_ERR, "journaled quota format " + ext4_msg(NULL, KERN_ERR, "journaled quota format " "not specified"); - return 0; + return -EINVAL; } } #endif @@ -2683,11 +2694,12 @@ static int ext4_validate_options(struct super_block *sb) int blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (blocksize < PAGE_SIZE) - ext4_msg(sb, KERN_WARNING, "Warning: mounting with an " - "experimental mount option 'dioread_nolock' " - "for blocksize < PAGE_SIZE"); + ext4_msg(NULL, KERN_WARNING, + "Warning: mounting with an experimental " + "option 'dioread_nolock' for " + "blocksize < PAGE_SIZE"); } - return 1; + return 0; } static inline void ext4_show_quota_options(struct seq_file *seq, From e6e268cb682290da29e3c8408493a4474307b8cc Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:50 +0200 Subject: [PATCH 06/48] ext4: move quota configuration out of handle_mount_opt() At the parsing phase of mount in the new mount api sb will not be available so move quota confiquration out of handle_mount_opt() by noting the quota file names in the ext4_fs_context structure to be able to apply it later. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-7-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 258 +++++++++++++++++++++++++++++++----------------- 1 file changed, 165 insertions(+), 93 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 877eaca993555..c082aead0e334 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -89,6 +89,10 @@ static void ext4_clear_request_list(void); static struct inode *ext4_get_journal_inode(struct super_block *sb, unsigned int journal_inum); static int ext4_validate_options(struct fs_context *fc); +static int ext4_check_quota_consistency(struct fs_context *fc, + struct super_block *sb); +static void ext4_apply_quota_options(struct fs_context *fc, + struct super_block *sb); /* * Lock ordering @@ -1986,71 +1990,6 @@ static const char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; -#ifdef CONFIG_QUOTA -static int set_qf_name(struct super_block *sb, int qtype, - struct fs_parameter *param) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - char *qname, *old_qname = get_qf_name(sb, sbi, qtype); - int ret = -1; - - if (sb_any_quota_loaded(sb) && !old_qname) { - ext4_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); - return -1; - } - if (ext4_has_feature_quota(sb)) { - ext4_msg(sb, KERN_INFO, "Journaled quota options " - "ignored when QUOTA feature is enabled"); - return 1; - } - qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); - if (!qname) { - ext4_msg(sb, KERN_ERR, - "Not enough memory for storing quotafile name"); - return -1; - } - if (old_qname) { - if (strcmp(old_qname, qname) == 0) - ret = 1; - else - ext4_msg(sb, KERN_ERR, - "%s quota file already specified", - QTYPE2NAME(qtype)); - goto errout; - } - if (strchr(qname, '/')) { - ext4_msg(sb, KERN_ERR, - "quotafile must be on filesystem root"); - goto errout; - } - rcu_assign_pointer(sbi->s_qf_names[qtype], qname); - set_opt(sb, QUOTA); - return 1; -errout: - kfree(qname); - return ret; -} - -static int clear_qf_name(struct super_block *sb, int qtype) -{ - - struct ext4_sb_info *sbi = EXT4_SB(sb); - char *old_qname = get_qf_name(sb, sbi, qtype); - - if (sb_any_quota_loaded(sb) && old_qname) { - ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" - " when quota turned on"); - return -1; - } - rcu_assign_pointer(sbi->s_qf_names[qtype], NULL); - synchronize_rcu(); - kfree(old_qname); - return 1; -} -#endif - #define MOPT_SET 0x0001 #define MOPT_CLEAR 0x0002 #define MOPT_NOSUPPORT 0x0004 @@ -2254,11 +2193,70 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, } struct ext4_fs_context { - unsigned long journal_devnum; - unsigned int journal_ioprio; - int mb_optimize_scan; + char *s_qf_names[EXT4_MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ + unsigned short qname_spec; + unsigned long journal_devnum; + unsigned int journal_ioprio; + int mb_optimize_scan; }; +#ifdef CONFIG_QUOTA +/* + * Note the name of the specified quota file. + */ +static int note_qf_name(struct fs_context *fc, int qtype, + struct fs_parameter *param) +{ + struct ext4_fs_context *ctx = fc->fs_private; + char *qname; + + if (param->size < 1) { + ext4_msg(NULL, KERN_ERR, "Missing quota name"); + return -EINVAL; + } + if (strchr(param->string, '/')) { + ext4_msg(NULL, KERN_ERR, + "quotafile must be on filesystem root"); + return -EINVAL; + } + if (ctx->s_qf_names[qtype]) { + if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) { + ext4_msg(NULL, KERN_ERR, + "%s quota file already specified", + QTYPE2NAME(qtype)); + return -EINVAL; + } + return 0; + } + + qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); + if (!qname) { + ext4_msg(NULL, KERN_ERR, + "Not enough memory for storing quotafile name"); + return -ENOMEM; + } + ctx->s_qf_names[qtype] = qname; + ctx->qname_spec |= 1 << qtype; + return 0; +} + +/* + * Clear the name of the specified quota file. + */ +static int unnote_qf_name(struct fs_context *fc, int qtype) +{ + struct ext4_fs_context *ctx = fc->fs_private; + + if (ctx->s_qf_names[qtype]) + kfree(ctx->s_qf_names[qtype]); + + ctx->s_qf_names[qtype] = NULL; + ctx->qname_spec |= 1 << qtype; + return 0; +} +#endif + static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { struct ext4_fs_context *ctx = fc->fs_private; @@ -2279,14 +2277,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) #ifdef CONFIG_QUOTA if (token == Opt_usrjquota) { if (!*param->string) - return clear_qf_name(sb, USRQUOTA); + return unnote_qf_name(fc, USRQUOTA); else - return set_qf_name(sb, USRQUOTA, param); + return note_qf_name(fc, USRQUOTA, param); } else if (token == Opt_grpjquota) { if (!*param->string) - return clear_qf_name(sb, GRPQUOTA); + return unnote_qf_name(fc, GRPQUOTA); else - return set_qf_name(sb, GRPQUOTA, param); + return note_qf_name(fc, GRPQUOTA, param); } #endif switch (token) { @@ -2360,11 +2358,6 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } if (m->flags & MOPT_CLEAR_ERR) clear_opt(sb, ERRORS_MASK); - if (token == Opt_noquota && sb_any_quota_loaded(sb)) { - ext4_msg(NULL, KERN_ERR, "Cannot change quota " - "options when quota turned on"); - return -EINVAL; - } if (m->flags & MOPT_NOSUPPORT) { ext4_msg(NULL, KERN_ERR, "%s option not supported", @@ -2486,19 +2479,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } #ifdef CONFIG_QUOTA } else if (m->flags & MOPT_QFMT) { - if (sb_any_quota_loaded(sb) && - sbi->s_jquota_fmt != m->mount_opt) { - ext4_msg(NULL, KERN_ERR, "Cannot change journaled " - "quota options when quota turned on"); - return -EINVAL; - } - if (ext4_has_feature_quota(sb)) { - ext4_msg(NULL, KERN_INFO, - "Quota format mount options ignored " - "when QUOTA feature is enabled"); - return 1; - } - sbi->s_jquota_fmt = m->mount_opt; + ctx->s_jquota_fmt = m->mount_opt; #endif } else if (token == Opt_dax || token == Opt_dax_always || token == Opt_dax_inode || token == Opt_dax_never) { @@ -2595,7 +2576,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } static int parse_options(char *options, struct super_block *sb, - struct ext4_fs_context *ret_opts, + struct ext4_fs_context *ctx, int is_remount) { struct fs_parameter param; @@ -2607,7 +2588,7 @@ static int parse_options(char *options, struct super_block *sb, return 1; memset(&fc, 0, sizeof(fc)); - fc.fs_private = ret_opts; + fc.fs_private = ctx; fc.s_fs_info = EXT4_SB(sb); if (is_remount) @@ -2649,9 +2630,100 @@ static int parse_options(char *options, struct super_block *sb, if (ret < 0) return 0; + ret = ext4_check_quota_consistency(&fc, sb); + if (ret < 0) + return 0; + + if (ctx->qname_spec) + ext4_apply_quota_options(&fc, sb); + return 1; } +static void ext4_apply_quota_options(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *qname; + int i; + + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + if (!(ctx->qname_spec & (1 << i))) + continue; + qname = ctx->s_qf_names[i]; /* May be NULL */ + ctx->s_qf_names[i] = NULL; + kfree(sbi->s_qf_names[i]); + rcu_assign_pointer(sbi->s_qf_names[i], qname); + set_opt(sb, QUOTA); + } +#endif +} + +/* + * Check quota settings consistency. + */ +static int ext4_check_quota_consistency(struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_QUOTA + struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + bool quota_feature = ext4_has_feature_quota(sb); + bool quota_loaded = sb_any_quota_loaded(sb); + int i; + + if (ctx->qname_spec && quota_loaded) { + if (quota_feature) + goto err_feature; + + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + if (!(ctx->qname_spec & (1 << i))) + continue; + + if (!!sbi->s_qf_names[i] != !!ctx->s_qf_names[i]) + goto err_jquota_change; + + if (sbi->s_qf_names[i] && ctx->s_qf_names[i] && + strcmp(sbi->s_qf_names[i], + ctx->s_qf_names[i]) != 0) + goto err_jquota_specified; + } + } + + if (ctx->s_jquota_fmt) { + if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded) + goto err_quota_change; + if (quota_feature) { + ext4_msg(NULL, KERN_INFO, "Quota format mount options " + "ignored when QUOTA feature is enabled"); + return 0; + } + } + return 0; + +err_quota_change: + ext4_msg(NULL, KERN_ERR, + "Cannot change quota options when quota turned on"); + return -EINVAL; +err_jquota_change: + ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota " + "options when quota turned on"); + return -EINVAL; +err_jquota_specified: + ext4_msg(NULL, KERN_ERR, "%s quota file already specified", + QTYPE2NAME(i)); + return -EINVAL; +err_feature: + ext4_msg(NULL, KERN_ERR, "Journaled quota options ignored " + "when QUOTA feature is enabled"); + return 0; +#else + return 0; +#endif +} + static int ext4_validate_options(struct fs_context *fc) { struct ext4_sb_info *sbi = fc->s_fs_info; @@ -4105,7 +4177,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) __u64 blocks_count; int err = 0; ext4_group_t first_not_zeroed; - struct ext4_fs_context parsed_opts; + struct ext4_fs_context parsed_opts = {0}; /* Set defaults for the variables that will be set during parsing */ parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO; From b6bd243500b6024d92eaaacf592ed8588c2c75ea Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:51 +0200 Subject: [PATCH 07/48] ext4: check ext2/3 compatibility outside handle_mount_opt() At the parsing phase of mount in the new mount api sb will not be available so move ext2/3 compatibility check outside handle_mount_opt(). Unfortunately we will lose the ability to show exactly which option is not compatible. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-8-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c082aead0e334..68395631d2226 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -89,8 +89,8 @@ static void ext4_clear_request_list(void); static struct inode *ext4_get_journal_inode(struct super_block *sb, unsigned int journal_inum); static int ext4_validate_options(struct fs_context *fc); -static int ext4_check_quota_consistency(struct fs_context *fc, - struct super_block *sb); +static int ext4_check_opt_consistency(struct fs_context *fc, + struct super_block *sb); static void ext4_apply_quota_options(struct fs_context *fc, struct super_block *sb); @@ -2199,6 +2199,7 @@ struct ext4_fs_context { unsigned long journal_devnum; unsigned int journal_ioprio; int mb_optimize_scan; + unsigned int opt_flags; /* MOPT flags */ }; #ifdef CONFIG_QUOTA @@ -2329,25 +2330,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (token == m->token) break; + ctx->opt_flags |= m->flags; + if (m->token == Opt_err) { ext4_msg(NULL, KERN_ERR, "Unrecognized mount option \"%s\" " "or missing value", param->key); return -EINVAL; } - if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { - ext4_msg(NULL, KERN_ERR, - "Mount option \"%s\" incompatible with ext2", - param->key); - return -EINVAL; - } - if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { - ext4_msg(NULL, KERN_ERR, - "Mount option \"%s\" incompatible with ext3", - param->key); - return -EINVAL; - } - if (m->flags & MOPT_EXPLICIT) { if (m->mount_opt & EXT4_MOUNT_DELALLOC) { set_opt2(sb, EXPLICIT_DELALLOC); @@ -2630,7 +2620,7 @@ static int parse_options(char *options, struct super_block *sb, if (ret < 0) return 0; - ret = ext4_check_quota_consistency(&fc, sb); + ret = ext4_check_opt_consistency(&fc, sb); if (ret < 0) return 0; @@ -2724,6 +2714,25 @@ static int ext4_check_quota_consistency(struct fs_context *fc, #endif } +static int ext4_check_opt_consistency(struct fs_context *fc, + struct super_block *sb) +{ + struct ext4_fs_context *ctx = fc->fs_private; + + if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { + ext4_msg(NULL, KERN_ERR, + "Mount option(s) incompatible with ext2"); + return -EINVAL; + } + if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { + ext4_msg(NULL, KERN_ERR, + "Mount option(s) incompatible with ext3"); + return -EINVAL; + } + + return ext4_check_quota_consistency(fc, sb); +} + static int ext4_validate_options(struct fs_context *fc) { struct ext4_sb_info *sbi = fc->s_fs_info; From 6e47a3cc68fc525428297a00524833361ebbb0e9 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:52 +0200 Subject: [PATCH 08/48] ext4: get rid of super block and sbi from handle_mount_ops() At the parsing phase of mount in the new mount api sb will not be available. We've already removed some uses of sb and sbi, but now we need to get rid of the rest of it. Use ext4_fs_context to store all of the configuration specification so that it can be later applied to the super block and sbi. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-9-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 541 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 368 insertions(+), 173 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 68395631d2226..1e4908f5f593a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -91,8 +91,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, static int ext4_validate_options(struct fs_context *fc); static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb); -static void ext4_apply_quota_options(struct fs_context *fc, - struct super_block *sb); +static int ext4_apply_options(struct fs_context *fc, struct super_block *sb); /* * Lock ordering @@ -2149,57 +2148,74 @@ static int ext4_sb_read_encoding(const struct ext4_super_block *es, } #endif -static int ext4_set_test_dummy_encryption(struct super_block *sb, - struct fs_parameter *param, - bool is_remount) +static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) { #ifdef CONFIG_FS_ENCRYPTION struct ext4_sb_info *sbi = EXT4_SB(sb); int err; - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. - */ - if (is_remount && !sbi->s_dummy_enc_policy.policy) { - ext4_msg(sb, KERN_WARNING, - "Can't set test_dummy_encryption on remount"); - return -1; - } - err = fscrypt_set_test_dummy_encryption(sb, param->string, + err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_policy); if (err) { - if (err == -EEXIST) - ext4_msg(sb, KERN_WARNING, - "Can't change test_dummy_encryption on remount"); - else if (err == -EINVAL) - ext4_msg(sb, KERN_WARNING, - "Value of option \"%s\" is unrecognized", - param->key); - else - ext4_msg(sb, KERN_WARNING, - "Error processing option \"%s\" [%d]", - param->key, err); - return -1; + ext4_msg(sb, KERN_WARNING, + "Error while setting test dummy encryption [%d]", err); + return err; } ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); -#else - ext4_msg(sb, KERN_WARNING, - "Test dummy encryption mount option ignored"); #endif - return 1; + return 0; } +#define EXT4_SPEC_JQUOTA (1 << 0) +#define EXT4_SPEC_JQFMT (1 << 1) +#define EXT4_SPEC_DATAJ (1 << 2) +#define EXT4_SPEC_SB_BLOCK (1 << 3) +#define EXT4_SPEC_JOURNAL_DEV (1 << 4) +#define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5) +#define EXT4_SPEC_DUMMY_ENCRYPTION (1 << 6) +#define EXT4_SPEC_s_want_extra_isize (1 << 7) +#define EXT4_SPEC_s_max_batch_time (1 << 8) +#define EXT4_SPEC_s_min_batch_time (1 << 9) +#define EXT4_SPEC_s_inode_readahead_blks (1 << 10) +#define EXT4_SPEC_s_li_wait_mult (1 << 11) +#define EXT4_SPEC_s_max_dir_size_kb (1 << 12) +#define EXT4_SPEC_s_stripe (1 << 13) +#define EXT4_SPEC_s_resuid (1 << 14) +#define EXT4_SPEC_s_resgid (1 << 15) +#define EXT4_SPEC_s_commit_interval (1 << 16) +#define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) + struct ext4_fs_context { char *s_qf_names[EXT4_MAXQUOTAS]; + char *test_dummy_enc_arg; int s_jquota_fmt; /* Format of quota to use */ + int mb_optimize_scan; +#ifdef CONFIG_EXT4_DEBUG + int s_fc_debug_max_replay; +#endif unsigned short qname_spec; + unsigned long vals_s_flags; /* Bits to set in s_flags */ + unsigned long mask_s_flags; /* Bits changed in s_flags */ unsigned long journal_devnum; + unsigned long s_commit_interval; + unsigned long s_stripe; + unsigned int s_inode_readahead_blks; + unsigned int s_want_extra_isize; + unsigned int s_li_wait_mult; + unsigned int s_max_dir_size_kb; unsigned int journal_ioprio; - int mb_optimize_scan; + unsigned int vals_s_mount_opt; + unsigned int mask_s_mount_opt; + unsigned int vals_s_mount_opt2; + unsigned int mask_s_mount_opt2; + unsigned int vals_s_mount_flags; + unsigned int mask_s_mount_flags; unsigned int opt_flags; /* MOPT flags */ + unsigned int spec; + u32 s_max_batch_time; + u32 s_min_batch_time; + kuid_t s_resuid; + kgid_t s_resgid; }; #ifdef CONFIG_QUOTA @@ -2239,6 +2255,7 @@ static int note_qf_name(struct fs_context *fc, int qtype, } ctx->s_qf_names[qtype] = qname; ctx->qname_spec |= 1 << qtype; + ctx->spec |= EXT4_SPEC_JQUOTA; return 0; } @@ -2254,15 +2271,35 @@ static int unnote_qf_name(struct fs_context *fc, int qtype) ctx->s_qf_names[qtype] = NULL; ctx->qname_spec |= 1 << qtype; + ctx->spec |= EXT4_SPEC_JQUOTA; return 0; } #endif +#define EXT4_SET_CTX(name) \ +static inline void ctx_set_##name(struct ext4_fs_context *ctx, int flag)\ +{ \ + ctx->mask_s_##name |= flag; \ + ctx->vals_s_##name |= flag; \ +} \ +static inline void ctx_clear_##name(struct ext4_fs_context *ctx, int flag)\ +{ \ + ctx->mask_s_##name |= flag; \ + ctx->vals_s_##name &= ~flag; \ +} \ +static inline bool ctx_test_##name(struct ext4_fs_context *ctx, int flag)\ +{ \ + return ((ctx->vals_s_##name & flag) != 0); \ +} \ + +EXT4_SET_CTX(flags); +EXT4_SET_CTX(mount_opt); +EXT4_SET_CTX(mount_opt2); +EXT4_SET_CTX(mount_flags); + static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) { struct ext4_fs_context *ctx = fc->fs_private; - struct ext4_sb_info *sbi = fc->s_fs_info; - struct super_block *sb = sbi->s_sb; struct fs_parse_result result; const struct mount_opts *m; int is_remount; @@ -2300,20 +2337,20 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) param->key); return 1; case Opt_abort: - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); return 1; case Opt_i_version: - sb->s_flags |= SB_I_VERSION; + ctx_set_flags(ctx, SB_I_VERSION); return 1; case Opt_lazytime: - sb->s_flags |= SB_LAZYTIME; + ctx_set_flags(ctx, SB_LAZYTIME); return 1; case Opt_nolazytime: - sb->s_flags &= ~SB_LAZYTIME; + ctx_clear_flags(ctx, SB_LAZYTIME); return 1; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - sb->s_flags |= SB_INLINECRYPT; + ctx_set_flags(ctx, SB_INLINECRYPT); #else ext4_msg(NULL, KERN_ERR, "inline encryption not supported"); #endif @@ -2340,21 +2377,22 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (m->flags & MOPT_EXPLICIT) { if (m->mount_opt & EXT4_MOUNT_DELALLOC) { - set_opt2(sb, EXPLICIT_DELALLOC); + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC); } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { - set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM); + ctx_set_mount_opt2(ctx, + EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM); } else return -EINVAL; } if (m->flags & MOPT_CLEAR_ERR) - clear_opt(sb, ERRORS_MASK); + ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK); if (m->flags & MOPT_NOSUPPORT) { ext4_msg(NULL, KERN_ERR, "%s option not supported", param->key); } else if (token == Opt_commit) { if (result.uint_32 == 0) - sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE; + ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE; else if (result.uint_32 > INT_MAX / HZ) { ext4_msg(NULL, KERN_ERR, "Invalid commit interval %d, " @@ -2362,21 +2400,22 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) result.uint_32, INT_MAX / HZ); return -EINVAL; } - sbi->s_commit_interval = HZ * result.uint_32; + ctx->s_commit_interval = HZ * result.uint_32; + ctx->spec |= EXT4_SPEC_s_commit_interval; } else if (token == Opt_debug_want_extra_isize) { - if ((result.uint_32 & 1) || - (result.uint_32 < 4) || - (result.uint_32 > - (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) { + if ((result.uint_32 & 1) || (result.uint_32 < 4)) { ext4_msg(NULL, KERN_ERR, "Invalid want_extra_isize %d", result.uint_32); return -EINVAL; } - sbi->s_want_extra_isize = result.uint_32; + ctx->s_want_extra_isize = result.uint_32; + ctx->spec |= EXT4_SPEC_s_want_extra_isize; } else if (token == Opt_max_batch_time) { - sbi->s_max_batch_time = result.uint_32; + ctx->s_max_batch_time = result.uint_32; + ctx->spec |= EXT4_SPEC_s_max_batch_time; } else if (token == Opt_min_batch_time) { - sbi->s_min_batch_time = result.uint_32; + ctx->s_min_batch_time = result.uint_32; + ctx->spec |= EXT4_SPEC_s_min_batch_time; } else if (token == Opt_inode_readahead_blks) { if (result.uint_32 && (result.uint_32 > (1 << 30) || @@ -2386,20 +2425,25 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) "0 or a power of 2 smaller than 2^31"); return -EINVAL; } - sbi->s_inode_readahead_blks = result.uint_32; + ctx->s_inode_readahead_blks = result.uint_32; + ctx->spec |= EXT4_SPEC_s_inode_readahead_blks; } else if (token == Opt_init_itable) { - set_opt(sb, INIT_INODE_TABLE); - sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE); + ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; if (param->type == fs_value_is_string) - sbi->s_li_wait_mult = result.uint_32; + ctx->s_li_wait_mult = result.uint_32; + ctx->spec |= EXT4_SPEC_s_li_wait_mult; } else if (token == Opt_max_dir_size_kb) { - sbi->s_max_dir_size_kb = result.uint_32; + ctx->s_max_dir_size_kb = result.uint_32; + ctx->spec |= EXT4_SPEC_s_max_dir_size_kb; #ifdef CONFIG_EXT4_DEBUG } else if (token == Opt_fc_debug_max_replay) { - sbi->s_fc_debug_max_replay = result.uint_32; + ctx->s_fc_debug_max_replay = result.uint_32; + ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay; #endif } else if (token == Opt_stripe) { - sbi->s_stripe = result.uint_32; + ctx->s_stripe = result.uint_32; + ctx->spec |= EXT4_SPEC_s_stripe; } else if (token == Opt_resuid) { uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) { @@ -2407,7 +2451,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) result.uint_32); return -EINVAL; } - sbi->s_resuid = uid; + ctx->s_resuid = uid; + ctx->spec |= EXT4_SPEC_s_resuid; } else if (token == Opt_resgid) { gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) { @@ -2415,7 +2460,8 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) result.uint_32); return -EINVAL; } - sbi->s_resgid = gid; + ctx->s_resgid = gid; + ctx->spec |= EXT4_SPEC_s_resgid; } else if (token == Opt_journal_dev) { if (is_remount) { ext4_msg(NULL, KERN_ERR, @@ -2423,6 +2469,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; } ctx->journal_devnum = result.uint_32; + ctx->spec |= EXT4_SPEC_JOURNAL_DEV; } else if (token == Opt_journal_path) { struct inode *journal_inode; struct path path; @@ -2443,6 +2490,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) journal_inode = d_inode(path.dentry); ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev); + ctx->spec |= EXT4_SPEC_JOURNAL_DEV; path_put(&path); } else if (token == Opt_journal_ioprio) { if (result.uint_32 > 7) { @@ -2452,24 +2500,37 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } ctx->journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32); + ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO; } else if (token == Opt_test_dummy_encryption) { - return ext4_set_test_dummy_encryption(sb, param, is_remount); - } else if (m->flags & MOPT_DATAJ) { - if (is_remount) { - if (!sbi->s_journal) - ext4_msg(NULL, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); - else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) { - ext4_msg(NULL, KERN_ERR, - "Cannot change data mode on remount"); - return -EINVAL; - } - } else { - clear_opt(sb, DATA_FLAGS); - sbi->s_mount_opt |= m->mount_opt; +#ifdef CONFIG_FS_ENCRYPTION + if (param->type == fs_value_is_flag) { + ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; + ctx->test_dummy_enc_arg = NULL; + return 1; + } + if (*param->string && + !(!strcmp(param->string, "v1") || + !strcmp(param->string, "v2"))) { + ext4_msg(NULL, KERN_WARNING, + "Value of option \"%s\" is unrecognized", + param->key); + return -EINVAL; } + ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; + ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size, + GFP_KERNEL); +#else + ext4_msg(NULL, KERN_WARNING, + "Test dummy encryption mount option ignored"); +#endif + } else if (m->flags & MOPT_DATAJ) { + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS); + ctx_set_mount_opt(ctx, m->mount_opt); + ctx->spec |= EXT4_SPEC_DATAJ; #ifdef CONFIG_QUOTA } else if (m->flags & MOPT_QFMT) { ctx->s_jquota_fmt = m->mount_opt; + ctx->spec |= EXT4_SPEC_JQFMT; #endif } else if (token == Opt_dax || token == Opt_dax_always || token == Opt_dax_inode || token == Opt_dax_never) { @@ -2477,56 +2538,30 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) switch (token) { case Opt_dax: case Opt_dax_always: - if (is_remount && - (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || - (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) { - fail_dax_change_remount: - ext4_msg(NULL, KERN_ERR, "can't change " - "dax mount option while remounting"); - return -EINVAL; - } - if (is_remount && - (test_opt(sb, DATA_FLAGS) == - EXT4_MOUNT_JOURNAL_DATA)) { - ext4_msg(NULL, KERN_ERR, "can't mount with " - "both data=journal and dax"); - return -EINVAL; - } - ext4_msg(NULL, KERN_WARNING, - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS; - sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER; + ctx_set_mount_opt(ctx, m->mount_opt); + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); break; case Opt_dax_never: - if (is_remount && - (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || - (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) - goto fail_dax_change_remount; - sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER; - sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; + ctx_set_mount_opt2(ctx, m->mount_opt); + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); break; case Opt_dax_inode: - if (is_remount && - ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || - (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || - !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) - goto fail_dax_change_remount; - sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; - sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER; + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); /* Strictly for printing options */ - sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE; + ctx_set_mount_opt2(ctx, m->mount_opt); break; } #else ext4_msg(NULL, KERN_INFO, "dax option not supported"); - sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER; - sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); return -EINVAL; #endif } else if (token == Opt_data_err_abort) { - sbi->s_mount_opt |= m->mount_opt; + ctx_set_mount_opt(ctx, m->mount_opt); } else if (token == Opt_data_err_ignore) { - sbi->s_mount_opt &= ~m->mount_opt; + ctx_clear_mount_opt(ctx, m->mount_opt); } else if (token == Opt_mb_optimize_scan) { if (result.int_32 != 0 && result.int_32 != 1) { ext4_msg(NULL, KERN_WARNING, @@ -2552,14 +2587,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) } if (m->flags & MOPT_2) { if (set != 0) - sbi->s_mount_opt2 |= m->mount_opt; + ctx_set_mount_opt2(ctx, m->mount_opt); else - sbi->s_mount_opt2 &= ~m->mount_opt; + ctx_clear_mount_opt2(ctx, m->mount_opt); } else { if (set != 0) - sbi->s_mount_opt |= m->mount_opt; + ctx_set_mount_opt(ctx, m->mount_opt); else - sbi->s_mount_opt &= ~m->mount_opt; + ctx_clear_mount_opt(ctx, m->mount_opt); } } return 1; @@ -2624,8 +2659,9 @@ static int parse_options(char *options, struct super_block *sb, if (ret < 0) return 0; - if (ctx->qname_spec) - ext4_apply_quota_options(&fc, sb); + ret = ext4_apply_options(&fc, sb); + if (ret < 0) + return 0; return 1; } @@ -2634,20 +2670,30 @@ static void ext4_apply_quota_options(struct fs_context *fc, struct super_block *sb) { #ifdef CONFIG_QUOTA + bool quota_feature = ext4_has_feature_quota(sb); struct ext4_fs_context *ctx = fc->fs_private; struct ext4_sb_info *sbi = EXT4_SB(sb); char *qname; int i; - for (i = 0; i < EXT4_MAXQUOTAS; i++) { - if (!(ctx->qname_spec & (1 << i))) - continue; - qname = ctx->s_qf_names[i]; /* May be NULL */ - ctx->s_qf_names[i] = NULL; - kfree(sbi->s_qf_names[i]); - rcu_assign_pointer(sbi->s_qf_names[i], qname); - set_opt(sb, QUOTA); + if (quota_feature) + return; + + if (ctx->spec & EXT4_SPEC_JQUOTA) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + if (!(ctx->qname_spec & (1 << i))) + continue; + + qname = ctx->s_qf_names[i]; /* May be NULL */ + ctx->s_qf_names[i] = NULL; + kfree(sbi->s_qf_names[i]); + rcu_assign_pointer(sbi->s_qf_names[i], qname); + set_opt(sb, QUOTA); + } } + + if (ctx->spec & EXT4_SPEC_JQFMT) + sbi->s_jquota_fmt = ctx->s_jquota_fmt; #endif } @@ -2662,17 +2708,36 @@ static int ext4_check_quota_consistency(struct fs_context *fc, struct ext4_sb_info *sbi = EXT4_SB(sb); bool quota_feature = ext4_has_feature_quota(sb); bool quota_loaded = sb_any_quota_loaded(sb); - int i; + bool usr_qf_name, grp_qf_name, usrquota, grpquota; + int quota_flags, i; + + /* + * We do the test below only for project quotas. 'usrquota' and + * 'grpquota' mount options are allowed even without quota feature + * to support legacy quotas in quota files. + */ + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) && + !ext4_has_feature_project(sb)) { + ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. " + "Cannot enable project quota enforcement."); + return -EINVAL; + } + + quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | + EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA; + if (quota_loaded && + ctx->mask_s_mount_opt & quota_flags && + !ctx_test_mount_opt(ctx, quota_flags)) + goto err_quota_change; - if (ctx->qname_spec && quota_loaded) { - if (quota_feature) - goto err_feature; + if (ctx->spec & EXT4_SPEC_JQUOTA) { for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (!(ctx->qname_spec & (1 << i))) continue; - if (!!sbi->s_qf_names[i] != !!ctx->s_qf_names[i]) + if (quota_loaded && + !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i]) goto err_jquota_change; if (sbi->s_qf_names[i] && ctx->s_qf_names[i] && @@ -2680,17 +2745,60 @@ static int ext4_check_quota_consistency(struct fs_context *fc, ctx->s_qf_names[i]) != 0) goto err_jquota_specified; } + + if (quota_feature) { + ext4_msg(NULL, KERN_INFO, + "Journaled quota options ignored when " + "QUOTA feature is enabled"); + return 0; + } } - if (ctx->s_jquota_fmt) { + if (ctx->spec & EXT4_SPEC_JQFMT) { if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded) - goto err_quota_change; + goto err_jquota_change; if (quota_feature) { ext4_msg(NULL, KERN_INFO, "Quota format mount options " "ignored when QUOTA feature is enabled"); return 0; } } + + /* Make sure we don't mix old and new quota format */ + usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) || + ctx->s_qf_names[USRQUOTA]); + grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) || + ctx->s_qf_names[GRPQUOTA]); + + usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) || + test_opt(sb, USRQUOTA)); + + grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) || + test_opt(sb, GRPQUOTA)); + + if (usr_qf_name) { + ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA); + usrquota = false; + } + if (grp_qf_name) { + ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA); + grpquota = false; + } + + if (usr_qf_name || grp_qf_name) { + if (usrquota || grpquota) { + ext4_msg(NULL, KERN_ERR, "old and new quota " + "format mixing"); + return -EINVAL; + } + + if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) { + ext4_msg(NULL, KERN_ERR, "journaled quota format " + "not specified"); + return -EINVAL; + } + } + return 0; err_quota_change: @@ -2705,10 +2813,6 @@ static int ext4_check_quota_consistency(struct fs_context *fc, ext4_msg(NULL, KERN_ERR, "%s quota file already specified", QTYPE2NAME(i)); return -EINVAL; -err_feature: - ext4_msg(NULL, KERN_ERR, "Journaled quota options ignored " - "when QUOTA feature is enabled"); - return 0; #else return 0; #endif @@ -2718,6 +2822,8 @@ static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb) { struct ext4_fs_context *ctx = fc->fs_private; + struct ext4_sb_info *sbi = fc->s_fs_info; + int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { ext4_msg(NULL, KERN_ERR, @@ -2730,57 +2836,146 @@ static int ext4_check_opt_consistency(struct fs_context *fc, return -EINVAL; } + if (ctx->s_want_extra_isize > + (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) { + ext4_msg(NULL, KERN_ERR, + "Invalid want_extra_isize %d", + ctx->s_want_extra_isize); + return -EINVAL; + } + + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) { + int blocksize = + BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); + if (blocksize < PAGE_SIZE) + ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an " + "experimental mount option 'dioread_nolock' " + "for blocksize < PAGE_SIZE"); + } + +#ifdef CONFIG_FS_ENCRYPTION + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. + */ + if ((ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) && + is_remount && !sbi->s_dummy_enc_policy.policy) { + ext4_msg(NULL, KERN_WARNING, + "Can't set test_dummy_encryption on remount"); + return -1; + } +#endif + + if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) { + if (!sbi->s_journal) { + ext4_msg(NULL, KERN_WARNING, + "Remounting file system with no journal " + "so ignoring journalled data option"); + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS); + } else if (ctx->mask_s_mount_opt & EXT4_MOUNT_DATA_FLAGS) { + ext4_msg(NULL, KERN_ERR, "Cannot change data mode " + "on remount"); + return -EINVAL; + } + } + + if (is_remount) { + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) && + (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { + ext4_msg(NULL, KERN_ERR, "can't mount with " + "both data=journal and dax"); + return -EINVAL; + } + + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) && + (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || + (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) { +fail_dax_change_remount: + ext4_msg(NULL, KERN_ERR, "can't change " + "dax mount option while remounting"); + return -EINVAL; + } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) && + (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || + (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) { + goto fail_dax_change_remount; + } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) && + ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || + (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || + !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) { + goto fail_dax_change_remount; + } + } + return ext4_check_quota_consistency(fc, sb); } -static int ext4_validate_options(struct fs_context *fc) +static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) { + struct ext4_fs_context *ctx = fc->fs_private; struct ext4_sb_info *sbi = fc->s_fs_info; - struct super_block *sb = sbi->s_sb; + int ret = 0; + + sbi->s_mount_opt &= ~ctx->mask_s_mount_opt; + sbi->s_mount_opt |= ctx->vals_s_mount_opt; + sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2; + sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2; + sbi->s_mount_flags &= ~ctx->mask_s_mount_flags; + sbi->s_mount_flags |= ctx->vals_s_mount_flags; + sb->s_flags &= ~ctx->mask_s_flags; + sb->s_flags |= ctx->vals_s_flags; + +#define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; }) + APPLY(s_commit_interval); + APPLY(s_stripe); + APPLY(s_max_batch_time); + APPLY(s_min_batch_time); + APPLY(s_want_extra_isize); + APPLY(s_inode_readahead_blks); + APPLY(s_max_dir_size_kb); + APPLY(s_li_wait_mult); + APPLY(s_resgid); + APPLY(s_resuid); + +#ifdef CONFIG_EXT4_DEBUG + APPLY(s_fc_debug_max_replay); +#endif + + ext4_apply_quota_options(fc, sb); + + if (ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) + ret = ext4_set_test_dummy_encryption(sb, ctx->test_dummy_enc_arg); + + return ret; +} + + +static int ext4_validate_options(struct fs_context *fc) +{ #ifdef CONFIG_QUOTA + struct ext4_fs_context *ctx = fc->fs_private; char *usr_qf_name, *grp_qf_name; - /* - * We do the test below only for project quotas. 'usrquota' and - * 'grpquota' mount options are allowed even without quota feature - * to support legacy quotas in quota files. - */ - if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) { - ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. " - "Cannot enable project quota enforcement."); - return -EINVAL; - } - usr_qf_name = get_qf_name(sb, sbi, USRQUOTA); - grp_qf_name = get_qf_name(sb, sbi, GRPQUOTA); + + usr_qf_name = ctx->s_qf_names[USRQUOTA]; + grp_qf_name = ctx->s_qf_names[GRPQUOTA]; + if (usr_qf_name || grp_qf_name) { - if (test_opt(sb, USRQUOTA) && usr_qf_name) - clear_opt(sb, USRQUOTA); + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name) + ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA); - if (test_opt(sb, GRPQUOTA) && grp_qf_name) - clear_opt(sb, GRPQUOTA); + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name) + ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA); - if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { + if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) || + ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) { ext4_msg(NULL, KERN_ERR, "old and new quota " - "format mixing"); - return -EINVAL; - } - - if (!sbi->s_jquota_fmt) { - ext4_msg(NULL, KERN_ERR, "journaled quota format " - "not specified"); + "format mixing"); return -EINVAL; } } #endif - if (test_opt(sb, DIOREAD_NOLOCK)) { - int blocksize = - BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (blocksize < PAGE_SIZE) - ext4_msg(NULL, KERN_WARNING, - "Warning: mounting with an experimental " - "option 'dioread_nolock' for " - "blocksize < PAGE_SIZE"); - } - return 0; + return 1; } static inline void ext4_show_quota_options(struct seq_file *seq, From 7edfd85b1ffd36593011dec96ab395912a340418 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:53 +0200 Subject: [PATCH 09/48] ext4: Completely separate options parsing and sb setup The new mount api separates option parsing and super block setup into two distinct steps and so we need to separate the options parsing out of the ext4_fill_super() and ext4_remount(). In order to achieve this we have to create new ext4_fill_super() and ext4_remount() functions which will serve its purpose only until we actually do convert to the new api (as such they are only temporary for this patch series) and move the option parsing out of the old function which will now be renamed to __ext4_fill_super() and __ext4_remount(). There is a small complication in the fact that while the mount option parsing is going to happen before we get to __ext4_fill_super(), the mount options stored in the super block itself needs to be applied first, before the user specified mount options. So with this patch we're going through the following sequence: - parse user provided options (including sb block) - initialize sbi and store s_sb_block if provided - in __ext4_fill_super() - read the super block - parse and apply options specified in s_mount_opts - check and apply user provided options stored in ctx - continue with the regular ext4_fill_super operation It's not exactly the most elegant solution, but if we still want to support s_mount_opts we have to do it in this order. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-10-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 399 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 264 insertions(+), 135 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1e4908f5f593a..4e443657d88e3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1959,29 +1959,6 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; -static ext4_fsblk_t get_sb_block(void **data) -{ - ext4_fsblk_t sb_block; - char *options = (char *) *data; - - if (!options || strncmp(options, "sb=", 3) != 0) - return 1; /* Default location */ - - options += 3; - /* TODO: use simple_strtoll with >32bit ext4 */ - sb_block = simple_strtoul(options, &options, 0); - if (*options && *options != ',') { - printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", - (char *) *data); - return 1; - } - if (*options == ',') - options++; - *data = (void *) options; - - return sb_block; -} - #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) #define DEFAULT_MB_OPTIMIZE_SCAN (-1) @@ -2184,6 +2161,7 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) #define EXT4_SPEC_s_resgid (1 << 15) #define EXT4_SPEC_s_commit_interval (1 << 16) #define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) +#define EXT4_SPEC_s_sb_block (1 << 18) struct ext4_fs_context { char *s_qf_names[EXT4_MAXQUOTAS]; @@ -2216,6 +2194,7 @@ struct ext4_fs_context { u32 s_min_batch_time; kuid_t s_resuid; kgid_t s_resgid; + ext4_fsblk_t s_sb_block; }; #ifdef CONFIG_QUOTA @@ -2331,7 +2310,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5"); break; case Opt_sb: - return 1; /* handled by get_sb_block() */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + ext4_msg(NULL, KERN_WARNING, + "Ignoring %s option on remount", param->key); + } else { + ctx->s_sb_block = result.uint_32; + ctx->spec |= EXT4_SPEC_s_sb_block; + } + return 1; case Opt_removed: ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option", param->key); @@ -2600,24 +2586,14 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) return 1; } -static int parse_options(char *options, struct super_block *sb, - struct ext4_fs_context *ctx, - int is_remount) +static int parse_options(struct fs_context *fc, char *options) { struct fs_parameter param; - struct fs_context fc; int ret; char *key; if (!options) - return 1; - - memset(&fc, 0, sizeof(fc)); - fc.fs_private = ctx; - fc.s_fs_info = EXT4_SB(sb); - - if (is_remount) - fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; + return 0; while ((key = strsep(&options, ",")) != NULL) { if (*key) { @@ -2636,34 +2612,83 @@ static int parse_options(char *options, struct super_block *sb, param.string = kmemdup_nul(value, v_len, GFP_KERNEL); if (!param.string) - return 0; + return -ENOMEM; param.type = fs_value_is_string; } param.key = key; param.size = v_len; - ret = handle_mount_opt(&fc, ¶m); + ret = handle_mount_opt(fc, ¶m); if (param.string) kfree(param.string); if (ret < 0) - return 0; + return ret; } } - ret = ext4_validate_options(&fc); + ret = ext4_validate_options(fc); if (ret < 0) - return 0; + return ret; - ret = ext4_check_opt_consistency(&fc, sb); - if (ret < 0) + return 0; +} + +static int parse_apply_sb_mount_options(struct super_block *sb, + struct ext4_fs_context *m_ctx) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *s_mount_opts = NULL; + struct ext4_fs_context *s_ctx = NULL; + struct fs_context *fc = NULL; + int ret = -ENOMEM; + + if (!sbi->s_es->s_mount_opts[0]) return 0; - ret = ext4_apply_options(&fc, sb); + s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, + sizeof(sbi->s_es->s_mount_opts), + GFP_KERNEL); + if (!s_mount_opts) + return ret; + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) + goto out_free; + + s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL); + if (!s_ctx) + goto out_free; + + fc->fs_private = s_ctx; + fc->s_fs_info = sbi; + + ret = parse_options(fc, s_mount_opts); if (ret < 0) - return 0; + goto parse_failed; - return 1; + ret = ext4_check_opt_consistency(fc, sb); + if (ret < 0) { +parse_failed: + ext4_msg(sb, KERN_WARNING, + "failed to parse options in superblock: %s", + s_mount_opts); + ret = 0; + goto out_free; + } + + if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV) + m_ctx->journal_devnum = s_ctx->journal_devnum; + if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO) + m_ctx->journal_ioprio = s_ctx->journal_ioprio; + + ret = ext4_apply_options(fc, sb); + +out_free: + kfree(s_ctx); + kfree(fc); + kfree(s_mount_opts); + return ret; } static void ext4_apply_quota_options(struct fs_context *fc, @@ -4358,21 +4383,53 @@ static void ext4_setup_csum_trigger(struct super_block *sb, sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger; } -static int ext4_fill_super(struct super_block *sb, void *data, int silent) +static void ext4_free_sbi(struct ext4_sb_info *sbi) +{ + if (!sbi) + return; + + kfree(sbi->s_blockgroup_lock); + fs_put_dax(sbi->s_daxdev); + kfree(sbi); +} + +static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb) +{ + struct ext4_sb_info *sbi; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return NULL; + + sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev); + + sbi->s_blockgroup_lock = + kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); + + if (!sbi->s_blockgroup_lock) + goto err_out; + + sb->s_fs_info = sbi; + sbi->s_sb = sb; + return sbi; +err_out: + fs_put_dax(sbi->s_daxdev); + kfree(sbi); + return NULL; +} + +static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb, + int silent) { - struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); - char *orig_data = kstrdup(data, GFP_KERNEL); struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; - struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct ext4_sb_info *sbi = EXT4_SB(sb); struct flex_groups **flex_groups; ext4_fsblk_t block; - ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; unsigned long offset = 0; unsigned long def_mount_opts; struct inode *root; - const char *descr; int ret = -ENOMEM; int blocksize, clustersize; unsigned int db_count; @@ -4381,32 +4438,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) __u64 blocks_count; int err = 0; ext4_group_t first_not_zeroed; - struct ext4_fs_context parsed_opts = {0}; + struct ext4_fs_context *ctx = fc->fs_private; /* Set defaults for the variables that will be set during parsing */ - parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - parsed_opts.journal_devnum = 0; - parsed_opts.mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; - - if ((data && !orig_data) || !sbi) - goto out_free_base; + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; - sbi->s_daxdev = dax_dev; - sbi->s_blockgroup_lock = - kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); - if (!sbi->s_blockgroup_lock) - goto out_free_base; - - sb->s_fs_info = sbi; - sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; - sbi->s_sb_block = sb_block; sbi->s_sectors_written_start = part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); - /* Cleanup superblock name */ - strreplace(sb->s_id, '/', '!'); - /* -EINVAL is default */ ret = -EINVAL; blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); @@ -4420,10 +4461,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * block sizes. We need to calculate the offset from buffer start. */ if (blocksize != EXT4_MIN_BLOCK_SIZE) { - logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); } else { - logical_sb_block = sb_block; + logical_sb_block = sbi->s_sb_block; } bh = ext4_sb_bread_unmovable(sb, logical_sb_block); @@ -4628,21 +4669,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } - if (sbi->s_es->s_mount_opts[0]) { - char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, - sizeof(sbi->s_es->s_mount_opts), - GFP_KERNEL); - if (!s_mount_opts) - goto failed_mount; - if (!parse_options(s_mount_opts, sb, &parsed_opts, 0)) { - ext4_msg(sb, KERN_WARNING, - "failed to parse options in superblock: %s", - s_mount_opts); - } - kfree(s_mount_opts); - } + err = parse_apply_sb_mount_options(sb, ctx); + if (err < 0) + goto failed_mount; + sbi->s_def_mount_opt = sbi->s_mount_opt; - if (!parse_options((char *) data, sb, &parsed_opts, 0)) + + err = ext4_check_opt_consistency(fc, sb); + if (err < 0) + goto failed_mount; + + err = ext4_apply_options(fc, sb); + if (err < 0) goto failed_mount; #ifdef CONFIG_UNICODE @@ -4781,7 +4819,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0, + if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0, bdev_nr_sectors(sb->s_bdev))) set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); @@ -4819,7 +4857,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); bh = ext4_sb_bread_unmovable(sb, logical_sb_block); if (IS_ERR(bh)) { @@ -5135,7 +5173,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { - err = ext4_load_journal(sb, es, parsed_opts.journal_devnum); + err = ext4_load_journal(sb, es, ctx->journal_devnum); if (err) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && @@ -5235,7 +5273,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount_wq; } - set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio); + set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); sbi->s_journal->j_submit_inode_data_buffers = ext4_journal_submit_inode_data_buffers; @@ -5347,9 +5385,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * turned off by passing "mb_optimize_scan=0". This can also be * turned on forcefully by passing "mb_optimize_scan=1". */ - if (parsed_opts.mb_optimize_scan == 1) + if (ctx->mb_optimize_scan == 1) set_opt2(sb, MB_OPTIMIZE_SCAN); - else if (parsed_opts.mb_optimize_scan == 0) + else if (ctx->mb_optimize_scan == 0) clear_opt2(sb, MB_OPTIMIZE_SCAN); else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) set_opt2(sb, MB_OPTIMIZE_SCAN); @@ -5451,15 +5489,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (err) goto failed_mount9; } - if (EXT4_SB(sb)->s_journal) { - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - descr = " journalled data mode"; - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - descr = " ordered data mode"; - else - descr = " writeback data mode"; - } else - descr = "out journal"; if (test_opt(sb, DISCARD)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); @@ -5469,14 +5498,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "the device does not support discard"); } - if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) - ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %.*s%s%s. Quota mode: %s.", descr, - (int) sizeof(sbi->s_es->s_mount_opts), - sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data, - ext4_quota_mode(sb)); - if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -5487,7 +5508,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) atomic_set(&sbi->s_warning_count, 0); atomic_set(&sbi->s_msg_count, 0); - kfree(orig_data); return 0; cantfind_ext4: @@ -5573,14 +5593,92 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_blkdev_remove(sbi); out_fail: sb->s_fs_info = NULL; - kfree(sbi->s_blockgroup_lock); -out_free_base: - kfree(sbi); - kfree(orig_data); - fs_put_dax(dax_dev); return err ? err : ret; } +static void cleanup_ctx(struct ext4_fs_context *ctx) +{ + int i; + + if (!ctx) + return; + + for (i = 0; i < EXT4_MAXQUOTAS; i++) { + kfree(ctx->s_qf_names[i]); + } + + kfree(ctx->test_dummy_enc_arg); +} + +static int ext4_fill_super(struct super_block *sb, void *data, int silent) +{ + struct ext4_fs_context ctx; + struct ext4_sb_info *sbi; + struct fs_context fc; + const char *descr; + char *orig_data; + int ret = -ENOMEM; + + orig_data = kstrdup(data, GFP_KERNEL); + if (data && !orig_data) + return -ENOMEM; + + /* Cleanup superblock name */ + strreplace(sb->s_id, '/', '!'); + + memset(&fc, 0, sizeof(fc)); + memset(&ctx, 0, sizeof(ctx)); + fc.fs_private = &ctx; + + ret = parse_options(&fc, (char *) data); + if (ret < 0) + goto free_data; + + sbi = ext4_alloc_sbi(sb); + if (!sbi) { + ret = -ENOMEM; + goto free_data; + } + + fc.s_fs_info = sbi; + + sbi->s_sb_block = 1; /* Default super block location */ + if (ctx.spec & EXT4_SPEC_s_sb_block) + sbi->s_sb_block = ctx.s_sb_block; + + ret = __ext4_fill_super(&fc, sb, silent); + if (ret < 0) + goto free_sbi; + + if (EXT4_SB(sb)->s_journal) { + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + descr = " journalled data mode"; + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + descr = " ordered data mode"; + else + descr = " writeback data mode"; + } else + descr = "out journal"; + + if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " + "Opts: %.*s%s%s. Quota mode: %s.", descr, + (int) sizeof(sbi->s_es->s_mount_opts), + sbi->s_es->s_mount_opts, + *sbi->s_es->s_mount_opts ? "; " : "", orig_data, + ext4_quota_mode(sb)); + + kfree(orig_data); + cleanup_ctx(&ctx); + return 0; +free_sbi: + ext4_free_sbi(sbi); +free_data: + kfree(orig_data); + cleanup_ctx(&ctx); + return ret; +} + /* * Setup any per-fs journal parameters now. We'll do this both on * initial mount, once the journal has been initialised but before we've @@ -6209,8 +6307,10 @@ struct ext4_mount_options { #endif }; -static int ext4_remount(struct super_block *sb, int *flags, char *data) +static int __ext4_remount(struct fs_context *fc, struct super_block *sb, + int *flags) { + struct ext4_fs_context *ctx = fc->fs_private; struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long old_sb_flags, vfs_flags; @@ -6222,14 +6322,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int i, j; char *to_free[EXT4_MAXQUOTAS]; #endif - char *orig_data = kstrdup(data, GFP_KERNEL); - struct ext4_fs_context parsed_opts; - - parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - parsed_opts.journal_devnum = 0; - if (data && !orig_data) - return -ENOMEM; + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; /* Store the original options */ old_sb_flags = sb->s_flags; @@ -6250,14 +6344,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!old_opts.s_qf_names[i]) { for (j = 0; j < i; j++) kfree(old_opts.s_qf_names[j]); - kfree(orig_data); return -ENOMEM; } } else old_opts.s_qf_names[i] = NULL; #endif if (sbi->s_journal && sbi->s_journal->j_task->io_context) - parsed_opts.journal_ioprio = + ctx->journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; /* @@ -6268,10 +6361,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) vfs_flags = SB_LAZYTIME | SB_I_VERSION; sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); - if (!parse_options(data, sb, &parsed_opts, 1)) { - err = -EINVAL; - goto restore_opts; - } + ext4_apply_options(fc, sb); if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { @@ -6318,7 +6408,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) { ext4_init_journal_params(sb, sbi->s_journal); - set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio); + set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); } /* Flush outstanding errors before changing fs state */ @@ -6485,9 +6575,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) */ *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); - ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", - orig_data, ext4_quota_mode(sb)); - kfree(orig_data); return 0; restore_opts: @@ -6513,10 +6600,52 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #endif if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) ext4_stop_mmpd(sbi); - kfree(orig_data); return err; } +static int ext4_remount(struct super_block *sb, int *flags, char *data) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_fs_context ctx; + struct fs_context fc; + char *orig_data; + int ret; + + orig_data = kstrdup(data, GFP_KERNEL); + if (data && !orig_data) + return -ENOMEM; + + memset(&fc, 0, sizeof(fc)); + memset(&ctx, 0, sizeof(ctx)); + + fc.fs_private = &ctx; + fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; + fc.s_fs_info = sbi; + + ret = parse_options(&fc, (char *) data); + if (ret < 0) + goto err_out; + + ret = ext4_check_opt_consistency(&fc, sb); + if (ret < 0) + goto err_out; + + ret = __ext4_remount(&fc, sb, flags); + if (ret < 0) + goto err_out; + + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", + orig_data, ext4_quota_mode(sb)); + cleanup_ctx(&ctx); + kfree(orig_data); + return 0; + +err_out: + cleanup_ctx(&ctx); + kfree(orig_data); + return ret; +} + #ifdef CONFIG_QUOTA static int ext4_statfs_project(struct super_block *sb, kprojid_t projid, struct kstatfs *buf) From 02f960f8db1cd0aa9c182f8804b2b41ffd2c37b2 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:54 +0200 Subject: [PATCH 10/48] ext4: clean up return values in handle_mount_opt() Clean up return values in handle_mount_opt() and rename the function to ext4_parse_param() Now we can use it in fs_context_operations as .parse_param. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-11-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4e443657d88e3..ebde5fb672221 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -92,6 +92,7 @@ static int ext4_validate_options(struct fs_context *fc); static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb); static int ext4_apply_options(struct fs_context *fc, struct super_block *sb); +static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param); /* * Lock ordering @@ -119,6 +120,11 @@ static int ext4_apply_options(struct fs_context *fc, struct super_block *sb); * transaction start -> page lock(s) -> i_data_sem (rw) */ +static const struct fs_context_operations ext4_context_ops = { + .parse_param = ext4_parse_param, +}; + + #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, @@ -2276,7 +2282,7 @@ EXT4_SET_CTX(mount_opt); EXT4_SET_CTX(mount_opt2); EXT4_SET_CTX(mount_flags); -static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) +static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct ext4_fs_context *ctx = fc->fs_private; struct fs_parse_result result; @@ -2317,30 +2323,30 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx->s_sb_block = result.uint_32; ctx->spec |= EXT4_SPEC_s_sb_block; } - return 1; + return 0; case Opt_removed: ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option", param->key); - return 1; + return 0; case Opt_abort: ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); - return 1; + return 0; case Opt_i_version: ctx_set_flags(ctx, SB_I_VERSION); - return 1; + return 0; case Opt_lazytime: ctx_set_flags(ctx, SB_LAZYTIME); - return 1; + return 0; case Opt_nolazytime: ctx_clear_flags(ctx, SB_LAZYTIME); - return 1; + return 0; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT ctx_set_flags(ctx, SB_INLINECRYPT); #else ext4_msg(NULL, KERN_ERR, "inline encryption not supported"); #endif - return 1; + return 0; case Opt_errors: case Opt_data: case Opt_data_err: @@ -2492,7 +2498,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) if (param->type == fs_value_is_flag) { ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; ctx->test_dummy_enc_arg = NULL; - return 1; + return 0; } if (*param->string && !(!strcmp(param->string, "v1") || @@ -2583,7 +2589,7 @@ static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param) ctx_clear_mount_opt(ctx, m->mount_opt); } } - return 1; + return 0; } static int parse_options(struct fs_context *fc, char *options) @@ -2619,7 +2625,7 @@ static int parse_options(struct fs_context *fc, char *options) param.key = key; param.size = v_len; - ret = handle_mount_opt(fc, ¶m); + ret = ext4_parse_param(fc, ¶m); if (param.string) kfree(param.string); if (ret < 0) From 97d8a670b4531437d5b842cf68dafa6d1a932ddf Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:55 +0200 Subject: [PATCH 11/48] ext4: change token2str() to use ext4_param_specs Change token2str() to use ext4_param_specs instead of tokens so that we can get rid of tokens entirely. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-12-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ebde5fb672221..285f18f643ca3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3046,12 +3046,12 @@ static inline void ext4_show_quota_options(struct seq_file *seq, static const char *token2str(int token) { - const struct match_token *t; + const struct fs_parameter_spec *spec; - for (t = tokens; t->token != Opt_err; t++) - if (t->token == token && !strchr(t->pattern, '=')) + for (spec = ext4_param_specs; spec->name != NULL; spec++) + if (spec->opt == token && !spec->type) break; - return t->pattern; + return spec->name; } /* From cebe85d570cf84804e848332d6721bc9e5300e07 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:56 +0200 Subject: [PATCH 12/48] ext4: switch to the new mount api Add the necessary functions for the fs_context_operations. Convert and rename ext4_remount() and ext4_fill_super() to ext4_get_tree() and ext4_reconfigure() respectively and switch the ext4 to use the new api. One user facing change is the fact that we no longer have access to the entire string of mount options provided by mount(2) since the mount api does not store it anywhere. As a result we can't print the options to the log as we did in the past after the successful mount. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-13-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 195 +++++++++++++++++++++--------------------------- 1 file changed, 86 insertions(+), 109 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 285f18f643ca3..f093bb2ccab16 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -75,12 +75,9 @@ static int ext4_mark_recovery_complete(struct super_block *sb, static int ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); -static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static int ext4_freeze(struct super_block *sb); -static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data); static inline int ext2_feature_set_ok(struct super_block *sb); static inline int ext3_feature_set_ok(struct super_block *sb); static void ext4_destroy_lazyinit_thread(void); @@ -93,6 +90,11 @@ static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb); static int ext4_apply_options(struct fs_context *fc, struct super_block *sb); static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param); +static int ext4_get_tree(struct fs_context *fc); +static int ext4_reconfigure(struct fs_context *fc); +static void ext4_fc_free(struct fs_context *fc); +static int ext4_init_fs_context(struct fs_context *fc); +static const struct fs_parameter_spec ext4_param_specs[]; /* * Lock ordering @@ -122,16 +124,20 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param); static const struct fs_context_operations ext4_context_ops = { .parse_param = ext4_parse_param, + .get_tree = ext4_get_tree, + .reconfigure = ext4_reconfigure, + .free = ext4_fc_free, }; #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static struct file_system_type ext2_fs_type = { - .owner = THIS_MODULE, - .name = "ext2", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "ext2", + .init_fs_context = ext4_init_fs_context, + .parameters = ext4_param_specs, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("ext2"); MODULE_ALIAS("ext2"); @@ -142,11 +148,12 @@ MODULE_ALIAS("ext2"); static struct file_system_type ext3_fs_type = { - .owner = THIS_MODULE, - .name = "ext3", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "ext3", + .init_fs_context = ext4_init_fs_context, + .parameters = ext4_param_specs, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("ext3"); MODULE_ALIAS("ext3"); @@ -1665,7 +1672,6 @@ static const struct super_operations ext4_sops = { .freeze_fs = ext4_freeze, .unfreeze_fs = ext4_unfreeze, .statfs = ext4_statfs, - .remount_fs = ext4_remount, .show_options = ext4_show_options, #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, @@ -2203,6 +2209,35 @@ struct ext4_fs_context { ext4_fsblk_t s_sb_block; }; +static void ext4_fc_free(struct fs_context *fc) +{ + struct ext4_fs_context *ctx = fc->fs_private; + int i; + + if (!ctx) + return; + + for (i = 0; i < EXT4_MAXQUOTAS; i++) + kfree(ctx->s_qf_names[i]); + + kfree(ctx->test_dummy_enc_arg); + kfree(ctx); +} + +int ext4_init_fs_context(struct fs_context *fc) +{ + struct xfs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + fc->fs_private = ctx; + fc->ops = &ext4_context_ops; + + return 0; +} + #ifdef CONFIG_QUOTA /* * Note the name of the specified quota file. @@ -5602,61 +5637,31 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb, return err ? err : ret; } -static void cleanup_ctx(struct ext4_fs_context *ctx) +static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) { - int i; - - if (!ctx) - return; - - for (i = 0; i < EXT4_MAXQUOTAS; i++) { - kfree(ctx->s_qf_names[i]); - } - - kfree(ctx->test_dummy_enc_arg); -} - -static int ext4_fill_super(struct super_block *sb, void *data, int silent) -{ - struct ext4_fs_context ctx; + struct ext4_fs_context *ctx = fc->fs_private; struct ext4_sb_info *sbi; - struct fs_context fc; const char *descr; - char *orig_data; - int ret = -ENOMEM; - - orig_data = kstrdup(data, GFP_KERNEL); - if (data && !orig_data) - return -ENOMEM; - - /* Cleanup superblock name */ - strreplace(sb->s_id, '/', '!'); - - memset(&fc, 0, sizeof(fc)); - memset(&ctx, 0, sizeof(ctx)); - fc.fs_private = &ctx; - - ret = parse_options(&fc, (char *) data); - if (ret < 0) - goto free_data; + int ret; sbi = ext4_alloc_sbi(sb); - if (!sbi) { + if (!sbi) ret = -ENOMEM; - goto free_data; - } - fc.s_fs_info = sbi; + fc->s_fs_info = sbi; + + /* Cleanup superblock name */ + strreplace(sb->s_id, '/', '!'); sbi->s_sb_block = 1; /* Default super block location */ - if (ctx.spec & EXT4_SPEC_s_sb_block) - sbi->s_sb_block = ctx.s_sb_block; + if (ctx->spec & EXT4_SPEC_s_sb_block) + sbi->s_sb_block = ctx->s_sb_block; - ret = __ext4_fill_super(&fc, sb, silent); + ret = __ext4_fill_super(fc, sb, fc->sb_flags & SB_SILENT); if (ret < 0) goto free_sbi; - if (EXT4_SB(sb)->s_journal) { + if (sbi->s_journal) { if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) descr = " journalled data mode"; else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) @@ -5668,23 +5673,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %.*s%s%s. Quota mode: %s.", descr, - (int) sizeof(sbi->s_es->s_mount_opts), - sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data, - ext4_quota_mode(sb)); - - kfree(orig_data); - cleanup_ctx(&ctx); + "Quota mode: %s.", descr, ext4_quota_mode(sb)); + return 0; + free_sbi: ext4_free_sbi(sbi); -free_data: - kfree(orig_data); - cleanup_ctx(&ctx); + fc->s_fs_info = NULL; return ret; } +static int ext4_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, ext4_fill_super); +} + /* * Setup any per-fs journal parameters now. We'll do this both on * initial mount, once the journal has been initialised but before we've @@ -6609,47 +6612,26 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb, return err; } -static int ext4_remount(struct super_block *sb, int *flags, char *data) +static int ext4_reconfigure(struct fs_context *fc) { - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_fs_context ctx; - struct fs_context fc; - char *orig_data; + struct super_block *sb = fc->root->d_sb; + int flags = fc->sb_flags; int ret; - orig_data = kstrdup(data, GFP_KERNEL); - if (data && !orig_data) - return -ENOMEM; - - memset(&fc, 0, sizeof(fc)); - memset(&ctx, 0, sizeof(ctx)); + fc->s_fs_info = EXT4_SB(sb); - fc.fs_private = &ctx; - fc.purpose = FS_CONTEXT_FOR_RECONFIGURE; - fc.s_fs_info = sbi; - - ret = parse_options(&fc, (char *) data); + ret = ext4_check_opt_consistency(fc, sb); if (ret < 0) - goto err_out; + return ret; - ret = ext4_check_opt_consistency(&fc, sb); + ret = __ext4_remount(fc, sb, &flags); if (ret < 0) - goto err_out; + return ret; - ret = __ext4_remount(&fc, sb, flags); - if (ret < 0) - goto err_out; + ext4_msg(sb, KERN_INFO, "re-mounted. Quota mode: %s.", + ext4_quota_mode(sb)); - ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", - orig_data, ext4_quota_mode(sb)); - cleanup_ctx(&ctx); - kfree(orig_data); return 0; - -err_out: - cleanup_ctx(&ctx); - kfree(orig_data); - return ret; } #ifdef CONFIG_QUOTA @@ -7134,12 +7116,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, } #endif -static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); -} - #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static inline void register_as_ext2(void) { @@ -7197,11 +7173,12 @@ static inline int ext3_feature_set_ok(struct super_block *sb) } static struct file_system_type ext4_fs_type = { - .owner = THIS_MODULE, - .name = "ext4", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, + .owner = THIS_MODULE, + .name = "ext4", + .init_fs_context = ext4_init_fs_context, + .parameters = ext4_param_specs, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ext4"); From ba2e524d918ab72c0e5edc02354bd6cb43d005f8 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2021 16:18:57 +0200 Subject: [PATCH 13/48] ext4: Remove unused match_table_t tokens Remove unused match_table_t, slim down mount_opts structure by removing unnecessary definitions, remove redundant MOPT_ flags and clean up ext4_parse_param() by converting the most of the if/else branching to switch except for the MOPT_SET/MOPT_CEAR handling. Signed-off-by: Lukas Czerner Reviewed-by: Carlos Maiolino Link: https://lore.kernel.org/r/20211027141857.33657-14-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 374 +++++++++++++++++------------------------------- 1 file changed, 131 insertions(+), 243 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f093bb2ccab16..6998c07c209a5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1689,7 +1689,7 @@ static const struct export_operations ext4_export_ops = { enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, - Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, + Opt_resgid, Opt_resuid, Opt_sb, Opt_nouid32, Opt_debug, Opt_removed, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, @@ -1698,8 +1698,7 @@ enum { Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption, Opt_inlinecrypt, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, + Opt_usrjquota, Opt_grpjquota, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, @@ -1719,16 +1718,16 @@ enum { }; static const struct constant_table ext4_param_errors[] = { - {"continue", Opt_err_cont}, - {"panic", Opt_err_panic}, - {"remount-ro", Opt_err_ro}, + {"continue", EXT4_MOUNT_ERRORS_CONT}, + {"panic", EXT4_MOUNT_ERRORS_PANIC}, + {"remount-ro", EXT4_MOUNT_ERRORS_RO}, {} }; static const struct constant_table ext4_param_data[] = { - {"journal", Opt_data_journal}, - {"ordered", Opt_data_ordered}, - {"writeback", Opt_data_writeback}, + {"journal", EXT4_MOUNT_JOURNAL_DATA}, + {"ordered", EXT4_MOUNT_ORDERED_DATA}, + {"writeback", EXT4_MOUNT_WRITEBACK_DATA}, {} }; @@ -1739,9 +1738,9 @@ static const struct constant_table ext4_param_data_err[] = { }; static const struct constant_table ext4_param_jqfmt[] = { - {"vfsold", Opt_jqfmt_vfsold}, - {"vfsv0", Opt_jqfmt_vfsv0}, - {"vfsv1", Opt_jqfmt_vfsv1}, + {"vfsold", QFMT_VFS_OLD}, + {"vfsv0", QFMT_VFS_V0}, + {"vfsv1", QFMT_VFS_V1}, {} }; @@ -1866,111 +1865,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = { {} }; -static const match_table_t tokens = { - {Opt_bsd_df, "bsddf"}, - {Opt_minix_df, "minixdf"}, - {Opt_grpid, "grpid"}, - {Opt_grpid, "bsdgroups"}, - {Opt_nogrpid, "nogrpid"}, - {Opt_nogrpid, "sysvgroups"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_sb, "sb=%u"}, - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_nouid32, "nouid32"}, - {Opt_debug, "debug"}, - {Opt_removed, "oldalloc"}, - {Opt_removed, "orlov"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_noload, "norecovery"}, - {Opt_noload, "noload"}, - {Opt_removed, "nobh"}, - {Opt_removed, "bh"}, - {Opt_commit, "commit=%u"}, - {Opt_min_batch_time, "min_batch_time=%u"}, - {Opt_max_batch_time, "max_batch_time=%u"}, - {Opt_journal_dev, "journal_dev=%u"}, - {Opt_journal_path, "journal_path=%s"}, - {Opt_journal_checksum, "journal_checksum"}, - {Opt_nojournal_checksum, "nojournal_checksum"}, - {Opt_journal_async_commit, "journal_async_commit"}, - {Opt_abort, "abort"}, - {Opt_data_journal, "data=journal"}, - {Opt_data_ordered, "data=ordered"}, - {Opt_data_writeback, "data=writeback"}, - {Opt_data_err_abort, "data_err=abort"}, - {Opt_data_err_ignore, "data_err=ignore"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_usrjquota, "usrjquota=%s"}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_grpquota, "grpquota"}, - {Opt_noquota, "noquota"}, - {Opt_quota, "quota"}, - {Opt_usrquota, "usrquota"}, - {Opt_prjquota, "prjquota"}, - {Opt_barrier, "barrier=%u"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_i_version, "i_version"}, - {Opt_dax, "dax"}, - {Opt_dax_always, "dax=always"}, - {Opt_dax_inode, "dax=inode"}, - {Opt_dax_never, "dax=never"}, - {Opt_stripe, "stripe=%u"}, - {Opt_delalloc, "delalloc"}, - {Opt_warn_on_error, "warn_on_error"}, - {Opt_nowarn_on_error, "nowarn_on_error"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, - {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"}, - {Opt_nodelalloc, "nodelalloc"}, - {Opt_removed, "mblk_io_submit"}, - {Opt_removed, "nomblk_io_submit"}, - {Opt_block_validity, "block_validity"}, - {Opt_noblock_validity, "noblock_validity"}, - {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, - {Opt_journal_ioprio, "journal_ioprio=%u"}, - {Opt_auto_da_alloc, "auto_da_alloc=%u"}, - {Opt_auto_da_alloc, "auto_da_alloc"}, - {Opt_noauto_da_alloc, "noauto_da_alloc"}, - {Opt_dioread_nolock, "dioread_nolock"}, - {Opt_dioread_lock, "nodioread_nolock"}, - {Opt_dioread_lock, "dioread_lock"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_init_itable, "init_itable=%u"}, - {Opt_init_itable, "init_itable"}, - {Opt_noinit_itable, "noinit_itable"}, -#ifdef CONFIG_EXT4_DEBUG - {Opt_fc_debug_force, "fc_debug_force"}, - {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"}, -#endif - {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, - {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, - {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_inlinecrypt, "inlinecrypt"}, - {Opt_nombcache, "nombcache"}, - {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ - {Opt_removed, "prefetch_block_bitmaps"}, - {Opt_no_prefetch_block_bitmaps, "no_prefetch_block_bitmaps"}, - {Opt_mb_optimize_scan, "mb_optimize_scan=%d"}, - {Opt_removed, "check=none"}, /* mount option from ext2/3 */ - {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ - {Opt_removed, "reservation"}, /* mount option from ext2/3 */ - {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ - {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ - {Opt_err, NULL}, -}; - #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) #define DEFAULT_MB_OPTIMIZE_SCAN (-1) @@ -1982,22 +1876,18 @@ static const char deprecated_msg[] = #define MOPT_CLEAR 0x0002 #define MOPT_NOSUPPORT 0x0004 #define MOPT_EXPLICIT 0x0008 -#define MOPT_CLEAR_ERR 0x0010 -#define MOPT_GTE0 0x0020 #ifdef CONFIG_QUOTA #define MOPT_Q 0 -#define MOPT_QFMT 0x0040 +#define MOPT_QFMT 0x0010 #else #define MOPT_Q MOPT_NOSUPPORT #define MOPT_QFMT MOPT_NOSUPPORT #endif -#define MOPT_DATAJ 0x0080 -#define MOPT_NO_EXT2 0x0100 -#define MOPT_NO_EXT3 0x0200 +#define MOPT_NO_EXT2 0x0020 +#define MOPT_NO_EXT3 0x0040 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) -#define MOPT_STRING 0x0400 -#define MOPT_SKIP 0x0800 -#define MOPT_2 0x1000 +#define MOPT_SKIP 0x0080 +#define MOPT_2 0x0100 static const struct mount_opts { int token; @@ -2030,40 +1920,17 @@ static const struct mount_opts { EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, - {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, - {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, - {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, - {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, - MOPT_NO_EXT2}, - {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, - MOPT_NO_EXT2}, + {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2}, {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, - {Opt_commit, 0, MOPT_GTE0}, - {Opt_max_batch_time, 0, MOPT_GTE0}, - {Opt_min_batch_time, 0, MOPT_GTE0}, - {Opt_inode_readahead_blks, 0, MOPT_GTE0}, - {Opt_init_itable, 0, MOPT_GTE0}, - {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP}, - {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS, - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, - {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE, - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, - {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER, - MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, - {Opt_stripe, 0, MOPT_GTE0}, - {Opt_resuid, 0, MOPT_GTE0}, - {Opt_resgid, 0, MOPT_GTE0}, - {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0}, - {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING}, - {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0}, - {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, - {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, - {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, - MOPT_NO_EXT2 | MOPT_DATAJ}, + {Opt_dax_type, 0, MOPT_EXT4_ONLY}, + {Opt_journal_dev, 0, MOPT_NO_EXT2}, + {Opt_journal_path, 0, MOPT_NO_EXT2}, + {Opt_journal_ioprio, 0, MOPT_NO_EXT2}, + {Opt_data, 0, MOPT_NO_EXT2}, {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, #ifdef CONFIG_EXT4_FS_POSIX_ACL @@ -2075,7 +1942,6 @@ static const struct mount_opts { #endif {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, - {Opt_debug_want_extra_isize, 0, MOPT_GTE0}, {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, @@ -2086,23 +1952,15 @@ static const struct mount_opts { {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA), MOPT_CLEAR | MOPT_Q}, - {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING}, - {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING}, - {Opt_offusrjquota, 0, MOPT_Q}, - {Opt_offgrpjquota, 0, MOPT_Q}, - {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, - {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, - {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, - {Opt_max_dir_size_kb, 0, MOPT_GTE0}, - {Opt_test_dummy_encryption, 0, MOPT_STRING}, + {Opt_usrjquota, 0, MOPT_Q}, + {Opt_grpjquota, 0, MOPT_Q}, + {Opt_jqfmt, 0, MOPT_QFMT}, {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS, MOPT_SET}, - {Opt_mb_optimize_scan, EXT4_MOUNT2_MB_OPTIMIZE_SCAN, MOPT_GTE0}, #ifdef CONFIG_EXT4_DEBUG {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, - {Opt_fc_debug_max_replay, 0, MOPT_GTE0}, #endif {Opt_err, 0, 0} }; @@ -2332,20 +2190,41 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) return token; is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + for (m = ext4_mount_opts; m->token != Opt_err; m++) + if (token == m->token) + break; + + ctx->opt_flags |= m->flags; + + if (m->flags & MOPT_EXPLICIT) { + if (m->mount_opt & EXT4_MOUNT_DELALLOC) { + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC); + } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { + ctx_set_mount_opt2(ctx, + EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM); + } else + return -EINVAL; + } + + if (m->flags & MOPT_NOSUPPORT) { + ext4_msg(NULL, KERN_ERR, "%s option not supported", + param->key); + return 0; + } + + switch (token) { #ifdef CONFIG_QUOTA - if (token == Opt_usrjquota) { + case Opt_usrjquota: if (!*param->string) return unnote_qf_name(fc, USRQUOTA); else return note_qf_name(fc, USRQUOTA, param); - } else if (token == Opt_grpjquota) { + case Opt_grpjquota: if (!*param->string) return unnote_qf_name(fc, GRPQUOTA); else return note_qf_name(fc, GRPQUOTA, param); - } #endif - switch (token) { case Opt_noacl: case Opt_nouser_xattr: ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5"); @@ -2383,41 +2262,21 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) #endif return 0; case Opt_errors: - case Opt_data: - case Opt_data_err: - case Opt_jqfmt: - case Opt_dax_type: - token = result.uint_32; - } - - for (m = ext4_mount_opts; m->token != Opt_err; m++) - if (token == m->token) - break; - - ctx->opt_flags |= m->flags; - - if (m->token == Opt_err) { - ext4_msg(NULL, KERN_ERR, "Unrecognized mount option \"%s\" " - "or missing value", param->key); - return -EINVAL; - } - - if (m->flags & MOPT_EXPLICIT) { - if (m->mount_opt & EXT4_MOUNT_DELALLOC) { - ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC); - } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { - ctx_set_mount_opt2(ctx, - EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM); - } else - return -EINVAL; - } - if (m->flags & MOPT_CLEAR_ERR) ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK); - - if (m->flags & MOPT_NOSUPPORT) { - ext4_msg(NULL, KERN_ERR, "%s option not supported", - param->key); - } else if (token == Opt_commit) { + ctx_set_mount_opt(ctx, result.uint_32); + return 0; +#ifdef CONFIG_QUOTA + case Opt_jqfmt: + ctx->s_jquota_fmt = result.uint_32; + ctx->spec |= EXT4_SPEC_JQFMT; + return 0; +#endif + case Opt_data: + ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS); + ctx_set_mount_opt(ctx, result.uint_32); + ctx->spec |= EXT4_SPEC_DATAJ; + return 0; + case Opt_commit: if (result.uint_32 == 0) ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE; else if (result.uint_32 > INT_MAX / HZ) { @@ -2429,7 +2288,8 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) } ctx->s_commit_interval = HZ * result.uint_32; ctx->spec |= EXT4_SPEC_s_commit_interval; - } else if (token == Opt_debug_want_extra_isize) { + return 0; + case Opt_debug_want_extra_isize: if ((result.uint_32 & 1) || (result.uint_32 < 4)) { ext4_msg(NULL, KERN_ERR, "Invalid want_extra_isize %d", result.uint_32); @@ -2437,13 +2297,16 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) } ctx->s_want_extra_isize = result.uint_32; ctx->spec |= EXT4_SPEC_s_want_extra_isize; - } else if (token == Opt_max_batch_time) { + return 0; + case Opt_max_batch_time: ctx->s_max_batch_time = result.uint_32; ctx->spec |= EXT4_SPEC_s_max_batch_time; - } else if (token == Opt_min_batch_time) { + return 0; + case Opt_min_batch_time: ctx->s_min_batch_time = result.uint_32; ctx->spec |= EXT4_SPEC_s_min_batch_time; - } else if (token == Opt_inode_readahead_blks) { + return 0; + case Opt_inode_readahead_blks: if (result.uint_32 && (result.uint_32 > (1 << 30) || !is_power_of_2(result.uint_32))) { @@ -2454,24 +2317,29 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) } ctx->s_inode_readahead_blks = result.uint_32; ctx->spec |= EXT4_SPEC_s_inode_readahead_blks; - } else if (token == Opt_init_itable) { + return 0; + case Opt_init_itable: ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE); ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; if (param->type == fs_value_is_string) ctx->s_li_wait_mult = result.uint_32; ctx->spec |= EXT4_SPEC_s_li_wait_mult; - } else if (token == Opt_max_dir_size_kb) { + return 0; + case Opt_max_dir_size_kb: ctx->s_max_dir_size_kb = result.uint_32; ctx->spec |= EXT4_SPEC_s_max_dir_size_kb; + return 0; #ifdef CONFIG_EXT4_DEBUG - } else if (token == Opt_fc_debug_max_replay) { + case Opt_fc_debug_max_replay: ctx->s_fc_debug_max_replay = result.uint_32; ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay; + return 0; #endif - } else if (token == Opt_stripe) { + case Opt_stripe: ctx->s_stripe = result.uint_32; ctx->spec |= EXT4_SPEC_s_stripe; - } else if (token == Opt_resuid) { + return 0; + case Opt_resuid: uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) { ext4_msg(NULL, KERN_ERR, "Invalid uid value %d", @@ -2480,7 +2348,8 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) } ctx->s_resuid = uid; ctx->spec |= EXT4_SPEC_s_resuid; - } else if (token == Opt_resgid) { + return 0; + case Opt_resgid: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) { ext4_msg(NULL, KERN_ERR, "Invalid gid value %d", @@ -2489,7 +2358,8 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) } ctx->s_resgid = gid; ctx->spec |= EXT4_SPEC_s_resgid; - } else if (token == Opt_journal_dev) { + return 0; + case Opt_journal_dev: if (is_remount) { ext4_msg(NULL, KERN_ERR, "Cannot specify journal on remount"); @@ -2497,7 +2367,9 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) } ctx->journal_devnum = result.uint_32; ctx->spec |= EXT4_SPEC_JOURNAL_DEV; - } else if (token == Opt_journal_path) { + return 0; + case Opt_journal_path: + { struct inode *journal_inode; struct path path; int error; @@ -2519,7 +2391,9 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev); ctx->spec |= EXT4_SPEC_JOURNAL_DEV; path_put(&path); - } else if (token == Opt_journal_ioprio) { + return 0; + } + case Opt_journal_ioprio: if (result.uint_32 > 7) { ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority" " (must be 0-7)"); @@ -2528,7 +2402,8 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32); ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO; - } else if (token == Opt_test_dummy_encryption) { + return 0; + case Opt_test_dummy_encryption: #ifdef CONFIG_FS_ENCRYPTION if (param->type == fs_value_is_flag) { ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; @@ -2550,53 +2425,65 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ext4_msg(NULL, KERN_WARNING, "Test dummy encryption mount option ignored"); #endif - } else if (m->flags & MOPT_DATAJ) { - ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS); - ctx_set_mount_opt(ctx, m->mount_opt); - ctx->spec |= EXT4_SPEC_DATAJ; -#ifdef CONFIG_QUOTA - } else if (m->flags & MOPT_QFMT) { - ctx->s_jquota_fmt = m->mount_opt; - ctx->spec |= EXT4_SPEC_JQFMT; -#endif - } else if (token == Opt_dax || token == Opt_dax_always || - token == Opt_dax_inode || token == Opt_dax_never) { + return 0; + case Opt_dax: + case Opt_dax_type: #ifdef CONFIG_FS_DAX - switch (token) { + { + int type = (token == Opt_dax) ? + Opt_dax : result.uint_32; + + switch (type) { case Opt_dax: case Opt_dax_always: - ctx_set_mount_opt(ctx, m->mount_opt); + ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); break; case Opt_dax_never: - ctx_set_mount_opt2(ctx, m->mount_opt); + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); break; case Opt_dax_inode: ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); /* Strictly for printing options */ - ctx_set_mount_opt2(ctx, m->mount_opt); + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE); break; } + return 0; + } #else ext4_msg(NULL, KERN_INFO, "dax option not supported"); - ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); - ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); return -EINVAL; #endif - } else if (token == Opt_data_err_abort) { - ctx_set_mount_opt(ctx, m->mount_opt); - } else if (token == Opt_data_err_ignore) { - ctx_clear_mount_opt(ctx, m->mount_opt); - } else if (token == Opt_mb_optimize_scan) { + case Opt_data_err: + if (result.uint_32 == Opt_data_err_abort) + ctx_set_mount_opt(ctx, m->mount_opt); + else if (result.uint_32 == Opt_data_err_ignore) + ctx_clear_mount_opt(ctx, m->mount_opt); + return 0; + case Opt_mb_optimize_scan: if (result.int_32 != 0 && result.int_32 != 1) { ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); return -EINVAL; } ctx->mb_optimize_scan = result.int_32; - } else { + return 0; + } + + /* + * At this point we should only be getting options requiring MOPT_SET, + * or MOPT_CLEAR. Anything else is a bug + */ + if (m->token == Opt_err) { + ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s", + param->key); + WARN_ON(1); + return -EINVAL; + } + + else { unsigned int set = 0; if ((param->type == fs_value_is_flag) || @@ -2624,6 +2511,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_clear_mount_opt(ctx, m->mount_opt); } } + return 0; } @@ -3112,7 +3000,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, for (m = ext4_mount_opts; m->token != Opt_err; m++) { int want_set = m->flags & MOPT_SET; if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || - (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP) + m->flags & MOPT_SKIP) continue; if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) continue; /* skip if same as the default */ From 4c2467287779f744cdd70c8ec70903034d6584f0 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 20 Dec 2021 16:26:57 +0100 Subject: [PATCH 14/48] ext4: don't fail remount if journalling mode didn't change Switching to the new mount api introduced inconsistency in how the journalling mode mount option (data=) is handled during a remount. Ext4 always prevented changing the journalling mode during the remount, however the new code always fails the remount when the journalling mode is specified, even if it remains unchanged. Fix it. Signed-off-by: Lukas Czerner Reported-by: Heiner Kallweit Fixes: cebe85d570cf ("ext4: switch to the new mount api") Link: https://lore.kernel.org/r/20211220152657.101599-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6998c07c209a5..77ad2101e65ad 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2155,19 +2155,22 @@ static int unnote_qf_name(struct fs_context *fc, int qtype) #endif #define EXT4_SET_CTX(name) \ -static inline void ctx_set_##name(struct ext4_fs_context *ctx, int flag)\ +static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ + unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name |= flag; \ } \ -static inline void ctx_clear_##name(struct ext4_fs_context *ctx, int flag)\ +static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ + unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name &= ~flag; \ } \ -static inline bool ctx_test_##name(struct ext4_fs_context *ctx, int flag)\ +static inline unsigned long \ +ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ - return ((ctx->vals_s_##name & flag) != 0); \ + return (ctx->vals_s_##name & flag); \ } \ EXT4_SET_CTX(flags); @@ -2828,7 +2831,8 @@ static int ext4_check_opt_consistency(struct fs_context *fc, "Remounting file system with no journal " "so ignoring journalled data option"); ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS); - } else if (ctx->mask_s_mount_opt & EXT4_MOUNT_DATA_FLAGS) { + } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) != + test_opt(sb, DATA_FLAGS)) { ext4_msg(NULL, KERN_ERR, "Cannot change data mode " "on remount"); return -EINVAL; From 4437992be7ca3ac5dd0a62cad10357112d4fb43e Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 22 Dec 2021 11:45:16 +0100 Subject: [PATCH 15/48] ext4: remove lazytime/nolazytime mount options handled by MS_LAZYTIME The lazytime and nolazytime mount options were added temporarily back in 2015 with commit a26f49926da9 ("ext4: add optimization for the lazytime mount option"). It think it has been enough time for the util-linux with lazytime support to get widely used. Remove the mount options. Signed-off-by: Lukas Czerner Link: https://lore.kernel.org/r/20211222104517.11187-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 77ad2101e65ad..9c6cd5d78fea9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1703,8 +1703,7 @@ enum { Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, - Opt_nowarn_on_error, Opt_mblk_io_submit, - Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize, + Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, @@ -1818,8 +1817,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = { fsparam_flag ("nodelalloc", Opt_nodelalloc), fsparam_flag ("warn_on_error", Opt_warn_on_error), fsparam_flag ("nowarn_on_error", Opt_nowarn_on_error), - fsparam_flag ("lazytime", Opt_lazytime), - fsparam_flag ("nolazytime", Opt_nolazytime), fsparam_u32 ("debug_want_extra_isize", Opt_debug_want_extra_isize), fsparam_flag ("mblk_io_submit", Opt_removed), @@ -2251,12 +2248,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_i_version: ctx_set_flags(ctx, SB_I_VERSION); return 0; - case Opt_lazytime: - ctx_set_flags(ctx, SB_LAZYTIME); - return 0; - case Opt_nolazytime: - ctx_clear_flags(ctx, SB_LAZYTIME); - return 0; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT ctx_set_flags(ctx, SB_INLINECRYPT); @@ -6259,7 +6250,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb, * either way we need to make sure it matches in both *flags and * s_flags. Copy those selected flags from *flags to s_flags */ - vfs_flags = SB_LAZYTIME | SB_I_VERSION; + vfs_flags = SB_I_VERSION; sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); ext4_apply_options(fc, sb); From 960e0ab63b2e5d8476bc873743f812e9e90cd047 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 22 Dec 2021 11:45:17 +0100 Subject: [PATCH 16/48] ext4: fix i_version handling on remount i_version mount option is getting lost on remount. This is because the 'i_version' mount option differs from the util-linux mount option 'iversion', but it has exactly the same functionality. We have to specifically notify the vfs that this is what we want by setting appropriate flag in fc->sb_flags. Fix it and as a result we can remove *flags argument from __ext4_remount(); do the same for __ext4_fill_super(). In addition set out to deprecate ext4 specific 'i_version' mount option in favor or 'iversion' by kernel version 5.20. Signed-off-by: Lukas Czerner Fixes: cebe85d570cf ("ext4: switch to the new mount api") Link: https://lore.kernel.org/r/20211222104517.11187-2-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9c6cd5d78fea9..55be772f6374e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2246,6 +2246,8 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); return 0; case Opt_i_version: + ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); + ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n"); ctx_set_flags(ctx, SB_I_VERSION); return 0; case Opt_inlinecrypt: @@ -2875,6 +2877,14 @@ static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) sb->s_flags &= ~ctx->mask_s_flags; sb->s_flags |= ctx->vals_s_flags; + /* + * i_version differs from common mount option iversion so we have + * to let vfs know that it was set, otherwise it would get cleared + * on remount + */ + if (ctx->mask_s_flags & SB_I_VERSION) + fc->sb_flags |= SB_I_VERSION; + #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; }) APPLY(s_commit_interval); APPLY(s_stripe); @@ -4342,8 +4352,7 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb) return NULL; } -static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb, - int silent) +static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) { struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; @@ -4363,6 +4372,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb, int err = 0; ext4_group_t first_not_zeroed; struct ext4_fs_context *ctx = fc->fs_private; + int silent = fc->sb_flags & SB_SILENT; /* Set defaults for the variables that will be set during parsing */ ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; @@ -5540,7 +5550,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) if (ctx->spec & EXT4_SPEC_s_sb_block) sbi->s_sb_block = ctx->s_sb_block; - ret = __ext4_fill_super(fc, sb, fc->sb_flags & SB_SILENT); + ret = __ext4_fill_super(fc, sb); if (ret < 0) goto free_sbi; @@ -6199,13 +6209,12 @@ struct ext4_mount_options { #endif }; -static int __ext4_remount(struct fs_context *fc, struct super_block *sb, - int *flags) +static int __ext4_remount(struct fs_context *fc, struct super_block *sb) { struct ext4_fs_context *ctx = fc->fs_private; struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned long old_sb_flags, vfs_flags; + unsigned long old_sb_flags; struct ext4_mount_options old_opts; ext4_group_t g; int err = 0; @@ -6245,14 +6254,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb, ctx->journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; - /* - * Some options can be enabled by ext4 and/or by VFS mount flag - * either way we need to make sure it matches in both *flags and - * s_flags. Copy those selected flags from *flags to s_flags - */ - vfs_flags = SB_I_VERSION; - sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); - ext4_apply_options(fc, sb); if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ @@ -6306,13 +6307,13 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb, /* Flush outstanding errors before changing fs state */ flush_work(&sbi->s_error_work); - if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { + if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) { err = -EROFS; goto restore_opts; } - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { err = sync_filesystem(sb); if (err < 0) goto restore_opts; @@ -6460,13 +6461,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb, if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) ext4_stop_mmpd(sbi); - /* - * Some options can be enabled by ext4 and/or by VFS mount flag - * either way we need to make sure it matches in both *flags and - * s_flags. Copy those selected flags from s_flags to *flags - */ - *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); - return 0; restore_opts: @@ -6498,7 +6492,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb, static int ext4_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; - int flags = fc->sb_flags; int ret; fc->s_fs_info = EXT4_SB(sb); @@ -6507,7 +6500,7 @@ static int ext4_reconfigure(struct fs_context *fc) if (ret < 0) return ret; - ret = __ext4_remount(fc, sb, &flags); + ret = __ext4_remount(fc, sb); if (ret < 0) return ret; From 2729cfdcfa1cc49bef5a90d046fa4a187fdfcc69 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 23 Dec 2021 12:21:37 -0800 Subject: [PATCH 17/48] ext4: use ext4_journal_start/stop for fast commit transactions This patch drops all calls to ext4_fc_start_update() and ext4_fc_stop_update(). To ensure that there are no ongoing journal updates during fast commit, we also make jbd2_fc_begin_commit() lock journal for updates. This way we don't have to maintain two different transaction start stop APIs for fast commit and full commit. This patch doesn't remove the functions altogether since in future we want to have inode level locking for fast commits. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223202140.2061101-2-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/acl.c | 2 -- fs/ext4/extents.c | 3 --- fs/ext4/file.c | 4 ---- fs/ext4/inode.c | 7 +------ fs/ext4/ioctl.c | 10 +--------- fs/jbd2/journal.c | 2 ++ 6 files changed, 4 insertions(+), 24 deletions(-) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 0613dfcbfd4aa..5a35768d6149a 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -246,7 +246,6 @@ ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode, handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); if (IS_ERR(handle)) return PTR_ERR(handle); - ext4_fc_start_update(inode); if ((type == ACL_TYPE_ACCESS) && acl) { error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); @@ -264,7 +263,6 @@ ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode, } out_stop: ext4_journal_stop(handle); - ext4_fc_stop_update(inode); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; return error; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0ecf819bf1891..703feff8cb8c9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4697,8 +4697,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; - ext4_fc_start_update(inode); - if (mode & FALLOC_FL_PUNCH_HOLE) { ret = ext4_punch_hole(inode, offset, len); goto exit; @@ -4762,7 +4760,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) inode_unlock(inode); trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); exit: - ext4_fc_stop_update(inode); return ret; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4c5f410523514..8cc11715518ac 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -259,7 +259,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, if (iocb->ki_flags & IOCB_NOWAIT) return -EOPNOTSUPP; - ext4_fc_start_update(inode); inode_lock(inode); ret = ext4_write_checks(iocb, from); if (ret <= 0) @@ -271,7 +270,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, out: inode_unlock(inode); - ext4_fc_stop_update(inode); if (likely(ret > 0)) { iocb->ki_pos += ret; ret = generic_write_sync(iocb, ret); @@ -552,9 +550,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - ext4_fc_start_update(inode); ret = ext4_orphan_add(handle, inode); - ext4_fc_stop_update(inode); if (ret) { ext4_journal_stop(handle); goto out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bfd3545f1e5d9..82f555d269804 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5320,7 +5320,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) return error; } - ext4_fc_start_update(inode); + if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { handle_t *handle; @@ -5344,7 +5344,6 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) { ext4_journal_stop(handle); - ext4_fc_stop_update(inode); return error; } /* Update corresponding info in inode so that everything is in @@ -5356,7 +5355,6 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); if (unlikely(error)) { - ext4_fc_stop_update(inode); return error; } } @@ -5370,12 +5368,10 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if (attr->ia_size > sbi->s_bitmap_maxbytes) { - ext4_fc_stop_update(inode); return -EFBIG; } } if (!S_ISREG(inode->i_mode)) { - ext4_fc_stop_update(inode); return -EINVAL; } @@ -5499,7 +5495,6 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ext4_std_error(inode->i_sb, error); if (!error) error = rc; - ext4_fc_stop_update(inode); return error; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 606dee9e08a32..e64a12e1218a9 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -743,7 +743,6 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns, u32 flags = fa->flags; int err = -EOPNOTSUPP; - ext4_fc_start_update(inode); if (flags & ~EXT4_FL_USER_VISIBLE) goto out; @@ -764,7 +763,6 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns, goto out; err = ext4_ioctl_setproject(inode, fa->fsx_projid); out: - ext4_fc_stop_update(inode); return err; } @@ -1273,13 +1271,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - long ret; - - ext4_fc_start_update(file_inode(filp)); - ret = __ext4_ioctl(filp, cmd, arg); - ext4_fc_stop_update(file_inode(filp)); - - return ret; + return __ext4_ioctl(filp, cmd, arg); } #ifdef CONFIG_COMPAT diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 35302bc192eb9..0b86a4365b669 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -757,6 +757,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid) } journal->j_flags |= JBD2_FAST_COMMIT_ONGOING; write_unlock(&journal->j_state_lock); + jbd2_journal_lock_updates(journal); return 0; } @@ -768,6 +769,7 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit); */ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) { + jbd2_journal_unlock_updates(journal); if (journal->j_fc_cleanup_callback) journal->j_fc_cleanup_callback(journal, 0); write_lock(&journal->j_state_lock); From 7bbbe241ec7ce0def9f71464c878fdbd2b0dcf37 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 23 Dec 2021 12:21:38 -0800 Subject: [PATCH 18/48] ext4: drop ineligible txn start stop APIs This patch drops ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() APIs. Fast commit ineligible transactions should simply call ext4_fc_mark_ineligible() after starting the trasaction. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223202140.2061101-3-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 6 ++-- fs/ext4/extents.c | 6 ++-- fs/ext4/fast_commit.c | 79 ++++++++----------------------------------- fs/ext4/ioctl.c | 3 +- fs/ext4/super.c | 1 - 5 files changed, 20 insertions(+), 75 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 404dd50856e5d..d71485d53050c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1725,9 +1725,9 @@ struct ext4_sb_info { */ struct work_struct s_error_work; - /* Ext4 fast commit stuff */ + /* Ext4 fast commit sub transaction ID */ atomic_t s_fc_subtid; - atomic_t s_fc_ineligible_updates; + /* * After commit starts, the main queue gets locked, and the further * updates get added in the staging queue. @@ -2926,8 +2926,6 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_inode(handle_t *handle, struct inode *inode); void ext4_fc_mark_ineligible(struct super_block *sb, int reason); -void ext4_fc_start_ineligible(struct super_block *sb, int reason); -void ext4_fc_stop_ineligible(struct super_block *sb); void ext4_fc_start_update(struct inode *inode); void ext4_fc_stop_update(struct inode *inode); void ext4_fc_del(struct inode *inode); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 703feff8cb8c9..38111ea18ae18 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5341,7 +5341,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode, 0); @@ -5380,7 +5380,6 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); - ext4_fc_stop_ineligible(sb); out_mmap: filemap_invalidate_unlock(mapping); out_mutex: @@ -5482,7 +5481,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; @@ -5557,7 +5556,6 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); - ext4_fc_stop_ineligible(sb); out_mmap: filemap_invalidate_unlock(mapping); out_mutex: diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0f32b445582ab..2771adefdba04 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -65,21 +65,11 @@ * * Fast Commit Ineligibility * ------------------------- - * Not all operations are supported by fast commits today (e.g extended - * attributes). Fast commit ineligibility is marked by calling one of the - * two following functions: - * - * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall - * back to full commit. This is useful in case of transient errors. * - * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all - * the fast commits happening between ext4_fc_start_ineligible() and - * ext4_fc_stop_ineligible() and one fast commit after the call to - * ext4_fc_stop_ineligible() to fall back to full commits. It is important to - * make one more fast commit to fall back to full commit after stop call so - * that it guaranteed that the fast commit ineligible operation contained - * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is - * followed by at least 1 full commit. + * Not all operations are supported by fast commits today (e.g extended + * attributes). Fast commit ineligibility is marked by calling + * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back + * to full commit. * * Atomicity of commits * -------------------- @@ -328,44 +318,6 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason) sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; } -/* - * Start a fast commit ineligible update. Any commits that happen while - * such an operation is in progress fall back to full commits. - */ -void ext4_fc_start_ineligible(struct super_block *sb, int reason) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) - return; - - WARN_ON(reason >= EXT4_FC_REASON_MAX); - sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; - atomic_inc(&sbi->s_fc_ineligible_updates); -} - -/* - * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here - * to ensure that after stopping the ineligible update, at least one full - * commit takes place. - */ -void ext4_fc_stop_ineligible(struct super_block *sb) -{ - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || - (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) - return; - - ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates); -} - -static inline int ext4_fc_is_ineligible(struct super_block *sb) -{ - return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) || - atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates)); -} - /* * Generic fast commit tracking function. If this is the first time this we are * called after a full commit, we initialize fast commit fields and then call @@ -391,7 +343,7 @@ static int ext4_fc_track_template( (sbi->s_mount_state & EXT4_FC_REPLAY)) return -EOPNOTSUPP; - if (ext4_fc_is_ineligible(inode->i_sb)) + if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) return -EINVAL; tid = handle->h_transaction->t_tid; @@ -1142,11 +1094,8 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) start_time = ktime_get(); - if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || - (ext4_fc_is_ineligible(sb))) { - reason = EXT4_FC_REASON_INELIGIBLE; - goto out; - } + if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) + return jbd2_complete_transaction(journal, commit_tid); restart_fc: ret = jbd2_fc_begin_commit(journal, commit_tid); @@ -1162,6 +1111,14 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) reason = EXT4_FC_REASON_FC_START_FAILED; goto out; } + /* + * After establishing journal barrier via jbd2_fc_begin_commit(), check + * if we are fast commit ineligible. + */ + if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { + reason = EXT4_FC_REASON_INELIGIBLE; + goto out; + } fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; ret = ext4_fc_perform_commit(journal); @@ -1180,12 +1137,6 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) atomic_inc(&sbi->s_fc_subtid); jbd2_fc_end_commit(journal); out: - /* Has any ineligible update happened since we started? */ - if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_INELIGIBLE; - } - spin_lock(&sbi->s_fc_lock); if (reason != EXT4_FC_REASON_OK && reason != EXT4_FC_REASON_ALREADY_COMMITTED) { diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e64a12e1218a9..1366afb59fba2 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb, err = -EINVAL; goto err_out; } - ext4_fc_start_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); @@ -252,7 +252,6 @@ static long swap_inode_boot_loader(struct super_block *sb, err_out1: ext4_journal_stop(handle); - ext4_fc_stop_ineligible(sb); ext4_double_up_write_data_sem(inode, inode_bl); err_out: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 55be772f6374e..acdfd9c0d0910 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5074,7 +5074,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) /* Initialize fast commit stuff */ atomic_set(&sbi->s_fc_subtid, 0); - atomic_set(&sbi->s_fc_ineligible_updates, 0); INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); From 0915e464cb274648e1ef1663e1356e53ff400983 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 23 Dec 2021 12:21:39 -0800 Subject: [PATCH 19/48] ext4: simplify updating of fast commit stats Move fast commit stats updating logic to a separate function from ext4_fc_commit(). This significantly improves readability of ext4_fc_commit(). Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223202140.2061101-4-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 - fs/ext4/fast_commit.c | 99 +++++++++++++++++++++++-------------------- fs/ext4/fast_commit.h | 27 ++++++------ 3 files changed, 68 insertions(+), 59 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d71485d53050c..82fa51d6f1454 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1747,7 +1747,6 @@ struct ext4_sb_info { spinlock_t s_fc_lock; struct buffer_head *s_fc_bh; struct ext4_fc_stats s_fc_stats; - u64 s_fc_avg_commit_time; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 2771adefdba04..a37384054c9e0 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1075,6 +1075,32 @@ static int ext4_fc_perform_commit(journal_t *journal) return ret; } +static void ext4_fc_update_stats(struct super_block *sb, int status, + u64 commit_time, int nblks) +{ + struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; + + jbd_debug(1, "Fast commit ended with status = %d", status); + if (status == EXT4_FC_STATUS_OK) { + stats->fc_num_commits++; + stats->fc_numblks += nblks; + if (likely(stats->s_fc_avg_commit_time)) + stats->s_fc_avg_commit_time = + (commit_time + + stats->s_fc_avg_commit_time * 3) / 4; + else + stats->s_fc_avg_commit_time = commit_time; + } else if (status == EXT4_FC_STATUS_FAILED || + status == EXT4_FC_STATUS_INELIGIBLE) { + if (status == EXT4_FC_STATUS_FAILED) + stats->fc_failed_commits++; + stats->fc_ineligible_commits++; + } else { + stats->fc_skipped_commits++; + } + trace_ext4_fc_commit_stop(sb, nblks, status); +} + /* * The main commit entry point. Performs a fast commit for transaction * commit_tid if needed. If it's not possible to perform a fast commit @@ -1087,7 +1113,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) struct ext4_sb_info *sbi = EXT4_SB(sb); int nblks = 0, ret, bsize = journal->j_blocksize; int subtid = atomic_read(&sbi->s_fc_subtid); - int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0; + int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; ktime_t start_time, commit_time; trace_ext4_fc_commit_start(sb); @@ -1104,69 +1130,52 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) if (atomic_read(&sbi->s_fc_subtid) <= subtid && commit_tid > journal->j_commit_sequence) goto restart_fc; - reason = EXT4_FC_REASON_ALREADY_COMMITTED; - goto out; + ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0); + return 0; } else if (ret) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_FC_START_FAILED; - goto out; + /* + * Commit couldn't start. Just update stats and perform a + * full commit. + */ + ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0); + return jbd2_complete_transaction(journal, commit_tid); } + /* * After establishing journal barrier via jbd2_fc_begin_commit(), check * if we are fast commit ineligible. */ if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { - reason = EXT4_FC_REASON_INELIGIBLE; - goto out; + status = EXT4_FC_STATUS_INELIGIBLE; + goto fallback; } fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; ret = ext4_fc_perform_commit(journal); if (ret < 0) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_FC_FAILED; - goto out; + status = EXT4_FC_STATUS_FAILED; + goto fallback; } nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; ret = jbd2_fc_wait_bufs(journal, nblks); if (ret < 0) { - sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; - reason = EXT4_FC_REASON_FC_FAILED; - goto out; + status = EXT4_FC_STATUS_FAILED; + goto fallback; } atomic_inc(&sbi->s_fc_subtid); - jbd2_fc_end_commit(journal); -out: - spin_lock(&sbi->s_fc_lock); - if (reason != EXT4_FC_REASON_OK && - reason != EXT4_FC_REASON_ALREADY_COMMITTED) { - sbi->s_fc_stats.fc_ineligible_commits++; - } else { - sbi->s_fc_stats.fc_num_commits++; - sbi->s_fc_stats.fc_numblks += nblks; - } - spin_unlock(&sbi->s_fc_lock); - nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0; - trace_ext4_fc_commit_stop(sb, nblks, reason); - commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + ret = jbd2_fc_end_commit(journal); /* - * weight the commit time higher than the average time so we don't - * react too strongly to vast changes in the commit time + * weight the commit time higher than the average time so we + * don't react too strongly to vast changes in the commit time */ - if (likely(sbi->s_fc_avg_commit_time)) - sbi->s_fc_avg_commit_time = (commit_time + - sbi->s_fc_avg_commit_time * 3) / 4; - else - sbi->s_fc_avg_commit_time = commit_time; - jbd_debug(1, - "Fast commit ended with blks = %d, reason = %d, subtid - %d", - nblks, reason, subtid); - if (reason == EXT4_FC_REASON_FC_FAILED) - return jbd2_fc_end_commit_fallback(journal); - if (reason == EXT4_FC_REASON_FC_START_FAILED || - reason == EXT4_FC_REASON_INELIGIBLE) - return jbd2_complete_transaction(journal, commit_tid); - return 0; + commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + ext4_fc_update_stats(sb, status, commit_time, nblks); + return ret; + +fallback: + ret = jbd2_fc_end_commit_fallback(journal); + ext4_fc_update_stats(sb, status, 0, 0); + return ret; } /* @@ -2124,7 +2133,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v) "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", stats->fc_num_commits, stats->fc_ineligible_commits, stats->fc_numblks, - div_u64(sbi->s_fc_avg_commit_time, 1000)); + div_u64(stats->s_fc_avg_commit_time, 1000)); seq_puts(seq, "Ineligible reasons:\n"); for (i = 0; i < EXT4_FC_REASON_MAX; i++) seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 937c381b4c85e..083ad1cb705a7 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -71,21 +71,19 @@ struct ext4_fc_tail { }; /* - * Fast commit reason codes + * Fast commit status codes + */ +enum { + EXT4_FC_STATUS_OK = 0, + EXT4_FC_STATUS_INELIGIBLE, + EXT4_FC_STATUS_SKIPPED, + EXT4_FC_STATUS_FAILED, +}; + +/* + * Fast commit ineligiblity reasons: */ enum { - /* - * Commit status codes: - */ - EXT4_FC_REASON_OK = 0, - EXT4_FC_REASON_INELIGIBLE, - EXT4_FC_REASON_ALREADY_COMMITTED, - EXT4_FC_REASON_FC_START_FAILED, - EXT4_FC_REASON_FC_FAILED, - - /* - * Fast commit ineligiblity reasons: - */ EXT4_FC_REASON_XATTR = 0, EXT4_FC_REASON_CROSS_RENAME, EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, @@ -117,7 +115,10 @@ struct ext4_fc_stats { unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX]; unsigned long fc_num_commits; unsigned long fc_ineligible_commits; + unsigned long fc_failed_commits; + unsigned long fc_skipped_commits; unsigned long fc_numblks; + u64 s_fc_avg_commit_time; }; #define EXT4_FC_REPLAY_REALLOC_INCREMENT 4 From d1199b94474ac4513b8491a4b751a8a466e1886b Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 23 Dec 2021 12:21:40 -0800 Subject: [PATCH 20/48] ext4: update fast commit TODOs This series takes care of a couple of TODOs and adds new ones. Update the TODOs section to reflect current state and future work that needs to happen. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223202140.2061101-5-harshads@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index a37384054c9e0..dd002facf6c95 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -156,15 +156,13 @@ * fast commit recovery even if that area is invalidated by later full * commits. * - * 1) Make fast commit atomic updates more fine grained. Today, a fast commit - * eligible update must be protected within ext4_fc_start_update() and - * ext4_fc_stop_update(). These routines are called at much higher - * routines. This can be made more fine grained by combining with - * ext4_journal_start(). + * 1) Fast commit's commit path locks the entire file system during fast + * commit. This has significant performance penalty. Instead of that, we + * should use ext4_fc_start/stop_update functions to start inode level + * updates from ext4_journal_start/stop. Once we do that we can drop file + * system locking during commit path. * - * 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - * - * 3) Handle more ineligible cases. + * 2) Handle more ineligible cases. */ #include From 5e4d0eba1ccaf19f93222abdeda5a368be141785 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Tue, 21 Dec 2021 10:28:39 +0800 Subject: [PATCH 21/48] ext4: fix fast commit may miss tracking range for FALLOC_FL_ZERO_RANGE when call falloc with FALLOC_FL_ZERO_RANGE, to set an range to unwritten, which has been already initialized. If the range is align to blocksize, fast commit will not track range for this change. Also track range for unwritten range in ext4_map_blocks(). Signed-off-by: Xin Yin Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211221022839.374606-1-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/extents.c | 2 -- fs/ext4/inode.c | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 38111ea18ae18..c3e76a5de6615 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4647,8 +4647,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) goto out_handle; - ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits, - (offset + len - 1) >> inode->i_sb->s_blocksize_bits); /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); if (ret >= 0) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 82f555d269804..4895909de21bf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -741,10 +741,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, if (ret) return ret; } - ext4_fc_track_range(handle, inode, map->m_lblk, - map->m_lblk + map->m_len - 1); } - + if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN || + map->m_flags & EXT4_MAP_MAPPED)) + ext4_fc_track_range(handle, inode, map->m_lblk, + map->m_lblk + map->m_len - 1); if (retval < 0) ext_debug(inode, "failed with err %d\n", retval); return retval; From 0b5b5a62b945a141e64011b2f90ee7e46f14be98 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 23 Dec 2021 11:23:36 +0800 Subject: [PATCH 22/48] ext4: use ext4_ext_remove_space() for fast commit replay delete range For now ,we use ext4_punch_hole() during fast commit replay delete range procedure. But it will be affected by inode->i_size, which may not correct during fast commit replay procedure. The following test will failed. -create & write foo (len 1000K) -falloc FALLOC_FL_ZERO_RANGE foo (range 400K - 600K) -create & fsync bar -falloc FALLOC_FL_PUNCH_HOLE foo (range 300K-500K) -fsync foo -crash before a full commit After the fast_commit reply procedure, the range 400K-500K will not be removed. Because in this case, when calling ext4_punch_hole() the inode->i_size is 0, and it just retruns with doing nothing. Change to use ext4_ext_remove_space() instead of ext4_punch_hole() to remove blocks of inode directly. Signed-off-by: Xin Yin Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223032337.5198-2-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index dd002facf6c95..28ddeb1d6afb9 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1770,11 +1770,14 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } } - ret = ext4_punch_hole(inode, - le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits, - le32_to_cpu(lrange.fc_len) << sb->s_blocksize_bits); - if (ret) - jbd_debug(1, "ext4_punch_hole returned %d", ret); + down_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_ext_remove_space(inode, lrange.fc_lblk, + lrange.fc_lblk + lrange.fc_len - 1); + up_write(&EXT4_I(inode)->i_data_sem); + if (ret) { + iput(inode); + return 0; + } ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); ext4_mark_inode_dirty(NULL, inode); From 9725958bb75cdfa10f2ec11526fdb23e7485e8e4 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 23 Dec 2021 11:23:37 +0800 Subject: [PATCH 23/48] ext4: fast commit may miss tracking unwritten range during ftruncate If use FALLOC_FL_KEEP_SIZE to alloc unwritten range at bottom, the inode->i_size will not include the unwritten range. When call ftruncate with fast commit enabled, it will miss to track the unwritten range. Change to trace the full range during ftruncate. Signed-off-by: Xin Yin Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223032337.5198-3-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4895909de21bf..08a90e25b78bc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5424,8 +5424,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ext4_fc_track_range(handle, inode, (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> inode->i_sb->s_blocksize_bits, - (oldsize > 0 ? oldsize - 1 : 0) >> - inode->i_sb->s_blocksize_bits); + EXT_MAX_BLOCKS - 1); else ext4_fc_track_range( handle, inode, From ab047d516dea72f011c15c04a929851e4d053109 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 23 Dec 2021 17:44:36 +0100 Subject: [PATCH 24/48] ext4: destroy ext4_fc_dentry_cachep kmemcache on module removal The kmemcache for ext4_fc_dentry_cachep remains registered after module removal. Destroy ext4_fc_dentry_cachep kmemcache on module removal. Fixes: aa75f4d3daaeb ("ext4: main fast-commit commit path") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Lukas Czerner Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211110134640.lyku5vklvdndw6uk@linutronix.de Link: https://lore.kernel.org/r/YbiK3JetFFl08bd7@linutronix.de Link: https://lore.kernel.org/r/20211223164436.2628390-1-bigeasy@linutronix.de Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 1 + fs/ext4/fast_commit.c | 5 +++++ fs/ext4/super.c | 2 ++ 3 files changed, 8 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 82fa51d6f1454..714201fa9e6f1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2932,6 +2932,7 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block); void ext4_fc_replay_cleanup(struct super_block *sb); int ext4_fc_commit(journal_t *journal, tid_t commit_tid); int __init ext4_fc_init_dentry_cache(void); +void ext4_fc_destroy_dentry_cache(void); /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 28ddeb1d6afb9..a6d647325742f 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -2153,3 +2153,8 @@ int __init ext4_fc_init_dentry_cache(void) return 0; } + +void ext4_fc_destroy_dentry_cache(void) +{ + kmem_cache_destroy(ext4_fc_dentry_cachep); +} diff --git a/fs/ext4/super.c b/fs/ext4/super.c index acdfd9c0d0910..499d1734818dd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -7118,6 +7118,7 @@ static int __init ext4_init_fs(void) out: unregister_as_ext2(); unregister_as_ext3(); + ext4_fc_destroy_dentry_cache(); out05: destroy_inodecache(); out1: @@ -7144,6 +7145,7 @@ static void __exit ext4_exit_fs(void) unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); + ext4_fc_destroy_dentry_cache(); destroy_inodecache(); ext4_exit_mballoc(); ext4_exit_sysfs(); From 380a0091cab482489e9b19e07f2a166ad2b76d5c Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 23 Dec 2021 09:55:06 +0800 Subject: [PATCH 25/48] ext4: Fix BUG_ON in ext4_bread when write quota data We got issue as follows when run syzkaller: [ 167.936972] EXT4-fs error (device loop0): __ext4_remount:6314: comm rep: Abort forced by user [ 167.938306] EXT4-fs (loop0): Remounting filesystem read-only [ 167.981637] Assertion failure in ext4_getblk() at fs/ext4/inode.c:847: '(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) || handle != NULL || create == 0' [ 167.983601] ------------[ cut here ]------------ [ 167.984245] kernel BUG at fs/ext4/inode.c:847! [ 167.984882] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [ 167.985624] CPU: 7 PID: 2290 Comm: rep Tainted: G B 5.16.0-rc5-next-20211217+ #123 [ 167.986823] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 [ 167.988590] RIP: 0010:ext4_getblk+0x17e/0x504 [ 167.989189] Code: c6 01 74 28 49 c7 c0 a0 a3 5c 9b b9 4f 03 00 00 48 c7 c2 80 9c 5c 9b 48 c7 c6 40 b6 5c 9b 48 c7 c7 20 a4 5c 9b e8 77 e3 fd ff <0f> 0b 8b 04 244 [ 167.991679] RSP: 0018:ffff8881736f7398 EFLAGS: 00010282 [ 167.992385] RAX: 0000000000000094 RBX: 1ffff1102e6dee75 RCX: 0000000000000000 [ 167.993337] RDX: 0000000000000001 RSI: ffffffff9b6e29e0 RDI: ffffed102e6dee66 [ 167.994292] RBP: ffff88816a076210 R08: 0000000000000094 R09: ffffed107363fa09 [ 167.995252] R10: ffff88839b1fd047 R11: ffffed107363fa08 R12: ffff88816a0761e8 [ 167.996205] R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000001 [ 167.997158] FS: 00007f6a1428c740(0000) GS:ffff88839b000000(0000) knlGS:0000000000000000 [ 167.998238] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 167.999025] CR2: 00007f6a140716c8 CR3: 0000000133216000 CR4: 00000000000006e0 [ 167.999987] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 168.000944] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 168.001899] Call Trace: [ 168.002235] [ 168.007167] ext4_bread+0xd/0x53 [ 168.007612] ext4_quota_write+0x20c/0x5c0 [ 168.010457] write_blk+0x100/0x220 [ 168.010944] remove_free_dqentry+0x1c6/0x440 [ 168.011525] free_dqentry.isra.0+0x565/0x830 [ 168.012133] remove_tree+0x318/0x6d0 [ 168.014744] remove_tree+0x1eb/0x6d0 [ 168.017346] remove_tree+0x1eb/0x6d0 [ 168.019969] remove_tree+0x1eb/0x6d0 [ 168.022128] qtree_release_dquot+0x291/0x340 [ 168.023297] v2_release_dquot+0xce/0x120 [ 168.023847] dquot_release+0x197/0x3e0 [ 168.024358] ext4_release_dquot+0x22a/0x2d0 [ 168.024932] dqput.part.0+0x1c9/0x900 [ 168.025430] __dquot_drop+0x120/0x190 [ 168.025942] ext4_clear_inode+0x86/0x220 [ 168.026472] ext4_evict_inode+0x9e8/0xa22 [ 168.028200] evict+0x29e/0x4f0 [ 168.028625] dispose_list+0x102/0x1f0 [ 168.029148] evict_inodes+0x2c1/0x3e0 [ 168.030188] generic_shutdown_super+0xa4/0x3b0 [ 168.030817] kill_block_super+0x95/0xd0 [ 168.031360] deactivate_locked_super+0x85/0xd0 [ 168.031977] cleanup_mnt+0x2bc/0x480 [ 168.033062] task_work_run+0xd1/0x170 [ 168.033565] do_exit+0xa4f/0x2b50 [ 168.037155] do_group_exit+0xef/0x2d0 [ 168.037666] __x64_sys_exit_group+0x3a/0x50 [ 168.038237] do_syscall_64+0x3b/0x90 [ 168.038751] entry_SYSCALL_64_after_hwframe+0x44/0xae In order to reproduce this problem, the following conditions need to be met: 1. Ext4 filesystem with no journal; 2. Filesystem image with incorrect quota data; 3. Abort filesystem forced by user; 4. umount filesystem; As in ext4_quota_write: ... if (EXT4_SB(sb)->s_journal && !handle) { ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); return -EIO; } ... We only check handle if NULL when filesystem has journal. There is need check handle if NULL even when filesystem has no journal. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20211223015506.297766-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 499d1734818dd..b72f8f6084e48 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6940,7 +6940,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - if (EXT4_SB(sb)->s_journal && !handle) { + if (!handle) { ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); From 15fc69bbbbbc8c72e5f6cc4e1be0f51283c5448e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Oct 2021 17:53:35 +0200 Subject: [PATCH 26/48] ext4: make sure quota gets properly shutdown on error When we hit an error when enabling quotas and setting inode flags, we do not properly shutdown quota subsystem despite returning error from Q_QUOTAON quotactl. This can lead to some odd situations like kernel using quota file while it is still writeable for userspace. Make sure we properly cleanup the quota subsystem in case of error. Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20211007155336.12493-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b72f8f6084e48..863a3eae505a3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6749,10 +6749,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) { - lockdep_set_quota_inode(path->dentry->d_inode, - I_DATA_SEM_NORMAL); - } else { + if (!err) { struct inode *inode = d_inode(path->dentry); handle_t *handle; @@ -6772,7 +6769,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); + if (err) + dquot_quota_off(sb, type); } + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); return err; } From 4013d47a5307fdb5c13370b5392498b00fedd274 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Oct 2021 17:53:36 +0200 Subject: [PATCH 27/48] ext4: make sure to reset inode lockdep class when quota enabling fails When we succeed in enabling some quota type but fail to enable another one with quota feature, we correctly disable all enabled quota types. However we forget to reset i_data_sem lockdep class. When the inode gets freed and reused, it will inherit this lockdep class (i_data_sem is initialized only when a slab is created) and thus eventually lockdep barfs about possible deadlocks. Reported-and-tested-by: syzbot+3b6f9218b1301ddda3e2@syzkaller.appspotmail.com Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20211007155336.12493-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 863a3eae505a3..1b55f234e0064 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6837,8 +6837,19 @@ int ext4_enable_quotas(struct super_block *sb) "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); - for (type--; type >= 0; type--) + for (type--; type >= 0; type--) { + struct inode *inode; + + inode = sb_dqopt(sb)->files[type]; + if (inode) + inode = igrab(inode); dquot_quota_off(sb, type); + if (inode) { + lockdep_set_quota_inode(inode, + I_DATA_SEM_NORMAL); + iput(inode); + } + } return err; } From dfac1a167068d60b36cc8f2081394a28b6fc424b Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Tue, 12 Oct 2021 20:28:51 -0700 Subject: [PATCH 28/48] ext4: replace snprintf in show functions with sysfs_emit coccicheck complains about the use of snprintf() in sysfs show functions. Fix the coccicheck warning: WARNING: use scnprintf or sprintf. Use sysfs_emit instead of scnprintf or sprintf makes more sense. Signed-off-by: Qing Wang Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1634095731-4528-1-git-send-email-wangqing@vivo.com Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 2314f74465924..2a4ae3d105716 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -63,7 +63,7 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - return snprintf(buf, PAGE_SIZE, "%lu\n", + return sysfs_emit(buf, "%lu\n", (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - sbi->s_sectors_written_start) >> 1); } @@ -72,7 +72,7 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1))); @@ -130,8 +130,8 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi, static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf) { if (!sbi->s_journal) - return snprintf(buf, PAGE_SIZE, "\n"); - return snprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "\n"); + return sysfs_emit(buf, "%d\n", task_pid_vnr(sbi->s_journal->j_task)); } @@ -357,7 +357,7 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi) { - return snprintf(buf, PAGE_SIZE, "%lld\n", + return sysfs_emit(buf, "%lld\n", ((time64_t)hi << 32) + le32_to_cpu(lo)); } @@ -374,7 +374,7 @@ static ssize_t ext4_attr_show(struct kobject *kobj, switch (a->attr_id) { case attr_delayed_allocation_blocks: - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (s64) EXT4_C2B(sbi, percpu_counter_sum(&sbi->s_dirtyclusters_counter))); case attr_session_write_kbytes: @@ -382,11 +382,11 @@ static ssize_t ext4_attr_show(struct kobject *kobj, case attr_lifetime_write_kbytes: return lifetime_write_kbytes_show(sbi, buf); case attr_reserved_clusters: - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); case attr_sra_exceeded_retry_limit: - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead: @@ -394,42 +394,42 @@ static ssize_t ext4_attr_show(struct kobject *kobj, if (!ptr) return 0; if (a->attr_ptr == ptr_ext4_super_block_offset) - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr)); else - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", *((unsigned int *) ptr)); case attr_pointer_ul: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%lu\n", + return sysfs_emit(buf, "%lu\n", *((unsigned long *) ptr)); case attr_pointer_u8: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", *((unsigned char *) ptr)); case attr_pointer_u64: if (!ptr) return 0; if (a->attr_ptr == ptr_ext4_super_block_offset) - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", le64_to_cpup(ptr)); else - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", *((unsigned long long *) ptr)); case attr_pointer_string: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%.*s\n", a->attr_size, + return sysfs_emit(buf, "%.*s\n", a->attr_size, (char *) ptr); case attr_pointer_atomic: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "%d\n", atomic_read((atomic_t *) ptr)); case attr_feature: - return snprintf(buf, PAGE_SIZE, "supported\n"); + return sysfs_emit(buf, "supported\n"); case attr_first_error_time: return print_tstamp(buf, sbi->s_es, s_first_error_time); case attr_last_error_time: From 8c80fb312d7abf8bcd66cca1d843a80318a2c522 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Tue, 23 Nov 2021 09:17:57 +0800 Subject: [PATCH 29/48] ext4: fix a possible ABBA deadlock due to busy PA We found on older kernel (3.10) that in the scenario of insufficient disk space, system may trigger an ABBA deadlock problem, it seems that this problem still exists in latest kernel, try to fix it here. The main process triggered by this problem is that task A occupies the PA and waits for the jbd2 transaction finish, the jbd2 transaction waits for the completion of task B's IO (plug_list), but task B waits for the release of PA by task A to finish discard, which indirectly forms an ABBA deadlock. The related calltrace is as follows: Task A vfs_write ext4_mb_new_blocks() ext4_mb_mark_diskspace_used() JBD2 jbd2_journal_get_write_access() -> jbd2_journal_commit_transaction() ->schedule() filemap_fdatawait() | | | Task B | | do_unlinkat() | | ext4_evict_inode() | | jbd2_journal_begin_ordered_truncate() | | filemap_fdatawrite_range() | | ext4_mb_new_blocks() | -ext4_mb_discard_group_preallocations() <----- Here, try to cancel ext4_mb_discard_group_preallocations() internal retry due to PA busy, and do a limited number of retries inside ext4_mb_discard_preallocations(), which can circumvent the above problems, but also has some advantages: 1. Since the PA is in a busy state, if other groups have free PAs, keeping the current PA may help to reduce fragmentation. 2. Continue to traverse forward instead of waiting for the current group PA to be released. In most scenarios, the PA discard time can be reduced. However, in the case of smaller free space, if only a few groups have space, then due to multiple traversals of the group, it may increase CPU overhead. But in contrast, I feel that the overall benefit is better than the cost. Signed-off-by: Chunguang Xu Reported-by: kernel test robot Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1637630277-23496-1-git-send-email-brookxu.cn@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/mballoc.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 215b7068f548a..3dd9b9e2f9675 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4814,7 +4814,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, */ static noinline_for_stack int ext4_mb_discard_group_preallocations(struct super_block *sb, - ext4_group_t group, int needed) + ext4_group_t group, int *busy) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; @@ -4822,8 +4822,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct list_head list; struct ext4_buddy e4b; int err; - int busy = 0; - int free, free_total = 0; + int free = 0; mb_debug(sb, "discard preallocation for group %u\n", group); if (list_empty(&grp->bb_prealloc_list)) @@ -4846,19 +4845,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, goto out_dbg; } - if (needed == 0) - needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; - INIT_LIST_HEAD(&list); -repeat: - free = 0; ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, &grp->bb_prealloc_list, pa_group_list) { spin_lock(&pa->pa_lock); if (atomic_read(&pa->pa_count)) { spin_unlock(&pa->pa_lock); - busy = 1; + *busy = 1; continue; } if (pa->pa_deleted) { @@ -4898,22 +4892,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - free_total += free; - - /* if we still need more blocks and some PAs were used, try again */ - if (free_total < needed && busy) { - ext4_unlock_group(sb, group); - cond_resched(); - busy = 0; - goto repeat; - } ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); put_bh(bitmap_bh); out_dbg: mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n", - free_total, group, grp->bb_free); - return free_total; + free, group, grp->bb_free); + return free; } /* @@ -5455,13 +5440,24 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; - int freed = 0; + int freed = 0, busy = 0; + int retry = 0; trace_ext4_mb_discard_preallocations(sb, needed); + + if (needed == 0) + needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; + repeat: for (i = 0; i < ngroups && needed > 0; i++) { - ret = ext4_mb_discard_group_preallocations(sb, i, needed); + ret = ext4_mb_discard_group_preallocations(sb, i, &busy); freed += ret; needed -= ret; + cond_resched(); + } + + if (needed > 0 && busy && ++retry < 3) { + busy = 0; + goto repeat; } return freed; From c27c29c6af4f3f4ce925a2111c256733c5a5b430 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Wed, 1 Dec 2021 08:34:21 -0800 Subject: [PATCH 30/48] ext4: initialize err_blk before calling __ext4_get_inode_loc It is not guaranteed that __ext4_get_inode_loc will definitely set err_blk pointer when it returns EIO. To avoid using uninitialized variables, let's first set err_blk to 0. Reported-by: Dan Carpenter Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211201163421.2631661-1-harshads@google.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 08a90e25b78bc..bca9951634d94 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4524,7 +4524,7 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, static int __ext4_get_inode_loc_noinmem(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc, @@ -4539,7 +4539,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc, From 298b5c521746d69c07beb2757292fb5ccc1b0f85 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 24 Dec 2021 18:03:41 +0800 Subject: [PATCH 31/48] ext4: fix null-ptr-deref in '__ext4_journal_ensure_credits' We got issue as follows when run syzkaller test: [ 1901.130043] EXT4-fs error (device vda): ext4_remount:5624: comm syz-executor.5: Abort forced by user [ 1901.130901] Aborting journal on device vda-8. [ 1901.131437] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.16: Detected aborted journal [ 1901.131566] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.11: Detected aborted journal [ 1901.132586] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.18: Detected aborted journal [ 1901.132751] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.9: Detected aborted journal [ 1901.136149] EXT4-fs error (device vda) in ext4_reserve_inode_write:6035: Journal has aborted [ 1901.136837] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-fuzzer: Detected aborted journal [ 1901.136915] ================================================================== [ 1901.138175] BUG: KASAN: null-ptr-deref in __ext4_journal_ensure_credits+0x74/0x140 [ext4] [ 1901.138343] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.13: Detected aborted journal [ 1901.138398] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.1: Detected aborted journal [ 1901.138808] Read of size 8 at addr 0000000000000000 by task syz-executor.17/968 [ 1901.138817] [ 1901.138852] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.30: Detected aborted journal [ 1901.144779] CPU: 1 PID: 968 Comm: syz-executor.17 Not tainted 4.19.90-vhulk2111.1.0.h893.eulerosv2r10.aarch64+ #1 [ 1901.146479] Hardware name: linux,dummy-virt (DT) [ 1901.147317] Call trace: [ 1901.147552] dump_backtrace+0x0/0x2d8 [ 1901.147898] show_stack+0x28/0x38 [ 1901.148215] dump_stack+0xec/0x15c [ 1901.148746] kasan_report+0x108/0x338 [ 1901.149207] __asan_load8+0x58/0xb0 [ 1901.149753] __ext4_journal_ensure_credits+0x74/0x140 [ext4] [ 1901.150579] ext4_xattr_delete_inode+0xe4/0x700 [ext4] [ 1901.151316] ext4_evict_inode+0x524/0xba8 [ext4] [ 1901.151985] evict+0x1a4/0x378 [ 1901.152353] iput+0x310/0x428 [ 1901.152733] do_unlinkat+0x260/0x428 [ 1901.153056] __arm64_sys_unlinkat+0x6c/0xc0 [ 1901.153455] el0_svc_common+0xc8/0x320 [ 1901.153799] el0_svc_handler+0xf8/0x160 [ 1901.154265] el0_svc+0x10/0x218 [ 1901.154682] ================================================================== This issue may happens like this: Process1 Process2 ext4_evict_inode ext4_journal_start ext4_truncate ext4_ind_truncate ext4_free_branches ext4_ind_truncate_ensure_credits ext4_journal_ensure_credits_fn ext4_journal_restart handle->h_transaction = NULL; mount -o remount,abort /mnt -> trigger JBD abort start_this_handle -> will return failed ext4_xattr_delete_inode ext4_journal_ensure_credits ext4_journal_ensure_credits_fn __ext4_journal_ensure_credits jbd2_handle_buffer_credits journal = handle->h_transaction->t_journal; ->null-ptr-deref Now, indirect truncate process didn't handle error. To solve this issue maybe simply add check handle is abort in '__ext4_journal_ensure_credits' is enough, and i also think this is necessary. Cc: stable@kernel.org Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20211224100341.3299128-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6def7339056db..3477a16d08aee 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -162,6 +162,8 @@ int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, { if (!ext4_handle_valid(handle)) return 0; + if (is_handle_aborted(handle)) + return -EROFS; if (jbd2_handle_buffer_credits(handle) >= check_cred && handle->h_revoke_credits >= revoke_cred) return 0; From 5c48a7df91499e371ef725895b2e2d21a126e227 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 25 Dec 2021 17:09:37 +0800 Subject: [PATCH 32/48] ext4: fix an use-after-free issue about data=journal writeback mode Our syzkaller report an use-after-free issue that accessing the freed buffer_head on the writeback page in __ext4_journalled_writepage(). The problem is that if there was a truncate racing with the data=journalled writeback procedure, the writeback length could become zero and bget_one() refuse to get buffer_head's refcount, then the truncate procedure release buffer once we drop page lock, finally, the last ext4_walk_page_buffers() trigger the use-after-free problem. sync truncate ext4_sync_file() file_write_and_wait_range() ext4_setattr(0) inode->i_size = 0 ext4_writepage() len = 0 __ext4_journalled_writepage() page_bufs = page_buffers(page) ext4_walk_page_buffers(bget_one) <- does not get refcount do_invalidatepage() free_buffer_head() ext4_walk_page_buffers(page_bufs) <- trigger use-after-free After commit bdf96838aea6 ("ext4: fix race between truncate and __ext4_journalled_writepage()"), we have already handled the racing case, so the bget_one() and bput_one() are not needed. So this patch simply remove these hunk, and recheck the i_size to make it safe. Fixes: bdf96838aea6 ("ext4: fix race between truncate and __ext4_journalled_writepage()") Signed-off-by: Zhang Yi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211225090937.712867-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bca9951634d94..68070f34f0cff 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1845,30 +1845,16 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return 0; } -static int bget_one(handle_t *handle, struct inode *inode, - struct buffer_head *bh) -{ - get_bh(bh); - return 0; -} - -static int bput_one(handle_t *handle, struct inode *inode, - struct buffer_head *bh) -{ - put_bh(bh); - return 0; -} - static int __ext4_journalled_writepage(struct page *page, unsigned int len) { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct buffer_head *page_bufs = NULL; handle_t *handle = NULL; int ret = 0, err = 0; int inline_data = ext4_has_inline_data(inode); struct buffer_head *inode_bh = NULL; + loff_t size; ClearPageChecked(page); @@ -1878,14 +1864,6 @@ static int __ext4_journalled_writepage(struct page *page, inode_bh = ext4_journalled_write_inline_data(inode, len, page); if (inode_bh == NULL) goto out; - } else { - page_bufs = page_buffers(page); - if (!page_bufs) { - BUG(); - goto out; - } - ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, - NULL, bget_one); } /* * We need to release the page lock before we start the @@ -1906,7 +1884,8 @@ static int __ext4_journalled_writepage(struct page *page, lock_page(page); put_page(page); - if (page->mapping != mapping) { + size = i_size_read(inode); + if (page->mapping != mapping || page_offset(page) > size) { /* The page got truncated from under us */ ext4_journal_stop(handle); ret = 0; @@ -1916,6 +1895,13 @@ static int __ext4_journalled_writepage(struct page *page, if (inline_data) { ret = ext4_mark_inode_dirty(handle, inode); } else { + struct buffer_head *page_bufs = page_buffers(page); + + if (page->index == size >> PAGE_SHIFT) + len = size & ~PAGE_MASK; + else + len = PAGE_SIZE; + ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, NULL, do_journal_get_write_access); @@ -1936,9 +1922,6 @@ static int __ext4_journalled_writepage(struct page *page, out: unlock_page(page); out_no_pagelock: - if (!inline_data && page_bufs) - ext4_walk_page_buffers(NULL, inode, page_bufs, 0, len, - NULL, bput_one); brelse(inode_bh); return ret; } From 173b6e383d2a204c9921ffc1eca3b87aa2106c33 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 12 Nov 2021 16:22:02 +0100 Subject: [PATCH 33/48] ext4: avoid trim error on fs with small groups A user reported FITRIM ioctl failing for him on ext4 on some devices without apparent reason. After some debugging we've found out that these devices (being LVM volumes) report rather large discard granularity of 42MB and the filesystem had 1k blocksize and thus group size of 8MB. Because ext4 FITRIM implementation puts discard granularity into minlen, ext4_trim_fs() declared the trim request as invalid. However just silently doing nothing seems to be a more appropriate reaction to such combination of parameters since user did not specify anything wrong. CC: Lukas Czerner Fixes: 5c2ed62fd447 ("ext4: Adjust minlen with discard_granularity in the FITRIM ioctl") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20211112152202.26614-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 2 -- fs/ext4/mballoc.c | 8 ++++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1366afb59fba2..798d9d8287956 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1114,8 +1114,6 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) sizeof(range))) return -EFAULT; - range.minlen = max((unsigned int)range.minlen, - q->limits.discard_granularity); ret = ext4_trim_fs(sb, &range); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3dd9b9e2f9675..ea764137462ef 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6400,6 +6400,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; @@ -6418,6 +6419,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start >= max_blks || range->len < sb->s_blocksize) return -EINVAL; + /* No point to try to trim less than discard granularity */ + if (range->minlen < q->limits.discard_granularity) { + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + q->limits.discard_granularity >> sb->s_blocksize_bits); + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } if (end >= max_blks) end = max_blks - 1; if (end <= first_data_blk) From 13b215a9e657808414a2159b0dec90f1c31ebe05 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 4 Jan 2022 15:35:17 +0100 Subject: [PATCH 34/48] ext4: don't use kfree() on rcu protected pointer sbi->s_qf_names During ext4 mount api rework the commit e6e268cb6822 ("ext4: move quota configuration out of handle_mount_opt()") introduced a bug where we would kfree(sbi->s_qf_names[i]) before assigning the new quota name in ext4_apply_quota_options(). This is wrong because we're using kfree() on rcu prointer that could be simultaneously accessed from ext4_show_quota_options() during remount. Fix it by using rcu_replace_pointer() to replace the old qname with the new one and then kfree_rcu() the old quota name. Also use get_qf_name() instead of sbi->s_qf_names in strcmp() to silence the sparse warning. Fixes: e6e268cb6822 ("ext4: move quota configuration out of handle_mount_opt()") Reported-by: kernel test robot Signed-off-by: Lukas Czerner Link: https://lore.kernel.org/r/20220104143518.134465-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1b55f234e0064..72e4dfc9acaf9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2636,8 +2636,10 @@ static void ext4_apply_quota_options(struct fs_context *fc, qname = ctx->s_qf_names[i]; /* May be NULL */ ctx->s_qf_names[i] = NULL; - kfree(sbi->s_qf_names[i]); - rcu_assign_pointer(sbi->s_qf_names[i], qname); + qname = rcu_replace_pointer(sbi->s_qf_names[i], qname, + lockdep_is_held(&sb->s_umount)); + if (qname) + kfree_rcu(qname); set_opt(sb, QUOTA); } } @@ -2691,7 +2693,7 @@ static int ext4_check_quota_consistency(struct fs_context *fc, goto err_jquota_change; if (sbi->s_qf_names[i] && ctx->s_qf_names[i] && - strcmp(sbi->s_qf_names[i], + strcmp(get_qf_name(sb, sbi, i), ctx->s_qf_names[i]) != 0) goto err_jquota_specified; } From 4c1bd5a90c4e716e5bde33f01a40bb66dc4a9903 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 4 Jan 2022 15:35:18 +0100 Subject: [PATCH 35/48] ext4: only set EXT4_MOUNT_QUOTA when journalled quota file is specified Only set EXT4_MOUNT_QUOTA when journalled quota file is specified, otherwise simply disabling specific quota type (usrjquota=) will also set the EXT4_MOUNT_QUOTA super block option. Signed-off-by: Lukas Czerner Fixes: e6e268cb6822 ("ext4: move quota configuration out of handle_mount_opt()") Link: https://lore.kernel.org/r/20220104143518.134465-2-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 72e4dfc9acaf9..435f24787030a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2635,12 +2635,13 @@ static void ext4_apply_quota_options(struct fs_context *fc, continue; qname = ctx->s_qf_names[i]; /* May be NULL */ + if (qname) + set_opt(sb, QUOTA); ctx->s_qf_names[i] = NULL; qname = rcu_replace_pointer(sbi->s_qf_names[i], qname, lockdep_is_held(&sb->s_umount)); if (qname) kfree_rcu(qname); - set_opt(sb, QUOTA); } } From bbc605cdb1e15aafaec899fedc385dc75dddac0e Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 13 Dec 2021 14:56:18 +0100 Subject: [PATCH 36/48] ext4: implement support for get/set fs label Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for online reading and setting of file system label. ext4_ioctl_getlabel() is simple, just get the label from the primary superblock. This might not be the first sb on the file system if 'sb=' mount option is used. In ext4_ioctl_setlabel() we update what ext4 currently views as a primary superblock and then proceed to update backup superblocks. There are two caveats: - the primary superblock might not be the first superblock and so it might not be the one used by userspace tools if read directly off the disk. - because the primary superblock might not be the first superblock we potentialy have to update it as part of backup superblock update. However the first sb location is a bit more complicated than the rest so we have to account for that. The superblock modification is created generic enough so the infrastructure can be used for other potential superblock modification operations, such as chaning UUID. Tested with generic/492 with various configurations. I also checked the behavior with 'sb=' mount options, including very large file systems with and without sparse_super/sparse_super2. Signed-off-by: Lukas Czerner Link: https://lore.kernel.org/r/20211213135618.43303-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 9 +- fs/ext4/ioctl.c | 309 ++++++++++++++++++++++++++++++++++++ fs/ext4/resize.c | 19 ++- fs/ext4/super.c | 4 +- include/trace/events/ext4.h | 23 +++ 5 files changed, 357 insertions(+), 7 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 714201fa9e6f1..5c8de74f5b406 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1298,6 +1298,8 @@ extern void ext4_set_bits(void *bm, int cur, int len); /* Metadata checksum algorithm codes */ #define EXT4_CRC32C_CHKSUM 1 +#define EXT4_LABEL_MAX 16 + /* * Structure of the super block */ @@ -1347,7 +1349,7 @@ struct ext4_super_block { /*60*/ __le32 s_feature_incompat; /* incompatible feature set */ __le32 s_feature_ro_compat; /* readonly-compatible feature set */ /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -/*78*/ char s_volume_name[16]; /* volume name */ +/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */ /*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */ /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ /* @@ -3094,6 +3096,9 @@ extern int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_fsblk_t n_blocks_count); extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); +extern unsigned int ext4_list_backups(struct super_block *sb, + unsigned int *three, unsigned int *five, + unsigned int *seven); /* super.c */ extern struct buffer_head *ext4_sb_bread(struct super_block *sb, @@ -3108,6 +3113,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait); extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block); extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); +extern __le32 ext4_superblock_csum(struct super_block *sb, + struct ext4_super_block *es); extern void ext4_superblock_csum_set(struct super_block *sb); extern int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 798d9d8287956..bbbedf27b71c4 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -27,6 +27,248 @@ #include "fsmap.h" #include +typedef void ext4_update_sb_callback(struct ext4_super_block *es, + const void *arg); + +/* + * Superblock modification callback function for changing file system + * label + */ +static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg) +{ + /* Sanity check, this should never happen */ + BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX); + + memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX); +} + +static +int ext4_update_primary_sb(struct super_block *sb, handle_t *handle, + ext4_update_sb_callback func, + const void *arg) +{ + int err = 0; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh = sbi->s_sbh; + struct ext4_super_block *es = sbi->s_es; + + trace_ext4_update_sb(sb, bh->b_blocknr, 1); + + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, + bh, + EXT4_JTR_NONE); + if (err) + goto out_err; + + lock_buffer(bh); + func(es, arg); + ext4_superblock_csum_set(sb); + unlock_buffer(bh); + + if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) { + ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to " + "superblock detected"); + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + } + + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out_err; + err = sync_dirty_buffer(bh); +out_err: + ext4_std_error(sb, err); + return err; +} + +/* + * Update one backup superblock in the group 'grp' using the callback + * function 'func' and argument 'arg'. If the handle is NULL the + * modification is not journalled. + * + * Returns: 0 when no modification was done (no superblock in the group) + * 1 when the modification was successful + * <0 on error + */ +static int ext4_update_backup_sb(struct super_block *sb, + handle_t *handle, ext4_group_t grp, + ext4_update_sb_callback func, const void *arg) +{ + int err = 0; + ext4_fsblk_t sb_block; + struct buffer_head *bh; + unsigned long offset = 0; + struct ext4_super_block *es; + + if (!ext4_bg_has_super(sb, grp)) + return 0; + + /* + * For the group 0 there is always 1k padding, so we have + * either adjust offset, or sb_block depending on blocksize + */ + if (grp == 0) { + sb_block = 1 * EXT4_MIN_BLOCK_SIZE; + offset = do_div(sb_block, sb->s_blocksize); + } else { + sb_block = ext4_group_first_block_no(sb, grp); + offset = 0; + } + + trace_ext4_update_sb(sb, sb_block, handle ? 1 : 0); + + bh = ext4_sb_bread(sb, sb_block, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); + + if (handle) { + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, + bh, + EXT4_JTR_NONE); + if (err) + goto out_bh; + } + + es = (struct ext4_super_block *) (bh->b_data + offset); + lock_buffer(bh); + if (ext4_has_metadata_csum(sb) && + es->s_checksum != ext4_superblock_csum(sb, es)) { + ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " + "superblock %llu\n", sb_block); + unlock_buffer(bh); + err = -EFSBADCRC; + goto out_bh; + } + func(es, arg); + if (ext4_has_metadata_csum(sb)) + es->s_checksum = ext4_superblock_csum(sb, es); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if (err) + goto out_bh; + + if (handle) { + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out_bh; + } else { + BUFFER_TRACE(bh, "marking dirty"); + mark_buffer_dirty(bh); + } + err = sync_dirty_buffer(bh); + +out_bh: + brelse(bh); + ext4_std_error(sb, err); + return (err) ? err : 1; +} + +/* + * Update primary and backup superblocks using the provided function + * func and argument arg. + * + * Only the primary superblock and at most two backup superblock + * modifications are journalled; the rest is modified without journal. + * This is safe because e2fsck will re-write them if there is a problem, + * and we're very unlikely to ever need more than two backups. + */ +static +int ext4_update_superblocks_fn(struct super_block *sb, + ext4_update_sb_callback func, + const void *arg) +{ + handle_t *handle; + ext4_group_t ngroups; + unsigned int three = 1; + unsigned int five = 5; + unsigned int seven = 7; + int err = 0, ret, i; + ext4_group_t grp, primary_grp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + /* + * We can't update superblocks while the online resize is running + */ + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, + &sbi->s_ext4_flags)) { + ext4_msg(sb, KERN_ERR, "Can't modify superblock while" + "performing online resize"); + return -EBUSY; + } + + /* + * We're only going to update primary superblock and two + * backup superblocks in this transaction. + */ + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out; + } + + /* Update primary superblock */ + err = ext4_update_primary_sb(sb, handle, func, arg); + if (err) { + ext4_msg(sb, KERN_ERR, "Failed to update primary " + "superblock"); + goto out_journal; + } + + primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr); + ngroups = ext4_get_groups_count(sb); + + /* + * Update backup superblocks. We have to start from group 0 + * because it might not be where the primary superblock is + * if the fs is mounted with -o sb= + */ + i = 0; + grp = 0; + while (grp < ngroups) { + /* Skip primary superblock */ + if (grp == primary_grp) + goto next_grp; + + ret = ext4_update_backup_sb(sb, handle, grp, func, arg); + if (ret < 0) { + /* Ignore bad checksum; try to update next sb */ + if (ret == -EFSBADCRC) + goto next_grp; + err = ret; + goto out_journal; + } + + i += ret; + if (handle && i > 1) { + /* + * We're only journalling primary superblock and + * two backup superblocks; the rest is not + * journalled. + */ + err = ext4_journal_stop(handle); + if (err) + goto out; + handle = NULL; + } +next_grp: + grp = ext4_list_backups(sb, &three, &five, &seven); + } + +out_journal: + if (handle) { + ret = ext4_journal_stop(handle); + if (ret && !err) + err = ret; + } +out: + clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags); + smp_mb__after_atomic(); + return err ? err : 0; +} + /** * Swap memory between @a and @b for @len bytes. * @@ -847,6 +1089,64 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg) return err; } +static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label) +{ + size_t len; + int ret = 0; + char new_label[EXT4_LABEL_MAX + 1]; + struct super_block *sb = file_inode(filp)->i_sb; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* + * Copy the maximum length allowed for ext4 label with one more to + * find the required terminating null byte in order to test the + * label length. The on disk label doesn't need to be null terminated. + */ + if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1)) + return -EFAULT; + + len = strnlen(new_label, EXT4_LABEL_MAX + 1); + if (len > EXT4_LABEL_MAX) + return -EINVAL; + + /* + * Clear the buffer after the new label + */ + memset(new_label + len, 0, EXT4_LABEL_MAX - len); + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + ret = ext4_update_superblocks_fn(sb, ext4_sb_setlabel, new_label); + + mnt_drop_write_file(filp); + return ret; +} + +static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label) +{ + char label[EXT4_LABEL_MAX + 1]; + + /* + * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because + * FSLABEL_MAX must include terminating null byte, while s_volume_name + * does not have to. + */ + BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX); + + memset(label, 0, sizeof(label)); + lock_buffer(sbi->s_sbh); + strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX); + unlock_buffer(sbi->s_sbh); + + if (copy_to_user(user_label, label, sizeof(label))) + return -EFAULT; + return 0; +} + static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1261,6 +1561,13 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case EXT4_IOC_CHECKPOINT: return ext4_ioctl_checkpoint(filp, arg); + case FS_IOC_GETFSLABEL: + return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg); + + case FS_IOC_SETFSLABEL: + return ext4_ioctl_setlabel(filp, + (const void __user *)arg); + default: return -ENOTTY; } @@ -1336,6 +1643,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_GETSTATE: case EXT4_IOC_GET_ES_CACHE: case EXT4_IOC_CHECKPOINT: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b63cb88ccdaed..ee8f02f406cb6 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -717,12 +717,23 @@ static int setup_new_flex_group_blocks(struct super_block *sb, * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... */ -static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, - unsigned *five, unsigned *seven) +unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three, + unsigned int *five, unsigned int *seven) { - unsigned *min = three; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + unsigned int *min = three; int mult = 3; - unsigned ret; + unsigned int ret; + + if (ext4_has_feature_sparse_super2(sb)) { + do { + if (*min > 2) + return UINT_MAX; + ret = le32_to_cpu(es->s_backup_bgs[*min - 1]); + *min += 1; + } while (!ret); + return ret; + } if (!ext4_has_feature_sparse_super(sb)) { ret = *min; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 435f24787030a..806ce08933226 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -279,8 +279,8 @@ static int ext4_verify_csum_type(struct super_block *sb, return es->s_checksum_type == EXT4_CRC32C_CHKSUM; } -static __le32 ext4_superblock_csum(struct super_block *sb, - struct ext4_super_block *es) +__le32 ext4_superblock_csum(struct super_block *sb, + struct ext4_super_block *es) { struct ext4_sb_info *sbi = EXT4_SB(sb); int offset = offsetof(struct ext4_super_block, s_checksum); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 0ea36b2b0662a..19e957b7f9410 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2837,6 +2837,29 @@ TRACE_EVENT(ext4_fc_track_range, __entry->end) ); +TRACE_EVENT(ext4_update_sb, + TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, + unsigned int flags), + + TP_ARGS(sb, fsblk, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ext4_fsblk_t, fsblk) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->fsblk = fsblk; + __entry->flags = flags; + ), + + TP_printk("dev %d,%d fsblk %llu flags %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fsblk, __entry->flags) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ From 2327fb2e23416cfb2795ccca2f77d4d65925be99 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 3 Nov 2021 15:51:21 +0100 Subject: [PATCH 37/48] ext4: change s_last_trim_minblks type to unsigned long There is no good reason for the s_last_trim_minblks to be atomic. There is no data integrity needed and there is no real danger in setting and reading it in a racy manner. Change it to be unsigned long, the same type as s_clusters_per_group which is the maximum that's allowed. Signed-off-by: Lukas Czerner Suggested-by: Andreas Dilger Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20211103145122.17338-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5c8de74f5b406..88c240a9cc428 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1663,7 +1663,7 @@ struct ext4_sb_info { struct task_struct *s_mmp_tsk; /* record the last minlen when FITRIM is called. */ - atomic_t s_last_trim_minblks; + unsigned long s_last_trim_minblks; /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *s_chksum_driver; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ea764137462ef..cf2fd9fc7d986 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6369,7 +6369,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || - minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) { + minblocks < EXT4_SB(sb)->s_last_trim_minblks) { ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); if (ret >= 0) EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); @@ -6478,7 +6478,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } if (!ret) - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); + EXT4_SB(sb)->s_last_trim_minblks = minlen; out: range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; From 4a69aecbfb30a3fc85bf8028386c047d5607a97a Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 3 Nov 2021 15:51:22 +0100 Subject: [PATCH 38/48] ext4: allow to change s_last_trim_minblks via sysfs Ext4 has an optimization mechanism for batched disacrd (FITRIM) that should help speed up subsequent calls of FITRIM ioctl by skipping the groups that were previously trimmed. However because the FITRIM allows to set the minimum size of an extent to trim, ext4 stores the last minimum extent size and only avoids trimming the group if it was previously trimmed with minimum extent size equal to, or smaller than the current call. There is currently no way to bypass the optimization without umount/mount cycle. This becomes a problem when the file system is live migrated to a different storage, because the optimization will prevent possibly useful discard calls to the storage. Fix it by exporting the s_last_trim_minblks via sysfs interface which will allow us to set the minimum size to the number of blocks larger than subsequent FITRIM call, effectively bypassing the optimization. By setting the s_last_trim_minblks to ULONG_MAX the optimization will be effectively cleared regardless of the previous state, or file system configuration. For example: getconf ULONG_MAX > /sys/fs/ext4/dm-1/last_trim_minblks Signed-off-by: Lukas Czerner Reported-by: Laurent GUERBY Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20211103145122.17338-2-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 2a4ae3d105716..f61e65ae27d8d 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -245,6 +245,7 @@ EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(journal_task, 0444, journal_task); EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch); EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit); +EXT4_RW_ATTR_SBI_UL(last_trim_minblks, s_last_trim_minblks); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -295,6 +296,7 @@ static struct attribute *ext4_attrs[] = { #endif ATTR_LIST(mb_prefetch), ATTR_LIST(mb_prefetch_limit), + ATTR_LIST(last_trim_minblks), NULL, }; ATTRIBUTE_GROUPS(ext4); From effc5b3b0d20ffcb692f107cd347289bfcd6890b Mon Sep 17 00:00:00 2001 From: Nghia Le Date: Tue, 26 Oct 2021 05:18:03 +0700 Subject: [PATCH 39/48] ext4: remove useless resetting io_end_size in mpage_process_page() The command "make clang-analyzer" detects dead stores in mpage_process_page() function. Do not reset io_end_size to 0 in the current paths, as the function exits on those paths without further using io_end_size. Signed-off-by: Nghia Le Link: https://lore.kernel.org/r/20211025221803.3326-1-nghialm78@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 68070f34f0cff..9dbeb772de60d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2241,7 +2241,6 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, mpd->map.m_len = 0; mpd->map.m_flags = 0; io_end_vec->size += io_end_size; - io_end_size = 0; err = mpage_process_page_bufs(mpd, head, bh, lblk); if (err > 0) @@ -2266,7 +2265,6 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, } while (lblk++, (bh = bh->b_this_page) != head); io_end_vec->size += io_end_size; - io_end_size = 0; *map_bh = false; out: *m_lblk = lblk; From a660be97eb00c4d87bf881e1226fbd9d812690b7 Mon Sep 17 00:00:00 2001 From: luo penghao Date: Thu, 4 Nov 2021 06:34:06 +0000 Subject: [PATCH 40/48] ext4: remove redundant statement The local variable assignment at the end of the function is meaningless. The clang_analyzer complains as follows: fs/ext4/fast_commit.c:779:2 warning: Value stored to 'dst' is never read Reported-by: Zeal Robot Signed-off-by: luo penghao Link: https://lore.kernel.org/r/20211104063406.2747-1-luo.penghao@zte.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index a6d647325742f..5ae8026a0c562 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -746,7 +746,6 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); dst += sizeof(fcd); ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); - dst += dlen; return true; } From fac888b2be9993d0c740013d26d69c8784acc293 Mon Sep 17 00:00:00 2001 From: luo penghao Date: Thu, 4 Nov 2021 06:40:07 +0000 Subject: [PATCH 41/48] ext4: remove unused assignments The eh assignment in these two places is meaningless, because the function will goto to merge, which will not use eh. The clang_analyzer complains as follows: fs/ext4/extents.c:1988:4 warning: fs/ext4/extents.c:2016:4 warning: Value stored to 'eh' is never read Reported-by: Zeal Robot Signed-off-by: luo penghao Link: https://lore.kernel.org/r/20211104064007.2919-1-luo.penghao@zte.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c3e76a5de6615..299d27d04b2b7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2025,7 +2025,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; nearex = ex; goto merge; } @@ -2054,7 +2053,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + ext4_ext_get_actual_len(newext)); if (unwritten) ext4_ext_mark_unwritten(ex); - eh = path[depth].p_hdr; nearex = ex; goto merge; } From 037e7c525d988867811b3613549971a3253b2f7b Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Mon, 15 Nov 2021 18:20:20 +0100 Subject: [PATCH 42/48] ext4: drop an always true check EXT_FIRST_INDEX(ptr) is ptr+12, which can't possibly be null; gcc-12 warns about this. Signed-off-by: Adam Borowski Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20211115172020.57853-1-kilobyte@angband.pl Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 299d27d04b2b7..1077ce7e189fe 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1496,8 +1496,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", ix != NULL ? le32_to_cpu(ix->ei_block) : 0, - EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? - le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, + le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block), depth); return -EFSCORRUPTED; } From a6dbc76c4d9cc961bfb31a495fb27ee06e037e35 Mon Sep 17 00:00:00 2001 From: luo penghao Date: Wed, 8 Dec 2021 07:51:57 +0000 Subject: [PATCH 43/48] ext4: remove redundant o_start statement The if will goto out of the loop, and until the end of the function execution, o_start will not be used again. The clang_analyzer complains as follows: fs/ext4/move_extent.c:635:5 warning: Value stored to 'o_start' is never read Reported-by: Zeal Robot Signed-off-by: luo penghao Link: https://lore.kernel.org/r/20211208075157.404535-1-luo.penghao@zte.com.cn Signed-off-by: Theodore Ts'o --- fs/ext4/move_extent.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 64a579734f934..95aa212f08632 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -632,7 +632,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, /* Check hole before the start pos */ if (cur_blk + cur_len - 1 < o_start) { if (next_blk == EXT_MAX_BLOCKS) { - o_start = o_end; ret = -ENODATA; goto out; } From ae6ec194b55273e52351ee70d694594dcabe405d Mon Sep 17 00:00:00 2001 From: luo penghao Date: Wed, 8 Dec 2021 07:53:07 +0000 Subject: [PATCH 44/48] ext4: remove unnecessary 'offset' assignment Although it is in the loop, offset is reassigned at the beginning of the while loop. And after the loop, the value will not be used The clang_analyzer complains as follows: fs/ext4/dir.c:306:3 warning: Value stored to 'offset' is never read Reported-by: Zeal Robot Signed-off-by: luo penghao Link: https://lore.kernel.org/r/20211208075307.404703-1-luo.penghao@zte.com.cn Signed-off-by: Theodore Ts'o --- fs/ext4/dir.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 74b172a4adda3..a6bb86f52b9aa 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -303,7 +303,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) goto done; brelse(bh); bh = NULL; - offset = 0; } done: err = 0; From e81c9302a6c3c008f5c30beb73b38adb0170ff2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Tue, 14 Dec 2021 17:50:58 +0000 Subject: [PATCH 45/48] ext4: set csum seed in tmp inode while migrating to extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When migrating to extents, the temporary inode will have it's own checksum seed. This means that, when swapping the inodes data, the inode checksums will be incorrect. This can be fixed by recalculating the extents checksums again. Or simply by copying the seed into the temporary inode. Link: https://bugzilla.kernel.org/show_bug.cgi?id=213357 Reported-by: Jeroen van Wolffelaar Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20211214175058.19511-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 7e0b4f81c6c06..36dfc88ce05bc 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -459,6 +459,17 @@ int ext4_ext_migrate(struct inode *inode) ext4_journal_stop(handle); goto out_unlock; } + /* + * Use the correct seed for checksum (i.e. the seed from 'inode'). This + * is so that the metadata blocks will have the correct checksum after + * the migration. + * + * Note however that, if a crash occurs during the migration process, + * the recovery process is broken because the tmp_inode checksums will + * be wrong and the orphans cleanup will fail. + */ + ei = EXT4_I(inode); + EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; i_size_write(tmp_inode, i_size_read(inode)); /* * Set the i_nlink to zero so it will be deleted later @@ -502,7 +513,6 @@ int ext4_ext_migrate(struct inode *inode) goto out_tmp_inode; } - ei = EXT4_I(inode); i_data = ei->i_data; memset(&lb, 0, sizeof(lb)); From da9e480212582b336b97848c69fdd4ac8860065b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 15 Dec 2021 14:43:09 +0300 Subject: [PATCH 46/48] ext4: fix a copy and paste typo This was obviously supposed to be an ext4 struct, not xfs. GCC doesn't care either way so it doesn't affect the build or runtime. Fixes: cebe85d570cf ("ext4: switch to the new mount api") Signed-off-by: Dan Carpenter Reviewed-by: Lukas Czerner Link: https://lore.kernel.org/r/20211215114309.GB14552@kili Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 806ce08933226..9a936ecbaa3b1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2081,7 +2081,7 @@ static void ext4_fc_free(struct fs_context *fc) int ext4_init_fs_context(struct fs_context *fc) { - struct xfs_fs_context *ctx; + struct ext4_fs_context *ctx; ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL); if (!ctx) From a2e3965df40af2f48ee6c97ed573adb91ced5dac Mon Sep 17 00:00:00 2001 From: xu xin Date: Tue, 28 Dec 2021 07:32:52 +0000 Subject: [PATCH 47/48] ext4: use BUG_ON instead of if condition followed by BUG BUG_ON would be better. This issue was detected with the help of Coccinelle. Reported-by: Zeal robot Reviewed-by: Lukas Czerner Signed-off-by: xu xin Link: https://lore.kernel.org/r/20211228073252.580296-1-xu.xin16@zte.com.cn Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 88c240a9cc428..715ee206dfe12 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2400,8 +2400,7 @@ ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) { - if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) - BUG(); + BUG_ON((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)); #if (PAGE_SIZE >= 65536) if (len < 65536) return cpu_to_le16(len); From 6eeaf88fd586f05aaf1d48cb3a139d2a5c6eb055 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Jan 2022 23:59:56 -0500 Subject: [PATCH 48/48] ext4: don't use the orphan list when migrating an inode We probably want to remove the indirect block to extents migration feature after a deprecation window, but until then, let's fix a potential data loss problem caused by the fact that we put the tmp_inode on the orphan list. In the unlikely case where we crash and do a journal recovery, the data blocks belonging to the inode being migrated are also represented in the tmp_inode on the orphan list --- and so its data blocks will get marked unallocated, and available for reuse. Instead, stop putting the tmp_inode on the oprhan list. So in the case where we crash while migrating the inode, we'll leak an inode, which is not a disaster. It will be easily fixed the next time we run fsck, and it's better than potentially having blocks getting claimed by two different files, and losing data as a result. Signed-off-by: Theodore Ts'o Reviewed-by: Lukas Czerner Cc: stable@kernel.org --- fs/ext4/migrate.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 36dfc88ce05bc..ff8916e1d38e9 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -437,12 +437,12 @@ int ext4_ext_migrate(struct inode *inode) percpu_down_write(&sbi->s_writepages_rwsem); /* - * Worst case we can touch the allocation bitmaps, a bgd - * block, and a block to link in the orphan list. We do need - * need to worry about credits for modifying the quota inode. + * Worst case we can touch the allocation bitmaps and a block + * group descriptor block. We do need need to worry about + * credits for modifying the quota inode. */ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, - 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); + 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); @@ -463,10 +463,6 @@ int ext4_ext_migrate(struct inode *inode) * Use the correct seed for checksum (i.e. the seed from 'inode'). This * is so that the metadata blocks will have the correct checksum after * the migration. - * - * Note however that, if a crash occurs during the migration process, - * the recovery process is broken because the tmp_inode checksums will - * be wrong and the orphans cleanup will fail. */ ei = EXT4_I(inode); EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; @@ -478,7 +474,6 @@ int ext4_ext_migrate(struct inode *inode) clear_nlink(tmp_inode); ext4_ext_tree_init(handle, tmp_inode); - ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); /* @@ -503,12 +498,6 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) { - /* - * It is impossible to update on-disk structures without - * a handle, so just rollback in-core changes and live other - * work to orphan_list_cleanup() - */ - ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); goto out_tmp_inode; }