From 1e6e238c3002ea3611465ce5f32777ddd6a40126 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 28 Jul 2020 16:39:26 +0800 Subject: [PATCH 01/10] btrfs: inode: fix NULL pointer dereference if inode doesn't need compression [BUG] There is a bug report of NULL pointer dereference caused in compress_file_extent(): Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Workqueue: btrfs-delalloc btrfs_delalloc_helper [btrfs] NIP [c008000006dd4d34] compress_file_range.constprop.41+0x75c/0x8a0 [btrfs] LR [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] Call Trace: [c000000c69093b00] [c008000006dd4d1c] compress_file_range.constprop.41+0x744/0x8a0 [btrfs] (unreliable) [c000000c69093bd0] [c008000006dd4ebc] async_cow_start+0x44/0xa0 [btrfs] [c000000c69093c10] [c008000006e14824] normal_work_helper+0xdc/0x598 [btrfs] [c000000c69093c80] [c0000000001608c0] process_one_work+0x2c0/0x5b0 [c000000c69093d10] [c000000000160c38] worker_thread+0x88/0x660 [c000000c69093db0] [c00000000016b55c] kthread+0x1ac/0x1c0 [c000000c69093e20] [c00000000000b660] ret_from_kernel_thread+0x5c/0x7c ---[ end trace f16954aa20d822f6 ]--- [CAUSE] For the following execution route of compress_file_range(), it's possible to hit NULL pointer dereference: compress_file_extent() |- pages = NULL; |- start = async_chunk->start = 0; |- end = async_chunk = 4095; |- nr_pages = 1; |- inode_need_compress() == false; <<< Possible, see later explanation | Now, we have nr_pages = 1, pages = NULL |- cont: |- ret = cow_file_range_inline(); |- if (ret <= 0) { |- for (i = 0; i < nr_pages; i++) { |- WARN_ON(pages[i]->mapping); <<< Crash To enter above call execution branch, we need the following race: Thread 1 (chattr) | Thread 2 (writeback) --------------------------+------------------------------ | btrfs_run_delalloc_range | |- inode_need_compress = true | |- cow_file_range_async() btrfs_ioctl_set_flag() | |- binode_flags |= | BTRFS_INODE_NOCOMPRESS | | compress_file_range() | |- inode_need_compress = false | |- nr_page = 1 while pages = NULL | | Then hit the crash [FIX] This patch will fix it by checking @pages before doing accessing it. This patch is only designed as a hot fix and easy to backport. More elegant fix may make btrfs only check inode_need_compress() once to avoid such race, but that would be another story. Reported-by: Luciano Chavez Fixes: 4d3a800ebb12 ("btrfs: merge nr_pages input and output parameter in compress_pages") CC: stable@vger.kernel.org # 4.14.x: cecc8d9038d16: btrfs: Move free_pages_out label in inline extent handling branch in compress_file_range CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 611b3412fbfdc..9988d754e465b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -653,12 +653,18 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) page_error_op | PAGE_END_WRITEBACK); - for (i = 0; i < nr_pages; i++) { - WARN_ON(pages[i]->mapping); - put_page(pages[i]); + /* + * Ensure we only free the compressed pages if we have + * them allocated, as we can still reach here with + * inode_need_compress() == false. + */ + if (pages) { + for (i = 0; i < nr_pages; i++) { + WARN_ON(pages[i]->mapping); + put_page(pages[i]); + } + kfree(pages); } - kfree(pages); - return 0; } } From bf53d4687b8f3f6b752f091eb85f62369a515dfd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 27 Jul 2020 10:28:05 -0400 Subject: [PATCH 02/10] btrfs: only search for left_info if there is no right_info in try_merge_free_space In try_to_merge_free_space we attempt to find entries to the left and right of the entry we are adding to see if they can be merged. We search for an entry past our current info (saved into right_info), and then if right_info exists and it has a rb_prev() we save the rb_prev() into left_info. However there's a slight problem in the case that we have a right_info, but no entry previous to that entry. At that point we will search for an entry just before the info we're attempting to insert. This will simply find right_info again, and assign it to left_info, making them both the same pointer. Now if right_info _can_ be merged with the range we're inserting, we'll add it to the info and free right_info. However further down we'll access left_info, which was right_info, and thus get a use-after-free. Fix this by only searching for the left entry if we don't find a right entry at all. The CVE referenced had a specially crafted file system that could trigger this use-after-free. However with the tree checker improvements we no longer trigger the conditions for the UAF. But the original conditions still apply, hence this fix. Reference: CVE-2019-19448 Fixes: 963030817060 ("Btrfs: use hybrid extents+bitmap rb tree for free space") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6d961e11639e3..ef0fd7afb0b1e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2282,7 +2282,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, bool update_stat) { - struct btrfs_free_space *left_info; + struct btrfs_free_space *left_info = NULL; struct btrfs_free_space *right_info; bool merged = false; u64 offset = info->offset; @@ -2298,7 +2298,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, if (right_info && rb_prev(&right_info->offset_index)) left_info = rb_entry(rb_prev(&right_info->offset_index), struct btrfs_free_space, offset_index); - else + else if (!right_info) left_info = tree_search_offset(ctl, offset - 1, 0, 0); /* See try_merge_free_space() comment. */ From 27942c9971cc405c60432eca9395e514a2ae9f5e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 23 Jul 2020 19:08:55 +0200 Subject: [PATCH 03/10] btrfs: fix messages after changing compression level by remount Reported by Forza on IRC that remounting with compression options does not reflect the change in level, or at least it does not appear to do so according to the messages: mount -o compress=zstd:1 /dev/sda /mnt mount -o remount,compress=zstd:15 /mnt does not print the change to the level to syslog: [ 41.366060] BTRFS info (device vda): use zstd compression, level 1 [ 41.368254] BTRFS info (device vda): disk space caching is enabled [ 41.390429] BTRFS info (device vda): disk space caching is enabled What really happens is that the message is lost but the level is actualy changed. There's another weird output, if compression is reset to 'no': [ 45.413776] BTRFS info (device vda): use no compression, level 4 To fix that, save the previous compression level and print the message in that case too and use separate message for 'no' compression. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: David Sterba --- fs/btrfs/super.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5a9dc31d95c98..aa73422b06785 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -517,6 +517,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, char *compress_type; bool compress_force = false; enum btrfs_compression_type saved_compress_type; + int saved_compress_level; bool saved_compress_force; int no_compress = 0; @@ -598,6 +599,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, info->compress_type : BTRFS_COMPRESS_NONE; saved_compress_force = btrfs_test_opt(info, FORCE_COMPRESS); + saved_compress_level = info->compress_level; if (token == Opt_compress || token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { @@ -642,6 +644,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; + info->compress_level = 0; + info->compress_type = 0; btrfs_clear_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); compress_force = false; @@ -662,11 +666,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, */ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); } - if ((btrfs_test_opt(info, COMPRESS) && - (info->compress_type != saved_compress_type || - compress_force != saved_compress_force)) || - (!btrfs_test_opt(info, COMPRESS) && - no_compress == 1)) { + if (no_compress == 1) { + btrfs_info(info, "use no compression"); + } else if ((info->compress_type != saved_compress_type) || + (compress_force != saved_compress_force) || + (info->compress_level != saved_compress_level)) { btrfs_info(info, "%s %s compression, level %d", (compress_force) ? "force" : "use", compress_type, info->compress_level); From 3ef3959b29c4a5bd65526ab310a1a18ae533172a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 22 Jul 2020 11:12:46 -0400 Subject: [PATCH 04/10] btrfs: don't show full path of bind mounts in subvol= Chris Murphy reported a problem where rpm ostree will bind mount a bunch of things for whatever voodoo it's doing. But when it does this /proc/mounts shows something like /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo/bar 0 0 Despite subvolid=256 being subvol=/foo. This is because we're just spitting out the dentry of the mount point, which in the case of bind mounts is the source path for the mountpoint. Instead we should spit out the path to the actual subvol. Fix this by looking up the name for the subvolid we have mounted. With this fix the same test looks like this /dev/sda /mnt/test btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 /dev/sda /mnt/test/baz btrfs rw,relatime,subvolid=256,subvol=/foo 0 0 Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index aa73422b06785..9b4e9c4c46736 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1386,6 +1386,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); const char *compress_type; + const char *subvol_name; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); @@ -1472,8 +1473,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",ref_verify"); seq_printf(seq, ",subvolid=%llu", BTRFS_I(d_inode(dentry))->root->root_key.objectid); - seq_puts(seq, ",subvol="); - seq_dentry(seq, dentry, " \t\n\\"); + subvol_name = btrfs_get_subvol_name_from_objectid(info, + BTRFS_I(d_inode(dentry))->root->root_key.objectid); + if (!IS_ERR(subvol_name)) { + seq_puts(seq, ",subvol="); + seq_escape(seq, subvol_name, " \t\n\\"); + kfree(subvol_name); + } return 0; } From 4f26433e9b3eb7a55ed70d8f882ae9cd48ba448b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 29 Jul 2020 10:17:50 +0100 Subject: [PATCH 05/10] btrfs: fix memory leaks after failure to lookup checksums during inode logging While logging an inode, at copy_items(), if we fail to lookup the checksums for an extent we release the destination path, free the ins_data array and then return immediately. However a previous iteration of the for loop may have added checksums to the ordered_sums list, in which case we leak the memory used by them. So fix this by making sure we iterate the ordered_sums list and free all its checksums before returning. Fixes: 3650860b90cc2a ("Btrfs: remove almost all of the BUG()'s from tree-log.c") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ea8136dcf71f8..696dd861cc3c6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4036,11 +4036,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, fs_info->csum_root, ds + cs, ds + cs + cl - 1, &ordered_sums, 0); - if (ret) { - btrfs_release_path(dst_path); - kfree(ins_data); - return ret; - } + if (ret) + break; } } } @@ -4053,7 +4050,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * we have to do this after the loop above to avoid changing the * log tree while trying to change the log tree. */ - ret = 0; while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, struct btrfs_ordered_sum, From faa008899a4db21a2df99833cb4ff6fa67009a20 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 30 Jul 2020 11:18:09 -0400 Subject: [PATCH 06/10] btrfs: make sure SB_I_VERSION doesn't get unset by remount There's some inconsistency around SB_I_VERSION handling with mount and remount. Since we don't really want it to be off ever just work around this by making sure we don't get the flag cleared on remount. There's a tiny cpu cost of setting the bit, otherwise all changes to i_version also change some of the times (ctime/mtime) so the inode needs to be synced. We wouldn't save anything by disabling it. Reported-by: Eric Sandeen CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add perf impact analysis ] Signed-off-by: David Sterba --- fs/btrfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9b4e9c4c46736..e529ddb35b87f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1960,6 +1960,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) set_bit(BTRFS_FS_OPEN, &fs_info->flags); } out: + /* + * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, + * since the absence of the flag means it can be toggled off by remount. + */ + *flags |= SB_I_VERSION; + wake_up_process(fs_info->transaction_kthread); btrfs_remount_cleanup(fs_info, old_opts); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); From c15c2ec07a26b251040943a1a9f90d3037f041e5 Mon Sep 17 00:00:00 2001 From: Boleyn Su Date: Thu, 6 Aug 2020 15:31:44 +0900 Subject: [PATCH 07/10] btrfs: check correct variable after allocation in btrfs_backref_iter_alloc The `if (!ret)` check will always be false and it may result in ret->path being dereferenced while it is a NULL pointer. Fixes: a37f232b7b65 ("btrfs: backref: introduce the skeleton of btrfs_backref_iter") CC: stable@vger.kernel.org # 5.8+ Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Boleyn Su Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ea10f7bc99abf..ea1c28ccb44ff 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2303,7 +2303,7 @@ struct btrfs_backref_iter *btrfs_backref_iter_alloc( return NULL; ret->path = btrfs_alloc_path(); - if (!ret) { + if (!ret->path) { kfree(ret); return NULL; } From 62ab2cc04ddc5dfab4e382ae167734fc111da5ed Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 3 Aug 2020 14:20:11 +0800 Subject: [PATCH 08/10] btrfs: sysfs: fix NULL pointer dereference at btrfs_sysfs_del_qgroups() [BUG] Unmounting a btrfs filesystem with quota disabled will cause the following NULL pointer dereference: BTRFS info (device dm-5): has skinny extents BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 7 PID: 637 Comm: umount Not tainted 5.8.0-rc7-next-20200731-custom #76 RIP: 0010:kobject_del+0x6/0x20 Call Trace: btrfs_sysfs_del_qgroups+0xac/0xf0 [btrfs] btrfs_free_qgroup_config+0x63/0x70 [btrfs] close_ctree+0x1f5/0x323 [btrfs] btrfs_put_super+0x15/0x17 [btrfs] generic_shutdown_super+0x72/0x110 kill_anon_super+0x18/0x30 btrfs_kill_super+0x17/0x30 [btrfs] deactivate_locked_super+0x3b/0xa0 deactivate_super+0x40/0x50 cleanup_mnt+0x135/0x190 __cleanup_mnt+0x12/0x20 task_work_run+0x64/0xb0 exit_to_user_mode_prepare+0x18a/0x190 syscall_exit_to_user_mode+0x4f/0x270 do_syscall_64+0x45/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace 37b7adca5c1d5c5d ]--- [CAUSE] Commit 079ad2fb4bf9 ("kobject: Avoid premature parent object freeing in kobject_cleanup()") changed kobject_del() that it no longer accepts NULL pointer. Before that commit, kobject_del() and kobject_put() all accept NULL pointers and just ignore such NULL pointers. But that mentioned commit needs to access the parent node, killing the old NULL pointer behavior. Unfortunately btrfs is relying on that hidden feature thus we will trigger such NULL pointer dereference. [FIX] Instead of just saving several lines, do proper fs_info->qgroups_kobj check before calling kobject_del() and kobject_put(). Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 104c80caaa744..c8df2edafd855 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1565,9 +1565,11 @@ void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info) rbtree_postorder_for_each_entry_safe(qgroup, next, &fs_info->qgroup_tree, node) btrfs_sysfs_del_one_qgroup(fs_info, qgroup); - kobject_del(fs_info->qgroups_kobj); - kobject_put(fs_info->qgroups_kobj); - fs_info->qgroups_kobj = NULL; + if (fs_info->qgroups_kobj) { + kobject_del(fs_info->qgroups_kobj); + kobject_put(fs_info->qgroups_kobj); + fs_info->qgroups_kobj = NULL; + } } /* Called when qgroups get initialized, thus there is no need for locking */ From 881a3a11c2b858fe9b69ef79ac5ee9978a266dc9 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 3 Aug 2020 11:35:06 +0200 Subject: [PATCH 09/10] btrfs: fix return value mixup in btrfs_get_extent btrfs_get_extent() sets variable ret, but out: error path expect error to be in variable err so the error code is lost. Fixes: 6bf9e4bd6a27 ("btrfs: inode: Verify inode mode to avoid NULL pointer dereference") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Pavel Machek (CIP) Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9988d754e465b..16ce82039dcf1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6627,7 +6627,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, extent_type == BTRFS_FILE_EXTENT_PREALLOC) { /* Only regular file could have regular/prealloc extent */ if (!S_ISREG(inode->vfs_inode.i_mode)) { - ret = -EUCLEAN; + err = -EUCLEAN; btrfs_crit(fs_info, "regular/prealloc extent found for non-regular inode %llu", btrfs_ino(inode)); From c57dd1f2f6a7cd1bb61802344f59ccdc5278c983 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 31 Jul 2020 19:29:11 +0800 Subject: [PATCH 10/10] btrfs: trim: fix underflow in trim length to prevent access beyond device boundary [BUG] The following script can lead to tons of beyond device boundary access: mkfs.btrfs -f $dev -b 10G mount $dev $mnt trimfs $mnt btrfs filesystem resize 1:-1G $mnt trimfs $mnt [CAUSE] Since commit 929be17a9b49 ("btrfs: Switch btrfs_trim_free_extents to find_first_clear_extent_bit"), we try to avoid trimming ranges that's already trimmed. So we check device->alloc_state by finding the first range which doesn't have CHUNK_TRIMMED and CHUNK_ALLOCATED not set. But if we shrunk the device, that bits are not cleared, thus we could easily got a range starts beyond the shrunk device size. This results the returned @start and @end are all beyond device size, then we call "end = min(end, device->total_bytes -1);" making @end smaller than device size. Then finally we goes "len = end - start + 1", totally underflow the result, and lead to the beyond-device-boundary access. [FIX] This patch will fix the problem in two ways: - Clear CHUNK_TRIMMED | CHUNK_ALLOCATED bits when shrinking device This is the root fix - Add extra safety check when trimming free device extents We check and warn if the returned range is already beyond current device. Link: https://github.com/kdave/btrfs-progs/issues/282 Fixes: 929be17a9b49 ("btrfs: Switch btrfs_trim_free_extents to find_first_clear_extent_bit") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-io-tree.h | 2 ++ fs/btrfs/extent-tree.c | 14 ++++++++++++++ fs/btrfs/volumes.c | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index f39d47a2d01a6..219a09a2b7340 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -34,6 +34,8 @@ struct io_failure_record; */ #define CHUNK_ALLOCATED EXTENT_DIRTY #define CHUNK_TRIMMED EXTENT_DEFRAG +#define CHUNK_STATE_MASK (CHUNK_ALLOCATED | \ + CHUNK_TRIMMED) enum { IO_TREE_FS_PINNED_EXTENTS, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 61ede335f6c37..de6fe176fdfb3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,7 @@ #include "delalloc-space.h" #include "block-group.h" #include "discard.h" +#include "rcu-string.h" #undef SCRAMBLE_DELAYED_REFS @@ -5668,6 +5669,19 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) &start, &end, CHUNK_TRIMMED | CHUNK_ALLOCATED); + /* Check if there are any CHUNK_* bits left */ + if (start > device->total_bytes) { + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + btrfs_warn_in_rcu(fs_info, +"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", + start, end - start + 1, + rcu_str_deref(device->name), + device->total_bytes); + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + /* Ensure we skip the reserved area in the first 1M */ start = max_t(u64, start, SZ_1M); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d7670e2a9f393..ee96c5869f57e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4720,6 +4720,10 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) } mutex_lock(&fs_info->chunk_mutex); + /* Clear all state bits beyond the shrunk device size */ + clear_extent_bits(&device->alloc_state, new_size, (u64)-1, + CHUNK_STATE_MASK); + btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->post_commit_list)) list_add_tail(&device->post_commit_list,