From d62922ba3cfc01dc42e853f90b93c525751e9383 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Sat, 12 Apr 2025 14:39:33 -0400 Subject: [PATCH 01/11] bcachefs: Prevent granting write refs when filesystem is read-only Fix a shutdown WARNING in bch2_dev_free caused by active write I/O references (ca->io_ref[WRITE]) on a device being freed. The problem occurs when: - The filesystem is marked read-only (BCH_FS_rw clear in c->flags). - A subsequent operation (e.g., error handling for device removal) incorrectly tries to grant write references back to a device. - During final shutdown, the read-only flag causes the system to skip stopping write I/O references (bch2_dev_io_ref_stop(ca, WRITE)). - The leftover active write reference triggers the WARN_ON in bch2_dev_free. Prevent this by checking if the filesystem is read-only before attempting to grant write references to a device in the problematic code path. Ensure consistency between the filesystem state flag and the device I/O reference state during shutdown. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b79e80a435e0..788e870bfef6 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1757,7 +1757,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) up_write(&c->state_lock); return 0; err: - if (ca->mi.state == BCH_MEMBER_STATE_rw && + if (test_bit(BCH_FS_rw, &c->flags) && + ca->mi.state == BCH_MEMBER_STATE_rw && !percpu_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); up_write(&c->state_lock); From 806776ad9c20c6589f957212ef017e75760a08cd Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Sat, 12 Apr 2025 18:20:49 +0800 Subject: [PATCH 02/11] bcachefs: Add missing error handling Reported-by: syzbot+d10151bf01574a09a915@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d2b07f602da9..606d684e6f23 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1125,7 +1125,10 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - bch2_fs_journal_start(&c->journal, 1); + ret = bch2_fs_journal_start(&c->journal, 1); + if (ret) + goto err; + set_bit(BCH_FS_accounting_replay_done, &c->flags); bch2_journal_set_replay_done(&c->journal); From 7dfd42a07acfeaac4e36620927ea227b1e8da3e9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Apr 2025 21:04:17 -0400 Subject: [PATCH 03/11] bcachefs: Don't print data read retry success on non-errors We may end up in the data read retry path when reading cached data and racing with invalidation, or on checksum error when we were reading into a userspace buffer that might have been modified while the read was in flight. These aren't real errors, so we shouldn't print the 'retry success' message. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index fd627c8d1053..de8ccd593ec7 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -487,6 +487,8 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; + int orig_error = rbio->ret; + struct btree_trans *trans = bch2_trans_get(c); trace_io_read_retry(&rbio->bio); @@ -519,7 +521,9 @@ static void bch2_rbio_retry(struct work_struct *work) if (ret) { rbio->ret = ret; rbio->bio.bi_status = BLK_STS_IOERR; - } else { + } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace && + orig_error != -BCH_ERR_data_read_ptr_stale_race && + !failed.nr) { struct printbuf buf = PRINTBUF; lockrestart_do(trans, From 345731a389fa145c76bedebec6e4f3db4cb29486 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Apr 2025 14:53:34 -0400 Subject: [PATCH 04/11] bcachefs: fix bch2_dev_usage_full_read_fast() One reference to bch_dev_usage wasn't updated, which meant we weren't reading the full bch_dev_usage_full - oops. Fixes: 955ba7b5ea03 ("bcachefs: bch_dev_usage_full") Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 3 ++- fs/bcachefs/buckets.h | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index fea61e60a9ee..4ef261e8db4f 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -37,7 +37,8 @@ void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage) { memset(usage, 0, sizeof(*usage)); - acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); + acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, + sizeof(struct bch_dev_usage_full) / sizeof(u64)); } static u64 reserve_factor(u64 r) diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 1c38b165f48b..8d75b27a1418 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -242,11 +242,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca, /* Filesystem usage: */ -static inline unsigned dev_usage_u64s(void) -{ - return sizeof(struct bch_dev_usage) / sizeof(u64); -} - struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); From 8692c7db9a66c5efe7956afc48d21bc994289d95 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Apr 2025 16:28:41 -0400 Subject: [PATCH 05/11] bcachefs: btree_root_unreadable_and_scan_found_nothing now AUTOFIX This will likely mean that the btree had only one node - there was nothing or almost nothing in it, and we should reconstruct and continue. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 5d43e3504386..512c56ee5d94 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -290,7 +290,7 @@ enum bch_fsck_flags { x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ - x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ x(snapshot_node_missing, 264, 0) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ From a06459657e4ecc4b35a2ee7a9bf5359ecfb077cf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 08:58:22 -0400 Subject: [PATCH 06/11] bcachefs: Silence extent_poisoned error messages extent poisoning is partly so that we don't keep spewing the dmesg log when we've got unreadable data - we don't want to print these. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 2 +- fs/bcachefs/extents.c | 2 +- fs/bcachefs/io_read.c | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index c8696f01eb14..a615e4852ded 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -287,7 +287,7 @@ x(EIO, mark_stripe) \ x(EIO, stripe_reconstruct) \ x(EIO, key_type_error) \ - x(EIO, extent_poisened) \ + x(EIO, extent_poisoned) \ x(EIO, missing_indirect_extent) \ x(EIO, invalidate_stripe_to_dev) \ x(EIO, no_encryption_key) \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index ae7c7a177e10..dca2b8425cc0 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -139,7 +139,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) - return -BCH_ERR_extent_poisened; + return -BCH_ERR_extent_poisoned; rcu_read_lock(); const union bch_extent_entry *entry; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index de8ccd593ec7..def4a26a3b45 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -1349,14 +1349,16 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, bch2_trans_iter_exit(trans, &iter); - if (ret) { - struct printbuf buf = PRINTBUF; - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, &buf, inum, - bvec_iter.bi_sector << 9)); - prt_printf(&buf, "read error: %s", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); + if (unlikely(ret)) { + if (ret != -BCH_ERR_extent_poisoned) { + struct printbuf buf = PRINTBUF; + lockrestart_do(trans, + bch2_inum_offset_err_msg_trans(trans, &buf, inum, + bvec_iter.bi_sector << 9)); + prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } rbio->bio.bi_status = BLK_STS_IOERR; rbio->ret = ret; From 14bcf982f428cd98bababe30d4059f93d39a2f14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 08:44:40 -0400 Subject: [PATCH 07/11] bcachefs: Print version_incompat_allowed on startup Let users know if incompatible features aren't enabled Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 788e870bfef6..f1eef5e7698b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1013,6 +1013,11 @@ static void print_mount_opts(struct bch_fs *c) bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); } + if (c->sb.version_incompat_allowed != c->sb.version) { + prt_printf(&p, "\n allowing incompatible features above "); + bch2_version_to_text(&p, c->sb.version_incompat_allowed); + } + bch_info(c, "%s", p.buf); printbuf_exit(&p); } From c3b02e6d67acb9893b6c219d32cdc75a28ffea65 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 09:27:22 -0400 Subject: [PATCH 08/11] bcachefs: Log message when incompat version requested but not enabled Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 ++ fs/bcachefs/fs-ioctl.c | 2 +- fs/bcachefs/super-io.c | 20 ++++++++++++++++++-- fs/bcachefs/super.c | 1 + 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5cb0fc384ac0..75f7408da173 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -788,6 +788,8 @@ struct bch_fs { unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; + DARRAY(enum bcachefs_metadata_version) + incompat_versions_requested; #ifdef CONFIG_UNICODE struct unicode_map *cf_encoding; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index c1553e44e049..14886e1d4d6d 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -69,7 +69,7 @@ static int bch2_inode_flags_set(struct btree_trans *trans, if (ret < 0) return ret; - ret = bch2_request_incompat_feature(c,bcachefs_metadata_version_casefolding); + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); if (ret) return ret; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e27422b6d9c6..25b6bce05c3c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -73,14 +73,30 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v ? 0 : -BCH_ERR_may_not_use_incompat_feature; + mutex_lock(&c->sb_lock); if (!ret) { - mutex_lock(&c->sb_lock); SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); + } else { + darray_for_each(c->incompat_versions_requested, i) + if (version == *i) + goto out; + + darray_push(&c->incompat_versions_requested, version); + struct printbuf buf = PRINTBUF; + prt_str(&buf, "requested incompat feature "); + bch2_version_to_text(&buf, version); + prt_str(&buf, " currently not enabled"); + prt_printf(&buf, "\n set version_upgrade=incompat to enable"); + + bch_notice(c, "%s", buf.buf); + printbuf_exit(&buf); } +out: + mutex_unlock(&c->sb_lock); + return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index f1eef5e7698b..e8a17ed1615d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -589,6 +589,7 @@ static void __bch2_fs_free(struct bch_fs *c) free_percpu(c->online_reserved); } + darray_exit(&c->incompat_versions_requested); darray_exit(&c->btree_roots_extra); free_percpu(c->pcpu); free_percpu(c->usage); From 72b5259053903552ee0314eb422c990c29eb8546 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 16:58:43 -0400 Subject: [PATCH 09/11] bcachefs: snapshot_node_missing is now autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 512c56ee5d94..dc53d25c7cbb 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -291,7 +291,7 @@ enum bch_fsck_flags { x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ - x(snapshot_node_missing, 264, 0) \ + x(snapshot_node_missing, 264, FSCK_AUTOFIX) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ x(sb_clean_entry_overrun, 267, 0) \ From 8dd3804bf409095901b2b44b70e2e082f726f556 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 16 Apr 2025 18:28:25 -0400 Subject: [PATCH 10/11] bcachefs: Add missing READ_ONCE() for metadata replicas If we race with the user changing the metadata_replicas setting, this could cause us to get an incorrectly sized disk reservation. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 55fbeeb8eaaa..44b5fe430370 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1221,7 +1221,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_disk_reservation_get(c, &as->disk_res, (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c), - c->opts.metadata_replicas, + READ_ONCE(c->opts.metadata_replicas), disk_res_flags); if (ret) goto err; From 261592ba06aa44001ab95fd47bafa4225bab25cf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Apr 2025 14:09:56 -0400 Subject: [PATCH 11/11] bcachefs: Fix snapshotting a subvolume, then renaming it Subvolume roots and the dirents that point to them are special; they don't obey the normal snapshot versioning rules because they cross snapshot boundaries. We don't keep around older versions of subvolume dirents on rename - we don't need to, because subvolume dirents are only visible in the parent subvolume, and we wouldn't be able to match up the different dirent and inode versions due to crossing the snapshot ID boundary. That means that when we rename a subvolume, that's been snapshotted, the older version of the subvolume root will become dangling - it won't have a dirent that points to it. That's expected, we just need to tell fsck that this is ok. Fixes: https://github.com/koverstreet/bcachefs/issues/856 Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 18308f3d64a1..7b25cedd3e40 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -321,6 +321,31 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) inode->bi_subvol == BCACHEFS_ROOT_SUBVOL) return false; + /* + * Subvolume roots are special: older versions of subvolume roots may be + * disconnected, it's only the newest version that matters. + * + * We only keep a single dirent pointing to a subvolume root, i.e. + * older versions of snapshots will not have a different dirent pointing + * to the same subvolume root. + * + * This is because dirents that point to subvolumes are only visible in + * the parent subvolume - versioning is not needed - and keeping them + * around would break fsck, because when we're crossing subvolumes we + * don't have a consistent snapshot ID to do check the inode <-> dirent + * relationships. + * + * Thus, a subvolume root that's been renamed after a snapshot will have + * a disconnected older version - that's expected. + * + * Note that taking a snapshot always updates the root inode (to update + * the dirent backpointer), so a subvolume root inode with + * BCH_INODE_has_child_snapshot is never visible. + */ + if (inode->bi_subvol && + (inode->bi_flags & BCH_INODE_has_child_snapshot)) + return false; + return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked); } @@ -1007,6 +1032,23 @@ static int check_inode_dirent_inode(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) return ret; + if ((ret || dirent_points_to_inode_nowarn(d, inode)) && + inode->bi_subvol && + (inode->bi_flags & BCH_INODE_has_child_snapshot)) { + /* Older version of a renamed subvolume root: we won't have a + * correct dirent for it. That's expected, see + * inode_should_reattach(). + * + * We don't clear the backpointer field when doing the rename + * because there might be arbitrarily many versions in older + * snapshots. + */ + inode->bi_dir = 0; + inode->bi_dir_offset = 0; + *write_inode = true; + goto out; + } + if (fsck_err_on(ret, trans, inode_points_to_missing_dirent, "inode points to missing dirent\n%s", @@ -1027,7 +1069,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, inode->bi_dir_offset = 0; *write_inode = true; } - +out: ret = 0; fsck_err: bch2_trans_iter_exit(trans, &dirent_iter);