From 72e71bf0298c7ed985bcd0d3c7ff4ca19de60373 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 6 May 2024 10:14:13 -0400
Subject: [PATCH 01/20] bcachefs: Fix a scheduler splat in
 __bch2_next_write_buffer_flush_journal_buf()

We're using mutex_lock() inside a wait_event() conditional -
prepare_to_wait() has already flipped task state, so potentially
blocking ops need annotation.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/journal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 9c9a25dbd6137..9c2af544251af 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -870,6 +870,8 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
 {
 	struct journal_buf *ret = NULL;
 
+	/* We're inside wait_event(), but using mutex_lock(: */
+	sched_annotate_sleep();
 	mutex_lock(&j->buf_lock);
 	spin_lock(&j->lock);
 	max_seq = min(max_seq, journal_cur_seq(j));

From 7ffec9ccdc6ad8356792f9a7823b1fe9c8a10cbf Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 3 May 2024 10:55:17 -0400
Subject: [PATCH 02/20] bcachefs: don't free error pointers

Reported-by: syzbot+3333603f569fc2ef258c@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/recovery.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index be5b476193270..8091d0686029f 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -902,7 +902,8 @@ int bch2_fs_recovery(struct bch_fs *c)
 		bch2_journal_keys_put_initial(c);
 		bch2_find_btree_nodes_exit(&c->found_btree_nodes);
 	}
-	kfree(clean);
+	if (!IS_ERR(clean))
+		kfree(clean);
 
 	if (!ret &&
 	    test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) &&

From a2ddaf965f6a15c316f483e7446fbe3d81fba27c Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 3 May 2024 11:06:54 -0400
Subject: [PATCH 03/20] bcachefs: bucket_pos_to_bp_noerror()

We don't want the assert when we're checking if the backpointer is
valid.

Reported-by: syzbot+bf7215c0525098e7747a@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/backpointers.c |  2 +-
 fs/bcachefs/backpointers.h | 14 ++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index a200442010025..af7a71de1bdfe 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -54,7 +54,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
 	int ret = 0;
 
 	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
-			 !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+			 !bpos_eq(bp.k->p, bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset)),
 			 c, err,
 			 backpointer_bucket_offset_wrong,
 			 "backpointer bucket_offset wrong");
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index 85949b9fd880c..c1b274eadda14 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -45,6 +45,15 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
 	return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
 }
 
+static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca,
+						   struct bpos bucket,
+						   u64 bucket_offset)
+{
+	return POS(bucket.inode,
+		   (bucket_to_sector(ca, bucket.offset) <<
+		    MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+}
+
 /*
  * Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
  */
@@ -53,10 +62,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
 					   u64 bucket_offset)
 {
 	struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
-	struct bpos ret = POS(bucket.inode,
-			      (bucket_to_sector(ca, bucket.offset) <<
-			       MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
-
+	struct bpos ret = bucket_pos_to_bp_noerror(ca, bucket, bucket_offset);
 	EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
 	return ret;
 }

From b30b70ad8bffcde513d34d525820ec411f48e3d7 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 3 May 2024 11:39:53 -0400
Subject: [PATCH 04/20] bcachefs: Fix early error path in
 bch2_fs_btree_key_cache_exit()

Reported-by: syzbot+a35cdb62ec34d44fb062@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_key_cache.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index e8c1c530cd95f..7dafa1accec22 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -956,13 +956,15 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 	}
 
 #ifdef __KERNEL__
-	for_each_possible_cpu(cpu) {
-		struct btree_key_cache_freelist *f =
-			per_cpu_ptr(bc->pcpu_freed, cpu);
-
-		for (i = 0; i < f->nr; i++) {
-			ck = f->objs[i];
-			list_add(&ck->list, &items);
+	if (bc->pcpu_freed) {
+		for_each_possible_cpu(cpu) {
+			struct btree_key_cache_freelist *f =
+				per_cpu_ptr(bc->pcpu_freed, cpu);
+
+			for (i = 0; i < f->nr; i++) {
+				ck = f->objs[i];
+				list_add(&ck->list, &items);
+			}
 		}
 	}
 #endif

From 4a8521b6bb81abba9d80d60b80908c77c9236ced Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 3 May 2024 11:31:22 -0400
Subject: [PATCH 05/20] bcachefs: Inodes need extra padding for
 varint_decode_fast()

Reported-by: syzbot+66b9b74f6520068596a9@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/io_write.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index f137252bccc57..b72cf31f72743 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -199,9 +199,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
 						    u64 new_i_size,
 						    s64 i_sectors_delta)
 {
-	struct btree_iter iter;
-	struct bkey_i *k;
-	struct bkey_i_inode_v3 *inode;
 	/*
 	 * Crazy performance optimization:
 	 * Every extent update needs to also update the inode: the inode trigger
@@ -214,25 +211,36 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
 	 * lost, but that's fine.
 	 */
 	unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
-	int ret;
 
-	k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes,
+	struct btree_iter iter;
+	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
 			      SPOS(0,
 				   extent_iter->pos.inode,
 				   extent_iter->snapshot),
 			      BTREE_ITER_CACHED);
-	ret = PTR_ERR_OR_ZERO(k);
+	int ret = bkey_err(k);
 	if (unlikely(ret))
 		return ret;
 
-	if (unlikely(k->k.type != KEY_TYPE_inode_v3)) {
-		k = bch2_inode_to_v3(trans, k);
-		ret = PTR_ERR_OR_ZERO(k);
+	/*
+	 * varint_decode_fast(), in the inode .invalid method, reads up to 7
+	 * bytes past the end of the buffer:
+	 */
+	struct bkey_i *k_mut = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8);
+	ret = PTR_ERR_OR_ZERO(k_mut);
+	if (unlikely(ret))
+		goto err;
+
+	bkey_reassemble(k_mut, k);
+
+	if (unlikely(k_mut->k.type != KEY_TYPE_inode_v3)) {
+		k_mut = bch2_inode_to_v3(trans, k_mut);
+		ret = PTR_ERR_OR_ZERO(k_mut);
 		if (unlikely(ret))
 			goto err;
 	}
 
-	inode = bkey_i_to_inode_v3(k);
+	struct bkey_i_inode_v3 *inode = bkey_i_to_inode_v3(k_mut);
 
 	if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) &&
 	    new_i_size > le64_to_cpu(inode->v.bi_size)) {

From feb077c1774e559cddfc41ef26c864780d158fe2 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 3 May 2024 17:13:21 -0400
Subject: [PATCH 06/20] bcachefs: Fix refcount put in sb_field_resize error
 path

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/super-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 08ea3dbbbe97c..e7527d551e3c8 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -232,7 +232,7 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
 			struct bch_sb_handle *dev_sb = &ca->disk_sb;
 
 			if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
-				percpu_ref_put(&ca->ref);
+				percpu_ref_put(&ca->io_ref);
 				return NULL;
 			}
 		}

From 1267df40acb2da62b1641abae26132411d093fb3 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 4 May 2024 12:29:46 -0400
Subject: [PATCH 07/20] bcachefs: Initialize bch_write_op->failed in inline
 data path

Normally this is initialized in __bch2_write(), which is executed in a
loop, but the inline data path skips this.

Reported-by: syzbot+fd3ccb331eb21f05d13b@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/io_write.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index b72cf31f72743..40d7df7607dfd 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1513,6 +1513,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 	unsigned sectors;
 	int ret;
 
+	memset(&op->failed, 0, sizeof(op->failed));
+
 	op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
 	op->flags |= BCH_WRITE_DONE;
 

From 3a2d0259274202432e5119463dd4cf5f9fabed98 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 4 May 2024 12:51:49 -0400
Subject: [PATCH 08/20] bcachefs: Fix bch2_dev_lookup() refcounting

bch2_dev_lookup() is supposed to take a ref on the device it returns, but
for_each_member_device() takes refs as it iterates,
for_each_member_device_rcu() does not.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/super.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 88e214c609bb2..c2c80e6890aee 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -2004,13 +2004,9 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 /* return with ref on ca->ref: */
 struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
 {
-	rcu_read_lock();
-	for_each_member_device_rcu(c, ca, NULL)
-		if (!strcmp(name, ca->name)) {
-			rcu_read_unlock();
+	for_each_member_device(c, ca)
+		if (!strcmp(name, ca->name))
 			return ca;
-		}
-	rcu_read_unlock();
 	return ERR_PTR(-BCH_ERR_ENOENT_dev_not_found);
 }
 

From 18b4abcead744322feb90ba31450e7d770e928bd Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 4 May 2024 12:55:44 -0400
Subject: [PATCH 09/20] bcachefs: Fix lifetime issue in device iterator helpers

bch2_get_next_dev() and bch2_get_next_online_dev() iterate over devices,
dropping and taking refs as they go; we can't access the previous device
(for ca->dev_idx) after we've dropped our ref to it, unless we take
rcu_read_lock() first.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/sb-members.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 5efa64eca5f85..5bf27d30ca296 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -107,10 +107,10 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *
 
 static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca)
 {
+	rcu_read_lock();
 	if (ca)
 		percpu_ref_put(&ca->ref);
 
-	rcu_read_lock();
 	if ((ca = __bch2_next_dev(c, ca, NULL)))
 		percpu_ref_get(&ca->ref);
 	rcu_read_unlock();
@@ -132,10 +132,10 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
 						       struct bch_dev *ca,
 						       unsigned state_mask)
 {
+	rcu_read_lock();
 	if (ca)
 		percpu_ref_put(&ca->io_ref);
 
-	rcu_read_lock();
 	while ((ca = __bch2_next_dev(c, ca, NULL)) &&
 	       (!((1 << ca->mi.state) & state_mask) ||
 		!percpu_ref_tryget(&ca->io_ref)))

From db42549d402cb44fe67c95d08f1a9ea902d32e7e Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 4 May 2024 13:26:37 -0400
Subject: [PATCH 10/20] bcachefs: Add a better limit for maximum number of
 buckets

The bucket_gens array is a single array allocation (one byte per
bucket), and kernel allocations are still limited to INT_MAX.

Check this limit to avoid failing the bucket_gens array allocation.

Reported-by: syzbot+b29f436493184ea42e2b@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h | 6 ++++++
 fs/bcachefs/errcode.h         | 1 +
 fs/bcachefs/sb-members.c      | 6 +++---
 fs/bcachefs/super.c           | 7 +++++++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index f7fbfccd2b1e4..8345a2b2d05be 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -591,6 +591,12 @@ struct bch_member {
 	__le64			btree_allocated_bitmap;
 };
 
+/*
+ * This limit comes from the bucket_gens array - it's a single allocation, and
+ * kernel allocation are limited to INT_MAX
+ */
+#define BCH_MEMBER_NBUCKETS_MAX	(INT_MAX - 64)
+
 #define BCH_MEMBER_V1_BYTES	56
 
 LE64_BITMASK(BCH_MEMBER_STATE,		struct bch_member, flags,  0,  4)
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 01a79fa3eacb2..dbe35b80bc0b8 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -175,6 +175,7 @@
 	x(EINVAL,			block_size_too_small)			\
 	x(EINVAL,			bucket_size_too_small)			\
 	x(EINVAL,			device_size_too_small)			\
+	x(EINVAL,			device_size_too_big)			\
 	x(EINVAL,			device_not_a_member_of_filesystem)	\
 	x(EINVAL,			device_has_been_removed)		\
 	x(EINVAL,			device_splitbrain)			\
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 5b8e621ac5eb5..44b3f0cb7b497 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -124,9 +124,9 @@ static int validate_member(struct printbuf *err,
 			   struct bch_sb *sb,
 			   int i)
 {
-	if (le64_to_cpu(m.nbuckets) > LONG_MAX) {
-		prt_printf(err, "device %u: too many buckets (got %llu, max %lu)",
-			   i, le64_to_cpu(m.nbuckets), LONG_MAX);
+	if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) {
+		prt_printf(err, "device %u: too many buckets (got %llu, max %u)",
+			   i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX);
 		return -BCH_ERR_invalid_sb_members;
 	}
 
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index c2c80e6890aee..dddf57ec4511f 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1959,6 +1959,13 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 		goto err;
 	}
 
+	if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) {
+		bch_err(ca, "New device size too big (%llu greater than max %u)",
+			nbuckets, BCH_MEMBER_NBUCKETS_MAX);
+		ret = -BCH_ERR_device_size_too_big;
+		goto err;
+	}
+
 	if (bch2_dev_is_online(ca) &&
 	    get_capacity(ca->disk_sb.bdev->bd_disk) <
 	    ca->mi.bucket_size * nbuckets) {

From 9a0ec045110dbaad4b8d609142b534f913354101 Mon Sep 17 00:00:00 2001
From: Reed Riley <reed@riley.engineer>
Date: Sat, 4 May 2024 22:12:23 +0000
Subject: [PATCH 11/20] bcachefs: fix overflow in fiemap

filefrag (and potentially other utilities that call fiemap) sometimes
pass ULONG_MAX as the length.  fiemap_prep clamps excessively large
lengths - but the calculation of end can overflow if it occurs before
calling fiemap_prep.  When this happens, filefrag assumes it has read to
the end and exits.

Signed-off-by: Reed Riley <reed@riley.engineer>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index fce690007edfc..6f114803c6f23 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -964,7 +964,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct bkey_buf cur, prev;
-	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
 	unsigned offset_into_extent, sectors;
 	bool have_extent = false;
 	u32 snapshot;
@@ -974,6 +973,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 	if (ret)
 		return ret;
 
+	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
 	if (start + len < start)
 		return -EINVAL;
 

From 6b8cbfc3db7582d6f26c6b757d8e949174641709 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 5 May 2024 22:02:28 -0400
Subject: [PATCH 12/20] bcachefs: Fix assert in bch2_alloc_v4_invalid()

Reported-by: syzbot+10827fa6b176e1acf1d0@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/alloc_background.c | 4 ++--
 fs/bcachefs/alloc_background.h | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 4ff56fa4d5392..534ba2b02bd65 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -244,10 +244,10 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
 	struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
 	int ret = 0;
 
-	bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err,
+	bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err,
 			 alloc_v4_val_size_bad,
 			 "bad val size (%u > %zu)",
-			 alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
+			 alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k));
 
 	bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
 			 BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err,
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 052b2fac25d69..2790e516383d5 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -126,13 +126,17 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_
 	return pos;
 }
 
-static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
+static inline unsigned alloc_v4_u64s_noerror(const struct bch_alloc_v4 *a)
 {
-	unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+	return (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
 			BCH_ALLOC_V4_U64s_V0) +
 		BCH_ALLOC_V4_NR_BACKPOINTERS(a) *
 		(sizeof(struct bch_backpointer) / sizeof(u64));
+}
 
+static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
+{
+	unsigned ret = alloc_v4_u64s_noerror(a);
 	BUG_ON(ret > U8_MAX - BKEY_U64s);
 	return ret;
 }

From f39055220f6f98a180e3503fe05bbf9921c425c8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 5 May 2024 22:28:00 -0400
Subject: [PATCH 13/20] bcachefs: Add missing validation for superblock section
 clean

We were forgetting to check for jset entries that overrun the end of the
section - both in validate and to_text(); to_text() needs to be safe for
types that fail to validate.

Reported-by: syzbot+c48865e11e7e893ec4ab@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/sb-clean.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
index 35ca3f138de6f..194e55b11137b 100644
--- a/fs/bcachefs/sb-clean.c
+++ b/fs/bcachefs/sb-clean.c
@@ -278,6 +278,17 @@ static int bch2_sb_clean_validate(struct bch_sb *sb,
 		return -BCH_ERR_invalid_sb_clean;
 	}
 
+	for (struct jset_entry *entry = clean->start;
+	     entry != vstruct_end(&clean->field);
+	     entry = vstruct_next(entry)) {
+		if ((void *) vstruct_next(entry) > vstruct_end(&clean->field)) {
+			prt_str(err, "entry type ");
+			bch2_prt_jset_entry_type(err, le16_to_cpu(entry->type));
+			prt_str(err, " overruns end of section");
+			return -BCH_ERR_invalid_sb_clean;
+		}
+	}
+
 	return 0;
 }
 
@@ -295,6 +306,9 @@ static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb,
 	for (entry = clean->start;
 	     entry != vstruct_end(&clean->field);
 	     entry = vstruct_next(entry)) {
+		if ((void *) vstruct_next(entry) > vstruct_end(&clean->field))
+			break;
+
 		if (entry->type == BCH_JSET_ENTRY_btree_keys &&
 		    !entry->u64s)
 			continue;

From 2bb9600d5d4735953c47dd1ee99382c68dd04caa Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 5 May 2024 22:33:05 -0400
Subject: [PATCH 14/20] bcachefs: Guard against unknown k.k->type in
 __bkey_invalid()

For forwards compatibility we have to allow unknown key types, and only
run the checks that make sense against them.

Fix a missing guard on k.k->type being known.

Reported-by: syzbot+ae4dc916da3ce51f284f@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bkey_methods.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index db336a43fc083..a275a9e8e341a 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -171,8 +171,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
 	if (type >= BKEY_TYPE_NR)
 		return 0;
 
-	bkey_fsck_err_on((type == BKEY_TYPE_btree ||
-			  (flags & BKEY_INVALID_COMMIT)) &&
+	bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
+			 (type == BKEY_TYPE_btree || (flags & BKEY_INVALID_COMMIT)) &&
 			 !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
 			 bkey_invalid_type_for_btree,
 			 "invalid key type for btree %s (%s)",

From 0ec5b3b7ccfcdca02ab322abf86455d0050ae98f Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 5 May 2024 22:44:27 -0400
Subject: [PATCH 15/20] bcachefs: Fix shift-by-64 in bformat_needs_redo()

Ancient versions of bcachefs produced packed formats that could
represent keys that our in memory format cannot represent;
bformat_needs_redo() has some tricky shifts to check for this sort of
overflow.

Reported-by: syzbot+594427aebfefeebe91c6@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/move.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index bf68ea49447b9..4d94b7742dbba 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -968,24 +968,30 @@ static bool migrate_btree_pred(struct bch_fs *c, void *arg,
 	return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
 }
 
+/*
+ * Ancient versions of bcachefs produced packed formats which could represent
+ * keys that the in memory format cannot represent; this checks for those
+ * formats so we can get rid of them.
+ */
 static bool bformat_needs_redo(struct bkey_format *f)
 {
-	unsigned i;
-
-	for (i = 0; i < f->nr_fields; i++) {
+	for (unsigned i = 0; i < f->nr_fields; i++) {
+		unsigned f_bits = f->bits_per_field[i];
 		unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
 		u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
 		u64 field_offset = le64_to_cpu(f->field_offset[i]);
 
-		if (f->bits_per_field[i] > unpacked_bits)
+		if (f_bits > unpacked_bits)
 			return true;
 
-		if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
+		if ((f_bits == unpacked_bits) && field_offset)
 			return true;
 
-		if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
-		     unpacked_mask) <
-		    field_offset)
+		u64 f_mask = f_bits
+			? ~((~0ULL << (f_bits - 1)) << 1)
+			: 0;
+
+		if (((field_offset + f_mask) & unpacked_mask) < field_offset)
 			return true;
 	}
 

From 8060bf1d83f7d404bacb0e5a38f2d4d8f4c9dfb7 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 5 May 2024 22:56:54 -0400
Subject: [PATCH 16/20] bcachefs: Fix snapshot_t() usage in
 bch2_fs_quota_read_inode()

bch2_fs_quota_read_inode() wasn't entirely updated to the
bch2_snapshot_tree() helper, which takes rcu lock.

Reported-by: syzbot+a3a9a61224ed3b7f0010@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/quota.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index e68b34eab90a9..556da07381069 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -560,13 +560,11 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct bch_inode_unpacked u;
 	struct bch_snapshot_tree s_t;
-	int ret;
+	u32 tree = bch2_snapshot_tree(c, k.k->p.snapshot);
 
-	ret = bch2_snapshot_tree_lookup(trans,
-			bch2_snapshot_tree(c, k.k->p.snapshot), &s_t);
+	int ret = bch2_snapshot_tree_lookup(trans, tree, &s_t);
 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
-			"%s: snapshot tree %u not found", __func__,
-			snapshot_t(c, k.k->p.snapshot)->tree);
+			"%s: snapshot tree %u not found", __func__, tree);
 	if (ret)
 		return ret;
 

From 88ab10186c44d0a8b90842beab8648b5fd14432d Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 6 May 2024 08:40:46 -0400
Subject: [PATCH 17/20] bcachefs: Add missing skcipher_request_set_callback()
 call

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/checksum.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index 7ed779b411f61..088fd2e7bdf12 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -102,6 +102,7 @@ static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm,
 	int ret;
 
 	skcipher_request_set_sync_tfm(req, tfm);
+	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
 
 	ret = crypto_skcipher_encrypt(req);

From 71dac2482ad3c8d4a8b8998a96751f009bad895f Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 6 May 2024 09:10:29 -0400
Subject: [PATCH 18/20] bcachefs: BCH_SB_LAYOUT_SIZE_BITS_MAX

Define a constant for the max superblock size, to avoid a too-large
shift.

Reported-by: syzbot+a8b0fb419355c91dda7f@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h | 2 ++
 fs/bcachefs/super-io.c        | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 8345a2b2d05be..2e8b1a489c209 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -903,6 +903,8 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re
 #define BCH_SB_SECTOR			8
 #define BCH_SB_MEMBERS_MAX		64 /* XXX kill */
 
+#define BCH_SB_LAYOUT_SIZE_BITS_MAX	16 /* 32 MB */
+
 struct bch_sb_layout {
 	__uuid_t		magic;	/* bcachefs superblock UUID */
 	__u8			layout_type;
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index e7527d551e3c8..989d16bba8f08 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -649,7 +649,7 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf
 
 	bytes = vstruct_bytes(sb->sb);
 
-	if (bytes > 512 << sb->sb->layout.sb_max_size_bits) {
+	if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) {
 		prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)",
 		       bytes, 512UL << sb->sb->layout.sb_max_size_bits);
 		return -BCH_ERR_invalid_sb_too_big;

From 54541c1f78e12a78487ae63e2e199a7d4f6dbd26 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 6 May 2024 20:49:24 -0400
Subject: [PATCH 19/20] bcachefs: Fix race in bch2_write_super()

bch2_write_super() was looping over online devices multiple times -
dropping and retaking io_ref each time.

This meant it could race with device removal; it could increment the
sequence number on a device but fail to write it - and then if the
device was re-added, it would get confused the next time around thinking
a superblock write was silently dropped.

Fix this by taking io_ref once, and stashing pointers to online devices
in a darray.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/super-io.c | 47 ++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 989d16bba8f08..bfdb15e7d778e 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -923,6 +923,7 @@ int bch2_write_super(struct bch_fs *c)
 	struct bch_devs_mask sb_written;
 	bool wrote, can_mount_without_written, can_mount_with_written;
 	unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
+	DARRAY(struct bch_dev *) online_devices = {};
 	int ret = 0;
 
 	trace_and_count(c, write_super, c, _RET_IP_);
@@ -935,6 +936,15 @@ int bch2_write_super(struct bch_fs *c)
 	closure_init_stack(cl);
 	memset(&sb_written, 0, sizeof(sb_written));
 
+	for_each_online_member(c, ca) {
+		ret = darray_push(&online_devices, ca);
+		if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
+			percpu_ref_put(&ca->io_ref);
+			goto out;
+		}
+		percpu_ref_get(&ca->io_ref);
+	}
+
 	/* Make sure we're using the new magic numbers: */
 	c->disk_sb.sb->magic = BCHFS_MAGIC;
 	c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
@@ -942,8 +952,8 @@ int bch2_write_super(struct bch_fs *c)
 	le64_add_cpu(&c->disk_sb.sb->seq, 1);
 
 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
-	for_each_online_member(c, ca)
-		__bch2_members_v2_get_mut(mi, ca->dev_idx)->seq = c->disk_sb.sb->seq;
+	darray_for_each(online_devices, ca)
+		__bch2_members_v2_get_mut(mi, (*ca)->dev_idx)->seq = c->disk_sb.sb->seq;
 	c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds());
 
 	if (test_bit(BCH_FS_error, &c->flags))
@@ -959,16 +969,15 @@ int bch2_write_super(struct bch_fs *c)
 	bch2_sb_errors_from_cpu(c);
 	bch2_sb_downgrade_update(c);
 
-	for_each_online_member(c, ca)
-		bch2_sb_from_fs(c, ca);
+	darray_for_each(online_devices, ca)
+		bch2_sb_from_fs(c, (*ca));
 
-	for_each_online_member(c, ca) {
+	darray_for_each(online_devices, ca) {
 		printbuf_reset(&err);
 
-		ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE);
+		ret = bch2_sb_validate(&(*ca)->disk_sb, &err, WRITE);
 		if (ret) {
 			bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf);
-			percpu_ref_put(&ca->io_ref);
 			goto out;
 		}
 	}
@@ -995,16 +1004,18 @@ int bch2_write_super(struct bch_fs *c)
 		return -BCH_ERR_sb_not_downgraded;
 	}
 
-	for_each_online_member(c, ca) {
-		__set_bit(ca->dev_idx, sb_written.d);
-		ca->sb_write_error = 0;
+	darray_for_each(online_devices, ca) {
+		__set_bit((*ca)->dev_idx, sb_written.d);
+		(*ca)->sb_write_error = 0;
 	}
 
-	for_each_online_member(c, ca)
-		read_back_super(c, ca);
+	darray_for_each(online_devices, ca)
+		read_back_super(c, *ca);
 	closure_sync(cl);
 
-	for_each_online_member(c, ca) {
+	darray_for_each(online_devices, cap) {
+		struct bch_dev *ca = *cap;
+
 		if (ca->sb_write_error)
 			continue;
 
@@ -1031,17 +1042,20 @@ int bch2_write_super(struct bch_fs *c)
 
 	do {
 		wrote = false;
-		for_each_online_member(c, ca)
+		darray_for_each(online_devices, cap) {
+			struct bch_dev *ca = *cap;
 			if (!ca->sb_write_error &&
 			    sb < ca->disk_sb.sb->layout.nr_superblocks) {
 				write_one_super(c, ca, sb);
 				wrote = true;
 			}
+		}
 		closure_sync(cl);
 		sb++;
 	} while (wrote);
 
-	for_each_online_member(c, ca) {
+	darray_for_each(online_devices, cap) {
+		struct bch_dev *ca = *cap;
 		if (ca->sb_write_error)
 			__clear_bit(ca->dev_idx, sb_written.d);
 		else
@@ -1077,6 +1091,9 @@ int bch2_write_super(struct bch_fs *c)
 out:
 	/* Make new options visible after they're persistent: */
 	bch2_sb_update(c);
+	darray_for_each(online_devices, ca)
+		percpu_ref_put(&(*ca)->io_ref);
+	darray_exit(&online_devices);
 	printbuf_exit(&err);
 	return ret;
 }

From 6e297a73bccf852e7716207caa8eb868737c7155 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 6 May 2024 23:11:43 -0400
Subject: [PATCH 20/20] bcachefs: Add missing sched_annotate_sleep() in
 bch2_journal_flush_seq_async()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/journal.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 9c2af544251af..a8b08e76d0d01 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -706,6 +706,12 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 
 		spin_unlock(&j->lock);
 
+		/*
+		 * We're called from bch2_journal_flush_seq() -> wait_event();
+		 * but this might block. We won't usually block, so we won't
+		 * livelock:
+		 */
+		sched_annotate_sleep();
 		ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
 		if (ret)
 			return ret;