Skip to content

Commit

Permalink
Merge tag 'bcachefs-2024-05-07.2' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:

 - Various syzbot fixes; mainly small gaps in validation

 - Fix an integer overflow in fiemap() which was preventing filefrag
   from returning the full list of extents

 - Fix a refcounting bug on the device refcount, turned up by new
   assertions in the development branch

 - Fix a device removal/readd bug; write_super() was repeatedly dropping
   and retaking bch_dev->io_ref references

* tag 'bcachefs-2024-05-07.2' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Add missing sched_annotate_sleep() in bch2_journal_flush_seq_async()
  bcachefs: Fix race in bch2_write_super()
  bcachefs: BCH_SB_LAYOUT_SIZE_BITS_MAX
  bcachefs: Add missing skcipher_request_set_callback() call
  bcachefs: Fix snapshot_t() usage in bch2_fs_quota_read_inode()
  bcachefs: Fix shift-by-64 in bformat_needs_redo()
  bcachefs: Guard against unknown k.k->type in __bkey_invalid()
  bcachefs: Add missing validation for superblock section clean
  bcachefs: Fix assert in bch2_alloc_v4_invalid()
  bcachefs: fix overflow in fiemap
  bcachefs: Add a better limit for maximum number of buckets
  bcachefs: Fix lifetime issue in device iterator helpers
  bcachefs: Fix bch2_dev_lookup() refcounting
  bcachefs: Initialize bch_write_op->failed in inline data path
  bcachefs: Fix refcount put in sb_field_resize error path
  bcachefs: Inodes need extra padding for varint_decode_fast()
  bcachefs: Fix early error path in bch2_fs_btree_key_cache_exit()
  bcachefs: bucket_pos_to_bp_noerror()
  bcachefs: don't free error pointers
  bcachefs: Fix a scheduler splat in __bch2_next_write_buffer_flush_journal_buf()
  • Loading branch information
Linus Torvalds committed May 8, 2024
2 parents 6d7ddd8 + 6e297a7 commit f5fcbc8
Show file tree
Hide file tree
Showing 20 changed files with 150 additions and 71 deletions.
4 changes: 2 additions & 2 deletions fs/bcachefs/alloc_background.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,10 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
int ret = 0;

bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err,
bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err,
alloc_v4_val_size_bad,
"bad val size (%u > %zu)",
alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k));

bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err,
Expand Down
8 changes: 6 additions & 2 deletions fs/bcachefs/alloc_background.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,17 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_
return pos;
}

static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
static inline unsigned alloc_v4_u64s_noerror(const struct bch_alloc_v4 *a)
{
unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
return (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
BCH_ALLOC_V4_U64s_V0) +
BCH_ALLOC_V4_NR_BACKPOINTERS(a) *
(sizeof(struct bch_backpointer) / sizeof(u64));
}

static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
{
unsigned ret = alloc_v4_u64s_noerror(a);
BUG_ON(ret > U8_MAX - BKEY_U64s);
return ret;
}
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/backpointers.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
int ret = 0;

bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
!bpos_eq(bp.k->p, bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset)),
c, err,
backpointer_bucket_offset_wrong,
"backpointer bucket_offset wrong");
Expand Down
14 changes: 10 additions & 4 deletions fs/bcachefs/backpointers.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c,
return POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector));
}

static inline struct bpos bucket_pos_to_bp_noerror(const struct bch_dev *ca,
struct bpos bucket,
u64 bucket_offset)
{
return POS(bucket.inode,
(bucket_to_sector(ca, bucket.offset) <<
MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
}

/*
* Convert from pos in alloc btree + bucket offset to pos in backpointer btree:
*/
Expand All @@ -53,10 +62,7 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
u64 bucket_offset)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
struct bpos ret = POS(bucket.inode,
(bucket_to_sector(ca, bucket.offset) <<
MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);

struct bpos ret = bucket_pos_to_bp_noerror(ca, bucket, bucket_offset);
EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
return ret;
}
Expand Down
8 changes: 8 additions & 0 deletions fs/bcachefs/bcachefs_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,12 @@ struct bch_member {
__le64 btree_allocated_bitmap;
};

/*
* This limit comes from the bucket_gens array - it's a single allocation, and
 * kernel allocations are limited to INT_MAX
*/
#define BCH_MEMBER_NBUCKETS_MAX (INT_MAX - 64)

#define BCH_MEMBER_V1_BYTES 56

LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
Expand Down Expand Up @@ -897,6 +903,8 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re
#define BCH_SB_SECTOR 8
#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */

#define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */

struct bch_sb_layout {
__uuid_t magic; /* bcachefs superblock UUID */
__u8 layout_type;
Expand Down
4 changes: 2 additions & 2 deletions fs/bcachefs/bkey_methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
if (type >= BKEY_TYPE_NR)
return 0;

bkey_fsck_err_on((type == BKEY_TYPE_btree ||
(flags & BKEY_INVALID_COMMIT)) &&
bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
(type == BKEY_TYPE_btree || (flags & BKEY_INVALID_COMMIT)) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
Expand Down
16 changes: 9 additions & 7 deletions fs/bcachefs/btree_key_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -956,13 +956,15 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
}

#ifdef __KERNEL__
for_each_possible_cpu(cpu) {
struct btree_key_cache_freelist *f =
per_cpu_ptr(bc->pcpu_freed, cpu);

for (i = 0; i < f->nr; i++) {
ck = f->objs[i];
list_add(&ck->list, &items);
if (bc->pcpu_freed) {
for_each_possible_cpu(cpu) {
struct btree_key_cache_freelist *f =
per_cpu_ptr(bc->pcpu_freed, cpu);

for (i = 0; i < f->nr; i++) {
ck = f->objs[i];
list_add(&ck->list, &items);
}
}
}
#endif
Expand Down
1 change: 1 addition & 0 deletions fs/bcachefs/checksum.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm,
int ret;

skcipher_request_set_sync_tfm(req, tfm);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, len, nonce.d);

ret = crypto_skcipher_encrypt(req);
Expand Down
1 change: 1 addition & 0 deletions fs/bcachefs/errcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@
x(EINVAL, block_size_too_small) \
x(EINVAL, bucket_size_too_small) \
x(EINVAL, device_size_too_small) \
x(EINVAL, device_size_too_big) \
x(EINVAL, device_not_a_member_of_filesystem) \
x(EINVAL, device_has_been_removed) \
x(EINVAL, device_splitbrain) \
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -964,7 +964,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_buf cur, prev;
struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
unsigned offset_into_extent, sectors;
bool have_extent = false;
u32 snapshot;
Expand All @@ -974,6 +973,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (ret)
return ret;

struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
if (start + len < start)
return -EINVAL;

Expand Down
30 changes: 20 additions & 10 deletions fs/bcachefs/io_write.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
u64 new_i_size,
s64 i_sectors_delta)
{
struct btree_iter iter;
struct bkey_i *k;
struct bkey_i_inode_v3 *inode;
/*
* Crazy performance optimization:
* Every extent update needs to also update the inode: the inode trigger
Expand All @@ -214,25 +211,36 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
* lost, but that's fine.
*/
unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
int ret;

k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes,
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0,
extent_iter->pos.inode,
extent_iter->snapshot),
BTREE_ITER_CACHED);
ret = PTR_ERR_OR_ZERO(k);
int ret = bkey_err(k);
if (unlikely(ret))
return ret;

if (unlikely(k->k.type != KEY_TYPE_inode_v3)) {
k = bch2_inode_to_v3(trans, k);
ret = PTR_ERR_OR_ZERO(k);
/*
* varint_decode_fast(), in the inode .invalid method, reads up to 7
* bytes past the end of the buffer:
*/
struct bkey_i *k_mut = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + 8);
ret = PTR_ERR_OR_ZERO(k_mut);
if (unlikely(ret))
goto err;

bkey_reassemble(k_mut, k);

if (unlikely(k_mut->k.type != KEY_TYPE_inode_v3)) {
k_mut = bch2_inode_to_v3(trans, k_mut);
ret = PTR_ERR_OR_ZERO(k_mut);
if (unlikely(ret))
goto err;
}

inode = bkey_i_to_inode_v3(k);
struct bkey_i_inode_v3 *inode = bkey_i_to_inode_v3(k_mut);

if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) &&
new_i_size > le64_to_cpu(inode->v.bi_size)) {
Expand Down Expand Up @@ -1505,6 +1513,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
unsigned sectors;
int ret;

memset(&op->failed, 0, sizeof(op->failed));

op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
op->flags |= BCH_WRITE_DONE;

Expand Down
8 changes: 8 additions & 0 deletions fs/bcachefs/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,12 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,

spin_unlock(&j->lock);

/*
* We're called from bch2_journal_flush_seq() -> wait_event();
* but this might block. We won't usually block, so we won't
* livelock:
*/
sched_annotate_sleep();
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
if (ret)
return ret;
Expand Down Expand Up @@ -870,6 +876,8 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
{
struct journal_buf *ret = NULL;

	/* We're inside wait_event(), but using mutex_lock(): */
sched_annotate_sleep();
mutex_lock(&j->buf_lock);
spin_lock(&j->lock);
max_seq = min(max_seq, journal_cur_seq(j));
Expand Down
22 changes: 14 additions & 8 deletions fs/bcachefs/move.c
Original file line number Diff line number Diff line change
Expand Up @@ -968,24 +968,30 @@ static bool migrate_btree_pred(struct bch_fs *c, void *arg,
return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}

/*
* Ancient versions of bcachefs produced packed formats which could represent
* keys that the in memory format cannot represent; this checks for those
* formats so we can get rid of them.
*/
static bool bformat_needs_redo(struct bkey_format *f)
{
unsigned i;

for (i = 0; i < f->nr_fields; i++) {
for (unsigned i = 0; i < f->nr_fields; i++) {
unsigned f_bits = f->bits_per_field[i];
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
u64 field_offset = le64_to_cpu(f->field_offset[i]);

if (f->bits_per_field[i] > unpacked_bits)
if (f_bits > unpacked_bits)
return true;

if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
if ((f_bits == unpacked_bits) && field_offset)
return true;

if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
unpacked_mask) <
field_offset)
u64 f_mask = f_bits
? ~((~0ULL << (f_bits - 1)) << 1)
: 0;

if (((field_offset + f_mask) & unpacked_mask) < field_offset)
return true;
}

Expand Down
8 changes: 3 additions & 5 deletions fs/bcachefs/quota.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,13 +560,11 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_inode_unpacked u;
struct bch_snapshot_tree s_t;
int ret;
u32 tree = bch2_snapshot_tree(c, k.k->p.snapshot);

ret = bch2_snapshot_tree_lookup(trans,
bch2_snapshot_tree(c, k.k->p.snapshot), &s_t);
int ret = bch2_snapshot_tree_lookup(trans, tree, &s_t);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
"%s: snapshot tree %u not found", __func__,
snapshot_t(c, k.k->p.snapshot)->tree);
"%s: snapshot tree %u not found", __func__, tree);
if (ret)
return ret;

Expand Down
3 changes: 2 additions & 1 deletion fs/bcachefs/recovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,8 @@ int bch2_fs_recovery(struct bch_fs *c)
bch2_journal_keys_put_initial(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
}
kfree(clean);
if (!IS_ERR(clean))
kfree(clean);

if (!ret &&
test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) &&
Expand Down
14 changes: 14 additions & 0 deletions fs/bcachefs/sb-clean.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,17 @@ static int bch2_sb_clean_validate(struct bch_sb *sb,
return -BCH_ERR_invalid_sb_clean;
}

for (struct jset_entry *entry = clean->start;
entry != vstruct_end(&clean->field);
entry = vstruct_next(entry)) {
if ((void *) vstruct_next(entry) > vstruct_end(&clean->field)) {
prt_str(err, "entry type ");
bch2_prt_jset_entry_type(err, le16_to_cpu(entry->type));
prt_str(err, " overruns end of section");
return -BCH_ERR_invalid_sb_clean;
}
}

return 0;
}

Expand All @@ -295,6 +306,9 @@ static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb,
for (entry = clean->start;
entry != vstruct_end(&clean->field);
entry = vstruct_next(entry)) {
if ((void *) vstruct_next(entry) > vstruct_end(&clean->field))
break;

if (entry->type == BCH_JSET_ENTRY_btree_keys &&
!entry->u64s)
continue;
Expand Down
6 changes: 3 additions & 3 deletions fs/bcachefs/sb-members.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ static int validate_member(struct printbuf *err,
struct bch_sb *sb,
int i)
{
if (le64_to_cpu(m.nbuckets) > LONG_MAX) {
prt_printf(err, "device %u: too many buckets (got %llu, max %lu)",
i, le64_to_cpu(m.nbuckets), LONG_MAX);
if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) {
prt_printf(err, "device %u: too many buckets (got %llu, max %u)",
i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX);
return -BCH_ERR_invalid_sb_members;
}

Expand Down
4 changes: 2 additions & 2 deletions fs/bcachefs/sb-members.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *

static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev *ca)
{
rcu_read_lock();
if (ca)
percpu_ref_put(&ca->ref);

rcu_read_lock();
if ((ca = __bch2_next_dev(c, ca, NULL)))
percpu_ref_get(&ca->ref);
rcu_read_unlock();
Expand All @@ -132,10 +132,10 @@ static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
struct bch_dev *ca,
unsigned state_mask)
{
rcu_read_lock();
if (ca)
percpu_ref_put(&ca->io_ref);

rcu_read_lock();
while ((ca = __bch2_next_dev(c, ca, NULL)) &&
(!((1 << ca->mi.state) & state_mask) ||
!percpu_ref_tryget(&ca->io_ref)))
Expand Down
Loading

0 comments on commit f5fcbc8

Please sign in to comment.