Skip to content

Commit

Permalink
Merge tag 'bcachefs-2024-08-24' of git://evilpiepirate.org/bcachefs
Browse files Browse the repository at this point in the history
Pull bcachefs fixes from Kent Overstreet:

 - assorted syzbot fixes

 - some upgrade fixes for old (pre 1.0) filesystems

 - fix for moving data off a device that was switched to durability=0
   after data had been written to it.

 - nocow deadlock fix

 - fix for new rebalance_work accounting

* tag 'bcachefs-2024-08-24' of git://evilpiepirate.org/bcachefs: (28 commits)
  bcachefs: Fix rebalance_work accounting
  bcachefs: Fix failure to flush moves before sleeping in copygc
  bcachefs: don't use rht_bucket() in btree_key_cache_scan()
  bcachefs: add missing inode_walker_exit()
  bcachefs: clear path->should_be_locked in bch2_btree_key_cache_drop()
  bcachefs: Fix double assignment in check_dirent_to_subvol()
  bcachefs: Fix refcounting in discard path
  bcachefs: Fix compat issue with old alloc_v4 keys
  bcachefs: Fix warning in bch2_fs_journal_stop()
  fs/super.c: improve get_tree() error message
  bcachefs: Fix missing validation in bch2_sb_journal_v2_validate()
  bcachefs: Fix replay_now_at() assert
  bcachefs: Fix locking in bch2_ioc_setlabel()
  bcachefs: fix failure to relock in btree_node_fill()
  bcachefs: fix failure to relock in bch2_btree_node_mem_alloc()
  bcachefs: unlock_long() before resort in journal replay
  bcachefs: fix missing bch2_err_str()
  bcachefs: fix time_stats_to_text()
  bcachefs: Fix bch2_bucket_gens_init()
  bcachefs: Fix bch2_trigger_alloc assert
  ...
  • Loading branch information
Linus Torvalds committed Aug 25, 2024
2 parents 780bdc1 + 49aa783 commit 72bea05
Show file tree
Hide file tree
Showing 25 changed files with 387 additions and 192 deletions.
66 changes: 34 additions & 32 deletions fs/bcachefs/alloc_background.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,71 +240,73 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
{
struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
struct bch_alloc_v4 a;
int ret = 0;

bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k),
bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k));

bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k),
c, alloc_v4_val_size_bad,
"bad val size (%u > %zu)",
alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k));
alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k));

bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
BCH_ALLOC_V4_NR_BACKPOINTERS(a.v),
bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) &&
BCH_ALLOC_V4_NR_BACKPOINTERS(&a),
c, alloc_v4_backpointers_start_bad,
"invalid backpointers_start");

bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type,
bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type,
c, alloc_key_data_type_bad,
"invalid data type (got %u should be %u)",
a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
a.data_type, alloc_data_type(a, a.data_type));

for (unsigned i = 0; i < 2; i++)
bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX,
c, alloc_key_io_time_bad,
"invalid io_time[%s]: %llu, max %llu",
i == READ ? "read" : "write",
a.v->io_time[i], LRU_TIME_MAX);
a.io_time[i], LRU_TIME_MAX);

unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) >
unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) >
offsetof(struct bch_alloc_v4, stripe_sectors)
? a.v->stripe_sectors
? a.stripe_sectors
: 0;

switch (a.v->data_type) {
switch (a.data_type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
bkey_fsck_err_on(stripe_sectors ||
a.v->dirty_sectors ||
a.v->cached_sectors ||
a.v->stripe,
a.dirty_sectors ||
a.cached_sectors ||
a.stripe,
c, alloc_key_empty_but_have_data,
"empty data type free but have data %u.%u.%u %u",
stripe_sectors,
a.v->dirty_sectors,
a.v->cached_sectors,
a.v->stripe);
a.dirty_sectors,
a.cached_sectors,
a.stripe);
break;
case BCH_DATA_sb:
case BCH_DATA_journal:
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
bkey_fsck_err_on(!a.v->dirty_sectors &&
bkey_fsck_err_on(!a.dirty_sectors &&
!stripe_sectors,
c, alloc_key_dirty_sectors_0,
"data_type %s but dirty_sectors==0",
bch2_data_type_str(a.v->data_type));
bch2_data_type_str(a.data_type));
break;
case BCH_DATA_cached:
bkey_fsck_err_on(!a.v->cached_sectors ||
a.v->dirty_sectors ||
bkey_fsck_err_on(!a.cached_sectors ||
a.dirty_sectors ||
stripe_sectors ||
a.v->stripe,
a.stripe,
c, alloc_key_cached_inconsistency,
"data type inconsistency");

bkey_fsck_err_on(!a.v->io_time[READ] &&
bkey_fsck_err_on(!a.io_time[READ] &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
c, alloc_key_cached_but_read_time_zero,
"cached bucket with read_time == 0");
Expand Down Expand Up @@ -556,7 +558,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
struct bpos pos = alloc_gens_pos(iter.pos, &offset);
int ret2 = 0;

if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) {
ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret2)
Expand Down Expand Up @@ -829,7 +831,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (likely(new.k->type == KEY_TYPE_alloc_v4)) {
new_a = bkey_s_to_alloc_v4(new).v;
} else {
BUG_ON(!(flags & BTREE_TRIGGER_gc));
BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair)));

struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
ret = PTR_ERR_OR_ZERO(new_ka);
Expand Down Expand Up @@ -1872,26 +1874,26 @@ static void bch2_do_discards_work(struct work_struct *work)
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));

bch2_write_ref_put(c, BCH_WRITE_REF_discard);
percpu_ref_put(&ca->io_ref);
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}

void bch2_dev_do_discards(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;

if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
return;

if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
goto put_ioref;
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
goto put_write_ref;

if (queue_work(c->write_ref_wq, &ca->discard_work))
return;

bch2_write_ref_put(c, BCH_WRITE_REF_discard);
put_ioref:
percpu_ref_put(&ca->io_ref);
put_write_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}

void bch2_do_discards(struct bch_fs *c)
Expand Down
1 change: 1 addition & 0 deletions fs/bcachefs/alloc_background_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ struct bch_alloc_v4 {
__u64 io_time[2];
__u32 stripe;
__u32 nr_external_backpointers;
/* end of fields in original version of alloc_v4 */
__u64 fragmentation_lru;
__u32 stripe_sectors;
__u32 pad;
Expand Down
3 changes: 2 additions & 1 deletion fs/bcachefs/bcachefs_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,8 @@ struct bch_sb_field_ext {
x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \
x(disk_accounting_v2, BCH_VERSION(1, 9)) \
x(disk_accounting_v3, BCH_VERSION(1, 10)) \
x(disk_accounting_inum, BCH_VERSION(1, 11))
x(disk_accounting_inum, BCH_VERSION(1, 11)) \
x(rebalance_work_acct_fix, BCH_VERSION(1, 12))

enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
Expand Down
25 changes: 25 additions & 0 deletions fs/bcachefs/btree_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,16 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
return b;
}

void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
mutex_lock(&c->btree_cache.lock);
list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);

six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}

/* Btree in memory cache - hash table */

void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
Expand Down Expand Up @@ -736,6 +746,13 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
start_time);

memalloc_nofs_restore(flags);

int ret = bch2_trans_relock(trans);
if (unlikely(ret)) {
bch2_btree_node_to_freelist(c, b);
return ERR_PTR(ret);
}

return b;
err:
mutex_lock(&bc->lock);
Expand Down Expand Up @@ -856,6 +873,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,

bch2_btree_node_read(trans, b, sync);

int ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);

if (!sync)
return NULL;

Expand Down Expand Up @@ -974,6 +995,10 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr

bch2_btree_node_wait_on_read(b);

ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);

/*
* should_be_locked is not set on this path yet, so we need to
* relock it specifically:
Expand Down
2 changes: 2 additions & 0 deletions fs/bcachefs/btree_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ struct btree_iter;

void bch2_recalc_btree_reserve(struct bch_fs *);

void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);

void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
Expand Down
9 changes: 9 additions & 0 deletions fs/bcachefs/btree_iter.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,15 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \
_btree_id, _pos, _flags, KEY_TYPE_##_type))

/*
 * bkey_val_copy - copy a key's value into a fixed-size destination
 * @_dst_v:	pointer to the destination value
 * @_src_k:	typed key; its .k is handed to bkey_val_bytes() and its .v is
 *		the source of the copy
 *
 * Copies min(sizeof(*_dst_v), bkey_val_bytes(_src_k.k)) bytes from _src_k.v
 * and zero-fills whatever remains of *_dst_v, so a value shorter than the
 * destination type reads back as zeroes in the missing trailing bytes.
 *
 * Each argument is evaluated exactly once, and the temporaries carry a
 * _bvc_ prefix so they cannot capture identifiers that appear in the
 * arguments (e.g. a caller variable named 'b').
 */
#define bkey_val_copy(_dst_v, _src_k)					\
do {									\
	/* &*() makes an array argument decay to an element pointer */	\
	__typeof__(&*(_dst_v)) _bvc_dst = (_dst_v);			\
	__typeof__(_src_k) _bvc_src = (_src_k);				\
	unsigned _bvc_bytes = min_t(unsigned, sizeof(*_bvc_dst),	\
				    bkey_val_bytes(_bvc_src.k));	\
									\
	memcpy(_bvc_dst, _bvc_src.v, _bvc_bytes);			\
	if (_bvc_bytes < sizeof(*_bvc_dst))				\
		memset((char *) _bvc_dst + _bvc_bytes, 0,		\
		       sizeof(*_bvc_dst) - _bvc_bytes);			\
} while (0)

static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
unsigned btree_id, struct bpos pos,
unsigned flags, unsigned type,
Expand Down
31 changes: 28 additions & 3 deletions fs/bcachefs/btree_key_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,

mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
path->should_be_locked = false;
}

static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
Expand Down Expand Up @@ -777,14 +778,28 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,

rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);

/*
* Scanning is expensive while a rehash is in progress - most elements
* will be on the new hashtable, if it's in progress
*
* A rehash could still start while we're scanning - that's ok, we'll
* still see most elements.
*/
if (unlikely(tbl->nest)) {
rcu_read_unlock();
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
return SHRINK_STOP;
}

if (bc->shrink_iter >= tbl->size)
bc->shrink_iter = 0;
start = bc->shrink_iter;

do {
struct rhash_head *pos, *next;

pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]);

while (!rht_is_a_nulls(pos)) {
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
Expand Down Expand Up @@ -865,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
while (atomic_long_read(&bc->nr_keys)) {
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
if (tbl)
if (tbl) {
if (tbl->nest) {
/* wait for in progress rehash */
rcu_read_unlock();
mutex_lock(&bc->table.mutex);
mutex_unlock(&bc->table.mutex);
rcu_read_lock();
continue;
}
for (i = 0; i < tbl->size; i++)
rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) {
ck = container_of(pos, struct bkey_cached, hash);
bkey_cached_evict(bc, ck);
list_add(&ck->list, &items);
}
}
rcu_read_unlock();
}

Expand Down
Loading

0 comments on commit 72bea05

Please sign in to comment.