Skip to content

Commit

Permalink
Merge tag 'bcachefs-2024-06-12' of https://evilpiepirate.org/git/bcachefs

Browse files Browse the repository at this point in the history

Pull bcachefs fixes from Kent Overstreet:

 - fix kworker explosion, due to calling submit_bio() (which can block)
   from a multithreaded workqueue

 - fix error handling in btree node scan

 - forward compat fix: kill an old debug assert

 - key cache shrinker fixes

   This is a partial fix for stalls doing multithreaded creates - there
   were various O(n^2) issues the key cache shrinker was hitting [1].

   There's more work coming here; I'm working on a patch to delete the
   key cache lock, which initial testing shows to be a pretty drastic
   performance improvement.

 - assorted syzbot fixes

Link: https://lore.kernel.org/linux-bcachefs/CAGudoHGenxzk0ZqPXXi1_QDbfqQhGHu+wUwzyS6WmfkUZ1HiXA@mail.gmail.com/ [1]

* tag 'bcachefs-2024-06-12' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix rcu_read_lock() leak in drop_extra_replicas
  bcachefs: Add missing bch_inode_info.ei_flags init
  bcachefs: Add missing synchronize_srcu_expedited() call when shutting down
  bcachefs: Check for invalid bucket from bucket_gen(), gc_bucket()
  bcachefs: Replace bucket_valid() asserts in bucket lookup with proper checks
  bcachefs: Fix snapshot_create_lock lock ordering
  bcachefs: Fix refcount leak in check_fix_ptrs()
  bcachefs: Leave a buffer in the btree key cache to avoid lock thrashing
  bcachefs: Fix reporting of freed objects from key cache shrinker
  bcachefs: set sb->s_shrinker->seeks = 0
  bcachefs: increase key cache shrinker batch size
  bcachefs: Enable automatic shrinking for rhashtables
  bcachefs: fix the display format for show-super
  bcachefs: fix stack frame size in fsck.c
  bcachefs: Delete incorrect BTREE_ID_NR assertion
  bcachefs: Fix incorrect error handling found_btree_node_is_readable()
  bcachefs: Split out btree_write_submit_wq
  • Loading branch information
Linus Torvalds committed Jun 12, 2024
2 parents cea2a26 + f2736b9 commit 0b4989e
Show file tree
Hide file tree
Showing 22 changed files with 344 additions and 220 deletions.
22 changes: 20 additions & 2 deletions fs/bcachefs/alloc_background.c
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
enum btree_iter_update_trigger_flags flags)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0;

struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
Expand Down Expand Up @@ -860,8 +861,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
}

percpu_down_read(&c->mark_lock);
if (new_a->gen != old_a->gen)
*bucket_gen(ca, new.k->p.offset) = new_a->gen;
if (new_a->gen != old_a->gen) {
u8 *gen = bucket_gen(ca, new.k->p.offset);
if (unlikely(!gen)) {
percpu_up_read(&c->mark_lock);
goto invalid_bucket;
}
*gen = new_a->gen;
}

bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
percpu_up_read(&c->mark_lock);
Expand Down Expand Up @@ -895,6 +902,11 @@ int bch2_trigger_alloc(struct btree_trans *trans,

percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, new.k->p.offset);
if (unlikely(!g)) {
percpu_up_read(&c->mark_lock);
goto invalid_bucket;
}
g->gen_valid = 1;

bucket_lock(g);

Expand All @@ -910,8 +922,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
percpu_up_read(&c->mark_lock);
}
err:
printbuf_exit(&buf);
bch2_dev_put(ca);
return ret;
invalid_bucket:
bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
ret = -EIO;
goto err;
}

/*
Expand Down
3 changes: 2 additions & 1 deletion fs/bcachefs/bcachefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,8 @@ struct bch_fs {

/* BTREE CACHE */
struct bio_set btree_bio;
struct workqueue_struct *io_complete_wq;
struct workqueue_struct *btree_read_complete_wq;
struct workqueue_struct *btree_write_submit_wq;

struct btree_root btree_roots_known[BTREE_ID_NR];
DARRAY(struct btree_root) btree_roots_extra;
Expand Down
9 changes: 5 additions & 4 deletions fs/bcachefs/btree_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,11 @@ static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
}

/*
 * Hash table parameters for the btree node cache: nodes are keyed by the
 * u64 hash_val field and chained through the 'hash' member of struct btree,
 * with bch2_btree_cache_cmp_fn resolving hash collisions.
 *
 * automatic_shrinking lets the rhashtable shrink itself as entries are
 * removed, rather than only ever growing.
 *
 * NOTE(review): the scraped diff had duplicated each designated initializer
 * (old and new lines interleaved); only the post-change set is kept here.
 */
static const struct rhashtable_params bch_btree_cache_params = {
	.head_offset		= offsetof(struct btree, hash),
	.key_offset		= offsetof(struct btree, hash_val),
	.key_len		= sizeof(u64),
	.obj_cmpfn		= bch2_btree_cache_cmp_fn,
	.automatic_shrinking	= true,
};

static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
Expand Down
17 changes: 12 additions & 5 deletions fs/bcachefs/btree_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
const struct bch_alloc_v4 *old;
int ret;

if (!bucket_valid(ca, k.k->p.offset))
return 0;

old = bch2_alloc_to_v4(k, &old_convert);
gc = new = *old;

Expand Down Expand Up @@ -990,6 +993,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c)

buckets->first_bucket = ca->mi.first_bucket;
buckets->nbuckets = ca->mi.nbuckets;
buckets->nbuckets_minus_first =
buckets->nbuckets - buckets->first_bucket;
rcu_assign_pointer(ca->buckets_gc, buckets);
}

Expand All @@ -1003,12 +1008,14 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
continue;
}

struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
if (bucket_valid(ca, k.k->p.offset)) {
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);

struct bucket *g = gc_bucket(ca, k.k->p.offset);
g->gen_valid = 1;
g->gen = a->gen;
struct bucket *g = gc_bucket(ca, k.k->p.offset);
g->gen_valid = 1;
g->gen = a->gen;
}
0;
})));
bch2_dev_put(ca);
Expand Down
8 changes: 4 additions & 4 deletions fs/bcachefs/btree_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1389,7 +1389,7 @@ static void btree_node_read_endio(struct bio *bio)
bch2_latency_acct(ca, rb->start_time, READ);
}

queue_work(c->io_complete_wq, &rb->work);
queue_work(c->btree_read_complete_wq, &rb->work);
}

struct btree_node_read_all {
Expand Down Expand Up @@ -1656,7 +1656,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
btree_node_read_all_replicas_done(&ra->cl.work);
} else {
continue_at(&ra->cl, btree_node_read_all_replicas_done,
c->io_complete_wq);
c->btree_read_complete_wq);
}

return 0;
Expand Down Expand Up @@ -1737,7 +1737,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
if (sync)
btree_node_read_work(&rb->work);
else
queue_work(c->io_complete_wq, &rb->work);
queue_work(c->btree_read_complete_wq, &rb->work);
}
}

Expand Down Expand Up @@ -2229,7 +2229,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);

INIT_WORK(&wbio->work, btree_write_submit);
queue_work(c->io_complete_wq, &wbio->work);
queue_work(c->btree_write_submit_wq, &wbio->work);
return;
err:
set_btree_node_noevict(b);
Expand Down
11 changes: 4 additions & 7 deletions fs/bcachefs/btree_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,11 +221,8 @@ static void bch2_btree_path_verify(struct btree_trans *trans,
struct btree_path *path)
{
struct bch_fs *c = trans->c;
unsigned i;

EBUG_ON(path->btree_id >= BTREE_ID_NR);

for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
for (unsigned i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
if (!path->l[i].b) {
BUG_ON(!path->cached &&
bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
Expand All @@ -251,8 +248,6 @@ static void bch2_btree_iter_verify(struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;

BUG_ON(iter->btree_id >= BTREE_ID_NR);

BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);

BUG_ON((iter->flags & BTREE_ITER_is_extents) &&
Expand Down Expand Up @@ -3406,8 +3401,10 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
bch2_time_stats_exit(&s->lock_hold_times);
}

if (c->btree_trans_barrier_initialized)
if (c->btree_trans_barrier_initialized) {
synchronize_srcu_expedited(&c->btree_trans_barrier);
cleanup_srcu_struct(&c->btree_trans_barrier);
}
mempool_exit(&c->btree_trans_mem_pool);
mempool_exit(&c->btree_trans_pool);
}
Expand Down
33 changes: 20 additions & 13 deletions fs/bcachefs/btree_key_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
}

/*
 * Hash table parameters for the btree key cache: entries are keyed by the
 * struct bkey_cached_key embedded in struct bkey_cached and chained through
 * its 'hash' member, with bch2_btree_key_cache_cmp_fn resolving collisions.
 *
 * automatic_shrinking lets the rhashtable shrink itself as entries are
 * removed, rather than only ever growing.
 *
 * NOTE(review): the scraped diff had duplicated each designated initializer
 * (old and new lines interleaved); only the post-change set is kept here.
 */
static const struct rhashtable_params bch2_btree_key_cache_params = {
	.head_offset		= offsetof(struct bkey_cached, hash),
	.key_offset		= offsetof(struct bkey_cached, key),
	.key_len		= sizeof(struct bkey_cached_key),
	.obj_cmpfn		= bch2_btree_key_cache_cmp_fn,
	.automatic_shrinking	= true,
};

__flatten
Expand Down Expand Up @@ -840,7 +841,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
freed++;
bc->nr_freed_nonpcpu--;
bc->freed++;
}
Expand All @@ -854,7 +854,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
freed++;
bc->nr_freed_pcpu--;
bc->freed++;
}
Expand All @@ -876,23 +875,22 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,

if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
bc->skipped_dirty++;
goto next;
} else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
bc->skipped_accessed++;
goto next;
} else if (bkey_cached_lock_for_evict(ck)) {
} else if (!bkey_cached_lock_for_evict(ck)) {
bc->skipped_lock_fail++;
} else {
bkey_cached_evict(bc, ck);
bkey_cached_free(bc, ck);
bc->moved_to_freelist++;
} else {
bc->skipped_lock_fail++;
freed++;
}

scanned++;
if (scanned >= nr)
break;
next:

pos = next;
}

Expand All @@ -917,6 +915,14 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
long nr = atomic_long_read(&bc->nr_keys) -
atomic_long_read(&bc->nr_dirty);

/*
* Avoid hammering our shrinker too much if it's nearly empty - the
* shrinker code doesn't take into account how big our cache is, if it's
* mostly empty but the system is under memory pressure it causes nasty
* lock contention:
*/
nr -= 128;

return max(0L, nr);
}

Expand Down Expand Up @@ -1025,9 +1031,10 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
if (!shrink)
return -BCH_ERR_ENOMEM_fs_btree_cache_init;
bc->shrink = shrink;
shrink->seeks = 0;
shrink->count_objects = bch2_btree_key_cache_count;
shrink->scan_objects = bch2_btree_key_cache_scan;
shrink->batch = 1 << 14;
shrink->seeks = 0;
shrink->private_data = c;
shrinker_register(shrink);
return 0;
Expand Down
9 changes: 5 additions & 4 deletions fs/bcachefs/btree_node_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ static bool found_btree_node_is_readable(struct btree_trans *trans,

struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
bool ret = !IS_ERR_OR_NULL(b);
if (ret) {
f->sectors_written = b->written;
six_unlock_read(&b->c.lock);
}
if (!ret)
return ret;

f->sectors_written = b->written;
six_unlock_read(&b->c.lock);

/*
* We might update this node's range; if that happens, we need the node
Expand Down
Loading

0 comments on commit 0b4989e

Please sign in to comment.