Skip to content

Commit

Permalink
Merge tag 'bcachefs-2025-01-29' of git://evilpiepirate.org/bcachefs
Browse files Browse the repository at this point in the history
Pull bcachefs fixes from Kent Overstreet:

 - second half of a fix for a bug that'd been causing oopses on
   filesystems using snapshots with memory pressure (key cache fills for
   snapshot btrees are tricky)

 - build fix for strange compiler configurations that double stack frame
   size

 - "journal stuck timeout" now takes into account device latency: this
   fixes some spurious warnings, and the main remaining source of SRCU
   lock hold time warnings (I'm no longer seeing this in my CI, so any
   users still seeing this should definitely ping me)

 - fix for slow/hanging unmounts ("Improve journal pin flushing")

 - some more tracepoint fixes/improvements, to chase down the "rebalance
   isn't making progress" issues

* tag 'bcachefs-2025-01-29' of git://evilpiepirate.org/bcachefs:
  bcachefs: Improve trace_move_extent_finish
  bcachefs: Fix trace_copygc
  bcachefs: Journal writes are now IOPRIO_CLASS_RT
  bcachefs: Improve journal pin flushing
  bcachefs: fix bch2_btree_node_flags
  bcachefs: rebalance, copygc enabled are runtime opts
  bcachefs: Improve decompression error messages
  bcachefs: bset_blacklisted_journal_seq is now AUTOFIX
  bcachefs: "Journal stuck" timeout now takes into account device latency
  bcachefs: Reduce stack frame size of __bch2_str_hash_check_key()
  bcachefs: Fix btree_trans_peek_key_cache()
  • Loading branch information
Linus Torvalds committed Jan 30, 2025
2 parents 72deda0 + 5d9ccda commit 8080ff5
Show file tree
Hide file tree
Showing 21 changed files with 275 additions and 159 deletions.
5 changes: 4 additions & 1 deletion fs/bcachefs/btree_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ do { \
} while (0)

const char * const bch2_btree_node_flags[] = {
#define x(f) #f,
"typebit",
"typebit",
"typebit",
#define x(f) [BTREE_NODE_##f] = #f,
BTREE_FLAGS()
#undef x
NULL
Expand Down
3 changes: 1 addition & 2 deletions fs/bcachefs/btree_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -2239,8 +2239,6 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
if (unlikely(ret))
return bkey_s_c_err(ret);

btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);

k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
if (!k.k)
return k;
Expand All @@ -2251,6 +2249,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos

iter->k = u;
k.k = &iter->k;
btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
return k;
}

Expand Down
4 changes: 3 additions & 1 deletion fs/bcachefs/btree_key_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,10 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
unsigned flags)
{
if (flags & BTREE_ITER_cached_nofill)
if (flags & BTREE_ITER_cached_nofill) {
ck_path->l[0].b = NULL;
return 0;
}

struct bch_fs *c = trans->c;
struct btree_iter iter;
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/btree_trans_commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
trans->journal_u64s, flags);
trans->journal_u64s, flags, trans);
}

#define JSET_ENTRY_LOG_U64s 4
Expand Down
31 changes: 22 additions & 9 deletions fs/bcachefs/compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "compress.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
#include "opts.h"
#include "super-io.h"

Expand Down Expand Up @@ -254,29 +255,41 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
goto out;
}

int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
struct bch_extent_crc_unpacked *crc)
int bch2_bio_uncompress_inplace(struct bch_write_op *op,
struct bio *bio)
{
struct bch_fs *c = op->c;
struct bch_extent_crc_unpacked *crc = &op->crc;
struct bbuf data = { NULL };
size_t dst_len = crc->uncompressed_size << 9;
int ret = 0;

/* bio must own its pages: */
BUG_ON(!bio->bi_vcnt);
BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);

if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
bch_err(c, "error rewriting existing data: extent too big");
struct printbuf buf = PRINTBUF;
bch2_write_op_error(&buf, op);
prt_printf(&buf, "error rewriting existing data: extent too big");
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
return -EIO;
}

data = __bounce_alloc(c, dst_len, WRITE);

if (__bio_uncompress(c, bio, data.b, *crc)) {
if (!c->opts.no_data_io)
bch_err(c, "error rewriting existing data: decompression error");
bio_unmap_or_unbounce(c, data);
return -EIO;
if (!c->opts.no_data_io) {
struct printbuf buf = PRINTBUF;
bch2_write_op_error(&buf, op);
prt_printf(&buf, "error rewriting existing data: decompression error");
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
}
ret = -EIO;
goto err;
}

/*
Expand All @@ -293,9 +306,9 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
crc->uncompressed_size = crc->live_size;
crc->offset = 0;
crc->csum = (struct bch_csum) { 0, 0 };

err:
bio_unmap_or_unbounce(c, data);
return 0;
return ret;
}

int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
Expand Down
4 changes: 2 additions & 2 deletions fs/bcachefs/compress.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}

int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
struct bch_write_op;
int bch2_bio_uncompress_inplace(struct bch_write_op *, struct bio *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
struct bvec_iter, struct bch_extent_crc_unpacked);
unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *,
Expand Down
50 changes: 34 additions & 16 deletions fs/bcachefs/data_update.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,28 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc
return true;
}

static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
static noinline void trace_move_extent_finish2(struct data_update *u,
struct bkey_i *new,
struct bkey_i *insert)
{
if (trace_move_extent_finish_enabled()) {
struct printbuf buf = PRINTBUF;
struct bch_fs *c = u->op.c;
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, c, k);
trace_move_extent_finish(c, buf.buf);
printbuf_exit(&buf);
}
prt_newline(&buf);

bch2_data_update_to_text(&buf, u);
prt_newline(&buf);

prt_str_indented(&buf, "new replicas:\t");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
prt_newline(&buf);

prt_str_indented(&buf, "insert:\t");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
prt_newline(&buf);

trace_move_extent_finish(c, buf.buf);
printbuf_exit(&buf);
}

static void trace_move_extent_fail2(struct data_update *m,
Expand Down Expand Up @@ -372,7 +385,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
bch2_btree_iter_set_pos(&iter, next_pos);

this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i));
if (trace_move_extent_finish_enabled())
trace_move_extent_finish2(m, &new->k_i, insert);
}
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
Expand Down Expand Up @@ -525,34 +539,38 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
struct data_update_opts *data_opts)
{
printbuf_tabstop_push(out, 20);
prt_str(out, "rewrite ptrs:\t");

prt_str_indented(out, "rewrite ptrs:\t");
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
prt_newline(out);

prt_str(out, "kill ptrs:\t");
prt_str_indented(out, "kill ptrs:\t");
bch2_prt_u64_base2(out, data_opts->kill_ptrs);
prt_newline(out);

prt_str(out, "target:\t");
prt_str_indented(out, "target:\t");
bch2_target_to_text(out, c, data_opts->target);
prt_newline(out);

prt_str(out, "compression:\t");
prt_str_indented(out, "compression:\t");
bch2_compression_opt_to_text(out, io_opts->background_compression);
prt_newline(out);

prt_str(out, "opts.replicas:\t");
prt_str_indented(out, "opts.replicas:\t");
prt_u64(out, io_opts->data_replicas);
prt_newline(out);

prt_str(out, "extra replicas:\t");
prt_str_indented(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
}

void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
{
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
prt_newline(out);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
prt_newline(out);

prt_str_indented(out, "old key:\t");
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
}

int bch2_extent_drop_ptrs(struct btree_trans *trans,
Expand Down
1 change: 1 addition & 0 deletions fs/bcachefs/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "extents.h"
#include "fsck.h"
#include "inode.h"
#include "journal_reclaim.h"
#include "super.h"

#include <linux/console.h>
Expand Down
4 changes: 2 additions & 2 deletions fs/bcachefs/io_write.c
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
}

static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
{
__bch2_write_op_error(out, op, op->pos.offset);
}
Expand Down Expand Up @@ -873,7 +873,7 @@ static enum prep_encoded_ret {
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
return PREP_ENCODED_CHECKSUM_ERR;

if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
if (bch2_bio_uncompress_inplace(op, bio))
return PREP_ENCODED_ERR;
}

Expand Down
2 changes: 2 additions & 0 deletions fs/bcachefs/io_write.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);

void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);

#define BCH_WRITE_FLAGS() \
x(ALLOC_NOWAIT) \
x(CACHED) \
Expand Down
92 changes: 32 additions & 60 deletions fs/bcachefs/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,10 @@ journal_seq_to_buf(struct journal *j, u64 seq)

static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
unsigned i;

for (i = 0; i < ARRAY_SIZE(p->list); i++)
INIT_LIST_HEAD(&p->list[i]);
INIT_LIST_HEAD(&p->flushed);
for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++)
INIT_LIST_HEAD(&p->unflushed[i]);
for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++)
INIT_LIST_HEAD(&p->flushed[i]);
atomic_set(&p->count, count);
p->devs.nr = 0;
}
Expand Down Expand Up @@ -601,6 +600,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
: -BCH_ERR_journal_res_get_blocked;
}

static unsigned max_dev_latency(struct bch_fs *c)
{
u64 nsecs = 0;

for_each_rw_member(c, ca)
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);

return nsecs_to_jiffies(nsecs);
}

/*
* Essentially the entry function to the journaling code. When bcachefs is doing
* a btree insert, it calls this function to get the current journal write.
Expand All @@ -612,17 +621,31 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
* btree node write locks.
*/
int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
unsigned flags)
unsigned flags,
struct btree_trans *trans)
{
int ret;

if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
(flags & JOURNAL_RES_GET_NONBLOCK),
HZ * 10))
HZ))
return ret;

if (trans)
bch2_trans_unlock_long(trans);

struct bch_fs *c = container_of(j, struct bch_fs, journal);
int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10);

remaining_wait = max(0, remaining_wait - HZ);

if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
(flags & JOURNAL_RES_GET_NONBLOCK),
remaining_wait))
return ret;

struct printbuf buf = PRINTBUF;
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
Expand Down Expand Up @@ -727,7 +750,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
* livelock:
*/
sched_annotate_sleep();
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;

Expand Down Expand Up @@ -848,7 +871,7 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end)
static int __bch2_journal_meta(struct journal *j)
{
struct journal_res res = {};
int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;

Expand Down Expand Up @@ -1602,54 +1625,3 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
__bch2_journal_debug_to_text(out, j);
spin_unlock(&j->lock);
}

/*
 * Print the pin list for a single journal sequence number.
 *
 * *seq is first raised to j->pin.front if it is below it.  Returns true when
 * there is nothing (more) to print - the journal is not running, or *seq has
 * reached j->pin.back - and false after printing the entry for *seq.
 *
 * All work is done under j->lock; out->atomic is raised while printing.
 */
bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
{
	struct journal_entry_pin_list *pins;
	struct journal_entry_pin *p;
	bool done = true;

	spin_lock(&j->lock);

	if (!test_bit(JOURNAL_running, &j->flags))
		goto unlock;

	*seq = max(*seq, j->pin.front);
	if (*seq >= j->pin.back)
		goto unlock;

	++out->atomic;

	pins = journal_seq_pin(j, *seq);

	prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pins->count));
	printbuf_indent_add(out, 2);

	/* Pins not yet flushed, one list per pin type: */
	for (unsigned type = 0; type < ARRAY_SIZE(pins->list); type++) {
		list_for_each_entry(p, &pins->list[type], list)
			prt_printf(out, "\t%px %ps\n", p, p->flush);
	}

	/* Only emit the header when there is at least one flushed pin: */
	if (!list_empty(&pins->flushed)) {
		prt_printf(out, "flushed:\n");

		list_for_each_entry(p, &pins->flushed, list)
			prt_printf(out, "\t%px %ps\n", p, p->flush);
	}

	printbuf_indent_sub(out, 2);

	out->atomic--;
	done = false;
unlock:
	spin_unlock(&j->lock);
	return done;
}

/*
 * Print the pin lists for every journal sequence number, starting from 0
 * and advancing until bch2_journal_seq_pins_to_text() reports it is done.
 */
void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
{
	for (u64 cur = 0; !bch2_journal_seq_pins_to_text(out, j, &cur); cur++)
		;
}
Loading

0 comments on commit 8080ff5

Please sign in to comment.