Skip to content

Commit

Permalink
Merge tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcac…
Browse files Browse the repository at this point in the history
…hefs

Pull bcachefs from Kent Overstreet:
 "More bcachefs bugfixes for 6.7, and forwards compatibility work:

   - fix for a nasty extents + snapshot interaction, reported when
     reflink of a snapshotted file wouldn't complete but turned out to
     be a more general bug

   - fix for an invalid free in dio write path when iov vector was
     longer than our inline vector

   - fix for a buffer overflow in the nocow write path -
     BCH_REPLICAS_MAX doesn't actually limit the number of pointers in
     an extent when cached pointers are included

   - RO snapshots are actually RO now

   - And, a new superblock section to avoid future breakage when the
     disk space accounting rewrite rolls out: the new superblock section
     describes versions that need work to downgrade, where the work
     required is a list of recovery passes and errors to silently fix"

* tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: make RO snapshots actually RO
  bcachefs: bch_sb_field_downgrade
  bcachefs: bch_sb.recovery_passes_required
  bcachefs: Add persistent identifiers for recovery passes
  bcachefs: prt_bitflags_vector()
  bcachefs: move BCH_SB_ERRS() to sb-errors_types.h
  bcachefs: fix buffer overflow in nocow write path
  bcachefs: DARRAY_PREALLOCATED()
  bcachefs: Switch darray to kvmalloc()
  bcachefs: Factor out darray resize slowpath
  bcachefs: fix setting version_upgrade_complete
  bcachefs: fix invalid free in dio write path
  bcachefs: Fix extents iteration + snapshots interaction
  • Loading branch information
Linus Torvalds committed Jan 3, 2024
2 parents 610a9b8 + 0d72ab3 commit 981d041
Show file tree
Hide file tree
Showing 30 changed files with 977 additions and 423 deletions.
2 changes: 2 additions & 0 deletions fs/bcachefs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ bcachefs-y := \
clock.o \
compress.o \
counters.o \
darray.o \
debug.o \
dirent.o \
disk_groups.o \
Expand Down Expand Up @@ -70,6 +71,7 @@ bcachefs-y := \
reflink.o \
replicas.o \
sb-clean.o \
sb-downgrade.o \
sb-errors.o \
sb-members.o \
siphash.o \
Expand Down
3 changes: 2 additions & 1 deletion fs/bcachefs/acl.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,8 @@ int bch2_set_acl(struct mnt_idmap *idmap,
bch2_trans_begin(trans);
acl = _acl;

ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT);
if (ret)
goto btree_err;
Expand Down
1 change: 1 addition & 0 deletions fs/bcachefs/bcachefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ struct bch_fs {
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
} sb;


Expand Down
51 changes: 38 additions & 13 deletions fs/bcachefs/bcachefs_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -1207,19 +1207,21 @@ struct bch_sb_field {
};

#define BCH_SB_FIELDS() \
x(journal, 0) \
x(members_v1, 1) \
x(crypt, 2) \
x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
x(replicas, 7) \
x(journal_seq_blacklist, 8) \
x(journal_v2, 9) \
x(counters, 10) \
x(members_v2, 11) \
x(errors, 12)
x(journal, 0) \
x(members_v1, 1) \
x(crypt, 2) \
x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
x(replicas, 7) \
x(journal_seq_blacklist, 8) \
x(journal_v2, 9) \
x(counters, 10) \
x(members_v2, 11) \
x(errors, 12) \
x(ext, 13) \
x(downgrade, 14)

enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
Expand Down Expand Up @@ -1631,6 +1633,24 @@ struct bch_sb_field_errors {
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);

/*
 * Superblock "ext" section (BCH_SB_FIELD_ext, field type 13).
 */
struct bch_sb_field_ext {
/* Common superblock field header */
struct bch_sb_field field;
/*
 * 2 x 64 = 128 bits.
 * NOTE(review): presumably a bitmask of recovery passes that must still be
 * run on this filesystem - confirm against the recovery code.
 */
__le64 recovery_passes_required[2];
/*
 * 8 x 64 = 512 bits.
 * NOTE(review): presumably a bitmap indexed by fsck error id, mirroring
 * bch_fs.sb.errors_silent which bch2_fsck_err() tests to fix an error
 * without reporting it - confirm where the two are synchronised.
 */
__le64 errors_silent[8];
};

/*
 * One variable-length entry of the superblock "downgrade" section.
 *
 * NOTE(review): each entry appears to describe the work needed when
 * downgrading past a given metadata version: a set of recovery passes to run
 * and a list of errors to silently fix - confirm against sb-downgrade.c.
 *
 * The struct is packed with 2-byte alignment, so the 64-bit members are not
 * naturally aligned.
 */
struct bch_sb_field_downgrade_entry {
/* NOTE(review): presumably the metadata version this entry applies to */
__le16 version;
/* 128-bit mask; NOTE(review): presumably one bit per recovery pass */
__le64 recovery_passes[2];
/* Number of elements in errors[] */
__le16 nr_errors;
/* NOTE(review): presumably fsck error ids to fix silently */
__le16 errors[] __counted_by(nr_errors);
} __packed __aligned(2);

/*
 * Superblock "downgrade" section (BCH_SB_FIELD_downgrade, field type 14).
 *
 * Holds a sequence of struct bch_sb_field_downgrade_entry. Each entry ends in
 * a flexible array, so entries differ in size and entries[] cannot be indexed
 * like an ordinary array; it has to be walked entry by entry.
 */
struct bch_sb_field_downgrade {
/* Common superblock field header */
struct bch_sb_field field;
/* Variable-length entries, packed back to back */
struct bch_sb_field_downgrade_entry entries[];
};
/* Superblock: */

/*
Expand All @@ -1644,6 +1664,11 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);

#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)

/*
* field 1: version name
* field 2: BCH_VERSION(major, minor)
* field 3: recovery passes required on upgrade
*/
#define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10), \
RECOVERY_PASS_ALL_FSCK) \
Expand Down
35 changes: 24 additions & 11 deletions fs/bcachefs/btree_iter.c
Original file line number Diff line number Diff line change
Expand Up @@ -2085,18 +2085,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
goto out_no_locked;

/*
* iter->pos should be monotonically increasing, and always be
* equal to the key we just returned - except extents can
* straddle iter->pos:
* We need to check against @end before FILTER_SNAPSHOTS because
* if we get to a different inode than requested we might be
* seeing keys for a different snapshot tree that will all be
* filtered out.
*
* But we can't do the full check here, because bkey_start_pos()
* isn't monotonically increasing before FILTER_SNAPSHOTS, and
* that's what we check against in extents mode:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter_pos = k.k->p;
else
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));

if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_gt(iter_pos, end)
: bkey_ge(iter_pos, end)))
if (k.k->p.inode > end.inode)
goto end;

if (iter->update_path &&
Expand Down Expand Up @@ -2155,6 +2153,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
continue;
}

/*
* iter->pos should be monotonically increasing, and always be
* equal to the key we just returned - except extents can
* straddle iter->pos:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter_pos = k.k->p;
else
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));

if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_gt(iter_pos, end)
: bkey_ge(iter_pos, end)))
goto end;

break;
}

Expand Down
24 changes: 24 additions & 0 deletions fs/bcachefs/darray.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/log2.h>
#include <linux/slab.h>
#include "darray.h"

/*
 * Slow path for growing a darray: allocate a larger backing buffer, copy the
 * existing contents across and release the old buffer.
 *
 * @d:            darray to grow, viewed through the generic darray_char type
 * @element_size: size in bytes of one element of the real darray type
 * @new_size:     minimum number of elements the darray must be able to hold
 * @gfp:          allocation flags passed to kvmalloc_array()
 *
 * The new capacity is @new_size rounded up to a power of two. Nothing is done
 * if the darray already has room for @new_size elements.
 *
 * Returns 0 on success, or -ENOMEM if the new buffer could not be allocated,
 * in which case @d is left unchanged.
 */
int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
{
	if (new_size <= d->size)
		return 0;

	new_size = roundup_pow_of_two(new_size);

	void *data = kvmalloc_array(new_size, element_size, gfp);
	if (!data)
		return -ENOMEM;

	/*
	 * A darray with no capacity has a NULL data pointer, and passing NULL
	 * to memcpy() is undefined behaviour even for a zero length copy:
	 */
	if (d->size)
		memcpy(data, d->data, d->size * element_size);

	/* The preallocated buffer is embedded in the darray, never free it: */
	if (d->data != d->preallocated)
		kvfree(d->data);

	d->data = data;
	d->size = new_size;
	return 0;
}
48 changes: 30 additions & 18 deletions fs/bcachefs/darray.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,48 @@
* Inspired by CCAN's darray
*/

#include "util.h"
#include <linux/slab.h>

#define DARRAY(type) \
#define DARRAY_PREALLOCATED(_type, _nr) \
struct { \
size_t nr, size; \
type *data; \
_type *data; \
_type preallocated[_nr]; \
}

typedef DARRAY(void) darray_void;
#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)

static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
typedef DARRAY(char) darray_char;

int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);

static inline int __darray_resize(darray_char *d, size_t element_size,
size_t new_size, gfp_t gfp)
{
if (d->nr + more > d->size) {
size_t new_size = roundup_pow_of_two(d->nr + more);
void *data = krealloc_array(d->data, new_size, t_size, gfp);
return unlikely(new_size > d->size)
? __bch2_darray_resize(d, element_size, new_size, gfp)
: 0;
}

if (!data)
return -ENOMEM;
#define darray_resize_gfp(_d, _new_size, _gfp) \
unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp))

d->data = data;
d->size = new_size;
}
#define darray_resize(_d, _new_size) \
darray_resize_gfp(_d, _new_size, GFP_KERNEL)

return 0;
static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp)
{
return __darray_resize(d, t_size, d->nr + more, gfp);
}

#define darray_make_room_gfp(_d, _more, _gfp) \
__darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
__darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp)

#define darray_make_room(_d, _more) \
darray_make_room_gfp(_d, _more, GFP_KERNEL)

#define darray_room(_d) ((_d).size - (_d).nr)

#define darray_top(_d) ((_d).data[(_d).nr])

#define darray_push_gfp(_d, _item, _gfp) \
Expand Down Expand Up @@ -80,13 +89,16 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,

#define darray_init(_d) \
do { \
(_d)->data = NULL; \
(_d)->nr = (_d)->size = 0; \
(_d)->nr = 0; \
(_d)->size = ARRAY_SIZE((_d)->preallocated); \
(_d)->data = (_d)->size ? (_d)->preallocated : NULL; \
} while (0)

#define darray_exit(_d) \
do { \
kfree((_d)->data); \
if (!ARRAY_SIZE((_d)->preallocated) || \
(_d)->data != (_d)->preallocated) \
kvfree((_d)->data); \
darray_init(_d); \
} while (0)

Expand Down
3 changes: 3 additions & 0 deletions fs/bcachefs/errcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
x(ENOSPC, ENOSPC_sb_members) \
x(ENOSPC, ENOSPC_sb_members_v2) \
x(ENOSPC, ENOSPC_sb_crypt) \
x(ENOSPC, ENOSPC_sb_downgrade) \
x(ENOSPC, ENOSPC_btree_slot) \
x(ENOSPC, ENOSPC_snapshot_tree) \
x(ENOENT, ENOENT_bkey_type_mismatch) \
Expand Down Expand Up @@ -218,6 +219,8 @@
x(BCH_ERR_invalid_sb, invalid_sb_quota) \
x(BCH_ERR_invalid_sb, invalid_sb_errors) \
x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
x(BCH_ERR_invalid_sb, invalid_sb_ext) \
x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \
x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \
Expand Down
3 changes: 3 additions & 0 deletions fs/bcachefs/error.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ int bch2_fsck_err(struct bch_fs *c,
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;

if (test_bit(err, c->sb.errors_silent))
return -BCH_ERR_fsck_fix;

bch2_sb_error_count(c, err);

va_start(args, fmt);
Expand Down
13 changes: 5 additions & 8 deletions fs/bcachefs/fs-io-direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,11 @@ struct dio_write {
struct address_space *mapping;
struct bch_inode_info *inode;
struct mm_struct *mm;
const struct iovec *iov;
unsigned loop:1,
extending:1,
sync:1,
flush:1,
free_iov:1;
flush:1;
struct quota_res quota_res;
u64 written;

Expand Down Expand Up @@ -312,12 +312,10 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
return -1;

if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
GFP_KERNEL);
if (unlikely(!iov))
return -ENOMEM;

dio->free_iov = true;
}

memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov));
Expand Down Expand Up @@ -381,8 +379,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)

bch2_pagecache_block_put(inode);

if (dio->free_iov)
kfree(dio->iter.__iov);
kfree(dio->iov);

ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
Expand Down Expand Up @@ -626,11 +623,11 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
dio->mapping = mapping;
dio->inode = inode;
dio->mm = current->mm;
dio->iov = NULL;
dio->loop = false;
dio->extending = extending;
dio->sync = is_sync_kiocb(req) || extending;
dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
dio->free_iov = false;
dio->quota_res.sectors = 0;
dio->written = 0;
dio->iter = *iter;
Expand Down
12 changes: 5 additions & 7 deletions fs/bcachefs/fs-ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ static int bch2_ioc_setflags(struct bch_fs *c,
}

mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);

Expand Down Expand Up @@ -183,13 +184,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
}

mutex_lock(&inode->ei_update_lock);
ret = bch2_set_projid(c, inode, fa.fsx_projid);
if (ret)
goto err_unlock;

ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
bch2_set_projid(c, inode, fa.fsx_projid) ?:
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
err_unlock:
mutex_unlock(&inode->ei_update_lock);
err:
inode_unlock(&inode->v);
Expand Down
Loading

0 comments on commit 981d041

Please sign in to comment.