Skip to content

Commit

Permalink
Merge tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Browse files Browse the repository at this point in the history

Pull btrfs updates from David Sterba:
 "This contains feature updates, performance improvements, preparatory
  and core work and some related VFS updates:

  Features:

   - encoded read/write ioctls, which allow user space to read or write
     raw data directly from/to extents (compressed data for now,
     encrypted data in the future); they will be used by send/receive
     v2, where they save processing time

   - zoned mode now works with metadata DUP (the mkfs.btrfs default)

   - error message header updates:
      - print error state: transaction abort, other error, log tree
        errors
      - print transient filesystem state: remount, device replace,
        ignored checksum verifications

   - tree-checker: verify the transaction id of the to-be-written dirty
     extent buffer

  Performance improvements for fsync:

   - directory logging speedups (up to -90% run time)

   - avoid logging all directory changes during renames (up to -60% run
     time)

   - avoid inode logging during rename and link when possible (up to
     -60% run time)

   - prepare extents to be logged before locking a log tree path
     (throughput +7%)

   - stop copying old file extents when doing a full fsync()

   - improved logging of old extents after truncate

  Core, fixes:

   - improved stale device identification by dev_t and not just path
     (for devices that are behind other layers like device mapper)

   - continued extent tree v2 preparatory work
      - disable features that won't work yet
      - add wrappers and abstractions for new tree roots

   - improved error handling

   - add super block write annotations around background block group
     reclaim

   - fix device scanning messages potentially accessing stale pointer

   - cleanups and refactoring

  VFS:

   - allow reflinks/deduplication from two different mounts of the same
     filesystem

   - export and add helpers for read/write range verification, for the
     encoded ioctls"

* tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (98 commits)
  btrfs: zoned: put block group after final usage
  btrfs: don't access possibly stale fs_info data in device_list_add
  btrfs: add lockdep_assert_held to need_preemptive_reclaim
  btrfs: verify the tranisd of the to-be-written dirty extent buffer
  btrfs: unify the error handling of btrfs_read_buffer()
  btrfs: unify the error handling pattern for read_tree_block()
  btrfs: factor out do_free_extent_accounting helper
  btrfs: remove last_ref from the extent freeing code
  btrfs: add a alloc_reserved_extent helper
  btrfs: remove BUG_ON(ret) in alloc_reserved_tree_block
  btrfs: add and use helper for unlinking inode during log replay
  btrfs: extend locking to all space_info members accesses
  btrfs: zoned: mark relocation as writing
  fs: allow cross-vfsmount reflink/dedupe
  btrfs: remove the cross file system checks from remap
  btrfs: pass btrfs_fs_info to btrfs_recover_relocation
  btrfs: pass btrfs_fs_info for deleting snapshots and cleaner
  btrfs: add filesystems state details to error messages
  btrfs: deal with unexpected extent type during reflinking
  btrfs: fix unexpected error path when reflinking an inline extent
  ...
  • Loading branch information
Linus Torvalds committed Mar 22, 2022
2 parents 9b03992 + d3e2996 commit 5191290
Show file tree
Hide file tree
Showing 50 changed files with 3,109 additions and 1,331 deletions.
7 changes: 5 additions & 2 deletions fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -789,11 +789,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
} else if (!extent_buffer_uptodate(eb)) {
}
if (!extent_buffer_uptodate(eb)) {
free_pref(ref);
free_extent_buffer(eb);
return -EIO;
}

if (lock)
btrfs_tree_read_lock(eb);
if (btrfs_header_level(eb) == 0)
Expand Down Expand Up @@ -1335,7 +1337,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
} else if (!extent_buffer_uptodate(eb)) {
}
if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
Expand Down
36 changes: 33 additions & 3 deletions fs/btrfs/block-group.c
Original file line number Diff line number Diff line change
Expand Up @@ -1522,15 +1522,20 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;

if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
sb_start_write(fs_info->sb);

if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
sb_end_write(fs_info->sb);
return;
}

/*
* Long running balances can keep us blocked here for eternity, so
* simply skip reclaim if we're unable to get the mutex.
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
return;
}

Expand Down Expand Up @@ -1605,6 +1610,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
}

void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
Expand Down Expand Up @@ -2006,6 +2012,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
cache->length = key->offset;
cache->used = btrfs_stack_block_group_used(bgi);
cache->flags = btrfs_stack_block_group_flags(bgi);
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);

set_free_space_tree_thresholds(cache);

Expand Down Expand Up @@ -2288,7 +2295,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
spin_lock(&block_group->lock);
btrfs_set_stack_block_group_used(&bgi, block_group->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
block_group->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
Expand Down Expand Up @@ -2444,6 +2451,27 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
btrfs_trans_release_chunk_metadata(trans);
}

/*
 * Pick the global root id recorded for a block group starting at @offset.
 *
 * The value is what gets stored through the block group item's chunk objectid
 * accessor. Without the EXTENT_TREE_V2 incompat feature it is always
 * BTRFS_FIRST_CHUNK_TREE_OBJECTID. With it, @offset is first scaled down by a
 * granularity (1GiB, or 128MiB when the super block reports at most 10GiB of
 * total bytes) and the result is taken modulo fs_info->nr_global_roots.
 */
static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
{
	u64 granularity;
	u64 root_idx;

	if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
		return BTRFS_FIRST_CHUNK_TREE_OBJECTID;

	/* Smaller filesystems are indexed in 128MiB steps instead of 1GiB. */
	if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
		granularity = SZ_128M;
	else
		granularity = SZ_1G;

	/* Only the remainder matters; it is the index of the global root. */
	div64_u64_rem(div64_u64(offset, granularity), fs_info->nr_global_roots,
		      &root_idx);

	return root_idx;
}

struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 bytes_used, u64 type,
u64 chunk_offset, u64 size)
Expand All @@ -2464,6 +2492,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);

if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
cache->needs_free_space = 1;

Expand Down Expand Up @@ -2693,7 +2723,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
btrfs_set_stack_block_group_used(&bgi, cache->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
cache->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
btrfs_mark_buffer_dirty(leaf);
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/block-group.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct btrfs_block_group {
u64 bytes_super;
u64 flags;
u64 cache_generation;
u64 global_root_id;

/*
* If the free space extent count exceeds this number, convert the block
Expand Down
42 changes: 40 additions & 2 deletions fs/btrfs/btrfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
#include "ordered-data.h"
#include "delayed-inode.h"

/*
* We search a directory based on f_pos (struct dir_context::pos). Since '.'
* and '..' have f_pos of 0 and 1 respectively, every other entry has to start
* at index 2 (see btrfs_real_readdir() and dir_emit_dots()).
*/
#define BTRFS_DIR_START_INDEX 2

/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
Expand Down Expand Up @@ -173,8 +180,9 @@ struct btrfs_inode {
u64 disk_i_size;

/*
* if this is a directory then index_cnt is the counter for the index
* number for new files that are created
* If this is a directory then index_cnt is the counter for the index
* number for new files that are created. For an empty directory, this
* must be initialized to BTRFS_DIR_START_INDEX.
*/
u64 index_cnt;

Expand Down Expand Up @@ -333,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
spin_unlock(&inode->lock);
}

/*
 * Mark the inode as needing a full sync and pessimistically raise its
 * last_reflink_trans.
 *
 * Callers must hold the inode's VFS lock in exclusive mode, or be in a context
 * where no one else can access the inode concurrently (during inode creation
 * or when loading an inode from disk).
 *
 * Why last_reflink_trans is touched: the inode may have been part of a reflink
 * operation in the last transaction that modified it, after which a fsync
 * reset last_reflink_trans so that later fsyncs in the same transaction skip
 * unnecessary work. We have to assume a reflink happened, so
 * last_reflink_trans is brought up to ->last_trans.
 *
 * ->last_trans is protected by the inode's spinlock, since a concurrent
 * ordered extent completion can update it. The value is only ever raised,
 * never lowered: we can be called when last_reflink_trans already holds the
 * current transaction generation while ->last_trans has not yet been updated
 * in the current transaction, so the latter is the lower of the two.
 */
static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
{
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);

	spin_lock(&inode->lock);
	if (inode->last_trans > inode->last_reflink_trans)
		inode->last_reflink_trans = inode->last_trans;
	spin_unlock(&inode->lock);
}

static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
bool ret = false;
Expand Down
63 changes: 37 additions & 26 deletions fs/btrfs/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
bi_size += bvec->bv_len;

if (bio->bi_status)
cb->errors = 1;
cb->status = bio->bi_status;

ASSERT(bi_size && bi_size <= cb->compressed_len);
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
Expand All @@ -234,7 +234,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b
return last_io;
}

static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
static void finish_compressed_bio_read(struct compressed_bio *cb)
{
unsigned int index;
struct page *page;
Expand All @@ -247,19 +247,18 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bi
}

/* Do io completion on the original bio */
if (cb->errors) {
bio_io_error(cb->orig_bio);
if (cb->status != BLK_STS_OK) {
cb->orig_bio->bi_status = cb->status;
bio_endio(cb->orig_bio);
} else {
struct bio_vec *bvec;
struct bvec_iter_all iter_all;

ASSERT(bio);
ASSERT(!bio->bi_status);
/*
* We have verified the checksum already, set page checked so
* the end_io handlers know about it
*/
ASSERT(!bio_flagged(bio, BIO_CLONED));
ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
u64 bvec_start = page_offset(bvec->bv_page) +
bvec->bv_offset;
Expand Down Expand Up @@ -308,7 +307,7 @@ static void end_compressed_bio_read(struct bio *bio)
* Some IO in this cb have failed, just skip checksum as there
* is no way it could be correct.
*/
if (cb->errors == 1)
if (cb->status != BLK_STS_OK)
goto csum_failed;

inode = cb->inode;
Expand All @@ -324,8 +323,8 @@ static void end_compressed_bio_read(struct bio *bio)

csum_failed:
if (ret)
cb->errors = 1;
finish_compressed_bio_read(cb, bio);
cb->status = errno_to_blk_status(ret);
finish_compressed_bio_read(cb);
out:
bio_put(bio);
}
Expand All @@ -342,11 +341,12 @@ static noinline void end_compressed_writeback(struct inode *inode,
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct page *pages[16];
unsigned long nr_pages = end_index - index + 1;
const int errno = blk_status_to_errno(cb->status);
int i;
int ret;

if (cb->errors)
mapping_set_error(inode->i_mapping, -EIO);
if (errno)
mapping_set_error(inode->i_mapping, errno);

while (nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
Expand All @@ -358,7 +358,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
continue;
}
for (i = 0; i < ret; i++) {
if (cb->errors)
if (errno)
SetPageError(pages[i]);
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
cb->start, cb->len);
Expand All @@ -381,9 +381,10 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
*/
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
cb->start, cb->start + cb->len - 1,
!cb->errors);
cb->status == BLK_STS_OK);

end_compressed_writeback(inode, cb);
if (cb->writeback)
end_compressed_writeback(inode, cb);
/* Note, our inode could be gone now */

/*
Expand Down Expand Up @@ -506,7 +507,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct page **compressed_pages,
unsigned int nr_pages,
unsigned int write_flags,
struct cgroup_subsys_state *blkcg_css)
struct cgroup_subsys_state *blkcg_css,
bool writeback)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = NULL;
Expand All @@ -524,13 +526,14 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (!cb)
return BLK_STS_RESOURCE;
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->status = BLK_STS_OK;
cb->inode = &inode->vfs_inode;
cb->start = start;
cb->len = len;
cb->mirror_num = 0;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->writeback = writeback;
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;

Expand Down Expand Up @@ -591,7 +594,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,

if (submit) {
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, 1);
ret = btrfs_csum_one_bio(inode, bio, start, true);
if (ret)
goto finish_cb;
}
Expand Down Expand Up @@ -808,7 +811,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
blk_status_t ret = BLK_STS_RESOURCE;
blk_status_t ret;
int faili = 0;
u8 *sums;

Expand All @@ -821,17 +824,21 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
read_unlock(&em_tree->lock);
if (!em)
return BLK_STS_IOERR;
if (!em) {
ret = BLK_STS_IOERR;
goto out;
}

ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
if (!cb) {
ret = BLK_STS_RESOURCE;
goto out;
}

refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->status = BLK_STS_OK;
cb->inode = inode;
cb->mirror_num = mirror_num;
sums = cb->sums;
Expand All @@ -851,8 +858,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_NOFS);
if (!cb->compressed_pages)
if (!cb->compressed_pages) {
ret = BLK_STS_RESOURCE;
goto fail1;
}

for (pg_index = 0; pg_index < nr_pages; pg_index++) {
cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
Expand Down Expand Up @@ -938,7 +947,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio = NULL;
}
}
return 0;
return BLK_STS_OK;

fail2:
while (faili >= 0) {
Expand All @@ -951,6 +960,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
kfree(cb);
out:
free_extent_map(em);
bio->bi_status = ret;
bio_endio(bio);
return ret;
finish_cb:
if (comp_bio) {
Expand All @@ -970,7 +981,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
*/
ASSERT(refcount_read(&cb->pending_sectors));
/* Now we are the only one referring @cb, can finish it safely. */
finish_compressed_bio_read(cb, NULL);
finish_compressed_bio_read(cb);
return ret;
}

Expand Down
Loading

0 comments on commit 5191290

Please sign in to comment.