Skip to content

Commit

Permalink
Merge tag 'xfs-5.20-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/x…
Browse files Browse the repository at this point in the history
…fs-linux

Pull xfs updates from Darrick Wong:
 "The biggest changes for this release are the log scalability
  improvements, lockless lookups for the buffer cache, and making the
  attr fork a permanent part of the incore inode in preparation for
  directory parent pointers.

  There's also a bunch of bug fixes that have accumulated since -rc5. I
  might send you a second pull request with some more bug fixes that I'm
  still working on.

  Once the merge window ends, I will hand maintainership back to Dave
  Chinner until the 6.1-rc1 release so that I can conduct the design
  review for the online fsck feature, and try to get it merged.

  Summary:

   - Improve scalability of the XFS log by removing spinlocks and global
     synchronization points.

   - Add security labels to whiteout inodes to match the other
     filesystems.

   - Clean up per-ag pointer passing to simplify call sites.

   - Reduce verifier overhead by precalculating more AG geometry.

   - Implement fast-path lockless lookups in the buffer cache to reduce
     spinlock hammering.

   - Make attr forks a permanent part of the inode structure to fix a
     UAF bug and because most files these days tend to have security
     labels and soon will have parent pointers too.

   - Clean up XFS_IFORK_Q usage and give it a better name.

   - Fix more UAF bugs in the xattr code.

   - SOB my tags.

   - Fix some typos in the timestamp range documentation.

   - Fix a few more memory leaks.

   - Code cleanups and typo fixes.

   - Fix an unlocked inode fork pointer access in getbmap"

* tag 'xfs-5.20-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (61 commits)
  xfs: delete extra space and tab in blank line
  xfs: fix NULL pointer dereference in xfs_getbmap()
  xfs: Fix typo 'the the' in comment
  xfs: Fix comment typo
  xfs: don't leak memory when attr fork loading fails
  xfs: fix for variable set but not used warning
  xfs: xfs_buf cache destroy isn't RCU safe
  xfs: delete unnecessary NULL checks
  xfs: fix comment for start time value of inode with bigtime enabled
  xfs: fix use-after-free in xattr node block inactivation
  xfs: lockless buffer lookup
  xfs: remove a superflous hash lookup when inserting new buffers
  xfs: reduce the number of atomic when locking a buffer after lookup
  xfs: merge xfs_buf_find() and xfs_buf_get_map()
  xfs: break up xfs_buf_find() into individual pieces
  xfs: add in-memory iunlink log item
  xfs: add log item precommit operation
  xfs: combine iunlink inode update functions
  xfs: clean up xfs_iunlink_update_inode()
  xfs: double link the unlinked inode list
  ...
  • Loading branch information
Linus Torvalds committed Aug 5, 2022
2 parents 9daee91 + 5e9466a commit b2a88c2
Show file tree
Hide file tree
Showing 86 changed files with 2,306 additions and 1,696 deletions.
361 changes: 322 additions & 39 deletions Documentation/filesystems/xfs-delayed-logging-design.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions fs/xfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ xfs-y += xfs_log.o \
xfs_icreate_item.o \
xfs_inode_item.o \
xfs_inode_item_recover.o \
xfs_iunlink_item.o \
xfs_refcount_item.o \
xfs_rmap_item.o \
xfs_log_recover.o \
Expand Down
171 changes: 112 additions & 59 deletions fs/xfs/libxfs/xfs_ag.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,18 +120,18 @@ xfs_initialize_perag_data(

for (index = 0; index < agcount; index++) {
/*
* read the agf, then the agi. This gets us
* all the information we need and populates the
* per-ag structures for us.
* Read the AGF and AGI buffers to populate the per-ag
* structures for us.
*/
error = xfs_alloc_pagf_init(mp, NULL, index, 0);
if (error)
pag = xfs_perag_get(mp, index);
error = xfs_alloc_read_agf(pag, NULL, 0, NULL);
if (!error)
error = xfs_ialloc_read_agi(pag, NULL, NULL);
if (error) {
xfs_perag_put(pag);
return error;
}

error = xfs_ialloc_pagi_init(mp, NULL, index);
if (error)
return error;
pag = xfs_perag_get(mp, index);
ifree += pag->pagi_freecount;
ialloc += pag->pagi_count;
bfree += pag->pagf_freeblks;
Expand Down Expand Up @@ -194,17 +194,76 @@ xfs_free_perag(
XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);

cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);

call_rcu(&pag->rcu_head, __xfs_free_perag);
}
}

/* Find the size of the AG, in blocks. */
static xfs_agblock_t
__xfs_ag_block_count(
struct xfs_mount *mp,
xfs_agnumber_t agno,
xfs_agnumber_t agcount,
xfs_rfsblock_t dblocks)
{
ASSERT(agno < agcount);

if (agno < agcount - 1)
return mp->m_sb.sb_agblocks;
return dblocks - (agno * mp->m_sb.sb_agblocks);
}

xfs_agblock_t
xfs_ag_block_count(
struct xfs_mount *mp,
xfs_agnumber_t agno)
{
return __xfs_ag_block_count(mp, agno, mp->m_sb.sb_agcount,
mp->m_sb.sb_dblocks);
}

/* Calculate the first and last possible inode number in an AG. */
static void
__xfs_agino_range(
struct xfs_mount *mp,
xfs_agblock_t eoag,
xfs_agino_t *first,
xfs_agino_t *last)
{
xfs_agblock_t bno;

/*
* Calculate the first inode, which will be in the first
* cluster-aligned block after the AGFL.
*/
bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
*first = XFS_AGB_TO_AGINO(mp, bno);

/*
* Calculate the last inode, which will be at the end of the
* last (aligned) cluster that can be allocated in the AG.
*/
bno = round_down(eoag, M_IGEO(mp)->cluster_align);
*last = XFS_AGB_TO_AGINO(mp, bno) - 1;
}

void
xfs_agino_range(
struct xfs_mount *mp,
xfs_agnumber_t agno,
xfs_agino_t *first,
xfs_agino_t *last)
{
return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
}

int
xfs_initialize_perag(
struct xfs_mount *mp,
xfs_agnumber_t agcount,
xfs_rfsblock_t dblocks,
xfs_agnumber_t *maxagi)
{
struct xfs_perag *pag;
Expand Down Expand Up @@ -263,13 +322,18 @@ xfs_initialize_perag(
if (error)
goto out_remove_pag;

error = xfs_iunlink_init(pag);
if (error)
goto out_hash_destroy;

/* first new pag is fully initialized */
if (first_initialised == NULLAGNUMBER)
first_initialised = index;

/*
* Pre-calculated geometry
*/
pag->block_count = __xfs_ag_block_count(mp, index, agcount,
dblocks);
pag->min_block = XFS_AGFL_BLOCK(mp);
__xfs_agino_range(mp, pag->block_count, &pag->agino_min,
&pag->agino_max);
}

index = xfs_set_inode_alloc(mp, agcount);
Expand All @@ -280,8 +344,6 @@ xfs_initialize_perag(
mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
return 0;

out_hash_destroy:
xfs_buf_hash_destroy(pag);
out_remove_pag:
radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
Expand All @@ -293,7 +355,6 @@ xfs_initialize_perag(
if (!pag)
break;
xfs_buf_hash_destroy(pag);
xfs_iunlink_destroy(pag);
kmem_free(pag);
}
return error;
Expand Down Expand Up @@ -321,12 +382,6 @@ xfs_get_aghdr_buf(
return 0;
}

static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
{
return mp->m_sb.sb_logstart > 0 &&
id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
}

/*
* Generic btree root block init function
*/
Expand All @@ -352,7 +407,7 @@ xfs_freesp_init_recs(
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);

if (is_log_ag(mp, id)) {
if (xfs_ag_contains_log(mp, id->agno)) {
struct xfs_alloc_rec *nrec;
xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp,
mp->m_sb.sb_logstart);
Expand Down Expand Up @@ -479,7 +534,7 @@ xfs_rmaproot_init(
}

/* account for the log space */
if (is_log_ag(mp, id)) {
if (xfs_ag_contains_log(mp, id->agno)) {
rrec = XFS_RMAP_REC_ADDR(block,
be16_to_cpu(block->bb_numrecs) + 1);
rrec->rm_startblock = cpu_to_be32(
Expand Down Expand Up @@ -550,7 +605,7 @@ xfs_agfblock_init(
agf->agf_refcount_blocks = cpu_to_be32(1);
}

if (is_log_ag(mp, id)) {
if (xfs_ag_contains_log(mp, id->agno)) {
int64_t logblocks = mp->m_sb.sb_logblocks;

be32_add_cpu(&agf->agf_freeblks, -logblocks);
Expand Down Expand Up @@ -761,11 +816,11 @@ xfs_ag_init_headers(

int
xfs_ag_shrink_space(
struct xfs_mount *mp,
struct xfs_perag *pag,
struct xfs_trans **tpp,
xfs_agnumber_t agno,
xfs_extlen_t delta)
{
struct xfs_mount *mp = pag->pag_mount;
struct xfs_alloc_arg args = {
.tp = *tpp,
.mp = mp,
Expand All @@ -782,14 +837,14 @@ xfs_ag_shrink_space(
xfs_agblock_t aglen;
int error, err2;

ASSERT(agno == mp->m_sb.sb_agcount - 1);
error = xfs_ialloc_read_agi(mp, *tpp, agno, &agibp);
ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1);
error = xfs_ialloc_read_agi(pag, *tpp, &agibp);
if (error)
return error;

agi = agibp->b_addr;

error = xfs_alloc_read_agf(mp, *tpp, agno, 0, &agfbp);
error = xfs_alloc_read_agf(pag, *tpp, 0, &agfbp);
if (error)
return error;

Expand All @@ -801,21 +856,22 @@ xfs_ag_shrink_space(
if (delta >= aglen)
return -EINVAL;

args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta);
args.fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta);

/*
* Make sure that the last inode cluster cannot overlap with the new
* end of the AG, even if it's sparse.
*/
error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta);
error = xfs_ialloc_check_shrink(*tpp, pag->pag_agno, agibp,
aglen - delta);
if (error)
return error;

/*
* Disable perag reservations so it doesn't cause the allocation request
* to fail. We'll reestablish reservation before we return.
*/
error = xfs_ag_resv_free(agibp->b_pag);
error = xfs_ag_resv_free(pag);
if (error)
return error;

Expand Down Expand Up @@ -844,7 +900,7 @@ xfs_ag_shrink_space(
be32_add_cpu(&agi->agi_length, -delta);
be32_add_cpu(&agf->agf_length, -delta);

err2 = xfs_ag_resv_init(agibp->b_pag, *tpp);
err2 = xfs_ag_resv_init(pag, *tpp);
if (err2) {
be32_add_cpu(&agi->agi_length, delta);
be32_add_cpu(&agf->agf_length, delta);
Expand All @@ -868,8 +924,9 @@ xfs_ag_shrink_space(
xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
return 0;

resv_init_out:
err2 = xfs_ag_resv_init(agibp->b_pag, *tpp);
err2 = xfs_ag_resv_init(pag, *tpp);
if (!err2)
return error;
resv_err:
Expand All @@ -883,33 +940,29 @@ xfs_ag_shrink_space(
*/
int
xfs_ag_extend_space(
struct xfs_mount *mp,
struct xfs_perag *pag,
struct xfs_trans *tp,
struct aghdr_init_data *id,
xfs_extlen_t len)
{
struct xfs_buf *bp;
struct xfs_agi *agi;
struct xfs_agf *agf;
int error;

/*
* Change the agi length.
*/
error = xfs_ialloc_read_agi(mp, tp, id->agno, &bp);
ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1);

error = xfs_ialloc_read_agi(pag, tp, &bp);
if (error)
return error;

agi = bp->b_addr;
be32_add_cpu(&agi->agi_length, len);
ASSERT(id->agno == mp->m_sb.sb_agcount - 1 ||
be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);

/*
* Change agf length.
*/
error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &bp);
error = xfs_alloc_read_agf(pag, tp, 0, &bp);
if (error)
return error;

Expand All @@ -924,49 +977,49 @@ xfs_ag_extend_space(
* XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that
* this doesn't actually exist in the rmap btree.
*/
error = xfs_rmap_free(tp, bp, bp->b_pag,
be32_to_cpu(agf->agf_length) - len,
error = xfs_rmap_free(tp, bp, pag, be32_to_cpu(agf->agf_length) - len,
len, &XFS_RMAP_OINFO_SKIP_UPDATE);
if (error)
return error;

return xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, id->agno,
error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno,
be32_to_cpu(agf->agf_length) - len),
len, &XFS_RMAP_OINFO_SKIP_UPDATE,
XFS_AG_RESV_NONE);
if (error)
return error;

/* Update perag geometry */
pag->block_count = be32_to_cpu(agf->agf_length);
__xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min,
&pag->agino_max);
return 0;
}

/* Retrieve AG geometry. */
int
xfs_ag_get_geometry(
struct xfs_mount *mp,
xfs_agnumber_t agno,
struct xfs_perag *pag,
struct xfs_ag_geometry *ageo)
{
struct xfs_buf *agi_bp;
struct xfs_buf *agf_bp;
struct xfs_agi *agi;
struct xfs_agf *agf;
struct xfs_perag *pag;
unsigned int freeblks;
int error;

if (agno >= mp->m_sb.sb_agcount)
return -EINVAL;

/* Lock the AG headers. */
error = xfs_ialloc_read_agi(mp, NULL, agno, &agi_bp);
error = xfs_ialloc_read_agi(pag, NULL, &agi_bp);
if (error)
return error;
error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agf_bp);
error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp);
if (error)
goto out_agi;

pag = agi_bp->b_pag;

/* Fill out form. */
memset(ageo, 0, sizeof(*ageo));
ageo->ag_number = agno;
ageo->ag_number = pag->pag_agno;

agi = agi_bp->b_addr;
ageo->ag_icount = be32_to_cpu(agi->agi_count);
Expand Down
Loading

0 comments on commit b2a88c2

Please sign in to comment.