Skip to content

Commit

Permalink
Merge tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Browse files Browse the repository at this point in the history

Pull xfs fixes from Darrick Wong:
 "Fix a few bugs that could lead to corrupt files, fsck complaints, and
  filesystem crashes:

   - Minor documentation fixes

   - Fix a file corruption due to read racing with an insert range
     operation.

   - Fix log reservation overflows when allocating large rt extents

   - Fix a buffer log item flags check

   - Don't allow administrators to mount with sunit= options that will
     cause later xfs_repair complaints about the root directory being
     suspicious because the fs geometry appeared inconsistent

   - Fix a non-static helper that should have been static"

* tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: Make the symbol 'xfs_rtalloc_log_count' static
  xfs: don't commit sunit/swidth updates to disk if that would cause repair failures
  xfs: split the sunit parameter update into two parts
  xfs: refactor agfl length computation function
  libxfs: resync with the userspace libxfs
  xfs: use bitops interface for buf log item AIL flag check
  xfs: fix log reservation overflows when allocating large rt extents
  xfs: stabilize insert range start boundary to avoid COW writeback race
  xfs: fix Sphinx documentation warning
  • Loading branch information
Linus Torvalds committed Dec 22, 2019
2 parents a396560 + 5084bf6 commit c601747
Show file tree
Hide file tree
Showing 13 changed files with 341 additions and 104 deletions.
2 changes: 1 addition & 1 deletion Documentation/admin-guide/xfs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ The following sysctls are available for the XFS filesystem:
pool.

fs.xfs.speculative_prealloc_lifetime
(Units: seconds Min: 1 Default: 300 Max: 86400)
(Units: seconds Min: 1 Default: 300 Max: 86400)
The interval at which the background scanning for inodes
with unused speculative preallocation runs. The scan
removes unused preallocation from clean inodes and releases
Expand Down
18 changes: 13 additions & 5 deletions fs/xfs/libxfs/xfs_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2248,24 +2248,32 @@ xfs_alloc_longest_free_extent(
return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}

/*
* Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
* return the largest possible minimum length.
*/
unsigned int
xfs_alloc_min_freelist(
struct xfs_mount *mp,
struct xfs_perag *pag)
{
/* AG btrees have at least 1 level. */
static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
unsigned int min_free;

ASSERT(mp->m_ag_maxlevels > 0);

/* space needed by-bno freespace btree */
min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
mp->m_ag_maxlevels);
/* space needed by-size freespace btree */
min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels);
/* space needed reverse mapping used space btree */
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
min_free += min_t(unsigned int,
pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
mp->m_rmap_maxlevels);
min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
mp->m_rmap_maxlevels);

return min_free;
}
Expand Down
5 changes: 2 additions & 3 deletions fs/xfs/libxfs/xfs_bmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -4561,7 +4561,7 @@ xfs_bmapi_convert_delalloc(
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmalloca bma = { NULL };
u16 flags = 0;
uint16_t flags = 0;
struct xfs_trans *tp;
int error;

Expand Down Expand Up @@ -5972,8 +5972,7 @@ xfs_bmap_insert_extents(
goto del_cursor;
}

if (XFS_IS_CORRUPT(mp,
stop_fsb >= got.br_startoff + got.br_blockcount)) {
if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
error = -EFSCORRUPTED;
goto del_cursor;
}
Expand Down
21 changes: 21 additions & 0 deletions fs/xfs/libxfs/xfs_dir2.c
Original file line number Diff line number Diff line change
Expand Up @@ -724,3 +724,24 @@ xfs_dir2_namecheck(
/* There shouldn't be any slashes or nulls here */
return !memchr(name, '/', length) && !memchr(name, 0, length);
}

/*
 * Hash a directory entry name for the given mount.
 *
 * If the superblock has the asciici feature bit set (treated as the
 * unlikely case), the name is hashed with xfs_ascii_ci_hashname();
 * otherwise xfs_da_hashname() is applied to the name bytes and length.
 */
xfs_dahash_t
xfs_dir2_hashname(
	struct xfs_mount	*mp,
	struct xfs_name		*name)
{
	return unlikely(xfs_sb_version_hasasciici(&mp->m_sb)) ?
			xfs_ascii_ci_hashname(name) :
			xfs_da_hashname(name->name, name->len);
}

enum xfs_dacmp
xfs_dir2_compname(
struct xfs_da_args *args,
const unsigned char *name,
int len)
{
if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
return xfs_ascii_ci_compname(args, name, len);
return xfs_da_compname(args, name, len);
}
29 changes: 9 additions & 20 deletions fs/xfs/libxfs/xfs_dir2_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
int xfs_dir2_sf_entsize(struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr, int len);
void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
void xfs_dir2_sf_put_ftype(struct xfs_mount *mp,
struct xfs_dir2_sf_entry *sfep, uint8_t ftype);

/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
Expand All @@ -194,25 +200,8 @@ xfs_dir2_data_entsize(
return round_up(len, XFS_DIR2_DATA_ALIGN);
}

static inline xfs_dahash_t
xfs_dir2_hashname(
struct xfs_mount *mp,
struct xfs_name *name)
{
if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
return xfs_ascii_ci_hashname(name);
return xfs_da_hashname(name->name, name->len);
}

static inline enum xfs_dacmp
xfs_dir2_compname(
struct xfs_da_args *args,
const unsigned char *name,
int len)
{
if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
return xfs_ascii_ci_compname(args, name, len);
return xfs_da_compname(args, name, len);
}
xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
const unsigned char *name, int len);

#endif /* __XFS_DIR2_PRIV_H__ */
6 changes: 3 additions & 3 deletions fs/xfs/libxfs/xfs_dir2_sf.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
static void xfs_dir2_sf_toino8(xfs_da_args_t *args);

static int
int
xfs_dir2_sf_entsize(
struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr,
Expand Down Expand Up @@ -84,7 +84,7 @@ xfs_dir2_sf_get_ino(
return get_unaligned_be64(from) & XFS_MAXINUMBER;
}

static void
void
xfs_dir2_sf_put_ino(
struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr,
Expand Down Expand Up @@ -145,7 +145,7 @@ xfs_dir2_sf_get_ftype(
return XFS_DIR3_FT_UNKNOWN;
}

static void
void
xfs_dir2_sf_put_ftype(
struct xfs_mount *mp,
struct xfs_dir2_sf_entry *sfep,
Expand Down
64 changes: 64 additions & 0 deletions fs/xfs/libxfs/xfs_ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2909,3 +2909,67 @@ xfs_ialloc_setup_geometry(
else
igeo->ialloc_align = 0;
}

/*
 * Compute the inode number at which mkfs would have laid out the root
 * directory.
 *
 * The calculation finds the first AG 0 block that lies past all of the
 * metadata mkfs formats ahead of the root directory, rounds that block
 * number up for allocation alignment, and converts it to an inode number
 * in AG 0.
 *
 * @sunit is the rounding factor, but it is only consulted when the
 * superblock has the dalign feature and the inode geometry reports a
 * nonzero ialloc_align.  Failing that, sb_inoalignmt is used if the align
 * feature is set and that field is greater than one; otherwise the block
 * number is left unrounded.
 */
xfs_ino_t
xfs_ialloc_calc_rootino(
	struct xfs_mount	*mp,
	int			sunit)
{
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agblock_t		agbno;
	unsigned int		nr_roots;

	/*
	 * Count the btree root blocks that sit in front of the root
	 * directory.  The two free space btrees and the inode btree are
	 * always counted; the free inode, reverse mapping and reference
	 * count btrees contribute one block apiece when the matching
	 * feature is enabled.
	 */
	nr_roots = 2 + 1;
	if (xfs_sb_version_hasfinobt(&mp->m_sb))
		nr_roots++;
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		nr_roots++;
	if (xfs_sb_version_hasreflink(&mp->m_sb))
		nr_roots++;

	/*
	 * The geometry of AG 0 is known up front.  Begin with the blocks
	 * spanned by the four AG headers (one sector each), then add the
	 * btree roots and the initial AGFL.
	 */
	agbno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
	agbno += nr_roots;
	agbno += xfs_alloc_min_freelist(mp, NULL);

	/*
	 * The log also comes first if it was allocated in the first
	 * allocation group.  That happens on filesystems with a single AG,
	 * or with very odd geometries created by old mkfs versions on very
	 * small filesystems.
	 */
	if (mp->m_sb.sb_logstart &&
	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
		agbno += mp->m_sb.sb_logblocks;

	/*
	 * Finally, round up to whichever allocation alignment applies: the
	 * caller's stripe unit, or else the filesystem's inode alignment.
	 */
	if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
		agbno = roundup(agbno, sunit);
	else if (xfs_sb_version_hasalign(&mp->m_sb) &&
		 mp->m_sb.sb_inoalignmt > 1)
		agbno = roundup(agbno, mp->m_sb.sb_inoalignmt);

	return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, agbno));
}
1 change: 1 addition & 0 deletions fs/xfs/libxfs/xfs_ialloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,

int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);

#endif /* __XFS_IALLOC_H__ */
96 changes: 77 additions & 19 deletions fs/xfs/libxfs/xfs_trans_resv.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,24 @@ xfs_calc_inode_chunk_res(
return res;
}

/*
 * Number of blocks to reserve in the log for the realtime metadata touched
 * while allocating or freeing @num_ops realtime extents.
 *
 * Each operation must be able to log as many rtbitmap blocks as it takes
 * to mark MAXEXTLEN blocks' worth of realtime extents in use, plus one
 * block for the realtime summary.  The per-operation count is multiplied
 * by @num_ops.
 *
 * Note that both divisions used to size the bitmap (by the realtime extent
 * size, then by NBBY) truncate; only the final conversion from bytes to
 * filesystem blocks rounds up.
 */
static unsigned int
xfs_rtalloc_log_count(
	struct xfs_mount	*mp,
	unsigned int		num_ops)
{
	unsigned int		fsb_bytes = XFS_FSB_TO_B(mp, 1);
	unsigned int		bitmap_bytes;
	unsigned int		blocks_per_op;

	/* Bytes of rtbitmap covering a maximally sized extent. */
	bitmap_bytes = MAXEXTLEN / mp->m_sb.sb_rextsize;
	bitmap_bytes /= NBBY;

	/* Bitmap blocks for those bytes, plus the one summary block. */
	blocks_per_op = howmany(bitmap_bytes, fsb_bytes) + 1;

	return blocks_per_op * num_ops;
}

/*
* Various log reservation values.
*
Expand All @@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(

/*
* In a write transaction we can allocate a maximum of 2
* extents. This gives:
* extents. This gives (t1):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
* And the bmap_finish transaction can free bmap blocks in a join:
* Or, if we're writing to a realtime file (t2):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
* the realtime summary: 1 block
* the allocation btrees: 2 trees * (2 * max depth - 1) * block size
* And the bmap_finish transaction can free bmap blocks in a join (t3):
* the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
Expand All @@ -234,40 +260,72 @@ STATIC uint
xfs_calc_write_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
max((xfs_calc_inode_res(mp, 1) +
unsigned int t1, t2, t3;
unsigned int blksz = XFS_FSB_TO_B(mp, 1);

t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);

if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) +
blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
XFS_FSB_TO_B(mp, 1))));
xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
} else {
t2 = 0;
}

t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);

return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}

/*
* In truncating a file we free up to two extents at once. We can modify:
* In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
* And the bmap_finish transaction can free the blocks and bmap blocks:
* And the bmap_finish transaction can free the blocks and bmap blocks (t2):
* the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
* Or, if it's a realtime file (t3):
* the agf for each of the ags: 2 * sector size
* the agfl for each of the ags: 2 * sector size
* the super block to reflect the freed blocks: sector size
* the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
* the realtime summary: 2 exts * 1 block
* worst case split in allocation btrees per extent assuming 2 extents:
* 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
max((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
XFS_FSB_TO_B(mp, 1))));
unsigned int t1, t2, t3;
unsigned int blksz = XFS_FSB_TO_B(mp, 1);

t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);

t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);

if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
} else {
t3 = 0;
}

return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}

/*
Expand Down
12 changes: 12 additions & 0 deletions fs/xfs/xfs_bmap_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,7 @@ xfs_prepare_shift(
struct xfs_inode *ip,
loff_t offset)
{
struct xfs_mount *mp = ip->i_mount;
int error;

/*
Expand All @@ -1004,6 +1005,17 @@ xfs_prepare_shift(
return error;
}

/*
* Shift operations must stabilize the start block offset boundary along
* with the full range of the operation. If we don't, a COW writeback
* completion could race with an insert, front merge with the start
* extent (after split) during the shift and corrupt the file. Start
* with the block just prior to the start to stabilize the boundary.
*/
offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
if (offset)
offset -= (1 << mp->m_sb.sb_blocklog);

/*
* Writeback and invalidate cache for the remainder of the file as we're
* about to shift down every extent from offset to EOF.
Expand Down
Loading

0 comments on commit c601747

Please sign in to comment.