Skip to content

Commit

Permalink
Merge tag 'for-linus-v3.8-rc1' of git://oss.sgi.com/xfs/xfs
Browse files Browse the repository at this point in the history
Pull xfs update from Ben Myers:
 "There is plenty going on, including the cleanup of xfssyncd, metadata
  verifiers, CRC infrastructure for the log, tracking of inodes with
  speculative allocation, a cleanup of xfs_fs_subr.c, fixes for
  XFS_IOC_ZERO_RANGE, an important fix related to log replay (only
  update the last_sync_lsn when a transaction completes), a fix for
  deadlock on AGF buffers, documentation and comment updates, and a few
  more cleanups and fixes.

  Details:
   - remove the xfssyncd mess
   - only update the last_sync_lsn when a transaction completes
   - zero allocation_args on the kernel stack
   - fix AGF/alloc workqueue deadlock
   - silence uninitialised f.file warning
   - Update inode alloc comments
   - Update mount options documentation
   - report projid32bit feature in geometry call
   - speculative preallocation inode tracking
   - fix attr tree double split corruption
   - fix broken error handling in xfs_vm_writepage
   - drop buffer io reference when a bad bio is built
   - add more attribute tree trace points
   - growfs infrastructure changes for 3.8
   - fs/xfs/xfs_fs_subr.c die die die
   - add CRC infrastructure
   - add CRC checks to the log
   - Remove description of nodelaylog mount option from xfs.txt
   - inode allocation should use unmapped buffers
   - byte range granularity for XFS_IOC_ZERO_RANGE
   - fix direct IO nested transaction deadlock
   - fix stray dquot unlock when reclaiming dquots
   - fix sparse reported log CRC endian issue"

Fix up trivial conflict in fs/xfs/xfs_fsops.c due to the same patch
having been applied twice (commits eaef854 and 1375cb6: "xfs:
growfs: don't read garbage for new secondary superblocks") with later
updates to the affected code in the XFS tree.

* tag 'for-linus-v3.8-rc1' of git://oss.sgi.com/xfs/xfs: (78 commits)
  xfs: fix sparse reported log CRC endian issue
  xfs: fix stray dquot unlock when reclaiming dquots
  xfs: fix direct IO nested transaction deadlock.
  xfs: byte range granularity for XFS_IOC_ZERO_RANGE
  xfs: inode allocation should use unmapped buffers.
  xfs: Remove the description of nodelaylog mount option from xfs.txt
  xfs: add CRC checks to the log
  xfs: add CRC infrastructure
  xfs: convert buffer verifiers to an ops structure.
  xfs: connect up write verifiers to new buffers
  xfs: add pre-write metadata buffer verifier callbacks
  xfs: add buffer pre-write callback
  xfs: Add verifiers to dir2 data readahead.
  xfs: add xfs_da_node verification
  xfs: factor and verify attr leaf reads
  xfs: factor dir2 leaf read
  xfs: factor out dir2 data block reading
  xfs: factor dir2 free block reading
  xfs: verify dir2 block format buffers
  xfs: factor dir2 block read operations
  ...
  • Loading branch information
Linus Torvalds committed Dec 12, 2012
2 parents 22a40fd + f9668a0 commit 3f1c64f
Show file tree
Hide file tree
Showing 70 changed files with 3,747 additions and 2,311 deletions.
13 changes: 10 additions & 3 deletions Documentation/filesystems/xfs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ When mounting an XFS filesystem, the following options are accepted.
Issue command to let the block device reclaim space freed by the
filesystem. This is useful for SSD devices, thinly provisioned
LUNs and virtual machine images, but may have a performance
impact. This option is incompatible with the nodelaylog option.
impact.

dmapi
Enable the DMAPI (Data Management API) event callouts.
Expand Down Expand Up @@ -72,8 +72,15 @@ When mounting an XFS filesystem, the following options are accepted.
Indicates that XFS is allowed to create inodes at any location
in the filesystem, including those which will result in inode
numbers occupying more than 32 bits of significance. This is
provided for backwards compatibility, but causes problems for
backup applications that cannot handle large inode numbers.
the default allocation option. Applications which do not handle
inode numbers bigger than 32 bits should use the inode32 option.

inode32
Indicates that XFS is limited to creating inodes at locations which
will not result in inode numbers with more than 32 bits of
significance. This is provided for backwards compatibility, since
64-bit inode numbers might cause problems for some applications
that cannot handle large inode numbers.

largeio/nolargeio
If "nolargeio" is specified, the optimal I/O reported in
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ config XFS_FS
tristate "XFS filesystem support"
depends on BLOCK
select EXPORTFS
select LIBCRC32C
help
XFS is a high performance journaling filesystem which originated
on the SGI IRIX platform. It is completely multi-threaded, can
Expand Down
4 changes: 1 addition & 3 deletions fs/xfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,15 @@ xfs-y += xfs_aops.o \
xfs_file.o \
xfs_filestream.o \
xfs_fsops.o \
xfs_fs_subr.o \
xfs_globals.o \
xfs_iget.o \
xfs_icache.o \
xfs_ioctl.o \
xfs_iomap.o \
xfs_iops.o \
xfs_itable.o \
xfs_message.o \
xfs_mru_cache.o \
xfs_super.o \
xfs_sync.o \
xfs_xattr.o \
xfs_rename.o \
xfs_utils.o \
Expand Down
6 changes: 6 additions & 0 deletions fs/xfs/uuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,10 @@ extern int uuid_is_nil(uuid_t *uuid);
extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);

/*
 * Copy the UUID pointed to by @src into the storage pointed to by @dst.
 * Exactly one uuid_t worth of bytes is transferred.
 */
static inline void
uuid_copy(uuid_t *dst, uuid_t *src)
{
	memcpy(dst, src, sizeof(*dst));
}

#endif /* __XFS_SUPPORT_UUID_H__ */
5 changes: 5 additions & 0 deletions fs/xfs/xfs_ag.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ typedef struct xfs_agf {
extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);

extern const struct xfs_buf_ops xfs_agf_buf_ops;

/*
* Size of the unlinked inode hash table in the agi.
*/
Expand Down Expand Up @@ -161,6 +163,8 @@ typedef struct xfs_agi {
extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);

extern const struct xfs_buf_ops xfs_agi_buf_ops;

/*
* The third a.g. block contains the a.g. freelist, an array
* of block pointers to blocks owned by the allocation btree code.
Expand Down Expand Up @@ -233,6 +237,7 @@ typedef struct xfs_perag {
#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */

#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
Expand Down
140 changes: 113 additions & 27 deletions fs/xfs/xfs_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,60 @@ xfs_alloc_fixup_trees(
return 0;
}

/*
 * Sanity check the block numbers held in an AGFL buffer.
 *
 * The whole check is compiled out unless WHEN_CRCS_COME_ALONG is defined;
 * when it is not defined the function body is empty.
 *
 * On failure the corruption is reported and EFSCORRUPTED is recorded on
 * @bp through xfs_buf_ioerror().
 */
static void
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
#ifdef WHEN_CRCS_COME_ALONG
	/*
	 * mkfs does not initialise the AGFL to zero or NULL, so the only
	 * valid entries are the ones the AGF says are active. The AGF cannot
	 * be reached from here, which means those entries cannot be singled
	 * out and no meaningful verification is possible yet.
	 *
	 * The CRC format change requires mkfs to initialise the AGFL. From
	 * that point on every entry - active or not - can be checked against
	 * the bounds of the AG. Until then this check stays ifdef'd out.
	 */
	struct xfs_agfl		*agfl = XFS_BUF_TO_AGFL(bp);
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	int			bad = 0;
	int			slot;

	/*
	 * Every slot is examined; a single bad entry fails the buffer.
	 *
	 * NOTE(review): NULLAGBLOCK entries are rejected here as well as
	 * out-of-range ones - confirm that is intended before enabling.
	 */
	for (slot = 0; slot < XFS_AGFL_SIZE(mp); slot++) {
		if (be32_to_cpu(agfl->agfl_bno[slot]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl->agfl_bno[slot]) < mp->m_sb.sb_agblocks)
			continue;
		bad = 1;
	}

	if (bad) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
#endif
}

/*
 * Write-side verifier for AGFL buffers (installed as .verify_write in
 * xfs_agfl_buf_ops). It runs the same xfs_agfl_verify() check that the
 * read side uses.
 */
static void
xfs_agfl_write_verify(
struct xfs_buf *bp)
{
xfs_agfl_verify(bp);
}

/*
 * Read-side verifier for AGFL buffers (installed as .verify_read in
 * xfs_agfl_buf_ops). It simply hands the buffer to xfs_agfl_verify().
 */
static void
xfs_agfl_read_verify(
struct xfs_buf *bp)
{
xfs_agfl_verify(bp);
}

/*
 * Verifier operations for AGFL buffers. xfs_alloc_read_agfl() passes this
 * table to xfs_trans_read_buf() when it reads the free block array.
 */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
};

/*
* Read in the allocation group free block array.
*/
Expand All @@ -447,7 +501,7 @@ xfs_alloc_read_agfl(
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp);
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
if (error)
return error;
ASSERT(!xfs_buf_geterror(bp));
Expand Down Expand Up @@ -2091,6 +2145,63 @@ xfs_alloc_put_freelist(
return 0;
}

/*
 * Check that the AGF header held in @bp is self-consistent.
 *
 * The magic number, version, free block count and the free list indices
 * and count are validated against the limits of the AG and the AGFL size.
 * If anything is wrong - or the XFS_ERRTAG_ALLOC_READ_AGF error injection
 * point fires - the corruption is reported and EFSCORRUPTED is recorded on
 * the buffer through xfs_buf_ioerror().
 */
static void
xfs_agf_verify(
	struct xfs_buf	*bp)
{
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(bp);
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	int			corrupt = 0;

	/* Basic header fields: any single failure marks the AGF as bad. */
	if (agf->agf_magicnum != cpu_to_be32(XFS_AGF_MAGIC) ||
	    !XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) ||
	    be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length) ||
	    be32_to_cpu(agf->agf_flfirst) >= XFS_AGFL_SIZE(mp) ||
	    be32_to_cpu(agf->agf_fllast) >= XFS_AGFL_SIZE(mp) ||
	    be32_to_cpu(agf->agf_flcount) > XFS_AGFL_SIZE(mp))
		corrupt = 1;

	/*
	 * The perag is not fully initialised while growfs is running, so it
	 * is useless for checking there. growfs uses uncached buffers, which
	 * have no perag attached; a NULL b_pag therefore means "skip the
	 * sequence number check".
	 */
	if (bp->b_pag &&
	    be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
		corrupt = 1;

	/* The btree block count is only checked with lazy sb counters. */
	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
		corrupt = 1;

	/* The error injection test is evaluated on every call. */
	if (unlikely(XFS_TEST_ERROR(corrupt, mp, XFS_ERRTAG_ALLOC_READ_AGF,
			XFS_RANDOM_ALLOC_READ_AGF))) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
}

/*
 * Read-side verifier for AGF buffers (installed as .verify_read in
 * xfs_agf_buf_ops). It simply hands the buffer to xfs_agf_verify().
 */
static void
xfs_agf_read_verify(
struct xfs_buf *bp)
{
xfs_agf_verify(bp);
}

/*
 * Write-side verifier for AGF buffers (installed as .verify_write in
 * xfs_agf_buf_ops). It runs the same xfs_agf_verify() check that the
 * read side uses.
 */
static void
xfs_agf_write_verify(
struct xfs_buf *bp)
{
xfs_agf_verify(bp);
}

/*
 * Verifier operations for AGF buffers. xfs_read_agf() passes this table to
 * xfs_trans_read_buf() when it reads the allocation group header.
 */
const struct xfs_buf_ops xfs_agf_buf_ops = {
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
};

/*
* Read in the allocation group header (free/alloc section).
*/
Expand All @@ -2102,44 +2213,19 @@ xfs_read_agf(
int flags, /* XFS_BUF_ */
struct xfs_buf **bpp) /* buffer for the ag freelist header */
{
struct xfs_agf *agf; /* ag freelist header */
int agf_ok; /* set if agf is consistent */
int error;

ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), flags, bpp);
XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
if (error)
return error;
if (!*bpp)
return 0;

ASSERT(!(*bpp)->b_error);
agf = XFS_BUF_TO_AGF(*bpp);

/*
* Validate the magic number of the agf block.
*/
agf_ok =
agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_seqno) == agno;
if (xfs_sb_version_haslazysbcount(&mp->m_sb))
agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
be32_to_cpu(agf->agf_length);
if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
XFS_RANDOM_ALLOC_READ_AGF))) {
XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
XFS_ERRLEVEL_LOW, mp, agf);
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
xfs_buf_set_ref(*bpp, XFS_AGF_REF);
return 0;
}
Expand Down
3 changes: 3 additions & 0 deletions fs/xfs/xfs_alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,4 +231,7 @@ xfs_alloc_get_rec(
xfs_extlen_t *len, /* output: length of extent */
int *stat); /* output: success/failure */

extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agfl_buf_ops;

#endif /* __XFS_ALLOC_H__ */
77 changes: 77 additions & 0 deletions fs/xfs/xfs_alloc_btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,82 @@ xfs_allocbt_key_diff(
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}

/*
 * Check the header of a free space btree block held in @bp.
 *
 * The magic number, tree level, record count and both sibling pointers are
 * validated. On any failure a trace event is emitted, the corruption is
 * reported and EFSCORRUPTED is recorded on the buffer through
 * xfs_buf_ioerror().
 */
static void
xfs_allocbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	unsigned int		level = be16_to_cpu(block->bb_level);
	unsigned int		level_limit;	/* level must be below this */
	int			ok;

	/*
	 * Magic number and level.
	 *
	 * The perag is not fully initialised during growfs and so is not
	 * attached to the buffer. Without it the exact tree height is not
	 * known, and the level is checked against the maximum tree depth
	 * instead.
	 */
	if (block->bb_magic == cpu_to_be32(XFS_ABTB_MAGIC))
		level_limit = pag ? pag->pagf_levels[XFS_BTNUM_BNOi] :
				    mp->m_ag_maxlevels;
	else if (block->bb_magic == cpu_to_be32(XFS_ABTC_MAGIC))
		level_limit = pag ? pag->pagf_levels[XFS_BTNUM_CNTi] :
				    mp->m_ag_maxlevels;
	else
		level_limit = 0;	/* unknown magic: nothing can pass */

	ok = level < level_limit;

	/* Record count: leaf and node blocks have separate maximums. */
	if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
		ok = 0;

	/*
	 * Sibling pointers: each must be non-zero and either NULLAGBLOCK or
	 * a block number inside the AG.
	 */
	if (!block->bb_u.s.bb_leftsib ||
	    (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
	     be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks))
		ok = 0;

	if (!block->bb_u.s.bb_rightsib ||
	    (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
	     be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks))
		ok = 0;

	if (ok)
		return;

	trace_xfs_btree_corrupt(bp, _RET_IP_);
	XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
	xfs_buf_ioerror(bp, EFSCORRUPTED);
}

/*
 * Read-side verifier for free space btree blocks (installed as
 * .verify_read in xfs_allocbt_buf_ops). It simply hands the buffer to
 * xfs_allocbt_verify().
 */
static void
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
xfs_allocbt_verify(bp);
}

/*
 * Write-side verifier for free space btree blocks (installed as
 * .verify_write in xfs_allocbt_buf_ops). It runs the same
 * xfs_allocbt_verify() check that the read side uses.
 */
static void
xfs_allocbt_write_verify(
struct xfs_buf *bp)
{
xfs_allocbt_verify(bp);
}

/*
 * Verifier operations for free space btree blocks. The table is hooked
 * into the btree code through the .buf_ops member of xfs_allocbt_ops.
 */
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};


#ifdef DEBUG
STATIC int
xfs_allocbt_keys_inorder(
Expand Down Expand Up @@ -327,6 +403,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
.key_diff = xfs_allocbt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
#ifdef DEBUG
.keys_inorder = xfs_allocbt_keys_inorder,
.recs_inorder = xfs_allocbt_recs_inorder,
Expand Down
2 changes: 2 additions & 0 deletions fs/xfs/xfs_alloc_btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,6 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
xfs_agnumber_t, xfs_btnum_t);
extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);

extern const struct xfs_buf_ops xfs_allocbt_buf_ops;

#endif /* __XFS_ALLOC_BTREE_H__ */
Loading

0 comments on commit 3f1c64f

Please sign in to comment.