Skip to content

Commit

Permalink
xfs: add support for large btree blocks
Browse files Browse the repository at this point in the history
Add support for larger btree blocks that contain a CRC32C checksum,
a filesystem uuid and a block number for checking filesystem
consistency and detecting out of place writes.

[dchinner@redhat.com] Also include an owner field to allow reverse
mappings to be implemented for improved repairability and an LSN
field so that log recovery can easily determine the last
modification that made it to disk for each buffer.

[dchinner@redhat.com] Add buffer log format flags to indicate the
type of buffer to recovery so that we don't have to do blind magic
number tests to determine what the buffer is.

[dchinner@redhat.com] Modified to fit into the verifier structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
  • Loading branch information
Christoph Hellwig authored and Ben Myers committed Apr 21, 2013
1 parent a205064 commit ee1a47a
Show file tree
Hide file tree
Showing 17 changed files with 645 additions and 209 deletions.
105 changes: 73 additions & 32 deletions fs/xfs/xfs_alloc_btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"


STATIC struct xfs_btree_cur *
Expand Down Expand Up @@ -272,74 +273,111 @@ xfs_allocbt_key_diff(
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}

static void
static bool
xfs_allocbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
unsigned int level;
int sblock_ok; /* block passes checks */

/*
* magic number and level verification
*
* During growfs operations, we can't verify the exact level as the
* perag is not fully initialised and hence not attached to the buffer.
* In this case, check against the maximum tree depth.
* During growfs operations, we can't verify the exact level or owner as
* the perag is not fully initialised and hence not attached to the
* buffer. In this case, check against the maximum tree depth.
*
* Similarly, during log recovery we will have a perag structure
* attached, but the agf information will not yet have been initialised
* from the on disk AGF. Again, we can only check against maximum limits
* in this case.
*/
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
return false;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
if (pag)
sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi];
else
sblock_ok = level < mp->m_ag_maxlevels;
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
return false;
} else if (level >= mp->m_ag_maxlevels)
return false;
break;
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
return false;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
if (pag)
sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi];
else
sblock_ok = level < mp->m_ag_maxlevels;
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
return false;
} else if (level >= mp->m_ag_maxlevels)
return false;
break;
default:
sblock_ok = 0;
break;
return false;
}

/* numrecs verification */
sblock_ok = sblock_ok &&
be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0];
if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
return false;

/* sibling pointer verification */
sblock_ok = sblock_ok &&
(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
block->bb_u.s.bb_leftsib &&
(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
block->bb_u.s.bb_rightsib;

if (!sblock_ok) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
if (!block->bb_u.s.bb_leftsib ||
(be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
return false;
if (!block->bb_u.s.bb_rightsib ||
(be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
return false;

return true;
}

static void
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
xfs_allocbt_verify(bp);
if (!(xfs_btree_sblock_verify_crc(bp) &&
xfs_allocbt_verify(bp))) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
bp->b_target->bt_mount, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}

static void
xfs_allocbt_write_verify(
struct xfs_buf *bp)
{
xfs_allocbt_verify(bp);
if (!xfs_allocbt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
bp->b_target->bt_mount, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
xfs_btree_sblock_calc_crc(bp);

}

const struct xfs_buf_ops xfs_allocbt_buf_ops = {
Expand Down Expand Up @@ -444,6 +482,9 @@ xfs_allocbt_init_cursor(
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;

if (xfs_sb_version_hascrc(&mp->m_sb))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;

return cur;
}

Expand Down
12 changes: 7 additions & 5 deletions fs/xfs/xfs_alloc_btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ struct xfs_mount;
* by blockcount and blockno. All blocks look the same to make the code
* simpler; if we have time later, we'll make the optimizations.
*/
#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */
#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */
#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */
#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */

/*
* Data record/key structure
Expand All @@ -59,10 +61,10 @@ typedef __be32 xfs_alloc_ptr_t;

/*
* Btree block header size depends on a superblock flag.
*
* (not quite yet, but soon)
*/
#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN
#define XFS_ALLOC_BLOCK_LEN(mp) \
(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)

/*
* Record, key, and pointer address macros for btree blocks.
Expand Down
2 changes: 1 addition & 1 deletion fs/xfs/xfs_attr_leaf.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
return 0;
return dp->i_d.di_forkoff;
}
dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);
break;
}

Expand Down
47 changes: 31 additions & 16 deletions fs/xfs/xfs_bmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -439,11 +439,15 @@ xfs_bmap_sanity_check(
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);

if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
be16_to_cpu(block->bb_level) != level ||
if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
return 0;

if (be16_to_cpu(block->bb_level) != level ||
be16_to_cpu(block->bb_numrecs) == 0 ||
be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
return 0;

return 1;
}

Expand Down Expand Up @@ -1031,6 +1035,7 @@ xfs_bmap_extents_to_btree(
xfs_extnum_t nextents; /* number of file extents */
xfs_bmbt_ptr_t *pp; /* root block address pointer */

mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);

Expand All @@ -1044,16 +1049,18 @@ xfs_bmap_extents_to_btree(
* Fill in the root.
*/
block = ifp->if_broot;
block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
block->bb_level = cpu_to_be16(1);
block->bb_numrecs = cpu_to_be16(1);
block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
if (xfs_sb_version_hascrc(&mp->m_sb))
xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
else
xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
XFS_BTREE_LONG_PTRS);

/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
mp = ip->i_mount;
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
Expand Down Expand Up @@ -1102,10 +1109,15 @@ xfs_bmap_extents_to_btree(
*/
abp->b_ops = &xfs_bmbt_buf_ops;
ablock = XFS_BUF_TO_BLOCK(abp);
ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
ablock->bb_level = 0;
ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
if (xfs_sb_version_hascrc(&mp->m_sb))
xfs_btree_init_block_int(mp, ablock, abp->b_bn,
XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
else
xfs_btree_init_block_int(mp, ablock, abp->b_bn,
XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS);

arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
for (cnt = i = 0; i < nextents; i++) {
Expand Down Expand Up @@ -1155,7 +1167,8 @@ xfs_bmap_local_to_extents(
xfs_extlen_t total, /* total blocks needed by transaction */
int *logflagsp, /* inode logging flags */
int whichfork,
void (*init_fn)(struct xfs_buf *bp,
void (*init_fn)(struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp))
{
Expand Down Expand Up @@ -1207,7 +1220,7 @@ xfs_bmap_local_to_extents(
bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

/* initialise the block and copy the data */
init_fn(bp, ip, ifp);
init_fn(tp, bp, ip, ifp);

/* account for the change in fork size and log everything */
xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
Expand Down Expand Up @@ -1314,16 +1327,19 @@ xfs_bmap_add_attrfork_extents(
*/
STATIC void
xfs_bmap_local_to_extents_init_fn(
struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp)
{
/* ip is part of the init_fn callback signature but is not referenced here */

/* attach the bmap btree buffer ops to the newly obtained buffer */
bp->b_ops = &xfs_bmbt_buf_ops;

/* copy the fork's local data (if_bytes bytes) into the buffer */
memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);

/*
 * Record the buffer type as XFS_BLF_BTREE_BUF via the transaction.
 * NOTE(review): presumably this is the buffer log format type flag that
 * lets log recovery identify the buffer contents - confirm against
 * xfs_trans_buf_set_type().
 */
xfs_trans_buf_set_type(tp, bp, XFS_BLF_BTREE_BUF);
}

STATIC void
xfs_symlink_local_to_remote(
struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp)
Expand All @@ -1342,8 +1358,7 @@ xfs_symlink_local_to_remote(
*
* XXX (dgc): investigate whether directory conversion can use the generic
* formatting callout. It should be possible - it's just a very complex
* formatter. it would also require passing the transaction through to the init
* function.
* formatter.
*/
STATIC int /* error */
xfs_bmap_add_attrfork_local(
Expand Down
Loading

0 comments on commit ee1a47a

Please sign in to comment.