Skip to content

Commit

Permalink
xfs: don't use vfs writeback for pure metadata modifications
Browse files Browse the repository at this point in the history
Under heavy multi-way parallel create workloads, the VFS struggles
to write back all the inodes that have been changed in age order.
The bdi flusher thread becomes CPU bound, spending 85% of it's time
in the VFS code, mostly traversing the superblock dirty inode list
to separate dirty inodes old enough to flush.

We already keep an index of all metadata changes in age order - in
the AIL - and continued log pressure will do age ordered writeback
without any extra overhead at all. If there is no pressure on the
log, the xfssyncd will periodically write back metadata in ascending
disk address offset order so will be very efficient.

Hence we can stop marking VFS inodes dirty during transaction commit
or when changing timestamps during transactions. This will keep the
inodes in the superblock dirty list to those containing data or
unlogged metadata changes.

However, the timstamp changes are slightly more complex than this -
there are a couple of places that do unlogged updates of the
timestamps, and the VFS need to be informed of these. Hence add a
new function xfs_trans_ichgtime() for transactional changes,
and leave xfs_ichgtime() for the non-transactional changes.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
  • Loading branch information
Dave Chinner authored and Alex Elder committed Oct 18, 2010
1 parent e176579 commit dcd79a1
Show file tree
Hide file tree
Showing 12 changed files with 65 additions and 86 deletions.
2 changes: 1 addition & 1 deletion fs/xfs/linux-2.6/xfs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1088,8 +1088,8 @@ xfs_ioctl_setattr(
xfs_diflags_to_linux(ip);
}

xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_ichgtime(ip, XFS_ICHGTIME_CHG);

XFS_STATS_INC(xs_ig_attrchg);

Expand Down
35 changes: 0 additions & 35 deletions fs/xfs/linux-2.6/xfs_iops.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,41 +94,6 @@ xfs_mark_inode_dirty(
mark_inode_dirty(inode);
}

/*
* Change the requested timestamp in the given inode.
* We don't lock across timestamp updates, and we don't log them but
* we do record the fact that there is dirty information in core.
*/
void
xfs_ichgtime(
xfs_inode_t *ip,
int flags)
{
struct inode *inode = VFS_I(ip);
timespec_t tv;
int sync_it = 0;

tv = current_fs_time(inode->i_sb);

if ((flags & XFS_ICHGTIME_MOD) &&
!timespec_equal(&inode->i_mtime, &tv)) {
inode->i_mtime = tv;
sync_it = 1;
}
if ((flags & XFS_ICHGTIME_CHG) &&
!timespec_equal(&inode->i_ctime, &tv)) {
inode->i_ctime = tv;
sync_it = 1;
}

/*
* Update complete - now make sure everyone knows that the inode
* is dirty.
*/
if (sync_it)
xfs_mark_inode_dirty_sync(ip);
}

/*
* Hook in SELinux. This is not quite correct yet, what we really need
* here (as we do for default ACLs) is a mechanism by which creation of
Expand Down
7 changes: 1 addition & 6 deletions fs/xfs/linux-2.6/xfs_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -972,12 +972,7 @@ xfs_fs_inode_init_once(

/*
* Dirty the XFS inode when mark_inode_dirty_sync() is called so that
* we catch unlogged VFS level updates to the inode. Care must be taken
* here - the transaction code calls mark_inode_dirty_sync() to mark the
* VFS inode dirty in a transaction and clears the i_update_core field;
* it must clear the field after calling mark_inode_dirty_sync() to
* correctly indicate that the dirty state has been propagated into the
* inode log item.
* we catch unlogged VFS level updates to the inode.
*
* We need the barrier() to maintain correct ordering between unlogged
* updates and the transaction commit code that clears the i_update_core
Expand Down
2 changes: 1 addition & 1 deletion fs/xfs/quota/xfs_qm_syscalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ xfs_qm_scall_trunc_qfile(
goto out_unlock;
}

xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

out_unlock:
Expand Down
31 changes: 11 additions & 20 deletions fs/xfs/xfs_attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,16 +355,15 @@ xfs_attr_set_int(
if (mp->m_flags & XFS_MOUNT_WSYNC) {
xfs_trans_set_sync(args.trans);
}

if (!error && (flags & ATTR_KERNOTIME) == 0) {
xfs_trans_ichgtime(args.trans, dp,
XFS_ICHGTIME_CHG);
}
err2 = xfs_trans_commit(args.trans,
XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);

/*
* Hit the inode change time.
*/
if (!error && (flags & ATTR_KERNOTIME) == 0) {
xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
}
return(error == 0 ? err2 : error);
}

Expand Down Expand Up @@ -420,20 +419,16 @@ xfs_attr_set_int(
xfs_trans_set_sync(args.trans);
}

if ((flags & ATTR_KERNOTIME) == 0)
xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);

/*
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);

/*
* Hit the inode change time.
*/
if (!error && (flags & ATTR_KERNOTIME) == 0) {
xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
}

return(error);

out:
Expand Down Expand Up @@ -567,20 +562,16 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
xfs_trans_set_sync(args.trans);
}

if ((flags & ATTR_KERNOTIME) == 0)
xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);

/*
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);

/*
* Hit the inode change time.
*/
if (!error && (flags & ATTR_KERNOTIME) == 0) {
xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
}

return(error);

out:
Expand Down
1 change: 0 additions & 1 deletion fs/xfs/xfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,6 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
void xfs_iunpin_wait(xfs_inode_t *);
int xfs_iflush(xfs_inode_t *, uint);
void xfs_ichgtime(xfs_inode_t *, int);
void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);

Expand Down
9 changes: 0 additions & 9 deletions fs/xfs/xfs_inode_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,15 +222,6 @@ xfs_inode_item_format(
vecp++;
nvecs = 1;

/*
* Make sure the linux inode is dirty. We do this before
* clearing i_update_core as the VFS will call back into
* XFS here and set i_update_core, so we need to dirty the
* inode first so that the ordering of i_update_core and
* unlogged modifications still works as described below.
*/
xfs_mark_inode_dirty_sync(ip);

/*
* Clear i_update_core if the timestamps (or any other
* non-transactional modification) need flushing/logging
Expand Down
12 changes: 8 additions & 4 deletions fs/xfs/xfs_rename.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,9 @@ xfs_rename(
goto error_return;
if (error)
goto abort_return;
xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

if (new_parent && src_is_directory) {
error = xfs_bumplink(tp, target_dp);
Expand Down Expand Up @@ -249,7 +251,9 @@ xfs_rename(
&first_block, &free_list, spaceres);
if (error)
goto abort_return;
xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

xfs_trans_ichgtime(tp, target_dp,
XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

/*
* Decrement the link count on the target since the target
Expand Down Expand Up @@ -292,7 +296,7 @@ xfs_rename(
* inode isn't really being changed, but old unix file systems did
* it and some incremental backup programs won't work without it.
*/
xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);

/*
* Adjust the link count on src_dp. This is necessary when
Expand All @@ -315,7 +319,7 @@ xfs_rename(
if (error)
goto abort_return;

xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
if (new_parent)
xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/xfs_trans.h
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
xfs_ino_t , uint, uint, struct xfs_inode **);
void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
Expand Down
30 changes: 30 additions & 0 deletions fs/xfs/xfs_trans_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,36 @@ xfs_trans_ijoin_ref(
ip->i_itemp->ili_lock_flags = lock_flags;
}

/*
* Transactional inode timestamp update. Requires the inode to be locked and
* joined to the transaction supplied. Relies on the transaction subsystem to
* track dirty state and update/writeback the inode accordingly.
*/
void
xfs_trans_ichgtime(
struct xfs_trans *tp,
struct xfs_inode *ip,
int flags)
{
struct inode *inode = VFS_I(ip);
timespec_t tv;

ASSERT(tp);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_transp == tp);

tv = current_fs_time(inode->i_sb);

if ((flags & XFS_ICHGTIME_MOD) &&
!timespec_equal(&inode->i_mtime, &tv)) {
inode->i_mtime = tv;
}
if ((flags & XFS_ICHGTIME_CHG) &&
!timespec_equal(&inode->i_ctime, &tv)) {
inode->i_ctime = tv;
}
}

/*
* This is called to mark the fields indicated in fieldmask as needing
* to be logged when the transaction is committed. The inode must
Expand Down
4 changes: 2 additions & 2 deletions fs/xfs/xfs_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ xfs_droplink(
{
int error;

xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);

ASSERT (ip->i_d.di_nlink > 0);
ip->i_d.di_nlink--;
Expand Down Expand Up @@ -299,7 +299,7 @@ xfs_bumplink(
{
if (ip->i_d.di_nlink >= XFS_MAXLINK)
return XFS_ERROR(EMLINK);
xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);

ASSERT(ip->i_d.di_nlink > 0);
ip->i_d.di_nlink++;
Expand Down
17 changes: 10 additions & 7 deletions fs/xfs/xfs_vnodeops.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,11 @@ xfs_setattr(
ip->i_size == 0 && ip->i_d.di_nextents == 0) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
lock_flags &= ~XFS_ILOCK_EXCL;
if (mask & ATTR_CTIME)
xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
if (mask & ATTR_CTIME) {
inode->i_mtime = inode->i_ctime =
current_fs_time(inode->i_sb);
xfs_mark_inode_dirty_sync(ip);
}
code = 0;
goto error_return;
}
Expand Down Expand Up @@ -1391,7 +1394,7 @@ xfs_create(
ASSERT(error != ENOSPC);
goto out_trans_abort;
}
xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

if (is_dir) {
Expand Down Expand Up @@ -1742,7 +1745,7 @@ xfs_remove(
ASSERT(error != ENOENT);
goto out_bmap_cancel;
}
xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

if (is_dir) {
/*
Expand Down Expand Up @@ -1895,7 +1898,7 @@ xfs_link(
&first_block, &free_list, resblks);
if (error)
goto abort_return;
xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

error = xfs_bumplink(tp, sip);
Expand Down Expand Up @@ -2129,7 +2132,7 @@ xfs_symlink(
&first_block, &free_list, resblks);
if (error)
goto error1;
xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

/*
Expand Down Expand Up @@ -2833,7 +2836,7 @@ xfs_change_file_space(
if (ip->i_d.di_mode & S_IXGRP)
ip->i_d.di_mode &= ~S_ISGID;

xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
}
if (setprealloc)
ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
Expand Down

0 comments on commit dcd79a1

Please sign in to comment.