Skip to content

Commit

Permalink
Merge tag 'iversion-v4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux
Browse files Browse the repository at this point in the history

Pull inode->i_version rework from Jeff Layton:
 "This pile of patches is a rework of the inode->i_version field. We
  have traditionally incremented that field on every inode data or
  metadata change. Typically this increment needs to be logged on disk
  even when nothing else has changed, which is rather expensive.

  It turns out though that none of the consumers of that field actually
  require this behavior. The only real requirement for all of them is
  that it be different iff the inode has changed since the last time the
  field was checked.

  Given that, we can optimize away most of the i_version increments and
  avoid dirtying inode metadata when the only change is to the i_version
  and no one is querying it. Queries of the i_version field are rather
  rare, so we can help write performance under many common workloads.

  This patch series converts existing accesses of the i_version field to
  a new API, and then converts all of the in-kernel filesystems to use
  it. The last patch in the series then converts the backend
  implementation to a scheme that optimizes away a large portion of the
  metadata updates when no one is looking at it.

  In my own testing this series significantly helps performance with
  small I/O sizes. I also got this email for Christmas this year from
  the kernel test robot (a 244% r/w bandwidth improvement with XFS over
  DAX, with 4k writes):

    https://lkml.org/lkml/2017/12/25/8

  A few of the earlier patches in this pile are also flowing to you via
  other trees (mm, integrity, and nfsd trees in particular)".

* tag 'iversion-v4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux: (22 commits)
  fs: handle inode->i_version more efficiently
  btrfs: only dirty the inode in btrfs_update_time if something was changed
  xfs: avoid setting XFS_ILOG_CORE if i_version doesn't need incrementing
  fs: only set S_VERSION when updating times if necessary
  IMA: switch IMA over to new i_version API
  xfs: convert to new i_version API
  ufs: use new i_version API
  ocfs2: convert to new i_version API
  nfsd: convert to new i_version API
  nfs: convert to new i_version API
  ext4: convert to new i_version API
  ext2: convert to new i_version API
  exofs: switch to new i_version API
  btrfs: convert to new i_version API
  afs: convert to new i_version API
  affs: convert to new i_version API
  fat: convert to new i_version API
  fs: don't take the i_lock in inode_inc_iversion
  fs: new API for handling inode->i_version
  ntfs: remove i_version handling
  ...
  • Loading branch information
Linus Torvalds committed Jan 29, 2018
2 parents d1de762 + f02a9ad commit a4b7fd7
Show file tree
Hide file tree
Showing 53 changed files with 525 additions and 153 deletions.
3 changes: 0 additions & 3 deletions drivers/staging/lustre/lustre/llite/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,6 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
}
ctx->pos = pos;
ll_finish_md_op_data(op_data);
filp->f_version = inode->i_version;

out:
if (!rc)
ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1);
Expand Down Expand Up @@ -1678,7 +1676,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
else
fd->lfd_pos = offset;
file->f_pos = offset;
file->f_version = 0;
}
ret = offset;
}
Expand Down
5 changes: 3 additions & 2 deletions fs/affs/amigaffs.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
*/

#include <linux/math64.h>
#include <linux/iversion.h>
#include "affs.h"

/*
Expand Down Expand Up @@ -60,7 +61,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
affs_brelse(dir_bh);

dir->i_mtime = dir->i_ctime = current_time(dir);
dir->i_version++;
inode_inc_iversion(dir);
mark_inode_dirty(dir);

return 0;
Expand Down Expand Up @@ -114,7 +115,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
affs_brelse(bh);

dir->i_mtime = dir->i_ctime = current_time(dir);
dir->i_version++;
inode_inc_iversion(dir);
mark_inode_dirty(dir);

return retval;
Expand Down
5 changes: 3 additions & 2 deletions fs/affs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*
*/

#include <linux/iversion.h>
#include "affs.h"

static int affs_readdir(struct file *, struct dir_context *);
Expand Down Expand Up @@ -80,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
* we can jump directly to where we left off.
*/
ino = (u32)(long)file->private_data;
if (ino && file->f_version == inode->i_version) {
if (ino && inode_cmp_iversion(inode, file->f_version) == 0) {
pr_debug("readdir() left off=%d\n", ino);
goto inside;
}
Expand Down Expand Up @@ -130,7 +131,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
} while (ino);
}
done:
file->f_version = inode->i_version;
file->f_version = inode_query_iversion(inode);
file->private_data = (void *)(long)ino;
affs_brelse(fh_bh);

Expand Down
3 changes: 2 additions & 1 deletion fs/affs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include <linux/iversion.h>
#include "affs.h"

static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
Expand Down Expand Up @@ -102,7 +103,7 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
if (!i)
return NULL;

i->vfs_inode.i_version = 1;
inode_set_iversion(&i->vfs_inode, 1);
i->i_lc = NULL;
i->i_ext_bh = NULL;
i->i_pa_cnt = 0;
Expand Down
3 changes: 2 additions & 1 deletion fs/afs/fsclient.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/circ_buf.h>
#include <linux/iversion.h>
#include "internal.h"
#include "afs_fs.h"

Expand Down Expand Up @@ -124,7 +125,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client;
vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_version = data_version;
inode_set_iversion_raw(&vnode->vfs_inode, data_version);
}

expected_version = status->data_version;
Expand Down
5 changes: 3 additions & 2 deletions fs/afs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/iversion.h>
#include "internal.h"

static const struct inode_operations afs_symlink_inode_operations = {
Expand Down Expand Up @@ -89,7 +90,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_atime = inode->i_mtime = inode->i_ctime;
inode->i_blocks = 0;
inode->i_generation = vnode->fid.unique;
inode->i_version = vnode->status.data_version;
inode_set_iversion_raw(inode, vnode->status.data_version);
inode->i_mapping->a_ops = &afs_fs_aops;

read_sequnlock_excl(&vnode->cb_lock);
Expand Down Expand Up @@ -218,7 +219,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
inode->i_ctime.tv_nsec = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime;
inode->i_blocks = 0;
inode->i_version = 0;
inode_set_iversion_raw(inode, 0);
inode->i_generation = 0;

set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
Expand Down
7 changes: 5 additions & 2 deletions fs/btrfs/delayed-inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/

#include <linux/slab.h>
#include <linux/iversion.h>
#include "delayed-inode.h"
#include "disk-io.h"
#include "transaction.h"
Expand Down Expand Up @@ -1713,7 +1714,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
btrfs_set_stack_inode_generation(inode_item,
BTRFS_I(inode)->generation);
btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
btrfs_set_stack_inode_sequence(inode_item,
inode_peek_iversion(inode));
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
Expand Down Expand Up @@ -1767,7 +1769,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);

inode->i_version = btrfs_stack_inode_sequence(inode_item);
inode_set_iversion_queried(inode,
btrfs_stack_inode_sequence(inode_item));
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/btrfs.h>
#include <linux/uio.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
Expand Down
12 changes: 8 additions & 4 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <linux/posix_acl_xattr.h>
#include <linux/uio.h>
#include <linux/magic.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
Expand Down Expand Up @@ -3777,7 +3778,8 @@ static int btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

inode->i_version = btrfs_inode_sequence(leaf, inode_item);
inode_set_iversion_queried(inode,
btrfs_inode_sequence(leaf, inode_item));
inode->i_generation = BTRFS_I(inode)->generation;
inode->i_rdev = 0;
rdev = btrfs_inode_rdev(leaf, inode_item);
Expand Down Expand Up @@ -3945,7 +3947,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
&token);
btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
&token);
btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode),
&token);
btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
Expand Down Expand Up @@ -6104,19 +6107,20 @@ static int btrfs_update_time(struct inode *inode, struct timespec *now,
int flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
bool dirty = flags & ~S_VERSION;

if (btrfs_root_readonly(root))
return -EROFS;

if (flags & S_VERSION)
inode_inc_iversion(inode);
dirty |= inode_maybe_inc_iversion(inode, dirty);
if (flags & S_CTIME)
inode->i_ctime = *now;
if (flags & S_MTIME)
inode->i_mtime = *now;
if (flags & S_ATIME)
inode->i_atime = *now;
return btrfs_dirty_inode(inode);
return dirty ? btrfs_dirty_inode(inode) : 0;
}

/*
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <linux/uuid.h>
#include <linux/btrfs.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
Expand Down
4 changes: 3 additions & 1 deletion fs/btrfs/tree-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/list_sort.h>
#include <linux/iversion.h>
#include "tree-log.h"
#include "disk-io.h"
#include "locking.h"
Expand Down Expand Up @@ -3609,7 +3610,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
&token);

btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
btrfs_set_token_inode_sequence(leaf, item,
inode_peek_iversion(inode), &token);
btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/xattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
Expand Down
9 changes: 5 additions & 4 deletions fs/exofs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <linux/iversion.h>
#include "exofs.h"

static inline unsigned exofs_chunk_size(struct inode *inode)
Expand Down Expand Up @@ -60,7 +61,7 @@ static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len)
struct inode *dir = mapping->host;
int err = 0;

dir->i_version++;
inode_inc_iversion(dir);

if (!PageUptodate(page))
SetPageUptodate(page);
Expand Down Expand Up @@ -241,7 +242,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx)
unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
int need_revalidate = (file->f_version != inode->i_version);
bool need_revalidate = inode_cmp_iversion(inode, file->f_version);

if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1))
return 0;
Expand All @@ -264,8 +265,8 @@ exofs_readdir(struct file *file, struct dir_context *ctx)
chunk_mask);
ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode->i_version;
need_revalidate = 0;
file->f_version = inode_query_iversion(inode);
need_revalidate = false;
}
de = (struct exofs_dir_entry *)(kaddr + offset);
limit = kaddr + exofs_last_byte(inode, n) -
Expand Down
3 changes: 2 additions & 1 deletion fs/exofs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <linux/module.h>
#include <linux/exportfs.h>
#include <linux/slab.h>
#include <linux/iversion.h>

#include "exofs.h"

Expand Down Expand Up @@ -159,7 +160,7 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
if (!oi)
return NULL;

oi->vfs_inode.i_version = 1;
inode_set_iversion(&oi->vfs_inode, 1);
return &oi->vfs_inode;
}

Expand Down
9 changes: 5 additions & 4 deletions fs/ext2/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/iversion.h>

typedef struct ext2_dir_entry_2 ext2_dirent;

Expand Down Expand Up @@ -92,7 +93,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
struct inode *dir = mapping->host;
int err = 0;

dir->i_version++;
inode_inc_iversion(dir);
block_write_end(NULL, mapping, pos, len, len, page, NULL);

if (pos+len > dir->i_size) {
Expand Down Expand Up @@ -293,7 +294,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
unsigned char *types = NULL;
int need_revalidate = file->f_version != inode->i_version;
bool need_revalidate = inode_cmp_iversion(inode, file->f_version);

if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
return 0;
Expand All @@ -319,8 +320,8 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
offset = ext2_validate_entry(kaddr, offset, chunk_mask);
ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode->i_version;
need_revalidate = 0;
file->f_version = inode_query_iversion(inode);
need_revalidate = false;
}
de = (ext2_dirent *)(kaddr+offset);
limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1);
Expand Down
5 changes: 3 additions & 2 deletions fs/ext2/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <linux/quotaops.h>
#include <linux/uaccess.h>
#include <linux/dax.h>
#include <linux/iversion.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
Expand Down Expand Up @@ -184,7 +185,7 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
ei->i_block_alloc_info = NULL;
ei->vfs_inode.i_version = 1;
inode_set_iversion(&ei->vfs_inode, 1);
#ifdef CONFIG_QUOTA
memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
Expand Down Expand Up @@ -1569,7 +1570,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
return err;
if (inode->i_size < off+len-towrite)
i_size_write(inode, off+len-towrite);
inode->i_version++;
inode_inc_iversion(inode);
inode->i_mtime = inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
return len - towrite;
Expand Down
Loading

0 comments on commit a4b7fd7

Please sign in to comment.