Skip to content

Commit

Permalink
[XFS] Introduce two new mount options (nolargeio/largeio) to allow
Browse files Browse the repository at this point in the history
filesystems to expose the filesystem stripe width in stat(2) rather than
the page cache size. This allows applications requiring high bandwidth to
easily determine the optimum I/O size for the underlying filesystem. The
default is to report the page cache size (i.e. "nolargeio").

SGI-PV: 942818
SGI-Modid: xfs-linux:xfs-kern:23830a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
  • Loading branch information
David Chinner authored and Nathan Scott committed Nov 1, 2005
1 parent ee34807 commit e8c8b3a
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 35 deletions.
2 changes: 1 addition & 1 deletion fs/xfs/linux-2.6/xfs_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ xfs_revalidate_inode(
break;
}

inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blksize = xfs_preferred_iosize(mp);
inode->i_generation = ip->i_d.di_gen;
i_size_write(inode, ip->i_d.di_size);
inode->i_blocks =
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/linux-2.6/xfs_vnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ vn_revalidate_core(
inode->i_mtime = vap->va_mtime;
inode->i_ctime = vap->va_ctime;
inode->i_atime = vap->va_atime;
inode->i_blksize = vap->va_blocksize;
if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
else
Expand Down
2 changes: 2 additions & 0 deletions fs/xfs/xfs_clnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,7 @@ struct xfs_mount_args {
#define XFSMNT_IHASHSIZE 0x20000000 /* inode hash table size */
#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
* symlink,mkdir,rmdir,mknod */
#define XFSMNT_COMPAT_IOSIZE 0x80000000 /* don't report large preferred
* I/O size in stat() */

#endif /* __XFS_CLNT_H__ */
27 changes: 27 additions & 0 deletions fs/xfs/xfs_mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,9 @@ typedef struct xfs_mount {
* allocation */
#define XFS_MOUNT_IHASHSIZE 0x00100000 /* inode hash table size */
#define XFS_MOUNT_DIRSYNC 0x00200000 /* synchronous directory ops */
#define XFS_MOUNT_COMPAT_IOSIZE 0x00400000 /* don't report large preferred
* I/O size in stat() */


/*
* Default minimum read and write sizes.
Expand All @@ -442,6 +445,30 @@ typedef struct xfs_mount {
#define XFS_WSYNC_READIO_LOG 15 /* 32K */
#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */

/*
 * Work out the preferred I/O size to report to userspace for this mount.
 *
 * Large sizes are only reported when the "largeio" mount option is used.
 * In compatibility mode the basic unit of caching is returned instead, so
 * that user applications do not end up doing inefficient read/modify/write
 * I/O.
 *
 * When compatibility mode is not in effect:
 *  - if the underlying volume is a stripe, the stripe width in bytes is
 *    the recommended I/O size;
 *  - if it is not a stripe and a default buffered I/O size has been set,
 *    the larger of the read and write I/O sizes is returned;
 *  - otherwise the compatibility default is returned.
 */
static inline unsigned long
xfs_preferred_iosize(xfs_mount_t *mp)
{
	/* Compatibility mode always wins over any stripe/iosize settings. */
	if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
		return PAGE_CACHE_SIZE;

	/* Striped volume: scale the stripe width by the block size log. */
	if (mp->m_swidth)
		return mp->m_swidth << mp->m_sb.sb_blocklog;

	/* Explicit default I/O size: use the larger of the two log2 sizes. */
	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
		return 1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log);

	return PAGE_CACHE_SIZE;
}

#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)

#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
Expand Down
11 changes: 11 additions & 0 deletions fs/xfs/xfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,9 @@ xfs_start_flags(
if (ap->flags & XFSMNT_DIRSYNC)
mp->m_flags |= XFS_MOUNT_DIRSYNC;

if (ap->flags & XFSMNT_COMPAT_IOSIZE)
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;

/*
* no recovery flag requires a read-only mount
*/
Expand Down Expand Up @@ -1645,6 +1648,9 @@ xfs_vget(
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes
* in stat(). */

STATIC unsigned long
suffix_strtoul(const char *cp, char **endp, unsigned int base)
Expand Down Expand Up @@ -1681,6 +1687,7 @@ xfs_parseargs(
int dsunit, dswidth, vol_dsunit, vol_dswidth;
int iosize;

args->flags |= XFSMNT_COMPAT_IOSIZE;
#if 0 /* XXX: off by default, until some remaining issues ironed out */
args->flags |= XFSMNT_IDELETE; /* default to on */
#endif
Expand Down Expand Up @@ -1809,6 +1816,10 @@ xfs_parseargs(
args->flags &= ~XFSMNT_IDELETE;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
args->flags |= XFSMNT_IDELETE;
} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
args->flags &= ~XFSMNT_COMPAT_IOSIZE;
} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
args->flags |= XFSMNT_COMPAT_IOSIZE;
} else if (!strcmp(this_char, "osyncisdsync")) {
/* no-op, this is now the default */
printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
Expand Down
35 changes: 1 addition & 34 deletions fs/xfs/xfs_vnodeops.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,40 +181,7 @@ xfs_getattr(
vap->va_rdev = 0;

if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {

#if 0
/* Large block sizes confuse various
* user space programs, so letting the
* stripe size through is not a good
* idea for now.
*/
vap->va_blocksize = mp->m_swidth ?
/*
* If the underlying volume is a stripe, then
* return the stripe width in bytes as the
* recommended I/O size.
*/
(mp->m_swidth << mp->m_sb.sb_blocklog) :
/*
* Return the largest of the preferred buffer
* sizes since doing small I/Os into larger
* buffers causes buffers to be decommissioned.
* The value returned is in bytes.
*/
(1 << (int)MAX(mp->m_readio_log,
mp->m_writeio_log));

#else
vap->va_blocksize =
/*
* Return the largest of the preferred buffer
* sizes since doing small I/Os into larger
* buffers causes buffers to be decommissioned.
* The value returned is in bytes.
*/
1 << (int)MAX(mp->m_readio_log,
mp->m_writeio_log);
#endif
vap->va_blocksize = xfs_preferred_iosize(mp);
} else {

/*
Expand Down

0 comments on commit e8c8b3a

Please sign in to comment.