From bd8e0ff956456ad9071dbb6c2ed7d33bd22fc216 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Mar 2015 14:04:02 -0700 Subject: [PATCH 01/30] new helper: iov_iter_rw() Get either READ or WRITE out of iter->type. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- include/linux/uio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/uio.h b/include/linux/uio.h index 15f11fb9fff6f..8b01e1c3c6146 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -111,6 +111,14 @@ static inline bool iter_is_iovec(struct iov_iter *i) return !(i->type & (ITER_BVEC | ITER_KVEC)); } +/* + * Get one of READ or WRITE out of iter->type without any other flags OR'd in + * with it. + * + * The ?: is just for type safety. + */ +#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK) + /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value From 17f8c842d24ac054e4212c82b5bd6ae455a334f3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:50 -0700 Subject: [PATCH 02/30] Remove rw from {,__,do_}blockdev_direct_IO() Most filesystems call through to these at some point, so we'll start here. 
Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- fs/affs/file.c | 2 +- fs/block_dev.c | 5 ++--- fs/btrfs/inode.c | 8 ++++---- fs/direct-io.c | 39 ++++++++++++++++++--------------------- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext4/indirect.c | 11 ++++++----- fs/ext4/inode.c | 2 +- fs/f2fs/data.c | 2 +- fs/fat/inode.c | 2 +- fs/gfs2/aops.c | 5 ++--- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 3 +-- fs/jfs/inode.c | 2 +- fs/nilfs2/inode.c | 3 +-- fs/ocfs2/aops.c | 16 +++++++--------- fs/reiserfs/inode.c | 2 +- fs/udf/inode.c | 2 +- fs/xfs/xfs_aops.c | 9 ++++----- include/linux/fs.h | 22 ++++++++++++---------- 20 files changed, 67 insertions(+), 74 deletions(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index 7c1a3d4c19c23..1edc0d4b40dba 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -405,7 +405,7 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, return 0; } - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block); if (ret < 0 && (rw & WRITE)) affs_write_failed(mapping, offset + count); return ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index b5e87896f517d..bc23afd35fdba 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -152,9 +152,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, - offset, blkdev_get_block, - NULL, NULL, 0); + return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset, + blkdev_get_block, NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 686331f22b15c..e9a3ff8a85fd4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8174,10 +8174,10 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, wakeup = false; } - ret = __blockdev_direct_IO(rw, iocb, 
inode, - BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, - iter, offset, btrfs_get_blocks_direct, NULL, - btrfs_submit_direct, flags); + ret = __blockdev_direct_IO(iocb, inode, + BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, + iter, offset, btrfs_get_blocks_direct, NULL, + btrfs_submit_direct, flags); if (rw & WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) diff --git a/fs/direct-io.c b/fs/direct-io.c index 6fb00e3f10597..c3b560b24a463 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1093,10 +1093,10 @@ static inline int drop_refcount(struct dio *dio) * for the whole file. */ static inline ssize_t -do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) +do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags) { unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; @@ -1110,9 +1110,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct blk_plug plug; unsigned long align = offset | iov_iter_alignment(iter); - if (rw & WRITE) - rw = WRITE_ODIRECT; - /* * Avoid references to bdev if not absolutely needed to give * the early prefetch in the caller enough time. 
@@ -1127,7 +1124,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } /* watch out for a 0 len io from a tricksy fs */ - if (rw == READ && !iov_iter_count(iter)) + if (iov_iter_rw(iter) == READ && !iov_iter_count(iter)) return 0; dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); @@ -1143,7 +1140,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, dio->flags = flags; if (dio->flags & DIO_LOCKING) { - if (rw == READ) { + if (iov_iter_rw(iter) == READ) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -1169,19 +1166,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (is_sync_kiocb(iocb)) dio->is_async = false; else if (!(dio->flags & DIO_ASYNC_EXTEND) && - (rw & WRITE) && end > i_size_read(inode)) + iov_iter_rw(iter) == WRITE && end > i_size_read(inode)) dio->is_async = false; else dio->is_async = true; dio->inode = inode; - dio->rw = rw; + dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue * so that we can call ->fsync. */ - if (dio->is_async && (rw & WRITE) && + if (dio->is_async && iov_iter_rw(iter) == WRITE && ((iocb->ki_filp->f_flags & O_DSYNC) || IS_SYNC(iocb->ki_filp->f_mapping->host))) { retval = dio_set_defer_completion(dio); @@ -1274,7 +1271,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * we can let i_mutex go now that its achieved its purpose * of protecting us from looking up uninitialized blocks. 
*/ - if (rw == READ && (dio->flags & DIO_LOCKING)) + if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING)) mutex_unlock(&dio->inode->i_mutex); /* @@ -1286,7 +1283,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, */ BUG_ON(retval == -EIOCBQUEUED); if (dio->is_async && retval == 0 && dio->result && - (rw == READ || dio->result == count)) + (iov_iter_rw(iter) == READ || dio->result == count)) retval = -EIOCBQUEUED; else dio_await_completion(dio); @@ -1300,11 +1297,11 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, return retval; } -ssize_t -__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, + dio_iodone_t end_io, dio_submit_t submit_io, + int flags) { /* * The block device state is needed in the end to finally @@ -1318,8 +1315,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, prefetch(bdev->bd_queue); prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); - return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, - get_block, end_io, submit_io, flags); + return do_blockdev_direct_IO(iocb, inode, bdev, iter, offset, get_block, + end_io, submit_io, flags); } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index df9d6afbc5d5e..3cbeb1b63acf8 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -864,7 +864,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, NULL, DIO_LOCKING); else - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, offset, ext2_get_block); if (ret < 0 && (rw & WRITE)) 
ext2_write_failed(mapping, offset + count); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index db07ffbe7c85c..6fb376c8d9386 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1856,7 +1856,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, ext3_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 740c7871c1177..ae4ffc27abc66 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -693,9 +693,10 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, ret = dax_do_io(rw, iocb, inode, iter, offset, ext4_get_block, NULL, 0); else - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iter, offset, - ext4_get_block, NULL, NULL, 0); + ret = __blockdev_direct_IO(iocb, inode, + inode->i_sb->s_bdev, iter, + offset, ext4_get_block, NULL, + NULL, 0); inode_dio_done(inode); } else { locked: @@ -703,8 +704,8 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, ret = dax_do_io(rw, iocb, inode, iter, offset, ext4_get_block, NULL, DIO_LOCKING); else - ret = blockdev_direct_IO(rw, iocb, inode, iter, - offset, ext4_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, + ext4_get_block); if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a3f451370bef4..ec049c04b197f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3037,7 +3037,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, ext4_end_io_dio, dio_flags); else - ret = __blockdev_direct_IO(rw, iocb, inode, + ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block_func, ext4_end_io_dio, NULL, dio_flags); diff --git 
a/fs/f2fs/data.c b/fs/f2fs/data.c index 497f8515d2056..e16adebcb9b67 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1159,7 +1159,7 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (rw & WRITE) __allocate_data_blocks(inode, offset, count); - err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); + err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); if (err < 0 && (rw & WRITE)) f2fs_write_failed(mapping, offset + count); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8521207de2293..a1a39f571e781 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -274,7 +274,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). */ - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block); if (ret < 0 && (rw & WRITE)) fat_write_failed(mapping, offset + count); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index fe6634d25d1dd..59983a18cab42 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1095,9 +1095,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, truncate_inode_pages_range(mapping, lstart, end); } - rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, - iter, offset, - gfs2_get_block_direct, NULL, NULL, 0); + rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, + offset, gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9337065bcc676..e92d175d1fd7c 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -133,7 +133,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, hfs_get_block); /* * In case of error extending write 
may have instantiated a few diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 5f86cadb0542c..2a98dc07c22c5 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -131,8 +131,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, - hfsplus_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, hfsplus_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 3197aed106148..c20f7883543fb 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -339,7 +339,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, jfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index ab4987bc637f8..3727b8caa46e0 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -318,8 +318,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, iter, offset, - nilfs_get_block); + size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e1bf18c5d25e1..68cb199fb2b6a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -738,10 +738,9 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, di_bh = NULL; } - written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, - iter, offset, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io, NULL, 0); + written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, + offset, ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, 
NULL, 0); if (unlikely(written < 0)) { loff_t i_size = i_size_read(inode); @@ -844,11 +843,10 @@ static ssize_t ocfs2_direct_IO(int rw, return 0; if (rw == READ) - return __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, - iter, offset, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io, NULL, 0); + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, offset, + ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); else return ocfs2_direct_IO_write(iocb, iter, offset); } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9312b7842e036..a51e9177b0567 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3286,7 +3286,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, offset, reiserfs_get_blocks_direct_io); /* diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 9c1fbd23913db..3adf49c01c196 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -225,7 +225,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block); if (unlikely(ret < 0 && (rw & WRITE))) udf_write_failed(mapping, offset + count); return ret; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4f8cdc59bc381..5ca504c66e854 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1504,14 +1504,13 @@ xfs_vm_direct_IO( struct block_device *bdev = xfs_find_bdev_for_inode(inode); if (rw & WRITE) { - return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, + return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); } - return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, 
xfs_get_blocks_direct, - NULL, NULL, 0); + return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + xfs_get_blocks_direct, NULL, NULL, 0); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index f1e3f65255a81..c67b6de8be331 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2634,16 +2634,18 @@ enum { void dio_end_io(struct bio *bio, int error); -ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags); - -static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - struct inode *inode, struct iov_iter *iter, loff_t offset, - get_block_t get_block) -{ - return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter, +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, + dio_iodone_t end_io, dio_submit_t submit_io, + int flags); + +static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, + struct inode *inode, + struct iov_iter *iter, loff_t offset, + get_block_t get_block) +{ + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } From a95cd6311512bd954e88684eb39373f7f4b0a984 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:51 -0700 Subject: [PATCH 03/30] Remove rw from dax_{do_,}io() And use iov_iter_rw() instead. 
Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- fs/dax.c | 27 +++++++++++++-------------- fs/ext2/inode.c | 4 ++-- fs/ext4/indirect.c | 4 ++-- fs/ext4/inode.c | 2 +- include/linux/fs.h | 4 ++-- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index ed1619ec6537c..a27846946525e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -98,9 +98,9 @@ static bool buffer_size_valid(struct buffer_head *bh) return bh->b_state != 0; } -static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, - loff_t start, loff_t end, get_block_t get_block, - struct buffer_head *bh) +static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, + loff_t start, loff_t end, get_block_t get_block, + struct buffer_head *bh) { ssize_t retval = 0; loff_t pos = start; @@ -109,7 +109,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, void *addr; bool hole = false; - if (rw != WRITE) + if (iov_iter_rw(iter) != WRITE) end = min(end, i_size_read(inode)); while (pos < end) { @@ -124,7 +124,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, bh->b_size = PAGE_ALIGN(end - pos); bh->b_state = 0; retval = get_block(inode, block, bh, - rw == WRITE); + iov_iter_rw(iter) == WRITE); if (retval) break; if (!buffer_size_valid(bh)) @@ -137,7 +137,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, bh->b_size -= done; } - hole = (rw != WRITE) && !buffer_written(bh); + hole = iov_iter_rw(iter) != WRITE && !buffer_written(bh); if (hole) { addr = NULL; size = bh->b_size - first; @@ -154,7 +154,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, max = min(pos + size, end); } - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) len = copy_from_iter(addr, max - pos, iter); else if (!hole) len = copy_to_iter(addr, max - pos, iter); @@ -173,7 +173,6 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, /** * dax_do_io - Perform I/O to a DAX 
file - * @rw: READ to read or WRITE to write * @iocb: The control block for this I/O * @inode: The file which the I/O is directed at * @iter: The addresses to do I/O from or to @@ -189,9 +188,9 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O * is in progress. */ -ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t pos, - get_block_t get_block, dio_iodone_t end_io, int flags) +ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, + struct iov_iter *iter, loff_t pos, get_block_t get_block, + dio_iodone_t end_io, int flags) { struct buffer_head bh; ssize_t retval = -EINVAL; @@ -199,7 +198,7 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, memset(&bh, 0, sizeof(bh)); - if ((flags & DIO_LOCKING) && (rw == READ)) { + if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) { struct address_space *mapping = inode->i_mapping; mutex_lock(&inode->i_mutex); retval = filemap_write_and_wait_range(mapping, pos, end - 1); @@ -212,9 +211,9 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, /* Protects against truncate */ atomic_inc(&inode->i_dio_count); - retval = dax_io(rw, inode, iter, pos, end, get_block, &bh); + retval = dax_io(inode, iter, pos, end, get_block, &bh); - if ((flags & DIO_LOCKING) && (rw == READ)) + if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) mutex_unlock(&inode->i_mutex); if ((retval > 0) && end_io) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 3cbeb1b63acf8..14e8d1752685a 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -861,8 +861,8 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, ssize_t ret; if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, - NULL, DIO_LOCKING); + ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL, + DIO_LOCKING); else ret = blockdev_direct_IO(iocb, inode, iter, 
offset, ext2_get_block); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index ae4ffc27abc66..cd81d01da0b0b 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -690,7 +690,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, goto locked; } if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, offset, ext4_get_block, NULL, 0); else ret = __blockdev_direct_IO(iocb, inode, @@ -701,7 +701,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } else { locked: if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, offset, ext4_get_block, NULL, DIO_LOCKING); else ret = blockdev_direct_IO(iocb, inode, iter, offset, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ec049c04b197f..76b8cba5d041c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3034,7 +3034,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, dio_flags = DIO_LOCKING; } if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, + ret = dax_do_io(iocb, inode, iter, offset, get_block_func, ext4_end_io_dio, dio_flags); else ret = __blockdev_direct_IO(iocb, inode, diff --git a/include/linux/fs.h b/include/linux/fs.h index c67b6de8be331..295bc589fe1b9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2609,8 +2609,8 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, - loff_t, get_block_t, dio_iodone_t, int flags); +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, + get_block_t, dio_iodone_t, int flags); int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int 
dax_truncate_page(struct inode *, loff_t from, get_block_t); From 6f67376318abea58589ebe6d69dffeabb6f6c26a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:52 -0700 Subject: [PATCH 04/30] direct_IO: use iov_iter_rw() instead of rw everywhere The rw parameter to direct_IO is redundant with iov_iter->type, and treated slightly differently just about everywhere it's used: some users do rw & WRITE, and others do rw == WRITE where they should be doing a bitwise check. Simplify this with the new iov_iter_rw() helper, which always returns either READ or WRITE. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/llite/rw26.c | 18 +++++++++--------- fs/9p/vfs_addr.c | 2 +- fs/affs/file.c | 4 ++-- fs/btrfs/inode.c | 10 +++++----- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 8 ++++---- fs/ext4/ext4.h | 4 ++-- fs/ext4/indirect.c | 10 +++++----- fs/ext4/inode.c | 20 ++++++++++---------- fs/f2fs/data.c | 16 ++++++++-------- fs/fat/inode.c | 4 ++-- fs/fuse/file.c | 13 +++++++------ fs/gfs2/aops.c | 7 +++---- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/jfs/inode.c | 2 +- fs/nfs/direct.c | 2 +- fs/nilfs2/inode.c | 4 ++-- fs/ocfs2/aops.c | 2 +- fs/reiserfs/inode.c | 2 +- fs/udf/inode.c | 2 +- fs/xfs/xfs_aops.c | 2 +- 22 files changed, 69 insertions(+), 69 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 2f21304046aa9..3aa9de6bcc400 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -399,7 +399,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, * size changing by concurrent truncates and writes. * 1. Need inode mutex to operate transient pages. 
*/ - if (rw == READ) + if (iov_iter_rw(iter) == READ) mutex_lock(&inode->i_mutex); LASSERT(obj->cob_transient_pages == 0); @@ -408,7 +408,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, size_t offs; count = min_t(size_t, iov_iter_count(iter), size); - if (rw == READ) { + if (iov_iter_rw(iter) == READ) { if (file_offset >= i_size_read(inode)) break; if (file_offset + count > i_size_read(inode)) @@ -418,11 +418,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, result = iov_iter_get_pages_alloc(iter, &pages, count, &offs); if (likely(result > 0)) { int n = DIV_ROUND_UP(result + offs, PAGE_SIZE); - result = ll_direct_IO_26_seg(env, io, rw, inode, - file->f_mapping, - result, file_offset, - pages, n); - ll_free_user_pages(pages, n, rw==READ); + result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter), + inode, file->f_mapping, + result, file_offset, pages, + n); + ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ); } if (unlikely(result <= 0)) { /* If we can't allocate a large enough buffer @@ -449,11 +449,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, } out: LASSERT(obj->cob_transient_pages == 0); - if (rw == READ) + if (iov_iter_rw(iter) == READ) mutex_unlock(&inode->i_mutex); if (tot_bytes > 0) { - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { struct lov_stripe_md *lsm; lsm = ccc_inode_lsm_get(inode); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 2e38f9a5b4728..dd5543b1d183c 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -253,7 +253,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) struct file *file = iocb->ki_filp; ssize_t n; int err = 0; - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { n = p9_client_write(file->private_data, pos, iter, &err); if (n) { struct inode *inode = file_inode(file); diff --git a/fs/affs/file.c b/fs/affs/file.c index 1edc0d4b40dba..7f05a468d594b 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -398,7 +398,7 @@ 
affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t size = offset + count; if (AFFS_I(inode)->mmu_private < size) @@ -406,7 +406,7 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, } ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block); - if (ret < 0 && (rw & WRITE)) + if (ret < 0 && iov_iter_rw(iter) == WRITE) affs_write_failed(mapping, offset + count); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9a3ff8a85fd4..ca69e83d4f3c9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8081,7 +8081,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, bio_endio(dio_bio, ret); } -static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, +static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, const struct iov_iter *iter, loff_t offset) { int seg; @@ -8096,7 +8096,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io goto out; /* If this is a write we don't need to check anymore */ - if (rw & WRITE) + if (iov_iter_rw(iter) == WRITE) return 0; /* * Check to make sure we don't have duplicate iov_base's in this @@ -8126,7 +8126,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, bool relock = false; ssize_t ret; - if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) + if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset)) return 0; atomic_inc(&inode->i_dio_count); @@ -8144,7 +8144,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, filemap_fdatawrite_range(inode->i_mapping, offset, offset + count - 1); - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { /* * If the write DIO is beyond the EOF, we need update * the isize, but it is protected by i_mutex. 
So we can @@ -8178,7 +8178,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 14e8d1752685a..685e514c57ddb 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -866,7 +866,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, else ret = blockdev_direct_IO(iocb, inode, iter, offset, ext2_get_block); - if (ret < 0 && (rw & WRITE)) + if (ret < 0 && iov_iter_rw(iter) == WRITE) ext2_write_failed(mapping, offset + count); return ret; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 6fb376c8d9386..c70839d26ccdb 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1832,9 +1832,9 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int retries = 0; - trace_ext3_direct_IO_enter(inode, offset, count, rw); + trace_ext3_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t final_size = offset + count; if (final_size > inode->i_size) { @@ -1861,7 +1861,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. 
*/ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; @@ -1908,7 +1908,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, ret = err; } out: - trace_ext3_direct_IO_exit(inode, offset, count, rw, ret); + trace_ext3_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f63c3d5805c4c..2031c994024ed 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2152,8 +2152,8 @@ extern void ext4_da_update_reserve_space(struct inode *inode, /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); -extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset); +extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset); extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index cd81d01da0b0b..3580629e42d32 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -642,8 +642,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, * crashes then stale disk data _may_ be exposed inside the file. But current * VFS code falls back into buffered path in that case so we are safe. 
*/ -ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -654,7 +654,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int retries = 0; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t final_size = offset + count; if (final_size > inode->i_size) { @@ -676,7 +676,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) { + if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) { /* * Nolock dioread optimization may be dynamically disabled * via ext4_inode_block_unlocked_dio(). Check inode's state @@ -707,7 +707,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, ret = blockdev_direct_IO(iocb, inode, iter, offset, ext4_get_block); - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 76b8cba5d041c..cf6ba65360355 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2952,8 +2952,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * if the machine crashes during the write. * */ -static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -2966,8 +2966,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ext4_io_end_t *io_end = NULL; /* Use the old path for reads and writes beyond i_size. 
*/ - if (rw != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(rw, iocb, iter, offset); + if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) + return ext4_ind_direct_IO(iocb, iter, offset); BUG_ON(iocb->private == NULL); @@ -2976,7 +2976,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * conversion. This also disallows race between truncate() and * overwrite DIO as i_dio_count needs to be incremented under i_mutex. */ - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) atomic_inc(&inode->i_dio_count); /* If we do a overwrite dio, i_mutex locking can be released */ @@ -3078,7 +3078,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, } retake_lock: - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) inode_dio_done(inode); /* take i_mutex locking again if we do a ovewrite dio */ if (overwrite) { @@ -3107,12 +3107,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (ext4_has_inline_data(inode)) return 0; - trace_ext4_direct_IO_enter(inode, offset, count, rw); + trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(rw, iocb, iter, offset); + ret = ext4_ext_direct_IO(iocb, iter, offset); else - ret = ext4_ind_direct_IO(rw, iocb, iter, offset); - trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); + ret = ext4_ind_direct_IO(iocb, iter, offset); + trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e16adebcb9b67..ce25f62edfa74 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1118,12 +1118,12 @@ static int f2fs_write_end(struct file *file, return copied; } -static int check_direct_IO(struct inode *inode, int rw, - struct iov_iter *iter, loff_t offset) +static int check_direct_IO(struct inode *inode, struct iov_iter *iter, + loff_t offset) { unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; - if (rw == 
READ) + if (iov_iter_rw(iter) == READ) return 0; if (offset & blocksize_mask) @@ -1151,19 +1151,19 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } - if (check_direct_IO(inode, rw, iter, offset)) + if (check_direct_IO(inode, iter, offset)) return 0; - trace_f2fs_direct_IO_enter(inode, offset, count, rw); + trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (rw & WRITE) + if (iov_iter_rw(iter) == WRITE) __allocate_data_blocks(inode, offset, count); err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); - if (err < 0 && (rw & WRITE)) + if (err < 0 && iov_iter_rw(iter) == WRITE) f2fs_write_failed(mapping, offset + count); - trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); + trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); return err; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a1a39f571e781..342d791b28dbc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -255,7 +255,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { /* * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. @@ -275,7 +275,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * condition of fat_get_block() and ->truncate(). 
*/ ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block); - if (ret < 0 && (rw & WRITE)) + if (ret < 0 && iov_iter_rw(iter) == WRITE) fat_write_failed(mapping, offset + count); return ret; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e1afdd7abf901..c1a67da6a8a09 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2800,11 +2800,11 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, inode = file->f_mapping->host; i_size = i_size_read(inode); - if ((rw == READ) && (offset > i_size)) + if ((iov_iter_rw(iter) == READ) && (offset > i_size)) return 0; /* optimization for short read */ - if (async_dio && rw != WRITE && offset + count > i_size) { + if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; count = min_t(loff_t, count, fuse_round_up(i_size - offset)); @@ -2819,7 +2819,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, io->bytes = -1; io->size = 0; io->offset = offset; - io->write = (rw == WRITE); + io->write = (iov_iter_rw(iter) == WRITE); io->err = 0; io->file = file; /* @@ -2834,13 +2834,14 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, * to wait on real async I/O requests, so we must submit this request * synchronously. 
*/ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + if (!is_sync_kiocb(iocb) && (offset + count > i_size) && + iov_iter_rw(iter) == WRITE) io->async = false; if (io->async && is_sync_kiocb(iocb)) io->done = &wait; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { ret = generic_write_checks(file, &pos, &count, 0); if (!ret) { iov_iter_truncate(iter, count); @@ -2865,7 +2866,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, kfree(io); - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { if (ret > 0) fuse_write_update_size(inode, pos); else if (ret < 0 && offset + count > i_size) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 59983a18cab42..e22e6e686a113 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1016,13 +1016,12 @@ static void gfs2_invalidatepage(struct page *page, unsigned int offset, /** * gfs2_ok_for_dio - check that dio is valid on this file * @ip: The inode - * @rw: READ or WRITE * @offset: The offset at which we are reading or writing * * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) * 1 (to accept the i/o request) */ -static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) +static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) { /* * Should we return an error here? 
I can't see that O_DIRECT for @@ -1061,7 +1060,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, rv = gfs2_glock_nq(&gh); if (rv) return rv; - rv = gfs2_ok_for_dio(ip, rw, offset); + rv = gfs2_ok_for_dio(ip, offset); if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ @@ -1091,7 +1090,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, rv = filemap_write_and_wait_range(mapping, lstart, end); if (rv) goto out; - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) truncate_inode_pages_range(mapping, lstart, end); } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index e92d175d1fd7c..0085d527a55cd 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -139,7 +139,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 2a98dc07c22c5..afcde36b506b6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -137,7 +137,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index c20f7883543fb..e7047b63ffc5c 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -345,7 +345,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. 
*/ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c3929fb2ab26c..e8482b8f48302 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -267,7 +267,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t #else VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); - if (rw == READ) + if (iov_iter_rw(iter) == READ) return nfs_file_direct_read(iocb, iter, pos); return nfs_file_direct_write(iocb, iter, pos); #endif /* CONFIG_NFS_SWAP */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 3727b8caa46e0..4726f1493d5d7 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -314,7 +314,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t size; - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) return 0; /* Needs synchronization with the cleaner */ @@ -324,7 +324,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. 
*/ - if (unlikely((rw & WRITE) && size < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 68cb199fb2b6a..0ee9474cca468 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -842,7 +842,7 @@ static ssize_t ocfs2_direct_IO(int rw, if (i_size_read(inode) <= offset && !full_coherency) return 0; - if (rw == READ) + if (iov_iter_rw(iter) == READ) return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, ocfs2_direct_IO_get_blocks, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a51e9177b0567..d61ab13244a86 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3293,7 +3293,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 3adf49c01c196..a685aea930682 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -226,7 +226,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, ssize_t ret; ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block); - if (unlikely(ret < 0 && (rw & WRITE))) + if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE)) udf_write_failed(mapping, offset + count); return ret; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5ca504c66e854..532d5279df2fd 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1503,7 +1503,7 @@ xfs_vm_direct_IO( struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, From 
22c6186ecea0be9eff1c399298ad36e94a59995f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:53 -0700 Subject: [PATCH 05/30] direct_IO: remove rw from a_ops->direct_IO() Now that no one is using rw, remove it completely. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- drivers/staging/lustre/lustre/llite/rw26.c | 4 ++-- fs/9p/vfs_addr.c | 3 +-- fs/affs/file.c | 3 +-- fs/block_dev.c | 3 +-- fs/btrfs/inode.c | 4 ++-- fs/ceph/addr.c | 3 +-- fs/cifs/file.c | 3 +-- fs/exofs/inode.c | 4 ++-- fs/ext2/inode.c | 3 +-- fs/ext3/inode.c | 4 ++-- fs/ext4/inode.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/fat/inode.c | 3 +-- fs/fuse/file.c | 3 +-- fs/gfs2/aops.c | 4 ++-- fs/hfs/inode.c | 4 ++-- fs/hfsplus/inode.c | 4 ++-- fs/jfs/inode.c | 4 ++-- fs/nfs/direct.c | 3 +-- fs/nilfs2/inode.c | 3 +-- fs/ocfs2/aops.c | 4 +--- fs/reiserfs/inode.c | 4 ++-- fs/udf/file.c | 3 +-- fs/udf/inode.c | 3 +-- fs/xfs/xfs_aops.c | 1 - include/linux/fs.h | 2 +- include/linux/nfs_fs.h | 2 +- mm/filemap.c | 4 ++-- mm/page_io.c | 4 +--- 31 files changed, 42 insertions(+), 59 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index c3cd6279e92e7..7cdbca44e3433 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -196,7 +196,7 @@ prototypes: void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 
207cdca68bedf..5d833b32bbcd1 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -590,7 +590,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 3aa9de6bcc400..0d7ce6b0e23cc 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -359,8 +359,8 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. 
*/ #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \ ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t file_offset) +static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, + loff_t file_offset) { struct lu_env *env; struct cl_io *io; diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index dd5543b1d183c..be35d05a4d0ef 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -230,7 +230,6 @@ static int v9fs_launder_page(struct page *page) /** * v9fs_direct_IO - 9P address space operation for direct I/O - * @rw: direction (read or write) * @iocb: target I/O control block * @iov: array of vectors that define I/O buffer * @pos: offset in file to begin the operation @@ -248,7 +247,7 @@ static int v9fs_launder_page(struct page *page) * */ static ssize_t -v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { struct file *file = iocb->ki_filp; ssize_t n; diff --git a/fs/affs/file.c b/fs/affs/file.c index 7f05a468d594b..dcf27951781cf 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -389,8 +389,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to) } static ssize_t -affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/block_dev.c b/fs/block_dev.c index bc23afd35fdba..6e3de63c30556 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -146,8 +146,7 @@ blkdev_get_block(struct inode *inode, sector_t iblock, } static ssize_t -blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = 
file->f_mapping->host; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ca69e83d4f3c9..43192e10cc433 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8114,8 +8114,8 @@ static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, return retval; } -static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index fd5599d323620..155ab9c0246b2 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1198,8 +1198,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, * intercept O_DIRECT reads and writes early, this function should * never get called. */ -static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { WARN_ON(1); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ca30c391a894a..72394c5abd0fc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3877,8 +3877,7 @@ void cifs_oplock_break(struct work_struct *work) * Direct IO is not yet supported in the cached mode. 
*/ static ssize_t -cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { /* * FIXME diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index a198e94813fec..35073aaec6e08 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -963,8 +963,8 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset, /* TODO: Should be easy enough to do proprly */ -static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { return 0; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 685e514c57ddb..e1abf75e994cb 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -851,8 +851,7 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) } static ssize_t -ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c70839d26ccdb..13c0868c7160e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1820,8 +1820,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * crashes then stale disk data _may_ be exposed inside the file. But current * VFS code falls back into buffered path in that case so we are safe. 
*/ -static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext3_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cf6ba65360355..42c942a950e15 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3089,8 +3089,8 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return ret; } -static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ce25f62edfa74..319eda511c4ff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1135,8 +1135,8 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, return 0; } -static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 342d791b28dbc..41b729933638a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -245,8 +245,7 @@ static int fat_write_end(struct file *file, struct address_space *mapping, return err; } -static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c1a67da6a8a09..3d355e9469914 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2782,8 +2782,7 @@ static inline loff_t fuse_round_up(loff_t off) } static ssize_t -fuse_direct_IO(int rw, struct 
kiocb *iocb, struct iov_iter *iter, - loff_t offset) +fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { DECLARE_COMPLETION_ONSTACK(wait); ssize_t ret = 0; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e22e6e686a113..20dd33da92ded 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1038,8 +1038,8 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) -static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 0085d527a55cd..75fd5d873c196 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -124,8 +124,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index afcde36b506b6..a43811f909353 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -122,8 +122,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) return res ? 
try_to_free_buffers(page) : 0; } -static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index e7047b63ffc5c..070dc4b335449 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -330,8 +330,8 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, jfs_get_block); } -static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e8482b8f48302..06503bc604e18 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -240,7 +240,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, /** * nfs_direct_IO - NFS address space operation for direct I/O - * @rw: direction (read or write) * @iocb: target I/O control block * @iov: array of vectors that define I/O buffer * @pos: offset in file to begin the operation @@ -251,7 +250,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, * shunt off direct read and write requests before the VFS gets them, * so this method is only ever called for swap. 
*/ -ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { struct inode *inode = iocb->ki_filp->f_mapping->host; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4726f1493d5d7..36f057fa8aa3b 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -305,8 +305,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0ee9474cca468..28b5ad81bbec7 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -818,9 +818,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, return ret; } -static ssize_t ocfs2_direct_IO(int rw, - struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d61ab13244a86..742242b609726 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3278,8 +3278,8 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) * We thank Mingming Cao for helping us understand in great detail what * to do in this section of the code. 
*/ -static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/udf/file.c b/fs/udf/file.c index 74050bff64f40..78d42548b2608 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -100,8 +100,7 @@ static int udf_adinicb_write_begin(struct file *file, return 0; } -static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { /* Fallback to buffered I/O. */ diff --git a/fs/udf/inode.c b/fs/udf/inode.c index a685aea930682..4f178c83b04f1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -215,8 +215,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, return ret; } -static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 532d5279df2fd..1d8eef9cf0f50 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1495,7 +1495,6 @@ xfs_end_io_direct_write( STATIC ssize_t xfs_vm_direct_IO( - int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset) diff --git a/include/linux/fs.h b/include/linux/fs.h index 295bc589fe1b9..72e3759de8c38 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -383,7 +383,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); /* * migrate the contents of a page to the 
specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b01ccf371fdca..3d1b0d2fe55e8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -447,7 +447,7 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); diff --git a/mm/filemap.c b/mm/filemap.c index 876f4e6f3ed66..9920db455f056 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1707,7 +1707,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) pos + count - 1); if (!retval) { struct iov_iter data = *iter; - retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos); + retval = mapping->a_ops->direct_IO(iocb, &data, pos); } if (retval > 0) { @@ -2395,7 +2395,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) } data = *from; - written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos); + written = mapping->a_ops->direct_IO(iocb, &data, pos); /* * Finally, try again to invalidate clean pages which might have been diff --git a/mm/page_io.c b/mm/page_io.c index a96c8562d8356..6424869e275e2 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -277,9 +277,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE, - &kiocb, &from, - kiocb.ki_pos); + ret = mapping->a_ops->direct_IO(&kiocb, &from, kiocb.ki_pos); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; From f765b134c0d3f294f6084d3e0a11de184059a387 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2015 20:50:38 -0400 Subject: [PATCH 06/30] new_sync_write(): discard ->ki_pos unless the return value is positive 
That makes life much more convenient for ->write_iter() instances wrt iocb->ki_pos (and fixes several filesystems with borderline POSIX violations where a zero-length write succeeds and changes the current position). Signed-off-by: Al Viro --- fs/read_write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/read_write.c b/fs/read_write.c index 45d583c33879e..819ef3faf1bb7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -477,7 +477,8 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t ret = filp->f_op->write_iter(&kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); - *ppos = kiocb.ki_pos; + if (ret > 0) + *ppos = kiocb.ki_pos; return ret; } From ccca26835dc27f7ba54e09d7aa03f462684a1927 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Apr 2015 14:06:24 -0400 Subject: [PATCH 07/30] ntfs: move iov_iter_truncate() closer to generic_write_checks() Signed-off-by: Al Viro --- fs/ntfs/file.c | 81 ++++++++++++++++++-------------------------------- 1 file changed, 29 insertions(+), 52 deletions(-) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 840e95e3f1d27..77087d5ad4582 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -328,26 +328,29 @@ static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size) return err; } -static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, - size_t *count) +static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, + struct iov_iter *from) { loff_t pos; s64 end, ll; ssize_t err; unsigned long flags; + struct file *file = iocb->ki_filp; struct inode *vi = file_inode(file); ntfs_inode *base_ni, *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; + size_t count = iov_iter_count(from); ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " - "0x%llx, count 0x%lx.", vi->i_ino, + "0x%llx, count 0x%zx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)*ppos, (unsigned long)*count); - /* We can write back this queue in page reclaim.
*/ - current->backing_dev_info = inode_to_bdi(vi); - err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode)); + (unsigned long long)iocb->ki_pos, count); + err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(vi->i_mode)); if (unlikely(err)) goto out; + iov_iter_truncate(from, count); + if (count == 0) + goto out; /* * All checks have passed. Before we start doing any writing we want * to abort any totally illegal writes. @@ -379,8 +382,6 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, err = -EOPNOTSUPP; goto out; } - if (*count == 0) - goto out; base_ni = ni; if (NInoAttr(ni)) base_ni = ni->ext.base_ntfs_ino; @@ -392,9 +393,9 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, * cannot fail either so there is no need to check the return code. */ file_update_time(file); - pos = *ppos; + pos = iocb->ki_pos; /* The first byte after the last cluster being written to. */ - end = (pos + *count + vol->cluster_size_mask) & + end = (pos + iov_iter_count(from) + vol->cluster_size_mask) & ~(u64)vol->cluster_size_mask; /* * If the write goes beyond the allocated size, extend the allocation @@ -422,7 +423,7 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, "partially extended.", vi->i_ino, (unsigned) le32_to_cpu(ni->type)); - *count = ll - pos; + iov_iter_truncate(from, ll - pos); } } else { err = ll; @@ -438,7 +439,7 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, vi->i_ino, (unsigned) le32_to_cpu(ni->type), (int)-err); - *count = ll - pos; + iov_iter_truncate(from, ll - pos); } else { if (err != -ENOSPC) ntfs_error(vi->i_sb, "Cannot perform " @@ -1929,61 +1930,37 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, return written ? written : status; } -/** - * ntfs_file_write_iter_nolock - write data to a file - * @iocb: IO state structure (file, offset, etc.) 
- * @from: iov_iter with data to write - * - * Basically the same as __generic_file_write_iter() except that it ends - * up calling ntfs_perform_write() instead of generic_perform_write() and that - * O_DIRECT is not implemented. - */ -static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb, - struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - loff_t pos = iocb->ki_pos; - ssize_t written = 0; - ssize_t err; - size_t count = iov_iter_count(from); - - err = ntfs_prepare_file_for_write(file, &pos, &count); - if (count && !err) { - iov_iter_truncate(from, count); - written = ntfs_perform_write(file, from, pos); - if (likely(written >= 0)) - iocb->ki_pos = pos + written; - } - current->backing_dev_info = NULL; - return written ? written : err; -} - /** * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() * @iocb: IO state structure * @from: iov_iter with data to write * * Basically the same as generic_file_write_iter() except that it ends up - * calling ntfs_file_write_iter_nolock() instead of - * __generic_file_write_iter(). + * up calling ntfs_perform_write() instead of generic_perform_write() and that + * O_DIRECT is not implemented. */ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *vi = file_inode(file); - ssize_t ret; + ssize_t written = 0; + ssize_t err; mutex_lock(&vi->i_mutex); - ret = ntfs_file_write_iter_nolock(iocb, from); + /* We can write back this queue in page reclaim. 
*/ + current->backing_dev_info = inode_to_bdi(vi); + err = ntfs_prepare_file_for_write(iocb, from); + if (iov_iter_count(from) && !err) + written = ntfs_perform_write(file, from, iocb->ki_pos); + current->backing_dev_info = NULL; mutex_unlock(&vi->i_mutex); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (likely(written > 0)) { + err = generic_write_sync(file, iocb->ki_pos, written); if (err < 0) - ret = err; + written = 0; } - return ret; + iocb->ki_pos += written; + return written ? written : err; } /** From e9d1593d4e9311bca040ecf6ec7599e6f235140c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2015 22:44:11 -0400 Subject: [PATCH 08/30] cifs: fold cifs_iovec_write() into the only caller Signed-off-by: Al Viro --- fs/cifs/file.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 72394c5abd0fc..3cb04129ddb17 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2560,9 +2560,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, return rc; } -static ssize_t -cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; size_t len; ssize_t total_written = 0; struct cifsFileInfo *open_file; @@ -2573,8 +2573,14 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) struct iov_iter saved_from; int rc; + /* + * BB - optimize the way when signing is disabled. We can drop this + * extra memory-to-memory copying and use iovec buffers for constructing + * write request. 
+ */ + len = iov_iter_count(from); - rc = generic_write_checks(file, poffset, &len, 0); + rc = generic_write_checks(file, &iocb->ki_pos, &len, 0); if (rc) return rc; @@ -2593,7 +2599,7 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) memcpy(&saved_from, from, sizeof(struct iov_iter)); - rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb, + rc = cifs_write_from_iter(iocb->ki_pos, len, from, open_file, cifs_sb, &wdata_list); /* @@ -2633,7 +2639,7 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) memcpy(&tmp_from, &saved_from, sizeof(struct iov_iter)); iov_iter_advance(&tmp_from, - wdata->offset - *poffset); + wdata->offset - iocb->ki_pos); rc = cifs_write_from_iter(wdata->offset, wdata->bytes, &tmp_from, @@ -2650,34 +2656,13 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - if (total_written > 0) - *poffset += total_written; + if (unlikely(!total_written)) + return rc; + iocb->ki_pos += total_written; + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags); cifs_stats_bytes_written(tcon, total_written); - return total_written ? total_written : (ssize_t)rc; -} - -ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) -{ - ssize_t written; - struct inode *inode; - loff_t pos = iocb->ki_pos; - - inode = file_inode(iocb->ki_filp); - - /* - * BB - optimize the way when signing is disabled. We can drop this - * extra memory-to-memory copying and use iovec buffers for constructing - * write request. 
- */ - - written = cifs_iovec_write(iocb->ki_filp, from, &pos); - if (written > 0) { - set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); - iocb->ki_pos = pos; - } - - return written; + return total_written; } static ssize_t From 0b8def9d6dfa6b2a9a2740cf81d8d2c134688d39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 10:22:53 -0400 Subject: [PATCH 09/30] __generic_file_write_iter: keep ->ki_pos and return value consistent A side effect worth noting: in O_APPEND case we set ->ki_pos early, so if it turns out to be an error or a zero-length write, we'll end up with ->ki_pos modified. Safe, since all callers never look at the ->ki_pos after the call of __generic_file_write_iter() returning non-positive, all the way to caller of ->write_iter() and those discard ->ki_pos when getting that. Signed-off-by: Al Viro --- mm/filemap.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 9920db455f056..353f82e09e63f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2557,7 +2557,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; struct inode *inode = mapping->host; - loff_t pos = iocb->ki_pos; ssize_t written = 0; ssize_t err; ssize_t status; @@ -2565,7 +2564,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; @@ -2583,9 +2582,9 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; if (io_is_direct(file)) { - loff_t endbyte; + loff_t pos, endbyte; - written = generic_file_direct_write(iocb, from, pos); + written = 
generic_file_direct_write(iocb, from, iocb->ki_pos); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to @@ -2593,13 +2592,10 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) * not succeed (even if it did, DAX does not handle dirty * page-cache pages correctly). */ - if (written < 0 || written == count || IS_DAX(inode)) + if (written < 0 || !iov_iter_count(from) || IS_DAX(inode)) goto out; - pos += written; - count -= written; - - status = generic_perform_write(file, from, pos); + status = generic_perform_write(file, from, pos = iocb->ki_pos); /* * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were @@ -2611,15 +2607,15 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) err = status; goto out; } - iocb->ki_pos = pos + status; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. 
*/ endbyte = pos + status - 1; - err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); + err = filemap_write_and_wait_range(mapping, pos, endbyte); if (err == 0) { + iocb->ki_pos = endbyte + 1; written += status; invalidate_mapping_pages(mapping, pos >> PAGE_CACHE_SHIFT, @@ -2631,9 +2627,9 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) */ } } else { - written = generic_perform_write(file, from, pos); - if (likely(written >= 0)) - iocb->ki_pos = pos + written; + written = generic_perform_write(file, from, iocb->ki_pos); + if (likely(written > 0)) + iocb->ki_pos += written; } out: current->backing_dev_info = NULL; From 5f380c7fa7e01f15ca0816bd241ece9a64a73192 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 11:28:12 -0400 Subject: [PATCH 10/30] lift generic_write_checks() into callers of __generic_file_write_iter() Signed-off-by: Al Viro --- fs/block_dev.c | 10 ++++++++++ fs/cifs/file.c | 39 +++++++++++++++++++++++---------------- fs/ext4/file.c | 14 +++++++++++--- fs/udf/file.c | 10 ++++++++++ mm/filemap.c | 17 ++++++----------- 5 files changed, 60 insertions(+), 30 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 6e3de63c30556..bcd7f97beab9d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1597,6 +1597,16 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct blk_plug plug; ssize_t ret; + size_t count = iov_iter_count(from); + + ret = generic_write_checks(file, &iocb->ki_pos, &count, 1); + if (ret) + return ret; + + if (count == 0) + return 0; + + iov_iter_truncate(from, count); blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3cb04129ddb17..3c5c9bc5cbafd 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2673,8 +2673,8 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = 
CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - ssize_t rc = -EACCES; - loff_t lock_pos = iocb->ki_pos; + ssize_t rc; + size_t count; /* * We need to hold the sem to be sure nobody modifies lock list @@ -2682,23 +2682,30 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) */ down_read(&cinode->lock_sem); mutex_lock(&inode->i_mutex); - if (file->f_flags & O_APPEND) - lock_pos = i_size_read(inode); - if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from), + + count = iov_iter_count(from); + rc = generic_write_checks(file, &iocb->ki_pos, &count, 0); + if (rc) + goto out; + + if (count == 0) + goto out; + + iov_iter_truncate(from, count); + + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { + CIFS_WRITE_OP)) rc = __generic_file_write_iter(iocb, from); - mutex_unlock(&inode->i_mutex); - - if (rc > 0) { - ssize_t err; + else + rc = -EACCES; +out: + mutex_unlock(&inode->i_mutex); - err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } - } else { - mutex_unlock(&inode->i_mutex); + if (rc > 0) { + ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) + rc = err; } up_read(&cinode->lock_sem); return rc; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9ad03036d9f56..f7cca423ddedb 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -132,9 +132,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EFBIG; goto errout; } - - if (pos + length > sbi->s_bitmap_maxbytes) - iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); + iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); + length = iov_iter_count(from); } iocb->private = &overwrite; @@ -172,7 +171,16 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } + ret = generic_write_checks(file, &iocb->ki_pos, &length, 0); + if (ret) + goto out; + + if (length == 0) + goto out; + + 
iov_iter_truncate(from, length); ret = __generic_file_write_iter(iocb, from); +out: mutex_unlock(&inode->i_mutex); if (ret > 0) { diff --git a/fs/udf/file.c b/fs/udf/file.c index 78d42548b2608..35e81ed994054 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -151,7 +151,17 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } else up_write(&iinfo->i_data_sem); + retval = generic_write_checks(file, &iocb->ki_pos, &count, 0); + if (retval) + goto out; + + if (count == 0) + goto out; + + iov_iter_truncate(from, count); + retval = __generic_file_write_iter(iocb, from); +out: mutex_unlock(&inode->i_mutex); if (retval > 0) { diff --git a/mm/filemap.c b/mm/filemap.c index 353f82e09e63f..a794a7f987435 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2560,19 +2560,9 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t written = 0; ssize_t err; ssize_t status; - size_t count = iov_iter_count(from); /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - err = file_remove_suid(file); if (err) goto out; @@ -2651,9 +2641,14 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; + size_t count = iov_iter_count(from); mutex_lock(&inode->i_mutex); - ret = __generic_file_write_iter(iocb, from); + ret = generic_write_checks(file, &iocb->ki_pos, &count, 0); + if (!ret && count) { + iov_iter_truncate(from, count); + ret = __generic_file_write_iter(iocb, from); + } mutex_unlock(&inode->i_mutex); if (ret > 0) { From 7ec7b94a3339756dfbb88234e3e45a428e8c08fb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 11:35:14 -0400 Subject: [PATCH 11/30] blkdev_write_iter: expand 
generic_write_checks() call in there Signed-off-by: Al Viro --- fs/block_dev.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index bcd7f97beab9d..897ee0503932f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1595,18 +1595,21 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct inode *bd_inode = file->f_mapping->host; + loff_t size = i_size_read(bd_inode); struct blk_plug plug; ssize_t ret; - size_t count = iov_iter_count(from); - ret = generic_write_checks(file, &iocb->ki_pos, &count, 1); - if (ret) - return ret; + if (bdev_read_only(I_BDEV(bd_inode))) + return -EPERM; - if (count == 0) + if (!iov_iter_count(from)) return 0; - iov_iter_truncate(from, count); + if (iocb->ki_pos >= size) + return -ENOSPC; + + iov_iter_truncate(from, size - iocb->ki_pos); blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); From 0fa6b005afdb3152ce85df963302e59b61115f9b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Apr 2015 04:05:48 -0400 Subject: [PATCH 12/30] generic_write_checks(): drop isblk argument all remaining callers are passing 0; some just obscure that fact. 
Signed-off-by: Al Viro --- fs/9p/vfs_file.c | 2 +- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 2 +- fs/cifs/file.c | 4 +-- fs/ext4/file.c | 2 +- fs/fuse/file.c | 6 ++--- fs/ncpfs/file.c | 2 +- fs/nfs/direct.c | 2 +- fs/ntfs/file.c | 2 +- fs/ocfs2/file.c | 3 +-- fs/udf/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 63 +++++++++++++++------------------------------- 14 files changed, 36 insertions(+), 60 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index d7fcb775311e5..b5b020ace1b3c 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -409,7 +409,7 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) size_t count = iov_iter_count(from); int err = 0; - retval = generic_write_checks(file, &origin, &count, 0); + retval = generic_write_checks(file, &origin, &count); if (retval) return retval; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cdc801c851057..691a84a81e091 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1747,7 +1747,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, mutex_lock(&inode->i_mutex); current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &pos, &count); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 56237ea5fc227..7618419031602 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -953,7 +953,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &pos, &count); if (err) goto out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3c5c9bc5cbafd..4202e74b2db5c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2580,7 +2580,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct 
iov_iter *from) */ len = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &len, 0); + rc = generic_write_checks(file, &iocb->ki_pos, &len); if (rc) return rc; @@ -2684,7 +2684,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) mutex_lock(&inode->i_mutex); count = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &count, 0); + rc = generic_write_checks(file, &iocb->ki_pos, &count); if (rc) goto out; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index f7cca423ddedb..1f0afc181b7b4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -171,7 +171,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } - ret = generic_write_checks(file, &iocb->ki_pos, &length, 0); + ret = generic_write_checks(file, &iocb->ki_pos, &length); if (ret) goto out; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3d355e9469914..4c04a8144a755 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1167,7 +1167,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &pos, &count); if (err) goto out; @@ -1420,7 +1420,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = generic_write_checks(file, &iocb->ki_pos, &count, 0); + res = generic_write_checks(file, &iocb->ki_pos, &count); if (!res) { iov_iter_truncate(from, count); res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); @@ -2841,7 +2841,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) io->done = &wait; if (iov_iter_rw(iter) == WRITE) { - ret = generic_write_checks(file, &pos, &count, 0); + ret = generic_write_checks(file, &pos, &count); if (!ret) { iov_iter_truncate(iter, count); ret = 
fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 479bf8db264e0..ab6363b165565 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -177,7 +177,7 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) void *bouncebuffer; ncp_dbg(1, "enter %pD2\n", file); - errno = generic_write_checks(file, &pos, &count, 0); + errno = generic_write_checks(file, &pos, &count); if (errno) return errno; iov_iter_truncate(from, count); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 06503bc604e18..5ddd77acb3f74 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -977,7 +977,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, count, (long long) pos); - result = generic_write_checks(file, &pos, &count, 0); + result = generic_write_checks(file, &pos, &count); if (result) goto out; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 77087d5ad4582..cec4ec3c1ede2 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -345,7 +345,7 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, "0x%llx, count 0x%zx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), (unsigned long long)iocb->ki_pos, count); - err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(vi->i_mode)); + err = generic_write_checks(file, &iocb->ki_pos, &count); if (unlikely(err)) goto out; iov_iter_truncate(from, count); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0a6ec7e6efd81..1c11314946cb4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2374,8 +2374,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_write_checks(file, ppos, &count, - S_ISBLK(inode->i_mode)); + ret = generic_write_checks(file, ppos, &count); if (ret) goto out_dio; diff --git a/fs/udf/file.c b/fs/udf/file.c index 35e81ed994054..6834509a7e5a7 100644 --- a/fs/udf/file.c +++ 
b/fs/udf/file.c @@ -151,7 +151,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } else up_write(&iinfo->i_data_sem); - retval = generic_write_checks(file, &iocb->ki_pos, &count, 0); + retval = generic_write_checks(file, &iocb->ki_pos, &count); if (retval) goto out; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 44856c3b9617c..43c0e6686c477 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -554,7 +554,7 @@ xfs_file_aio_write_checks( int error = 0; restart: - error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); + error = generic_write_checks(file, pos, count); if (error) return error; diff --git a/include/linux/fs.h b/include/linux/fs.h index 72e3759de8c38..492948ea4c9bd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2566,7 +2566,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); +int generic_write_checks(struct file *file, loff_t *pos, size_t *count); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index a794a7f987435..dfc573c6ec25d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2260,7 +2260,7 @@ EXPORT_SYMBOL(read_cache_page_gfp); * Returns appropriate error code that caller should return or * zero in case that write should be allowed. 
*/ -inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk) +inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count) { struct inode *inode = file->f_mapping->host; unsigned long limit = rlimit(RLIMIT_FSIZE); @@ -2268,20 +2268,17 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (unlikely(*pos < 0)) return -EINVAL; - if (!isblk) { - /* FIXME: this is for backwards compatibility with 2.4 */ - if (file->f_flags & O_APPEND) - *pos = i_size_read(inode); + /* FIXME: this is for backwards compatibility with 2.4 */ + if (file->f_flags & O_APPEND) + *pos = i_size_read(inode); - if (limit != RLIM_INFINITY) { - if (*pos >= limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } - if (*count > limit - (typeof(limit))*pos) { - *count = limit - (typeof(limit))*pos; - } + if (limit != RLIM_INFINITY) { + if (*pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; } + if (*count > limit - (typeof(limit))*pos) + *count = limit - (typeof(limit))*pos; } /* @@ -2289,12 +2286,10 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i */ if (unlikely(*pos + *count > MAX_NON_LFS && !(file->f_flags & O_LARGEFILE))) { - if (*pos >= MAX_NON_LFS) { + if (*pos >= MAX_NON_LFS) return -EFBIG; - } - if (*count > MAX_NON_LFS - (unsigned long)*pos) { + if (*count > MAX_NON_LFS - (unsigned long)*pos) *count = MAX_NON_LFS - (unsigned long)*pos; - } } /* @@ -2304,33 +2299,15 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i * exceeded without writing data we send a signal and return EFBIG. * Linus frestrict idea will clean these up nicely.. 
*/ - if (likely(!isblk)) { - if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { - if (*count || *pos > inode->i_sb->s_maxbytes) { - return -EFBIG; - } - /* zero-length writes at ->s_maxbytes are OK */ - } - - if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) - *count = inode->i_sb->s_maxbytes - *pos; - } else { -#ifdef CONFIG_BLOCK - loff_t isize; - if (bdev_read_only(I_BDEV(inode))) - return -EPERM; - isize = i_size_read(inode); - if (*pos >= isize) { - if (*count || *pos > isize) - return -ENOSPC; + if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { + if (*count || *pos > inode->i_sb->s_maxbytes) { + return -EFBIG; } - - if (*pos + *count > isize) - *count = isize - *pos; -#else - return -EPERM; -#endif + /* zero-length writes at ->s_maxbytes are OK */ } + + if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) + *count = inode->i_sb->s_maxbytes - *pos; return 0; } EXPORT_SYMBOL(generic_write_checks); @@ -2644,7 +2621,7 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) size_t count = iov_iter_count(from); mutex_lock(&inode->i_mutex); - ret = generic_write_checks(file, &iocb->ki_pos, &count, 0); + ret = generic_write_checks(file, &iocb->ki_pos, &count); if (!ret && count) { iov_iter_truncate(from, count); ret = __generic_file_write_iter(iocb, from); From 99733fa372eaaa59cfb93fd383cee7b0ff056e1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 14:25:18 -0400 Subject: [PATCH 13/30] xfs_file_aio_write_checks: switch to iocb/iov_iter Signed-off-by: Al Viro --- fs/xfs/xfs_file.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 43c0e6686c477..ebde43e15dd94 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -544,17 +544,18 @@ xfs_zero_eof( */ STATIC ssize_t xfs_file_aio_write_checks( - struct file *file, - loff_t *pos, - size_t *count, + struct kiocb *iocb, + struct iov_iter *from, int *iolock) { + struct file *file = 
iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); int error = 0; + size_t count = iov_iter_count(from); restart: - error = generic_write_checks(file, pos, count); + error = generic_write_checks(file, &iocb->ki_pos, &count); if (error) return error; @@ -569,7 +570,7 @@ xfs_file_aio_write_checks( * iolock shared, we need to update it to exclusive which implies * having to redo all checks before. */ - if (*pos > i_size_read(inode)) { + if (iocb->ki_pos > i_size_read(inode)) { bool zero = false; if (*iolock == XFS_IOLOCK_SHARED) { @@ -578,10 +579,11 @@ xfs_file_aio_write_checks( xfs_rw_ilock(ip, *iolock); goto restart; } - error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); + error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); if (error) return error; } + iov_iter_truncate(from, count); /* * Updating the timestamps will grab the ilock again from @@ -678,10 +680,11 @@ xfs_file_dio_aio_write( xfs_rw_ilock(ip, iolock); } - ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - iov_iter_truncate(from, count); + count = iov_iter_count(from); + pos = iocb->ki_pos; if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, @@ -734,24 +737,22 @@ xfs_file_buffered_aio_write( ssize_t ret; int enospc = 0; int iolock = XFS_IOLOCK_EXCL; - loff_t pos = iocb->ki_pos; - size_t count = iov_iter_count(from); xfs_rw_ilock(ip, iolock); - ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); write_retry: - trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_perform_write(file, from, pos); + trace_xfs_file_buffered_write(ip, iov_iter_count(from), + iocb->ki_pos, 0); + ret = 
generic_perform_write(file, from, iocb->ki_pos); if (likely(ret >= 0)) - iocb->ki_pos = pos + ret; + iocb->ki_pos += ret; /* * If we hit a space limit, try to free up some lingering preallocated From e768d7ff7b923a74a019d8782e6ee75dc1de12c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 14:48:22 -0400 Subject: [PATCH 14/30] ext4_file_write_iter: move generic_write_checks() up simpler that way... Signed-off-by: Al Viro --- fs/ext4/file.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1f0afc181b7b4..42b1fa33a17a5 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -99,7 +99,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) int overwrite = 0; size_t length = iov_iter_count(from); ssize_t ret; - loff_t pos = iocb->ki_pos; + loff_t pos; /* * Unaligned direct AIO must be serialized; see comment above @@ -109,15 +109,22 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && (file->f_flags & O_APPEND || - ext4_unaligned_aio(inode, from, pos))) { + ext4_unaligned_aio(inode, from, iocb->ki_pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); ext4_unwritten_wait(inode); } mutex_lock(&inode->i_mutex); - if (file->f_flags & O_APPEND) - iocb->ki_pos = pos = i_size_read(inode); + ret = generic_write_checks(file, &iocb->ki_pos, &length); + if (ret) + goto out; + + if (length == 0) + goto out; + + iov_iter_truncate(from, length); + pos = iocb->ki_pos; /* * If we have encountered a bitmap-format file, the size limit @@ -126,18 +133,16 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if ((pos > sbi->s_bitmap_maxbytes) || - (pos == sbi->s_bitmap_maxbytes && length > 0)) { - mutex_unlock(&inode->i_mutex); + if (pos >= 
sbi->s_bitmap_maxbytes) { ret = -EFBIG; - goto errout; + goto out; } iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); - length = iov_iter_count(from); } iocb->private = &overwrite; if (o_direct) { + length = iov_iter_count(from); blk_start_plug(&plug); @@ -171,16 +176,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } - ret = generic_write_checks(file, &iocb->ki_pos, &length); - if (ret) - goto out; - - if (length == 0) - goto out; - - iov_iter_truncate(from, length); ret = __generic_file_write_iter(iocb, from); -out: mutex_unlock(&inode->i_mutex); if (ret > 0) { @@ -193,7 +189,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct) blk_finish_plug(&plug); -errout: + if (aio_mutex) + mutex_unlock(aio_mutex); + return ret; + +out: + mutex_unlock(&inode->i_mutex); if (aio_mutex) mutex_unlock(aio_mutex); return ret; From 6b775b18eecf60b8a44723e05f8eb6251b71a7a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 15:06:19 -0400 Subject: [PATCH 15/30] fuse: ->direct_IO() doesn't need generic_write_checks() already done by caller. We used to call __fuse_direct_write(), which called generic_write_checks(); now the former got expanded, bringing the latter to the surface. It used to be called all along and calling it from there had been wrong all along... 
Signed-off-by: Al Viro --- fs/fuse/file.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4c04a8144a755..8c15d0a077e83 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2806,8 +2806,8 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; - count = min_t(loff_t, count, fuse_round_up(i_size - offset)); - iov_iter_truncate(iter, count); + iov_iter_truncate(iter, fuse_round_up(i_size - offset)); + count = iov_iter_count(iter); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); @@ -2841,12 +2841,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) io->done = &wait; if (iov_iter_rw(iter) == WRITE) { - ret = generic_write_checks(file, &pos, &count); - if (!ret) { - iov_iter_truncate(iter, count); - ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); - } - + ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); fuse_invalidate_attr(inode); } else { ret = __fuse_direct_read(io, iter, &pos); From 165f1a6e300d5a1ffb57cf9a9c8762de731228f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 15:26:36 -0400 Subject: [PATCH 16/30] udf_file_write_iter: reorder and simplify it's easier to do generic_write_checks() first Signed-off-by: Al Viro --- fs/udf/file.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/fs/udf/file.c b/fs/udf/file.c index 6834509a7e5a7..ccab8b78e3633 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -120,21 +120,27 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t retval; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - int err, pos; size_t count = iov_iter_count(from); struct udf_inode_info *iinfo = UDF_I(inode); + int err; mutex_lock(&inode->i_mutex); + + retval = generic_write_checks(file, 
&iocb->ki_pos, &count); + if (retval) + goto out; + + if (count == 0) + goto out; + + iov_iter_truncate(from, count); + down_write(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - if (file->f_flags & O_APPEND) - pos = inode->i_size; - else - pos = iocb->ki_pos; + loff_t end = iocb->ki_pos + iov_iter_count(from); if (inode->i_sb->s_blocksize < - (udf_file_entry_alloc_offset(inode) + - pos + count)) { + (udf_file_entry_alloc_offset(inode) + end)) { err = udf_expand_file_adinicb(inode); if (err) { mutex_unlock(&inode->i_mutex); @@ -142,24 +148,12 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) return err; } } else { - if (pos + count > inode->i_size) - iinfo->i_lenAlloc = pos + count; - else - iinfo->i_lenAlloc = inode->i_size; + iinfo->i_lenAlloc = max(end, inode->i_size); up_write(&iinfo->i_data_sem); } } else up_write(&iinfo->i_data_sem); - retval = generic_write_checks(file, &iocb->ki_pos, &count); - if (retval) - goto out; - - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - retval = __generic_file_write_iter(iocb, from); out: mutex_unlock(&inode->i_mutex); From 5dc3161cb63265adca0c34fac79512af59b776a4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 07:25:03 -0400 Subject: [PATCH 17/30] ocfs2_file_write_iter: stop messing with ppos it's &iocb->ki_pos; no need to obfuscate. 
Signed-off-by: Al Viro --- fs/ocfs2/file.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8096fb6c081b9..78e245df5e326 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2264,7 +2264,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, int can_do_direct, has_refcount = 0; ssize_t written = 0; size_t count = iov_iter_count(from); - loff_t old_size, *ppos = &iocb->ki_pos; + loff_t old_size; u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -2330,7 +2330,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, } can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending, + ret = ocfs2_prepare_inode_for_write(file, &iocb->ki_pos, count, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2338,7 +2338,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, } if (direct_io && !is_sync_kiocb(iocb)) - unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos); + unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos); /* * We can't complete the direct I/O as requested, fall back to @@ -2374,7 +2374,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_write_checks(file, ppos, &count); + ret = generic_write_checks(file, &iocb->ki_pos, &count); if (ret) goto out_dio; @@ -2382,7 +2382,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, if (direct_io) { loff_t endbyte; ssize_t written_buffered; - written = generic_file_direct_write(iocb, from, *ppos); + written = generic_file_direct_write(iocb, from, iocb->ki_pos); if (written < 0 || written == count) { ret = written; goto out_dio; @@ -2392,7 +2392,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, * for completing the rest of the request. 
*/ count -= written; - written_buffered = generic_perform_write(file, from, *ppos); + written_buffered = generic_perform_write(file, from, iocb->ki_pos); /* * If generic_file_buffered_write() returned a synchronous error * then we want to return the number of bytes which were @@ -2409,14 +2409,14 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, * disk and invalidated to preserve the expected O_DIRECT * semantics. */ - endbyte = *ppos + written_buffered - 1; - ret = filemap_write_and_wait_range(file->f_mapping, *ppos, + endbyte = iocb->ki_pos + written_buffered - 1; + ret = filemap_write_and_wait_range(file->f_mapping, iocb->ki_pos, endbyte); if (ret == 0) { - iocb->ki_pos = *ppos + written_buffered; + iocb->ki_pos += written_buffered; written += written_buffered; invalidate_mapping_pages(mapping, - *ppos >> PAGE_CACHE_SHIFT, + iocb->ki_pos >> PAGE_CACHE_SHIFT, endbyte >> PAGE_CACHE_SHIFT); } else { /* @@ -2426,9 +2426,9 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, } } else { current->backing_dev_info = inode_to_bdi(inode); - written = generic_perform_write(file, from, *ppos); + written = generic_perform_write(file, from, iocb->ki_pos); if (likely(written >= 0)) - iocb->ki_pos = *ppos + written; + iocb->ki_pos = iocb->ki_pos + written; current->backing_dev_info = NULL; } From 90320251db0fe3d05f2b10686ec936c7d6ecd99a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 11:14:45 -0400 Subject: [PATCH 18/30] ocfs2: move generic_write_checks() before the alignment checks Alignment checks for dio depend upon the range truncation done by generic_write_checks(). They can be done as soon as we got ocfs2_rw_lock() and that actually makes ocfs2_prepare_inode_for_write() simpler. The only thing to watch out for is restoring the original count in "unlock and redo without dio" case. Position doesn't need to be restored, since we change it only in O_APPEND case and in that case it will be reassigned anyway. 
Signed-off-by: Al Viro --- fs/ocfs2/file.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 78e245df5e326..fc53ff0653640 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2106,7 +2106,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, } static int ocfs2_prepare_inode_for_write(struct file *file, - loff_t *ppos, + loff_t pos, size_t count, int appending, int *direct_io, @@ -2115,7 +2115,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, int ret = 0, meta_level = 0; struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - loff_t saved_pos = 0, end; + loff_t end; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); @@ -2155,23 +2155,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file, } } - /* work on a copy of ppos until we're sure that we won't have - * to recalculate it due to relocking. */ - if (appending) - saved_pos = i_size_read(inode); - else - saved_pos = *ppos; - - end = saved_pos + count; + end = pos + count; - ret = ocfs2_check_range_for_refcount(inode, saved_pos, count); + ret = ocfs2_check_range_for_refcount(inode, pos, count); if (ret == 1) { ocfs2_inode_unlock(inode, meta_level); meta_level = -1; ret = ocfs2_prepare_inode_for_refcount(inode, file, - saved_pos, + pos, count, &meta_level); if (has_refcount) @@ -2227,7 +2220,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * caller will have to retake some cluster * locks and initiate the io as buffered. */ - ret = ocfs2_check_range_for_holes(inode, saved_pos, count); + ret = ocfs2_check_range_for_holes(inode, pos, count); if (ret == 1) { /* * Fallback to old way if the feature bit is not set. 
@@ -2242,12 +2235,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, break; } - if (appending) - *ppos = saved_pos; - out_unlock: trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, - saved_pos, appending, count, + pos, appending, count, direct_io, has_refcount); if (meta_level >= 0) @@ -2263,7 +2253,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, int ret, direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; - size_t count = iov_iter_count(from); + size_t count = iov_iter_count(from), orig_count; loff_t old_size; u32 old_clusters; struct file *file = iocb->ki_filp; @@ -2329,8 +2319,16 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ocfs2_inode_unlock(inode, 1); } + orig_count = count; + ret = generic_write_checks(file, &iocb->ki_pos, &count); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + iov_iter_truncate(from, count); + can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file, &iocb->ki_pos, count, appending, + ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2351,6 +2349,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, rw_level = -1; direct_io = 0; + iov_iter_reexpand(from, count = orig_count); goto relock; } @@ -2374,11 +2373,6 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_write_checks(file, &iocb->ki_pos, &count); - if (ret) - goto out_dio; - - iov_iter_truncate(from, count); if (direct_io) { loff_t endbyte; ssize_t written_buffered; From 3309dd04cbcd2cdad168485af5cf3576b5051e49 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 12:55:47 -0400 Subject: [PATCH 19/30] switch generic_write_checks() to iocb and iter ... returning -E... 
upon error and amount of data left in iter after (possible) truncation upon success. Note, that normal case gives a non-zero (positive) return value, so any tests for != 0 _must_ be updated. Signed-off-by: Al Viro Conflicts: fs/ext4/file.c --- fs/9p/vfs_file.c | 26 ++++++++++---------------- fs/btrfs/file.c | 24 +++++++++--------------- fs/ceph/file.c | 14 ++++++-------- fs/cifs/file.c | 26 ++++++-------------------- fs/ext4/file.c | 20 ++++++-------------- fs/fuse/file.c | 22 +++++++--------------- fs/ncpfs/file.c | 14 +++++--------- fs/nfs/direct.c | 25 ++++++++++--------------- fs/ntfs/file.c | 11 ++++------- fs/ocfs2/file.c | 19 ++++++++++--------- fs/udf/file.c | 10 ++-------- fs/xfs/xfs_file.c | 8 ++++---- include/linux/fs.h | 2 +- mm/filemap.c | 44 +++++++++++++++++++------------------------- 14 files changed, 99 insertions(+), 166 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b5b020ace1b3c..2a9dd37dc426d 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -404,21 +404,16 @@ static ssize_t v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - ssize_t retval = 0; - loff_t origin = iocb->ki_pos; - size_t count = iov_iter_count(from); + ssize_t retval; + loff_t origin; int err = 0; - retval = generic_write_checks(file, &origin, &count); - if (retval) + retval = generic_write_checks(iocb, from); + if (retval <= 0) return retval; - iov_iter_truncate(from, count); - - if (!count) - return 0; - - retval = p9_client_write(file->private_data, origin, from, &err); + origin = iocb->ki_pos; + retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err); if (retval > 0) { struct inode *inode = file_inode(file); loff_t i_size; @@ -428,12 +423,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (inode->i_mapping && inode->i_mapping->nrpages) invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); - origin += retval; + iocb->ki_pos += retval; i_size = 
i_size_read(inode); - iocb->ki_pos = origin; - if (origin > i_size) { - inode_add_bytes(inode, origin - i_size); - i_size_write(inode, origin); + if (iocb->ki_pos > i_size) { + inode_add_bytes(inode, iocb->ki_pos - i_size); + i_size_write(inode, iocb->ki_pos); } return retval; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 691a84a81e091..c64d11c41eeb5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1739,27 +1739,19 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, u64 start_pos; u64 end_pos; ssize_t num_written = 0; - ssize_t err = 0; - size_t count = iov_iter_count(from); bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); - loff_t pos = iocb->ki_pos; + ssize_t err; + loff_t pos; + size_t count; mutex_lock(&inode->i_mutex); - - current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count); - if (err) { + err = generic_write_checks(iocb, from); + if (err <= 0) { mutex_unlock(&inode->i_mutex); - goto out; - } - - if (count == 0) { - mutex_unlock(&inode->i_mutex); - goto out; + return err; } - iov_iter_truncate(from, count); - + current->backing_dev_info = inode_to_bdi(inode); err = file_remove_suid(file); if (err) { mutex_unlock(&inode->i_mutex); @@ -1786,6 +1778,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, */ update_time_for_write(inode); + pos = iocb->ki_pos; + count = iov_iter_count(from); start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { /* Expand hole size to cover write data, preventing empty gap */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7618419031602..3f0b9339d8234 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -941,9 +941,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; - ssize_t count = iov_iter_count(from), written = 0; + ssize_t count, written = 0; int 
err, want, got; - loff_t pos = iocb->ki_pos; + loff_t pos; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -953,14 +953,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count); - if (err) - goto out; - - if (count == 0) + err = generic_write_checks(iocb, from); + if (err <= 0) goto out; - iov_iter_truncate(from, count); + pos = iocb->ki_pos; + count = iov_iter_count(from); err = file_remove_suid(file); if (err) goto out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4202e74b2db5c..ca2bc5406306e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2563,7 +2563,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - size_t len; ssize_t total_written = 0; struct cifsFileInfo *open_file; struct cifs_tcon *tcon; @@ -2579,16 +2578,10 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) * write request. 
*/ - len = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &len); - if (rc) + rc = generic_write_checks(iocb, from); + if (rc <= 0) return rc; - if (!len) - return 0; - - iov_iter_truncate(from, len); - INIT_LIST_HEAD(&wdata_list); cifs_sb = CIFS_FILE_SB(file); open_file = file->private_data; @@ -2599,8 +2592,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) memcpy(&saved_from, from, sizeof(struct iov_iter)); - rc = cifs_write_from_iter(iocb->ki_pos, len, from, open_file, cifs_sb, - &wdata_list); + rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from, + open_file, cifs_sb, &wdata_list); /* * If at least one write was successfully sent, then discard any rc @@ -2674,7 +2667,6 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; ssize_t rc; - size_t count; /* * We need to hold the sem to be sure nobody modifies lock list @@ -2683,16 +2675,10 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) down_read(&cinode->lock_sem); mutex_lock(&inode->i_mutex); - count = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &count); - if (rc) + rc = generic_write_checks(iocb, from); + if (rc <= 0) goto out; - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, NULL, CIFS_WRITE_OP)) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 42b1fa33a17a5..c10785f10d1d9 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -97,9 +97,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; int o_direct = io_is_direct(file); int overwrite = 0; - size_t length = iov_iter_count(from); ssize_t ret; - loff_t pos; /* * Unaligned direct AIO must be serialized; see comment above @@ -116,16 +114,10 @@ ext4_file_write_iter(struct kiocb *iocb, 
struct iov_iter *from) } mutex_lock(&inode->i_mutex); - ret = generic_write_checks(file, &iocb->ki_pos, &length); - if (ret) + ret = generic_write_checks(iocb, from); + if (ret <= 0) goto out; - if (length == 0) - goto out; - - iov_iter_truncate(from, length); - pos = iocb->ki_pos; - /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. @@ -133,19 +125,19 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (pos >= sbi->s_bitmap_maxbytes) { + if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) { ret = -EFBIG; goto out; } - iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); + iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); } iocb->private = &overwrite; if (o_direct) { - length = iov_iter_count(from); + size_t length = iov_iter_count(from); + loff_t pos = iocb->ki_pos; blk_start_plug(&plug); - /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !aio_mutex && !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8c15d0a077e83..b86c8e08399a8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1145,13 +1145,11 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - size_t count = iov_iter_count(from); ssize_t written = 0; ssize_t written_buffered = 0; struct inode *inode = mapping->host; ssize_t err; loff_t endbyte = 0; - loff_t pos = iocb->ki_pos; if (get_fuse_conn(inode)->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ @@ -1167,14 +1165,10 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = 
inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count); - if (err) - goto out; - - if (count == 0) + err = generic_write_checks(iocb, from); + if (err <= 0) goto out; - iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) goto out; @@ -1184,6 +1178,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; if (file->f_flags & O_DIRECT) { + loff_t pos = iocb->ki_pos; written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) goto out; @@ -1209,9 +1204,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) written += written_buffered; iocb->ki_pos = pos + written_buffered; } else { - written = fuse_perform_write(file, mapping, from, pos); + written = fuse_perform_write(file, mapping, from, iocb->ki_pos); if (written >= 0) - iocb->ki_pos = pos + written; + iocb->ki_pos += written; } out: current->backing_dev_info = NULL; @@ -1412,7 +1407,6 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct fuse_io_priv io = { .async = 0, .file = file }; - size_t count = iov_iter_count(from); ssize_t res; if (is_bad_inode(inode)) @@ -1420,11 +1414,9 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = generic_write_checks(file, &iocb->ki_pos, &count); - if (!res) { - iov_iter_truncate(from, count); + res = generic_write_checks(iocb, from); + if (res > 0) res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); - } fuse_invalidate_attr(inode); if (res > 0) fuse_write_update_size(inode, iocb->ki_pos); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index ab6363b165565..011324ce9df21 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -170,20 +170,15 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 
struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); size_t already_written = 0; - loff_t pos = iocb->ki_pos; - size_t count = iov_iter_count(from); size_t bufsize; int errno; void *bouncebuffer; + off_t pos; ncp_dbg(1, "enter %pD2\n", file); - errno = generic_write_checks(file, &pos, &count); - if (errno) + errno = generic_write_checks(iocb, from); + if (errno <= 0) return errno; - iov_iter_truncate(from, count); - - if (!count) - return 0; errno = ncp_make_open(inode, O_WRONLY); if (errno) { @@ -201,10 +196,11 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) errno = -EIO; /* -ENOMEM */ goto outrel; } + pos = iocb->ki_pos; while (iov_iter_count(from)) { int written_this_time; size_t to_write = min_t(size_t, - bufsize - ((off_t)pos % bufsize), + bufsize - (pos % bufsize), iov_iter_count(from)); if (copy_from_iter(bouncebuffer, to_write, from) != to_write) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 5ddd77acb3f74..9634189b85454 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -969,24 +969,19 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; loff_t end; - size_t count = iov_iter_count(iter); - end = (pos + count - 1) >> PAGE_CACHE_SHIFT; - - nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", - file, count, (long long) pos); + file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(file, &pos, &count); - if (result) + result = generic_write_checks(iocb, iter); + if (result <= 0) goto out; - result = -EINVAL; - if ((ssize_t) count < 0) - goto out; - result = 0; - if (!count) - goto out; + nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, + iov_iter_count(iter)); + + pos = iocb->ki_pos; + end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT; mutex_lock(&inode->i_mutex); @@ -1001,7 +996,7 @@ ssize_t 
nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out_unlock; } - task_io_account_write(count); + task_io_account_write(iov_iter_count(iter)); result = -ENOMEM; dreq = nfs_direct_req_alloc(); @@ -1009,7 +1004,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out_unlock; dreq->inode = inode; - dreq->bytes_left = count; + dreq->bytes_left = iov_iter_count(iter); dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index cec4ec3c1ede2..7bb487e663b47 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -339,17 +339,14 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, struct inode *vi = file_inode(file); ntfs_inode *base_ni, *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; - size_t count = iov_iter_count(from); ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " "0x%llx, count 0x%zx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)iocb->ki_pos, count); - err = generic_write_checks(file, &iocb->ki_pos, &count); - if (unlikely(err)) - goto out; - iov_iter_truncate(from, count); - if (count == 0) + (unsigned long long)iocb->ki_pos, + iov_iter_count(from)); + err = generic_write_checks(iocb, from); + if (unlikely(err <= 0)) goto out; /* * All checks have passed. 
Before we start doing any writing we want diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index fc53ff0653640..b93919f50f0ff 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2250,9 +2250,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file, static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { - int ret, direct_io, appending, rw_level, have_alloc_sem = 0; + int direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; + ssize_t ret; size_t count = iov_iter_count(from), orig_count; loff_t old_size; u32 old_clusters; @@ -2319,13 +2320,14 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, ocfs2_inode_unlock(inode, 1); } - orig_count = count; - ret = generic_write_checks(file, &iocb->ki_pos, &count); - if (ret < 0) { - mlog_errno(ret); + orig_count = iov_iter_count(from); + ret = generic_write_checks(iocb, from); + if (ret <= 0) { + if (ret) + mlog_errno(ret); goto out; } - iov_iter_truncate(from, count); + count = ret; can_do_direct = direct_io; ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending, @@ -2349,7 +2351,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, rw_level = -1; direct_io = 0; - iov_iter_reexpand(from, count = orig_count); + iov_iter_reexpand(from, orig_count); goto relock; } @@ -2377,7 +2379,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, loff_t endbyte; ssize_t written_buffered; written = generic_file_direct_write(iocb, from, iocb->ki_pos); - if (written < 0 || written == count) { + if (written < 0 || !iov_iter_count(from)) { ret = written; goto out_dio; } @@ -2385,7 +2387,6 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, /* * for completing the rest of the request. 
*/ - count -= written; written_buffered = generic_perform_write(file, from, iocb->ki_pos); /* * If generic_file_buffered_write() returned a synchronous error diff --git a/fs/udf/file.c b/fs/udf/file.c index ccab8b78e3633..3de2edafff737 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -120,21 +120,15 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t retval; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - size_t count = iov_iter_count(from); struct udf_inode_info *iinfo = UDF_I(inode); int err; mutex_lock(&inode->i_mutex); - retval = generic_write_checks(file, &iocb->ki_pos, &count); - if (retval) + retval = generic_write_checks(iocb, from); + if (retval <= 0) goto out; - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - down_write(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { loff_t end = iocb->ki_pos + iov_iter_count(from); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ebde43e15dd94..28d157807b42a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -551,12 +551,12 @@ xfs_file_aio_write_checks( struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - int error = 0; + ssize_t error = 0; size_t count = iov_iter_count(from); restart: - error = generic_write_checks(file, &iocb->ki_pos, &count); - if (error) + error = generic_write_checks(iocb, from); + if (error <= 0) return error; error = xfs_break_layouts(inode, iolock); @@ -577,13 +577,13 @@ xfs_file_aio_write_checks( xfs_rw_iunlock(ip, *iolock); *iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, *iolock); + iov_iter_reexpand(from, count); goto restart; } error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); if (error) return error; } - iov_iter_truncate(from, count); /* * Updating the timestamps will grab the ilock again from diff --git a/include/linux/fs.h b/include/linux/fs.h index c7b21db7782fd..b4aa400ac7232 100644 --- 
a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2566,7 +2566,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count); +extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index dfc573c6ec25d..243997a26e7ce 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2260,36 +2260,38 @@ EXPORT_SYMBOL(read_cache_page_gfp); * Returns appropriate error code that caller should return or * zero in case that write should be allowed. */ -inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count) +inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; unsigned long limit = rlimit(RLIMIT_FSIZE); + loff_t pos; - if (unlikely(*pos < 0)) - return -EINVAL; + if (!iov_iter_count(from)) + return 0; /* FIXME: this is for backwards compatibility with 2.4 */ if (file->f_flags & O_APPEND) - *pos = i_size_read(inode); + iocb->ki_pos = i_size_read(inode); + + pos = iocb->ki_pos; if (limit != RLIM_INFINITY) { - if (*pos >= limit) { + if (iocb->ki_pos >= limit) { send_sig(SIGXFSZ, current, 0); return -EFBIG; } - if (*count > limit - (typeof(limit))*pos) - *count = limit - (typeof(limit))*pos; + iov_iter_truncate(from, limit - (unsigned long)pos); } /* * LFS rule */ - if (unlikely(*pos + *count > MAX_NON_LFS && + if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS && !(file->f_flags & O_LARGEFILE))) { - if (*pos >= MAX_NON_LFS) + if (pos >= MAX_NON_LFS) return -EFBIG; 
- if (*count > MAX_NON_LFS - (unsigned long)*pos) - *count = MAX_NON_LFS - (unsigned long)*pos; + iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos); } /* @@ -2299,16 +2301,11 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count) * exceeded without writing data we send a signal and return EFBIG. * Linus frestrict idea will clean these up nicely.. */ - if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { - if (*count || *pos > inode->i_sb->s_maxbytes) { - return -EFBIG; - } - /* zero-length writes at ->s_maxbytes are OK */ - } + if (unlikely(pos >= inode->i_sb->s_maxbytes)) + return -EFBIG; - if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) - *count = inode->i_sb->s_maxbytes - *pos; - return 0; + iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos); + return iov_iter_count(from); } EXPORT_SYMBOL(generic_write_checks); @@ -2618,14 +2615,11 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; - size_t count = iov_iter_count(from); mutex_lock(&inode->i_mutex); - ret = generic_write_checks(file, &iocb->ki_pos, &count); - if (!ret && count) { - iov_iter_truncate(from, count); + ret = generic_write_checks(iocb, from); + if (ret > 0) ret = __generic_file_write_iter(iocb, from); - } mutex_unlock(&inode->i_mutex); if (ret > 0) { From 2ba48ce513c4e545318d22b138861d5876edf906 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 13:52:01 -0400 Subject: [PATCH 20/30] mirror O_APPEND and O_DIRECT into iocb->ki_flags ... avoiding write_iter/fcntl races. 
Signed-off-by: Al Viro --- fs/aio.c | 2 +- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 8 ++++---- fs/ext4/file.c | 4 ++-- fs/fuse/file.c | 2 +- fs/gfs2/file.c | 2 +- fs/nfs/file.c | 6 +++--- fs/ocfs2/file.c | 10 +++++----- fs/xfs/xfs_file.c | 4 ++-- include/linux/fs.h | 15 +++++++++++++++ mm/filemap.c | 6 +++--- 11 files changed, 38 insertions(+), 23 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 5785c4b58fea5..e976185c8e5b3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1502,7 +1502,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } req->common.ki_pos = iocb->aio_offset; req->common.ki_complete = aio_complete; - req->common.ki_flags = 0; + req->common.ki_flags = iocb_flags(req->common.ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c64d11c41eeb5..faa7d390841b9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1794,7 +1794,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, if (sync) atomic_inc(&BTRFS_I(inode)->sync_writers); - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { num_written = __btrfs_direct_write(iocb, from, pos); } else { num_written = __btrfs_buffered_write(file, from, pos); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3f0b9339d8234..b9b8eb225f66e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -457,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, if (ret < 0) return ret; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { while (iov_iter_count(i)) { size_t start; ssize_t n; @@ -828,7 +828,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || - (iocb->ki_filp->f_flags & O_DIRECT) || + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", @@ -995,12 +995,12 @@ static ssize_t ceph_write_iter(struct 
kiocb *iocb, struct iov_iter *from) inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || - (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { struct iov_iter data; mutex_unlock(&inode->i_mutex); /* we might need to revert back to that point */ data = *from; - if (file->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) written = ceph_sync_direct_write(iocb, &data, pos); else written = ceph_sync_write(iocb, &data, pos); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index c10785f10d1d9..53bbc0b1995f5 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); struct mutex *aio_mutex = NULL; struct blk_plug plug; - int o_direct = io_is_direct(file); + int o_direct = iocb->ki_flags & IOCB_DIRECT; int overwrite = 0; ssize_t ret; @@ -106,7 +106,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && - (file->f_flags & O_APPEND || + (iocb->ki_flags & IOCB_APPEND || ext4_unaligned_aio(inode, from, iocb->ki_pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b86c8e08399a8..5ef05b5c4cff8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1177,7 +1177,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos = iocb->ki_pos; written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 614bb42cb7e1b..08329afa13396 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -709,7 +709,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb 
*iocb, struct iov_iter *from) gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { struct gfs2_holder gh; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f6a3adedf0270..14364dc001f7c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,7 +170,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; - if (iocb->ki_filp->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_read(iocb, to, iocb->ki_pos); dprintk("NFS: read(%pD2, %zu@%lu)\n", @@ -680,7 +680,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result) return result; - if (file->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_write(iocb, from, pos); dprintk("NFS: write(%pD2, %zu@%Ld)\n", @@ -692,7 +692,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) /* * O_APPEND implies that we must revalidate the file length. */ - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { result = nfs_revalidate_file_size(inode, file); if (result) goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b93919f50f0ff..cd37f6cd4d516 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2274,8 +2274,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, if (count == 0) return 0; - appending = file->f_flags & O_APPEND ? 1 : 0; - direct_io = file->f_flags & O_DIRECT ? 1 : 0; + appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0; + direct_io = iocb->ki_flags & IOCB_DIRECT ? 
1 : 0; mutex_lock(&inode->i_mutex); @@ -2429,7 +2429,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, out_dio: /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); if (unlikely(written <= 0)) goto no_sync; @@ -2546,7 +2546,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ - if (filp->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { have_alloc_sem = 1; ocfs2_iocb_set_sem_locked(iocb); @@ -2580,7 +2580,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, trace_generic_file_aio_read_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); /* see ocfs2_file_write_iter */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 28d157807b42a..1f12ad0a8585b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -279,7 +279,7 @@ xfs_file_read_iter( XFS_STATS_INC(xs_read_calls); - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ioflags |= XFS_IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; @@ -804,7 +804,7 @@ xfs_file_write_iter( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ret = xfs_file_dio_aio_write(iocb, from); else ret = xfs_file_buffered_aio_write(iocb, from); diff --git a/include/linux/fs.h b/include/linux/fs.h index b4aa400ac7232..b1d7db28c13c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,8 @@ struct address_space; struct writeback_control; #define IOCB_EVENTFD (1 << 0) 
+#define IOCB_APPEND (1 << 1) +#define IOCB_DIRECT (1 << 2) struct kiocb { struct file *ki_filp; @@ -329,10 +331,13 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } +static inline int iocb_flags(struct file *file); + static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, + .ki_flags = iocb_flags(filp), }; } @@ -2779,6 +2784,16 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); } +static inline int iocb_flags(struct file *file) +{ + int res = 0; + if (file->f_flags & O_APPEND) + res |= IOCB_APPEND; + if (io_is_direct(file)) + res |= IOCB_DIRECT; + return res; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; diff --git a/mm/filemap.c b/mm/filemap.c index 243997a26e7ce..405de370e6571 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1694,7 +1694,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) loff_t *ppos = &iocb->ki_pos; loff_t pos = *ppos; - if (io_is_direct(file)) { + if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); @@ -2271,7 +2271,7 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) return 0; /* FIXME: this is for backwards compatibility with 2.4 */ - if (file->f_flags & O_APPEND) + if (iocb->ki_flags & IOCB_APPEND) iocb->ki_pos = i_size_read(inode); pos = iocb->ki_pos; @@ -2545,7 +2545,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (io_is_direct(file)) { + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos, endbyte; written = generic_file_direct_write(iocb, from, iocb->ki_pos); From 7da839c475894ea872ec909a5d2e83dddccff5be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 14:01:33 -0400 Subject: [PATCH 21/30] ocfs2: use 
__generic_file_write_iter() we can do that now - all we need is to clear IOCB_DIRECT from ->ki_flags in "can't do dio" case. Signed-off-by: Al Viro --- fs/ocfs2/file.c | 64 ++++++------------------------------------------- 1 file changed, 7 insertions(+), 57 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index cd37f6cd4d516..913fc250d85a1 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2259,11 +2259,11 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct address_space *mapping = file->f_mapping; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); int unaligned_dio = 0; + int dropped_dio = 0; trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2351,7 +2351,9 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, rw_level = -1; direct_io = 0; + iocb->ki_flags &= ~IOCB_DIRECT; iov_iter_reexpand(from, orig_count); + dropped_dio = 1; goto relock; } @@ -2375,67 +2377,15 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - if (direct_io) { - loff_t endbyte; - ssize_t written_buffered; - written = generic_file_direct_write(iocb, from, iocb->ki_pos); - if (written < 0 || !iov_iter_count(from)) { - ret = written; - goto out_dio; - } - - /* - * for completing the rest of the request. - */ - written_buffered = generic_perform_write(file, from, iocb->ki_pos); - /* - * If generic_file_buffered_write() returned a synchronous error - * then we want to return the number of bytes which were - * direct-written, or the error code if that was zero. Note - * that this differs from normal direct-io semantics, which - * will return -EFOO even if some bytes were written. 
- */ - if (written_buffered < 0) { - ret = written_buffered; - goto out_dio; - } - - /* We need to ensure that the page cache pages are written to - * disk and invalidated to preserve the expected O_DIRECT - * semantics. - */ - endbyte = iocb->ki_pos + written_buffered - 1; - ret = filemap_write_and_wait_range(file->f_mapping, iocb->ki_pos, - endbyte); - if (ret == 0) { - iocb->ki_pos += written_buffered; - written += written_buffered; - invalidate_mapping_pages(mapping, - iocb->ki_pos >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); - } else { - /* - * We don't know how much we wrote, so just return - * the number of bytes which were direct-written - */ - } - } else { - current->backing_dev_info = inode_to_bdi(inode); - written = generic_perform_write(file, from, iocb->ki_pos); - if (likely(written >= 0)) - iocb->ki_pos = iocb->ki_pos + written; - current->backing_dev_info = NULL; - } - -out_dio: + written = __generic_file_write_iter(iocb, from); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); + BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); if (unlikely(written <= 0)) goto no_sync; - if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || - ((file->f_flags & O_DIRECT) && !direct_io)) { + if (((file->f_flags & O_DSYNC) && !direct_io) || + IS_SYNC(inode) || dropped_dio) { ret = filemap_fdatawrite_range(file->f_mapping, iocb->ki_pos - written, iocb->ki_pos - 1); From 65a4a1cad7c56e7056fb4b35ac2d93695612612c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 14:11:08 -0400 Subject: [PATCH 22/30] nfs: generic_write_checks() shouldn't be done on swapout... 
Signed-off-by: Al Viro --- fs/nfs/direct.c | 12 +++--------- fs/nfs/file.c | 11 +++++++---- include/linux/nfs_fs.h | 3 +-- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9634189b85454..682f65fe09b51 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -268,7 +268,7 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) if (iov_iter_rw(iter) == READ) return nfs_file_direct_read(iocb, iter, pos); - return nfs_file_direct_write(iocb, iter, pos); + return nfs_file_direct_write(iocb, iter); #endif /* CONFIG_NFS_SWAP */ } @@ -959,8 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; @@ -968,15 +967,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - loff_t end; + loff_t pos, end; dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); - if (result <= 0) - goto out; - nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, iov_iter_count(iter)); @@ -1044,7 +1039,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, nfs_direct_req_release(dreq); out_unlock: mutex_unlock(&inode->i_mutex); -out: return result; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 14364dc001f7c..c40e4363e746e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -674,17 +674,20 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) unsigned long written = 0; ssize_t result; 
size_t count = iov_iter_count(from); - loff_t pos = iocb->ki_pos; result = nfs_key_timeout_notify(file, inode); if (result) return result; - if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from, pos); + if (iocb->ki_flags & IOCB_DIRECT) { + result = generic_write_checks(iocb, from); + if (result <= 0) + return result; + return nfs_file_direct_write(iocb, from); + } dprintk("NFS: write(%pD2, %zu@%Ld)\n", - file, count, (long long) pos); + file, count, (long long) iocb->ki_pos); result = -EBUSY; if (IS_SWAPFILE(inode)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3d1b0d2fe55e8..410abd172febe 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -452,8 +452,7 @@ extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); /* * linux/fs/nfs/dir.c From 525d27b23555419e0e7b73fb6e78d4d678cb4f32 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Feb 2015 13:40:17 +0000 Subject: [PATCH 23/30] VFS: Add owner-filesystem positive/negative dentry checks Supply two functions to test whether a filesystem's own dentries are positive or negative (d_really_is_positive() and d_really_is_negative()). The problem is that the DCACHE_ENTRY_TYPE field of dentry->d_flags may be overridden by the union part of a layered filesystem and isn't thus necessarily indicative of the type of dentry. Normally, this would involve a negative dentry (ie. ->d_inode == NULL) having ->d_layer.lower pointed to a lower layer dentry, DCACHE_PINNING_LOWER set and the DCACHE_ENTRY_TYPE field set to something other than DCACHE_MISS_TYPE - but it could also involve, say, a DCACHE_SPECIAL_TYPE being overridden to DCACHE_WHITEOUT_TYPE if a 0,0 chardev is detected in the top layer. 
However, inside a filesystem, when that fs is looking at its own dentries, it probably wants to know if they are really negative or not - and doesn't care about the fallthrough bits used by the union. To this end, a filesystem should normally use d_really_is_positive/negative() when looking at its own dentries rather than d_is_positive/negative() and should use d_inode() to get at the inode. Anyone looking at someone else's dentries (this includes pathwalk) should use d_is_xxx() and d_backing_inode(). Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d8358799c5941..e83768ee38fcd 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -482,6 +482,44 @@ static inline bool d_is_positive(const struct dentry *dentry) return !d_is_negative(dentry); } +/** + * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents either an absent name or a name that + * doesn't map to an inode (ie. ->d_inode is NULL). The dentry could represent + * a true miss, a whiteout that isn't represented by a 0,0 chardev or a + * fallthrough marker in an opaque directory. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. (3) The dentry may have something attached to ->d_lower and the + * type field of the flags may be set to something other than miss or whiteout. 
+ */ +static inline bool d_really_is_negative(const struct dentry *dentry) +{ + return dentry->d_inode == NULL; +} + +/** + * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents a name that maps to an inode + * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if + * that is represented on medium as a 0,0 chardev. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. + */ +static inline bool d_really_is_positive(const struct dentry *dentry) +{ + return dentry->d_inode != NULL; +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) From 4bf46a272647d89e780126b52eda04737defd9f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 5 Mar 2015 14:09:22 +0000 Subject: [PATCH 24/30] VFS: Impose ordering on accesses of d_inode and d_flags Impose ordering on accesses of d_inode and d_flags to avoid the need to do this: if (!dentry->d_inode || d_is_negative(dentry)) { when this: if (d_is_negative(dentry)) { should suffice. This check is especially problematic if a dentry can have its type field set to something other than DCACHE_MISS_TYPE when d_inode is NULL (as in unionmount). What we really need to do is stick a write barrier between setting d_inode and setting d_flags and a read barrier between reading d_flags and reading d_inode.
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/dcache.c | 47 +++++++++++++++++++++++++++++++++++------- include/linux/dcache.h | 21 +++---------------- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index d99736a63e3cf..656ce522a218f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,6 +269,41 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } +/* + * Make sure other CPUs see the inode attached before the type is set. + */ +static inline void __d_set_inode_and_type(struct dentry *dentry, + struct inode *inode, + unsigned type_flags) +{ + unsigned flags; + + dentry->d_inode = inode; + smp_wmb(); + flags = READ_ONCE(dentry->d_flags); + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + flags |= type_flags; + WRITE_ONCE(dentry->d_flags, flags); +} + +/* + * Ideally, we want to make sure that other CPUs see the flags cleared before + * the inode is detached, but this is really a violation of RCU principles + * since the ordering suggests we should always set inode before flags. + * + * We should instead replace or discard the entire dentry - but that sucks + * performancewise on mass deletion/rename. 
+ */ +static inline void __d_clear_type_and_inode(struct dentry *dentry) +{ + unsigned flags = READ_ONCE(dentry->d_flags); + + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + WRITE_ONCE(dentry->d_flags, flags); + smp_wmb(); + dentry->d_inode = NULL; +} + static void dentry_free(struct dentry *dentry) { WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); @@ -311,7 +346,7 @@ static void dentry_iput(struct dentry * dentry) { struct inode *inode = dentry->d_inode; if (inode) { - dentry->d_inode = NULL; + __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -335,8 +370,7 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - __d_clear_type(dentry); - dentry->d_inode = NULL; + __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1715,11 +1749,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) unsigned add_flags = d_flags_for_inode(inode); spin_lock(&dentry->d_lock); - dentry->d_flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); - dentry->d_flags |= add_flags; if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); - dentry->d_inode = inode; + __d_set_inode_and_type(dentry, inode, add_flags); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); @@ -1937,8 +1969,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) add_flags |= DCACHE_DISCONNECTED; spin_lock(&tmp->d_lock); - tmp->d_inode = inode; - tmp->d_flags |= add_flags; + __d_set_inode_and_type(tmp, inode, add_flags); hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e83768ee38fcd..df334cbacc6d0 100644 --- 
a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -404,26 +404,11 @@ static inline bool d_mountpoint(const struct dentry *dentry) /* * Directory cache entry type accessor functions. */ -static inline void __d_set_type(struct dentry *dentry, unsigned type) -{ - dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type; -} - -static inline void __d_clear_type(struct dentry *dentry) -{ - __d_set_type(dentry, DCACHE_MISS_TYPE); -} - -static inline void d_set_type(struct dentry *dentry, unsigned type) -{ - spin_lock(&dentry->d_lock); - __d_set_type(dentry, type); - spin_unlock(&dentry->d_lock); -} - static inline unsigned __d_entry_type(const struct dentry *dentry) { - return dentry->d_flags & DCACHE_ENTRY_TYPE; + unsigned type = READ_ONCE(dentry->d_flags); + smp_rmb(); + return type & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) From 88e7fbd4a599375a08876e80a76d92e49fdea55c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Mar 2015 16:38:26 +0000 Subject: [PATCH 25/30] NFS: Don't use d_inode as a variable name Don't use d_inode as a variable name as it now masks a function name. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/nfs/read.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 568ecf0a880f1..b8f5c63f77b27 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -117,15 +117,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, static void nfs_readpage_release(struct nfs_page *req) { - struct inode *d_inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; - dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, + dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode), req->wb_bytes, (long long)req_offset(req)); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + nfs_readpage_to_fscache(inode, req->wb_page, 0); unlock_page(req->wb_page); } From 698934df8b45da2a06816ee2d7f9a9034e671e62 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 17:33:52 +0000 Subject: [PATCH 26/30] VFS: Combine inode checks with d_is_negative() and d_is_positive() in pathwalk Where we have: if (!dentry->d_inode || d_is_negative(dentry)) { type constructions in pathwalk we should be able to eliminate the check of d_inode and rely solely on the result of d_is_negative() or d_is_positive(). What we do have to take care to do is to read d_inode after calling a d_is_xxx() typecheck function to get the barriering right. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 76fb76a0818bc..5a9291c318815 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1585,7 +1585,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } err = -ENOENT; - if (!inode || d_is_negative(path->dentry)) + if (d_is_negative(path->dentry)) goto out_path_put; if (should_follow_link(path->dentry, follow)) { @@ -2310,7 +2310,7 @@ mountpoint_last(struct nameidata *nd, struct path *path) mutex_unlock(&dir->d_inode->i_mutex); done: - if (!dentry->d_inode || d_is_negative(dentry)) { + if (d_is_negative(dentry)) { error = -ENOENT; dput(dentry); goto out; @@ -3038,7 +3038,7 @@ static int do_last(struct nameidata *nd, struct path *path, finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (!inode || d_is_negative(path->dentry)) { + if (d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } From 7ceab50c0be56cf1bbaf2b3dd1c6cda80e5335fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 5 Mar 2015 12:46:49 +0000 Subject: [PATCH 27/30] VFS: Fix up debugfs to use d_is_dir() in place of S_ISDIR() Fix up debugfs to use d_is_dir(dentry) in place of S_ISDIR(dentry->d_inode->i_mode). 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/debugfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 96400ab42d135..26856ecdea5ea 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -521,7 +521,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) if (debugfs_positive(dentry)) { dget(dentry); - if (S_ISDIR(dentry->d_inode->i_mode)) + if (d_is_dir(dentry)) ret = simple_rmdir(parent->d_inode, dentry); else simple_unlink(parent->d_inode, dentry); From 4bbcbd3b11dbc676a272be508e47d1c4a5056349 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 22:16:40 +0000 Subject: [PATCH 28/30] VFS: Make pathwalk use d_is_reg() rather than S_ISREG() Make pathwalk use d_is_reg() rather than S_ISREG() to determine whether to honour O_TRUNC. Since this occurs after complete_walk(), the dentry type field cannot change and the inode pointer cannot change as we hold a ref on the dentry, so this should be safe. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 5a9291c318815..ffab2e06e1472 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3077,7 +3077,7 @@ static int do_last(struct nameidata *nd, struct path *path, error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) goto out; - if (!S_ISREG(nd->inode->i_mode)) + if (!d_is_reg(nd->path.dentry)) will_truncate = false; if (will_truncate) { From 6683de3886a313ae3d4b8c0323313a987073481b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Mar 2015 16:40:32 +0000 Subject: [PATCH 29/30] configfs: Fix inconsistent use of file_inode() vs file->f_path.dentry->d_inode Fix inconsistent use of file_inode() vs file->f_path.dentry->d_inode. 
Reported-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/configfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cf0db005d2f58..acb3d63bc9dc7 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) if (offset >= 0) break; default: - mutex_unlock(&file_inode(file)->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { From aa4d86163e4e91a1ac560954a554bab417e338f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2015 18:23:29 +0200 Subject: [PATCH 30/30] block: loop: switch to VFS ITER_BVEC Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/block/loop.c | 294 ++++++++++++++++++------------------------- 1 file changed, 120 insertions(+), 174 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c4fd1e45ce1e8..ae3fcb4199e9b 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,28 +88,6 @@ static int part_shift; static struct workqueue_struct *loop_wq; -/* - * Transfer functions - */ -static int transfer_none(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t real_block) -{ - char *raw_buf = kmap_atomic(raw_page) + raw_off; - char *loop_buf = kmap_atomic(loop_page) + loop_off; - - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); - - kunmap_atomic(loop_buf); - kunmap_atomic(raw_buf); - cond_resched(); - return 0; -} - static int transfer_xor(struct loop_device *lo, int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, @@ -148,14 +126,13 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info) static struct loop_func_table none_funcs = { .number = LO_CRYPT_NONE, - 
.transfer = transfer_none, -}; +}; static struct loop_func_table xor_funcs = { .number = LO_CRYPT_XOR, .transfer = transfer_xor, .init = xor_init -}; +}; /* xfer_funcs[0] is special - its release function is never called */ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { @@ -215,207 +192,169 @@ lo_do_transfer(struct loop_device *lo, int cmd, struct page *lpage, unsigned loffs, int size, sector_t rblock) { - if (unlikely(!lo->transfer)) + int ret; + + ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); + if (likely(!ret)) return 0; - return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); + printk_ratelimited(KERN_ERR + "loop: Transfer error at byte offset %llu, length %i.\n", + (unsigned long long)rblock << 9, size); + return ret; } -/** - * __do_lo_send_write - helper for writing data to a loop device - * - * This helper just factors out common code between do_lo_send_direct_write() - * and do_lo_send_write(). - */ -static int __do_lo_send_write(struct file *file, - u8 *buf, const int len, loff_t pos) +static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) { - struct kvec kvec = {.iov_base = buf, .iov_len = len}; - struct iov_iter from; + struct iov_iter i; ssize_t bw; - iov_iter_kvec(&from, ITER_KVEC | WRITE, &kvec, 1, len); + iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len); file_start_write(file); - bw = vfs_iter_write(file, &from, &pos); + bw = vfs_iter_write(file, &i, ppos); file_end_write(file); - if (likely(bw == len)) + + if (likely(bw == bvec->bv_len)) return 0; - printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", - (unsigned long long)pos, len); + + printk_ratelimited(KERN_ERR + "loop: Write error at byte offset %llu, length %i.\n", + (unsigned long long)*ppos, bvec->bv_len); if (bw >= 0) bw = -EIO; return bw; } -/** - * do_lo_send_direct_write - helper for writing data to a loop device - * - * This is the fast, non-transforming version that does 
not need double - * buffering. - */ -static int do_lo_send_direct_write(struct loop_device *lo, - struct bio_vec *bvec, loff_t pos, struct page *page) +static int lo_write_simple(struct loop_device *lo, struct request *rq, + loff_t pos) { - ssize_t bw = __do_lo_send_write(lo->lo_backing_file, - kmap(bvec->bv_page) + bvec->bv_offset, - bvec->bv_len, pos); - kunmap(bvec->bv_page); - cond_resched(); - return bw; + struct bio_vec bvec; + struct req_iterator iter; + int ret = 0; + + rq_for_each_segment(bvec, rq, iter) { + ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); + if (ret < 0) + break; + cond_resched(); + } + + return ret; } -/** - * do_lo_send_write - helper for writing data to a loop device - * +/* * This is the slow, transforming version that needs to double buffer the * data as it cannot do the transformations in place without having direct * access to the destination pages of the backing file. */ -static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, - loff_t pos, struct page *page) +static int lo_write_transfer(struct loop_device *lo, struct request *rq, + loff_t pos) { - int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, pos >> 9); - if (likely(!ret)) - return __do_lo_send_write(lo->lo_backing_file, - page_address(page), bvec->bv_len, - pos); - printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, " - "length %i.\n", (unsigned long long)pos, bvec->bv_len); - if (ret > 0) - ret = -EIO; - return ret; -} - -static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos) -{ - int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, - struct page *page); - struct bio_vec bvec; + struct bio_vec bvec, b; struct req_iterator iter; - struct page *page = NULL; + struct page *page; int ret = 0; - if (lo->transfer != transfer_none) { - page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); - if (unlikely(!page)) - goto fail; - kmap(page); - do_lo_send = do_lo_send_write; - 
} else { - do_lo_send = do_lo_send_direct_write; - } + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; rq_for_each_segment(bvec, rq, iter) { - ret = do_lo_send(lo, &bvec, pos, page); + ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page, + bvec.bv_offset, bvec.bv_len, pos >> 9); + if (unlikely(ret)) + break; + + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; + ret = lo_write_bvec(lo->lo_backing_file, &b, &pos); if (ret < 0) break; - pos += bvec.bv_len; } - if (page) { - kunmap(page); - __free_page(page); - } -out: + + __free_page(page); return ret; -fail: - printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); - ret = -ENOMEM; - goto out; } -struct lo_read_data { - struct loop_device *lo; - struct page *page; - unsigned offset; - int bsize; -}; +static int lo_read_simple(struct loop_device *lo, struct request *rq, + loff_t pos) +{ + struct bio_vec bvec; + struct req_iterator iter; + struct iov_iter i; + ssize_t len; -static int -lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - struct lo_read_data *p = sd->u.data; - struct loop_device *lo = p->lo; - struct page *page = buf->page; - sector_t IV; - int size; - - IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + - (buf->offset >> 9); - size = sd->len; - if (size > p->bsize) - size = p->bsize; - - if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { - printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n", - page->index); - size = -EINVAL; - } + rq_for_each_segment(bvec, rq, iter) { + iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos); + if (len < 0) + return len; - flush_dcache_page(p->page); + flush_dcache_page(bvec.bv_page); - if (size > 0) - p->offset += size; + if (len != bvec.bv_len) { + struct bio *bio; - return size; -} + __rq_for_each_bio(bio, rq) + zero_fill_bio(bio); + break; + } + 
cond_resched(); + } -static int -lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) -{ - return __splice_from_pipe(pipe, sd, lo_splice_actor); + return 0; } -static ssize_t -do_lo_receive(struct loop_device *lo, - struct bio_vec *bvec, int bsize, loff_t pos) +static int lo_read_transfer(struct loop_device *lo, struct request *rq, + loff_t pos) { - struct lo_read_data cookie; - struct splice_desc sd; - struct file *file; - ssize_t retval; + struct bio_vec bvec, b; + struct req_iterator iter; + struct iov_iter i; + struct page *page; + ssize_t len; + int ret = 0; - cookie.lo = lo; - cookie.page = bvec->bv_page; - cookie.offset = bvec->bv_offset; - cookie.bsize = bsize; + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; - sd.len = 0; - sd.total_len = bvec->bv_len; - sd.flags = 0; - sd.pos = pos; - sd.u.data = &cookie; + rq_for_each_segment(bvec, rq, iter) { + loff_t offset = pos; - file = lo->lo_backing_file; - retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; - return retval; -} + iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos); + if (len < 0) { + ret = len; + goto out_free_page; + } -static int -lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - ssize_t s; + ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page, + bvec.bv_offset, len, offset >> 9); + if (ret) + goto out_free_page; - rq_for_each_segment(bvec, rq, iter) { - s = do_lo_receive(lo, &bvec, bsize, pos); - if (s < 0) - return s; + flush_dcache_page(bvec.bv_page); - if (s != bvec.bv_len) { + if (len != bvec.bv_len) { struct bio *bio; __rq_for_each_bio(bio, rq) zero_fill_bio(bio); break; } - pos += bvec.bv_len; } - return 0; + + ret = 0; +out_free_page: + __free_page(page); + return ret; } static int lo_discard(struct loop_device *lo, struct request 
*rq, loff_t pos) @@ -464,10 +403,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) ret = lo_req_flush(lo, rq); else if (rq->cmd_flags & REQ_DISCARD) ret = lo_discard(lo, rq, pos); + else if (lo->transfer) + ret = lo_write_transfer(lo, rq, pos); else - ret = lo_send(lo, rq, pos); - } else - ret = lo_receive(lo, rq, lo->lo_blocksize, pos); + ret = lo_write_simple(lo, rq, pos); + + } else { + if (lo->transfer) + ret = lo_read_transfer(lo, rq, pos); + else + ret = lo_read_simple(lo, rq, pos); + } return ret; } @@ -788,7 +734,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; - lo->transfer = transfer_none; + lo->transfer = NULL; lo->ioctl = NULL; lo->lo_sizelimit = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); @@ -1007,7 +953,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, info->lo_encrypt_key_size); lo->lo_key_owner = uid; - } + } return 0; }