From 27a4f7e61e1eb4f18737926f4a66db7c48349fea Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Jan 2011 00:48:17 +0900 Subject: [PATCH 01/12] vfs: cleanup do_vfs_ioctl() Move declaration of 'inode' to beginning of the function. Since it is referenced directly or indirectly (in case of FIFREEZE/FITHAW/ FS_IOC_FIEMAP) it's not harmful IMHO. And remove unnecessary casts using 'argp' instead. Signed-off-by: Namhyung Kim Signed-off-by: Al Viro --- fs/ioctl.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 1eebeb72b202..1d9b9fcb2db4 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -548,6 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, { int error = 0; int __user *argp = (int __user *)arg; + struct inode *inode = filp->f_path.dentry->d_inode; switch (cmd) { case FIOCLEX: @@ -567,13 +568,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIOQSIZE: - if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || - S_ISREG(filp->f_path.dentry->d_inode->i_mode) || - S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { - loff_t res = - inode_get_bytes(filp->f_path.dentry->d_inode); - error = copy_to_user((loff_t __user *)arg, &res, - sizeof(res)) ? -EFAULT : 0; + if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || + S_ISLNK(inode->i_mode)) { + loff_t res = inode_get_bytes(inode); + error = copy_to_user(argp, &res, sizeof(res)) ? 
+ -EFAULT : 0; } else error = -ENOTTY; break; @@ -590,14 +589,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, return ioctl_fiemap(filp, arg); case FIGETBSZ: - { - struct inode *inode = filp->f_path.dentry->d_inode; - int __user *p = (int __user *)arg; - return put_user(inode->i_sb->s_blocksize, p); - } + return put_user(inode->i_sb->s_blocksize, argp); default: - if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + if (S_ISREG(inode->i_mode)) error = file_ioctl(filp, cmd, arg); else error = vfs_ioctl(filp, cmd, arg); From 2c3d44dc4a1262168ef31bef22b3aa554c0572d8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 21 Jan 2011 13:59:59 +0900 Subject: [PATCH 02/12] select: remove unused MAX_SELECT_SECONDS Remove the leftover from the commit 8ff3e8e85fa6 ("select: switch select() and poll() over to hrtimers"). Signed-off-by: Namhyung Kim Acked-by: Arjan van de Ven Signed-off-by: Al Viro --- fs/compat.c | 3 --- fs/select.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/fs/compat.c b/fs/compat.c index c6d31a3bab88..72fe6cda9108 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1671,9 +1671,6 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. */ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - int compat_core_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct timespec *end_time) diff --git a/fs/select.c b/fs/select.c index e56560d2b08a..d33418fdc858 100644 --- a/fs/select.c +++ b/fs/select.c @@ -517,9 +517,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. 
*/ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec *end_time) { From eaae668d01e15435cf977cced3975ccc436257fc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 15 Feb 2011 12:48:09 +0000 Subject: [PATCH 03/12] fs/inode: Fix kernel-doc format for inode_init_owner Signed-off-by: Ben Hutchings Signed-off-by: Al Viro --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index 9910c039f026..16fefd373fc2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1715,7 +1715,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) EXPORT_SYMBOL(init_special_inode); /** - * Init uid,gid,mode for new inode according to posix standards + * inode_init_owner - Init uid,gid,mode for new inode according to posix standards * @inode: New inode * @dir: Directory inode * @mode: mode of the new inode From ff38c083ad3bb8dbbed80aa9090fcc96bc4af7db Mon Sep 17 00:00:00 2001 From: David Jenni Date: Wed, 23 Feb 2011 16:51:05 +0100 Subject: [PATCH 04/12] Filesystem: fifo: Fixed coding style issue. Fixed coding style issue. Signed-off-by: David Jenni Signed-off-by: Al Viro --- fs/fifo.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/fifo.c b/fs/fifo.c index 4e303c22d5ee..b1a524d798e7 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -66,8 +66,7 @@ static int fifo_open(struct inode *inode, struct file *filp) /* suppress POLLHUP until we have * seen a writer */ filp->f_version = pipe->w_counter; - } else - { + } else { wait_for_partner(inode, &pipe->w_counter); if(signal_pending(current)) goto err_rd; From 1bef82917c74249ff21982127e57defd6ca2bb1b Mon Sep 17 00:00:00 2001 From: Holger Hans Peter Freyther Date: Thu, 24 Feb 2011 17:46:49 +0100 Subject: [PATCH 05/12] Small typo fix... 
Hi, I was backporting the coredump over pipe feature and noticed this small typo, I wish I would have something bigger to contribute... >From 15d6080e0ed4267da103c706917a33b1015e8804 Mon Sep 17 00:00:00 2001 From: Holger Hans Peter Freyther Date: Thu, 24 Feb 2011 17:42:50 +0100 Subject: [PATCH] fs: Fix a small typo in the comment The function is called umh_pipe_setup not uhm_pipe_setup. Signed-off-by: Holger Hans Peter Freyther Signed-off-by: Al Viro --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index ba99e1abb1aa..5e62d26a4fec 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1875,7 +1875,7 @@ static void wait_for_dump_helpers(struct file *file) /* - * uhm_pipe_setup + * umh_pipe_setup * helper function to customize the process used * to collect the core in userspace. Specifically * it sets up a pipe and installs it as fd 0 (stdin) From b7ed78f56575074f29ec99d8984f347f6c99c914 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 10 Mar 2011 11:31:30 -0800 Subject: [PATCH 06/12] introduce sys_syncfs to sync a single file system It is frequently useful to sync a single file system, instead of all mounted file systems via sync(2): - On machines with many mounts, it is not at all uncommon for some of them to hang (e.g. unresponsive NFS server). sync(2) will get stuck on those and may never get to the one you do care about (e.g., /). - Some applications write lots of data to the file system and then want to make sure it is flushed to disk. Calling fsync(2) on each file introduces unnecessary ordering constraints that result in a large amount of sub-optimal writeback/flush/commit behavior by the file system. There are currently two ways (that I know of) to sync a single super_block: - BLKFLSBUF ioctl on the block device: That also invalidates the bdev mapping, which isn't usually desirable, and doesn't work for non-block file systems. 
- 'mount -o remount,rw' will call sync_filesystem as an artifact of the current implementation. Relying on this little-known side effect for something like data safety sounds foolish. Both of these approaches require root privileges, which some applications do not have (nor should they need?) given that sync(2) is an unprivileged operation. This patch introduces a new system call syncfs(2) that takes an fd and syncs only the file system it references. Maybe someday we can $ sync /some/path and not get sync: ignoring all arguments The syscall is motivated by comments by Al and Christoph at the last LSF. syncfs(2) seems like an appropriate name given statfs(2). A similar ioctl was also proposed a while back, see http://marc.info/?l=linux-fsdevel&m=127970513829285&w=2 Signed-off-by: Sage Weil Signed-off-by: Al Viro --- arch/x86/ia32/ia32entry.S | 1 + arch/x86/include/asm/unistd_32.h | 3 ++- arch/x86/include/asm/unistd_64.h | 2 ++ arch/x86/kernel/syscall_table_32.S | 1 + fs/sync.c | 24 ++++++++++++++++++++++++ include/asm-generic/unistd.h | 4 +++- include/linux/syscalls.h | 1 + 7 files changed, 34 insertions(+), 2 deletions(-) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 430312ba6e3f..849a9d23c71d 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -847,4 +847,5 @@ ia32_sys_call_table: .quad sys_name_to_handle_at .quad compat_sys_open_by_handle_at .quad compat_sys_clock_adjtime + .quad sys_syncfs ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index ffaf183c619a..a755ef5e5977 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -349,10 +349,11 @@ #define __NR_name_to_handle_at 341 #define __NR_open_by_handle_at 342 #define __NR_clock_adjtime 343 +#define __NR_syncfs 344 #ifdef __KERNEL__ -#define NR_syscalls 344 +#define NR_syscalls 345 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git 
a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 5466bea670e7..160fa76bd578 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -675,6 +675,8 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) #define __NR_clock_adjtime 305 __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) +#define __NR_syncfs 306 +__SYSCALL(__NR_syncfs, sys_syncfs) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 5f181742e8f9..abce34d5c79d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -343,3 +343,4 @@ ENTRY(sys_call_table) .long sys_name_to_handle_at .long sys_open_by_handle_at .long sys_clock_adjtime + .long sys_syncfs diff --git a/fs/sync.c b/fs/sync.c index ba76b9623e7e..92ca208777d5 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +129,29 @@ void emergency_sync(void) } } +/* + * sync a single super + */ +SYSCALL_DEFINE1(syncfs, int, fd) +{ + struct file *file; + struct super_block *sb; + int ret; + int fput_needed; + + file = fget_light(fd, &fput_needed); + if (!file) + return -EBADF; + sb = file->f_dentry->d_sb; + + down_read(&sb->s_umount); + ret = sync_filesystem(sb); + up_read(&sb->s_umount); + + fput_light(file, fput_needed); + return ret; +} + /** * vfs_fsync_range - helper to sync a range of data & metadata to disk * @file: file to sync diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index d94f447c667a..176b825add52 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -652,9 +652,11 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) #define __NR_clock_adjtime 266 __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) +#define 
__NR_syncfs 267 +__SYSCALL(__NR_syncfs, sys_syncfs) #undef __NR_syscalls -#define __NR_syscalls 267 +#define __NR_syscalls 268 /* * All syscalls below here should go away really, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1f5c18e6f4f1..83ecc1749ef6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -825,6 +825,7 @@ asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); +asmlinkage long sys_syncfs(int fd); int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); From a872d5101008b091035d579897bccefdeff70def Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 18 Mar 2011 11:44:48 -0700 Subject: [PATCH 07/12] pstore: fix leaking ->i_private Move kfree() of i_private out of ->unlink() and into ->evict_inode() Signed-off-by: Tony Luck Signed-off-by: Al Viro --- fs/pstore/inode.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 08342232cb1c..f777f2902c49 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -73,11 +73,16 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) struct pstore_private *p = dentry->d_inode->i_private; p->erase(p->id); - kfree(p); return simple_unlink(dir, dentry); } +static void pstore_evict_inode(struct inode *inode) +{ + end_writeback(inode); + kfree(inode->i_private); +} + static const struct inode_operations pstore_dir_inode_operations = { .lookup = simple_lookup, .unlink = pstore_unlink, @@ -110,6 +115,7 @@ static struct inode *pstore_get_inode(struct super_block *sb, static const struct super_operations pstore_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, + .evict_inode = pstore_evict_inode, .show_options = generic_show_options, }; From 1c34092adf1feaba25b7c739cc4def2751f4fa05 Mon Sep 17 00:00:00 2001 
From: Dan Carpenter Date: Sun, 20 Mar 2011 14:22:07 +0300 Subject: [PATCH 08/12] nfs: lock() vs unlock() typo These should be spin_unlock() instead of spin_lock(). It's a typo. Signed-off-by: Dan Carpenter Signed-off-by: Al Viro --- fs/nfs/namespace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index c0b8344db0c6..bf1c68009ffd 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -98,7 +98,7 @@ char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) namelen--; buflen -= namelen; if (buflen < 0) { - spin_lock(&dentry->d_lock); + spin_unlock(&dentry->d_lock); rcu_read_unlock(); goto Elong; } @@ -108,7 +108,7 @@ char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) rcu_read_unlock(); return end; Elong_unlock: - spin_lock(&dentry->d_lock); + spin_unlock(&dentry->d_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; From aa597bc1f9476d0527e35d6dd9b481422e8205a0 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 8 Feb 2011 00:14:52 +0300 Subject: [PATCH 09/12] fs: devpts_pty_new() return -ENOMEM if dentry allocation failed In this case nobody can open a slave point, so it is better to return an error from devpts_pty_new(). Now we no longer need to check the error code from d_find_alias() in devpts_pty_kill(), because the dentry always exists. 
Signed-off-by: Andrey Vagin Signed-off-by: Al Viro --- fs/devpts/inode.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 1bb547c9cad6..c6bd815dc794 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -479,6 +479,7 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) struct dentry *root = sb->s_root; struct pts_fs_info *fsi = DEVPTS_SB(sb); struct pts_mount_opts *opts = &fsi->mount_opts; + int ret = 0; char s[12]; /* We're supposed to be given the slave end of a pty */ @@ -504,11 +505,14 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) if (!IS_ERR(dentry)) { d_add(dentry, inode); fsnotify_create(root->d_inode, dentry); + } else { + iput(inode); + ret = -ENOMEM; } mutex_unlock(&root->d_inode->i_mutex); - return 0; + return ret; } struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) @@ -544,17 +548,12 @@ void devpts_pty_kill(struct tty_struct *tty) mutex_lock(&root->d_inode->i_mutex); dentry = d_find_alias(inode); - if (IS_ERR(dentry)) - goto out; - - if (dentry) { - inode->i_nlink--; - d_delete(dentry); - dput(dentry); /* d_alloc_name() in devpts_pty_new() */ - } + inode->i_nlink--; + d_delete(dentry); + dput(dentry); /* d_alloc_name() in devpts_pty_new() */ dput(dentry); /* d_find_alias above */ -out: + mutex_unlock(&root->d_inode->i_mutex); } From c212f9aaf9101a037fb7f59e75e639437e11d758 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 19 Jan 2011 21:08:41 +0900 Subject: [PATCH 10/12] fs: Use BUG_ON(!mnt) at dentry_open(). dentry_open() requires callers to pass a valid vfsmount. 
Signed-off-by: Tetsuo Handa Signed-off-by: Al Viro --- fs/open.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/open.c b/fs/open.c index f83ca80cc59a..b52cf013ffa1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -835,17 +835,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, validate_creds(cred); - /* - * We must always pass in a valid mount pointer. Historically - * callers got away with not passing it, but we must enforce this at - * the earliest possible point now to avoid strange problems deep in the - * filesystem stack. - */ - if (!mnt) { - printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__); - dump_stack(); - return ERR_PTR(-EINVAL); - } + /* We must always pass in a valid mount pointer. */ + BUG_ON(!mnt); error = -ENFILE; f = get_empty_filp(); From 69b195be51620d72956acbf3029adad5765695dc Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 21 Mar 2011 08:32:53 -0400 Subject: [PATCH 11/12] bfs: fix bitmap size argument to find_first_zero_bit() The usage of find_first_zero_bit() in bfs_create() is wrong for two reasons. The bitmap size argument to find_first_zero_bit() is info->si_lasti but the correct bitmap size is info->si_lasti + 1 as info->si_lasti is the last valid index in info->si_imap bitmap. Another problem is that it is impossible to detect that info->si_imap bitmap is full because there is an off-by-one bug in the return value check for find_first_zero_bit(). If no zero bits exist in info->si_imap, find_first_zero_bit() returns info->si_lasti. But the check can't catch it due to the off-by-one. Signed-off-by: Akinobu Mita Acked-by: "Tigran A. 
Aivazian" Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/bfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 685ecff3ab31..b14cebfd9047 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -97,7 +97,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, if (!inode) return -ENOSPC; mutex_lock(&info->bfs_lock); - ino = find_first_zero_bit(info->si_imap, info->si_lasti); + ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1); if (ino > info->si_lasti) { mutex_unlock(&info->bfs_lock); iput(inode); From 0f60f240d522772467c7d2cebedb910748c78ed4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 21 Mar 2011 14:28:58 +0000 Subject: [PATCH 12/12] FS: lookup_mnt() is only used in the core fs routines now lookup_mnt() is only used in the core fs routines now, so it doesn't need to be globally declared anymore. It isn't exported to modules at the moment, so nothing that can be modularised seems to be using it. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/internal.h | 1 + include/linux/dcache.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/internal.h b/fs/internal.h index 17191546d527..8318059b42c6 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -64,6 +64,7 @@ extern int copy_mount_string(const void __user *, char **); extern unsigned int mnt_get_count(struct vfsmount *mnt); extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); +extern struct vfsmount *lookup_mnt(struct path *); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, struct vfsmount *); extern void release_mounts(struct list_head *); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1a87760d6532..f2afed4fa945 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -416,7 +416,6 @@ static inline bool d_mountpoint(struct dentry *dentry) return dentry->d_flags & DCACHE_MOUNTED; } -extern struct vfsmount *lookup_mnt(struct path *); extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); extern int sysctl_vfs_cache_pressure;