From 74992557461a63fda53ca35ac6a49b2bbcac7fb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 Apr 2012 18:31:00 -0400 Subject: [PATCH] --- yaml --- r: 310071 b: refs/heads/master c: 754421c8cab1a568be844a7069fe04c1cf6391b8 h: refs/heads/master i: 310069: 5a6df523c28219bcb92272bc0dd69ea24399da8c 310067: f645dffc905d3b88498d4d0b1755204f642ba0ca 310063: 25f741023997fdd87e123e2ee91f399ce77a8856 v: v3 --- [refs] | 2 +- trunk/Documentation/filesystems/Locking | 3 - trunk/Documentation/filesystems/vfs.txt | 4 - trunk/arch/alpha/include/asm/posix_types.h | 3 + trunk/arch/arm/include/asm/posix_types.h | 3 + trunk/arch/avr32/include/asm/posix_types.h | 3 + trunk/arch/blackfin/include/asm/posix_types.h | 3 + trunk/arch/cris/include/asm/posix_types.h | 3 + trunk/arch/frv/include/asm/posix_types.h | 3 + trunk/arch/h8300/include/asm/posix_types.h | 3 + trunk/arch/ia64/include/asm/posix_types.h | 3 + trunk/arch/ia64/kernel/perfmon.c | 10 +- trunk/arch/ia64/kernel/sys_ia64.c | 19 +- trunk/arch/m32r/include/asm/posix_types.h | 3 + trunk/arch/m68k/include/asm/posix_types.h | 3 + trunk/arch/mips/include/asm/posix_types.h | 5 + trunk/arch/mips/include/asm/stat.h | 6 +- trunk/arch/mn10300/include/asm/posix_types.h | 3 + trunk/arch/parisc/include/asm/posix_types.h | 3 + trunk/arch/parisc/include/asm/stat.h | 4 +- trunk/arch/powerpc/include/asm/posix_types.h | 3 + trunk/arch/powerpc/include/asm/stat.h | 4 +- trunk/arch/s390/include/asm/posix_types.h | 3 + trunk/arch/sh/include/asm/posix_types_32.h | 2 + trunk/arch/sh/include/asm/posix_types_64.h | 2 + trunk/arch/sparc/include/asm/posix_types.h | 5 + trunk/arch/sparc/kernel/sys_sparc_64.c | 11 +- trunk/arch/tile/include/asm/compat.h | 1 + trunk/arch/x86/include/asm/posix_types_32.h | 3 + trunk/drivers/base/soc.c | 2 +- trunk/drivers/gpu/drm/i810/i810_dma.c | 4 +- trunk/fs/9p/vfs_inode_dotl.c | 24 +- trunk/fs/affs/affs.h | 8 + trunk/fs/aio.c | 6 +- trunk/fs/attr.c | 5 - trunk/fs/binfmt_elf.c | 8 +- trunk/fs/binfmt_flat.c | 8 +- 
trunk/fs/btrfs/acl.c | 4 - trunk/fs/btrfs/backref.c | 495 +++------- trunk/fs/btrfs/backref.h | 3 +- trunk/fs/btrfs/btrfs_inode.h | 50 +- trunk/fs/btrfs/check-integrity.c | 584 +++--------- trunk/fs/btrfs/ctree.c | 861 +----------------- trunk/fs/btrfs/ctree.h | 78 +- trunk/fs/btrfs/delayed-inode.c | 8 +- trunk/fs/btrfs/delayed-ref.c | 10 +- trunk/fs/btrfs/delayed-ref.h | 24 + trunk/fs/btrfs/disk-io.c | 57 +- trunk/fs/btrfs/disk-io.h | 1 + trunk/fs/btrfs/export.c | 15 +- trunk/fs/btrfs/extent-tree.c | 23 +- trunk/fs/btrfs/extent_io.c | 168 ++-- trunk/fs/btrfs/extent_io.h | 8 +- trunk/fs/btrfs/file.c | 78 +- trunk/fs/btrfs/free-space-cache.c | 52 +- trunk/fs/btrfs/inode.c | 317 ++++--- trunk/fs/btrfs/ioctl.c | 50 +- trunk/fs/btrfs/ioctl.h | 33 - trunk/fs/btrfs/ordered-data.c | 165 ++-- trunk/fs/btrfs/ordered-data.h | 13 +- trunk/fs/btrfs/print-tree.c | 3 - trunk/fs/btrfs/reada.c | 5 - trunk/fs/btrfs/scrub.c | 65 +- trunk/fs/btrfs/super.c | 117 +-- trunk/fs/btrfs/transaction.c | 59 +- trunk/fs/btrfs/tree-log.c | 35 +- trunk/fs/btrfs/ulist.c | 38 +- trunk/fs/btrfs/ulist.h | 15 +- trunk/fs/btrfs/volumes.c | 306 +------ trunk/fs/btrfs/volumes.h | 52 -- trunk/fs/btrfs/xattr.c | 1 - trunk/fs/buffer.c | 2 +- trunk/fs/ceph/export.c | 32 +- trunk/fs/compat.c | 37 +- trunk/fs/dcache.c | 20 +- trunk/fs/ecryptfs/inode.c | 48 +- trunk/fs/eventpoll.c | 4 - trunk/fs/exec.c | 4 + trunk/fs/exportfs/expfs.c | 33 +- trunk/fs/ext4/Kconfig | 2 - trunk/fs/ext4/balloc.c | 41 +- trunk/fs/ext4/bitmap.c | 83 -- trunk/fs/ext4/dir.c | 12 - trunk/fs/ext4/ext4.h | 130 +-- trunk/fs/ext4/ext4_extents.h | 24 - trunk/fs/ext4/ext4_jbd2.c | 9 +- trunk/fs/ext4/ext4_jbd2.h | 7 +- trunk/fs/ext4/extents.c | 91 +- trunk/fs/ext4/file.c | 2 +- trunk/fs/ext4/ialloc.c | 81 +- trunk/fs/ext4/inode.c | 119 +-- trunk/fs/ext4/ioctl.c | 19 +- trunk/fs/ext4/mballoc.c | 30 +- trunk/fs/ext4/mmp.c | 44 +- trunk/fs/ext4/namei.c | 445 +-------- trunk/fs/ext4/resize.c | 71 +- trunk/fs/ext4/super.c | 253 +---- 
trunk/fs/ext4/xattr.c | 92 +- trunk/fs/ext4/xattr.h | 4 +- trunk/fs/fat/inode.c | 9 +- trunk/fs/fcntl.c | 42 +- trunk/fs/file_table.c | 17 +- trunk/fs/fuse/file.c | 4 +- trunk/fs/fuse/inode.c | 17 +- trunk/fs/gfs2/export.c | 17 +- trunk/fs/hpfs/alloc.c | 14 +- trunk/fs/hpfs/anode.c | 43 +- trunk/fs/hpfs/dir.c | 2 +- trunk/fs/hpfs/dnode.c | 10 +- trunk/fs/hpfs/ea.c | 60 +- trunk/fs/hpfs/hpfs.h | 289 +++--- trunk/fs/hpfs/hpfs_fn.h | 16 +- trunk/fs/hpfs/inode.c | 2 +- trunk/fs/hpfs/map.c | 20 +- trunk/fs/hpfs/namei.c | 2 +- trunk/fs/hpfs/super.c | 4 +- trunk/fs/inode.c | 124 +-- trunk/fs/internal.h | 3 +- trunk/fs/isofs/export.c | 13 +- trunk/fs/jbd2/Kconfig | 2 - trunk/fs/jbd2/commit.c | 70 +- trunk/fs/jbd2/journal.c | 132 +-- trunk/fs/jbd2/recovery.c | 126 +-- trunk/fs/jbd2/revoke.c | 27 +- trunk/fs/jbd2/transaction.c | 4 +- trunk/fs/jffs2/jffs2_fs_sb.h | 4 - trunk/fs/jffs2/os-linux.h | 7 +- trunk/fs/jffs2/super.c | 21 + trunk/fs/jffs2/wbuf.c | 55 +- trunk/fs/lockd/svc.c | 145 ++- trunk/fs/locks.c | 5 +- trunk/fs/namei.c | 177 +--- trunk/fs/namespace.c | 142 ++- trunk/fs/ncpfs/file.c | 6 +- trunk/fs/ncpfs/ncp_fs_sb.h | 10 +- trunk/fs/nfs/callback.c | 13 +- trunk/fs/nfs/dir.c | 56 +- trunk/fs/nfs/file.c | 77 +- trunk/fs/nfsd/auth.c | 2 +- trunk/fs/nfsd/export.c | 8 +- trunk/fs/nfsd/fault_inject.c | 1 - trunk/fs/nfsd/nfs4callback.c | 5 +- trunk/fs/nfsd/nfs4idmap.c | 4 +- trunk/fs/nfsd/nfs4recover.c | 4 +- trunk/fs/nfsd/nfs4state.c | 525 ++++++----- trunk/fs/nfsd/nfs4xdr.c | 62 +- trunk/fs/nfsd/nfsctl.c | 12 +- trunk/fs/nfsd/nfssvc.c | 23 - trunk/fs/nfsd/state.h | 1 + trunk/fs/nfsd/xdr4.h | 6 +- trunk/fs/nilfs2/namei.c | 22 +- trunk/fs/notify/fsnotify.c | 12 +- trunk/fs/ntfs/file.c | 4 +- trunk/fs/ocfs2/blockcheck.c | 42 +- trunk/fs/ocfs2/dlm/dlmast.c | 2 +- trunk/fs/ocfs2/dlm/dlmcommon.h | 6 +- trunk/fs/ocfs2/dlm/dlmdomain.c | 2 +- trunk/fs/ocfs2/export.c | 19 +- trunk/fs/ocfs2/inode.c | 13 +- trunk/fs/ocfs2/ioctl.c | 31 +- trunk/fs/ocfs2/move_extents.c | 6 +- 
trunk/fs/ocfs2/namei.c | 5 +- trunk/fs/ocfs2/symlink.c | 115 ++- trunk/fs/ocfs2/symlink.h | 2 +- trunk/fs/open.c | 76 +- trunk/fs/pipe.c | 7 +- trunk/fs/pnode.c | 4 +- trunk/fs/proc_namespace.c | 4 +- trunk/fs/readdir.c | 33 +- trunk/fs/reiserfs/inode.c | 30 +- trunk/fs/reiserfs/journal.c | 15 +- trunk/fs/reiserfs/reiserfs.h | 12 +- trunk/fs/reiserfs/resize.c | 1 + trunk/fs/reiserfs/super.c | 74 +- trunk/fs/select.c | 4 - trunk/fs/signalfd.c | 7 +- trunk/fs/splice.c | 6 +- trunk/fs/statfs.c | 5 +- trunk/fs/sync.c | 5 +- trunk/fs/ubifs/dir.c | 11 +- trunk/fs/udf/namei.c | 14 +- trunk/fs/utimes.c | 5 +- trunk/fs/xattr.c | 20 +- trunk/fs/xfs/kmem.c | 10 +- trunk/fs/xfs/kmem.h | 21 +- trunk/fs/xfs/xfs_export.c | 23 +- trunk/fs/xfs/xfs_file.c | 7 +- trunk/fs/xfs/xfs_log.c | 2 +- trunk/fs/xfs/xfs_log_priv.h | 2 +- trunk/fs/xfs/xfs_trans.c | 2 +- trunk/fs/xfs/xfs_trans.h | 2 +- trunk/include/asm-generic/posix_types.h | 4 + trunk/include/linux/errno.h | 1 - trunk/include/linux/exportfs.h | 4 +- trunk/include/linux/fs.h | 10 +- trunk/include/linux/fsnotify_backend.h | 2 +- trunk/include/linux/jbd2.h | 59 +- trunk/include/linux/jbd_common.h | 2 - trunk/include/linux/lglock.h | 179 +++- trunk/include/linux/mm.h | 2 +- trunk/include/linux/security.h | 40 +- trunk/include/linux/sunrpc/svc.h | 2 +- trunk/include/linux/sunrpc/svcauth.h | 10 - trunk/include/linux/sunrpc/svcauth_gss.h | 1 + trunk/include/linux/thread_info.h | 4 + trunk/include/linux/types.h | 2 +- trunk/ipc/shm.c | 7 +- trunk/kernel/Makefile | 2 +- trunk/kernel/lglock.c | 89 -- trunk/kernel/signal.c | 2 - trunk/mm/cleancache.c | 6 +- trunk/mm/filemap.c | 69 +- trunk/mm/filemap_xip.c | 4 +- trunk/mm/internal.h | 4 - trunk/mm/mmap.c | 54 +- trunk/mm/mremap.c | 26 +- trunk/mm/nommu.c | 35 +- trunk/mm/shmem.c | 6 +- trunk/mm/util.c | 30 - trunk/net/sched/sch_atm.c | 2 + trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c | 61 +- trunk/net/sunrpc/auth_gss/svcauth_gss.c | 27 +- trunk/net/sunrpc/rpcb_clnt.c | 12 +- 
trunk/net/sunrpc/svc.c | 23 +- trunk/net/sunrpc/svc_xprt.c | 4 +- trunk/net/sunrpc/svcauth_unix.c | 6 +- trunk/security/apparmor/lsm.c | 15 +- trunk/security/capability.c | 3 +- trunk/security/commoncap.c | 17 +- trunk/security/security.c | 51 +- trunk/security/selinux/hooks.c | 15 +- trunk/security/selinux/selinuxfs.c | 36 +- trunk/security/smack/smack_lsm.c | 15 +- 233 files changed, 3121 insertions(+), 6665 deletions(-) delete mode 100644 trunk/kernel/lglock.c diff --git a/[refs] b/[refs] index 5abba0221cf2..02cea2bf5611 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 1193755ac6328ad240ba987e6ec41d5e8baf0680 +refs/heads/master: 754421c8cab1a568be844a7069fe04c1cf6391b8 diff --git a/trunk/Documentation/filesystems/Locking b/trunk/Documentation/filesystems/Locking index 8e2da1e06e3b..d449e632e6a0 100644 --- a/trunk/Documentation/filesystems/Locking +++ b/trunk/Documentation/filesystems/Locking @@ -61,7 +61,6 @@ ata *); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); - void (*update_time)(struct inode *, struct timespec *, int); locking rules: all may block @@ -88,8 +87,6 @@ getxattr: no listxattr: no removexattr: yes fiemap: no -update_time: no - Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. 
diff --git a/trunk/Documentation/filesystems/vfs.txt b/trunk/Documentation/filesystems/vfs.txt index efd23f481704..ef19f91a0f12 100644 --- a/trunk/Documentation/filesystems/vfs.txt +++ b/trunk/Documentation/filesystems/vfs.txt @@ -363,7 +363,6 @@ struct inode_operations { ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); - void (*update_time)(struct inode *, struct timespec *, int); }; Again, all methods are called without any locks being held, unless @@ -472,9 +471,6 @@ otherwise noted. removexattr: called by the VFS to remove an extended attribute from a file. This method is called by removexattr(2) system call. - update_time: called by the VFS to update a specific time or the i_version of - an inode. If this is not defined the VFS will update the inode itself - and call mark_inode_dirty_sync. The Address Space Object ======================== diff --git a/trunk/arch/alpha/include/asm/posix_types.h b/trunk/arch/alpha/include/asm/posix_types.h index 5a8a48320efe..24779fc95994 100644 --- a/trunk/arch/alpha/include/asm/posix_types.h +++ b/trunk/arch/alpha/include/asm/posix_types.h @@ -10,6 +10,9 @@ typedef unsigned int __kernel_ino_t; #define __kernel_ino_t __kernel_ino_t +typedef unsigned int __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/trunk/arch/arm/include/asm/posix_types.h b/trunk/arch/arm/include/asm/posix_types.h index d2de9cbbcd9b..efdf99045d87 100644 --- a/trunk/arch/arm/include/asm/posix_types.h +++ b/trunk/arch/arm/include/asm/posix_types.h @@ -22,6 +22,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git 
a/trunk/arch/avr32/include/asm/posix_types.h b/trunk/arch/avr32/include/asm/posix_types.h index 9ba9e749b3f3..74667bfc88cc 100644 --- a/trunk/arch/avr32/include/asm/posix_types.h +++ b/trunk/arch/avr32/include/asm/posix_types.h @@ -17,6 +17,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/blackfin/include/asm/posix_types.h b/trunk/arch/blackfin/include/asm/posix_types.h index 1bd3436db6a7..41bc1875c4d7 100644 --- a/trunk/arch/blackfin/include/asm/posix_types.h +++ b/trunk/arch/blackfin/include/asm/posix_types.h @@ -10,6 +10,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned int __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/cris/include/asm/posix_types.h b/trunk/arch/cris/include/asm/posix_types.h index ce4e51793151..234891c74e2b 100644 --- a/trunk/arch/cris/include/asm/posix_types.h +++ b/trunk/arch/cris/include/asm/posix_types.h @@ -15,6 +15,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/frv/include/asm/posix_types.h b/trunk/arch/frv/include/asm/posix_types.h index fe512af74a5a..3f34cb45fbb3 100644 --- a/trunk/arch/frv/include/asm/posix_types.h +++ b/trunk/arch/frv/include/asm/posix_types.h @@ -10,6 +10,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; 
#define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/h8300/include/asm/posix_types.h b/trunk/arch/h8300/include/asm/posix_types.h index 91e62ba4c7b0..bc4c34efb1ad 100644 --- a/trunk/arch/h8300/include/asm/posix_types.h +++ b/trunk/arch/h8300/include/asm/posix_types.h @@ -10,6 +10,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/ia64/include/asm/posix_types.h b/trunk/arch/ia64/include/asm/posix_types.h index 99ee1d6510cf..7323ab9467eb 100644 --- a/trunk/arch/ia64/include/asm/posix_types.h +++ b/trunk/arch/ia64/include/asm/posix_types.h @@ -1,6 +1,9 @@ #ifndef _ASM_IA64_POSIX_TYPES_H #define _ASM_IA64_POSIX_TYPES_H +typedef unsigned int __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/trunk/arch/ia64/kernel/perfmon.c b/trunk/arch/ia64/kernel/perfmon.c index d7f558c1e711..f00ba025375d 100644 --- a/trunk/arch/ia64/kernel/perfmon.c +++ b/trunk/arch/ia64/kernel/perfmon.c @@ -604,6 +604,12 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) spin_unlock(&(x)->ctx_lock); } +static inline unsigned long +pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) +{ + return get_unmapped_area(file, addr, len, pgoff, flags); +} + /* forward declaration */ static const struct dentry_operations pfmfs_dentry_operations; @@ -2327,8 +2333,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t down_write(&task->mm->mmap_sem); /* find some free area in address space, must have mmap sem held */ - vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); - if (IS_ERR_VALUE(vma->vm_start)) { + 
vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); + if (vma->vm_start == 0UL) { DPRINT(("Cannot find unmapped area for size %ld\n", size)); up_write(&task->mm->mmap_sem); goto error; diff --git a/trunk/arch/ia64/kernel/sys_ia64.c b/trunk/arch/ia64/kernel/sys_ia64.c index d9439ef2f661..609d50056a6c 100644 --- a/trunk/arch/ia64/kernel/sys_ia64.c +++ b/trunk/arch/ia64/kernel/sys_ia64.c @@ -171,9 +171,22 @@ asmlinkage unsigned long ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { - addr = sys_mremap(addr, old_len, new_len, flags, new_addr); - if (!IS_ERR((void *) addr)) - force_successful_syscall_return(); + extern unsigned long do_mremap (unsigned long addr, + unsigned long old_len, + unsigned long new_len, + unsigned long flags, + unsigned long new_addr); + + down_write(¤t->mm->mmap_sem); + { + addr = do_mremap(addr, old_len, new_len, flags, new_addr); + } + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *) addr)) + return addr; + + force_successful_syscall_return(); return addr; } diff --git a/trunk/arch/m32r/include/asm/posix_types.h b/trunk/arch/m32r/include/asm/posix_types.h index 236de26a409b..0195850e1f88 100644 --- a/trunk/arch/m32r/include/asm/posix_types.h +++ b/trunk/arch/m32r/include/asm/posix_types.h @@ -10,6 +10,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/m68k/include/asm/posix_types.h b/trunk/arch/m68k/include/asm/posix_types.h index cf4dbf70fdc7..6373093be72b 100644 --- a/trunk/arch/m68k/include/asm/posix_types.h +++ b/trunk/arch/m68k/include/asm/posix_types.h @@ -10,6 +10,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; 
+#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/mips/include/asm/posix_types.h b/trunk/arch/mips/include/asm/posix_types.h index fa03ec3fbf89..e0308dcca135 100644 --- a/trunk/arch/mips/include/asm/posix_types.h +++ b/trunk/arch/mips/include/asm/posix_types.h @@ -17,6 +17,11 @@ * assume GCC is being used. */ +#if (_MIPS_SZLONG == 64) +typedef unsigned int __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t +#endif + typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/trunk/arch/mips/include/asm/stat.h b/trunk/arch/mips/include/asm/stat.h index fe9a4c3ec5a1..6e00f751ab6d 100644 --- a/trunk/arch/mips/include/asm/stat.h +++ b/trunk/arch/mips/include/asm/stat.h @@ -20,7 +20,7 @@ struct stat { long st_pad1[3]; /* Reserved for network id */ ino_t st_ino; mode_t st_mode; - __u32 st_nlink; + nlink_t st_nlink; uid_t st_uid; gid_t st_gid; unsigned st_rdev; @@ -55,7 +55,7 @@ struct stat64 { unsigned long long st_ino; mode_t st_mode; - __u32 st_nlink; + nlink_t st_nlink; uid_t st_uid; gid_t st_gid; @@ -96,7 +96,7 @@ struct stat { unsigned long st_ino; mode_t st_mode; - __u32 st_nlink; + nlink_t st_nlink; uid_t st_uid; gid_t st_gid; diff --git a/trunk/arch/mn10300/include/asm/posix_types.h b/trunk/arch/mn10300/include/asm/posix_types.h index d31eeea480cf..ab506181ec31 100644 --- a/trunk/arch/mn10300/include/asm/posix_types.h +++ b/trunk/arch/mn10300/include/asm/posix_types.h @@ -20,6 +20,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/parisc/include/asm/posix_types.h b/trunk/arch/parisc/include/asm/posix_types.h index b9344256f76b..5212b0357daf 100644 --- 
a/trunk/arch/parisc/include/asm/posix_types.h +++ b/trunk/arch/parisc/include/asm/posix_types.h @@ -10,6 +10,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/arch/parisc/include/asm/stat.h b/trunk/arch/parisc/include/asm/stat.h index d76fbda5d62c..9d5fbbc5c31f 100644 --- a/trunk/arch/parisc/include/asm/stat.h +++ b/trunk/arch/parisc/include/asm/stat.h @@ -7,7 +7,7 @@ struct stat { unsigned int st_dev; /* dev_t is 32 bits on parisc */ ino_t st_ino; /* 32 bits */ mode_t st_mode; /* 16 bits */ - unsigned short st_nlink; /* 16 bits */ + nlink_t st_nlink; /* 16 bits */ unsigned short st_reserved1; /* old st_uid */ unsigned short st_reserved2; /* old st_gid */ unsigned int st_rdev; @@ -42,7 +42,7 @@ struct hpux_stat64 { unsigned int st_dev; /* dev_t is 32 bits on parisc */ ino_t st_ino; /* 32 bits */ mode_t st_mode; /* 16 bits */ - unsigned short st_nlink; /* 16 bits */ + nlink_t st_nlink; /* 16 bits */ unsigned short st_reserved1; /* old st_uid */ unsigned short st_reserved2; /* old st_gid */ unsigned int st_rdev; diff --git a/trunk/arch/powerpc/include/asm/posix_types.h b/trunk/arch/powerpc/include/asm/posix_types.h index 2958c5b97b2d..f1393252bbda 100644 --- a/trunk/arch/powerpc/include/asm/posix_types.h +++ b/trunk/arch/powerpc/include/asm/posix_types.h @@ -16,6 +16,9 @@ typedef int __kernel_ssize_t; typedef long __kernel_ptrdiff_t; #define __kernel_size_t __kernel_size_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif diff --git a/trunk/arch/powerpc/include/asm/stat.h b/trunk/arch/powerpc/include/asm/stat.h index 10cfb558e0fd..e4edc510b530 100644 --- a/trunk/arch/powerpc/include/asm/stat.h +++ 
b/trunk/arch/powerpc/include/asm/stat.h @@ -30,11 +30,11 @@ struct stat { unsigned long st_dev; ino_t st_ino; #ifdef __powerpc64__ - unsigned short st_nlink; + nlink_t st_nlink; mode_t st_mode; #else mode_t st_mode; - unsigned short st_nlink; + nlink_t st_nlink; #endif uid_t st_uid; gid_t st_gid; diff --git a/trunk/arch/s390/include/asm/posix_types.h b/trunk/arch/s390/include/asm/posix_types.h index 7be104c0f192..edf8527ff08d 100644 --- a/trunk/arch/s390/include/asm/posix_types.h +++ b/trunk/arch/s390/include/asm/posix_types.h @@ -24,6 +24,7 @@ typedef unsigned short __kernel_old_dev_t; typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; @@ -34,6 +35,7 @@ typedef int __kernel_ptrdiff_t; typedef unsigned int __kernel_ino_t; typedef unsigned int __kernel_mode_t; +typedef unsigned int __kernel_nlink_t; typedef int __kernel_ipc_pid_t; typedef unsigned int __kernel_uid_t; typedef unsigned int __kernel_gid_t; @@ -45,6 +47,7 @@ typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #define __kernel_ino_t __kernel_ino_t #define __kernel_mode_t __kernel_mode_t +#define __kernel_nlink_t __kernel_nlink_t #define __kernel_ipc_pid_t __kernel_ipc_pid_t #define __kernel_uid_t __kernel_uid_t #define __kernel_gid_t __kernel_gid_t diff --git a/trunk/arch/sh/include/asm/posix_types_32.h b/trunk/arch/sh/include/asm/posix_types_32.h index ba0bdc423b07..abda58467ece 100644 --- a/trunk/arch/sh/include/asm/posix_types_32.h +++ b/trunk/arch/sh/include/asm/posix_types_32.h @@ -3,6 +3,8 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git 
a/trunk/arch/sh/include/asm/posix_types_64.h b/trunk/arch/sh/include/asm/posix_types_64.h index 244f7e950e17..fcda07b4a616 100644 --- a/trunk/arch/sh/include/asm/posix_types_64.h +++ b/trunk/arch/sh/include/asm/posix_types_64.h @@ -3,6 +3,8 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/trunk/arch/sparc/include/asm/posix_types.h b/trunk/arch/sparc/include/asm/posix_types.h index 156220ed99eb..3070f25ae90a 100644 --- a/trunk/arch/sparc/include/asm/posix_types.h +++ b/trunk/arch/sparc/include/asm/posix_types.h @@ -9,6 +9,8 @@ #if defined(__sparc__) && defined(__arch64__) /* sparc 64 bit */ +typedef unsigned int __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; @@ -36,6 +38,9 @@ typedef unsigned short __kernel_gid_t; typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/trunk/arch/sparc/kernel/sys_sparc_64.c b/trunk/arch/sparc/kernel/sys_sparc_64.c index 275f74fd6f6a..3ee51f189a55 100644 --- a/trunk/arch/sparc/kernel/sys_sparc_64.c +++ b/trunk/arch/sparc/kernel/sys_sparc_64.c @@ -580,9 +580,16 @@ SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr) { + unsigned long ret = -EINVAL; + if (test_thread_flag(TIF_32BIT)) - return -EINVAL; - return sys_mremap(addr, old_len, new_len, flags, new_addr); + goto out; + + down_write(¤t->mm->mmap_sem); + ret = do_mremap(addr, old_len, new_len, flags, new_addr); + up_write(¤t->mm->mmap_sem); +out: + return 
ret; } /* we come to here via sys_nis_syscall so it can setup the regs argument */ diff --git a/trunk/arch/tile/include/asm/compat.h b/trunk/arch/tile/include/asm/compat.h index 6e74450ff0a1..69adc08d36a5 100644 --- a/trunk/arch/tile/include/asm/compat.h +++ b/trunk/arch/tile/include/asm/compat.h @@ -44,6 +44,7 @@ typedef __kernel_uid32_t __compat_gid32_t; typedef __kernel_mode_t compat_mode_t; typedef __kernel_dev_t compat_dev_t; typedef __kernel_loff_t compat_loff_t; +typedef __kernel_nlink_t compat_nlink_t; typedef __kernel_ipc_pid_t compat_ipc_pid_t; typedef __kernel_daddr_t compat_daddr_t; typedef __kernel_fsid_t compat_fsid_t; diff --git a/trunk/arch/x86/include/asm/posix_types_32.h b/trunk/arch/x86/include/asm/posix_types_32.h index 8e525059e7d8..99f262e04b91 100644 --- a/trunk/arch/x86/include/asm/posix_types_32.h +++ b/trunk/arch/x86/include/asm/posix_types_32.h @@ -10,6 +10,9 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +typedef unsigned short __kernel_nlink_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/trunk/drivers/base/soc.c b/trunk/drivers/base/soc.c index 72b5e7280d14..ba29b2e73d48 100644 --- a/trunk/drivers/base/soc.c +++ b/trunk/drivers/base/soc.c @@ -42,7 +42,7 @@ struct device *soc_device_to_device(struct soc_device *soc_dev) return &soc_dev->dev; } -static umode_t soc_attribute_mode(struct kobject *kobj, +static mode_t soc_attribute_mode(struct kobject *kobj, struct attribute *attr, int index) { diff --git a/trunk/drivers/gpu/drm/i810/i810_dma.c b/trunk/drivers/gpu/drm/i810/i810_dma.c index fa9439159ebd..f920fb5e42b6 100644 --- a/trunk/drivers/gpu/drm/i810/i810_dma.c +++ b/trunk/drivers/gpu/drm/i810/i810_dma.c @@ -130,10 +130,11 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv) return -EINVAL; /* This is all entirely broken */ + down_write(¤t->mm->mmap_sem); old_fops = 
file_priv->filp->f_op; file_priv->filp->f_op = &i810_buffer_fops; dev_priv->mmap_buffer = buf; - buf_priv->virtual = (void *)vm_mmap(file_priv->filp, 0, buf->total, + buf_priv->virtual = (void *)do_mmap(file_priv->filp, 0, buf->total, PROT_READ | PROT_WRITE, MAP_SHARED, buf->bus_address); dev_priv->mmap_buffer = NULL; @@ -144,6 +145,7 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv) retcode = PTR_ERR(buf_priv->virtual); buf_priv->virtual = NULL; } + up_write(¤t->mm->mmap_sem); return retcode; } diff --git a/trunk/fs/9p/vfs_inode_dotl.c b/trunk/fs/9p/vfs_inode_dotl.c index e3dd2a1e2bfc..a1e6c990cd41 100644 --- a/trunk/fs/9p/vfs_inode_dotl.c +++ b/trunk/fs/9p/vfs_inode_dotl.c @@ -68,6 +68,24 @@ static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) return current_fsgid(); } +/** + * v9fs_dentry_from_dir_inode - helper function to get the dentry from + * dir inode. + * + */ + +static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) +{ + struct dentry *dentry; + + spin_lock(&inode->i_lock); + /* Directory should have only one entry. 
*/ + BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); + dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + spin_unlock(&inode->i_lock); + return dentry; +} + static int v9fs_test_inode_dotl(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); @@ -397,7 +415,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, if (dir->i_mode & S_ISGID) omode |= S_ISGID; - dir_dentry = dentry->d_parent; + dir_dentry = v9fs_dentry_from_dir_inode(dir); dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); @@ -775,7 +793,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, dir->i_ino, old_dentry->d_name.name, dentry->d_name.name); v9ses = v9fs_inode2v9ses(dir); - dir_dentry = dentry->d_parent; + dir_dentry = v9fs_dentry_from_dir_inode(dir); dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) return PTR_ERR(dfid); @@ -840,7 +858,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, return -EINVAL; v9ses = v9fs_inode2v9ses(dir); - dir_dentry = dentry->d_parent; + dir_dentry = v9fs_dentry_from_dir_inode(dir); dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); diff --git a/trunk/fs/affs/affs.h b/trunk/fs/affs/affs.h index 1fceb320d2f2..45a0ce45d7b4 100644 --- a/trunk/fs/affs/affs.h +++ b/trunk/fs/affs/affs.h @@ -18,6 +18,14 @@ #define AFFS_GET_HASHENTRY(data,hashkey) be32_to_cpu(((struct dir_front *)data)->hashtable[hashkey]) #define AFFS_BLOCK(sb, bh, blk) (AFFS_HEAD(bh)->table[AFFS_SB(sb)->s_hashsize-1-(blk)]) +#ifdef __LITTLE_ENDIAN +#define BO_EXBITS 0x18UL +#elif defined(__BIG_ENDIAN) +#define BO_EXBITS 0x00UL +#else +#error Endianness must be known for affs to work. 
+#endif + #define AFFS_HEAD(bh) ((struct affs_head *)(bh)->b_data) #define AFFS_TAIL(sb, bh) ((struct affs_tail *)((bh)->b_data+(sb)->s_blocksize-sizeof(struct affs_tail))) #define AFFS_ROOT_HEAD(bh) ((struct affs_root_head *)(bh)->b_data) diff --git a/trunk/fs/aio.c b/trunk/fs/aio.c index 55c4c7656053..8c7c8b805372 100644 --- a/trunk/fs/aio.c +++ b/trunk/fs/aio.c @@ -134,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx) info->mmap_size = nr_pages * PAGE_SIZE; dprintk("attempting mmap of %lu bytes\n", info->mmap_size); down_write(&ctx->mm->mmap_sem); - info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, 0); + info->mmap_base = do_mmap(NULL, 0, info->mmap_size, + PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, + 0); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); info->mmap_size = 0; diff --git a/trunk/fs/attr.c b/trunk/fs/attr.c index 0da90951d277..584620e5dee5 100644 --- a/trunk/fs/attr.c +++ b/trunk/fs/attr.c @@ -176,11 +176,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } - if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) { - if (attr->ia_size != inode->i_size) - inode_inc_iversion(inode); - } - if ((ia_valid & ATTR_MODE)) { umode_t amode = attr->ia_mode; /* Flag setting protected by i_mutex */ diff --git a/trunk/fs/binfmt_elf.c b/trunk/fs/binfmt_elf.c index 1b52956afe33..e658dd134b95 100644 --- a/trunk/fs/binfmt_elf.c +++ b/trunk/fs/binfmt_elf.c @@ -329,6 +329,7 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, if (!size) return addr; + down_write(¤t->mm->mmap_sem); /* * total_size is the size of the ELF (interpreter) image. 
* The _first_ mmap needs to know the full size, otherwise @@ -339,12 +340,13 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, */ if (total_size) { total_size = ELF_PAGEALIGN(total_size); - map_addr = vm_mmap(filep, addr, total_size, prot, type, off); + map_addr = do_mmap(filep, addr, total_size, prot, type, off); if (!BAD_ADDR(map_addr)) - vm_munmap(map_addr+size, total_size-size); + do_munmap(current->mm, map_addr+size, total_size-size); } else - map_addr = vm_mmap(filep, addr, size, prot, type, off); + map_addr = do_mmap(filep, addr, size, prot, type, off); + up_write(¤t->mm->mmap_sem); return(map_addr); } diff --git a/trunk/fs/binfmt_flat.c b/trunk/fs/binfmt_flat.c index 178cb70acc26..6b2daf99fab8 100644 --- a/trunk/fs/binfmt_flat.c +++ b/trunk/fs/binfmt_flat.c @@ -562,7 +562,7 @@ static int load_flat_file(struct linux_binprm * bprm, realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", (int)-realdatastart); - vm_munmap(textpos, text_len); + do_munmap(current->mm, textpos, text_len); ret = realdatastart; goto err; } @@ -586,8 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm, } if (IS_ERR_VALUE(result)) { printk("Unable to read data+bss, errno %d\n", (int)-result); - vm_munmap(textpos, text_len); - vm_munmap(realdatastart, len); + do_munmap(current->mm, textpos, text_len); + do_munmap(current->mm, realdatastart, len); ret = result; goto err; } @@ -654,7 +654,7 @@ static int load_flat_file(struct linux_binprm * bprm, } if (IS_ERR_VALUE(result)) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); - vm_munmap(textpos, text_len + data_len + extra + + do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); ret = result; goto err; diff --git a/trunk/fs/btrfs/acl.c b/trunk/fs/btrfs/acl.c index 761e2cd8fed1..89b156d85d63 100644 --- a/trunk/fs/btrfs/acl.c +++ b/trunk/fs/btrfs/acl.c @@ -227,11 +227,7 @@ int 
btrfs_init_acl(struct btrfs_trans_handle *trans, if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); - } else { - cache_no_acl(inode); } - } else { - cache_no_acl(inode); } failed: posix_acl_release(acl); diff --git a/trunk/fs/btrfs/backref.c b/trunk/fs/btrfs/backref.c index 3f75895c919b..bcec06750232 100644 --- a/trunk/fs/btrfs/backref.c +++ b/trunk/fs/btrfs/backref.c @@ -24,135 +24,22 @@ #include "delayed-ref.h" #include "locking.h" -struct extent_inode_elem { - u64 inum; - u64 offset; - struct extent_inode_elem *next; -}; - -static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, - struct btrfs_file_extent_item *fi, - u64 extent_item_pos, - struct extent_inode_elem **eie) -{ - u64 data_offset; - u64 data_len; - struct extent_inode_elem *e; - - data_offset = btrfs_file_extent_offset(eb, fi); - data_len = btrfs_file_extent_num_bytes(eb, fi); - - if (extent_item_pos < data_offset || - extent_item_pos >= data_offset + data_len) - return 1; - - e = kmalloc(sizeof(*e), GFP_NOFS); - if (!e) - return -ENOMEM; - - e->next = *eie; - e->inum = key->objectid; - e->offset = key->offset + (extent_item_pos - data_offset); - *eie = e; - - return 0; -} - -static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, - u64 extent_item_pos, - struct extent_inode_elem **eie) -{ - u64 disk_byte; - struct btrfs_key key; - struct btrfs_file_extent_item *fi; - int slot; - int nritems; - int extent_type; - int ret; - - /* - * from the shared data ref, we only have the leaf but we need - * the key. thus, we must look into all items and see that we - * find one (some) with a reference to our extent item. 
- */ - nritems = btrfs_header_nritems(eb); - for (slot = 0; slot < nritems; ++slot) { - btrfs_item_key_to_cpu(eb, &key, slot); - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); - extent_type = btrfs_file_extent_type(eb, fi); - if (extent_type == BTRFS_FILE_EXTENT_INLINE) - continue; - /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ - disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); - if (disk_byte != wanted_disk_byte) - continue; - - ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie); - if (ret < 0) - return ret; - } - - return 0; -} - /* * this structure records all encountered refs on the way up to the root */ struct __prelim_ref { struct list_head list; u64 root_id; - struct btrfs_key key_for_search; + struct btrfs_key key; int level; int count; - struct extent_inode_elem *inode_list; u64 parent; u64 wanted_disk_byte; }; -/* - * the rules for all callers of this function are: - * - obtaining the parent is the goal - * - if you add a key, you must know that it is a correct key - * - if you cannot add the parent or a correct key, then we will look into the - * block later to set a correct key - * - * delayed refs - * ============ - * backref type | shared | indirect | shared | indirect - * information | tree | tree | data | data - * --------------------+--------+----------+--------+---------- - * parent logical | y | - | - | - - * key to resolve | - | y | y | y - * tree block logical | - | - | - | - - * root for resolving | y | y | y | y - * - * - column 1: we've the parent -> done - * - column 2, 3, 4: we use the key to find the parent - * - * on disk refs (inline or keyed) - * ============================== - * backref type | shared | indirect | shared | indirect - * information | tree | tree | data | data - * --------------------+--------+----------+--------+---------- - * parent logical | y | - | y | - - * key to resolve | - | - | - | y - * tree block logical | y | y 
| y | y - * root for resolving | - | y | y | y - * - * - column 1, 3: we've the parent -> done - * - column 2: we take the first key from the block to find the parent - * (see __add_missing_keys) - * - column 4: we use the key to find the parent - * - * additional information that's available but not required to find the parent - * block might help in merging entries to gain some speed. - */ - static int __add_prelim_ref(struct list_head *head, u64 root_id, - struct btrfs_key *key, int level, - u64 parent, u64 wanted_disk_byte, int count) + struct btrfs_key *key, int level, u64 parent, + u64 wanted_disk_byte, int count) { struct __prelim_ref *ref; @@ -163,11 +50,10 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, ref->root_id = root_id; if (key) - ref->key_for_search = *key; + ref->key = *key; else - memset(&ref->key_for_search, 0, sizeof(ref->key_for_search)); + memset(&ref->key, 0, sizeof(ref->key)); - ref->inode_list = NULL; ref->level = level; ref->count = count; ref->parent = parent; @@ -178,26 +64,18 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, } static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, - struct ulist *parents, int level, - struct btrfs_key *key, u64 wanted_disk_byte, - const u64 *extent_item_pos) + struct ulist *parents, + struct extent_buffer *eb, int level, + u64 wanted_objectid, u64 wanted_disk_byte) { int ret; - int slot = path->slots[level]; - struct extent_buffer *eb = path->nodes[level]; + int slot; struct btrfs_file_extent_item *fi; - struct extent_inode_elem *eie = NULL; + struct btrfs_key key; u64 disk_byte; - u64 wanted_objectid = key->objectid; add_parent: - if (level == 0 && extent_item_pos) { - fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); - ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie); - if (ret < 0) - return ret; - } - ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS); + ret = ulist_add(parents, eb->start, 0, 
GFP_NOFS); if (ret < 0) return ret; @@ -211,7 +89,6 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, * repeat this until we don't find any additional EXTENT_DATA items. */ while (1) { - eie = NULL; ret = btrfs_next_leaf(root, path); if (ret < 0) return ret; @@ -220,9 +97,9 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, eb = path->nodes[0]; for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { - btrfs_item_key_to_cpu(eb, key, slot); - if (key->objectid != wanted_objectid || - key->type != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.objectid != wanted_objectid || + key.type != BTRFS_EXTENT_DATA_KEY) return 0; fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); @@ -241,10 +118,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, */ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, int search_commit_root, - u64 time_seq, struct __prelim_ref *ref, - struct ulist *parents, - const u64 *extent_item_pos) + struct ulist *parents) { struct btrfs_path *path; struct btrfs_root *root; @@ -277,13 +152,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; path->lowest_level = level; - ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); + ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0); pr_debug("search slot in root %llu (level %d, ref count %d) returned " "%d for key (%llu %u %llu)\n", (unsigned long long)ref->root_id, level, ref->count, ret, - (unsigned long long)ref->key_for_search.objectid, - ref->key_for_search.type, - (unsigned long long)ref->key_for_search.offset); + (unsigned long long)ref->key.objectid, ref->key.type, + (unsigned long long)ref->key.offset); if (ret < 0) goto out; @@ -305,8 +179,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, btrfs_item_key_to_cpu(eb, &key, path->slots[0]); } - ret = add_all_parents(root, path, parents, level, 
&key, - ref->wanted_disk_byte, extent_item_pos); + /* the last two parameters will only be used for level == 0 */ + ret = add_all_parents(root, path, parents, eb, level, key.objectid, + ref->wanted_disk_byte); out: btrfs_free_path(path); return ret; @@ -316,9 +191,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, * resolve all indirect backrefs from the list */ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, - int search_commit_root, u64 time_seq, - struct list_head *head, - const u64 *extent_item_pos) + int search_commit_root, + struct list_head *head) { int err; int ret = 0; @@ -327,7 +201,6 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, struct __prelim_ref *new_ref; struct ulist *parents; struct ulist_node *node; - struct ulist_iterator uiter; parents = ulist_alloc(GFP_NOFS); if (!parents) @@ -344,8 +217,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, if (ref->count == 0) continue; err = __resolve_indirect_ref(fs_info, search_commit_root, - time_seq, ref, parents, - extent_item_pos); + ref, parents); if (err) { if (ret == 0) ret = err; @@ -353,14 +225,11 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, } /* we put the first parent into the ref at hand */ - ULIST_ITER_INIT(&uiter); - node = ulist_next(parents, &uiter); + node = ulist_next(parents, NULL); ref->parent = node ? node->val : 0; - ref->inode_list = - node ? 
(struct extent_inode_elem *)node->aux : 0; /* additional parents require new refs being added here */ - while ((node = ulist_next(parents, &uiter))) { + while ((node = ulist_next(parents, node))) { new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); if (!new_ref) { ret = -ENOMEM; @@ -368,8 +237,6 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, } memcpy(new_ref, ref, sizeof(*ref)); new_ref->parent = node->val; - new_ref->inode_list = - (struct extent_inode_elem *)node->aux; list_add(&new_ref->list, &ref->list); } ulist_reinit(parents); @@ -379,65 +246,10 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, return ret; } -static inline int ref_for_same_block(struct __prelim_ref *ref1, - struct __prelim_ref *ref2) -{ - if (ref1->level != ref2->level) - return 0; - if (ref1->root_id != ref2->root_id) - return 0; - if (ref1->key_for_search.type != ref2->key_for_search.type) - return 0; - if (ref1->key_for_search.objectid != ref2->key_for_search.objectid) - return 0; - if (ref1->key_for_search.offset != ref2->key_for_search.offset) - return 0; - if (ref1->parent != ref2->parent) - return 0; - - return 1; -} - -/* - * read tree blocks and add keys where required. 
- */ -static int __add_missing_keys(struct btrfs_fs_info *fs_info, - struct list_head *head) -{ - struct list_head *pos; - struct extent_buffer *eb; - - list_for_each(pos, head) { - struct __prelim_ref *ref; - ref = list_entry(pos, struct __prelim_ref, list); - - if (ref->parent) - continue; - if (ref->key_for_search.type) - continue; - BUG_ON(!ref->wanted_disk_byte); - eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, - fs_info->tree_root->leafsize, 0); - BUG_ON(!eb); - btrfs_tree_read_lock(eb); - if (btrfs_header_level(eb) == 0) - btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); - else - btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0); - btrfs_tree_read_unlock(eb); - free_extent_buffer(eb); - } - return 0; -} - /* * merge two lists of backrefs and adjust counts accordingly * * mode = 1: merge identical keys, if key is set - * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. - * additionally, we could even add a key range for the blocks we - * looked into to merge even more (-> replace unresolved refs by those - * having a parent). 
* mode = 2: merge identical parents */ static int __merge_refs(struct list_head *head, int mode) @@ -451,21 +263,20 @@ static int __merge_refs(struct list_head *head, int mode) ref1 = list_entry(pos1, struct __prelim_ref, list); + if (mode == 1 && ref1->key.type == 0) + continue; for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; pos2 = n2, n2 = pos2->next) { struct __prelim_ref *ref2; - struct __prelim_ref *xchg; ref2 = list_entry(pos2, struct __prelim_ref, list); if (mode == 1) { - if (!ref_for_same_block(ref1, ref2)) + if (memcmp(&ref1->key, &ref2->key, + sizeof(ref1->key)) || + ref1->level != ref2->level || + ref1->root_id != ref2->root_id) continue; - if (!ref1->parent && ref2->parent) { - xchg = ref1; - ref1 = ref2; - ref2 = xchg; - } ref1->count += ref2->count; } else { if (ref1->parent != ref2->parent) @@ -485,17 +296,16 @@ static int __merge_refs(struct list_head *head, int mode) * smaller or equal that seq to the list */ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, + struct btrfs_key *info_key, struct list_head *prefs) { struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct rb_node *n = &head->node.rb_node; - struct btrfs_key key; - struct btrfs_key op_key = {0}; int sgn; int ret = 0; if (extent_op && extent_op->update_key) - btrfs_disk_key_to_cpu(&op_key, &extent_op->key); + btrfs_disk_key_to_cpu(info_key, &extent_op->key); while ((n = rb_prev(n))) { struct btrfs_delayed_ref_node *node; @@ -527,7 +337,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, struct btrfs_delayed_tree_ref *ref; ref = btrfs_delayed_node_to_tree_ref(node); - ret = __add_prelim_ref(prefs, ref->root, &op_key, + ret = __add_prelim_ref(prefs, ref->root, info_key, ref->level + 1, 0, node->bytenr, node->ref_mod * sgn); break; @@ -536,7 +346,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, struct btrfs_delayed_tree_ref *ref; ref = btrfs_delayed_node_to_tree_ref(node); - ret = 
__add_prelim_ref(prefs, ref->root, NULL, + ret = __add_prelim_ref(prefs, ref->root, info_key, ref->level + 1, ref->parent, node->bytenr, node->ref_mod * sgn); @@ -544,6 +354,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, } case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_delayed_data_ref *ref; + struct btrfs_key key; + ref = btrfs_delayed_node_to_data_ref(node); key.objectid = ref->objectid; @@ -556,6 +368,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, } case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_delayed_data_ref *ref; + struct btrfs_key key; ref = btrfs_delayed_node_to_data_ref(node); @@ -581,7 +394,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, */ static int __add_inline_refs(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, - int *info_level, struct list_head *prefs) + struct btrfs_key *info_key, int *info_level, + struct list_head *prefs) { int ret = 0; int slot; @@ -597,7 +411,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, * enumerate all inline refs */ leaf = path->nodes[0]; - slot = path->slots[0]; + slot = path->slots[0] - 1; item_size = btrfs_item_size_nr(leaf, slot); BUG_ON(item_size < sizeof(*ei)); @@ -610,9 +424,12 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { struct btrfs_tree_block_info *info; + struct btrfs_disk_key disk_key; info = (struct btrfs_tree_block_info *)ptr; *info_level = btrfs_tree_block_level(leaf, info); + btrfs_tree_block_key(leaf, info, &disk_key); + btrfs_disk_key_to_cpu(info_key, &disk_key); ptr += sizeof(struct btrfs_tree_block_info); BUG_ON(ptr > end); } else { @@ -630,7 +447,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, switch (type) { case BTRFS_SHARED_BLOCK_REF_KEY: - ret = __add_prelim_ref(prefs, 0, NULL, + ret = __add_prelim_ref(prefs, 0, info_key, *info_level + 1, offset, bytenr, 1); break; @@ -645,9 +462,8 
@@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, break; } case BTRFS_TREE_BLOCK_REF_KEY: - ret = __add_prelim_ref(prefs, offset, NULL, - *info_level + 1, 0, - bytenr, 1); + ret = __add_prelim_ref(prefs, offset, info_key, + *info_level + 1, 0, bytenr, 1); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -661,8 +477,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); - ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, + count); break; } default: @@ -680,7 +496,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, */ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, - int info_level, struct list_head *prefs) + struct btrfs_key *info_key, int info_level, + struct list_head *prefs) { struct btrfs_root *extent_root = fs_info->extent_root; int ret; @@ -710,7 +527,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, switch (key.type) { case BTRFS_SHARED_BLOCK_REF_KEY: - ret = __add_prelim_ref(prefs, 0, NULL, + ret = __add_prelim_ref(prefs, 0, info_key, info_level + 1, key.offset, bytenr, 1); break; @@ -726,9 +543,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, break; } case BTRFS_TREE_BLOCK_REF_KEY: - ret = __add_prelim_ref(prefs, key.offset, NULL, - info_level + 1, 0, - bytenr, 1); + ret = __add_prelim_ref(prefs, key.offset, info_key, + info_level + 1, 0, bytenr, 1); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -744,7 +560,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count); break; } default: @@ 
-766,12 +582,11 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, */ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist *refs, struct ulist *roots, - const u64 *extent_item_pos) + u64 seq, struct ulist *refs, struct ulist *roots) { struct btrfs_key key; struct btrfs_path *path; + struct btrfs_key info_key = { 0 }; struct btrfs_delayed_ref_root *delayed_refs = NULL; struct btrfs_delayed_ref_head *head; int info_level = 0; @@ -830,7 +645,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, btrfs_put_delayed_ref(&head->node); goto again; } - ret = __add_delayed_refs(head, delayed_ref_seq, + ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed); if (ret) { spin_unlock(&delayed_refs->lock); @@ -844,17 +659,16 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; int slot; - path->slots[0]--; leaf = path->nodes[0]; - slot = path->slots[0]; + slot = path->slots[0] - 1; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid == bytenr && key.type == BTRFS_EXTENT_ITEM_KEY) { ret = __add_inline_refs(fs_info, path, bytenr, - &info_level, &prefs); + &info_key, &info_level, &prefs); if (ret) goto out; - ret = __add_keyed_refs(fs_info, path, bytenr, + ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, info_level, &prefs); if (ret) goto out; @@ -862,18 +676,21 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, } btrfs_release_path(path); + /* + * when adding the delayed refs above, the info_key might not have + * been known yet. 
Go over the list and replace the missing keys + */ + list_for_each_entry(ref, &prefs_delayed, list) { + if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0) + memcpy(&ref->key, &info_key, sizeof(ref->key)); + } list_splice_init(&prefs_delayed, &prefs); - ret = __add_missing_keys(fs_info, &prefs); - if (ret) - goto out; - ret = __merge_refs(&prefs, 1); if (ret) goto out; - ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, - &prefs, extent_item_pos); + ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs); if (ret) goto out; @@ -892,33 +709,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); } if (ref->count && ref->parent) { - struct extent_inode_elem *eie = NULL; - if (extent_item_pos && !ref->inode_list) { - u32 bsz; - struct extent_buffer *eb; - bsz = btrfs_level_size(fs_info->extent_root, - info_level); - eb = read_tree_block(fs_info->extent_root, - ref->parent, bsz, 0); - BUG_ON(!eb); - ret = find_extent_in_eb(eb, bytenr, - *extent_item_pos, &eie); - ref->inode_list = eie; - free_extent_buffer(eb); - } - ret = ulist_add_merge(refs, ref->parent, - (unsigned long)ref->inode_list, - (unsigned long *)&eie, GFP_NOFS); - if (!ret && extent_item_pos) { - /* - * we've recorded that parent, so we must extend - * its inode list here - */ - BUG_ON(!eie); - while (eie->next) - eie = eie->next; - eie->next = ref->inode_list; - } + ret = ulist_add(refs, ref->parent, 0, GFP_NOFS); BUG_ON(ret < 0); } kfree(ref); @@ -943,28 +734,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, return ret; } -static void free_leaf_list(struct ulist *blocks) -{ - struct ulist_node *node = NULL; - struct extent_inode_elem *eie; - struct extent_inode_elem *eie_next; - struct ulist_iterator uiter; - - ULIST_ITER_INIT(&uiter); - while ((node = ulist_next(blocks, &uiter))) { - if (!node->aux) - continue; - eie = (struct extent_inode_elem *)node->aux; - for (; eie; eie = eie_next) { - eie_next = eie->next; - 
kfree(eie); - } - node->aux = 0; - } - - ulist_free(blocks); -} - /* * Finds all leafs with a reference to the specified combination of bytenr and * offset. key_list_head will point to a list of corresponding keys (caller must @@ -975,9 +744,7 @@ static void free_leaf_list(struct ulist *blocks) */ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist **leafs, - const u64 *extent_item_pos) + u64 num_bytes, u64 seq, struct ulist **leafs) { struct ulist *tmp; int ret; @@ -991,12 +758,11 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, return -ENOMEM; } - ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, - time_seq, *leafs, tmp, extent_item_pos); + ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp); ulist_free(tmp); if (ret < 0 && ret != -ENOENT) { - free_leaf_list(*leafs); + ulist_free(*leafs); return ret; } @@ -1018,12 +784,10 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, */ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist **roots) + u64 num_bytes, u64 seq, struct ulist **roots) { struct ulist *tmp; struct ulist_node *node = NULL; - struct ulist_iterator uiter; int ret; tmp = ulist_alloc(GFP_NOFS); @@ -1035,16 +799,15 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, return -ENOMEM; } - ULIST_ITER_INIT(&uiter); while (1) { - ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, - time_seq, tmp, *roots, NULL); + ret = find_parent_nodes(trans, fs_info, bytenr, seq, + tmp, *roots); if (ret < 0 && ret != -ENOENT) { ulist_free(tmp); ulist_free(*roots); return ret; } - node = ulist_next(tmp, &uiter); + node = ulist_next(tmp, node); if (!node) break; bytenr = node->val; @@ -1330,25 +1093,67 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, 
return 0; } -static int iterate_leaf_refs(struct extent_inode_elem *inode_list, - u64 root, u64 extent_item_objectid, +static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical, + u64 orig_extent_item_objectid, + u64 extent_item_pos, u64 root, iterate_extent_inodes_t *iterate, void *ctx) { - struct extent_inode_elem *eie; + u64 disk_byte; + struct btrfs_key key; + struct btrfs_file_extent_item *fi; + struct extent_buffer *eb; + int slot; + int nritems; int ret = 0; + int extent_type; + u64 data_offset; + u64 data_len; + + eb = read_tree_block(fs_info->tree_root, logical, + fs_info->tree_root->leafsize, 0); + if (!eb) + return -EIO; + + /* + * from the shared data ref, we only have the leaf but we need + * the key. thus, we must look into all items and see that we + * find one (some) with a reference to our extent item. + */ + nritems = btrfs_header_nritems(eb); + for (slot = 0; slot < nritems; ++slot) { + btrfs_item_key_to_cpu(eb, &key, slot); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(eb, fi); + if (extent_type == BTRFS_FILE_EXTENT_INLINE) + continue; + /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ + disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); + if (disk_byte != orig_extent_item_objectid) + continue; + + data_offset = btrfs_file_extent_offset(eb, fi); + data_len = btrfs_file_extent_num_bytes(eb, fi); + + if (extent_item_pos < data_offset || + extent_item_pos >= data_offset + data_len) + continue; - for (eie = inode_list; eie; eie = eie->next) { pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " - "root %llu\n", extent_item_objectid, - eie->inum, eie->offset, root); - ret = iterate(eie->inum, eie->offset, root, ctx); + "root %llu\n", orig_extent_item_objectid, + key.objectid, key.offset, root); + ret = iterate(key.objectid, + key.offset + (extent_item_pos - data_offset), + root, ctx); if (ret) { - 
pr_debug("stopping iteration for %llu due to ret=%d\n", - extent_item_objectid, ret); + pr_debug("stopping iteration because ret=%d\n", ret); break; } } + free_extent_buffer(eb); + return ret; } @@ -1370,10 +1175,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, struct ulist *roots = NULL; struct ulist_node *ref_node = NULL; struct ulist_node *root_node = NULL; - struct seq_list seq_elem = {}; - struct seq_list tree_mod_seq_elem = {}; - struct ulist_iterator ref_uiter; - struct ulist_iterator root_uiter; + struct seq_list seq_elem; struct btrfs_delayed_ref_root *delayed_refs = NULL; pr_debug("resolving all inodes for extent %llu\n", @@ -1390,41 +1192,34 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, spin_lock(&delayed_refs->lock); btrfs_get_delayed_seq(delayed_refs, &seq_elem); spin_unlock(&delayed_refs->lock); - btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); } ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, - seq_elem.seq, tree_mod_seq_elem.seq, &refs, - &extent_item_pos); + extent_item_pos, seq_elem.seq, + &refs); + if (ret) goto out; - ULIST_ITER_INIT(&ref_uiter); - while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { - ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, - seq_elem.seq, - tree_mod_seq_elem.seq, &roots); + while (!ret && (ref_node = ulist_next(refs, ref_node))) { + ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1, + seq_elem.seq, &roots); if (ret) break; - ULIST_ITER_INIT(&root_uiter); - while (!ret && (root_node = ulist_next(roots, &root_uiter))) { - pr_debug("root %llu references leaf %llu, data list " - "%#lx\n", root_node->val, ref_node->val, - ref_node->aux); - ret = iterate_leaf_refs( - (struct extent_inode_elem *)ref_node->aux, - root_node->val, extent_item_objectid, - iterate, ctx); + while (!ret && (root_node = ulist_next(roots, root_node))) { + pr_debug("root %llu references leaf %llu\n", + root_node->val, ref_node->val); + ret = iterate_leaf_refs(fs_info, 
ref_node->val, + extent_item_objectid, + extent_item_pos, root_node->val, + iterate, ctx); } - ulist_free(roots); - roots = NULL; } - free_leaf_list(refs); + ulist_free(refs); ulist_free(roots); out: if (!search_commit_root) { - btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); btrfs_put_delayed_seq(delayed_refs, &seq_elem); btrfs_end_transaction(trans, fs_info->extent_root); } diff --git a/trunk/fs/btrfs/backref.h b/trunk/fs/btrfs/backref.h index c18d8ac7b795..57ea2e959e4d 100644 --- a/trunk/fs/btrfs/backref.h +++ b/trunk/fs/btrfs/backref.h @@ -58,8 +58,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 delayed_ref_seq, u64 time_seq, - struct ulist **roots); + u64 num_bytes, u64 seq, struct ulist **roots); struct btrfs_data_container *init_data_container(u32 total_bytes); struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, diff --git a/trunk/fs/btrfs/btrfs_inode.h b/trunk/fs/btrfs/btrfs_inode.h index e616f8872e69..9b9b15fd5204 100644 --- a/trunk/fs/btrfs/btrfs_inode.h +++ b/trunk/fs/btrfs/btrfs_inode.h @@ -24,20 +24,6 @@ #include "ordered-data.h" #include "delayed-inode.h" -/* - * ordered_data_close is set by truncate when a file that used - * to have good data has been truncated to zero. When it is set - * the btrfs file release call will add this inode to the - * ordered operations list so that we make sure to flush out any - * new data the application may have written before commit. 
- */ -#define BTRFS_INODE_ORDERED_DATA_CLOSE 0 -#define BTRFS_INODE_ORPHAN_META_RESERVED 1 -#define BTRFS_INODE_DUMMY 2 -#define BTRFS_INODE_IN_DEFRAG 3 -#define BTRFS_INODE_DELALLOC_META_RESERVED 4 -#define BTRFS_INODE_HAS_ORPHAN_ITEM 5 - /* in memory btrfs inode */ struct btrfs_inode { /* which subvolume this inode belongs to */ @@ -71,6 +57,9 @@ struct btrfs_inode { /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; + /* for keeping track of orphaned inodes */ + struct list_head i_orphan; + /* list of all the delalloc inodes in the FS. There are times we need * to write all the delalloc pages to disk, and this list is used * to walk them all. @@ -89,13 +78,14 @@ struct btrfs_inode { /* the space_info for where this inode's data allocations are done */ struct btrfs_space_info *space_info; - unsigned long runtime_flags; - /* full 64 bit generation number, struct vfs_inode doesn't have a big * enough field for this. */ u64 generation; + /* sequence number for NFS changes */ + u64 sequence; + /* * transid of the trans_handle that last modified this inode */ @@ -154,10 +144,23 @@ struct btrfs_inode { unsigned outstanding_extents; unsigned reserved_extents; + /* + * ordered_data_close is set by truncate when a file that used + * to have good data has been truncated to zero. When it is set + * the btrfs file release call will add this inode to the + * ordered operations list so that we make sure to flush out any + * new data the application may have written before commit. 
+ */ + unsigned ordered_data_close:1; + unsigned orphan_meta_reserved:1; + unsigned dummy_inode:1; + unsigned in_defrag:1; + unsigned delalloc_meta_reserved:1; + /* * always compress this one file */ - unsigned force_compress; + unsigned force_compress:4; struct btrfs_delayed_node *delayed_node; @@ -199,17 +202,4 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, return false; } -static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret = 0; - - mutex_lock(&root->log_mutex); - if (BTRFS_I(inode)->logged_trans == generation && - BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) - ret = 1; - mutex_unlock(&root->log_mutex); - return ret; -} - #endif diff --git a/trunk/fs/btrfs/check-integrity.c b/trunk/fs/btrfs/check-integrity.c index 9cebb1fd6a3c..c053e90f2006 100644 --- a/trunk/fs/btrfs/check-integrity.c +++ b/trunk/fs/btrfs/check-integrity.c @@ -103,6 +103,8 @@ #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, * excluding " [...]" */ +#define BTRFSIC_BLOCK_SIZE PAGE_SIZE + #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) /* @@ -208,9 +210,8 @@ struct btrfsic_block_data_ctx { u64 dev_bytenr; /* physical bytenr on device */ u32 len; struct btrfsic_dev_state *dev; - char **datav; - struct page **pagev; - void *mem_to_free; + char *data; + struct buffer_head *bh; /* do not use if set to NULL */ }; /* This structure is used to implement recursion without occupying @@ -242,8 +243,6 @@ struct btrfsic_state { struct btrfs_root *root; u64 max_superblock_generation; struct btrfsic_block *latest_superblock; - u32 metablock_size; - u32 datablock_size; }; static void btrfsic_block_init(struct btrfsic_block *b); @@ -291,10 +290,8 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, static int btrfsic_process_metablock(struct btrfsic_state *state, struct btrfsic_block *block, struct 
btrfsic_block_data_ctx *block_ctx, + struct btrfs_header *hdr, int limit_nesting, int force_iodone_flag); -static void btrfsic_read_from_block_data( - struct btrfsic_block_data_ctx *block_ctx, - void *dst, u32 offset, size_t len); static int btrfsic_create_link_to_next_block( struct btrfsic_state *state, struct btrfsic_block *block, @@ -321,13 +318,12 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); static int btrfsic_read_block(struct btrfsic_state *state, struct btrfsic_block_data_ctx *block_ctx); static void btrfsic_dump_database(struct btrfsic_state *state); -static void btrfsic_complete_bio_end_io(struct bio *bio, int err); static int btrfsic_test_for_metadata(struct btrfsic_state *state, - char **datav, unsigned int num_pages); + const u8 *data, unsigned int size); static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, - u64 dev_bytenr, char **mapped_datav, - unsigned int num_pages, - struct bio *bio, int *bio_is_patched, + u64 dev_bytenr, u8 *mapped_data, + unsigned int len, struct bio *bio, + int *bio_is_patched, struct buffer_head *bh, int submit_bio_bh_rw); static int btrfsic_process_written_superblock( @@ -379,7 +375,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup( static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, u64 bytenr, struct btrfsic_dev_state *dev_state, - u64 dev_bytenr); + u64 dev_bytenr, char *data); static struct mutex btrfsic_mutex; static int btrfsic_is_initialized; @@ -655,7 +651,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, int pass; BUG_ON(NULL == state); - selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); + selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); if (NULL == selected_super) { printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); return -1; @@ -722,7 +718,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, num_copies = 
btrfs_num_copies(&state->root->fs_info->mapping_tree, - next_bytenr, state->metablock_size); + next_bytenr, PAGE_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", (unsigned long long)next_bytenr, num_copies); @@ -731,9 +727,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfsic_block *next_block; struct btrfsic_block_data_ctx tmp_next_block_ctx; struct btrfsic_block_link *l; + struct btrfs_header *hdr; - ret = btrfsic_map_block(state, next_bytenr, - state->metablock_size, + ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, &tmp_next_block_ctx, mirror_num); if (ret) { @@ -762,7 +758,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, BUG_ON(NULL == l); ret = btrfsic_read_block(state, &tmp_next_block_ctx); - if (ret < (int)PAGE_CACHE_SIZE) { + if (ret < (int)BTRFSIC_BLOCK_SIZE) { printk(KERN_INFO "btrfsic: read @logical %llu failed!\n", (unsigned long long) @@ -772,9 +768,11 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, return -1; } + hdr = (struct btrfs_header *)tmp_next_block_ctx.data; ret = btrfsic_process_metablock(state, next_block, &tmp_next_block_ctx, + hdr, BTRFS_MAX_LEVEL + 3, 1); btrfsic_release_block_ctx(&tmp_next_block_ctx); } @@ -801,10 +799,7 @@ static int btrfsic_process_superblock_dev_mirror( /* super block bytenr is always the unmapped device bytenr */ dev_bytenr = btrfs_sb_offset(superblock_mirror_num); - if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) - return -1; - bh = __bread(superblock_bdev, dev_bytenr / 4096, - BTRFS_SUPER_INFO_SIZE); + bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); if (NULL == bh) return -1; super_tmp = (struct btrfs_super_block *) @@ -813,10 +808,7 @@ static int btrfsic_process_superblock_dev_mirror( if (btrfs_super_bytenr(super_tmp) != dev_bytenr || strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, sizeof(super_tmp->magic)) || - memcmp(device->uuid, 
super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || - btrfs_super_nodesize(super_tmp) != state->metablock_size || - btrfs_super_leafsize(super_tmp) != state->metablock_size || - btrfs_super_sectorsize(super_tmp) != state->datablock_size) { + memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { brelse(bh); return 0; } @@ -901,7 +893,7 @@ static int btrfsic_process_superblock_dev_mirror( num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, - next_bytenr, state->metablock_size); + next_bytenr, PAGE_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", (unsigned long long)next_bytenr, num_copies); @@ -910,8 +902,7 @@ static int btrfsic_process_superblock_dev_mirror( struct btrfsic_block_data_ctx tmp_next_block_ctx; struct btrfsic_block_link *l; - if (btrfsic_map_block(state, next_bytenr, - state->metablock_size, + if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, &tmp_next_block_ctx, mirror_num)) { printk(KERN_INFO "btrfsic: btrfsic_map_block(" @@ -975,15 +966,13 @@ static int btrfsic_process_metablock( struct btrfsic_state *state, struct btrfsic_block *const first_block, struct btrfsic_block_data_ctx *const first_block_ctx, + struct btrfs_header *const first_hdr, int first_limit_nesting, int force_iodone_flag) { struct btrfsic_stack_frame initial_stack_frame = { 0 }; struct btrfsic_stack_frame *sf; struct btrfsic_stack_frame *next_stack; - struct btrfs_header *const first_hdr = - (struct btrfs_header *)first_block_ctx->datav[0]; - BUG_ON(!first_hdr); sf = &initial_stack_frame; sf->error = 0; sf->i = -1; @@ -1023,47 +1012,21 @@ static int btrfsic_process_metablock( } if (sf->i < sf->nr) { - struct btrfs_item disk_item; - u32 disk_item_offset = - (uintptr_t)(leafhdr->items + sf->i) - - (uintptr_t)leafhdr; - struct btrfs_disk_key *disk_key; + struct btrfs_item *disk_item = leafhdr->items + sf->i; + struct btrfs_disk_key *disk_key = &disk_item->key; u8 type; - u32 item_offset; + const 
u32 item_offset = le32_to_cpu(disk_item->offset); - if (disk_item_offset + sizeof(struct btrfs_item) > - sf->block_ctx->len) { -leaf_item_out_of_bounce_error: - printk(KERN_INFO - "btrfsic: leaf item out of bounce at logical %llu, dev %s\n", - sf->block_ctx->start, - sf->block_ctx->dev->name); - goto one_stack_frame_backwards; - } - btrfsic_read_from_block_data(sf->block_ctx, - &disk_item, - disk_item_offset, - sizeof(struct btrfs_item)); - item_offset = le32_to_cpu(disk_item.offset); - disk_key = &disk_item.key; type = disk_key->type; if (BTRFS_ROOT_ITEM_KEY == type) { - struct btrfs_root_item root_item; - u32 root_item_offset; - u64 next_bytenr; - - root_item_offset = item_offset + - offsetof(struct btrfs_leaf, items); - if (root_item_offset + - sizeof(struct btrfs_root_item) > - sf->block_ctx->len) - goto leaf_item_out_of_bounce_error; - btrfsic_read_from_block_data( - sf->block_ctx, &root_item, - root_item_offset, - sizeof(struct btrfs_root_item)); - next_bytenr = le64_to_cpu(root_item.bytenr); + const struct btrfs_root_item *const root_item = + (struct btrfs_root_item *) + (sf->block_ctx->data + + offsetof(struct btrfs_leaf, items) + + item_offset); + const u64 next_bytenr = + le64_to_cpu(root_item->bytenr); sf->error = btrfsic_create_link_to_next_block( @@ -1078,7 +1041,7 @@ static int btrfsic_process_metablock( &sf->num_copies, &sf->mirror_num, disk_key, - le64_to_cpu(root_item. 
+ le64_to_cpu(root_item-> generation)); if (sf->error) goto one_stack_frame_backwards; @@ -1086,7 +1049,7 @@ static int btrfsic_process_metablock( if (NULL != sf->next_block) { struct btrfs_header *const next_hdr = (struct btrfs_header *) - sf->next_block_ctx.datav[0]; + sf->next_block_ctx.data; next_stack = btrfsic_stack_frame_alloc(); @@ -1148,24 +1111,10 @@ static int btrfsic_process_metablock( } if (sf->i < sf->nr) { - struct btrfs_key_ptr key_ptr; - u32 key_ptr_offset; - u64 next_bytenr; - - key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - - (uintptr_t)nodehdr; - if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > - sf->block_ctx->len) { - printk(KERN_INFO - "btrfsic: node item out of bounce at logical %llu, dev %s\n", - sf->block_ctx->start, - sf->block_ctx->dev->name); - goto one_stack_frame_backwards; - } - btrfsic_read_from_block_data( - sf->block_ctx, &key_ptr, key_ptr_offset, - sizeof(struct btrfs_key_ptr)); - next_bytenr = le64_to_cpu(key_ptr.blockptr); + struct btrfs_key_ptr *disk_key_ptr = + nodehdr->ptrs + sf->i; + const u64 next_bytenr = + le64_to_cpu(disk_key_ptr->blockptr); sf->error = btrfsic_create_link_to_next_block( state, @@ -1178,15 +1127,15 @@ static int btrfsic_process_metablock( force_iodone_flag, &sf->num_copies, &sf->mirror_num, - &key_ptr.key, - le64_to_cpu(key_ptr.generation)); + &disk_key_ptr->key, + le64_to_cpu(disk_key_ptr->generation)); if (sf->error) goto one_stack_frame_backwards; if (NULL != sf->next_block) { struct btrfs_header *const next_hdr = (struct btrfs_header *) - sf->next_block_ctx.datav[0]; + sf->next_block_ctx.data; next_stack = btrfsic_stack_frame_alloc(); if (NULL == next_stack) @@ -1232,35 +1181,6 @@ static int btrfsic_process_metablock( return sf->error; } -static void btrfsic_read_from_block_data( - struct btrfsic_block_data_ctx *block_ctx, - void *dstv, u32 offset, size_t len) -{ - size_t cur; - size_t offset_in_page; - char *kaddr; - char *dst = (char *)dstv; - size_t start_offset = block_ctx->start & 
((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; - - WARN_ON(offset + len > block_ctx->len); - offset_in_page = (start_offset + offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - while (len > 0) { - cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); - BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT); - kaddr = block_ctx->datav[i]; - memcpy(dst, kaddr + offset_in_page, cur); - - dst += cur; - len -= cur; - offset_in_page = 0; - i++; - } -} - static int btrfsic_create_link_to_next_block( struct btrfsic_state *state, struct btrfsic_block *block, @@ -1284,7 +1204,7 @@ static int btrfsic_create_link_to_next_block( if (0 == *num_copiesp) { *num_copiesp = btrfs_num_copies(&state->root->fs_info->mapping_tree, - next_bytenr, state->metablock_size); + next_bytenr, PAGE_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", (unsigned long long)next_bytenr, *num_copiesp); @@ -1299,7 +1219,7 @@ static int btrfsic_create_link_to_next_block( "btrfsic_create_link_to_next_block(mirror_num=%d)\n", *mirror_nump); ret = btrfsic_map_block(state, next_bytenr, - state->metablock_size, + BTRFSIC_BLOCK_SIZE, next_block_ctx, *mirror_nump); if (ret) { printk(KERN_INFO @@ -1394,7 +1314,7 @@ static int btrfsic_create_link_to_next_block( if (limit_nesting > 0 && did_alloc_block_link) { ret = btrfsic_read_block(state, next_block_ctx); - if (ret < (int)next_block_ctx->len) { + if (ret < (int)BTRFSIC_BLOCK_SIZE) { printk(KERN_INFO "btrfsic: read block @logical %llu failed!\n", (unsigned long long)next_bytenr); @@ -1419,74 +1339,43 @@ static int btrfsic_handle_extent_data( u32 item_offset, int force_iodone_flag) { int ret; - struct btrfs_file_extent_item file_extent_item; - u64 file_extent_item_offset; - u64 next_bytenr; - u64 num_bytes; - u64 generation; + struct btrfs_file_extent_item *file_extent_item = + (struct btrfs_file_extent_item *)(block_ctx->data 
+ + offsetof(struct btrfs_leaf, + items) + item_offset); + u64 next_bytenr = + le64_to_cpu(file_extent_item->disk_bytenr) + + le64_to_cpu(file_extent_item->offset); + u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes); + u64 generation = le64_to_cpu(file_extent_item->generation); struct btrfsic_block_link *l; - file_extent_item_offset = offsetof(struct btrfs_leaf, items) + - item_offset; - if (file_extent_item_offset + - offsetof(struct btrfs_file_extent_item, disk_num_bytes) > - block_ctx->len) { - printk(KERN_INFO - "btrfsic: file item out of bounce at logical %llu, dev %s\n", - block_ctx->start, block_ctx->dev->name); - return -1; - } - - btrfsic_read_from_block_data(block_ctx, &file_extent_item, - file_extent_item_offset, - offsetof(struct btrfs_file_extent_item, disk_num_bytes)); - if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || - ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { - if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) - printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", - file_extent_item.type, - (unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr)); - return 0; - } - - if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > - block_ctx->len) { - printk(KERN_INFO - "btrfsic: file item out of bounce at logical %llu, dev %s\n", - block_ctx->start, block_ctx->dev->name); - return -1; - } - btrfsic_read_from_block_data(block_ctx, &file_extent_item, - file_extent_item_offset, - sizeof(struct btrfs_file_extent_item)); - next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + - le64_to_cpu(file_extent_item.offset); - generation = le64_to_cpu(file_extent_item.generation); - num_bytes = le64_to_cpu(file_extent_item.num_bytes); - generation = le64_to_cpu(file_extent_item.generation); - if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," " offset = %llu, num_bytes = %llu\n", - file_extent_item.type, + file_extent_item->type, 
(unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr), - (unsigned long long)le64_to_cpu(file_extent_item.offset), - (unsigned long long)num_bytes); + le64_to_cpu(file_extent_item->disk_bytenr), + (unsigned long long) + le64_to_cpu(file_extent_item->offset), + (unsigned long long) + le64_to_cpu(file_extent_item->num_bytes)); + if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || + ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) + return 0; while (num_bytes > 0) { u32 chunk_len; int num_copies; int mirror_num; - if (num_bytes > state->datablock_size) - chunk_len = state->datablock_size; + if (num_bytes > BTRFSIC_BLOCK_SIZE) + chunk_len = BTRFSIC_BLOCK_SIZE; else chunk_len = num_bytes; num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, - next_bytenr, state->datablock_size); + next_bytenr, PAGE_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", (unsigned long long)next_bytenr, num_copies); @@ -1586,9 +1475,8 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, block_ctx_out->dev_bytenr = multi->stripes[0].physical; block_ctx_out->start = bytenr; block_ctx_out->len = len; - block_ctx_out->datav = NULL; - block_ctx_out->pagev = NULL; - block_ctx_out->mem_to_free = NULL; + block_ctx_out->data = NULL; + block_ctx_out->bh = NULL; if (0 == ret) kfree(multi); @@ -1608,9 +1496,8 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, block_ctx_out->dev_bytenr = bytenr; block_ctx_out->start = bytenr; block_ctx_out->len = len; - block_ctx_out->datav = NULL; - block_ctx_out->pagev = NULL; - block_ctx_out->mem_to_free = NULL; + block_ctx_out->data = NULL; + block_ctx_out->bh = NULL; if (NULL != block_ctx_out->dev) { return 0; } else { @@ -1621,127 +1508,38 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) { - if 
(block_ctx->mem_to_free) { - unsigned int num_pages; - - BUG_ON(!block_ctx->datav); - BUG_ON(!block_ctx->pagev); - num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - while (num_pages > 0) { - num_pages--; - if (block_ctx->datav[num_pages]) { - kunmap(block_ctx->pagev[num_pages]); - block_ctx->datav[num_pages] = NULL; - } - if (block_ctx->pagev[num_pages]) { - __free_page(block_ctx->pagev[num_pages]); - block_ctx->pagev[num_pages] = NULL; - } - } - - kfree(block_ctx->mem_to_free); - block_ctx->mem_to_free = NULL; - block_ctx->pagev = NULL; - block_ctx->datav = NULL; + if (NULL != block_ctx->bh) { + brelse(block_ctx->bh); + block_ctx->bh = NULL; } } static int btrfsic_read_block(struct btrfsic_state *state, struct btrfsic_block_data_ctx *block_ctx) { - unsigned int num_pages; - unsigned int i; - u64 dev_bytenr; - int ret; - - BUG_ON(block_ctx->datav); - BUG_ON(block_ctx->pagev); - BUG_ON(block_ctx->mem_to_free); - if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { + block_ctx->bh = NULL; + if (block_ctx->dev_bytenr & 4095) { printk(KERN_INFO "btrfsic: read_block() with unaligned bytenr %llu\n", (unsigned long long)block_ctx->dev_bytenr); return -1; } - - num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + - sizeof(*block_ctx->pagev)) * - num_pages, GFP_NOFS); - if (!block_ctx->mem_to_free) + if (block_ctx->len > 4096) { + printk(KERN_INFO + "btrfsic: read_block() with too huge size %d\n", + block_ctx->len); return -1; - block_ctx->datav = block_ctx->mem_to_free; - block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); - for (i = 0; i < num_pages; i++) { - block_ctx->pagev[i] = alloc_page(GFP_NOFS); - if (!block_ctx->pagev[i]) - return -1; } - dev_bytenr = block_ctx->dev_bytenr; - for (i = 0; i < num_pages;) { - struct bio *bio; - unsigned int j; - DECLARE_COMPLETION_ONSTACK(complete); - - bio = bio_alloc(GFP_NOFS, num_pages - i); 
- if (!bio) { - printk(KERN_INFO - "btrfsic: bio_alloc() for %u pages failed!\n", - num_pages - i); - return -1; - } - bio->bi_bdev = block_ctx->dev->bdev; - bio->bi_sector = dev_bytenr >> 9; - bio->bi_end_io = btrfsic_complete_bio_end_io; - bio->bi_private = &complete; - - for (j = i; j < num_pages; j++) { - ret = bio_add_page(bio, block_ctx->pagev[j], - PAGE_CACHE_SIZE, 0); - if (PAGE_CACHE_SIZE != ret) - break; - } - if (j == i) { - printk(KERN_INFO - "btrfsic: error, failed to add a single page!\n"); - return -1; - } - submit_bio(READ, bio); - - /* this will also unplug the queue */ - wait_for_completion(&complete); - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk(KERN_INFO - "btrfsic: read error at logical %llu dev %s!\n", - block_ctx->start, block_ctx->dev->name); - bio_put(bio); - return -1; - } - bio_put(bio); - dev_bytenr += (j - i) * PAGE_CACHE_SIZE; - i = j; - } - for (i = 0; i < num_pages; i++) { - block_ctx->datav[i] = kmap(block_ctx->pagev[i]); - if (!block_ctx->datav[i]) { - printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", - block_ctx->dev->name); - return -1; - } - } + block_ctx->bh = __bread(block_ctx->dev->bdev, + block_ctx->dev_bytenr >> 12, 4096); + if (NULL == block_ctx->bh) + return -1; + block_ctx->data = block_ctx->bh->b_data; return block_ctx->len; } -static void btrfsic_complete_bio_end_io(struct bio *bio, int err) -{ - complete((struct completion *)bio->bi_private); -} - static void btrfsic_dump_database(struct btrfsic_state *state) { struct list_head *elem_all; @@ -1819,39 +1617,32 @@ static void btrfsic_dump_database(struct btrfsic_state *state) * (note that this test fails for the super block) */ static int btrfsic_test_for_metadata(struct btrfsic_state *state, - char **datav, unsigned int num_pages) + const u8 *data, unsigned int size) { struct btrfs_header *h; u8 csum[BTRFS_CSUM_SIZE]; u32 crc = ~(u32)0; - unsigned int i; + int fail = 0; + int crc_fail = 0; - if (num_pages * PAGE_CACHE_SIZE < state->metablock_size) - 
return 1; /* not metadata */ - num_pages = state->metablock_size >> PAGE_CACHE_SHIFT; - h = (struct btrfs_header *)datav[0]; + h = (struct btrfs_header *)data; if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) - return 1; - - for (i = 0; i < num_pages; i++) { - u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); - size_t sublen = i ? PAGE_CACHE_SIZE : - (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); + fail++; - crc = crc32c(crc, data, sublen); - } + crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); btrfs_csum_final(crc, csum); if (memcmp(csum, h->csum, state->csum_size)) - return 1; + crc_fail++; - return 0; /* is metadata */ + return fail || crc_fail; } static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, - u64 dev_bytenr, char **mapped_datav, - unsigned int num_pages, - struct bio *bio, int *bio_is_patched, + u64 dev_bytenr, + u8 *mapped_data, unsigned int len, + struct bio *bio, + int *bio_is_patched, struct buffer_head *bh, int submit_bio_bh_rw) { @@ -1861,19 +1652,12 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, int ret; struct btrfsic_state *state = dev_state->state; struct block_device *bdev = dev_state->bdev; - unsigned int processed_len; + WARN_ON(len > PAGE_SIZE); + is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len)); if (NULL != bio_is_patched) *bio_is_patched = 0; -again: - if (num_pages == 0) - return; - - processed_len = 0; - is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, - num_pages)); - block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, &state->block_hashtable); if (NULL != block) { @@ -1883,16 +1667,8 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, if (block->is_superblock) { bytenr = le64_to_cpu(((struct btrfs_super_block *) - mapped_datav[0])->bytenr); - if (num_pages * PAGE_CACHE_SIZE < - BTRFS_SUPER_INFO_SIZE) { - printk(KERN_INFO - "btrfsic: cannot work with too short 
bios!\n"); - return; - } + mapped_data)->bytenr); is_metadata = 1; - BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); - processed_len = BTRFS_SUPER_INFO_SIZE; if (state->print_mask & BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { printk(KERN_INFO @@ -1902,18 +1678,12 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, } if (is_metadata) { if (!block->is_superblock) { - if (num_pages * PAGE_CACHE_SIZE < - state->metablock_size) { - printk(KERN_INFO - "btrfsic: cannot work with too short bios!\n"); - return; - } - processed_len = state->metablock_size; bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + mapped_data)->bytenr); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, - dev_bytenr); + dev_bytenr, + mapped_data); } if (block->logical_bytenr != bytenr) { printk(KERN_INFO @@ -1940,13 +1710,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, block->mirror_num, btrfsic_get_block_type(state, block)); } else { - if (num_pages * PAGE_CACHE_SIZE < - state->datablock_size) { - printk(KERN_INFO - "btrfsic: cannot work with too short bios!\n"); - return; - } - processed_len = state->datablock_size; bytenr = block->logical_bytenr; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO @@ -1984,7 +1747,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, le64_to_cpu(block->disk_key.offset), (unsigned long long) le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation), + mapped_data)->generation), (unsigned long long) state->max_superblock_generation); btrfsic_dump_tree(state); @@ -2002,10 +1765,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, (unsigned long long)block->generation, (unsigned long long) le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation)); + mapped_data)->generation)); /* it would not be safe to go on */ btrfsic_dump_tree(state); - goto continue_loop; + 
return; } /* @@ -2033,19 +1796,18 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, } if (block->is_superblock) - ret = btrfsic_map_superblock(state, bytenr, - processed_len, + ret = btrfsic_map_superblock(state, bytenr, len, bdev, &block_ctx); else - ret = btrfsic_map_block(state, bytenr, processed_len, + ret = btrfsic_map_block(state, bytenr, len, &block_ctx, 0); if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" " failed!\n", (unsigned long long)bytenr); - goto continue_loop; + return; } - block_ctx.datav = mapped_datav; + block_ctx.data = mapped_data; /* the following is required in case of writes to mirrors, * use the same that was used for the lookup */ block_ctx.dev = dev_state; @@ -2101,13 +1863,11 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, block->logical_bytenr = bytenr; block->is_metadata = 1; if (block->is_superblock) { - BUG_ON(PAGE_CACHE_SIZE != - BTRFS_SUPER_INFO_SIZE); ret = btrfsic_process_written_superblock( state, block, (struct btrfs_super_block *) - mapped_datav[0]); + mapped_data); if (state->print_mask & BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { printk(KERN_INFO @@ -2120,6 +1880,8 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, state, block, &block_ctx, + (struct btrfs_header *) + block_ctx.data, 0, 0); } if (ret) @@ -2150,30 +1912,26 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, u64 bytenr; if (!is_metadata) { - processed_len = state->datablock_size; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block (%s/%llu/?)" " !found in hash table, D.\n", dev_state->name, (unsigned long long)dev_bytenr); - if (!state->include_extent_data) { - /* ignore that written D block */ - goto continue_loop; - } + if (!state->include_extent_data) + return; /* ignore that written D block */ /* this is getting ugly for the * include_extent_data case... 
*/ bytenr = 0; /* unknown */ block_ctx.start = bytenr; - block_ctx.len = processed_len; - block_ctx.mem_to_free = NULL; - block_ctx.pagev = NULL; + block_ctx.len = len; + block_ctx.bh = NULL; } else { - processed_len = state->metablock_size; bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + mapped_data)->bytenr); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, - dev_bytenr); + dev_bytenr, + mapped_data); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block @%llu (%s/%llu/?)" @@ -2182,17 +1940,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, dev_state->name, (unsigned long long)dev_bytenr); - ret = btrfsic_map_block(state, bytenr, processed_len, - &block_ctx, 0); + ret = btrfsic_map_block(state, bytenr, len, &block_ctx, + 0); if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" " failed!\n", (unsigned long long)dev_bytenr); - goto continue_loop; + return; } } - block_ctx.datav = mapped_datav; + block_ctx.data = mapped_data; /* the following is required in case of writes to mirrors, * use the same that was used for the lookup */ block_ctx.dev = dev_state; @@ -2202,7 +1960,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, if (NULL == block) { printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); btrfsic_release_block_ctx(&block_ctx); - goto continue_loop; + return; } block->dev_state = dev_state; block->dev_bytenr = dev_bytenr; @@ -2262,7 +2020,9 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, if (is_metadata) { ret = btrfsic_process_metablock(state, block, - &block_ctx, 0, 0); + &block_ctx, + (struct btrfs_header *) + block_ctx.data, 0, 0); if (ret) printk(KERN_INFO "btrfsic: process_metablock(root @%llu)" @@ -2271,13 +2031,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, } btrfsic_release_block_ctx(&block_ctx); } - -continue_loop: - 
BUG_ON(!processed_len); - dev_bytenr += processed_len; - mapped_datav += processed_len >> PAGE_CACHE_SHIFT; - num_pages -= processed_len >> PAGE_CACHE_SHIFT; - goto again; } static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) @@ -2460,7 +2213,7 @@ static int btrfsic_process_written_superblock( num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, - next_bytenr, BTRFS_SUPER_INFO_SIZE); + next_bytenr, PAGE_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", (unsigned long long)next_bytenr, num_copies); @@ -2471,8 +2224,7 @@ static int btrfsic_process_written_superblock( printk(KERN_INFO "btrfsic_process_written_superblock(" "mirror_num=%d)\n", mirror_num); - ret = btrfsic_map_block(state, next_bytenr, - BTRFS_SUPER_INFO_SIZE, + ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, &tmp_next_block_ctx, mirror_num); if (ret) { @@ -2937,7 +2689,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, u64 bytenr, struct btrfsic_dev_state *dev_state, - u64 dev_bytenr) + u64 dev_bytenr, char *data) { int num_copies; int mirror_num; @@ -2946,10 +2698,10 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, int match = 0; num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, - bytenr, state->metablock_size); + bytenr, PAGE_SIZE); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { - ret = btrfsic_map_block(state, bytenr, state->metablock_size, + ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, &block_ctx, mirror_num); if (ret) { printk(KERN_INFO "btrfsic:" @@ -2975,8 +2727,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, (unsigned long long)bytenr, dev_state->name, (unsigned long long)dev_bytenr); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { - ret = btrfsic_map_block(state, bytenr, - state->metablock_size, + ret = 
btrfsic_map_block(state, bytenr, PAGE_SIZE, &block_ctx, mirror_num); if (ret) continue; @@ -3030,13 +2781,13 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) (unsigned long)bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, - &bh->b_data, 1, NULL, + bh->b_data, bh->b_size, NULL, NULL, bh, rw); } else if (NULL != dev_state && (rw & REQ_FLUSH)) { if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", + "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n", rw, bh->b_bdev); if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { if ((dev_state->state->print_mask & @@ -3085,7 +2836,6 @@ void btrfsic_submit_bio(int rw, struct bio *bio) unsigned int i; u64 dev_bytenr; int bio_is_patched; - char **mapped_datav; dev_bytenr = 512 * bio->bi_sector; bio_is_patched = 0; @@ -3098,46 +2848,35 @@ void btrfsic_submit_bio(int rw, struct bio *bio) (unsigned long long)dev_bytenr, bio->bi_bdev); - mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, - GFP_NOFS); - if (!mapped_datav) - goto leave; for (i = 0; i < bio->bi_vcnt; i++) { - BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); - mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); - if (!mapped_datav[i]) { - while (i > 0) { - i--; - kunmap(bio->bi_io_vec[i].bv_page); - } - kfree(mapped_datav); - goto leave; - } + u8 *mapped_data; + + mapped_data = kmap(bio->bi_io_vec[i].bv_page); if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | BTRFSIC_PRINT_MASK_VERBOSE) == (dev_state->state->print_mask & (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | BTRFSIC_PRINT_MASK_VERBOSE))) printk(KERN_INFO - "#%u: page=%p, len=%u, offset=%u\n", + "#%u: page=%p, mapped=%p, len=%u," + " offset=%u\n", i, bio->bi_io_vec[i].bv_page, + mapped_data, bio->bi_io_vec[i].bv_len, bio->bi_io_vec[i].bv_offset); - } - btrfsic_process_written_block(dev_state, dev_bytenr, - mapped_datav, bio->bi_vcnt, - bio, &bio_is_patched, - NULL, rw); - while (i > 0) { - i--; + 
btrfsic_process_written_block(dev_state, dev_bytenr, + mapped_data, + bio->bi_io_vec[i].bv_len, + bio, &bio_is_patched, + NULL, rw); kunmap(bio->bi_io_vec[i].bv_page); + dev_bytenr += bio->bi_io_vec[i].bv_len; } - kfree(mapped_datav); } else if (NULL != dev_state && (rw & REQ_FLUSH)) { if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", + "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n", rw, bio->bi_bdev); if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { if ((dev_state->state->print_mask & @@ -3164,7 +2903,6 @@ void btrfsic_submit_bio(int rw, struct bio *bio) bio->bi_end_io = btrfsic_bio_end_io; } } -leave: mutex_unlock(&btrfsic_mutex); submit_bio(rw, bio); @@ -3179,30 +2917,6 @@ int btrfsic_mount(struct btrfs_root *root, struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; - if (root->nodesize != root->leafsize) { - printk(KERN_INFO - "btrfsic: cannot handle nodesize %d != leafsize %d!\n", - root->nodesize, root->leafsize); - return -1; - } - if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->nodesize, (unsigned long)PAGE_CACHE_SIZE); - return -1; - } - if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->leafsize, (unsigned long)PAGE_CACHE_SIZE); - return -1; - } - if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { - printk(KERN_INFO - "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); - return -1; - } state = kzalloc(sizeof(*state), GFP_NOFS); if (NULL == state) { printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); @@ -3219,8 +2933,6 @@ int btrfsic_mount(struct btrfs_root *root, state->print_mask = print_mask; state->include_extent_data = 
including_extent_data; state->csum_size = 0; - state->metablock_size = root->nodesize; - state->datablock_size = root->sectorsize; INIT_LIST_HEAD(&state->all_blocks_list); btrfsic_block_hashtable_init(&state->block_hashtable); btrfsic_block_link_hashtable_init(&state->block_link_hashtable); @@ -3337,7 +3049,7 @@ void btrfsic_unmount(struct btrfs_root *root, btrfsic_block_link_free(l); } - if (b_all->is_iodone || b_all->never_written) + if (b_all->is_iodone) btrfsic_block_free(b_all); else printk(KERN_INFO "btrfs: attempt to free %c-block" diff --git a/trunk/fs/btrfs/ctree.c b/trunk/fs/btrfs/ctree.c index d7a96cfdc50a..4106264fbc65 100644 --- a/trunk/fs/btrfs/ctree.c +++ b/trunk/fs/btrfs/ctree.c @@ -18,7 +18,6 @@ #include #include -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -38,16 +37,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot, - int tree_mod_log); -static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb); -struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize, u64 parent_transid, - u64 time_seq); -struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize, - u64 time_seq); + struct btrfs_path *path, int level, int slot); struct btrfs_path *btrfs_alloc_path(void) { @@ -265,7 +255,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, cow = btrfs_alloc_free_block(trans, root, buf->len, 0, new_root_objectid, &disk_key, level, - buf->start, 0); + buf->start, 0, 1); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -298,434 +288,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } -enum mod_log_op { - MOD_LOG_KEY_REPLACE, - MOD_LOG_KEY_ADD, - MOD_LOG_KEY_REMOVE, - MOD_LOG_KEY_REMOVE_WHILE_FREEING, 
- MOD_LOG_KEY_REMOVE_WHILE_MOVING, - MOD_LOG_MOVE_KEYS, - MOD_LOG_ROOT_REPLACE, -}; - -struct tree_mod_move { - int dst_slot; - int nr_items; -}; - -struct tree_mod_root { - u64 logical; - u8 level; -}; - -struct tree_mod_elem { - struct rb_node node; - u64 index; /* shifted logical */ - struct seq_list elem; - enum mod_log_op op; - - /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ - int slot; - - /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ - u64 generation; - - /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ - struct btrfs_disk_key key; - u64 blockptr; - - /* this is used for op == MOD_LOG_MOVE_KEYS */ - struct tree_mod_move move; - - /* this is used for op == MOD_LOG_ROOT_REPLACE */ - struct tree_mod_root old_root; -}; - -static inline void -__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) -{ - elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); - list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); -} - -void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem) -{ - elem->flags = 1; - spin_lock(&fs_info->tree_mod_seq_lock); - __get_tree_mod_seq(fs_info, elem); - spin_unlock(&fs_info->tree_mod_seq_lock); -} - -void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem) -{ - struct rb_root *tm_root; - struct rb_node *node; - struct rb_node *next; - struct seq_list *cur_elem; - struct tree_mod_elem *tm; - u64 min_seq = (u64)-1; - u64 seq_putting = elem->seq; - - if (!seq_putting) - return; - - BUG_ON(!(elem->flags & 1)); - spin_lock(&fs_info->tree_mod_seq_lock); - list_del(&elem->list); - - list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { - if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { - if (seq_putting > cur_elem->seq) { - /* - * blocker with lower sequence number exists, we - * cannot remove anything from the log - */ - goto out; - } - min_seq = cur_elem->seq; - } - } - - /* - * anything that's lower than 
the lowest existing (read: blocked) - * sequence number can be removed from the tree. - */ - write_lock(&fs_info->tree_mod_log_lock); - tm_root = &fs_info->tree_mod_log; - for (node = rb_first(tm_root); node; node = next) { - next = rb_next(node); - tm = container_of(node, struct tree_mod_elem, node); - if (tm->elem.seq > min_seq) - continue; - rb_erase(node, tm_root); - list_del(&tm->elem.list); - kfree(tm); - } - write_unlock(&fs_info->tree_mod_log_lock); -out: - spin_unlock(&fs_info->tree_mod_seq_lock); -} - -/* - * key order of the log: - * index -> sequence - * - * the index is the shifted logical of the *new* root node for root replace - * operations, or the shifted logical of the affected block for all other - * operations. - */ -static noinline int -__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) -{ - struct rb_root *tm_root; - struct rb_node **new; - struct rb_node *parent = NULL; - struct tree_mod_elem *cur; - int ret = 0; - - BUG_ON(!tm || !tm->elem.seq); - - write_lock(&fs_info->tree_mod_log_lock); - tm_root = &fs_info->tree_mod_log; - new = &tm_root->rb_node; - while (*new) { - cur = container_of(*new, struct tree_mod_elem, node); - parent = *new; - if (cur->index < tm->index) - new = &((*new)->rb_left); - else if (cur->index > tm->index) - new = &((*new)->rb_right); - else if (cur->elem.seq < tm->elem.seq) - new = &((*new)->rb_left); - else if (cur->elem.seq > tm->elem.seq) - new = &((*new)->rb_right); - else { - kfree(tm); - ret = -EEXIST; - goto unlock; - } - } - - rb_link_node(&tm->node, parent, new); - rb_insert_color(&tm->node, tm_root); -unlock: - write_unlock(&fs_info->tree_mod_log_lock); - return ret; -} - -static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb) { - smp_mb(); - if (list_empty(&(fs_info)->tree_mod_seq_list)) - return 1; - if (!eb) - return 0; - if (btrfs_header_level(eb) == 0) - return 1; - return 0; -} - -static inline int tree_mod_alloc(struct btrfs_fs_info 
*fs_info, gfp_t flags, - struct tree_mod_elem **tm_ret) -{ - struct tree_mod_elem *tm; - int seq; - - if (tree_mod_dont_log(fs_info, NULL)) - return 0; - - tm = *tm_ret = kzalloc(sizeof(*tm), flags); - if (!tm) - return -ENOMEM; - - tm->elem.flags = 0; - spin_lock(&fs_info->tree_mod_seq_lock); - if (list_empty(&fs_info->tree_mod_seq_list)) { - /* - * someone emptied the list while we were waiting for the lock. - * we must not add to the list, because no blocker exists. items - * are removed from the list only when the existing blocker is - * removed from the list. - */ - kfree(tm); - seq = 0; - } else { - __get_tree_mod_seq(fs_info, &tm->elem); - seq = tm->elem.seq; - } - spin_unlock(&fs_info->tree_mod_seq_lock); - - return seq; -} - -static noinline int -tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op, gfp_t flags) -{ - struct tree_mod_elem *tm; - int ret; - - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret <= 0) - return ret; - - tm->index = eb->start >> PAGE_CACHE_SHIFT; - if (op != MOD_LOG_KEY_ADD) { - btrfs_node_key(eb, &tm->key, slot); - tm->blockptr = btrfs_node_blockptr(eb, slot); - } - tm->op = op; - tm->slot = slot; - tm->generation = btrfs_node_ptr_generation(eb, slot); - - return __tree_mod_log_insert(fs_info, tm); -} - -static noinline int -tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - int slot, enum mod_log_op op) -{ - return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); -} - -static noinline int -tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int dst_slot, int src_slot, - int nr_items, gfp_t flags) -{ - struct tree_mod_elem *tm; - int ret; - int i; - - if (tree_mod_dont_log(fs_info, eb)) - return 0; - - for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { - ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, - MOD_LOG_KEY_REMOVE_WHILE_MOVING); - BUG_ON(ret < 0); - } - - ret 
= tree_mod_alloc(fs_info, flags, &tm); - if (ret <= 0) - return ret; - - tm->index = eb->start >> PAGE_CACHE_SHIFT; - tm->slot = src_slot; - tm->move.dst_slot = dst_slot; - tm->move.nr_items = nr_items; - tm->op = MOD_LOG_MOVE_KEYS; - - return __tree_mod_log_insert(fs_info, tm); -} - -static noinline int -tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, - struct extent_buffer *old_root, - struct extent_buffer *new_root, gfp_t flags) -{ - struct tree_mod_elem *tm; - int ret; - - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret <= 0) - return ret; - - tm->index = new_root->start >> PAGE_CACHE_SHIFT; - tm->old_root.logical = old_root->start; - tm->old_root.level = btrfs_header_level(old_root); - tm->generation = btrfs_header_generation(old_root); - tm->op = MOD_LOG_ROOT_REPLACE; - - return __tree_mod_log_insert(fs_info, tm); -} - -static struct tree_mod_elem * -__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, - int smallest) -{ - struct rb_root *tm_root; - struct rb_node *node; - struct tree_mod_elem *cur = NULL; - struct tree_mod_elem *found = NULL; - u64 index = start >> PAGE_CACHE_SHIFT; - - read_lock(&fs_info->tree_mod_log_lock); - tm_root = &fs_info->tree_mod_log; - node = tm_root->rb_node; - while (node) { - cur = container_of(node, struct tree_mod_elem, node); - if (cur->index < index) { - node = node->rb_left; - } else if (cur->index > index) { - node = node->rb_right; - } else if (cur->elem.seq < min_seq) { - node = node->rb_left; - } else if (!smallest) { - /* we want the node with the highest seq */ - if (found) - BUG_ON(found->elem.seq > cur->elem.seq); - found = cur; - node = node->rb_left; - } else if (cur->elem.seq > min_seq) { - /* we want the node with the smallest seq */ - if (found) - BUG_ON(found->elem.seq < cur->elem.seq); - found = cur; - node = node->rb_right; - } else { - found = cur; - break; - } - } - read_unlock(&fs_info->tree_mod_log_lock); - - return found; -} - -/* - * this returns the element from 
the log with the smallest time sequence - * value that's in the log (the oldest log item). any element with a time - * sequence lower than min_seq will be ignored. - */ -static struct tree_mod_elem * -tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, - u64 min_seq) -{ - return __tree_mod_log_search(fs_info, start, min_seq, 1); -} - -/* - * this returns the element from the log with the largest time sequence - * value that's in the log (the most recent log item). any element with - * a time sequence lower than min_seq will be ignored. - */ -static struct tree_mod_elem * -tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) -{ - return __tree_mod_log_search(fs_info, start, min_seq, 0); -} - -static inline void -tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, - struct extent_buffer *src, unsigned long dst_offset, - unsigned long src_offset, int nr_items) -{ - int ret; - int i; - - if (tree_mod_dont_log(fs_info, NULL)) - return; - - if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) - return; - - /* speed this up by single seq for all operations? */ - for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, - MOD_LOG_KEY_REMOVE); - BUG_ON(ret < 0); - ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, - MOD_LOG_KEY_ADD); - BUG_ON(ret < 0); - } -} - -static inline void -tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, - int dst_offset, int src_offset, int nr_items) -{ - int ret; - ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, - nr_items, GFP_NOFS); - BUG_ON(ret < 0); -} - -static inline void -tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, - struct btrfs_disk_key *disk_key, int slot, int atomic) -{ - int ret; - - ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, - MOD_LOG_KEY_REPLACE, - atomic ? 
GFP_ATOMIC : GFP_NOFS); - BUG_ON(ret < 0); -} - -static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb) -{ - int i; - int ret; - u32 nritems; - - if (tree_mod_dont_log(fs_info, eb)) - return; - - nritems = btrfs_header_nritems(eb); - for (i = nritems - 1; i >= 0; i--) { - ret = tree_mod_log_insert_key(fs_info, eb, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING); - BUG_ON(ret < 0); - } -} - -static inline void -tree_mod_log_set_root_pointer(struct btrfs_root *root, - struct extent_buffer *new_root_node) -{ - int ret; - tree_mod_log_free_eb(root->fs_info, root->node); - ret = tree_mod_log_insert_root(root->fs_info, root->node, - new_root_node, GFP_NOFS); - BUG_ON(ret < 0); -} - /* * check if the tree block can be shared by multiple trees */ @@ -847,12 +409,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); /* -ENOMEM */ } - /* - * don't log freeing in case we're freeing the root node, this - * is done by tree_mod_log_set_root_pointer later - */ - if (buf != root->node && btrfs_header_level(buf) != 0) - tree_mod_log_free_eb(root->fs_info, buf); clean_tree_block(trans, root, buf); *last_ref = 1; } @@ -911,7 +467,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, root->root_key.objectid, &disk_key, - level, search_start, empty_size); + level, search_start, empty_size, 1); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -950,11 +506,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, parent_start = 0; extent_buffer_get(cow); - tree_mod_log_set_root_pointer(root, cow); rcu_assign_pointer(root->node, cow); btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + last_ref, 1); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -964,15 +519,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, 
parent_start = 0; WARN_ON(trans->transid != btrfs_header_generation(parent)); - tree_mod_log_insert_key(root->fs_info, parent, parent_slot, - MOD_LOG_KEY_REPLACE); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + last_ref, 1); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -982,210 +535,6 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } -/* - * returns the logical address of the oldest predecessor of the given root. - * entries older than time_seq are ignored. - */ -static struct tree_mod_elem * -__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, - struct btrfs_root *root, u64 time_seq) -{ - struct tree_mod_elem *tm; - struct tree_mod_elem *found = NULL; - u64 root_logical = root->node->start; - int looped = 0; - - if (!time_seq) - return 0; - - /* - * the very last operation that's logged for a root is the replacement - * operation (if it is replaced at all). this has the index of the *new* - * root, making it the very first operation that's logged for this root. - */ - while (1) { - tm = tree_mod_log_search_oldest(fs_info, root_logical, - time_seq); - if (!looped && !tm) - return 0; - /* - * we must have key remove operations in the log before the - * replace operation. - */ - BUG_ON(!tm); - - if (tm->op != MOD_LOG_ROOT_REPLACE) - break; - - found = tm; - root_logical = tm->old_root.logical; - BUG_ON(root_logical == root->node->start); - looped = 1; - } - - return found; -} - -/* - * tm is a pointer to the first operation to rewind within eb. then, all - * previous operations will be rewinded (until we reach something older than - * time_seq). 
- */ -static void -__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, - struct tree_mod_elem *first_tm) -{ - u32 n; - struct rb_node *next; - struct tree_mod_elem *tm = first_tm; - unsigned long o_dst; - unsigned long o_src; - unsigned long p_size = sizeof(struct btrfs_key_ptr); - - n = btrfs_header_nritems(eb); - while (tm && tm->elem.seq >= time_seq) { - /* - * all the operations are recorded with the operator used for - * the modification. as we're going backwards, we do the - * opposite of each operation here. - */ - switch (tm->op) { - case MOD_LOG_KEY_REMOVE_WHILE_FREEING: - BUG_ON(tm->slot < n); - case MOD_LOG_KEY_REMOVE_WHILE_MOVING: - case MOD_LOG_KEY_REMOVE: - btrfs_set_node_key(eb, &tm->key, tm->slot); - btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); - btrfs_set_node_ptr_generation(eb, tm->slot, - tm->generation); - n++; - break; - case MOD_LOG_KEY_REPLACE: - BUG_ON(tm->slot >= n); - btrfs_set_node_key(eb, &tm->key, tm->slot); - btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); - btrfs_set_node_ptr_generation(eb, tm->slot, - tm->generation); - break; - case MOD_LOG_KEY_ADD: - if (tm->slot != n - 1) { - o_dst = btrfs_node_key_ptr_offset(tm->slot); - o_src = btrfs_node_key_ptr_offset(tm->slot + 1); - memmove_extent_buffer(eb, o_dst, o_src, p_size); - } - n--; - break; - case MOD_LOG_MOVE_KEYS: - o_dst = btrfs_node_key_ptr_offset(tm->slot); - o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); - memmove_extent_buffer(eb, o_dst, o_src, - tm->move.nr_items * p_size); - break; - case MOD_LOG_ROOT_REPLACE: - /* - * this operation is special. for roots, this must be - * handled explicitly before rewinding. - * for non-roots, this operation may exist if the node - * was a root: root A -> child B; then A gets empty and - * B is promoted to the new root. in the mod log, we'll - * have a root-replace operation for B, a tree block - * that is no root. we simply ignore that operation. 
- */ - break; - } - next = rb_next(&tm->node); - if (!next) - break; - tm = container_of(next, struct tree_mod_elem, node); - if (tm->index != first_tm->index) - break; - } - btrfs_set_header_nritems(eb, n); -} - -static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) -{ - struct extent_buffer *eb_rewin; - struct tree_mod_elem *tm; - - if (!time_seq) - return eb; - - if (btrfs_header_level(eb) == 0) - return eb; - - tm = tree_mod_log_search(fs_info, eb->start, time_seq); - if (!tm) - return eb; - - if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { - BUG_ON(tm->slot != 0); - eb_rewin = alloc_dummy_extent_buffer(eb->start, - fs_info->tree_root->nodesize); - BUG_ON(!eb_rewin); - btrfs_set_header_bytenr(eb_rewin, eb->start); - btrfs_set_header_backref_rev(eb_rewin, - btrfs_header_backref_rev(eb)); - btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); - btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); - } else { - eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); - } - - extent_buffer_get(eb_rewin); - free_extent_buffer(eb); - - __tree_mod_log_rewind(eb_rewin, time_seq, tm); - - return eb_rewin; -} - -static inline struct extent_buffer * -get_old_root(struct btrfs_root *root, u64 time_seq) -{ - struct tree_mod_elem *tm; - struct extent_buffer *eb; - struct tree_mod_root *old_root; - u64 old_generation; - - tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); - if (!tm) - return root->node; - - old_root = &tm->old_root; - old_generation = tm->generation; - - tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); - /* - * there was an item in the log when __tree_mod_log_oldest_root - * returned. this one must not go away, because the time_seq passed to - * us must be blocking its removal. 
- */ - BUG_ON(!tm); - - if (old_root->logical == root->node->start) { - /* there are logged operations for the current root */ - eb = btrfs_clone_extent_buffer(root->node); - } else { - /* there's a root replace operation for the current root */ - eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, - root->nodesize); - btrfs_set_header_bytenr(eb, eb->start); - btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); - btrfs_set_header_owner(eb, root->root_key.objectid); - } - if (!eb) - return NULL; - btrfs_set_header_level(eb, old_root->level); - btrfs_set_header_generation(eb, old_generation); - __tree_mod_log_rewind(eb, time_seq, tm); - - return eb; -} - static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) @@ -1390,11 +739,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (!cur) return -EIO; } else if (!uptodate) { - err = btrfs_read_buffer(cur, gen); - if (err) { - free_extent_buffer(cur); - return err; - } + btrfs_read_buffer(cur, gen); } } if (search_start == 0) @@ -1509,18 +854,20 @@ static noinline int generic_bin_search(struct extent_buffer *eb, static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot) { - if (level == 0) + if (level == 0) { return generic_bin_search(eb, offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), key, btrfs_header_nritems(eb), slot); - else + } else { return generic_bin_search(eb, offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), key, btrfs_header_nritems(eb), slot); + } + return -1; } int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, @@ -1627,7 +974,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } - tree_mod_log_set_root_pointer(root, child); rcu_assign_pointer(root->node, child); add_root_to_dirty_list(root); @@ -1641,7 +987,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, 
free_extent_buffer(mid); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, root, mid, 0, 1, 0); /* once for the root ptr */ free_extent_buffer_stale(mid); return 0; @@ -1694,16 +1040,14 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (btrfs_header_nritems(right) == 0) { clean_tree_block(trans, root, right); btrfs_tree_unlock(right); - del_ptr(trans, root, path, level + 1, pslot + 1, 1); + del_ptr(trans, root, path, level + 1, pslot + 1); root_sub_used(root, right->len); - btrfs_free_tree_block(trans, root, right, 0, 1); + btrfs_free_tree_block(trans, root, right, 0, 1, 0); free_extent_buffer_stale(right); right = NULL; } else { struct btrfs_disk_key right_key; btrfs_node_key(right, &right_key, 0); - tree_mod_log_set_node_key(root->fs_info, parent, - &right_key, pslot + 1, 0); btrfs_set_node_key(parent, &right_key, pslot + 1); btrfs_mark_buffer_dirty(parent); } @@ -1738,17 +1082,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (btrfs_header_nritems(mid) == 0) { clean_tree_block(trans, root, mid); btrfs_tree_unlock(mid); - del_ptr(trans, root, path, level + 1, pslot, 1); + del_ptr(trans, root, path, level + 1, pslot); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, root, mid, 0, 1, 0); free_extent_buffer_stale(mid); mid = NULL; } else { /* update the parent key to reflect our changes */ struct btrfs_disk_key mid_key; btrfs_node_key(mid, &mid_key, 0); - tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, - pslot, 0); btrfs_set_node_key(parent, &mid_key, pslot); btrfs_mark_buffer_dirty(parent); } @@ -1846,8 +1188,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_disk_key disk_key; orig_slot += left_nr; btrfs_node_key(mid, &disk_key, 0); - tree_mod_log_set_node_key(root->fs_info, parent, - &disk_key, pslot, 0); btrfs_set_node_key(parent, 
&disk_key, pslot); btrfs_mark_buffer_dirty(parent); if (btrfs_header_nritems(left) > orig_slot) { @@ -1899,8 +1239,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_disk_key disk_key; btrfs_node_key(right, &disk_key, 0); - tree_mod_log_set_node_key(root->fs_info, parent, - &disk_key, pslot + 1, 0); btrfs_set_node_key(parent, &disk_key, pslot + 1); btrfs_mark_buffer_dirty(parent); @@ -2158,7 +1496,7 @@ static int read_block_for_search(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *p, struct extent_buffer **eb_ret, int level, int slot, - struct btrfs_key *key, u64 time_seq) + struct btrfs_key *key) { u64 blocknr; u64 gen; @@ -2512,7 +1850,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root } err = read_block_for_search(trans, root, p, - &b, level, slot, key, 0); + &b, level, slot, key); if (err == -EAGAIN) goto again; if (err) { @@ -2583,115 +1921,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } -/* - * Like btrfs_search_slot, this looks for a key in the given tree. It uses the - * current state of the tree together with the operations recorded in the tree - * modification log to search for the key in a previous version of this tree, as - * denoted by the time_seq parameter. - * - * Naturally, there is no support for insert, delete or cow operations. - * - * The resulting path and return value will be set up as if we called - * btrfs_search_slot at that point in time with ins_len and cow both set to 0. 
- */ -int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_path *p, u64 time_seq) -{ - struct extent_buffer *b; - int slot; - int ret; - int err; - int level; - int lowest_unlock = 1; - u8 lowest_level = 0; - - lowest_level = p->lowest_level; - WARN_ON(p->nodes[0] != NULL); - - if (p->search_commit_root) { - BUG_ON(time_seq); - return btrfs_search_slot(NULL, root, key, p, 0, 0); - } - -again: - b = get_old_root(root, time_seq); - extent_buffer_get(b); - level = btrfs_header_level(b); - btrfs_tree_read_lock(b); - p->locks[level] = BTRFS_READ_LOCK; - - while (b) { - level = btrfs_header_level(b); - p->nodes[level] = b; - btrfs_clear_path_blocking(p, NULL, 0); - - /* - * we have a lock on b and as long as we aren't changing - * the tree, there is no way to for the items in b to change. - * It is safe to drop the lock on our parent before we - * go through the expensive btree search on b. - */ - btrfs_unlock_up_safe(p, level + 1); - - ret = bin_search(b, key, level, &slot); - - if (level != 0) { - int dec = 0; - if (ret && slot > 0) { - dec = 1; - slot -= 1; - } - p->slots[level] = slot; - unlock_up(p, level, lowest_unlock, 0, NULL); - - if (level == lowest_level) { - if (dec) - p->slots[level]++; - goto done; - } - - err = read_block_for_search(NULL, root, p, &b, level, - slot, key, time_seq); - if (err == -EAGAIN) - goto again; - if (err) { - ret = err; - goto done; - } - - level = btrfs_header_level(b); - err = btrfs_try_tree_read_lock(b); - if (!err) { - btrfs_set_path_blocking(p); - btrfs_tree_read_lock(b); - btrfs_clear_path_blocking(p, b, - BTRFS_READ_LOCK); - } - p->locks[level] = BTRFS_READ_LOCK; - p->nodes[level] = b; - b = tree_mod_log_rewind(root->fs_info, b, time_seq); - if (b != p->nodes[level]) { - btrfs_tree_unlock_rw(p->nodes[level], - p->locks[level]); - p->locks[level] = 0; - p->nodes[level] = b; - } - } else { - p->slots[level] = slot; - unlock_up(p, level, lowest_unlock, 0, NULL); - goto done; - } - } - ret = 1; 
-done: - if (!p->leave_spinning) - btrfs_set_path_blocking(p); - if (ret < 0) - btrfs_release_path(p); - - return ret; -} - /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. @@ -2712,7 +1941,6 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, if (!path->nodes[i]) break; t = path->nodes[i]; - tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) @@ -2795,16 +2023,12 @@ static int push_node_left(struct btrfs_trans_handle *trans, } else push_items = min(src_nritems - 8, push_items); - tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, - push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - tree_mod_log_eb_move(root->fs_info, src, 0, push_items, - src_nritems - push_items); memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(push_items), (src_nritems - push_items) * @@ -2858,14 +2082,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans, if (max_push < push_items) push_items = max_push; - tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), btrfs_node_key_ptr_offset(0), (dst_nritems) * sizeof(struct btrfs_key_ptr)); - tree_mod_log_eb_copy(root->fs_info, dst, src, 0, - src_nritems - push_items, push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), @@ -2908,7 +2129,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, &lower_key, - level, root->node->start, 0); + level, root->node->start, 0, 0); if (IS_ERR(c)) return PTR_ERR(c); @@ -2940,7 
+2161,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(c); old = root->node; - tree_mod_log_set_root_pointer(root, c); rcu_assign_pointer(root->node, c); /* the super has an extra ref to root->node */ @@ -2964,11 +2184,10 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, static void insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, - int slot, int level, int tree_mod_log) + int slot, int level) { struct extent_buffer *lower; int nritems; - int ret; BUG_ON(!path->nodes[level]); btrfs_assert_tree_locked(path->nodes[level]); @@ -2977,19 +2196,11 @@ static void insert_ptr(struct btrfs_trans_handle *trans, BUG_ON(slot > nritems); BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != nritems) { - if (tree_mod_log && level) - tree_mod_log_eb_move(root->fs_info, lower, slot + 1, - slot, nritems - slot); memmove_extent_buffer(lower, btrfs_node_key_ptr_offset(slot + 1), btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - if (tree_mod_log && level) { - ret = tree_mod_log_insert_key(root->fs_info, lower, slot, - MOD_LOG_KEY_ADD); - BUG_ON(ret < 0); - } btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, bytenr); WARN_ON(trans->transid == 0); @@ -3041,7 +2252,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, root->root_key.objectid, - &disk_key, level, c->start, 0); + &disk_key, level, c->start, 0, 0); if (IS_ERR(split)) return PTR_ERR(split); @@ -3060,7 +2271,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, (unsigned long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); - tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); + copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(mid), @@ -3073,7 
+2284,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(split); insert_ptr(trans, root, path, &disk_key, split->start, - path->slots[level + 1] + 1, level + 1, 1); + path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { path->slots[level] -= mid; @@ -3610,7 +2821,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); @@ -3793,7 +3004,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, root->root_key.objectid, - &disk_key, 0, l->start, 0); + &disk_key, 0, l->start, 0, 0); if (IS_ERR(right)) return PTR_ERR(right); @@ -3817,7 +3028,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, if (mid <= slot) { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3826,7 +3037,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, } else { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1], 1, 0); + path->slots[1], 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -4538,29 +3749,19 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root * empty a node. 
*/ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int level, int slot, - int tree_mod_log) + struct btrfs_path *path, int level, int slot) { struct extent_buffer *parent = path->nodes[level]; u32 nritems; - int ret; nritems = btrfs_header_nritems(parent); if (slot != nritems - 1) { - if (tree_mod_log && level) - tree_mod_log_eb_move(root->fs_info, parent, slot, - slot + 1, nritems - slot - 1); memmove_extent_buffer(parent, btrfs_node_key_ptr_offset(slot), btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); - } else if (tree_mod_log && level) { - ret = tree_mod_log_insert_key(root->fs_info, parent, slot, - MOD_LOG_KEY_REMOVE); - BUG_ON(ret < 0); } - nritems--; btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { @@ -4592,7 +3793,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, struct extent_buffer *leaf) { WARN_ON(btrfs_header_generation(leaf) != trans->transid); - del_ptr(trans, root, path, 1, path->slots[1], 1); + del_ptr(trans, root, path, 1, path->slots[1]); /* * btrfs_free_extent is expensive, we want to make sure we @@ -4603,7 +3804,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); extent_buffer_get(leaf); - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); free_extent_buffer_stale(leaf); } /* @@ -5070,7 +4271,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) next = c; next_rw_lock = path->locks[level]; ret = read_block_for_search(NULL, root, path, &next, level, - slot, &key, 0); + slot, &key); if (ret == -EAGAIN) goto again; @@ -5107,7 +4308,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; ret = read_block_for_search(NULL, root, path, &next, level, - 0, &key, 0); + 0, &key); if (ret == -EAGAIN) goto again; diff --git a/trunk/fs/btrfs/ctree.h 
b/trunk/fs/btrfs/ctree.h index 0236d03c6732..8fd72331d600 100644 --- a/trunk/fs/btrfs/ctree.h +++ b/trunk/fs/btrfs/ctree.h @@ -173,9 +173,6 @@ static int btrfs_csum_sizes[] = { 4, 0 }; #define BTRFS_FT_XATTR 8 #define BTRFS_FT_MAX 9 -/* ioprio of readahead is set to idle */ -#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) - /* * The key defines the order in the tree, and so it also defines (optimal) * block layout. @@ -826,14 +823,6 @@ struct btrfs_csum_item { u8 csum; } __attribute__ ((__packed__)); -struct btrfs_dev_stats_item { - /* - * grow this item struct at the end for future enhancements and keep - * the existing values unchanged - */ - __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; -} __attribute__ ((__packed__)); - /* different types of block groups (and chunks) */ #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) @@ -1140,15 +1129,6 @@ struct btrfs_fs_info { spinlock_t delayed_iput_lock; struct list_head delayed_iputs; - /* this protects tree_mod_seq_list */ - spinlock_t tree_mod_seq_lock; - atomic_t tree_mod_seq; - struct list_head tree_mod_seq_list; - - /* this protects tree_mod_log */ - rwlock_t tree_mod_log_lock; - struct rb_root tree_mod_log; - atomic_t nr_async_submits; atomic_t async_submit_draining; atomic_t nr_async_bios; @@ -1395,7 +1375,7 @@ struct btrfs_root { struct list_head root_list; spinlock_t orphan_lock; - atomic_t orphan_inodes; + struct list_head orphan_list; struct btrfs_block_rsv *orphan_block_rsv; int orphan_item_inserted; int orphan_cleanup_state; @@ -1527,12 +1507,6 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_BALANCE_ITEM_KEY 248 -/* - * Persistantly stores the io stats in the device tree. - * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). - */ -#define BTRFS_DEV_STATS_KEY 249 - /* * string items are for debugging. 
They just store a short string of * data in the FS @@ -2441,30 +2415,6 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, return btrfs_item_size(eb, e) - offset; } -/* btrfs_dev_stats_item */ -static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, - struct btrfs_dev_stats_item *ptr, - int index) -{ - u64 val; - - read_extent_buffer(eb, &val, - offsetof(struct btrfs_dev_stats_item, values) + - ((unsigned long)ptr) + (index * sizeof(u64)), - sizeof(val)); - return val; -} - -static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb, - struct btrfs_dev_stats_item *ptr, - int index, u64 val) -{ - write_extent_buffer(eb, &val, - offsetof(struct btrfs_dev_stats_item, values) + - ((unsigned long)ptr) + (index * sizeof(u64)), - sizeof(val)); -} - static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; @@ -2546,11 +2496,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size); + u64 hint, u64 empty_size, int for_cow); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - u64 parent, int last_ref); + u64 parent, int last_ref, int for_cow); struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize, @@ -2709,8 +2659,6 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); -int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_path *p, u64 time_seq); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, @@ 
-2974,6 +2922,7 @@ int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); int btrfs_dirty_inode(struct inode *inode); +int btrfs_update_time(struct file *file); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); @@ -3149,23 +3098,4 @@ void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err); -/* delayed seq elem */ -struct seq_list { - struct list_head list; - u64 seq; - u32 flags; -}; - -void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem); -void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem); - -static inline int is_fstree(u64 rootid) -{ - if (rootid == BTRFS_FS_TREE_OBJECTID || - (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) - return 1; - return 0; -} #endif diff --git a/trunk/fs/btrfs/delayed-inode.c b/trunk/fs/btrfs/delayed-inode.c index c18d0442ae6d..03e3748d84d0 100644 --- a/trunk/fs/btrfs/delayed-inode.c +++ b/trunk/fs/btrfs/delayed-inode.c @@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata( return ret; } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { spin_lock(&BTRFS_I(inode)->lock); - if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) { + if (BTRFS_I(inode)->delalloc_meta_reserved) { + BTRFS_I(inode)->delalloc_meta_reserved = 0; spin_unlock(&BTRFS_I(inode)->lock); release = true; goto migrate; @@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); btrfs_set_stack_inode_generation(inode_item, BTRFS_I(inode)->generation); - btrfs_set_stack_inode_sequence(inode_item, inode->i_version); + btrfs_set_stack_inode_sequence(inode_item, 
BTRFS_I(inode)->sequence); btrfs_set_stack_inode_transid(inode_item, trans->transid); btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); @@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); - inode->i_version = btrfs_stack_inode_sequence(inode_item); + BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); inode->i_rdev = 0; *rdev = btrfs_stack_inode_rdev(inode_item); BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); diff --git a/trunk/fs/btrfs/delayed-ref.c b/trunk/fs/btrfs/delayed-ref.c index 13ae7b04790e..69f22e3ab3bc 100644 --- a/trunk/fs/btrfs/delayed-ref.c +++ b/trunk/fs/btrfs/delayed-ref.c @@ -525,7 +525,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; - if (is_fstree(ref_root)) + if (need_ref_seq(for_cow, ref_root)) seq = inc_delayed_seq(delayed_refs); ref->seq = seq; @@ -584,7 +584,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->is_head = 0; ref->in_tree = 1; - if (is_fstree(ref_root)) + if (need_ref_seq(for_cow, ref_root)) seq = inc_delayed_seq(delayed_refs); ref->seq = seq; @@ -658,11 +658,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, level, action, for_cow); - if (!is_fstree(ref_root) && + if (!need_ref_seq(for_cow, ref_root) && waitqueue_active(&delayed_refs->seq_wait)) wake_up(&delayed_refs->seq_wait); spin_unlock(&delayed_refs->lock); - return 0; } @@ -707,11 +706,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action, for_cow); - 
if (!is_fstree(ref_root) && + if (!need_ref_seq(for_cow, ref_root) && waitqueue_active(&delayed_refs->seq_wait)) wake_up(&delayed_refs->seq_wait); spin_unlock(&delayed_refs->lock); - return 0; } diff --git a/trunk/fs/btrfs/delayed-ref.h b/trunk/fs/btrfs/delayed-ref.h index 413927fb9957..d8f244d94925 100644 --- a/trunk/fs/btrfs/delayed-ref.h +++ b/trunk/fs/btrfs/delayed-ref.h @@ -195,6 +195,11 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, struct list_head *cluster, u64 search_start); +struct seq_list { + struct list_head list; + u64 seq; +}; + static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) { assert_spin_locked(&delayed_refs->lock); @@ -224,6 +229,25 @@ btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, u64 seq); +/* + * delayed refs with a ref_seq > 0 must be held back during backref walking. + * this only applies to items in one of the fs-trees. for_cow items never need + * to be held back, so they won't get a ref_seq number. + */ +static inline int need_ref_seq(int for_cow, u64 rootid) +{ + if (for_cow) + return 0; + + if (rootid == BTRFS_FS_TREE_OBJECTID) + return 1; + + if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) + return 1; + + return 0; +} + /* * a node might live in a head or a regular ref, this lets you * test for the proper type to use. 
diff --git a/trunk/fs/btrfs/disk-io.c b/trunk/fs/btrfs/disk-io.c index 7ae51decf6d3..e1fe74a2ce16 100644 --- a/trunk/fs/btrfs/disk-io.c +++ b/trunk/fs/btrfs/disk-io.c @@ -1153,6 +1153,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->orphan_block_rsv = NULL; INIT_LIST_HEAD(&root->dirty_list); + INIT_LIST_HEAD(&root->orphan_list); INIT_LIST_HEAD(&root->root_list); spin_lock_init(&root->orphan_lock); spin_lock_init(&root->inode_lock); @@ -1165,7 +1166,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->log_commit[0], 0); atomic_set(&root->log_commit[1], 0); atomic_set(&root->log_writers, 0); - atomic_set(&root->orphan_inodes, 0); root->log_batch = 0; root->log_transid = 0; root->last_log_commit = 0; @@ -1252,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, BTRFS_TREE_LOG_OBJECTID, NULL, - 0, 0, 0); + 0, 0, 0, 0); if (IS_ERR(leaf)) { kfree(root); return ERR_CAST(leaf); @@ -1914,14 +1914,11 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); spin_lock_init(&fs_info->free_chunk_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); - rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->reloc_mutex); init_completion(&fs_info->kobj_unregister); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); - INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); btrfs_mapping_init(&fs_info->mapping_tree); btrfs_init_block_rsv(&fs_info->global_block_rsv); btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); @@ -1934,14 +1931,12 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); - atomic_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = 8192 * 1024; 
fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; fs_info->trans_no_join = 0; fs_info->free_chunk_space = 0; - fs_info->tree_mod_log = RB_ROOT; /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); @@ -2006,8 +2001,7 @@ int open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); - set_bit(BTRFS_INODE_DUMMY, - &BTRFS_I(fs_info->btree_inode)->runtime_flags); + BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; insert_inode_hash(fs_info->btree_inode); spin_lock_init(&fs_info->block_group_cache_lock); @@ -2359,13 +2353,6 @@ int open_ctree(struct super_block *sb, fs_info->generation = generation; fs_info->last_trans_committed = generation; - ret = btrfs_init_dev_stats(fs_info); - if (ret) { - printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", - ret); - goto fail_block_groups; - } - ret = btrfs_init_space_info(fs_info); if (ret) { printk(KERN_ERR "Failed to initial space info: %d\n", ret); @@ -2569,19 +2556,18 @@ int open_ctree(struct super_block *sb, static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) { + char b[BDEVNAME_SIZE]; + if (uptodate) { set_buffer_uptodate(bh); } else { - struct btrfs_device *device = (struct btrfs_device *) - bh->b_private; - printk_ratelimited(KERN_WARNING "lost page write due to " - "I/O error on %s\n", device->name); + "I/O error on %s\n", + bdevname(bh->b_bdev, b)); /* note, we dont' set_buffer_write_io_error because we have * our own ways of dealing with the IO errors */ clear_buffer_uptodate(bh); - btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); } unlock_buffer(bh); put_bh(bh); @@ -2696,7 +2682,6 @@ static int write_dev_supers(struct btrfs_device *device, set_buffer_uptodate(bh); lock_buffer(bh); bh->b_end_io = btrfs_end_buffer_write_sync; - bh->b_private = device; } /* @@ -2755,9 +2740,6 @@ static int write_dev_flush(struct btrfs_device 
*device, int wait) } if (!bio_flagged(bio, BIO_UPTODATE)) { ret = -EIO; - if (!bio_flagged(bio, BIO_EOPNOTSUPP)) - btrfs_dev_stat_inc_and_print(device, - BTRFS_DEV_STAT_FLUSH_ERRS); } /* drop the reference from the wait == 0 run */ @@ -2920,6 +2902,19 @@ int write_ctree_super(struct btrfs_trans_handle *trans, return ret; } +/* Kill all outstanding I/O */ +void btrfs_abort_devices(struct btrfs_root *root) +{ + struct list_head *head; + struct btrfs_device *dev; + mutex_lock(&root->fs_info->fs_devices->device_list_mutex); + head = &root->fs_info->fs_devices->devices; + list_for_each_entry_rcu(dev, head, dev_list) { + blk_abort_queue(dev->bdev->bd_disk->queue); + } + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); +} + void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { spin_lock(&fs_info->fs_roots_radix_lock); @@ -3676,6 +3671,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) return 0; } +static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page, + u64 start, u64 end, + struct extent_state *state) +{ + struct super_block *sb = page->mapping->host->i_sb; + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + btrfs_error(fs_info, -EIO, + "Error occured while writing out btree at %llu", start); + return -EIO; +} + static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, @@ -3683,4 +3689,5 @@ static struct extent_io_ops btree_extent_io_ops = { .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, + .writepage_io_failed_hook = btree_writepage_io_failed_hook, }; diff --git a/trunk/fs/btrfs/disk-io.h b/trunk/fs/btrfs/disk-io.h index 05b3fab39f7e..ab1830aaf0ed 100644 --- a/trunk/fs/btrfs/disk-io.h +++ b/trunk/fs/btrfs/disk-io.h @@ -89,6 +89,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, int 
btrfs_cleanup_transaction(struct btrfs_root *root); void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, struct btrfs_root *root); +void btrfs_abort_devices(struct btrfs_root *root); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); diff --git a/trunk/fs/btrfs/export.c b/trunk/fs/btrfs/export.c index 614f34a899c2..e887ee62b6d4 100644 --- a/trunk/fs/btrfs/export.c +++ b/trunk/fs/btrfs/export.c @@ -13,14 +13,15 @@ parent_root_objectid) / 4) #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) -static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, - struct inode *parent) +static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, + int connectable) { struct btrfs_fid *fid = (struct btrfs_fid *)fh; + struct inode *inode = dentry->d_inode; int len = *max_len; int type; - if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { + if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { *max_len = BTRFS_FID_SIZE_CONNECTABLE; return 255; } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { @@ -35,13 +36,19 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, fid->root_objectid = BTRFS_I(inode)->root->objectid; fid->gen = inode->i_generation; - if (parent) { + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; u64 parent_root_id; + spin_lock(&dentry->d_lock); + + parent = dentry->d_parent->d_inode; fid->parent_objectid = BTRFS_I(parent)->location.objectid; fid->parent_gen = parent->i_generation; parent_root_id = BTRFS_I(parent)->root->objectid; + spin_unlock(&dentry->d_lock); + if (parent_root_id != fid->root_objectid) { fid->parent_root_objectid = parent_root_id; len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; diff --git a/trunk/fs/btrfs/extent-tree.c b/trunk/fs/btrfs/extent-tree.c index 4b5a1e1bdefb..49fd7b66d57b 100644 --- a/trunk/fs/btrfs/extent-tree.c +++ b/trunk/fs/btrfs/extent-tree.c @@ -3578,7 +3578,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, 
space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); out: - mutex_unlock(&fs_info->chunk_mutex); + mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } @@ -4355,9 +4355,10 @@ static unsigned drop_outstanding_extent(struct inode *inode) BTRFS_I(inode)->outstanding_extents--; if (BTRFS_I(inode)->outstanding_extents == 0 && - test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + BTRFS_I(inode)->delalloc_meta_reserved) { drop_inode_space = 1; + BTRFS_I(inode)->delalloc_meta_reserved = 0; + } /* * If we have more or the same amount of outsanding extents than we have @@ -4464,8 +4465,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * Add an item to reserve for updating the inode when we complete the * delalloc io. */ - if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) { + if (!BTRFS_I(inode)->delalloc_meta_reserved) { nr_extents++; extra_reserve = 1; } @@ -4511,8 +4511,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) spin_lock(&BTRFS_I(inode)->lock); if (extra_reserve) { - set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags); + BTRFS_I(inode)->delalloc_meta_reserved = 1; nr_extents--; } BTRFS_I(inode)->reserved_extents += nr_extents; @@ -5218,7 +5217,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, - u64 parent, int last_ref) + u64 parent, int last_ref, int for_cow) { struct btrfs_block_group_cache *cache = NULL; int ret; @@ -5228,7 +5227,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, buf->start, buf->len, parent, root->root_key.objectid, btrfs_header_level(buf), - BTRFS_DROP_DELAYED_REF, NULL, 0); + BTRFS_DROP_DELAYED_REF, NULL, for_cow); BUG_ON(ret); /* -ENOMEM */ } @@ -6250,7 +6249,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct 
btrfs_trans_handle *trans, struct btrfs_root *root, u32 blocksize, u64 parent, u64 root_objectid, struct btrfs_disk_key *key, int level, - u64 hint, u64 empty_size) + u64 hint, u64 empty_size, int for_cow) { struct btrfs_key ins; struct btrfs_block_rsv *block_rsv; @@ -6298,7 +6297,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ins.objectid, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, - extent_op, 0); + extent_op, for_cow); BUG_ON(ret); /* -ENOMEM */ } return buf; @@ -6716,7 +6715,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, btrfs_header_owner(path->nodes[level + 1])); } - btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); + btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); out: wc->refs[level] = 0; wc->flags[level] = 0; diff --git a/trunk/fs/btrfs/extent_io.c b/trunk/fs/btrfs/extent_io.c index 2c8f7b204617..c9018a05036e 100644 --- a/trunk/fs/btrfs/extent_io.c +++ b/trunk/fs/btrfs/extent_io.c @@ -186,6 +186,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, return parent; } + entry = rb_entry(node, struct tree_entry, rb_node); rb_link_node(node, parent, p); rb_insert_color(node, root); return NULL; @@ -412,7 +413,7 @@ static struct extent_state *next_state(struct extent_state *state) /* * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1). 
+ * it will optionally wake up any one waiting on this state (wake == 1) * * If no bits are set on the state struct after clearing things, the * struct is freed and removed from the tree @@ -569,8 +570,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (err) goto out; if (state->end <= end) { - state = clear_state_bit(tree, state, &bits, wake); - goto next; + clear_state_bit(tree, state, &bits, wake); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; } goto search_again; } @@ -778,6 +781,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, * Just lock what we found and keep going */ if (state->start == start && state->end <= end) { + struct rb_node *next_node; if (state->state & exclusive_bits) { *failed_start = state->start; err = -EEXIST; @@ -785,15 +789,20 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, } set_state_bits(tree, state, &bits); + cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; + start = last_end + 1; - state = next_state(state); - if (start < end && state && state->start == start && - !need_resched()) - goto hit_next; + next_node = rb_next(&state->rb_node); + if (next_node && start < end && prealloc && !need_resched()) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } goto search_again; } @@ -836,10 +845,6 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (last_end == (u64)-1) goto out; start = last_end + 1; - state = next_state(state); - if (start < end && state && state->start == start && - !need_resched()) - goto hit_next; } goto search_again; } @@ -989,14 +994,21 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, * Just lock what we found and keep going */ if (state->start == start && state->end <= end) { + struct rb_node *next_node; + set_state_bits(tree, state, &bits); - state = clear_state_bit(tree, state, 
&clear_bits, 0); + clear_state_bit(tree, state, &clear_bits, 0); if (last_end == (u64)-1) goto out; + start = last_end + 1; - if (start < end && state && state->start == start && - !need_resched()) - goto hit_next; + next_node = rb_next(&state->rb_node); + if (next_node && start < end && prealloc && !need_resched()) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } goto search_again; } @@ -1030,13 +1042,10 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, goto out; if (state->end <= end) { set_state_bits(tree, state, &bits); - state = clear_state_bit(tree, state, &clear_bits, 0); + clear_state_bit(tree, state, &clear_bits, 0); if (last_end == (u64)-1) goto out; start = last_end + 1; - if (start < end && state && state->start == start && - !need_resched()) - goto hit_next; } goto search_again; } @@ -1164,8 +1173,9 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, cached_state, mask); } -int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask) +static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, + u64 end, struct extent_state **cached_state, + gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, cached_state, mask); @@ -1283,7 +1293,7 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, * returned if we find something, and *start_ret and *end_ret are * set to reflect the state struct that was found. * - * If nothing was found, 1 is returned. If found something, return 0. + * If nothing was found, 1 is returned, < 0 on error */ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits) @@ -1913,7 +1923,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { /* try to remap that extent elsewhere? 
*/ bio_put(bio); - btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; } @@ -2213,7 +2222,17 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) uptodate = 0; } + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(NULL, page, + start, end, NULL); + /* Writeback already completed */ + if (ret == 0) + return 1; + } + if (!uptodate) { + clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); ClearPageUptodate(page); SetPageError(page); } @@ -2328,23 +2347,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, state, mirror); - if (ret) { - /* no IO indicated but software detected errors - * in the block, either checksum errors or - * issues with the contents */ - struct btrfs_root *root = - BTRFS_I(page->mapping->host)->root; - struct btrfs_device *device; - + if (ret) uptodate = 0; - device = btrfs_find_device_for_logical( - root, start, mirror); - if (device) - btrfs_dev_stat_inc_and_print(device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - } else { + else clean_io_failure(start, page); - } } if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { @@ -3158,7 +3164,7 @@ static int write_one_eb(struct extent_buffer *eb, u64 offset = eb->start; unsigned long i, num_pages; int rw = (epd->sync_io ? 
WRITE_SYNC : WRITE); - int ret = 0; + int ret; clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); num_pages = num_extent_pages(eb->start, eb->len); @@ -3924,7 +3930,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, eb->start = start; eb->len = len; eb->tree = tree; - eb->bflags = 0; rwlock_init(&eb->lock); atomic_set(&eb->write_locks, 0); atomic_set(&eb->read_locks, 0); @@ -3962,60 +3967,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, return eb; } -struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) -{ - unsigned long i; - struct page *p; - struct extent_buffer *new; - unsigned long num_pages = num_extent_pages(src->start, src->len); - - new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); - if (new == NULL) - return NULL; - - for (i = 0; i < num_pages; i++) { - p = alloc_page(GFP_ATOMIC); - BUG_ON(!p); - attach_extent_buffer_page(new, p); - WARN_ON(PageDirty(p)); - SetPageUptodate(p); - new->pages[i] = p; - } - - copy_extent_buffer(new, src, 0, 0, src->len); - set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); - set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); - - return new; -} - -struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) -{ - struct extent_buffer *eb; - unsigned long num_pages = num_extent_pages(0, len); - unsigned long i; - - eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); - if (!eb) - return NULL; - - for (i = 0; i < num_pages; i++) { - eb->pages[i] = alloc_page(GFP_ATOMIC); - if (!eb->pages[i]) - goto err; - } - set_extent_buffer_uptodate(eb); - btrfs_set_header_nritems(eb, 0); - set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); - - return eb; -err: - for (i--; i > 0; i--) - __free_page(eb->pages[i]); - __free_extent_buffer(eb); - return NULL; -} - static int extent_buffer_under_io(struct extent_buffer *eb) { return (atomic_read(&eb->io_pages) || @@ -4030,21 +3981,18 @@ static void btrfs_release_extent_buffer_page(struct 
extent_buffer *eb, unsigned long start_idx) { unsigned long index; - unsigned long num_pages; struct page *page; - int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); BUG_ON(extent_buffer_under_io(eb)); - num_pages = num_extent_pages(eb->start, eb->len); - index = start_idx + num_pages; + index = num_extent_pages(eb->start, eb->len); if (start_idx >= index) return; do { index--; page = extent_buffer_page(eb, index); - if (page && mapped) { + if (page) { spin_lock(&page->mapping->private_lock); /* * We do this since we'll remove the pages after we've @@ -4069,8 +4017,6 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, } spin_unlock(&page->mapping->private_lock); - } - if (page) { /* One for when we alloced the page */ page_cache_release(page); } @@ -4289,18 +4235,14 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) { WARN_ON(atomic_read(&eb->refs) == 0); if (atomic_dec_and_test(&eb->refs)) { - if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { - spin_unlock(&eb->refs_lock); - } else { - struct extent_io_tree *tree = eb->tree; + struct extent_io_tree *tree = eb->tree; - spin_unlock(&eb->refs_lock); + spin_unlock(&eb->refs_lock); - spin_lock(&tree->buffer_lock); - radix_tree_delete(&tree->buffer, - eb->start >> PAGE_CACHE_SHIFT); - spin_unlock(&tree->buffer_lock); - } + spin_lock(&tree->buffer_lock); + radix_tree_delete(&tree->buffer, + eb->start >> PAGE_CACHE_SHIFT); + spin_unlock(&tree->buffer_lock); /* Should be safe to release our pages at this point */ btrfs_release_extent_buffer_page(eb, 0); @@ -4317,10 +4259,6 @@ void free_extent_buffer(struct extent_buffer *eb) return; spin_lock(&eb->refs_lock); - if (atomic_read(&eb->refs) == 2 && - test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) - atomic_dec(&eb->refs); - if (atomic_read(&eb->refs) == 2 && test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && !extent_buffer_under_io(eb) && diff --git a/trunk/fs/btrfs/extent_io.h b/trunk/fs/btrfs/extent_io.h index 
25900af5b15d..b516c3b8dec6 100644 --- a/trunk/fs/btrfs/extent_io.h +++ b/trunk/fs/btrfs/extent_io.h @@ -39,7 +39,6 @@ #define EXTENT_BUFFER_STALE 6 #define EXTENT_BUFFER_WRITEBACK 7 #define EXTENT_BUFFER_IOERR 8 -#define EXTENT_BUFFER_DUMMY 9 /* these are flags for extent_clear_unlock_delalloc */ #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 @@ -76,6 +75,9 @@ struct extent_io_ops { unsigned long bio_flags); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); + int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, + u64 start, u64 end, + struct extent_state *state); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int mirror); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, @@ -223,8 +225,6 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); -int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask); int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, @@ -265,8 +265,6 @@ void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len); -struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); -struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len); void free_extent_buffer(struct extent_buffer *eb); diff --git a/trunk/fs/btrfs/file.c b/trunk/fs/btrfs/file.c index 70dc8ca73e25..53bf2d764bbc 100644 --- a/trunk/fs/btrfs/file.c +++ 
b/trunk/fs/btrfs/file.c @@ -65,21 +65,6 @@ struct inode_defrag { int cycled; }; -static int __compare_inode_defrag(struct inode_defrag *defrag1, - struct inode_defrag *defrag2) -{ - if (defrag1->root > defrag2->root) - return 1; - else if (defrag1->root < defrag2->root) - return -1; - else if (defrag1->ino > defrag2->ino) - return 1; - else if (defrag1->ino < defrag2->ino) - return -1; - else - return 0; -} - /* pop a record for an inode into the defrag tree. The lock * must be held already * @@ -96,17 +81,15 @@ static void __btrfs_add_inode_defrag(struct inode *inode, struct inode_defrag *entry; struct rb_node **p; struct rb_node *parent = NULL; - int ret; p = &root->fs_info->defrag_inodes.rb_node; while (*p) { parent = *p; entry = rb_entry(parent, struct inode_defrag, rb_node); - ret = __compare_inode_defrag(defrag, entry); - if (ret < 0) + if (defrag->ino < entry->ino) p = &parent->rb_left; - else if (ret > 0) + else if (defrag->ino > entry->ino) p = &parent->rb_right; else { /* if we're reinserting an entry for @@ -120,7 +103,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode, goto exists; } } - set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); + BTRFS_I(inode)->in_defrag = 1; rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); return; @@ -148,7 +131,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (btrfs_fs_closing(root->fs_info)) return 0; - if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) + if (BTRFS_I(inode)->in_defrag) return 0; if (trans) @@ -165,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, defrag->root = root->root_key.objectid; spin_lock(&root->fs_info->defrag_inodes_lock); - if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) + if (!BTRFS_I(inode)->in_defrag) __btrfs_add_inode_defrag(inode, defrag); else kfree(defrag); @@ -176,35 +159,28 @@ int btrfs_add_inode_defrag(struct 
btrfs_trans_handle *trans, /* * must be called with the defrag_inodes lock held */ -struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, - u64 root, u64 ino, +struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, struct rb_node **next) { struct inode_defrag *entry = NULL; - struct inode_defrag tmp; struct rb_node *p; struct rb_node *parent = NULL; - int ret; - - tmp.ino = ino; - tmp.root = root; p = info->defrag_inodes.rb_node; while (p) { parent = p; entry = rb_entry(parent, struct inode_defrag, rb_node); - ret = __compare_inode_defrag(&tmp, entry); - if (ret < 0) + if (ino < entry->ino) p = parent->rb_left; - else if (ret > 0) + else if (ino > entry->ino) p = parent->rb_right; else return entry; } if (next) { - while (parent && __compare_inode_defrag(&tmp, entry) > 0) { + while (parent && ino > entry->ino) { parent = rb_next(parent); entry = rb_entry(parent, struct inode_defrag, rb_node); } @@ -226,7 +202,6 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) struct btrfs_key key; struct btrfs_ioctl_defrag_range_args range; u64 first_ino = 0; - u64 root_objectid = 0; int num_defrag; int defrag_batch = 1024; @@ -239,14 +214,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) n = NULL; /* find an inode to defrag */ - defrag = btrfs_find_defrag_inode(fs_info, root_objectid, - first_ino, &n); + defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); if (!defrag) { - if (n) { - defrag = rb_entry(n, struct inode_defrag, - rb_node); - } else if (root_objectid || first_ino) { - root_objectid = 0; + if (n) + defrag = rb_entry(n, struct inode_defrag, rb_node); + else if (first_ino) { first_ino = 0; continue; } else { @@ -256,7 +228,6 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) /* remove it from the rbtree */ first_ino = defrag->ino + 1; - root_objectid = defrag->root; rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); if (btrfs_fs_closing(fs_info)) @@ -281,7 +252,7 @@ int 
btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) goto next; /* do a chunk of defrag */ - clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); + BTRFS_I(inode)->in_defrag = 0; range.start = defrag->last_offset; num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, defrag_batch); @@ -1433,11 +1404,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - err = file_update_time(file); + err = btrfs_update_time(file); if (err) { mutex_unlock(&inode->i_mutex); goto out; } + BTRFS_I(inode)->sequence++; start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { @@ -1494,8 +1466,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * flush down new bytes that may have been written if the * application were using truncate to replace a file in place. */ - if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, - &BTRFS_I(inode)->runtime_flags)) { + if (BTRFS_I(inode)->ordered_data_close) { + BTRFS_I(inode)->ordered_data_close = 0; btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(inode->i_mapping); @@ -1526,15 +1498,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_btrfs_sync_file(file, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; mutex_lock(&inode->i_mutex); - /* - * we wait first, since the writeback may change the inode, also wait - * ordered range does a filemape_write_and_wait_range which is why we - * don't do it above like other file systems. 
- */ + /* we wait first, since the writeback may change the inode */ root->log_batch++; - btrfs_wait_ordered_range(inode, start, end); + btrfs_wait_ordered_range(inode, 0, (u64)-1); root->log_batch++; /* @@ -1552,8 +1523,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * syncing */ smp_mb(); - if (btrfs_inode_in_log(inode, root->fs_info->generation) || - BTRFS_I(inode)->last_trans <= + if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { BTRFS_I(inode)->last_trans = 0; mutex_unlock(&inode->i_mutex); diff --git a/trunk/fs/btrfs/free-space-cache.c b/trunk/fs/btrfs/free-space-cache.c index 81296c57405a..202008ec367d 100644 --- a/trunk/fs/btrfs/free-space-cache.c +++ b/trunk/fs/btrfs/free-space-cache.c @@ -33,8 +33,6 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info); -static void unlink_free_space(struct btrfs_free_space_ctl *ctl, - struct btrfs_free_space *info); static struct inode *__lookup_free_space_inode(struct btrfs_root *root, struct btrfs_path *path, @@ -77,8 +75,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, return ERR_PTR(-ENOENT); } - mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + inode->i_mapping->flags &= ~__GFP_FS; return inode; } @@ -368,7 +365,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) { - __le64 *val; + u64 *val; io_ctl_map_page(io_ctl, 1); @@ -391,7 +388,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) { - __le64 *gen; + u64 *gen; /* * Skip the crc area. 
If we don't check crcs then we just have a 64bit @@ -587,44 +584,6 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl, return 0; } -/* - * Since we attach pinned extents after the fact we can have contiguous sections - * of free space that are split up in entries. This poses a problem with the - * tree logging stuff since it could have allocated across what appears to be 2 - * entries since we would have merged the entries when adding the pinned extents - * back to the free space cache. So run through the space cache that we just - * loaded and merge contiguous entries. This will make the log replay stuff not - * blow up and it will make for nicer allocator behavior. - */ -static void merge_space_tree(struct btrfs_free_space_ctl *ctl) -{ - struct btrfs_free_space *e, *prev = NULL; - struct rb_node *n; - -again: - spin_lock(&ctl->tree_lock); - for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { - e = rb_entry(n, struct btrfs_free_space, offset_index); - if (!prev) - goto next; - if (e->bitmap || prev->bitmap) - goto next; - if (prev->offset + prev->bytes == e->offset) { - unlink_free_space(ctl, prev); - unlink_free_space(ctl, e); - prev->bytes += e->bytes; - kmem_cache_free(btrfs_free_space_cachep, e); - link_free_space(ctl, prev); - prev = NULL; - spin_unlock(&ctl->tree_lock); - goto again; - } -next: - prev = e; - } - spin_unlock(&ctl->tree_lock); -} - int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_path *path, u64 offset) @@ -767,7 +726,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, } io_ctl_drop_pages(&io_ctl); - merge_space_tree(ctl); ret = 1; out: io_ctl_free(&io_ctl); @@ -1014,7 +972,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, goto out; - btrfs_wait_ordered_range(inode, 0, (u64)-1); + ret = filemap_write_and_wait(inode->i_mapping); + if (ret) + goto out; key.objectid = BTRFS_FREE_SPACE_OBJECTID; 
key.offset = offset; diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index f6ab6f5e635a..ceb7b9c9edcc 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { static int btrfs_setsize(struct inode *inode, loff_t newsize); static int btrfs_truncate(struct inode *inode); -static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); +static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); static noinline int cow_file_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, @@ -257,13 +257,10 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, ret = insert_inline_extent(trans, root, inode, start, inline_len, compressed_size, compress_type, compressed_pages); - if (ret && ret != -ENOSPC) { + if (ret) { btrfs_abort_transaction(trans, root, ret); return ret; - } else if (ret == -ENOSPC) { - return 1; } - btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; @@ -1575,11 +1572,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, if (btrfs_is_free_space_inode(root, inode)) metadata = 2; - if (!(rw & REQ_WRITE)) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); - if (ret) - return ret; + ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); + if (ret) + return ret; + if (!(rw & REQ_WRITE)) { if (bio_flags & EXTENT_BIO_COMPRESSED) { return btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags); @@ -1818,23 +1815,24 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * an ordered extent if the range of bytes in the file it covers are * fully written. 
*/ -static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) +static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) { - struct inode *inode = ordered_extent->inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans = NULL; + struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; int compress_type = 0; int ret; bool nolock; - nolock = btrfs_is_free_space_inode(root, inode); + ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, + end - start + 1); + if (!ret) + return 0; + BUG_ON(!ordered_extent); /* Logic error */ - if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { - ret = -EIO; - goto out; - } + nolock = btrfs_is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ @@ -1891,10 +1889,12 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ordered_extent->file_offset, ordered_extent->len); } - + unlock_extent_cached(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1, &cached_state, GFP_NOFS); if (ret < 0) { btrfs_abort_transaction(trans, root, ret); - goto out_unlock; + goto out; } add_pending_csums(trans, inode, ordered_extent->file_offset, @@ -1905,14 +1905,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) { /* -ENOMEM or corruption */ btrfs_abort_transaction(trans, root, ret); - goto out_unlock; + goto out; } } ret = 0; -out_unlock: - unlock_extent_cached(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, &cached_state, GFP_NOFS); out: if (root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, ordered_extent->len); @@ -1923,57 
+1919,26 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) btrfs_end_transaction(trans, root); } - if (ret) - clear_extent_uptodate(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, NULL, GFP_NOFS); - - /* - * This needs to be dont to make sure anybody waiting knows we are done - * upating everything for this ordered extent. - */ - btrfs_remove_ordered_extent(inode, ordered_extent); - /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - return ret; -} - -static void finish_ordered_fn(struct btrfs_work *work) -{ - struct btrfs_ordered_extent *ordered_extent; - ordered_extent = container_of(work, struct btrfs_ordered_extent, work); - btrfs_finish_ordered_io(ordered_extent); + return 0; +out_unlock: + unlock_extent_cached(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1, &cached_state, GFP_NOFS); + goto out; } static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { - struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workers *workers; - trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); ClearPagePrivate2(page); - if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, - end - start + 1, uptodate)) - return 0; - - ordered_extent->work.func = finish_ordered_fn; - ordered_extent->work.flags = 0; - - if (btrfs_is_free_space_inode(root, inode)) - workers = &root->fs_info->endio_freespace_worker; - else - workers = &root->fs_info->endio_write_workers; - btrfs_queue_worker(workers, &ordered_extent->work); - - return 0; + return btrfs_finish_ordered_io(page->mapping->host, start, end); } /* @@ -2107,12 +2072,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, 
struct btrfs_block_rsv *block_rsv; int ret; - if (atomic_read(&root->orphan_inodes) || + if (!list_empty(&root->orphan_list) || root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) return; spin_lock(&root->orphan_lock); - if (atomic_read(&root->orphan_inodes)) { + if (!list_empty(&root->orphan_list)) { spin_unlock(&root->orphan_lock); return; } @@ -2169,8 +2134,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) block_rsv = NULL; } - if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)) { + if (list_empty(&BTRFS_I(inode)->i_orphan)) { + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); #if 0 /* * For proper ENOSPC handling, we should do orphan @@ -2183,12 +2148,12 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) insert = 1; #endif insert = 1; - atomic_dec(&root->orphan_inodes); } - if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + if (!BTRFS_I(inode)->orphan_meta_reserved) { + BTRFS_I(inode)->orphan_meta_reserved = 1; reserve = 1; + } spin_unlock(&root->orphan_lock); /* grab metadata reservation from transaction handle */ @@ -2201,8 +2166,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret && ret != -EEXIST) { - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); btrfs_abort_transaction(trans, root, ret); return ret; } @@ -2233,13 +2196,15 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) int ret = 0; spin_lock(&root->orphan_lock); - if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)) + if (!list_empty(&BTRFS_I(inode)->i_orphan)) { + list_del_init(&BTRFS_I(inode)->i_orphan); delete_item = 1; + } - if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + if 
(BTRFS_I(inode)->orphan_meta_reserved) { + BTRFS_I(inode)->orphan_meta_reserved = 0; release_rsv = 1; + } spin_unlock(&root->orphan_lock); if (trans && delete_item) { @@ -2247,10 +2212,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ } - if (release_rsv) { + if (release_rsv) btrfs_orphan_release_metadata(inode); - atomic_dec(&root->orphan_inodes); - } return 0; } @@ -2378,8 +2341,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = PTR_ERR(trans); goto out; } - printk(KERN_ERR "auto deleting %Lu\n", - found_key.objectid); ret = btrfs_del_orphan_item(trans, root, found_key.objectid); BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ @@ -2391,8 +2352,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * add this inode to the orphan list so btrfs_orphan_del does * the proper thing when we hit it */ - set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); + spin_lock(&root->orphan_lock); + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + spin_unlock(&root->orphan_lock); /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { @@ -2548,7 +2510,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); - inode->i_version = btrfs_inode_sequence(leaf, inode_item); + BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item); inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); @@ -2632,7 +2594,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); - btrfs_set_inode_sequence(leaf, item, inode->i_version); + btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence); 
btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); @@ -2790,8 +2752,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto out; btrfs_i_size_write(dir, dir->i_size - name_len * 2); - inode_inc_iversion(inode); - inode_inc_iversion(dir); inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); out: @@ -3129,7 +3089,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, } btrfs_i_size_write(dir, dir->i_size - name_len * 2); - inode_inc_iversion(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dir); if (ret) @@ -3648,8 +3607,7 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) * any new writes get down to disk quickly. */ if (newsize == 0) - set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, - &BTRFS_I(inode)->runtime_flags); + BTRFS_I(inode)->ordered_data_close = 1; /* we don't support swapfiles, so vmtruncate shouldn't fail */ truncate_setsize(inode, newsize); @@ -3680,7 +3638,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid) { setattr_copy(inode, attr); - inode_inc_iversion(inode); err = btrfs_dirty_inode(inode); if (!err && attr->ia_valid & ATTR_MODE) @@ -3714,8 +3671,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { - BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)); + BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); goto no_delete; } @@ -4110,7 +4066,7 @@ static struct inode *new_simple_dir(struct super_block *s, BTRFS_I(inode)->root = root; memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); - set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); + BTRFS_I(inode)->dummy_inode = 1; inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_op = &btrfs_dir_ro_inode_operations; @@ 
-4414,7 +4370,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) int ret = 0; bool nolock = false; - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) + if (BTRFS_I(inode)->dummy_inode) return 0; if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) @@ -4447,7 +4403,7 @@ int btrfs_dirty_inode(struct inode *inode) struct btrfs_trans_handle *trans; int ret; - if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) + if (BTRFS_I(inode)->dummy_inode) return 0; trans = btrfs_join_transaction(root); @@ -4475,18 +4431,46 @@ int btrfs_dirty_inode(struct inode *inode) * This is a copy of file_update_time. We need this so we can return error on * ENOSPC for updating the inode in the case of file write and mmap writes. */ -static int btrfs_update_time(struct inode *inode, struct timespec *now, - int flags) +int btrfs_update_time(struct file *file) { - if (flags & S_VERSION) + struct inode *inode = file->f_path.dentry->d_inode; + struct timespec now; + int ret; + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + + /* First try to exhaust all avenues to not sync */ + if (IS_NOCMTIME(inode)) + return 0; + + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_mtime, &now)) + sync_it = S_MTIME; + + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it |= S_CTIME; + + if (IS_I_VERSION(inode)) + sync_it |= S_VERSION; + + if (!sync_it) + return 0; + + /* Finally allowed to write? Takes lock. 
*/ + if (mnt_want_write_file(file)) + return 0; + + /* Only change inode inside the lock region */ + if (sync_it & S_VERSION) inode_inc_iversion(inode); - if (flags & S_CTIME) - inode->i_ctime = *now; - if (flags & S_MTIME) - inode->i_mtime = *now; - if (flags & S_ATIME) - inode->i_atime = *now; - return btrfs_dirty_inode(inode); + if (sync_it & S_CTIME) + inode->i_ctime = now; + if (sync_it & S_MTIME) + inode->i_mtime = now; + ret = btrfs_dirty_inode(inode); + if (!ret) + mark_inode_dirty_sync(inode); + mnt_drop_write(file->f_path.mnt); + return ret; } /* @@ -4746,7 +4730,6 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); - inode_inc_iversion(parent_inode); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, parent_inode); if (ret) @@ -4954,7 +4937,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } btrfs_inc_nlink(inode); - inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; ihold(inode); @@ -5921,7 +5903,9 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered = NULL; + struct extent_state *cached_state = NULL; u64 ordered_offset = dip->logical_offset; u64 ordered_bytes = dip->bytes; int ret; @@ -5931,14 +5915,73 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, - ordered_bytes, !err); + ordered_bytes); if (!ret) goto out_test; - ordered->work.func = finish_ordered_fn; - ordered->work.flags = 0; - btrfs_queue_worker(&root->fs_info->endio_write_workers, - &ordered->work); + BUG_ON(!ordered); + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + err = -ENOMEM; + goto out; + } + 
trans->block_rsv = &root->fs_info->delalloc_block_rsv; + + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { + ret = btrfs_ordered_update_i_size(inode, 0, ordered); + if (!ret) + err = btrfs_update_inode_fallback(trans, root, inode); + goto out; + } + + lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, + ordered->file_offset + ordered->len - 1, 0, + &cached_state); + + if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { + ret = btrfs_mark_extent_written(trans, inode, + ordered->file_offset, + ordered->file_offset + + ordered->len); + if (ret) { + err = ret; + goto out_unlock; + } + } else { + ret = insert_reserved_file_extent(trans, inode, + ordered->file_offset, + ordered->start, + ordered->disk_len, + ordered->len, + ordered->len, + 0, 0, 0, + BTRFS_FILE_EXTENT_REG); + unpin_extent_cache(&BTRFS_I(inode)->extent_tree, + ordered->file_offset, ordered->len); + if (ret) { + err = ret; + WARN_ON(1); + goto out_unlock; + } + } + + add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); + ret = btrfs_ordered_update_i_size(inode, 0, ordered); + if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) + btrfs_update_inode_fallback(trans, root, inode); + ret = 0; +out_unlock: + unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, + ordered->file_offset + ordered->len - 1, + &cached_state, GFP_NOFS); +out: + btrfs_delalloc_release_metadata(inode, ordered->len); + btrfs_end_transaction(trans, root); + ordered_offset = ordered->file_offset + ordered->len; + btrfs_put_ordered_extent(ordered); + btrfs_put_ordered_extent(ordered); + out_test: /* * our bio might span multiple ordered extents. 
If we haven't @@ -5947,12 +5990,12 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) if (ordered_offset < dip->logical_offset + dip->bytes) { ordered_bytes = dip->logical_offset + dip->bytes - ordered_offset; - ordered = NULL; goto again; } out_done: bio->bi_private = dip->private; + kfree(dip->csums); kfree(dip); /* If we had an error make sure to clear the uptodate flag */ @@ -6020,12 +6063,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int ret; bio_get(bio); - - if (!write) { - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - if (ret) - goto err; - } + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + if (ret) + goto err; if (skip_sum) goto map; @@ -6445,13 +6485,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) static void btrfs_invalidatepage(struct page *page, unsigned long offset) { - struct inode *inode = page->mapping->host; struct extent_io_tree *tree; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; u64 page_start = page_offset(page); u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + /* * we have the page locked, so new writeback can't start, * and the dirty bit won't be cleared while we are here. 
@@ -6461,13 +6501,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ wait_on_page_writeback(page); - tree = &BTRFS_I(inode)->io_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (offset) { btrfs_releasepage(page, GFP_NOFS); return; } lock_extent_bits(tree, page_start, page_end, 0, &cached_state); - ordered = btrfs_lookup_ordered_extent(inode, + ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); if (ordered) { /* @@ -6482,10 +6522,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) * whoever cleared the private bit is responsible * for the finish_ordered_io */ - if (TestClearPagePrivate2(page) && - btrfs_dec_test_ordered_pending(inode, &ordered, page_start, - PAGE_CACHE_SIZE, 1)) { - btrfs_finish_ordered_io(ordered); + if (TestClearPagePrivate2(page)) { + btrfs_finish_ordered_io(page->mapping->host, + page_start, page_end); } btrfs_put_ordered_extent(ordered); cached_state = NULL; @@ -6537,7 +6576,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (!ret) { - ret = file_update_time(vma->vm_file); + ret = btrfs_update_time(vma->vm_file); reserved = 1; } if (ret) { @@ -6732,8 +6771,7 @@ static int btrfs_truncate(struct inode *inode) * using truncate to replace the contents of the file will * end up with a zero length file after a crash. 
*/ - if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, - &BTRFS_I(inode)->runtime_flags)) + if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) btrfs_add_ordered_operation(trans, root, inode); while (1) { @@ -6856,6 +6894,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->root = NULL; ei->space_info = NULL; ei->generation = 0; + ei->sequence = 0; ei->last_trans = 0; ei->last_sub_trans = 0; ei->logged_trans = 0; @@ -6870,7 +6909,11 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->outstanding_extents = 0; ei->reserved_extents = 0; - ei->runtime_flags = 0; + ei->ordered_data_close = 0; + ei->orphan_meta_reserved = 0; + ei->dummy_inode = 0; + ei->in_defrag = 0; + ei->delalloc_meta_reserved = 0; ei->force_compress = BTRFS_COMPRESS_NONE; ei->delayed_node = NULL; @@ -6884,6 +6927,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) mutex_init(&ei->log_mutex); mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); + INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->delalloc_inodes); INIT_LIST_HEAD(&ei->ordered_operations); RB_CLEAR_NODE(&ei->rb_node); @@ -6928,12 +6972,13 @@ void btrfs_destroy_inode(struct inode *inode) spin_unlock(&root->fs_info->ordered_extent_lock); } - if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)) { + spin_lock(&root->orphan_lock); + if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", (unsigned long long)btrfs_ino(inode)); - atomic_dec(&root->orphan_inodes); + list_del_init(&BTRFS_I(inode)->i_orphan); } + spin_unlock(&root->orphan_lock); while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); @@ -7148,9 +7193,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) btrfs_add_ordered_operation(trans, root, old_inode); - inode_inc_iversion(old_dir); - 
inode_inc_iversion(new_dir); - inode_inc_iversion(old_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; @@ -7177,7 +7219,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (new_inode) { - inode_inc_iversion(new_inode); new_inode->i_ctime = CURRENT_TIME; if (unlikely(btrfs_ino(new_inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { @@ -7449,7 +7490,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, cur_offset += ins.offset; *alloc_hint = ins.objectid + ins.offset; - inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -7607,7 +7647,6 @@ static const struct inode_operations btrfs_file_inode_operations = { .permission = btrfs_permission, .fiemap = btrfs_fiemap, .get_acl = btrfs_get_acl, - .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -7618,7 +7657,6 @@ static const struct inode_operations btrfs_special_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .get_acl = btrfs_get_acl, - .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, @@ -7632,7 +7670,6 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .get_acl = btrfs_get_acl, - .update_time = btrfs_update_time, }; const struct dentry_operations btrfs_dentry_operations = { diff --git a/trunk/fs/btrfs/ioctl.c b/trunk/fs/btrfs/ioctl.c index 24b776c08d99..14f8e1faa46e 100644 --- a/trunk/fs/btrfs/ioctl.c +++ b/trunk/fs/btrfs/ioctl.c @@ -261,7 +261,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } btrfs_update_iflags(inode); - inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; ret = 
btrfs_update_inode(trans, root, inode); @@ -368,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, return PTR_ERR(trans); leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, objectid, NULL, 0, 0, 0); + 0, objectid, NULL, 0, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto fail; @@ -2263,12 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) di_args->bytes_used = dev->bytes_used; di_args->total_bytes = dev->total_bytes; memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); - if (dev->name) { + if (dev->name) strncpy(di_args->path, dev->name, sizeof(di_args->path)); - di_args->path[sizeof(di_args->path) - 1] = 0; - } else { + else di_args->path[0] = '\0'; - } out: if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) @@ -2625,7 +2622,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - inode_inc_iversion(inode); inode->i_mtime = inode->i_ctime = CURRENT_TIME; /* @@ -2918,7 +2914,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) up_read(&info->groups_sem); } - user_dest = (struct btrfs_ioctl_space_info __user *) + user_dest = (struct btrfs_ioctl_space_info *) (arg + sizeof(struct btrfs_ioctl_space_args)); if (copy_to_user(user_dest, dest_orig, alloc_size)) @@ -3046,28 +3042,6 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, return ret; } -static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, - void __user *arg, int reset_after_read) -{ - struct btrfs_ioctl_get_dev_stats *sa; - int ret; - - if (reset_after_read && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); - - ret = btrfs_get_dev_stats(root, sa, reset_after_read); - - if (copy_to_user(arg, sa, sizeof(*sa))) - ret = -EFAULT; - - kfree(sa); - return ret; -} - static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, 
void __user *arg) { int ret = 0; @@ -3238,9 +3212,8 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, } } -static long btrfs_ioctl_balance(struct file *file, void __user *arg) +static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) { - struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_ioctl_balance_args *bargs; struct btrfs_balance_control *bctl; @@ -3252,10 +3225,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (fs_info->sb->s_flags & MS_RDONLY) return -EROFS; - ret = mnt_want_write(file->f_path.mnt); - if (ret) - return ret; - mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); @@ -3322,7 +3291,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) out: mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); - mnt_drop_write(file->f_path.mnt); return ret; } @@ -3418,7 +3386,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_DEV_INFO: return btrfs_ioctl_dev_info(root, argp); case BTRFS_IOC_BALANCE: - return btrfs_ioctl_balance(file, NULL); + return btrfs_ioctl_balance(root, NULL); case BTRFS_IOC_CLONE: return btrfs_ioctl_clone(file, arg, 0, 0, 0); case BTRFS_IOC_CLONE_RANGE: @@ -3451,15 +3419,11 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_SCRUB_PROGRESS: return btrfs_ioctl_scrub_progress(root, argp); case BTRFS_IOC_BALANCE_V2: - return btrfs_ioctl_balance(file, argp); + return btrfs_ioctl_balance(root, argp); case BTRFS_IOC_BALANCE_CTL: return btrfs_ioctl_balance_ctl(root, arg); case BTRFS_IOC_BALANCE_PROGRESS: return btrfs_ioctl_balance_progress(root, argp); - case BTRFS_IOC_GET_DEV_STATS: - return btrfs_ioctl_get_dev_stats(root, argp, 0); - case BTRFS_IOC_GET_AND_RESET_DEV_STATS: - return btrfs_ioctl_get_dev_stats(root, argp, 1); } return -ENOTTY; diff --git a/trunk/fs/btrfs/ioctl.h b/trunk/fs/btrfs/ioctl.h index 
497c530724cf..086e6bdae1c4 100644 --- a/trunk/fs/btrfs/ioctl.h +++ b/trunk/fs/btrfs/ioctl.h @@ -266,35 +266,6 @@ struct btrfs_ioctl_logical_ino_args { __u64 inodes; }; -enum btrfs_dev_stat_values { - /* disk I/O failure stats */ - BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ - - /* stats for indirect indications for I/O failures */ - BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or - * contents is illegal: this is an - * indication that the block was damaged - * during read or write, or written to - * wrong location or read from wrong - * location */ - BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not - * been written */ - - BTRFS_DEV_STAT_VALUES_MAX -}; - -struct btrfs_ioctl_get_dev_stats { - __u64 devid; /* in */ - __u64 nr_items; /* in/out */ - - /* out values: */ - __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; - - __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ -}; - #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -359,9 +330,5 @@ struct btrfs_ioctl_get_dev_stats { struct btrfs_ioctl_ino_path_args) #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ - struct btrfs_ioctl_get_dev_stats) -#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ - struct btrfs_ioctl_get_dev_stats) #endif diff --git a/trunk/fs/btrfs/ordered-data.c b/trunk/fs/btrfs/ordered-data.c index 9e138cdc36c5..bbf6d0d9aebe 100644 --- a/trunk/fs/btrfs/ordered-data.c +++ b/trunk/fs/btrfs/ordered-data.c @@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->len = len; entry->disk_len = disk_len; entry->bytes_left = len; - 
entry->inode = igrab(inode); + entry->inode = inode; entry->compress_type = compress_type; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, trace_btrfs_ordered_extent_add(inode, entry); - spin_lock_irq(&tree->lock); + spin_lock(&tree->lock); node = tree_insert(&tree->tree, file_offset, &entry->rb_node); if (node) ordered_data_tree_panic(inode, -EEXIST, file_offset); - spin_unlock_irq(&tree->lock); + spin_unlock(&tree->lock); spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); list_add_tail(&entry->root_extent_list, @@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_inode_tree *tree; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irq(&tree->lock); + spin_lock(&tree->lock); list_add_tail(&sum->list, &entry->list); - spin_unlock_irq(&tree->lock); + spin_unlock(&tree->lock); } /* @@ -283,19 +283,18 @@ void btrfs_add_ordered_sum(struct inode *inode, */ int btrfs_dec_test_first_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 *file_offset, u64 io_size, int uptodate) + u64 *file_offset, u64 io_size) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; int ret; - unsigned long flags; u64 dec_end; u64 dec_start; u64 to_dec; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irqsave(&tree->lock, flags); + spin_lock(&tree->lock); node = tree_search(tree, *file_offset); if (!node) { ret = 1; @@ -324,9 +323,6 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, (unsigned long long)to_dec); } entry->bytes_left -= to_dec; - if (!uptodate) - set_bit(BTRFS_ORDERED_IOERR, &entry->flags); - if (entry->bytes_left == 0) ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); else @@ -336,7 +332,7 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, *cached = entry; 
atomic_inc(&entry->refs); } - spin_unlock_irqrestore(&tree->lock, flags); + spin_unlock(&tree->lock); return ret == 0; } @@ -351,21 +347,15 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, */ int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 file_offset, u64 io_size, int uptodate) + u64 file_offset, u64 io_size) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; - unsigned long flags; int ret; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irqsave(&tree->lock, flags); - if (cached && *cached) { - entry = *cached; - goto have_entry; - } - + spin_lock(&tree->lock); node = tree_search(tree, file_offset); if (!node) { ret = 1; @@ -373,7 +363,6 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, } entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); -have_entry: if (!offset_in_entry(entry, file_offset)) { ret = 1; goto out; @@ -385,9 +374,6 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, (unsigned long long)io_size); } entry->bytes_left -= io_size; - if (!uptodate) - set_bit(BTRFS_ORDERED_IOERR, &entry->flags); - if (entry->bytes_left == 0) ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); else @@ -397,7 +383,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, *cached = entry; atomic_inc(&entry->refs); } - spin_unlock_irqrestore(&tree->lock, flags); + spin_unlock(&tree->lock); return ret == 0; } @@ -413,8 +399,6 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) trace_btrfs_ordered_extent_put(entry->inode, entry); if (atomic_dec_and_test(&entry->refs)) { - if (entry->inode) - btrfs_add_delayed_iput(entry->inode); while (!list_empty(&entry->list)) { cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); @@ -427,22 +411,21 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) /* * remove an ordered extent from the tree. 
No references are dropped - * and waiters are woken up. + * and you must wake_up entry->wait. You must hold the tree lock + * while you call this function. */ -void btrfs_remove_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry) +static void __btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; struct btrfs_root *root = BTRFS_I(inode)->root; struct rb_node *node; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irq(&tree->lock); node = &entry->rb_node; rb_erase(node, &tree->tree); tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); - spin_unlock_irq(&tree->lock); spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); @@ -459,6 +442,21 @@ void btrfs_remove_ordered_extent(struct inode *inode, list_del_init(&BTRFS_I(inode)->ordered_operations); } spin_unlock(&root->fs_info->ordered_extent_lock); +} + +/* + * remove an ordered extent from the tree. No references are dropped + * but any waiters are woken. + */ +void btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) +{ + struct btrfs_ordered_inode_tree *tree; + + tree = &BTRFS_I(inode)->ordered_tree; + spin_lock(&tree->lock); + __btrfs_remove_ordered_extent(inode, entry); + spin_unlock(&tree->lock); wake_up(&entry->wait); } @@ -623,11 +621,19 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) if (orig_end > INT_LIMIT(loff_t)) orig_end = INT_LIMIT(loff_t); } - +again: /* start IO across the range first to instantiate any delalloc * extents */ - filemap_write_and_wait_range(inode->i_mapping, start, orig_end); + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + /* The compression code will leave pages locked but return from + * writepage without setting the page writeback. Starting again + * with WB_SYNC_ALL will end up waiting for the IO to actually start. 
+ */ + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; found = 0; @@ -651,6 +657,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) break; end--; } + if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, + EXTENT_DELALLOC, 0, NULL)) { + schedule_timeout(1); + goto again; + } } /* @@ -665,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry = NULL; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irq(&tree->lock); + spin_lock(&tree->lock); node = tree_search(tree, file_offset); if (!node) goto out; @@ -676,7 +687,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, if (entry) atomic_inc(&entry->refs); out: - spin_unlock_irq(&tree->lock); + spin_unlock(&tree->lock); return entry; } @@ -692,7 +703,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, struct btrfs_ordered_extent *entry = NULL; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irq(&tree->lock); + spin_lock(&tree->lock); node = tree_search(tree, file_offset); if (!node) { node = tree_search(tree, file_offset + len); @@ -717,7 +728,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, out: if (entry) atomic_inc(&entry->refs); - spin_unlock_irq(&tree->lock); + spin_unlock(&tree->lock); return entry; } @@ -733,7 +744,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) struct btrfs_ordered_extent *entry = NULL; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock_irq(&tree->lock); + spin_lock(&tree->lock); node = tree_search(tree, file_offset); if (!node) goto out; @@ -741,7 +752,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); atomic_inc(&entry->refs); out: - spin_unlock_irq(&tree->lock); + 
spin_unlock(&tree->lock); return entry; } @@ -753,6 +764,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered) { struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 disk_i_size; u64 new_i_size; u64 i_size_test; @@ -767,7 +779,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, else offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); - spin_lock_irq(&tree->lock); + spin_lock(&tree->lock); disk_i_size = BTRFS_I(inode)->disk_i_size; /* truncate file */ @@ -785,6 +797,14 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, goto out; } + /* + * we can't update the disk_isize if there are delalloc bytes + * between disk_i_size and this ordered extent + */ + if (test_range_bit(io_tree, disk_i_size, offset - 1, + EXTENT_DELALLOC, 0, NULL)) { + goto out; + } /* * walk backward from this ordered extent to disk_i_size. * if we find an ordered extent then we can't update disk i_size @@ -805,18 +825,15 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, } node = prev; } - for (; node; node = rb_prev(node)) { + while (node) { test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - - /* We treat this entry as if it doesnt exist */ - if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) - continue; if (test->file_offset + test->len <= disk_i_size) break; if (test->file_offset >= i_size) break; if (test->file_offset >= disk_i_size) goto out; + node = rb_prev(node); } new_i_size = min_t(u64, offset, i_size); @@ -834,49 +851,43 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, else node = rb_first(&tree->tree); } - - /* - * We are looking for an area between our current extent and the next - * ordered extent to update the i_size to. There are 3 cases here - * - * 1) We don't actually have anything and we can update to i_size. 
- * 2) We have stuff but they already did their i_size update so again we - * can just update to i_size. - * 3) We have an outstanding ordered extent so the most we can update - * our disk_i_size to is the start of the next offset. - */ - i_size_test = i_size; - for (; node; node = rb_next(node)) { + i_size_test = 0; + if (node) { + /* + * do we have an area where IO might have finished + * between our ordered extent and the next one. + */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - - if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) - continue; - if (test->file_offset > offset) { + if (test->file_offset > offset) i_size_test = test->file_offset; - break; - } + } else { + i_size_test = i_size; } /* * i_size_test is the end of a region after this ordered - * extent where there are no ordered extents, we can safely set - * disk_i_size to this. + * extent where there are no ordered extents. As long as there + * are no delalloc bytes in this area, it is safe to update + * disk_i_size to the end of the region. */ - if (i_size_test > offset) + if (i_size_test > offset && + !test_range_bit(io_tree, offset, i_size_test - 1, + EXTENT_DELALLOC, 0, NULL)) { new_i_size = min_t(u64, i_size_test, i_size); + } BTRFS_I(inode)->disk_i_size = new_i_size; ret = 0; out: /* - * We need to do this because we can't remove ordered extents until - * after the i_disk_size has been updated and then the inode has been - * updated to reflect the change, so we need to tell anybody who finds - * this ordered extent that we've already done all the real work, we - * just haven't completed all the other work. 
+ * we need to remove the ordered extent with the tree lock held + * so that other people calling this function don't find our fully + * processed ordered entry and skip updating the i_size */ if (ordered) - set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); - spin_unlock_irq(&tree->lock); + __btrfs_remove_ordered_extent(inode, ordered); + spin_unlock(&tree->lock); + if (ordered) + wake_up(&ordered->wait); return ret; } @@ -901,7 +912,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, if (!ordered) return 1; - spin_lock_irq(&tree->lock); + spin_lock(&tree->lock); list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { if (disk_bytenr >= ordered_sum->bytenr) { num_sectors = ordered_sum->len / sectorsize; @@ -916,7 +927,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, } } out: - spin_unlock_irq(&tree->lock); + spin_unlock(&tree->lock); btrfs_put_ordered_extent(ordered); return ret; } diff --git a/trunk/fs/btrfs/ordered-data.h b/trunk/fs/btrfs/ordered-data.h index e03c560d2997..c355ad4dc1a6 100644 --- a/trunk/fs/btrfs/ordered-data.h +++ b/trunk/fs/btrfs/ordered-data.h @@ -74,12 +74,6 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ -#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ - -#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent - * has done its due diligence in updating - * the isize. 
*/ - struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -119,8 +113,6 @@ struct btrfs_ordered_extent { /* a per root list of all the pending ordered extents */ struct list_head root_extent_list; - - struct btrfs_work work; }; @@ -151,11 +143,10 @@ void btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry); int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 file_offset, u64 io_size, int uptodate); + u64 file_offset, u64 io_size); int btrfs_dec_test_first_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 *file_offset, u64 io_size, - int uptodate); + u64 *file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, diff --git a/trunk/fs/btrfs/print-tree.c b/trunk/fs/btrfs/print-tree.c index 5e23684887eb..f38e452486b8 100644 --- a/trunk/fs/btrfs/print-tree.c +++ b/trunk/fs/btrfs/print-tree.c @@ -294,9 +294,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_dev_extent_chunk_offset(l, dev_extent), (unsigned long long) btrfs_dev_extent_length(l, dev_extent)); - case BTRFS_DEV_STATS_KEY: - printk(KERN_INFO "\t\tdevice stats\n"); - break; }; } } diff --git a/trunk/fs/btrfs/reada.c b/trunk/fs/btrfs/reada.c index 48a4882d8ad5..ac5d01085884 100644 --- a/trunk/fs/btrfs/reada.c +++ b/trunk/fs/btrfs/reada.c @@ -718,18 +718,13 @@ static void reada_start_machine_worker(struct btrfs_work *work) { struct reada_machine_work *rmw; struct btrfs_fs_info *fs_info; - int old_ioprio; rmw = container_of(work, struct reada_machine_work, work); fs_info = rmw->fs_info; kfree(rmw); - old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), - task_nice_ioprio(current)); - set_task_ioprio(current, BTRFS_IOPRIO_READA); __reada_start_machine(fs_info); - set_task_ioprio(current, 
old_ioprio); } static void __reada_start_machine(struct btrfs_fs_info *fs_info) diff --git a/trunk/fs/btrfs/scrub.c b/trunk/fs/btrfs/scrub.c index a38cfa4f251e..2f3d6f917fb3 100644 --- a/trunk/fs/btrfs/scrub.c +++ b/trunk/fs/btrfs/scrub.c @@ -50,7 +50,7 @@ struct scrub_dev; struct scrub_page { struct scrub_block *sblock; struct page *page; - struct btrfs_device *dev; + struct block_device *bdev; u64 flags; /* extent flags */ u64 generation; u64 logical; @@ -86,7 +86,6 @@ struct scrub_block { unsigned int header_error:1; unsigned int checksum_error:1; unsigned int no_io_error_seen:1; - unsigned int generation_error:1; /* also sets header_error */ }; }; @@ -676,8 +675,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sdev->stat.read_errors++; sdev->stat.uncorrectable_errors++; spin_unlock(&sdev->stat_lock); - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_READ_ERRS); goto out; } @@ -689,8 +686,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sdev->stat.read_errors++; sdev->stat.uncorrectable_errors++; spin_unlock(&sdev->stat_lock); - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_READ_ERRS); goto out; } BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); @@ -704,8 +699,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sdev->stat.read_errors++; sdev->stat.uncorrectable_errors++; spin_unlock(&sdev->stat_lock); - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_READ_ERRS); goto out; } @@ -732,16 +725,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) spin_unlock(&sdev->stat_lock); if (__ratelimit(&_rs)) scrub_print_warning("i/o error", sblock_to_check); - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_READ_ERRS); } else if (sblock_bad->checksum_error) { spin_lock(&sdev->stat_lock); sdev->stat.csum_errors++; spin_unlock(&sdev->stat_lock); if (__ratelimit(&_rs)) scrub_print_warning("checksum error", sblock_to_check); 
- btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_CORRUPTION_ERRS); } else if (sblock_bad->header_error) { spin_lock(&sdev->stat_lock); sdev->stat.verify_errors++; @@ -749,12 +738,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) if (__ratelimit(&_rs)) scrub_print_warning("checksum/header error", sblock_to_check); - if (sblock_bad->generation_error) - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_GENERATION_ERRS); - else - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_CORRUPTION_ERRS); } if (sdev->readonly) @@ -1015,8 +998,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, page = sblock->pagev + page_index; page->logical = logical; page->physical = bbio->stripes[mirror_index].physical; - /* for missing devices, dev->bdev is NULL */ - page->dev = bbio->stripes[mirror_index].dev; + /* for missing devices, bdev is NULL */ + page->bdev = bbio->stripes[mirror_index].dev->bdev; page->mirror_num = mirror_index + 1; page->page = alloc_page(GFP_NOFS); if (!page->page) { @@ -1060,7 +1043,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, struct scrub_page *page = sblock->pagev + page_num; DECLARE_COMPLETION_ONSTACK(complete); - if (page->dev->bdev == NULL) { + if (page->bdev == NULL) { page->io_error = 1; sblock->no_io_error_seen = 0; continue; @@ -1070,7 +1053,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, bio = bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; - bio->bi_bdev = page->dev->bdev; + bio->bi_bdev = page->bdev; bio->bi_sector = page->physical >> 9; bio->bi_end_io = scrub_complete_bio_end_io; bio->bi_private = &complete; @@ -1119,14 +1102,11 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, h = (struct btrfs_header *)mapped_buffer; if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || + generation != le64_to_cpu(h->generation) || memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || memcmp(h->chunk_tree_uuid, 
fs_info->chunk_tree_uuid, - BTRFS_UUID_SIZE)) { + BTRFS_UUID_SIZE)) sblock->header_error = 1; - } else if (generation != le64_to_cpu(h->generation)) { - sblock->header_error = 1; - sblock->generation_error = 1; - } csum = h->csum; } else { if (!have_csum) @@ -1202,7 +1182,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, bio = bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; - bio->bi_bdev = page_bad->dev->bdev; + bio->bi_bdev = page_bad->bdev; bio->bi_sector = page_bad->physical >> 9; bio->bi_end_io = scrub_complete_bio_end_io; bio->bi_private = &complete; @@ -1216,12 +1196,6 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, /* this will also unplug the queue */ wait_for_completion(&complete); - if (!bio_flagged(bio, BIO_UPTODATE)) { - btrfs_dev_stat_inc_and_print(page_bad->dev, - BTRFS_DEV_STAT_WRITE_ERRS); - bio_put(bio); - return -EIO; - } bio_put(bio); } @@ -1378,8 +1352,7 @@ static int scrub_checksum_super(struct scrub_block *sblock) u64 mapped_size; void *p; u32 crc = ~(u32)0; - int fail_gen = 0; - int fail_cor = 0; + int fail = 0; u64 len; int index; @@ -1390,13 +1363,13 @@ static int scrub_checksum_super(struct scrub_block *sblock) memcpy(on_disk_csum, s->csum, sdev->csum_size); if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) - ++fail_cor; + ++fail; if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) - ++fail_gen; + ++fail; if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) - ++fail_cor; + ++fail; len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; @@ -1421,9 +1394,9 @@ static int scrub_checksum_super(struct scrub_block *sblock) btrfs_csum_final(crc, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) - ++fail_cor; + ++fail; - if (fail_cor + fail_gen) { + if (fail) { /* * if we find an error in a super block, we just report it. 
* They will get written with the next transaction commit @@ -1432,15 +1405,9 @@ static int scrub_checksum_super(struct scrub_block *sblock) spin_lock(&sdev->stat_lock); ++sdev->stat.super_errors; spin_unlock(&sdev->stat_lock); - if (fail_cor) - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - else - btrfs_dev_stat_inc_and_print(sdev->dev, - BTRFS_DEV_STAT_GENERATION_ERRS); } - return fail_cor + fail_gen; + return fail; } static void scrub_block_get(struct scrub_block *sblock) @@ -1584,7 +1551,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, return -ENOMEM; } spage->sblock = sblock; - spage->dev = sdev->dev; + spage->bdev = sdev->dev->bdev; spage->flags = flags; spage->generation = gen; spage->logical = logical; diff --git a/trunk/fs/btrfs/super.c b/trunk/fs/btrfs/super.c index 96eb9fef7bd2..c5f8fca4195f 100644 --- a/trunk/fs/btrfs/super.c +++ b/trunk/fs/btrfs/super.c @@ -188,8 +188,7 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) 
va_start(args, fmt); if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { - memcpy(lvl, fmt, 3); - lvl[3] = '\0'; + strncpy(lvl, fmt, 3); fmt += 3; type = logtypes[fmt[1] - '0']; } else @@ -436,8 +435,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_thread_pool: intarg = 0; match_int(&args[0], &intarg); - if (intarg) + if (intarg) { info->thread_pool_size = intarg; + printk(KERN_INFO "btrfs: thread pool %d\n", + info->thread_pool_size); + } break; case Opt_max_inline: num = match_strdup(&args[0]); @@ -767,7 +769,7 @@ static int btrfs_fill_super(struct super_block *sb, #ifdef CONFIG_BTRFS_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif - sb->s_flags |= MS_I_VERSION; + err = open_ctree(sb, fs_devices, (char *)data); if (err) { printk("btrfs: open_ctree failed\n"); @@ -923,48 +925,63 @@ static inline int is_subvolume_inode(struct inode *inode) */ static char *setup_root_args(char *args) { - unsigned len = strlen(args) + 2 + 1; - char *src, *dst, *buf; + unsigned copied = 0; + unsigned len = strlen(args) + 2; + char *pos; + char *ret; /* - * We need the same args as before, but with this substitution: - * s!subvol=[^,]+!subvolid=0! + * We need the same args as before, but minus + * + * subvol=a + * + * and add + * + * subvolid=0 * - * Since the replacement string is up to 2 bytes longer than the - * original, allocate strlen(args) + 2 + 1 bytes. + * which is a difference of 2 characters, so we allocate strlen(args) + + * 2 characters. */ - - src = strstr(args, "subvol="); - /* This shouldn't happen, but just in case.. */ - if (!src) + ret = kzalloc(len * sizeof(char), GFP_NOFS); + if (!ret) return NULL; + pos = strstr(args, "subvol="); - buf = dst = kmalloc(len, GFP_NOFS); - if (!buf) + /* This shouldn't happen, but just in case.. */ + if (!pos) { + kfree(ret); return NULL; + } /* - * If the subvol= arg is not at the start of the string, - * copy whatever precedes it into buf. 
+ * The subvol=<> arg is not at the front of the string, copy everybody + * up to that into ret. */ - if (src != args) { - *src++ = '\0'; - strcpy(buf, args); - dst += strlen(args); + if (pos != args) { + *pos = '\0'; + strcpy(ret, args); + copied += strlen(args); + pos++; } - strcpy(dst, "subvolid=0"); - dst += strlen("subvolid=0"); + strncpy(ret + copied, "subvolid=0", len - copied); + + /* Length of subvolid=0 */ + copied += 10; /* - * If there is a "," after the original subvol=... string, - * copy that suffix into our buffer. Otherwise, we're done. + * If there is no , after the subvol= option then we know there's no + * other options and we can just return. */ - src = strchr(src, ','); - if (src) - strcpy(dst, src); + pos = strchr(pos, ','); + if (!pos) + return ret; - return buf; + /* Copy the rest of the arguments into our buffer */ + strncpy(ret + copied, pos, len - copied); + copied += strlen(pos); + + return ret; } static struct dentry *mount_subvol(const char *subvol_name, int flags, @@ -1101,40 +1118,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, return ERR_PTR(error); } -static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) -{ - spin_lock_irq(&workers->lock); - workers->max_workers = new_limit; - spin_unlock_irq(&workers->lock); -} - -static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, - int new_pool_size, int old_pool_size) -{ - if (new_pool_size == old_pool_size) - return; - - fs_info->thread_pool_size = new_pool_size; - - printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n", - old_pool_size, new_pool_size); - - btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); - btrfs_set_max_workers(&fs_info->workers, new_pool_size); - btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); - 
btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); - btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); - btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); -} - static int btrfs_remount(struct super_block *sb, int *flags, char *data) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); @@ -1154,9 +1137,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) goto restore; } - btrfs_resize_thread_pool(fs_info, - fs_info->thread_pool_size, old_thread_pool_size); - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; @@ -1200,8 +1180,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) fs_info->compress_type = old_compress_type; fs_info->max_inline = old_max_inline; fs_info->alloc_start = old_alloc_start; - btrfs_resize_thread_pool(fs_info, - old_thread_pool_size, fs_info->thread_pool_size); + fs_info->thread_pool_size = old_thread_pool_size; fs_info->metadata_ratio = old_metadata_ratio; return ret; } diff --git a/trunk/fs/btrfs/transaction.c b/trunk/fs/btrfs/transaction.c index 1791c6e3d834..36422254ef67 100644 --- a/trunk/fs/btrfs/transaction.c +++ b/trunk/fs/btrfs/transaction.c @@ -28,7 +28,6 @@ #include "locking.h" #include "tree-log.h" #include "inode-map.h" -#include "volumes.h" #define BTRFS_ROOT_TRANS_TAG 0 @@ -56,49 +55,48 @@ static noinline void switch_commit_root(struct btrfs_root *root) static noinline int join_transaction(struct btrfs_root *root, int nofail) { struct btrfs_transaction *cur_trans; - struct btrfs_fs_info *fs_info = root->fs_info; - 
spin_lock(&fs_info->trans_lock); + spin_lock(&root->fs_info->trans_lock); loop: /* The file system has been taken offline. No new transactions. */ - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - spin_unlock(&fs_info->trans_lock); + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + spin_unlock(&root->fs_info->trans_lock); return -EROFS; } - if (fs_info->trans_no_join) { + if (root->fs_info->trans_no_join) { if (!nofail) { - spin_unlock(&fs_info->trans_lock); + spin_unlock(&root->fs_info->trans_lock); return -EBUSY; } } - cur_trans = fs_info->running_transaction; + cur_trans = root->fs_info->running_transaction; if (cur_trans) { if (cur_trans->aborted) { - spin_unlock(&fs_info->trans_lock); + spin_unlock(&root->fs_info->trans_lock); return cur_trans->aborted; } atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); cur_trans->num_joined++; - spin_unlock(&fs_info->trans_lock); + spin_unlock(&root->fs_info->trans_lock); return 0; } - spin_unlock(&fs_info->trans_lock); + spin_unlock(&root->fs_info->trans_lock); cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); if (!cur_trans) return -ENOMEM; - spin_lock(&fs_info->trans_lock); - if (fs_info->running_transaction) { + spin_lock(&root->fs_info->trans_lock); + if (root->fs_info->running_transaction) { /* * someone started a transaction after we unlocked. Make sure * to redo the trans_no_join checks above */ kmem_cache_free(btrfs_transaction_cachep, cur_trans); - cur_trans = fs_info->running_transaction; + cur_trans = root->fs_info->running_transaction; goto loop; } @@ -123,38 +121,20 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) cur_trans->delayed_refs.flushing = 0; cur_trans->delayed_refs.run_delayed_start = 0; cur_trans->delayed_refs.seq = 1; - - /* - * although the tree mod log is per file system and not per transaction, - * the log must never go across transaction boundaries. 
- */ - smp_mb(); - if (!list_empty(&fs_info->tree_mod_seq_list)) { - printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when " - "creating a fresh transaction\n"); - WARN_ON(1); - } - if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) { - printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when " - "creating a fresh transaction\n"); - WARN_ON(1); - } - atomic_set(&fs_info->tree_mod_seq, 0); - init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); INIT_LIST_HEAD(&cur_trans->pending_snapshots); - list_add_tail(&cur_trans->list, &fs_info->trans_list); + list_add_tail(&cur_trans->list, &root->fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, - fs_info->btree_inode->i_mapping); - fs_info->generation++; - cur_trans->transid = fs_info->generation; - fs_info->running_transaction = cur_trans; + root->fs_info->btree_inode->i_mapping); + root->fs_info->generation++; + cur_trans->transid = root->fs_info->generation; + root->fs_info->running_transaction = cur_trans; cur_trans->aborted = 0; - spin_unlock(&fs_info->trans_lock); + spin_unlock(&root->fs_info->trans_lock); return 0; } @@ -778,9 +758,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = btrfs_run_dev_stats(trans, root->fs_info); - BUG_ON(ret); - while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); diff --git a/trunk/fs/btrfs/tree-log.c b/trunk/fs/btrfs/tree-log.c index 2017d0ff511c..eb1ae908582c 100644 --- a/trunk/fs/btrfs/tree-log.c +++ b/trunk/fs/btrfs/tree-log.c @@ -1628,9 +1628,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, int i; int ret; - ret = btrfs_read_buffer(eb, gen); - if (ret) - return ret; + btrfs_read_buffer(eb, gen); level = btrfs_header_level(eb); @@ -1751,11 +1749,7 @@ static noinline int 
walk_down_log_tree(struct btrfs_trans_handle *trans, path->slots[*level]++; if (wc->free) { - ret = btrfs_read_buffer(next, ptr_gen); - if (ret) { - free_extent_buffer(next); - return ret; - } + btrfs_read_buffer(next, ptr_gen); btrfs_tree_lock(next); btrfs_set_lock_blocking(next); @@ -1772,11 +1766,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); continue; } - ret = btrfs_read_buffer(next, ptr_gen); - if (ret) { - free_extent_buffer(next); - return ret; - } + btrfs_read_buffer(next, ptr_gen); WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -2667,8 +2657,6 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, btrfs_release_path(path); } btrfs_release_path(path); - if (ret > 0) - ret = 0; return ret; } @@ -3040,6 +3028,21 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, return ret; } +static int inode_in_log(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = 0; + + mutex_lock(&root->log_mutex); + if (BTRFS_I(inode)->logged_trans == trans->transid && + BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) + ret = 1; + mutex_unlock(&root->log_mutex); + return ret; +} + + /* * helper function around btrfs_log_inode to make sure newly created * parent directories also end up in the log. 
A minimal inode and backref @@ -3080,7 +3083,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - if (btrfs_inode_in_log(inode, trans->transid)) { + if (inode_in_log(trans, inode)) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; } diff --git a/trunk/fs/btrfs/ulist.c b/trunk/fs/btrfs/ulist.c index ab942f46b3dd..12f5147bd2b1 100644 --- a/trunk/fs/btrfs/ulist.c +++ b/trunk/fs/btrfs/ulist.c @@ -23,9 +23,9 @@ * * ulist = ulist_alloc(); * ulist_add(ulist, root); - * ULIST_ITER_INIT(&uiter); + * elem = NULL; * - * while ((elem = ulist_next(ulist, &uiter)) { + * while ((elem = ulist_next(ulist, elem)) { * for (all child nodes n in elem) * ulist_add(ulist, n); * do something useful with the node; @@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit); * * The allocated ulist will be returned in an initialized state. */ -struct ulist *ulist_alloc(gfp_t gfp_mask) +struct ulist *ulist_alloc(unsigned long gfp_mask) { struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); @@ -144,22 +144,13 @@ EXPORT_SYMBOL(ulist_free); * unaltered. */ int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, - gfp_t gfp_mask) -{ - return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); -} - -int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, - unsigned long *old_aux, gfp_t gfp_mask) + unsigned long gfp_mask) { int i; for (i = 0; i < ulist->nnodes; ++i) { - if (ulist->nodes[i].val == val) { - if (old_aux) - *old_aux = ulist->nodes[i].aux; + if (ulist->nodes[i].val == val) return 0; - } } if (ulist->nnodes >= ulist->nodes_alloced) { @@ -197,26 +188,33 @@ EXPORT_SYMBOL(ulist_add); /** * ulist_next - iterate ulist * @ulist: ulist to iterate - * @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator) + * @prev: previously returned element or %NULL to start iteration * * Note: locking must be provided by the caller. In case of rwlocks only read * locking is needed * - * This function is used to iterate an ulist. 
- * It returns the next element from the ulist or %NULL when the + * This function is used to iterate an ulist. The iteration is started with + * @prev = %NULL. It returns the next element from the ulist or %NULL when the * end is reached. No guarantee is made with respect to the order in which * the elements are returned. They might neither be returned in order of * addition nor in ascending order. * It is allowed to call ulist_add during an enumeration. Newly added items * are guaranteed to show up in the running enumeration. */ -struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) +struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev) { + int next; + if (ulist->nnodes == 0) return NULL; - if (uiter->i < 0 || uiter->i >= ulist->nnodes) + + if (!prev) + return &ulist->nodes[0]; + + next = (prev - ulist->nodes) + 1; + if (next < 0 || next >= ulist->nnodes) return NULL; - return &ulist->nodes[uiter->i++]; + return &ulist->nodes[next]; } EXPORT_SYMBOL(ulist_next); diff --git a/trunk/fs/btrfs/ulist.h b/trunk/fs/btrfs/ulist.h index 21bdc8ec8130..2e25dec58ec0 100644 --- a/trunk/fs/btrfs/ulist.h +++ b/trunk/fs/btrfs/ulist.h @@ -24,10 +24,6 @@ */ #define ULIST_SIZE 16 -struct ulist_iterator { - int i; -}; - /* * element of the list */ @@ -63,15 +59,10 @@ struct ulist { void ulist_init(struct ulist *ulist); void ulist_fini(struct ulist *ulist); void ulist_reinit(struct ulist *ulist); -struct ulist *ulist_alloc(gfp_t gfp_mask); +struct ulist *ulist_alloc(unsigned long gfp_mask); void ulist_free(struct ulist *ulist); int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, - gfp_t gfp_mask); -int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, - unsigned long *old_aux, gfp_t gfp_mask); -struct ulist_node *ulist_next(struct ulist *ulist, - struct ulist_iterator *uiter); - -#define ULIST_ITER_INIT(uiter) ((uiter)->i = 0) + unsigned long gfp_mask); +struct ulist_node *ulist_next(struct ulist *ulist, struct 
ulist_node *prev); #endif diff --git a/trunk/fs/btrfs/volumes.c b/trunk/fs/btrfs/volumes.c index 7782020996fe..1411b99555a4 100644 --- a/trunk/fs/btrfs/volumes.c +++ b/trunk/fs/btrfs/volumes.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include "compat.h" @@ -40,8 +39,6 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); static int btrfs_relocate_sys_chunks(struct btrfs_root *root); -static void __btrfs_reset_dev_stats(struct btrfs_device *dev); -static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -364,7 +361,6 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } device->devid = devid; - device->dev_stats_valid = 0; device->work.func = pending_bios_fn; memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); @@ -1637,7 +1633,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) int ret = 0; if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) - return -EROFS; + return -EINVAL; bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, root->fs_info->bdev_holder); @@ -4005,58 +4001,13 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } -static void *merge_stripe_index_into_bio_private(void *bi_private, - unsigned int stripe_index) -{ - /* - * with single, dup, RAID0, RAID1 and RAID10, stripe_index is - * at most 1. - * The alternative solution (instead of stealing bits from the - * pointer) would be to allocate an intermediate structure - * that contains the old private pointer plus the stripe_index. 
- */ - BUG_ON((((uintptr_t)bi_private) & 3) != 0); - BUG_ON(stripe_index > 3); - return (void *)(((uintptr_t)bi_private) | stripe_index); -} - -static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) -{ - return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); -} - -static unsigned int extract_stripe_index_from_bio_private(void *bi_private) -{ - return (unsigned int)((uintptr_t)bi_private) & 3; -} - static void btrfs_end_bio(struct bio *bio, int err) { - struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); + struct btrfs_bio *bbio = bio->bi_private; int is_orig_bio = 0; - if (err) { + if (err) atomic_inc(&bbio->error); - if (err == -EIO || err == -EREMOTEIO) { - unsigned int stripe_index = - extract_stripe_index_from_bio_private( - bio->bi_private); - struct btrfs_device *dev; - - BUG_ON(stripe_index >= bbio->num_stripes); - dev = bbio->stripes[stripe_index].dev; - if (bio->bi_rw & WRITE) - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_WRITE_ERRS); - else - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_READ_ERRS); - if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_FLUSH_ERRS); - btrfs_dev_stat_print_on_error(dev); - } - } if (bio == bbio->orig_bio) is_orig_bio = 1; @@ -4198,8 +4149,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, bio = first_bio; } bio->bi_private = bbio; - bio->bi_private = merge_stripe_index_into_bio_private( - bio->bi_private, (unsigned int)dev_nr); bio->bi_end_io = btrfs_end_bio; bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; dev = bbio->stripes[dev_nr].dev; @@ -4560,28 +4509,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) return ret; } -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num) -{ - struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; - int ret; - u64 map_length = 0; - struct btrfs_bio *bbio = NULL; - struct btrfs_device *device; - - 
BUG_ON(mirror_num == 0); - ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, - mirror_num); - if (ret) { - BUG_ON(bbio != NULL); - return NULL; - } - BUG_ON(mirror_num != bbio->mirror_num); - device = bbio->stripes[mirror_num - 1].dev; - kfree(bbio); - return device; -} - int btrfs_read_chunk_tree(struct btrfs_root *root) { struct btrfs_path *path; @@ -4656,230 +4583,3 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) btrfs_free_path(path); return ret; } - -static void __btrfs_reset_dev_stats(struct btrfs_device *dev) -{ - int i; - - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) - btrfs_dev_stat_reset(dev, i); -} - -int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) -{ - struct btrfs_key key; - struct btrfs_key found_key; - struct btrfs_root *dev_root = fs_info->dev_root; - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; - struct extent_buffer *eb; - int slot; - int ret = 0; - struct btrfs_device *device; - struct btrfs_path *path = NULL; - int i; - - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) { - int item_size; - struct btrfs_dev_stats_item *ptr; - - key.objectid = 0; - key.type = BTRFS_DEV_STATS_KEY; - key.offset = device->devid; - ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); - if (ret) { - printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", - device->name, (unsigned long long)device->devid); - __btrfs_reset_dev_stats(device); - device->dev_stats_valid = 1; - btrfs_release_path(path); - continue; - } - slot = path->slots[0]; - eb = path->nodes[0]; - btrfs_item_key_to_cpu(eb, &found_key, slot); - item_size = btrfs_item_size_nr(eb, slot); - - ptr = btrfs_item_ptr(eb, slot, - struct btrfs_dev_stats_item); - - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { - if (item_size >= (1 + i) * sizeof(__le64)) - 
btrfs_dev_stat_set(device, i, - btrfs_dev_stats_value(eb, ptr, i)); - else - btrfs_dev_stat_reset(device, i); - } - - device->dev_stats_valid = 1; - btrfs_dev_stat_print_on_load(device); - btrfs_release_path(path); - } - mutex_unlock(&fs_devices->device_list_mutex); - -out: - btrfs_free_path(path); - return ret < 0 ? ret : 0; -} - -static int update_dev_stat_item(struct btrfs_trans_handle *trans, - struct btrfs_root *dev_root, - struct btrfs_device *device) -{ - struct btrfs_path *path; - struct btrfs_key key; - struct extent_buffer *eb; - struct btrfs_dev_stats_item *ptr; - int ret; - int i; - - key.objectid = 0; - key.type = BTRFS_DEV_STATS_KEY; - key.offset = device->devid; - - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); - if (ret < 0) { - printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", - ret, device->name); - goto out; - } - - if (ret == 0 && - btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { - /* need to delete old one and insert a new one */ - ret = btrfs_del_item(trans, dev_root, path); - if (ret != 0) { - printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", - device->name, ret); - goto out; - } - ret = 1; - } - - if (ret == 1) { - /* need to insert a new item */ - btrfs_release_path(path); - ret = btrfs_insert_empty_item(trans, dev_root, path, - &key, sizeof(*ptr)); - if (ret < 0) { - printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", - device->name, ret); - goto out; - } - } - - eb = path->nodes[0]; - ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item); - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) - btrfs_set_dev_stats_value(eb, ptr, i, - btrfs_dev_stat_read(device, i)); - btrfs_mark_buffer_dirty(eb); - -out: - btrfs_free_path(path); - return ret; -} - -/* - * called from commit_transaction. Writes all changed device stats to disk. 
- */ -int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_root *dev_root = fs_info->dev_root; - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; - struct btrfs_device *device; - int ret = 0; - - mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) { - if (!device->dev_stats_valid || !device->dev_stats_dirty) - continue; - - ret = update_dev_stat_item(trans, dev_root, device); - if (!ret) - device->dev_stats_dirty = 0; - } - mutex_unlock(&fs_devices->device_list_mutex); - - return ret; -} - -void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index) -{ - btrfs_dev_stat_inc(dev, index); - btrfs_dev_stat_print_on_error(dev); -} - -void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) -{ - if (!dev->dev_stats_valid) - return; - printk_ratelimited(KERN_ERR - "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", - dev->name, - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), - btrfs_dev_stat_read(dev, - BTRFS_DEV_STAT_CORRUPTION_ERRS), - btrfs_dev_stat_read(dev, - BTRFS_DEV_STAT_GENERATION_ERRS)); -} - -static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) -{ - printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", - dev->name, - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS), - btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS)); -} - -int btrfs_get_dev_stats(struct btrfs_root *root, - struct btrfs_ioctl_get_dev_stats *stats, - int reset_after_read) -{ - struct btrfs_device *dev; - struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; - int i; - - 
mutex_lock(&fs_devices->device_list_mutex); - dev = btrfs_find_device(root, stats->devid, NULL, NULL); - mutex_unlock(&fs_devices->device_list_mutex); - - if (!dev) { - printk(KERN_WARNING - "btrfs: get dev_stats failed, device not found\n"); - return -ENODEV; - } else if (!dev->dev_stats_valid) { - printk(KERN_WARNING - "btrfs: get dev_stats failed, not yet valid\n"); - return -ENODEV; - } else if (reset_after_read) { - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { - if (stats->nr_items > i) - stats->values[i] = - btrfs_dev_stat_read_and_reset(dev, i); - else - btrfs_dev_stat_reset(dev, i); - } - } else { - for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) - if (stats->nr_items > i) - stats->values[i] = btrfs_dev_stat_read(dev, i); - } - if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX) - stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; - return 0; -} diff --git a/trunk/fs/btrfs/volumes.h b/trunk/fs/btrfs/volumes.h index 3406a88ca83e..bb6b03f97aaa 100644 --- a/trunk/fs/btrfs/volumes.h +++ b/trunk/fs/btrfs/volumes.h @@ -22,7 +22,6 @@ #include #include #include "async-thread.h" -#include "ioctl.h" #define BTRFS_STRIPE_LEN (64 * 1024) @@ -107,11 +106,6 @@ struct btrfs_device { struct completion flush_wait; int nobarriers; - /* disk I/O failure stats. 
For detailed description refer to - * enum btrfs_dev_stat_values in ioctl.h */ - int dev_stats_valid; - int dev_stats_dirty; /* counters need to be written to disk */ - atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; }; struct btrfs_fs_devices { @@ -287,50 +281,4 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); -struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, - u64 logical, int mirror_num); -void btrfs_dev_stat_print_on_error(struct btrfs_device *device); -void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); -int btrfs_get_dev_stats(struct btrfs_root *root, - struct btrfs_ioctl_get_dev_stats *stats, - int reset_after_read); -int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); -int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); - -static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, - int index) -{ - atomic_inc(dev->dev_stat_values + index); - dev->dev_stats_dirty = 1; -} - -static inline int btrfs_dev_stat_read(struct btrfs_device *dev, - int index) -{ - return atomic_read(dev->dev_stat_values + index); -} - -static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, - int index) -{ - int ret; - - ret = atomic_xchg(dev->dev_stat_values + index, 0); - dev->dev_stats_dirty = 1; - return ret; -} - -static inline void btrfs_dev_stat_set(struct btrfs_device *dev, - int index, unsigned long val) -{ - atomic_set(dev->dev_stat_values + index, val); - dev->dev_stats_dirty = 1; -} - -static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, - int index) -{ - btrfs_dev_stat_set(dev, index, 0); -} #endif diff --git a/trunk/fs/btrfs/xattr.c b/trunk/fs/btrfs/xattr.c index 3f4e2d69e83a..e7a5659087e6 100644 --- a/trunk/fs/btrfs/xattr.c +++ b/trunk/fs/btrfs/xattr.c @@ -196,7 
+196,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, if (ret) goto out; - inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); diff --git a/trunk/fs/buffer.c b/trunk/fs/buffer.c index 838a9cf246bd..ad5938ca357c 100644 --- a/trunk/fs/buffer.c +++ b/trunk/fs/buffer.c @@ -3152,7 +3152,7 @@ SYSCALL_DEFINE2(bdflush, int, func, long, data) /* * Buffer-head allocation */ -static struct kmem_cache *bh_cachep __read_mostly; +static struct kmem_cache *bh_cachep; /* * Once the number of bh's in the machine exceeds this level, we start diff --git a/trunk/fs/ceph/export.c b/trunk/fs/ceph/export.c index 8e1b60e557b6..fbb2a643ef10 100644 --- a/trunk/fs/ceph/export.c +++ b/trunk/fs/ceph/export.c @@ -40,49 +40,38 @@ struct ceph_nfs_confh { u32 parent_name_hash; } __attribute__ ((packed)); -/* - * The presence of @parent_inode here tells us whether NFS wants a - * connectable file handle. However, we want to make a connectionable - * file handle unconditionally so that the MDS gets as much of a hint - * as possible. That means we only use @parent_dentry to indicate - * whether nfsd wants a connectable fh, and whether we should indicate - * failure from a too-small @max_len. 
- */ -static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, - struct inode *parent_inode) +static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, + int connectable) { int type; struct ceph_nfs_fh *fh = (void *)rawfh; struct ceph_nfs_confh *cfh = (void *)rawfh; + struct dentry *parent; + struct inode *inode = dentry->d_inode; int connected_handle_length = sizeof(*cfh)/4; int handle_length = sizeof(*fh)/4; - struct dentry *dentry = d_find_alias(inode); - struct dentry *parent; /* don't re-export snaps */ if (ceph_snap(inode) != CEPH_NOSNAP) return -EINVAL; - /* if we found an alias, generate a connectable fh */ - if (*max_len >= connected_handle_length && dentry) { + spin_lock(&dentry->d_lock); + parent = dentry->d_parent; + if (*max_len >= connected_handle_length) { dout("encode_fh %p connectable\n", dentry); - spin_lock(&dentry->d_lock); - parent = dentry->d_parent; - cfh->ino = ceph_ino(inode); + cfh->ino = ceph_ino(dentry->d_inode); cfh->parent_ino = ceph_ino(parent->d_inode); cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, dentry); *max_len = connected_handle_length; type = 2; - spin_unlock(&dentry->d_lock); } else if (*max_len >= handle_length) { - if (parent_inode) { - /* nfsd wants connectable */ + if (connectable) { *max_len = connected_handle_length; type = 255; } else { dout("encode_fh %p\n", dentry); - fh->ino = ceph_ino(inode); + fh->ino = ceph_ino(dentry->d_inode); *max_len = handle_length; type = 1; } @@ -90,6 +79,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = handle_length; type = 255; } + spin_unlock(&dentry->d_lock); return type; } diff --git a/trunk/fs/compat.c b/trunk/fs/compat.c index 3004d2bad13f..3adf3d4c2cd9 100644 --- a/trunk/fs/compat.c +++ b/trunk/fs/compat.c @@ -871,12 +871,12 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, { int error; struct file *file; - int fput_needed; struct compat_readdir_callback buf; - file = fget_light(fd, 
&fput_needed); + error = -EBADF; + file = fget(fd); if (!file) - return -EBADF; + goto out; buf.result = 0; buf.dirent = dirent; @@ -885,7 +885,8 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, if (buf.result) error = buf.result; - fput_light(file, fput_needed); + fput(file); +out: return error; } @@ -952,15 +953,16 @@ asmlinkage long compat_sys_getdents(unsigned int fd, struct file * file; struct compat_linux_dirent __user * lastdirent; struct compat_getdents_callback buf; - int fput_needed; int error; + error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - return -EFAULT; + goto out; - file = fget_light(fd, &fput_needed); + error = -EBADF; + file = fget(fd); if (!file) - return -EBADF; + goto out; buf.current_dir = dirent; buf.previous = NULL; @@ -977,7 +979,8 @@ asmlinkage long compat_sys_getdents(unsigned int fd, else error = count - buf.count; } - fput_light(file, fput_needed); + fput(file); +out: return error; } @@ -1038,15 +1041,16 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, struct file * file; struct linux_dirent64 __user * lastdirent; struct compat_getdents_callback64 buf; - int fput_needed; int error; + error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - return -EFAULT; + goto out; - file = fget_light(fd, &fput_needed); + error = -EBADF; + file = fget(fd); if (!file) - return -EBADF; + goto out; buf.current_dir = dirent; buf.previous = NULL; @@ -1064,7 +1068,8 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, else error = count - buf.count; } - fput_light(file, fput_needed); + fput(file); +out: return error; } #endif /* ! 
__ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ @@ -1542,7 +1547,6 @@ asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg) compat_ptr(a.exp), compat_ptr(a.tvp)); } -#ifdef HAVE_SET_RESTORE_SIGMASK static long do_compat_pselect(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, @@ -1665,11 +1669,9 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, return ret; } -#endif /* HAVE_SET_RESTORE_SIGMASK */ #ifdef CONFIG_EPOLL -#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events, int maxevents, int timeout, @@ -1713,7 +1715,6 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, return err; } -#endif /* HAVE_SET_RESTORE_SIGMASK */ #endif /* CONFIG_EPOLL */ diff --git a/trunk/fs/dcache.c b/trunk/fs/dcache.c index 85c9e2bff8e6..4435d8b32904 100644 --- a/trunk/fs/dcache.c +++ b/trunk/fs/dcache.c @@ -683,6 +683,8 @@ EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question + * @want_discon: flag, used by d_splice_alias, to request + * that only a DISCONNECTED alias be returned. * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. @@ -691,9 +693,10 @@ EXPORT_SYMBOL(dget_parent); * of a filesystem. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that. + * any other hashed alias over that one unless @want_discon is set, + * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 
*/ -static struct dentry *__d_find_alias(struct inode *inode) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; @@ -705,7 +708,7 @@ static struct dentry *__d_find_alias(struct inode *inode) if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - } else { + } else if (!want_discon) { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; @@ -736,7 +739,7 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); - de = __d_find_alias(inode); + de = __d_find_alias(inode, 0); spin_unlock(&inode->i_lock); } return de; @@ -1647,8 +1650,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - new = __d_find_any_alias(inode); + new = __d_find_alias(inode, 1); if (new) { + BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); @@ -2478,7 +2482,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) struct dentry *alias; /* Does an aliased dentry already exist? 
*/ - alias = __d_find_alias(inode); + alias = __d_find_alias(inode, 0); if (alias) { actual = alias; write_seqlock(&rename_lock); @@ -2571,7 +2575,7 @@ static int prepend_path(const struct path *path, bool slash = false; int error = 0; - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2602,7 +2606,7 @@ static int prepend_path(const struct path *path, error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return error; global_root: diff --git a/trunk/fs/ecryptfs/inode.c b/trunk/fs/ecryptfs/inode.c index a07441a0a878..ab35b113003b 100644 --- a/trunk/fs/ecryptfs/inode.c +++ b/trunk/fs/ecryptfs/inode.c @@ -660,10 +660,11 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); char *lower_buf; + size_t lower_bufsiz = PATH_MAX; mm_segment_t old_fs; int rc; - lower_buf = kmalloc(PATH_MAX, GFP_KERNEL); + lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); if (!lower_buf) { rc = -ENOMEM; goto out; @@ -672,29 +673,58 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, set_fs(get_ds()); rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, (char __user *)lower_buf, - PATH_MAX); + lower_bufsiz); set_fs(old_fs); if (rc < 0) goto out; + lower_bufsiz = rc; rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, - lower_buf, rc); + lower_buf, lower_bufsiz); out: kfree(lower_buf); return rc; } -static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static int +ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) { - char *buf; - size_t len = PATH_MAX; + char *kbuf; + size_t kbufsiz, copied; int rc; - rc = ecryptfs_readlink_lower(dentry, &buf, &len); + rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz); if (rc) goto out; + copied = min_t(size_t, bufsiz, kbufsiz); + rc = 
copy_to_user(buf, kbuf, copied) ? -EFAULT : copied; + kfree(kbuf); fsstack_copy_attr_atime(dentry->d_inode, ecryptfs_dentry_to_lower(dentry)->d_inode); - buf[len] = '\0'; +out: + return rc; +} + +static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *buf; + int len = PAGE_SIZE, rc; + mm_segment_t old_fs; + + /* Released in ecryptfs_put_link(); only release here on error */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + buf = ERR_PTR(-ENOMEM); + goto out; + } + old_fs = get_fs(); + set_fs(get_ds()); + rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); + set_fs(old_fs); + if (rc < 0) { + kfree(buf); + buf = ERR_PTR(rc); + } else + buf[rc] = '\0'; out: nd_set_link(nd, buf); return NULL; @@ -1123,7 +1153,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name) } const struct inode_operations ecryptfs_symlink_iops = { - .readlink = generic_readlink, + .readlink = ecryptfs_readlink, .follow_link = ecryptfs_follow_link, .put_link = ecryptfs_put_link, .permission = ecryptfs_permission, diff --git a/trunk/fs/eventpoll.c b/trunk/fs/eventpoll.c index 079d1be65ba9..74598f67efeb 100644 --- a/trunk/fs/eventpoll.c +++ b/trunk/fs/eventpoll.c @@ -1853,8 +1853,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, return error; } -#ifdef HAVE_SET_RESTORE_SIGMASK - /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). 
@@ -1899,8 +1897,6 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, return error; } -#endif /* HAVE_SET_RESTORE_SIGMASK */ - static int __init eventpoll_init(void) { struct sysinfo si; diff --git a/trunk/fs/exec.c b/trunk/fs/exec.c index a79786a8d2c8..52c9e2ff6e6b 100644 --- a/trunk/fs/exec.c +++ b/trunk/fs/exec.c @@ -280,6 +280,10 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); + err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); + if (err) + goto err; + err = insert_vm_struct(mm, vma); if (err) goto err; diff --git a/trunk/fs/exportfs/expfs.c b/trunk/fs/exportfs/expfs.c index b0201ca6e9c6..b05acb796135 100644 --- a/trunk/fs/exportfs/expfs.c +++ b/trunk/fs/exportfs/expfs.c @@ -304,23 +304,24 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, /** * export_encode_fh - default export_operations->encode_fh function - * @inode: the object to encode + * @dentry: the dentry to encode * @fh: where to store the file handle fragment * @max_len: maximum length to store there - * @parent: parent directory inode, if wanted + * @connectable: whether to store parent information * * This default encode_fh function assumes that the 32 inode number * is suitable for locating an inode, and that the generation number * can be used to check that it is still valid. It places them in the * filehandle fragment where export_decode_fh expects to find them. 
*/ -static int export_encode_fh(struct inode *inode, struct fid *fid, - int *max_len, struct inode *parent) +static int export_encode_fh(struct dentry *dentry, struct fid *fid, + int *max_len, int connectable) { + struct inode * inode = dentry->d_inode; int len = *max_len; int type = FILEID_INO32_GEN; - if (parent && (len < 4)) { + if (connectable && (len < 4)) { *max_len = 4; return 255; } else if (len < 2) { @@ -331,9 +332,14 @@ static int export_encode_fh(struct inode *inode, struct fid *fid, len = 2; fid->i32.ino = inode->i_ino; fid->i32.gen = inode->i_generation; - if (parent) { + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; fid->i32.parent_ino = parent->i_ino; fid->i32.parent_gen = parent->i_generation; + spin_unlock(&dentry->d_lock); len = 4; type = FILEID_INO32_GEN_PARENT; } @@ -346,22 +352,11 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, { const struct export_operations *nop = dentry->d_sb->s_export_op; int error; - struct dentry *p = NULL; - struct inode *inode = dentry->d_inode, *parent = NULL; - if (connectable && !S_ISDIR(inode->i_mode)) { - p = dget_parent(dentry); - /* - * note that while p might've ceased to be our parent already, - * it's still pinned by and still positive. 
- */ - parent = p->d_inode; - } if (nop->encode_fh) - error = nop->encode_fh(inode, fid->raw, max_len, parent); + error = nop->encode_fh(dentry, fid->raw, max_len, connectable); else - error = export_encode_fh(inode, fid, max_len, parent); - dput(p); + error = export_encode_fh(dentry, fid, max_len, connectable); return error; } diff --git a/trunk/fs/ext4/Kconfig b/trunk/fs/ext4/Kconfig index c22f17021b6e..9ed1bb1f319f 100644 --- a/trunk/fs/ext4/Kconfig +++ b/trunk/fs/ext4/Kconfig @@ -2,8 +2,6 @@ config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select JBD2 select CRC16 - select CRYPTO - select CRYPTO_CRC32C help This is the next generation of the ext3 filesystem. diff --git a/trunk/fs/ext4/balloc.c b/trunk/fs/ext4/balloc.c index 99b6324290db..c45c41129a35 100644 --- a/trunk/fs/ext4/balloc.c +++ b/trunk/fs/ext4/balloc.c @@ -168,14 +168,12 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ - if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, "Checksum bad for group %u", block_group); ext4_free_group_clusters_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); return; } memset(bh->b_data, 0, sb->s_blocksize); @@ -212,9 +210,6 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), sb->s_blocksize * 8, bh->b_data); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); } /* Return the number of free blocks in a block group. 
It is used when @@ -281,9 +276,9 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } static int ext4_valid_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) + struct ext4_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) { ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; @@ -330,23 +325,6 @@ static int ext4_valid_block_bitmap(struct super_block *sb, block_group, bitmap_blk); return 0; } - -void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) -{ - if (buffer_verified(bh)) - return; - - ext4_lock_group(sb, block_group); - if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && - ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8)) - set_buffer_verified(bh); - ext4_unlock_group(sb, block_group); -} - /** * ext4_read_block_bitmap() * @sb: super block @@ -377,12 +355,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } if (bitmap_uptodate(bh)) - goto verify; + return bh; lock_buffer(bh); if (bitmap_uptodate(bh)) { unlock_buffer(bh); - goto verify; + return bh; } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { @@ -401,7 +379,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) */ set_bitmap_uptodate(bh); unlock_buffer(bh); - goto verify; + return bh; } /* * submit the buffer_head for reading @@ -412,9 +390,6 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) get_bh(bh); submit_bh(READ, bh); return bh; -verify: - ext4_validate_block_bitmap(sb, desc, block_group, bh); - return bh; } /* Returns 0 on success, 1 on error */ @@ -437,7 +412,7 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, } clear_buffer_new(bh); /* Panic or remount fs read-only if block bitmap 
is invalid */ - ext4_validate_block_bitmap(sb, desc, block_group, bh); + ext4_valid_block_bitmap(sb, desc, block_group, bh); return 0; } diff --git a/trunk/fs/ext4/bitmap.c b/trunk/fs/ext4/bitmap.c index b319721da26a..fa3af81ac565 100644 --- a/trunk/fs/ext4/bitmap.c +++ b/trunk/fs/ext4/bitmap.c @@ -29,86 +29,3 @@ unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) #endif /* EXT4FS_DEBUG */ -int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) -{ - __u32 hi; - __u32 provided, calculated; - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); - calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); - if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) { - hi = le16_to_cpu(gdp->bg_inode_bitmap_csum_hi); - provided |= (hi << 16); - } else - calculated &= 0xFFFF; - - return provided == calculated; -} - -void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) -{ - __u32 csum; - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); - gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); - if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) - gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16); -} - -int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) -{ - __u32 hi; - __u32 provided, calculated; - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - provided = 
le16_to_cpu(gdp->bg_block_bitmap_csum_lo); - calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); - if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) { - hi = le16_to_cpu(gdp->bg_block_bitmap_csum_hi); - provided |= (hi << 16); - } else - calculated &= 0xFFFF; - - if (provided == calculated) - return 1; - - ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); - return 0; -} - -void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) -{ - __u32 csum; - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); - gdp->bg_block_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); - if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) - gdp->bg_block_bitmap_csum_hi = cpu_to_le16(csum >> 16); -} diff --git a/trunk/fs/ext4/dir.c b/trunk/fs/ext4/dir.c index aa39e600d159..b86786202643 100644 --- a/trunk/fs/ext4/dir.c +++ b/trunk/fs/ext4/dir.c @@ -179,18 +179,6 @@ static int ext4_readdir(struct file *filp, continue; } - /* Check the checksum */ - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_FILE(filp, 0, "directory fails checksum " - "at offset %llu", - (unsigned long long)filp->f_pos); - filp->f_pos += sb->s_blocksize - offset; - continue; - } - set_buffer_verified(bh); - revalidate: /* If the dir block has changed since the last call to * readdir(2), then we might be pointing to an invalid diff --git a/trunk/fs/ext4/ext4.h b/trunk/fs/ext4/ext4.h index cfc4e01b3c83..c21b1de51afb 100644 --- a/trunk/fs/ext4/ext4.h +++ b/trunk/fs/ext4/ext4.h @@ -29,7 +29,6 @@ #include #include #include -#include #ifdef __KERNEL__ #include #endif @@ -299,9 +298,7 @@ struct ext4_group_desc __le16 bg_free_inodes_count_lo;/* Free inodes count */ __le16 
bg_used_dirs_count_lo; /* Directories count */ __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ - __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ - __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ - __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ + __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ __le16 bg_itable_unused_lo; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ @@ -311,19 +308,9 @@ struct ext4_group_desc __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ __le16 bg_used_dirs_count_hi; /* Directories count MSB */ __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ - __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ - __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ - __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ - __u32 bg_reserved; + __u32 bg_reserved2[3]; }; -#define EXT4_BG_INODE_BITMAP_CSUM_HI_END \ - (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \ - sizeof(__le16)) -#define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \ - (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \ - sizeof(__le16)) - /* * Structure of a flex block group info */ @@ -663,8 +650,7 @@ struct ext4_inode { __le16 l_i_file_acl_high; __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ - __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ - __le16 l_i_reserved; + __u32 l_i_reserved2; } linux2; struct { __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ @@ -680,7 +666,7 @@ struct ext4_inode { } masix2; } osd2; /* OS dependent 2 */ __le16 i_extra_isize; - __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ + __le16 i_pad1; __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ 
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ @@ -782,7 +768,7 @@ do { \ #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high -#define i_checksum_lo osd2.linux2.l_i_checksum_lo +#define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -922,9 +908,6 @@ struct ext4_inode_info { */ tid_t i_sync_tid; tid_t i_datasync_tid; - - /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ - __u32 i_csum_seed; }; /* @@ -1018,9 +1001,6 @@ extern void ext4_set_bits(void *bm, int cur, int len); #define EXT4_ERRORS_PANIC 3 /* Panic */ #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE -/* Metadata checksum algorithm codes */ -#define EXT4_CRC32C_CHKSUM 1 - /* * Structure of the super block */ @@ -1107,7 +1087,7 @@ struct ext4_super_block { __le64 s_mmp_block; /* Block for multi-mount protection */ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ __u8 s_log_groups_per_flex; /* FLEX_BG group size */ - __u8 s_checksum_type; /* metadata checksum algorithm used */ + __u8 s_reserved_char_pad; __le16 s_reserved_pad; __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ __le32 s_snapshot_inum; /* Inode number of active snapshot */ @@ -1133,8 +1113,7 @@ struct ext4_super_block { __le32 s_usr_quota_inum; /* inode for tracking user quota */ __le32 s_grp_quota_inum; /* inode for tracking group quota */ __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ - __le32 s_reserved[108]; /* Padding to the end of the block */ - __le32 s_checksum; /* crc32c(superblock) */ + __le32 s_reserved[109]; /* Padding to the end of the block */ }; #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) @@ -1197,7 +1176,6 @@ struct ext4_sb_info { struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; - struct super_block *s_sb; /* Journaling */ struct journal_s *s_journal; @@ -1288,12 +1266,6 @@ struct ext4_sb_info { /* record the 
last minlen when FITRIM is called. */ atomic_t s_last_trim_minblks; - - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - - /* Precomputed FS UUID checksum for seeding other checksums */ - __u32 s_csum_seed; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1442,12 +1414,6 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 -/* - * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When - * METADATA_CSUM is set, group descriptor checksums use the same algorithm as - * all other data structures' checksums. However, the METADATA_CSUM and - * GDT_CSUM bits are mutually exclusive. - */ #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -1495,8 +1461,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ - EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) + EXT4_FEATURE_RO_COMPAT_BIGALLOC) /* * Default values for user and/or group using reserved blocks @@ -1561,18 +1526,6 @@ struct ext4_dir_entry_2 { char name[EXT4_NAME_LEN]; /* File name */ }; -/* - * This is a bogus directory entry at the end of each leaf block that - * records checksums. - */ -struct ext4_dir_entry_tail { - __le32 det_reserved_zero1; /* Pretend to be unused */ - __le16 det_rec_len; /* 12 */ - __u8 det_reserved_zero2; /* Zero name length */ - __u8 det_reserved_ft; /* 0xDE, fake file type */ - __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ -}; - /* * Ext4 directory file types. Only the low 3 bits are used. The * other bits are reserved for now. 
@@ -1588,8 +1541,6 @@ struct ext4_dir_entry_tail { #define EXT4_FT_MAX 8 -#define EXT4_FT_DIR_CSUM 0xDE - /* * EXT4_DIR_PAD defines the directory entries boundaries * @@ -1658,25 +1609,6 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) #define DX_HASH_HALF_MD4_UNSIGNED 4 #define DX_HASH_TEA_UNSIGNED 5 -static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, - const void *address, unsigned int length) -{ - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(sbi->s_chksum_driver)]; - } desc; - int err; - - desc.shash.tfm = sbi->s_chksum_driver; - desc.shash.flags = 0; - *(u32 *)desc.ctx = crc; - - err = crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; -} - #ifdef __KERNEL__ /* hash info structure used by the directory hash */ @@ -1809,8 +1741,7 @@ struct mmp_struct { __le16 mmp_check_interval; __le16 mmp_pad1; - __le32 mmp_pad2[226]; - __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ + __le32 mmp_pad2[227]; }; /* arguments passed to the mmp thread */ @@ -1853,24 +1784,8 @@ struct mmpd_data { /* bitmap.c */ extern unsigned int ext4_count_free(struct buffer_head *, unsigned); -void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); -int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); -void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); -int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, - struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); /* balloc.c */ -extern void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh); extern unsigned int ext4_block_group(struct super_block *sb, 
ext4_fsblk_t blocknr); extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, @@ -1949,7 +1864,7 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); /* mballoc.c */ extern long ext4_mb_stats; extern long ext4_mb_max_to_scan; -extern int ext4_mb_init(struct super_block *); +extern int ext4_mb_init(struct super_block *, int); extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); @@ -2021,8 +1936,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); extern int ext4_ext_migrate(struct inode *); /* namei.c */ -extern int ext4_dirent_csum_verify(struct inode *inode, - struct ext4_dir_entry *dirent); extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, @@ -2037,10 +1950,6 @@ extern int ext4_group_extend(struct super_block *sb, extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ -extern int ext4_superblock_csum_verify(struct super_block *sb, - struct ext4_super_block *es); -extern void ext4_superblock_csum_set(struct super_block *sb, - struct ext4_super_block *es); extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern void ext4_kvfree(void *ptr); @@ -2116,17 +2025,10 @@ extern void ext4_used_dirs_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); extern void ext4_itable_unused_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); -extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group, +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, struct ext4_group_desc *gdp); -extern void 
ext4_group_desc_csum_set(struct super_block *sb, __u32 group, - struct ext4_group_desc *gdp); - -static inline int ext4_has_group_desc_csum(struct super_block *sb) -{ - return EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM | - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); -} static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { @@ -2323,9 +2225,6 @@ static inline void ext4_unlock_group(struct super_block *sb, static inline void ext4_mark_super_dirty(struct super_block *sb) { - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - - ext4_superblock_csum_set(sb, es); if (EXT4_SB(sb)->s_journal == NULL) sb->s_dirt =1; } @@ -2415,9 +2314,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); -extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); -extern int ext4_mmp_csum_verify(struct super_block *sb, - struct mmp_struct *mmp); /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ enum ext4_state_bits { diff --git a/trunk/fs/ext4/ext4_extents.h b/trunk/fs/ext4/ext4_extents.h index cb1b2c919963..0f58b86e3a02 100644 --- a/trunk/fs/ext4/ext4_extents.h +++ b/trunk/fs/ext4/ext4_extents.h @@ -63,21 +63,8 @@ * ext4_inode has i_block array (60 bytes total). * The first 12 bytes store ext4_extent_header; * the remainder stores an array of ext4_extent. - * For non-inode extent blocks, ext4_extent_tail - * follows the array. */ -/* - * This is the extent tail on-disk structure. - * All other extent structures are 12 bytes long. It turns out that - * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which - * covers all valid ext4 block sizes. Therefore, this tail structure can be - * crammed into the end of the block without having to rebalance the tree. - */ -struct ext4_extent_tail { - __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ -}; - /* * This is the extent on-disk structure. 
* It's used at the bottom of the tree. @@ -114,17 +101,6 @@ struct ext4_extent_header { #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) -#define EXT4_EXTENT_TAIL_OFFSET(hdr) \ - (sizeof(struct ext4_extent_header) + \ - (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max))) - -static inline struct ext4_extent_tail * -find_ext4_extent_tail(struct ext4_extent_header *eh) -{ - return (struct ext4_extent_tail *)(((void *)eh) + - EXT4_EXTENT_TAIL_OFFSET(eh)); -} - /* * Array of ext4_ext_path contains path to some extent. * Creation/lookup routines use it for traversal/splitting/etc. diff --git a/trunk/fs/ext4/ext4_jbd2.c b/trunk/fs/ext4/ext4_jbd2.c index 90f7c2e84db1..aca179017582 100644 --- a/trunk/fs/ext4/ext4_jbd2.c +++ b/trunk/fs/ext4/ext4_jbd2.c @@ -138,23 +138,16 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, } int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb, - int now) + handle_t *handle, struct super_block *sb) { struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; if (ext4_handle_valid(handle)) { - ext4_superblock_csum_set(sb, - (struct ext4_super_block *)bh->b_data); err = jbd2_journal_dirty_metadata(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); - } else if (now) { - ext4_superblock_csum_set(sb, - (struct ext4_super_block *)bh->b_data); - mark_buffer_dirty(bh); } else sb->s_dirt = 1; return err; diff --git a/trunk/fs/ext4/ext4_jbd2.h b/trunk/fs/ext4/ext4_jbd2.h index f440e8f1841f..83b20fcf9400 100644 --- a/trunk/fs/ext4/ext4_jbd2.h +++ b/trunk/fs/ext4/ext4_jbd2.h @@ -213,8 +213,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, struct buffer_head *bh); int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb, - int now); + handle_t *handle, struct super_block *sb); #define ext4_journal_get_write_access(handle, bh) \ 
__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) @@ -226,10 +225,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) -#define ext4_handle_dirty_super_now(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1) #define ext4_handle_dirty_super(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0) + __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); diff --git a/trunk/fs/ext4/extents.c b/trunk/fs/ext4/extents.c index 91341ec6e06a..abcdeab67f52 100644 --- a/trunk/fs/ext4/extents.c +++ b/trunk/fs/ext4/extents.c @@ -52,46 +52,6 @@ #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ -static __le32 ext4_extent_block_csum(struct inode *inode, - struct ext4_extent_header *eh) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - __u32 csum; - - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh, - EXT4_EXTENT_TAIL_OFFSET(eh)); - return cpu_to_le32(csum); -} - -static int ext4_extent_block_csum_verify(struct inode *inode, - struct ext4_extent_header *eh) -{ - struct ext4_extent_tail *et; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - et = find_ext4_extent_tail(eh); - if (et->et_checksum != ext4_extent_block_csum(inode, eh)) - return 0; - return 1; -} - -static void ext4_extent_block_csum_set(struct inode *inode, - struct ext4_extent_header *eh) -{ - struct ext4_extent_tail *et; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - et = 
find_ext4_extent_tail(eh); - et->et_checksum = ext4_extent_block_csum(inode, eh); -} - static int ext4_split_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -157,7 +117,6 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, { int err; if (path->p_bh) { - ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); @@ -432,12 +391,6 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid extent entries"; goto corrupted; } - /* Verify checksum on non-root extent tree nodes */ - if (ext_depth(inode) != depth && - !ext4_extent_block_csum_verify(inode, eh)) { - error_msg = "extent tree corrupted"; - goto corrupted; - } return 0; corrupted: @@ -459,26 +412,6 @@ int ext4_ext_check_inode(struct inode *inode) return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); } -static int __ext4_ext_check_block(const char *function, unsigned int line, - struct inode *inode, - struct ext4_extent_header *eh, - int depth, - struct buffer_head *bh) -{ - int ret; - - if (buffer_verified(bh)) - return 0; - ret = ext4_ext_check(inode, eh, depth); - if (ret) - return ret; - set_buffer_verified(bh); - return ret; -} - -#define ext4_ext_check_block(inode, eh, depth, bh) \ - __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh) - #ifdef EXT_DEBUG static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) { @@ -603,7 +536,7 @@ ext4_ext_binsearch_idx(struct inode *inode, } path->p_idx = l - 1; - ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), + ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); #ifdef CHECK_BINSEARCH @@ -735,6 +668,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, i = depth; /* walk through the tree */ while (i) { + int need_to_validate = 0; + ext_debug("depth %d: num %d, max 
%d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); @@ -753,6 +688,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, put_bh(bh); goto err; } + /* validate the extent entries */ + need_to_validate = 1; } eh = ext_block_hdr(bh); ppos++; @@ -766,7 +703,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_hdr = eh; i--; - if (ext4_ext_check_block(inode, eh, i, bh)) + if (need_to_validate && ext4_ext_check(inode, eh, i)) goto err; } @@ -977,7 +914,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, le16_add_cpu(&neh->eh_entries, m); } - ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1056,7 +992,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } - ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1154,7 +1089,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, else neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; - ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1410,8 +1344,7 @@ static int ext4_ext_search_right(struct inode *inode, return -EIO; eh = ext_block_hdr(bh); /* subtract from p_depth to get proper eh_depth */ - if (ext4_ext_check_block(inode, eh, - path->p_depth - depth, bh)) { + if (ext4_ext_check(inode, eh, path->p_depth - depth)) { put_bh(bh); return -EIO; } @@ -1424,7 +1357,7 @@ static int ext4_ext_search_right(struct inode *inode, if (bh == NULL) return -EIO; eh = ext_block_hdr(bh); - if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) { + if (ext4_ext_check(inode, eh, path->p_depth - depth)) { put_bh(bh); return -EIO; } @@ -2711,8 +2644,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, err = -EIO; break; } - if (ext4_ext_check_block(inode, ext_block_hdr(bh), - depth - 
i - 1, bh)) { + if (ext4_ext_check(inode, ext_block_hdr(bh), + depth - i - 1)) { err = -EIO; break; } @@ -4789,8 +4722,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) /* Now release the pages */ if (last_page_offset > first_page_offset) { - truncate_pagecache_range(inode, first_page_offset, - last_page_offset - 1); + truncate_inode_pages_range(mapping, first_page_offset, + last_page_offset-1); } /* finish any pending end_io work */ diff --git a/trunk/fs/ext4/file.c b/trunk/fs/ext4/file.c index 8c7642a00054..cb70f1812a70 100644 --- a/trunk/fs/ext4/file.c +++ b/trunk/fs/ext4/file.c @@ -95,7 +95,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; int unaligned_aio = 0; - ssize_t ret; + int ret; /* * If we have encountered a bitmap-format file, the size limit diff --git a/trunk/fs/ext4/ialloc.c b/trunk/fs/ext4/ialloc.c index d48e8b14928c..9f9acac6c43f 100644 --- a/trunk/fs/ext4/ialloc.c +++ b/trunk/fs/ext4/ialloc.c @@ -70,27 +70,24 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp) { + struct ext4_sb_info *sbi = EXT4_SB(sb); + J_ASSERT_BH(bh, buffer_locked(bh)); /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. 
*/ - if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, "Checksum bad for group %u", block_group); ext4_free_group_clusters_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); return 0; } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); return EXT4_INODES_PER_GROUP(sb); } @@ -131,12 +128,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) return NULL; } if (bitmap_uptodate(bh)) - goto verify; + return bh; lock_buffer(bh); if (bitmap_uptodate(bh)) { unlock_buffer(bh); - goto verify; + return bh; } ext4_lock_group(sb, block_group); @@ -144,7 +141,6 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_init_inode_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); - set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); return bh; @@ -158,7 +154,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) */ set_bitmap_uptodate(bh); unlock_buffer(bh); - goto verify; + return bh; } /* * submit the buffer_head for reading @@ -175,20 +171,6 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } - -verify: - ext4_lock_group(sb, block_group); - if (!buffer_verified(bh) && - !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_INODES_PER_GROUP(sb) / 8)) { - ext4_unlock_group(sb, block_group); - put_bh(bh); - ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " - "inode_bitmap = %llu", 
block_group, bitmap_blk); - return NULL; - } - ext4_unlock_group(sb, block_group); - set_buffer_verified(bh); return bh; } @@ -294,9 +276,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ext4_used_dirs_set(sb, gdp, count); percpu_counter_dec(&sbi->s_dirs_counter); } - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); ext4_unlock_group(sb, block_group); percpu_counter_inc(&sbi->s_freeinodes_counter); @@ -508,12 +488,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, for (i = 0; i < ngroups; i++) { grp = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, grp, NULL); - if (desc) { - grp_free = ext4_free_inodes_count(sb, desc); - if (grp_free && grp_free >= avefreei) { - *group = grp; - return 0; - } + grp_free = ext4_free_inodes_count(sb, desc); + if (desc && grp_free && grp_free >= avefreei) { + *group = grp; + return 0; } } @@ -731,7 +709,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, got: /* We may have to initialize the block bitmap if it isn't already */ - if (ext4_has_group_desc_csum(sb) && + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { struct buffer_head *block_bitmap_bh; @@ -753,11 +731,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); - ext4_block_bitmap_csum_set(sb, group, gdp, - block_bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / - 8); - ext4_group_desc_csum_set(sb, group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, + gdp); } ext4_unlock_group(sb, group); @@ -776,7 +751,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t 
mode, goto fail; /* Update the relevant bg descriptor fields */ - if (ext4_has_group_desc_csum(sb)) { + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { int free; struct ext4_group_info *grp = ext4_get_group_info(sb, group); @@ -797,10 +772,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, ext4_itable_unused_set(sb, gdp, (EXT4_INODES_PER_GROUP(sb) - ino)); up_read(&grp->alloc_sem); - } else { - ext4_lock_group(sb, group); } - ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); if (S_ISDIR(mode)) { ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); @@ -810,12 +782,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, atomic_inc(&sbi->s_flex_groups[f].used_dirs); } } - if (ext4_has_group_desc_csum(sb)) { - ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, group, gdp); + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); + ext4_unlock_group(sb, group); } - ext4_unlock_group(sb, group); BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); @@ -880,19 +850,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - /* Precompute checksum seed for inode metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - __u32 csum; - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - __le32 inum = cpu_to_le32(inode->i_ino); - __le32 gen = cpu_to_le32(inode->i_generation); - csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, - sizeof(inum)); - ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, - sizeof(gen)); - } - ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 
ext4_set_inode_state(inode, EXT4_STATE_NEW); @@ -1183,7 +1140,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, skip_zeroout: ext4_lock_group(sb, group); gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); - ext4_group_desc_csum_set(sb, group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); ext4_unlock_group(sb, group); BUFFER_TRACE(group_desc_bh, diff --git a/trunk/fs/ext4/inode.c b/trunk/fs/ext4/inode.c index 02bc8cbe7281..07eaf565fdcb 100644 --- a/trunk/fs/ext4/inode.c +++ b/trunk/fs/ext4/inode.c @@ -47,73 +47,6 @@ #define MPAGE_DA_EXTENT_TAIL 0x01 -static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, - struct ext4_inode_info *ei) -{ - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - __u16 csum_lo; - __u16 csum_hi = 0; - __u32 csum; - - csum_lo = raw->i_checksum_lo; - raw->i_checksum_lo = 0; - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { - csum_hi = raw->i_checksum_hi; - raw->i_checksum_hi = 0; - } - - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, - EXT4_INODE_SIZE(inode->i_sb)); - - raw->i_checksum_lo = csum_lo; - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) - raw->i_checksum_hi = csum_hi; - - return csum; -} - -static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, - struct ext4_inode_info *ei) -{ - __u32 provided, calculated; - - if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != - cpu_to_le32(EXT4_OS_LINUX) || - !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - provided = le16_to_cpu(raw->i_checksum_lo); - calculated = ext4_inode_csum(inode, raw, ei); - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) - provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; - else - calculated &= 0xFFFF; - - return provided == calculated; -} - 
-static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, - struct ext4_inode_info *ei) -{ - __u32 csum; - - if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != - cpu_to_le32(EXT4_OS_LINUX) || - !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - csum = ext4_inode_csum(inode, raw, ei); - raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF); - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && - EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) - raw->i_checksum_hi = cpu_to_le16(csum >> 16); -} - static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { @@ -3584,7 +3517,8 @@ static int __ext4_get_inode_loc(struct inode *inode, b = table; end = b + EXT4_SB(sb)->s_inode_readahead_blks; num = EXT4_INODES_PER_GROUP(sb); - if (ext4_has_group_desc_csum(sb)) + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) num -= ext4_itable_unused_count(sb, gdp); table += num / inodes_per_block; if (end > table) @@ -3712,39 +3646,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (ret < 0) goto bad_inode; raw_inode = ext4_raw_inode(&iloc); - - if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { - ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); - if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT4_INODE_SIZE(inode->i_sb)) { - EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", - EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, - EXT4_INODE_SIZE(inode->i_sb)); - ret = -EIO; - goto bad_inode; - } - } else - ei->i_extra_isize = 0; - - /* Precompute checksum seed for inode metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - __u32 csum; - __le32 inum = cpu_to_le32(inode->i_ino); - __le32 gen = raw_inode->i_generation; - csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, - sizeof(inum)); - ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 
*)&gen, - sizeof(gen)); - } - - if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { - EXT4_ERROR_INODE(inode, "checksum invalid"); - ret = -EIO; - goto bad_inode; - } - inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); @@ -3824,6 +3725,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { + ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); + if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > + EXT4_INODE_SIZE(inode->i_sb)) { + ret = -EIO; + goto bad_inode; + } if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. */ ei->i_extra_isize = sizeof(struct ext4_inode) - @@ -3835,7 +3742,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) ext4_set_inode_state(inode, EXT4_STATE_XATTR); } - } + } else + ei->i_extra_isize = 0; EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); @@ -4034,7 +3942,7 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); ext4_handle_sync(handle); - err = ext4_handle_dirty_super_now(handle, sb); + err = ext4_handle_dirty_super(handle, sb); } } raw_inode->i_generation = cpu_to_le32(inode->i_generation); @@ -4061,8 +3969,6 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); } - ext4_inode_csum_set(inode, raw_inode, ei); - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); rc = ext4_handle_dirty_metadata(handle, NULL, bh); if (!err) @@ -4307,8 +4213,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * will return the blocks that include the delayed allocation * blocks for this file. 
*/ - delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), - EXT4_I(inode)->i_reserved_data_blocks); + delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; return 0; diff --git a/trunk/fs/ext4/ioctl.c b/trunk/fs/ext4/ioctl.c index 8ad112ae0ade..6eee25591b81 100644 --- a/trunk/fs/ext4/ioctl.c +++ b/trunk/fs/ext4/ioctl.c @@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) handle_t *handle = NULL; int err, migrate = 0; struct ext4_iloc iloc; - unsigned int oldflags, mask, i; + unsigned int oldflags; unsigned int jflag; if (!inode_owner_or_capable(inode)) @@ -115,14 +115,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) goto flags_err; - for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { - if (!(mask & EXT4_FL_USER_MODIFIABLE)) - continue; - if (mask & flags) - ext4_set_inode_flag(inode, i); - else - ext4_clear_inode_flag(inode, i); - } + flags = flags & EXT4_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; ei->i_flags = flags; ext4_set_inode_flags(inode); @@ -158,13 +152,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!inode_owner_or_capable(inode)) return -EPERM; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - ext4_warning(sb, "Setting inode version is not " - "supported with metadata_csum enabled."); - return -ENOTTY; - } - err = mnt_want_write_file(filp); if (err) return err; diff --git a/trunk/fs/ext4/mballoc.c b/trunk/fs/ext4/mballoc.c index 1cd6994fc446..99ab428bcfa0 100644 --- a/trunk/fs/ext4/mballoc.c +++ b/trunk/fs/ext4/mballoc.c @@ -788,7 +788,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) int first_block; struct super_block *sb; struct buffer_head *bhs; - struct buffer_head **bh = NULL; + struct buffer_head **bh; struct inode *inode; char *data; char *bitmap; @@ -2375,7 +2375,7 @@ static int 
ext4_groupinfo_create_slab(size_t size) return 0; } -int ext4_mb_init(struct super_block *sb) +int ext4_mb_init(struct super_block *sb, int needs_recovery) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned i, j; @@ -2517,9 +2517,6 @@ int ext4_mb_release(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); - if (sbi->s_proc) - remove_proc_entry("mb_groups", sbi->s_proc); - if (sbi->s_group_info) { for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); @@ -2567,6 +2564,8 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); + if (sbi->s_proc) + remove_proc_entry("mb_groups", sbi->s_proc); return 0; } @@ -2798,9 +2797,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; ext4_free_group_clusters_set(sb, gdp, len); - ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); @@ -3074,9 +3071,13 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) { struct ext4_prealloc_space *pa = ac->ac_pa; + int len; + + if (pa && pa->pa_type == MB_INODE_PA) { + len = ac->ac_b_ex.fe_len; + pa->pa_free += len; + } - if (pa && pa->pa_type == MB_INODE_PA) - pa->pa_free += ac->ac_b_ex.fe_len; } /* @@ -4635,7 +4636,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, */ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { - ext4_mb_unload_buddy(&e4b); err = -ENOMEM; goto error_return; } @@ -4659,9 +4659,7 @@ void ext4_free_blocks(handle_t *handle, struct inode 
*inode, ret = ext4_free_group_clusters(sb, gdp) + count_clusters; ext4_free_group_clusters_set(sb, gdp, ret); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); @@ -4805,9 +4803,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, mb_free_blocks(NULL, &e4b, bit, count); blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); ext4_free_group_clusters_set(sb, desc, blk_free_count); - ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, desc); + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, EXT4_B2C(sbi, blocks_freed)); diff --git a/trunk/fs/ext4/mmp.c b/trunk/fs/ext4/mmp.c index f99a1311e847..ed6548d89165 100644 --- a/trunk/fs/ext4/mmp.c +++ b/trunk/fs/ext4/mmp.c @@ -6,45 +6,12 @@ #include "ext4.h" -/* Checksumming functions */ -static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - int offset = offsetof(struct mmp_struct, mmp_checksum); - __u32 csum; - - csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); - - return cpu_to_le32(csum); -} - -int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); -} - -void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); -} - /* * Write the MMP block using 
WRITE_SYNC to try to get the block on-disk * faster. */ -static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) +static int write_mmp_block(struct buffer_head *bh) { - struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); - - ext4_mmp_csum_set(sb, mmp); mark_buffer_dirty(bh); lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; @@ -92,8 +59,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, } mmp = (struct mmp_struct *)((*bh)->b_data); - if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || - !ext4_mmp_csum_verify(sb, mmp)) + if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) return -EINVAL; return 0; @@ -154,7 +120,7 @@ static int kmmpd(void *data) mmp->mmp_time = cpu_to_le64(get_seconds()); last_update_time = jiffies; - retval = write_mmp_block(sb, bh); + retval = write_mmp_block(bh); /* * Don't spew too many error messages. Print one every * (s_mmp_update_interval * 60) seconds. @@ -234,7 +200,7 @@ static int kmmpd(void *data) mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); mmp->mmp_time = cpu_to_le64(get_seconds()); - retval = write_mmp_block(sb, bh); + retval = write_mmp_block(bh); failed: kfree(data); @@ -333,7 +299,7 @@ int ext4_multi_mount_protect(struct super_block *sb, seq = mmp_new_seq(); mmp->mmp_seq = cpu_to_le32(seq); - retval = write_mmp_block(sb, bh); + retval = write_mmp_block(bh); if (retval) goto failed; diff --git a/trunk/fs/ext4/namei.c b/trunk/fs/ext4/namei.c index 5845cd97bf8b..e2a3f4b0ff78 100644 --- a/trunk/fs/ext4/namei.c +++ b/trunk/fs/ext4/namei.c @@ -145,14 +145,6 @@ struct dx_map_entry u16 size; }; -/* - * This goes at the end of each htree block. 
- */ -struct dx_tail { - u32 dt_reserved; - __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ -}; - static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); static inline unsigned dx_get_hash(struct dx_entry *entry); @@ -188,230 +180,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); -/* checksumming functions */ -#define EXT4_DIRENT_TAIL(block, blocksize) \ - ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ - ((blocksize) - \ - sizeof(struct ext4_dir_entry_tail)))) - -static void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize) -{ - memset(t, 0, sizeof(struct ext4_dir_entry_tail)); - t->det_rec_len = ext4_rec_len_to_disk( - sizeof(struct ext4_dir_entry_tail), blocksize); - t->det_reserved_ft = EXT4_FT_DIR_CSUM; -} - -/* Walk through a dirent block to find a checksum "dirent" at the tail */ -static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, - struct ext4_dir_entry *de) -{ - struct ext4_dir_entry_tail *t; - -#ifdef PARANOID - struct ext4_dir_entry *d, *top; - - d = de; - top = (struct ext4_dir_entry *)(((void *)de) + - (EXT4_BLOCK_SIZE(inode->i_sb) - - sizeof(struct ext4_dir_entry_tail))); - while (d < top && d->rec_len) - d = (struct ext4_dir_entry *)(((void *)d) + - le16_to_cpu(d->rec_len)); - - if (d != top) - return NULL; - - t = (struct ext4_dir_entry_tail *)d; -#else - t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); -#endif - - if (t->det_reserved_zero1 || - le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || - t->det_reserved_zero2 || - t->det_reserved_ft != EXT4_FT_DIR_CSUM) - return NULL; - - return t; -} - -static __le32 ext4_dirent_csum(struct inode *inode, - struct ext4_dir_entry *dirent, int size) -{ - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct ext4_inode_info *ei = 
EXT4_I(inode); - __u32 csum; - - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); - return cpu_to_le32(csum); -} - -int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) -{ - struct ext4_dir_entry_tail *t; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - t = get_dirent_tail(inode, dirent); - if (!t) { - EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " - "leaf for checksum. Please run e2fsck -D."); - return 0; - } - - if (t->det_checksum != ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent)) - return 0; - - return 1; -} - -static void ext4_dirent_csum_set(struct inode *inode, - struct ext4_dir_entry *dirent) -{ - struct ext4_dir_entry_tail *t; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - t = get_dirent_tail(inode, dirent); - if (!t) { - EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " - "leaf for checksum. 
Please run e2fsck -D."); - return; - } - - t->det_checksum = ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent); -} - -static inline int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) -{ - ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); - return ext4_handle_dirty_metadata(handle, inode, bh); -} - -static struct dx_countlimit *get_dx_countlimit(struct inode *inode, - struct ext4_dir_entry *dirent, - int *offset) -{ - struct ext4_dir_entry *dp; - struct dx_root_info *root; - int count_offset; - - if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) - count_offset = 8; - else if (le16_to_cpu(dirent->rec_len) == 12) { - dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); - if (le16_to_cpu(dp->rec_len) != - EXT4_BLOCK_SIZE(inode->i_sb) - 12) - return NULL; - root = (struct dx_root_info *)(((void *)dp + 12)); - if (root->reserved_zero || - root->info_length != sizeof(struct dx_root_info)) - return NULL; - count_offset = 32; - } else - return NULL; - - if (offset) - *offset = count_offset; - return (struct dx_countlimit *)(((void *)dirent) + count_offset); -} - -static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, - int count_offset, int count, struct dx_tail *t) -{ - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct ext4_inode_info *ei = EXT4_I(inode); - __u32 csum, old_csum; - int size; - - size = count_offset + (count * sizeof(struct dx_entry)); - old_csum = t->dt_checksum; - t->dt_checksum = 0; - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); - csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail)); - t->dt_checksum = old_csum; - - return cpu_to_le32(csum); -} - -static int ext4_dx_csum_verify(struct inode *inode, - struct ext4_dir_entry *dirent) -{ - struct dx_countlimit *c; - struct dx_tail *t; - int count_offset, limit, count; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - 
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - c = get_dx_countlimit(inode, dirent, &count_offset); - if (!c) { - EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); - return 1; - } - limit = le16_to_cpu(c->limit); - count = le16_to_cpu(c->count); - if (count_offset + (limit * sizeof(struct dx_entry)) > - EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { - EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " - "tree checksum found. Run e2fsck -D."); - return 1; - } - t = (struct dx_tail *)(((struct dx_entry *)c) + limit); - - if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset, - count, t)) - return 0; - return 1; -} - -static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) -{ - struct dx_countlimit *c; - struct dx_tail *t; - int count_offset, limit, count; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - c = get_dx_countlimit(inode, dirent, &count_offset); - if (!c) { - EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); - return; - } - limit = le16_to_cpu(c->limit); - count = le16_to_cpu(c->count); - if (count_offset + (limit * sizeof(struct dx_entry)) > - EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { - EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " - "tree checksum. 
Run e2fsck -D."); - return; - } - t = (struct dx_tail *)(((struct dx_entry *)c) + limit); - - t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t); -} - -static inline int ext4_handle_dirty_dx_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) -{ - ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); - return ext4_handle_dirty_metadata(handle, inode, bh); -} - /* * p is at least 6 bytes before the end of page */ @@ -471,20 +239,12 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - EXT4_DIR_REC_LEN(2) - infosize; - - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit(struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); - - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } @@ -630,15 +390,6 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail; } - if (!buffer_verified(bh) && - !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) { - ext4_warning(dir->i_sb, "Root failed checksum"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - set_buffer_verified(bh); - entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); @@ -699,17 +450,6 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; - - if (!buffer_verified(bh) && - !ext4_dx_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - ext4_warning(dir->i_sb, "Node failed checksum"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - set_buffer_verified(bh); - if 
(dx_get_limit(entries) != dx_node_limit (dir)) { ext4_warning(dir->i_sb, "dx entry: limit != node limit"); @@ -809,15 +549,6 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 0, &err))) return err; /* Failure */ - - if (!buffer_verified(bh) && - !ext4_dx_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - ext4_warning(dir->i_sb, "Node failed checksum"); - return -EIO; - } - set_buffer_verified(bh); - p++; brelse(p->bh); p->bh = bh; @@ -846,11 +577,6 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) return err; - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) - return -EIO; - set_buffer_verified(bh); - de = (struct ext4_dir_entry_2 *) bh->b_data; top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - @@ -1210,15 +936,6 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, brelse(bh); goto next; } - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(dir, "checksumming directory " - "block %lu", (unsigned long)block); - brelse(bh); - goto next; - } - set_buffer_verified(bh); i = search_dirblock(bh, dir, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { @@ -1270,16 +987,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q if (!(bh = ext4_bread(NULL, dir, block, 0, err))) goto errout; - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(dir, "checksumming directory " - "block %lu", (unsigned long)block); - brelse(bh); - *err = -EIO; - goto errout; - } - set_buffer_verified(bh); retval = search_dirblock(bh, dir, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); @@ -1330,12 +1037,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, 
stru EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); return ERR_PTR(-EIO); } - if (unlikely(ino == dir->i_ino)) { - EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir", - dentry->d_name.len, - dentry->d_name.name); - return ERR_PTR(-EIO); - } inode = ext4_iget(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, @@ -1455,14 +1156,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, *data2; unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; - struct ext4_dir_entry_tail *t; - int csum_size = 0; int err = 0, i; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - csum_size = sizeof(struct ext4_dir_entry_tail); - bh2 = ext4_append (handle, dir, &newblock, &err); if (!(bh2)) { brelse(*bh); @@ -1509,20 +1204,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); de = dx_pack_dirents(data1, blocksize); - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, + de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, blocksize); - de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - - (char *) de2, + de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(data2, blocksize); - initialize_dirent_tail(t, blocksize); - - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); - } - dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); @@ -1533,10 +1218,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_handle_dirty_dirent_node(handle, 
dir, bh2); + err = ext4_handle_dirty_metadata(handle, dir, bh2); if (err) goto journal_error; - err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); + err = ext4_handle_dirty_metadata(handle, dir, frame->bh); if (err) goto journal_error; brelse(bh2); @@ -1573,16 +1258,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, unsigned short reclen; int nlen, rlen, err; char *top; - int csum_size = 0; - - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - csum_size = sizeof(struct ext4_dir_entry_tail); reclen = EXT4_DIR_REC_LEN(namelen); if (!de) { de = (struct ext4_dir_entry_2 *)bh->b_data; - top = bh->b_data + (blocksize - csum_size) - reclen; + top = bh->b_data + blocksize - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) return -EIO; @@ -1615,8 +1295,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, de = de1; } de->file_type = EXT4_FT_UNKNOWN; - de->inode = cpu_to_le32(inode->i_ino); - ext4_set_de_type(dir->i_sb, de, inode->i_mode); + if (inode) { + de->inode = cpu_to_le32(inode->i_ino); + ext4_set_de_type(dir->i_sb, de, inode->i_mode); + } else + de->inode = 0; de->name_len = namelen; memcpy(de->name, name, namelen); /* @@ -1635,7 +1318,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, dir->i_version++; ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_metadata(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); return 0; @@ -1656,7 +1339,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct dx_frame frames[2], *frame; struct dx_entry *entries; struct ext4_dir_entry_2 *de, *de2; - struct ext4_dir_entry_tail *t; char *data1, *top; unsigned len; int retval; @@ -1664,11 +1346,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct 
dx_hash_info hinfo; ext4_lblk_t block; struct fake_dirent *fde; - int csum_size = 0; - - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - csum_size = sizeof(struct ext4_dir_entry_tail); blocksize = dir->i_sb->s_blocksize; dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); @@ -1689,7 +1366,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, brelse(bh); return -EIO; } - len = ((char *) root) + (blocksize - csum_size) - (char *) de; + len = ((char *) root) + blocksize - (char *) de; /* Allocate new block for the 0th block's dirents */ bh2 = ext4_append(handle, dir, &block, &retval); @@ -1705,15 +1382,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, top = data1 + len; while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, + de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, blocksize); - - if (csum_size) { - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); - } - /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), @@ -1738,8 +1408,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, frame->bh = bh; bh = bh2; - ext4_handle_dirty_dx_node(handle, dir, frame->bh); - ext4_handle_dirty_dirent_node(handle, dir, bh); + ext4_handle_dirty_metadata(handle, dir, frame->bh); + ext4_handle_dirty_metadata(handle, dir, bh); de = do_split(handle,dir, &bh, frame, &hinfo, &retval); if (!de) { @@ -1775,17 +1445,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *dir = dentry->d_parent->d_inode; struct buffer_head *bh; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; struct super_block *sb; int retval; int dx_fallback=0; unsigned blocksize; ext4_lblk_t 
block, blocks; - int csum_size = 0; - - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; blocksize = sb->s_blocksize; @@ -1804,11 +1468,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, bh = ext4_bread(handle, dir, block, 0, &retval); if(!bh) return retval; - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) - return -EIO; - set_buffer_verified(bh); retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); if (retval != -ENOSPC) { brelse(bh); @@ -1825,13 +1484,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; - de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); - - if (csum_size) { - t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } - + de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); retval = add_dirent_to_buf(handle, dentry, inode, de, bh); brelse(bh); if (retval == 0) @@ -1863,11 +1516,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) goto cleanup; - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) - goto journal_error; - set_buffer_verified(bh); - BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) @@ -1935,7 +1583,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, dxtrace(dx_show_index("node", frames[1].entries)); dxtrace(dx_show_index("node", ((struct dx_node *) bh2->b_data)->entries)); - err = ext4_handle_dirty_dx_node(handle, dir, bh2); + err = ext4_handle_dirty_metadata(handle, dir, bh2); if (err) goto journal_error; brelse (bh2); @@ -1961,7 +1609,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry 
*dentry, if (err) goto journal_error; } - err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh); + err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh); if (err) { ext4_std_error(inode->i_sb, err); goto cleanup; @@ -1993,17 +1641,12 @@ static int ext4_delete_entry(handle_t *handle, { struct ext4_dir_entry_2 *de, *pde; unsigned int blocksize = dir->i_sb->s_blocksize; - int csum_size = 0; int i, err; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - csum_size = sizeof(struct ext4_dir_entry_tail); - i = 0; pde = NULL; de = (struct ext4_dir_entry_2 *) bh->b_data; - while (i < bh->b_size - csum_size) { + while (i < bh->b_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, i)) return -EIO; if (de == de_del) { @@ -2024,7 +1667,7 @@ static int ext4_delete_entry(handle_t *handle, de->inode = 0; dir->i_version++; BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_metadata(handle, dir, bh); if (unlikely(err)) { ext4_std_error(dir->i_sb, err); return err; @@ -2166,15 +1809,9 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; struct buffer_head *dir_block = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; unsigned int blocksize = dir->i_sb->s_blocksize; - int csum_size = 0; int err, retries = 0; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - csum_size = sizeof(struct ext4_dir_entry_tail); - if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; @@ -2215,24 +1852,16 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ext4_set_de_type(dir->i_sb, de, S_IFDIR); de = ext4_next_entry(de, blocksize); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = ext4_rec_len_to_disk(blocksize - - (csum_size + EXT4_DIR_REC_LEN(1)), + de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1), blocksize); de->name_len = 2; 
strcpy(de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); set_nlink(inode, 2); - - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } - BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); + err = ext4_handle_dirty_metadata(handle, inode, dir_block); if (err) goto out_clear_inode; - set_buffer_verified(dir_block); err = ext4_mark_inode_dirty(handle, inode); if (!err) err = ext4_add_entry(handle, dentry, inode); @@ -2282,14 +1911,6 @@ static int empty_dir(struct inode *inode) inode->i_ino); return 1; } - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(inode, "checksum error reading directory " - "lblock 0"); - return -EIO; - } - set_buffer_verified(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; de1 = ext4_next_entry(de, sb->s_blocksize); if (le32_to_cpu(de->inode) != inode->i_ino || @@ -2321,14 +1942,6 @@ static int empty_dir(struct inode *inode) offset += sb->s_blocksize; continue; } - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(inode, "checksum error " - "reading directory lblock 0"); - return -EIO; - } - set_buffer_verified(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; } if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) { @@ -2397,7 +2010,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* Insert this inode at the head of the on-disk orphan list... 
*/ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_super_now(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -2470,7 +2083,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_handle_dirty_super_now(handle, inode->i_sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = @@ -2829,11 +2442,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; - if (!buffer_verified(dir_bh) && - !ext4_dirent_csum_verify(old_inode, - (struct ext4_dir_entry *)dir_bh->b_data)) - goto end_rename; - set_buffer_verified(dir_bh); if (le32_to_cpu(PARENT_INO(dir_bh->b_data, old_dir->i_sb->s_blocksize)) != old_dir->i_ino) goto end_rename; @@ -2864,7 +2472,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ext4_current_time(new_dir); ext4_mark_inode_dirty(handle, new_dir); BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); - retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh); + retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh); if (unlikely(retval)) { ext4_std_error(new_dir->i_sb, retval); goto end_rename; @@ -2918,8 +2526,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); - retval = ext4_handle_dirty_dirent_node(handle, old_inode, - dir_bh); + retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh); if (retval) { ext4_std_error(old_dir->i_sb, retval); goto end_rename; diff --git 
a/trunk/fs/ext4/resize.c b/trunk/fs/ext4/resize.c index 7ea6cbb44121..59fa0be27251 100644 --- a/trunk/fs/ext4/resize.c +++ b/trunk/fs/ext4/resize.c @@ -161,8 +161,6 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) - goto out2; flex_gd->count = flexbg_size; flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * @@ -798,7 +796,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ext4_kvfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - err = ext4_handle_dirty_super_now(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); if (err) ext4_std_error(sb, err); @@ -970,8 +968,6 @@ static void update_backups(struct super_block *sb, goto exit_err; } - ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); - while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { struct buffer_head *bh; @@ -1071,54 +1067,6 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, return err; } -static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) -{ - struct buffer_head *bh = sb_getblk(sb, block); - if (!bh) - return NULL; - - if (bitmap_uptodate(bh)) - return bh; - - lock_buffer(bh); - if (bh_submit_read(bh) < 0) { - unlock_buffer(bh); - brelse(bh); - return NULL; - } - unlock_buffer(bh); - - return bh; -} - -static int ext4_set_bitmap_checksums(struct super_block *sb, - ext4_group_t group, - struct ext4_group_desc *gdp, - struct ext4_new_group_data *group_data) -{ - struct buffer_head *bh; - - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 0; - - bh = ext4_get_bitmap(sb, group_data->inode_bitmap); - if (!bh) - return -EIO; - ext4_inode_bitmap_csum_set(sb, group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - brelse(bh); - - bh = ext4_get_bitmap(sb, group_data->block_bitmap); - if 
(!bh) - return -EIO; - ext4_block_bitmap_csum_set(sb, group, gdp, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); - brelse(bh); - - return 0; -} - /* * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg */ @@ -1145,24 +1093,18 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, */ gdb_bh = sbi->s_group_desc[gdb_num]; /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)(gdb_bh->b_data + + gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); memset(gdp, 0, EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); - err = ext4_set_bitmap_checksums(sb, group, gdp, group_data); - if (err) { - ext4_std_error(sb, err); - break; - } - ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_free_group_clusters_set(sb, gdp, EXT4_B2C(sbi, group_data->free_blocks_count)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags = cpu_to_le16(*bg_flags); - ext4_group_desc_csum_set(sb, group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { @@ -1401,14 +1343,17 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, (1 + ext4_bg_num_gdb(sb, group + i) + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; group_data[i].free_blocks_count = blocks_per_group - overhead; - if (ext4_has_group_desc_csum(sb)) + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | EXT4_BG_INODE_UNINIT; else flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; } - if (last_group == n_group && ext4_has_group_desc_csum(sb)) + if (last_group == n_group && + EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) /* We need to initialize block bitmap of last group. 
*/ flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c index eb7aa3e4ef05..35b5954489ee 100644 --- a/trunk/fs/ext4/super.c +++ b/trunk/fs/ext4/super.c @@ -112,48 +112,6 @@ static struct file_system_type ext3_fs_type = { #define IS_EXT3_SB(sb) (0) #endif -static int ext4_verify_csum_type(struct super_block *sb, - struct ext4_super_block *es) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - return es->s_checksum_type == EXT4_CRC32C_CHKSUM; -} - -static __le32 ext4_superblock_csum(struct super_block *sb, - struct ext4_super_block *es) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - int offset = offsetof(struct ext4_super_block, s_checksum); - __u32 csum; - - csum = ext4_chksum(sbi, ~0, (char *)es, offset); - - return cpu_to_le32(csum); -} - -int ext4_superblock_csum_verify(struct super_block *sb, - struct ext4_super_block *es) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return 1; - - return es->s_checksum == ext4_superblock_csum(sb, es); -} - -void ext4_superblock_csum_set(struct super_block *sb, - struct ext4_super_block *es) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - es->s_checksum = ext4_superblock_csum(sb, es); -} - void *ext4_kvmalloc(size_t size, gfp_t flags) { void *ret; @@ -539,7 +497,6 @@ void __ext4_error(struct super_block *sb, const char *function, printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", sb->s_id, function, line, current->comm, &vaf); va_end(args); - save_error_info(sb, function, line); ext4_handle_error(sb); } @@ -948,8 +905,6 @@ static void ext4_put_super(struct super_block *sb) unlock_super(sb); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); kfree(sbi); } @@ -1967,69 +1922,43 @@ static int 
ext4_fill_flex_info(struct super_block *sb) return 0; } -static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, - struct ext4_group_desc *gdp) +__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) { - int offset; __u16 crc = 0; - __le32 le_group = cpu_to_le32(block_group); - if ((sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { - /* Use new metadata_csum algorithm */ - __u16 old_csum; - __u32 csum32; - - old_csum = gdp->bg_checksum; - gdp->bg_checksum = 0; - csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, - sizeof(le_group)); - csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, - sbi->s_desc_size); - gdp->bg_checksum = old_csum; - - crc = csum32 & 0xFFFF; - goto out; - } + if (sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + int offset = offsetof(struct ext4_group_desc, bg_checksum); + __le32 le_group = cpu_to_le32(block_group); - /* old crc16 code */ - offset = offsetof(struct ext4_group_desc, bg_checksum); - - crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); - crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); - crc = crc16(crc, (__u8 *)gdp, offset); - offset += sizeof(gdp->bg_checksum); /* skip checksum */ - /* for checksum of struct ext4_group_desc do the rest...*/ - if ((sbi->s_es->s_feature_incompat & - cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && - offset < le16_to_cpu(sbi->s_es->s_desc_size)) - crc = crc16(crc, (__u8 *)gdp + offset, - le16_to_cpu(sbi->s_es->s_desc_size) - - offset); + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); + crc = crc16(crc, (__u8 *)gdp, offset); + offset += sizeof(gdp->bg_checksum); /* skip checksum */ + /* for checksum of struct ext4_group_desc do the rest...*/ + if ((sbi->s_es->s_feature_incompat & + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + offset < 
le16_to_cpu(sbi->s_es->s_desc_size)) + crc = crc16(crc, (__u8 *)gdp + offset, + le16_to_cpu(sbi->s_es->s_desc_size) - + offset); + } -out: return cpu_to_le16(crc); } -int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, +int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, struct ext4_group_desc *gdp) { - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), - block_group, gdp))) + if ((sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && + (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) return 0; return 1; } -void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, - struct ext4_group_desc *gdp) -{ - if (!ext4_has_group_desc_csum(sb)) - return; - gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); -} - /* Called at mount-time, super-block is locked */ static int ext4_check_descriptors(struct super_block *sb, ext4_group_t *first_not_zeroed) @@ -2084,7 +2013,7 @@ static int ext4_check_descriptors(struct super_block *sb, return 0; } ext4_lock_group(sb, i); - if (!ext4_group_desc_csum_verify(sb, i, gdp)) { + if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Checksum for group %u failed (%u!=%u)", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, @@ -2488,23 +2417,6 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, return count; } -static ssize_t trigger_test_error(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - int len = count; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (len && buf[len-1] == '\n') - len--; - - if (len) - ext4_error(sbi->s_sb, "%.*s", len, buf); - return count; -} - #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ @@ -2535,7 +2447,6 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, 
s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); -EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), @@ -2550,7 +2461,6 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(mb_stream_req), ATTR_LIST(mb_group_prealloc), ATTR_LIST(max_writeback_mb_bump), - ATTR_LIST(trigger_fs_error), NULL, }; @@ -3047,44 +2957,6 @@ static void ext4_destroy_lazyinit_thread(void) kthread_stop(ext4_lazyinit_task); } -static int set_journal_csum_feature_set(struct super_block *sb) -{ - int ret = 1; - int compat, incompat; - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - /* journal checksum v2 */ - compat = 0; - incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; - } else { - /* journal checksum v1 */ - compat = JBD2_FEATURE_COMPAT_CHECKSUM; - incompat = 0; - } - - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { - ret = jbd2_journal_set_features(sbi->s_journal, - compat, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | - incompat); - } else if (test_opt(sb, JOURNAL_CHECKSUM)) { - ret = jbd2_journal_set_features(sbi->s_journal, - compat, 0, - incompat); - jbd2_journal_clear_features(sbi->s_journal, 0, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } else { - jbd2_journal_clear_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | - JBD2_FEATURE_INCOMPAT_CSUM_V2); - } - - return ret; -} - static int ext4_fill_super(struct super_block *sb, void *data, int silent) { char *orig_data = kstrdup(data, GFP_KERNEL); @@ -3121,7 +2993,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto out_free_orig; } sb->s_fs_info = sbi; - sbi->s_sb = sb; sbi->s_mount_opt = 0; sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID); 
sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID); @@ -3161,54 +3032,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Note: s_es must be initialized as soon as possible because * some ext4 macro-instructions depend on its value */ - es = (struct ext4_super_block *) (bh->b_data + offset); + es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); sbi->s_es = es; sb->s_magic = le16_to_cpu(es->s_magic); if (sb->s_magic != EXT4_SUPER_MAGIC) goto cantfind_ext4; sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); - /* Warn if metadata_csum and gdt_csum are both set. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) - ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " - "redundant flags; please run fsck."); - - /* Check for a known checksum algorithm */ - if (!ext4_verify_csum_type(sb, es)) { - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " - "unknown checksum algorithm."); - silent = 1; - goto cantfind_ext4; - } - - /* Load the checksum driver */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); - ret = PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver = NULL; - goto failed_mount; - } - } - - /* Check superblock checksum */ - if (!ext4_superblock_csum_verify(sb, es)) { - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " - "invalid superblock checksum. 
Run e2fsck?"); - silent = 1; - goto cantfind_ext4; - } - - /* Precompute checksum seed for all metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, - sizeof(es->s_uuid)); - /* Set defaults before we parse the mount options */ def_mount_opts = le32_to_cpu(es->s_default_mount_opts); set_opt(sb, INIT_INODE_TABLE); @@ -3370,7 +3200,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "Can't read superblock on 2nd try"); goto failed_mount; } - es = (struct ext4_super_block *)(bh->b_data + offset); + es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { ext4_msg(sb, KERN_ERR, @@ -3562,7 +3392,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) GFP_KERNEL); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); - ret = -ENOMEM; goto failed_mount; } @@ -3620,7 +3449,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); - ret = err; goto failed_mount3; } @@ -3678,17 +3506,26 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto no_journal; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && + if (ext4_blocks_count(es) > 0xffffffffULL && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); goto failed_mount_wq; } - if (!set_journal_csum_feature_set(sb)) { - ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " - "feature set"); - goto failed_mount_wq; + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + jbd2_journal_set_features(sbi->s_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); + } else if (test_opt(sb, JOURNAL_CHECKSUM)) { + jbd2_journal_set_features(sbi->s_journal, 
+ JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); + jbd2_journal_clear_features(sbi->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); + } else { + jbd2_journal_clear_features(sbi->s_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); } /* We have now updated the journal if required, so we can @@ -3769,8 +3606,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount4; } - if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) - sb->s_flags |= MS_RDONLY; + ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); /* determine the minimum size of new large inodes, if present */ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { @@ -3805,7 +3641,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } ext4_ext_init(sb); - err = ext4_mb_init(sb); + err = ext4_mb_init(sb, needs_recovery); if (err) { ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", err); @@ -3888,8 +3724,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) brelse(sbi->s_group_desc[i]); ext4_kvfree(sbi->s_group_desc); failed_mount: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); if (sbi->s_proc) { remove_proc_entry("options", sbi->s_proc); remove_proc_entry(sb->s_id, ext4_proc_root); @@ -4013,7 +3847,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, goto out_bdev; } - es = (struct ext4_super_block *) (bh->b_data + offset); + es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || !(le32_to_cpu(es->s_feature_incompat) & EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { @@ -4205,7 +4039,6 @@ static int ext4_commit_super(struct super_block *sb, int sync) &EXT4_SB(sb)->s_freeinodes_counter)); sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); - ext4_superblock_csum_set(sb, es); mark_buffer_dirty(sbh); if (sync) { error = sync_dirty_buffer(sbh); @@ -4500,7 +4333,7 @@ static int 
ext4_remount(struct super_block *sb, int *flags, char *data) struct ext4_group_desc *gdp = ext4_get_group_desc(sb, g, NULL); - if (!ext4_group_desc_csum_verify(sb, g, gdp)) { + if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_remount: Checksum for group %u failed (%u!=%u)", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), diff --git a/trunk/fs/ext4/xattr.c b/trunk/fs/ext4/xattr.c index e56c9ed7d6e3..e88748e55c0f 100644 --- a/trunk/fs/ext4/xattr.c +++ b/trunk/fs/ext4/xattr.c @@ -122,58 +122,6 @@ const struct xattr_handler *ext4_xattr_handlers[] = { NULL }; -static __le32 ext4_xattr_block_csum(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) -{ - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct ext4_inode_info *ei = EXT4_I(inode); - __u32 csum, old; - - old = hdr->h_checksum; - hdr->h_checksum = 0; - if (le32_to_cpu(hdr->h_refcount) != 1) { - block_nr = cpu_to_le64(block_nr); - csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr, - sizeof(block_nr)); - } else - csum = ei->i_csum_seed; - csum = ext4_chksum(sbi, csum, (__u8 *)hdr, - EXT4_BLOCK_SIZE(inode->i_sb)); - hdr->h_checksum = old; - return cpu_to_le32(csum); -} - -static int ext4_xattr_block_csum_verify(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) -{ - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && - (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) - return 0; - return 1; -} - -static void ext4_xattr_block_csum_set(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) - return; - - hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); -} - -static inline int ext4_handle_dirty_xattr_block(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) -{ - ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh)); - return 
ext4_handle_dirty_metadata(handle, inode, bh); -} - static inline const struct xattr_handler * ext4_xattr_handler(int name_index) { @@ -208,22 +156,12 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) } static inline int -ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) +ext4_xattr_check_block(struct buffer_head *bh) { - int error; - - if (buffer_verified(bh)) - return 0; - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) return -EIO; - if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) - return -EIO; - error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); - if (!error) - set_buffer_verified(bh); - return error; + return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); } static inline int @@ -286,7 +224,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(inode, bh)) { + if (ext4_xattr_check_block(bh)) { bad_block: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); @@ -431,7 +369,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(inode, bh)) { + if (ext4_xattr_check_block(bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EIO; @@ -554,7 +492,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, if (ce) mb_cache_entry_release(ce); unlock_buffer(bh); - error = ext4_handle_dirty_xattr_block(handle, inode, bh); + error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); dquot_free_block(inode, 1); @@ -724,7 +662,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info 
*i, ea_bdebug(bs->bh, "b_count=%d, refcount=%d", atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); - if (ext4_xattr_check_block(inode, bs->bh)) { + if (ext4_xattr_check_block(bs->bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EIO; @@ -787,9 +725,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (error == -EIO) goto bad_block; if (!error) - error = ext4_handle_dirty_xattr_block(handle, - inode, - bs->bh); + error = ext4_handle_dirty_metadata(handle, + inode, + bs->bh); if (error) goto cleanup; goto inserted; @@ -858,9 +796,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); - error = ext4_handle_dirty_xattr_block(handle, - inode, - new_bh); + error = ext4_handle_dirty_metadata(handle, + inode, + new_bh); if (error) goto cleanup_dquot; } @@ -917,8 +855,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, set_buffer_uptodate(new_bh); unlock_buffer(new_bh); ext4_xattr_cache_insert(new_bh); - error = ext4_handle_dirty_xattr_block(handle, - inode, new_bh); + error = ext4_handle_dirty_metadata(handle, + inode, new_bh); if (error) goto cleanup; } @@ -1255,7 +1193,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, error = -EIO; if (!bh) goto cleanup; - if (ext4_xattr_check_block(inode, bh)) { + if (ext4_xattr_check_block(bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EIO; diff --git a/trunk/fs/ext4/xattr.h b/trunk/fs/ext4/xattr.h index 91f31ca7d9af..25b7387ff183 100644 --- a/trunk/fs/ext4/xattr.h +++ b/trunk/fs/ext4/xattr.h @@ -27,9 +27,7 @@ struct ext4_xattr_header { __le32 h_refcount; /* reference count */ __le32 h_blocks; /* number of disk blocks used */ __le32 h_hash; /* hash value of all attributes */ - __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ - /* id = inum if refcount=1, blknum 
otherwise */ - __u32 h_reserved[3]; /* zero right now */ + __u32 h_reserved[4]; /* zero right now */ }; struct ext4_xattr_ibody_header { diff --git a/trunk/fs/fat/inode.c b/trunk/fs/fat/inode.c index a3d81ebf6d86..c2973ea5df9a 100644 --- a/trunk/fs/fat/inode.c +++ b/trunk/fs/fat/inode.c @@ -735,9 +735,10 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, } static int -fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) +fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) { int len = *lenp; + struct inode *inode = de->d_inode; u32 ipos_h, ipos_m, ipos_l; if (len < 5) { @@ -753,9 +754,9 @@ fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) fh[1] = inode->i_generation; fh[2] = ipos_h; fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; - fh[4] = ipos_l; - if (parent) - fh[4] |= MSDOS_I(parent)->i_logstart; + spin_lock(&de->d_lock); + fh[4] = ipos_l | MSDOS_I(de->d_parent->d_inode)->i_logstart; + spin_unlock(&de->d_lock); return 3; } diff --git a/trunk/fs/fcntl.c b/trunk/fs/fcntl.c index 81b70e665bf0..d078b75572a7 100644 --- a/trunk/fs/fcntl.c +++ b/trunk/fs/fcntl.c @@ -442,24 +442,28 @@ static int check_fcntl_cmd(unsigned cmd) SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file *filp; - int fput_needed; long err = -EBADF; - filp = fget_raw_light(fd, &fput_needed); + filp = fget_raw(fd); if (!filp) goto out; if (unlikely(filp->f_mode & FMODE_PATH)) { - if (!check_fcntl_cmd(cmd)) - goto out1; + if (!check_fcntl_cmd(cmd)) { + fput(filp); + goto out; + } } err = security_file_fcntl(filp, cmd, arg); - if (!err) - err = do_fcntl(fd, cmd, arg, filp); + if (err) { + fput(filp); + return err; + } -out1: - fput_light(filp, fput_needed); + err = do_fcntl(fd, cmd, arg, filp); + + fput(filp); out: return err; } @@ -469,21 +473,26 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file * filp; - long err = -EBADF; - 
int fput_needed; + long err; - filp = fget_raw_light(fd, &fput_needed); + err = -EBADF; + filp = fget_raw(fd); if (!filp) goto out; if (unlikely(filp->f_mode & FMODE_PATH)) { - if (!check_fcntl_cmd(cmd)) - goto out1; + if (!check_fcntl_cmd(cmd)) { + fput(filp); + goto out; + } } err = security_file_fcntl(filp, cmd, arg); - if (err) - goto out1; + if (err) { + fput(filp); + return err; + } + err = -EBADF; switch (cmd) { case F_GETLK64: @@ -498,8 +507,7 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, err = do_fcntl(fd, cmd, arg, filp); break; } -out1: - fput_light(filp, fput_needed); + fput(filp); out: return err; } diff --git a/trunk/fs/file_table.c b/trunk/fs/file_table.c index a305d9e2d1b2..70f2a0fd6aec 100644 --- a/trunk/fs/file_table.c +++ b/trunk/fs/file_table.c @@ -34,6 +34,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; +DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ @@ -420,9 +421,9 @@ static inline void __file_sb_list_add(struct file *file, struct super_block *sb) */ void file_sb_list_add(struct file *file, struct super_block *sb) { - lg_local_lock(&files_lglock); + lg_local_lock(files_lglock); __file_sb_list_add(file, sb); - lg_local_unlock(&files_lglock); + lg_local_unlock(files_lglock); } /** @@ -435,9 +436,9 @@ void file_sb_list_add(struct file *file, struct super_block *sb) void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } @@ -484,7 +485,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - lg_global_lock(&files_lglock); + lg_global_lock(files_lglock); do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if 
(!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -501,12 +502,12 @@ void mark_files_ro(struct super_block *sb) file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - lg_global_unlock(&files_lglock); + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; } while_file_list_for_each_entry; - lg_global_unlock(&files_lglock); + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -524,6 +525,6 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(&files_lglock, "files_lglock"); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/trunk/fs/fuse/file.c b/trunk/fs/fuse/file.c index 9562109d3a87..504e61b7fd75 100644 --- a/trunk/fs/fuse/file.c +++ b/trunk/fs/fuse/file.c @@ -962,9 +962,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - err = file_update_time(file); - if (err) - goto out; + file_update_time(file); if (file->f_flags & O_DIRECT) { written = generic_file_direct_write(iocb, iov, &nr_segs, diff --git a/trunk/fs/fuse/inode.c b/trunk/fs/fuse/inode.c index 42678a33b7bb..56f6dcf30768 100644 --- a/trunk/fs/fuse/inode.c +++ b/trunk/fs/fuse/inode.c @@ -627,10 +627,12 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, return ERR_PTR(err); } -static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, - struct inode *parent) +static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, + int connectable) { - int len = parent ? 6 : 3; + struct inode *inode = dentry->d_inode; + bool encode_parent = connectable && !S_ISDIR(inode->i_mode); + int len = encode_parent ? 
6 : 3; u64 nodeid; u32 generation; @@ -646,9 +648,14 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, fh[1] = (u32)(nodeid & 0xffffffff); fh[2] = generation; - if (parent) { + if (encode_parent) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; nodeid = get_fuse_inode(parent)->nodeid; generation = parent->i_generation; + spin_unlock(&dentry->d_lock); fh[3] = (u32)(nodeid >> 32); fh[4] = (u32)(nodeid & 0xffffffff); @@ -656,7 +663,7 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, } *max_len = len; - return parent ? 0x82 : 0x81; + return encode_parent ? 0x82 : 0x81; } static struct dentry *fuse_fh_to_dentry(struct super_block *sb, diff --git a/trunk/fs/gfs2/export.c b/trunk/fs/gfs2/export.c index e8ed6d4a6181..70ba891654f8 100644 --- a/trunk/fs/gfs2/export.c +++ b/trunk/fs/gfs2/export.c @@ -28,14 +28,15 @@ #define GFS2_LARGE_FH_SIZE 8 #define GFS2_OLD_FH_SIZE 10 -static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, - struct inode *parent) +static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, + int connectable) { __be32 *fh = (__force __be32 *)p; + struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; struct gfs2_inode *ip = GFS2_I(inode); - if (parent && (*len < GFS2_LARGE_FH_SIZE)) { + if (connectable && (*len < GFS2_LARGE_FH_SIZE)) { *len = GFS2_LARGE_FH_SIZE; return 255; } else if (*len < GFS2_SMALL_FH_SIZE) { @@ -49,10 +50,14 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_SMALL_FH_SIZE; - if (!parent || inode == sb->s_root->d_inode) + if (!connectable || inode == sb->s_root->d_inode) return *len; - ip = GFS2_I(parent); + spin_lock(&dentry->d_lock); + inode = dentry->d_parent->d_inode; + ip = GFS2_I(inode); + igrab(inode); + spin_unlock(&dentry->d_lock); fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); fh[5] = 
cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); @@ -60,6 +65,8 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_LARGE_FH_SIZE; + iput(inode); + return *len; } diff --git a/trunk/fs/hpfs/alloc.c b/trunk/fs/hpfs/alloc.c index cdb84a838068..7a5eb2c718c8 100644 --- a/trunk/fs/hpfs/alloc.c +++ b/trunk/fs/hpfs/alloc.c @@ -16,9 +16,9 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg) { struct quad_buffer_head qbh; - __le32 *bmp; + u32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; - if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { + if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); goto fail1; } @@ -62,7 +62,7 @@ int hpfs_chk_sectors(struct super_block *s, secno start, int len, char *msg) static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigned forward) { struct quad_buffer_head qbh; - __le32 *bmp; + unsigned *bmp; unsigned bs = near & ~0x3fff; unsigned nr = (near & 0x3fff) & ~(n - 1); /*unsigned mnr;*/ @@ -236,7 +236,7 @@ static secno alloc_in_dirband(struct super_block *s, secno near) int hpfs_alloc_if_possible(struct super_block *s, secno sec) { struct quad_buffer_head qbh; - __le32 *bmp; + u32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) { bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f))); @@ -254,7 +254,7 @@ int hpfs_alloc_if_possible(struct super_block *s, secno sec) void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) { struct quad_buffer_head qbh; - __le32 *bmp; + u32 *bmp; struct hpfs_sb_info *sbi = hpfs_sb(s); /*printk("2 - ");*/ if (!n) return; @@ -299,7 +299,7 @@ int hpfs_check_free_dnodes(struct super_block *s, int n) int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; int b 
= hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; int i, j; - __le32 *bmp; + u32 *bmp; struct quad_buffer_head qbh; if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { for (j = 0; j < 512; j++) { @@ -351,7 +351,7 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno) hpfs_free_sectors(s, dno, 4); } else { struct quad_buffer_head qbh; - __le32 *bmp; + u32 *bmp; unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { return; diff --git a/trunk/fs/hpfs/anode.c b/trunk/fs/hpfs/anode.c index 4bae4a4a60b1..08b503e8ed29 100644 --- a/trunk/fs/hpfs/anode.c +++ b/trunk/fs/hpfs/anode.c @@ -20,7 +20,7 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode, int c1, c2 = 0; go_down: if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; - if (bp_internal(btree)) { + if (btree->internal) { for (i = 0; i < btree->n_used_nodes; i++) if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) { a = le32_to_cpu(btree->u.internal[i].down); @@ -82,7 +82,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi brelse(bh); return -1; } - if (bp_internal(btree)) { + if (btree->internal) { a = le32_to_cpu(btree->u.internal[n].down); btree->u.internal[n].file_secno = cpu_to_le32(-1); mark_buffer_dirty(bh); @@ -129,12 +129,12 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if (a == node && fnod) { anode->up = cpu_to_le32(node); - anode->btree.flags |= BP_fnode_parent; + anode->btree.fnode_parent = 1; anode->btree.n_used_nodes = btree->n_used_nodes; anode->btree.first_free = btree->first_free; anode->btree.n_free_nodes = 40 - anode->btree.n_used_nodes; memcpy(&anode->u, &btree->u, btree->n_used_nodes * 12); - btree->flags |= BP_internal; + btree->internal = 1; btree->n_free_nodes = 11; btree->n_used_nodes = 1; btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree); @@ -184,10 +184,7 @@ secno 
hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi hpfs_free_sectors(s, ra, 1); if ((anode = hpfs_map_anode(s, na, &bh))) { anode->up = cpu_to_le32(up); - if (up == node && fnod) - anode->btree.flags |= BP_fnode_parent; - else - anode->btree.flags &= ~BP_fnode_parent; + anode->btree.fnode_parent = up == node && fnod; mark_buffer_dirty(bh); brelse(bh); } @@ -201,7 +198,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { anode = new_anode; /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/ - anode->btree.flags |= BP_internal; + anode->btree.internal = 1; anode->btree.n_used_nodes = 1; anode->btree.n_free_nodes = 59; anode->btree.first_free = cpu_to_le16(16); @@ -218,8 +215,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if ((anode = hpfs_map_anode(s, na, &bh))) { anode->up = cpu_to_le32(node); - if (fnod) - anode->btree.flags |= BP_fnode_parent; + if (fnod) anode->btree.fnode_parent = 1; mark_buffer_dirty(bh); brelse(bh); } @@ -238,19 +234,18 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } ranode->up = cpu_to_le32(node); memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free)); - if (fnod) - ranode->btree.flags |= BP_fnode_parent; - ranode->btree.n_free_nodes = (bp_internal(&ranode->btree) ? 60 : 40) - ranode->btree.n_used_nodes; - if (bp_internal(&ranode->btree)) for (n = 0; n < ranode->btree.n_used_nodes; n++) { + if (fnod) ranode->btree.fnode_parent = 1; + ranode->btree.n_free_nodes = (ranode->btree.internal ? 
60 : 40) - ranode->btree.n_used_nodes; + if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) { struct anode *unode; if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) { unode->up = cpu_to_le32(ra); - unode->btree.flags &= ~BP_fnode_parent; + unode->btree.fnode_parent = 0; mark_buffer_dirty(bh1); brelse(bh1); } } - btree->flags |= BP_internal; + btree->internal = 1; btree->n_free_nodes = fnod ? 10 : 58; btree->n_used_nodes = 2; btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree); @@ -283,7 +278,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) int d1, d2; go_down: d2 = 0; - while (bp_internal(btree1)) { + while (btree1->internal) { ano = le32_to_cpu(btree1->u.internal[pos].down); if (level) brelse(bh); if (hpfs_sb(s)->sb_chk) @@ -417,13 +412,13 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) btree->n_free_nodes = 8; btree->n_used_nodes = 0; btree->first_free = cpu_to_le16(8); - btree->flags &= ~BP_internal; + btree->internal = 0; mark_buffer_dirty(bh); } else hpfs_free_sectors(s, f, 1); brelse(bh); return; } - while (bp_internal(btree)) { + while (btree->internal) { nodes = btree->n_used_nodes + btree->n_free_nodes; for (i = 0; i < btree->n_used_nodes; i++) if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f; @@ -484,13 +479,13 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno) struct extended_attribute *ea; struct extended_attribute *ea_end; if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; - if (!fnode_is_dir(fnode)) hpfs_remove_btree(s, &fnode->btree); + if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree); else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) - if (ea_indirect(ea)) - hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); - hpfs_ea_ext_remove(s, 
le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l)); + if (ea->indirect) + hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); + hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l)); brelse(bh); hpfs_free_sectors(s, fno, 1); } diff --git a/trunk/fs/hpfs/dir.c b/trunk/fs/hpfs/dir.c index b8472f803f4e..2fa0089a02a8 100644 --- a/trunk/fs/hpfs/dir.c +++ b/trunk/fs/hpfs/dir.c @@ -87,7 +87,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = -EIOERROR; goto out; } - if (!fnode_is_dir(fno)) { + if (!fno->dirflag) { e = 1; hpfs_error(inode->i_sb, "not a directory, fnode %08lx", (unsigned long)inode->i_ino); diff --git a/trunk/fs/hpfs/dnode.c b/trunk/fs/hpfs/dnode.c index 3228c524ebe5..1e0e2ac30fd3 100644 --- a/trunk/fs/hpfs/dnode.c +++ b/trunk/fs/hpfs/dnode.c @@ -153,7 +153,7 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno } de->length = cpu_to_le16(36); de->down = 1; - *(__le32 *)((char *)de + 32) = cpu_to_le32(ptr); + *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr); } } @@ -177,7 +177,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, memmove((char *)de + d_size, de, (char *)de_end - (char *)de); memset(de, 0, d_size); if (down_ptr) { - *(__le32 *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); + *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); de->down = 1; } de->length = cpu_to_le16(d_size); @@ -656,7 +656,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) del->down = 0; d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); } else if (down) - *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); + *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); } else goto endm; if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) { printk("HPFS: out of memory for dtree balancing\n"); @@ 
-672,7 +672,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) de_prev->down = 1; dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); } - *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); + *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); @@ -1015,7 +1015,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, kfree(name2); return NULL; } - if (!fnode_is_dir(upf)) { + if (!upf->dirflag) { brelse(bh); hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up)); kfree(name2); diff --git a/trunk/fs/hpfs/ea.c b/trunk/fs/hpfs/ea.c index bcaafcd2666a..d8b84d113c89 100644 --- a/trunk/fs/hpfs/ea.c +++ b/trunk/fs/hpfs/ea.c @@ -23,15 +23,15 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len) return; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; - if (ea_indirect(ea)) { + if (ea->indirect) { if (ea_valuelen(ea) != 8) { - hpfs_error(s, "ea_indirect(ea) set while ea->valuelen!=8, %s %08x, pos %08x", + hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x", ano ? 
"anode" : "sectors", a, pos); return; } if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 9, ex+4)) return; - hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); + hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); } pos += ea->namelen + ea_valuelen(ea) + 5; } @@ -81,7 +81,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea_indirect(ea)) + if (ea->indirect) goto indirect; if (ea_valuelen(ea) >= size) return -EINVAL; @@ -91,7 +91,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode_in_anode(fnode); + ano = fnode->ea_anode; pos = 0; while (pos < len) { ea = (struct extended_attribute *)ex; @@ -101,10 +101,10 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, return -EIO; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return -EIO; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 
8 : 0), ex + 4)) return -EIO; if (!strcmp(ea->name, key)) { - if (ea_indirect(ea)) + if (ea->indirect) goto indirect; if (ea_valuelen(ea) >= size) return -EINVAL; @@ -119,7 +119,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, indirect: if (ea_len(ea) >= size) return -EINVAL; - if (hpfs_ea_read(s, ea_sec(ea), ea_in_anode(ea), 0, ea_len(ea), buf)) + if (hpfs_ea_read(s, ea_sec(ea), ea->anode, 0, ea_len(ea), buf)) return -EIO; buf[ea_len(ea)] = 0; return 0; @@ -136,8 +136,8 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea_indirect(ea)) - return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); + if (ea->indirect) + return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; @@ -148,7 +148,7 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode_in_anode(fnode); + ano = fnode->ea_anode; pos = 0; while (pos < len) { char ex[4 + 255 + 1 + 8]; @@ -159,11 +159,11 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si return NULL; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return NULL; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 
8 : 0), ex + 4)) return NULL; if (!strcmp(ea->name, key)) { - if (ea_indirect(ea)) - return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); + if (ea->indirect) + return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; @@ -199,9 +199,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea_indirect(ea)) { + if (ea->indirect) { if (ea_len(ea) == size) - set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); + set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); } else if (ea_valuelen(ea) == size) { memcpy(ea_data(ea), data, size); } @@ -209,7 +209,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode_in_anode(fnode); + ano = fnode->ea_anode; pos = 0; while (pos < len) { char ex[4 + 255 + 1 + 8]; @@ -220,12 +220,12 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, return; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 
8 : 0), ex + 4)) return; if (!strcmp(ea->name, key)) { - if (ea_indirect(ea)) { + if (ea->indirect) { if (ea_len(ea) == size) - set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); + set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); } else { if (ea_valuelen(ea) == size) @@ -246,7 +246,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) { hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", (unsigned long)inode->i_ino, - le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); + le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); return; } if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) && @@ -276,7 +276,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s)); fnode->ea_size_s = cpu_to_le16(0); fnode->ea_secno = cpu_to_le32(n); - fnode->flags &= ~FNODE_anode; + fnode->ea_anode = cpu_to_le32(0); mark_buffer_dirty(bh); brelse(bh); } @@ -288,9 +288,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, secno q = hpfs_alloc_sector(s, fno, 1, 0); if (!q) goto bail; fnode->ea_secno = cpu_to_le32(q); - fnode->flags &= ~FNODE_anode; + fnode->ea_anode = 0; len++; - } else if (!fnode_in_anode(fnode)) { + } else if (!fnode->ea_anode) { if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) { len++; } else { @@ -310,7 +310,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, anode->u.external[0].length = cpu_to_le32(len); mark_buffer_dirty(bh); brelse(bh); - fnode->flags |= FNODE_anode; + fnode->ea_anode = 1; fnode->ea_secno = cpu_to_le32(a_s);*/ secno new_sec; int i; @@ -338,7 +338,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, len = (pos + 511) >> 9; } } - if 
(fnode_in_anode(fnode)) { + if (fnode->ea_anode) { if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno), 0, len) != -1) { len++; @@ -351,16 +351,16 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, h[1] = strlen(key); h[2] = size & 0xff; h[3] = size >> 8; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; fnode->ea_size_l = cpu_to_le32(pos); ret: hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; return; bail: if (le32_to_cpu(fnode->ea_secno)) - if (fnode_in_anode(fnode)) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); + if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9)); else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0); } diff --git a/trunk/fs/hpfs/hpfs.h b/trunk/fs/hpfs/hpfs.h index cce025aff1b1..8b0650aae328 100644 --- a/trunk/fs/hpfs/hpfs.h +++ b/trunk/fs/hpfs/hpfs.h @@ -51,11 +51,11 @@ struct hpfs_boot_block u8 n_rootdir_entries[2]; u8 n_sectors_s[2]; u8 media_byte; - __le16 sectors_per_fat; - __le16 
sectors_per_track; - __le16 heads_per_cyl; - __le32 n_hidden_sectors; - __le32 n_sectors_l; /* size of partition */ + u16 sectors_per_fat; + u16 sectors_per_track; + u16 heads_per_cyl; + u32 n_hidden_sectors; + u32 n_sectors_l; /* size of partition */ u8 drive_number; u8 mbz; u8 sig_28h; /* 28h */ @@ -63,7 +63,7 @@ struct hpfs_boot_block u8 vol_label[11]; u8 sig_hpfs[8]; /* "HPFS " */ u8 pad[448]; - __le16 magic; /* aa55 */ + u16 magic; /* aa55 */ }; @@ -75,28 +75,28 @@ struct hpfs_boot_block struct hpfs_super_block { - __le32 magic; /* f995 e849 */ - __le32 magic1; /* fa53 e9c5, more magic? */ + u32 magic; /* f995 e849 */ + u32 magic1; /* fa53 e9c5, more magic? */ u8 version; /* version of a filesystem usually 2 */ u8 funcversion; /* functional version - oldest version of filesystem that can understand this disk */ - __le16 zero; /* 0 */ - __le32 root; /* fnode of root directory */ - __le32 n_sectors; /* size of filesystem */ - __le32 n_badblocks; /* number of bad blocks */ - __le32 bitmaps; /* pointers to free space bit maps */ - __le32 zero1; /* 0 */ - __le32 badblocks; /* bad block list */ - __le32 zero3; /* 0 */ - __le32 last_chkdsk; /* date last checked, 0 if never */ - __le32 last_optimize; /* date last optimized, 0 if never */ - __le32 n_dir_band; /* number of sectors in dir band */ - __le32 dir_band_start; /* first sector in dir band */ - __le32 dir_band_end; /* last sector in dir band */ - __le32 dir_band_bitmap; /* free space map, 1 dnode per bit */ + u16 zero; /* 0 */ + fnode_secno root; /* fnode of root directory */ + secno n_sectors; /* size of filesystem */ + u32 n_badblocks; /* number of bad blocks */ + secno bitmaps; /* pointers to free space bit maps */ + u32 zero1; /* 0 */ + secno badblocks; /* bad block list */ + u32 zero3; /* 0 */ + time32_t last_chkdsk; /* date last checked, 0 if never */ + time32_t last_optimize; /* date last optimized, 0 if never */ + secno n_dir_band; /* number of sectors in dir band */ + secno dir_band_start; /* first 
sector in dir band */ + secno dir_band_end; /* last sector in dir band */ + secno dir_band_bitmap; /* free space map, 1 dnode per bit */ u8 volume_name[32]; /* not used */ - __le32 user_id_table; /* 8 preallocated sectors - user id */ + secno user_id_table; /* 8 preallocated sectors - user id */ u32 zero6[103]; /* 0 */ }; @@ -109,8 +109,8 @@ struct hpfs_super_block struct hpfs_spare_block { - __le32 magic; /* f991 1849 */ - __le32 magic1; /* fa52 29c5, more magic? */ + u32 magic; /* f991 1849 */ + u32 magic1; /* fa52 29c5, more magic? */ #ifdef __LITTLE_ENDIAN u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */ @@ -153,21 +153,21 @@ struct hpfs_spare_block u8 mm_contlgulty; u8 unused; - __le32 hotfix_map; /* info about remapped bad sectors */ - __le32 n_spares_used; /* number of hotfixes */ - __le32 n_spares; /* number of spares in hotfix map */ - __le32 n_dnode_spares_free; /* spare dnodes unused */ - __le32 n_dnode_spares; /* length of spare_dnodes[] list, + secno hotfix_map; /* info about remapped bad sectors */ + u32 n_spares_used; /* number of hotfixes */ + u32 n_spares; /* number of spares in hotfix map */ + u32 n_dnode_spares_free; /* spare dnodes unused */ + u32 n_dnode_spares; /* length of spare_dnodes[] list, follows in this block*/ - __le32 code_page_dir; /* code page directory block */ - __le32 n_code_pages; /* number of code pages */ - __le32 super_crc; /* on HPFS386 and LAN Server this is + secno code_page_dir; /* code page directory block */ + u32 n_code_pages; /* number of code pages */ + u32 super_crc; /* on HPFS386 and LAN Server this is checksum of superblock, on normal OS/2 unused */ - __le32 spare_crc; /* on HPFS386 checksum of spareblock */ - __le32 zero1[15]; /* unused */ - __le32 spare_dnodes[100]; /* emergency free dnode list */ - __le32 zero2[1]; /* room for more? 
*/ + u32 spare_crc; /* on HPFS386 checksum of spareblock */ + u32 zero1[15]; /* unused */ + dnode_secno spare_dnodes[100]; /* emergency free dnode list */ + u32 zero2[1]; /* room for more? */ }; /* The bad block list is 4 sectors long. The first word must be zero, @@ -202,18 +202,18 @@ struct hpfs_spare_block struct code_page_directory { - __le32 magic; /* 4945 21f7 */ - __le32 n_code_pages; /* number of pointers following */ - __le32 zero1[2]; + u32 magic; /* 4945 21f7 */ + u32 n_code_pages; /* number of pointers following */ + u32 zero1[2]; struct { - __le16 ix; /* index */ - __le16 code_page_number; /* code page number */ - __le32 bounds; /* matches corresponding word + u16 ix; /* index */ + u16 code_page_number; /* code page number */ + u32 bounds; /* matches corresponding word in data block */ - __le32 code_page_data; /* sector number of a code_page_data + secno code_page_data; /* sector number of a code_page_data containing c.p. array */ - __le16 index; /* index in c.p. array in that sector*/ - __le16 unknown; /* some unknown value; usually 0; + u16 index; /* index in c.p. 
array in that sector*/ + u16 unknown; /* some unknown value; usually 0; 2 in Japanese version */ } array[31]; /* unknown length */ }; @@ -224,19 +224,19 @@ struct code_page_directory struct code_page_data { - __le32 magic; /* 8945 21f7 */ - __le32 n_used; /* # elements used in c_p_data[] */ - __le32 bounds[3]; /* looks a bit like + u32 magic; /* 8945 21f7 */ + u32 n_used; /* # elements used in c_p_data[] */ + u32 bounds[3]; /* looks a bit like (beg1,end1), (beg2,end2) one byte each */ - __le16 offs[3]; /* offsets from start of sector + u16 offs[3]; /* offsets from start of sector to start of c_p_data[ix] */ struct { - __le16 ix; /* index */ - __le16 code_page_number; /* code page number */ - __le16 unknown; /* the same as in cp directory */ + u16 ix; /* index */ + u16 code_page_number; /* code page number */ + u16 unknown; /* the same as in cp directory */ u8 map[128]; /* upcase table for chars 80..ff */ - __le16 zero2; + u16 zero2; } code_page[3]; u8 incognita[78]; }; @@ -278,8 +278,8 @@ struct code_page_data #define DNODE_MAGIC 0x77e40aae struct dnode { - __le32 magic; /* 77e4 0aae */ - __le32 first_free; /* offset from start of dnode to + u32 magic; /* 77e4 0aae */ + u32 first_free; /* offset from start of dnode to first free dir entry */ #ifdef __LITTLE_ENDIAN u8 root_dnode: 1; /* Is it root dnode? */ @@ -293,14 +293,14 @@ struct dnode { u8 root_dnode: 1; /* Is it root dnode? 
*/ #endif u8 increment_me2[3]; - __le32 up; /* (root dnode) directory's fnode + secno up; /* (root dnode) directory's fnode (nonroot) parent dnode */ - __le32 self; /* pointer to this dnode */ + dnode_secno self; /* pointer to this dnode */ u8 dirent[2028]; /* one or more dirents */ }; struct hpfs_dirent { - __le16 length; /* offset to next dirent */ + u16 length; /* offset to next dirent */ #ifdef __LITTLE_ENDIAN u8 first: 1; /* set on phony ^A^A (".") entry */ @@ -346,12 +346,12 @@ struct hpfs_dirent { u8 read_only: 1; /* dos attrib */ #endif - __le32 fnode; /* fnode giving allocation info */ - __le32 write_date; /* mtime */ - __le32 file_size; /* file length, bytes */ - __le32 read_date; /* atime */ - __le32 creation_date; /* ctime */ - __le32 ea_size; /* total EA length, bytes */ + fnode_secno fnode; /* fnode giving allocation info */ + time32_t write_date; /* mtime */ + u32 file_size; /* file length, bytes */ + time32_t read_date; /* atime */ + time32_t creation_date; /* ctime */ + u32 ea_size; /* total EA length, bytes */ u8 no_of_acls; /* number of ACL's (low 3 bits) */ u8 ix; /* code page index (of filename), see struct code_page_data */ @@ -375,36 +375,50 @@ struct hpfs_dirent { struct bplus_leaf_node { - __le32 file_secno; /* first file sector in extent */ - __le32 length; /* length, sectors */ - __le32 disk_secno; /* first corresponding disk sector */ + u32 file_secno; /* first file sector in extent */ + u32 length; /* length, sectors */ + secno disk_secno; /* first corresponding disk sector */ }; struct bplus_internal_node { - __le32 file_secno; /* subtree maps sectors < this */ - __le32 down; /* pointer to subtree */ + u32 file_secno; /* subtree maps sectors < this */ + anode_secno down; /* pointer to subtree */ }; -enum { - BP_hbff = 1, - BP_fnode_parent = 0x20, - BP_binary_search = 0x40, - BP_internal = 0x80 -}; struct bplus_header { - u8 flags; /* bit 0 - high bit of first free entry offset - bit 5 - we're pointed to by an fnode, +#ifdef 
__LITTLE_ENDIAN + u8 hbff: 1; /* high bit of first free entry offset */ + u8 flag1234: 4; + u8 fnode_parent: 1; /* ? we're pointed to by an fnode, + the data btree or some ea or the + main ea bootage pointer ea_secno */ + /* also can get set in fnodes, which + may be a chkdsk glitch or may mean + this bit is irrelevant in fnodes, + or this interpretation is all wet */ + u8 binary_search: 1; /* suggest binary search (unused) */ + u8 internal: 1; /* 1 -> (internal) tree of anodes + 0 -> (leaf) list of extents */ +#else + u8 internal: 1; /* 1 -> (internal) tree of anodes + 0 -> (leaf) list of extents */ + u8 binary_search: 1; /* suggest binary search (unused) */ + u8 fnode_parent: 1; /* ? we're pointed to by an fnode, the data btree or some ea or the - main ea bootage pointer ea_secno - bit 6 - suggest binary search (unused) - bit 7 - 1 -> (internal) tree of anodes - 0 -> (leaf) list of extents */ + main ea bootage pointer ea_secno */ + /* also can get set in fnodes, which + may be a chkdsk glitch or may mean + this bit is irrelevant in fnodes, + or this interpretation is all wet */ + u8 flag1234: 4; + u8 hbff: 1; /* high bit of first free entry offset */ +#endif u8 fill[3]; u8 n_free_nodes; /* free nodes in following array */ u8 n_used_nodes; /* used nodes in following array */ - __le16 first_free; /* offset from start of header to + u16 first_free; /* offset from start of header to first free node in array */ union { struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving @@ -414,16 +428,6 @@ struct bplus_header } u; }; -static inline bool bp_internal(struct bplus_header *bp) -{ - return bp->flags & BP_internal; -} - -static inline bool bp_fnode_parent(struct bplus_header *bp) -{ - return bp->flags & BP_fnode_parent; -} - /* fnode: root of allocation b+ tree, and EA's */ /* Every file and every directory has one fnode, pointed to by the directory @@ -432,56 +436,62 @@ static inline bool bp_fnode_parent(struct bplus_header *bp) #define FNODE_MAGIC 
0xf7e40aae -enum {FNODE_anode = cpu_to_le16(2), FNODE_dir = cpu_to_le16(256)}; struct fnode { - __le32 magic; /* f7e4 0aae */ - __le32 zero1[2]; /* read history */ + u32 magic; /* f7e4 0aae */ + u32 zero1[2]; /* read history */ u8 len, name[15]; /* true length, truncated name */ - __le32 up; /* pointer to file's directory fnode */ - __le32 acl_size_l; - __le32 acl_secno; - __le16 acl_size_s; + fnode_secno up; /* pointer to file's directory fnode */ + secno acl_size_l; + secno acl_secno; + u16 acl_size_s; u8 acl_anode; u8 zero2; /* history bit count */ - __le32 ea_size_l; /* length of disk-resident ea's */ - __le32 ea_secno; /* first sector of disk-resident ea's*/ - __le16 ea_size_s; /* length of fnode-resident ea's */ + u32 ea_size_l; /* length of disk-resident ea's */ + secno ea_secno; /* first sector of disk-resident ea's*/ + u16 ea_size_s; /* length of fnode-resident ea's */ + +#ifdef __LITTLE_ENDIAN + u8 flag0: 1; + u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ + u8 flag234567: 6; +#else + u8 flag234567: 6; + u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ + u8 flag0: 1; +#endif - __le16 flags; /* bit 1 set -> ea_secno is an anode */ - /* bit 8 set -> directory. first & only extent +#ifdef __LITTLE_ENDIAN + u8 dirflag: 1; /* 1 -> directory. first & only extent + points to dnode. */ + u8 flag9012345: 7; +#else + u8 flag9012345: 7; + u8 dirflag: 1; /* 1 -> directory. first & only extent points to dnode. 
*/ +#endif + struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ union { struct bplus_leaf_node external[8]; struct bplus_internal_node internal[12]; } u; - __le32 file_size; /* file length, bytes */ - __le32 n_needea; /* number of EA's with NEEDEA set */ + u32 file_size; /* file length, bytes */ + u32 n_needea; /* number of EA's with NEEDEA set */ u8 user_id[16]; /* unused */ - __le16 ea_offs; /* offset from start of fnode + u16 ea_offs; /* offset from start of fnode to first fnode-resident ea */ u8 dasd_limit_treshhold; u8 dasd_limit_delta; - __le32 dasd_limit; - __le32 dasd_usage; + u32 dasd_limit; + u32 dasd_usage; u8 ea[316]; /* zero or more EA's, packed together with no alignment padding. (Do not use this name, get here via fnode + ea_offs. I think.) */ }; -static inline bool fnode_in_anode(struct fnode *p) -{ - return (p->flags & FNODE_anode) != 0; -} - -static inline bool fnode_is_dir(struct fnode *p) -{ - return (p->flags & FNODE_dir) != 0; -} - /* anode: 99.44% pure allocation tree */ @@ -489,9 +499,9 @@ static inline bool fnode_is_dir(struct fnode *p) struct anode { - __le32 magic; /* 37e4 0aae */ - __le32 self; /* pointer to this anode */ - __le32 up; /* parent anode or fnode */ + u32 magic; /* 37e4 0aae */ + anode_secno self; /* pointer to this anode */ + secno up; /* parent anode or fnode */ struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ union { @@ -499,7 +509,7 @@ struct anode struct bplus_internal_node internal[60]; } u; - __le32 fill[3]; /* unused */ + u32 fill[3]; /* unused */ }; @@ -518,23 +528,32 @@ struct anode run, or in multiple runs. Flags in the fnode tell whether the EA list is immediate, in a single run, or in multiple runs. 
*/ -enum {EA_indirect = 1, EA_anode = 2, EA_needea = 128 }; struct extended_attribute { - u8 flags; /* bit 0 set -> value gives sector number +#ifdef __LITTLE_ENDIAN + u8 indirect: 1; /* 1 -> value gives sector number where real value starts */ - /* bit 1 set -> sector is an anode + u8 anode: 1; /* 1 -> sector is an anode that points to fragmented value */ - /* bit 7 set -> required ea */ + u8 flag23456: 5; + u8 needea: 1; /* required ea */ +#else + u8 needea: 1; /* required ea */ + u8 flag23456: 5; + u8 anode: 1; /* 1 -> sector is an anode + that points to fragmented value */ + u8 indirect: 1; /* 1 -> value gives sector number + where real value starts */ +#endif u8 namelen; /* length of name, bytes */ u8 valuelen_lo; /* length of value, bytes */ u8 valuelen_hi; /* length of value, bytes */ - u8 name[]; + u8 name[0]; /* u8 name[namelen]; ascii attrib name u8 nul; terminating '\0', not counted u8 value[valuelen]; value, arbitrary - if this.flags & 1, valuelen is 8 and the value is + if this.indirect, valuelen is 8 and the value is u32 length; real length of value, bytes secno secno; sector address where it starts if this.anode, the above sector number is the root of an anode tree @@ -542,16 +561,6 @@ struct extended_attribute */ }; -static inline bool ea_indirect(struct extended_attribute *ea) -{ - return ea->flags & EA_indirect; -} - -static inline bool ea_in_anode(struct extended_attribute *ea) -{ - return ea->flags & EA_anode; -} - /* Local Variables: comment-column: 40 diff --git a/trunk/fs/hpfs/hpfs_fn.h b/trunk/fs/hpfs/hpfs_fn.h index c07ef1f1ced6..6d2d5008fa43 100644 --- a/trunk/fs/hpfs/hpfs_fn.h +++ b/trunk/fs/hpfs/hpfs_fn.h @@ -75,7 +75,7 @@ struct hpfs_sb_info { unsigned char *sb_cp_table; /* code page tables: */ /* 128 bytes uppercasing table & */ /* 128 bytes lowercasing table */ - __le32 *sb_bmp_dir; /* main bitmap directory */ + unsigned *sb_bmp_dir; /* main bitmap directory */ unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* 
max forwad allocation */ int sb_timeshift; @@ -93,7 +93,7 @@ struct quad_buffer_head { static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) { CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); - return le32_to_cpu(*(__le32 *) ((void *) de + le16_to_cpu(de->length) - 4)); + return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4)); } /* The first dir entry in a dnode */ @@ -141,12 +141,12 @@ static inline struct extended_attribute *next_ea(struct extended_attribute *ea) static inline secno ea_sec(struct extended_attribute *ea) { - return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 9 + ea->namelen))); + return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen))); } static inline secno ea_len(struct extended_attribute *ea) { - return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 5 + ea->namelen))); + return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen))); } static inline char *ea_data(struct extended_attribute *ea) @@ -171,7 +171,7 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src) dst->not_8x3 = n; } -static inline unsigned tstbits(__le32 *bmp, unsigned b, unsigned n) +static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n) { int i; if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; @@ -268,10 +268,10 @@ void hpfs_evict_inode(struct inode *); /* map.c */ -__le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); -__le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); +unsigned *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); +unsigned *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); unsigned char *hpfs_load_code_page(struct super_block *, secno); -__le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); +secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp); struct fnode *hpfs_map_fnode(struct 
super_block *s, ino_t, struct buffer_head **); struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **); struct dnode *hpfs_map_dnode(struct super_block *s, dnode_secno, struct quad_buffer_head *); diff --git a/trunk/fs/hpfs/inode.c b/trunk/fs/hpfs/inode.c index ed671e0ea784..b43066cbdc6a 100644 --- a/trunk/fs/hpfs/inode.c +++ b/trunk/fs/hpfs/inode.c @@ -110,7 +110,7 @@ void hpfs_read_inode(struct inode *i) } } } - if (fnode_is_dir(fnode)) { + if (fnode->dirflag) { int n_dnodes, n_subdirs; i->i_mode |= S_IFDIR; i->i_op = &hpfs_dir_iops; diff --git a/trunk/fs/hpfs/map.c b/trunk/fs/hpfs/map.c index 4acb19d78359..a790821366a7 100644 --- a/trunk/fs/hpfs/map.c +++ b/trunk/fs/hpfs/map.c @@ -8,12 +8,12 @@ #include "hpfs_fn.h" -__le32 *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) +unsigned *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) { return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0); } -__le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, +unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; @@ -89,18 +89,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) return cp_table; } -__le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) +secno *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) { struct buffer_head *bh; int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21; int i; - __le32 *b; + secno *b; if (!(b = kmalloc(n * 512, GFP_KERNEL))) { printk("HPFS: can't allocate memory for bitmap directory\n"); return NULL; } for (i=0;idirflag) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != - (bp_internal(&fnode->btree) ? 12 : 8)) { + (fnode->btree.internal ? 
12 : 8)) { hpfs_error(s, "bad number of nodes in fnode %08lx", (unsigned long)ino); goto bail; } if (le16_to_cpu(fnode->btree.first_free) != - 8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 8 : 12)) { + 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in fnode %08lx", (unsigned long)ino); @@ -187,12 +187,12 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff goto bail; } if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != - (bp_internal(&anode->btree) ? 60 : 40)) { + (anode->btree.internal ? 60 : 40)) { hpfs_error(s, "bad number of nodes in anode %08x", ano); goto bail; } if (le16_to_cpu(anode->btree.first_free) != - 8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 8 : 12)) { + 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in anode %08x", ano); goto bail; } diff --git a/trunk/fs/hpfs/namei.c b/trunk/fs/hpfs/namei.c index 9083ef8af58c..30dd7b10b507 100644 --- a/trunk/fs/hpfs/namei.c +++ b/trunk/fs/hpfs/namei.c @@ -70,7 +70,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fnode->len = len; memcpy(fnode->name, name, len > 15 ? 
15 : len); fnode->up = cpu_to_le32(dir->i_ino); - fnode->flags |= FNODE_dir; + fnode->dirflag = 1; fnode->btree.n_free_nodes = 7; fnode->btree.n_used_nodes = 1; fnode->btree.first_free = cpu_to_le16(0x14); diff --git a/trunk/fs/hpfs/super.c b/trunk/fs/hpfs/super.c index 706a12c083ea..54f6eccb79d9 100644 --- a/trunk/fs/hpfs/super.c +++ b/trunk/fs/hpfs/super.c @@ -572,7 +572,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) mark_buffer_dirty(bh2); } - if (spareblock->hotfixes_used || spareblock->n_spares_used) { + if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) { if (errs >= 2) { printk("HPFS: Hotfixes not supported here, try chkdsk\n"); mark_dirty(s, 0); @@ -645,7 +645,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) root->i_mtime.tv_nsec = 0; root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date)); root->i_ctime.tv_nsec = 0; - hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size); + hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size); hpfs_i(root)->i_parent_dir = root->i_ino; if (root->i_size == -1) root->i_size = 2048; diff --git a/trunk/fs/inode.c b/trunk/fs/inode.c index c99163b1b310..c474c1d7062b 100644 --- a/trunk/fs/inode.c +++ b/trunk/fs/inode.c @@ -1487,30 +1487,10 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } -/* - * This does the actual work of updating an inodes time or version. Must have - * had called mnt_want_write() before calling this. 
- */ -static int update_time(struct inode *inode, struct timespec *time, int flags) -{ - if (inode->i_op->update_time) - return inode->i_op->update_time(inode, time, flags); - - if (flags & S_ATIME) - inode->i_atime = *time; - if (flags & S_VERSION) - inode_inc_iversion(inode); - if (flags & S_CTIME) - inode->i_ctime = *time; - if (flags & S_MTIME) - inode->i_mtime = *time; - mark_inode_dirty_sync(inode); - return 0; -} - /** * touch_atime - update the access time - * @path: the &struct path to update + * @mnt: mount the inode is accessed on + * @dentry: dentry accessed * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, @@ -1545,83 +1525,12 @@ void touch_atime(struct path *path) if (mnt_want_write(mnt)) return; - /* - * File systems can error out when updating inodes if they need to - * allocate new space to modify an inode (such is the case for - * Btrfs), but since we touch atime while walking down the path we - * really don't care if we failed to update the atime of the file, - * so just ignore the return value. - */ - update_time(inode, &now, S_ATIME); + inode->i_atime = now; + mark_inode_dirty_sync(inode); mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); -/* - * The logic we want is - * - * if suid or (sgid and xgrp) - * remove privs - */ -int should_remove_suid(struct dentry *dentry) -{ - umode_t mode = dentry->d_inode->i_mode; - int kill = 0; - - /* suid always must be killed */ - if (unlikely(mode & S_ISUID)) - kill = ATTR_KILL_SUID; - - /* - * sgid without any exec bits is just a mandatory locking mark; leave - * it alone. If some exec bits are set, it's a real sgid; kill it. 
- */ - if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) - kill |= ATTR_KILL_SGID; - - if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) - return kill; - - return 0; -} -EXPORT_SYMBOL(should_remove_suid); - -static int __remove_suid(struct dentry *dentry, int kill) -{ - struct iattr newattrs; - - newattrs.ia_valid = ATTR_FORCE | kill; - return notify_change(dentry, &newattrs); -} - -int file_remove_suid(struct file *file) -{ - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - int killsuid; - int killpriv; - int error = 0; - - /* Fast path for nothing security related */ - if (IS_NOSEC(inode)) - return 0; - - killsuid = should_remove_suid(dentry); - killpriv = security_inode_need_killpriv(dentry); - - if (killpriv < 0) - return killpriv; - if (killpriv) - error = security_inode_killpriv(dentry); - if (!error && killsuid) - error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; - - return error; -} -EXPORT_SYMBOL(file_remove_suid); - /** * file_update_time - update mtime and ctime time * @file: file accessed @@ -1631,20 +1540,18 @@ EXPORT_SYMBOL(file_remove_suid); * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the * S_NOCMTIME inode flag, e.g. for network filesystem where these - * timestamps are handled by the server. This can return an error for - * file systems who need to allocate space in order to update an inode. + * timestamps are handled by the server. 
*/ -int file_update_time(struct file *file) +void file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - int sync_it = 0; - int ret; + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) - return 0; + return; now = current_fs_time(inode->i_sb); if (!timespec_equal(&inode->i_mtime, &now)) @@ -1657,16 +1564,21 @@ int file_update_time(struct file *file) sync_it |= S_VERSION; if (!sync_it) - return 0; + return; /* Finally allowed to write? Takes lock. */ if (mnt_want_write_file(file)) - return 0; + return; - ret = update_time(inode, &now, sync_it); + /* Only change inode inside the lock region */ + if (sync_it & S_VERSION) + inode_inc_iversion(inode); + if (sync_it & S_CTIME) + inode->i_ctime = now; + if (sync_it & S_MTIME) + inode->i_mtime = now; + mark_inode_dirty_sync(inode); mnt_drop_write_file(file); - - return ret; } EXPORT_SYMBOL(file_update_time); diff --git a/trunk/fs/internal.h b/trunk/fs/internal.h index 18bc216ea09d..9962c59ba280 100644 --- a/trunk/fs/internal.h +++ b/trunk/fs/internal.h @@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -extern struct lglock vfsmount_lock; +DECLARE_BRLOCK(vfsmount_lock); /* @@ -100,7 +100,6 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); -extern int open_check_o_direct(struct file *f); /* * inode.c diff --git a/trunk/fs/isofs/export.c b/trunk/fs/isofs/export.c index aa4356d09eee..dd4687ff30d0 100644 --- a/trunk/fs/isofs/export.c +++ b/trunk/fs/isofs/export.c @@ -107,11 +107,12 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) } static int -isofs_export_encode_fh(struct inode *inode, +isofs_export_encode_fh(struct dentry *dentry, __u32 *fh32, int *max_len, - struct inode *parent) + 
int connectable) { + struct inode * inode = dentry->d_inode; struct iso_inode_info * ei = ISOFS_I(inode); int len = *max_len; int type = 1; @@ -123,7 +124,7 @@ isofs_export_encode_fh(struct inode *inode, * offset of the inode and the upper 16 bits of fh32[1] to * hold the offset of the parent. */ - if (parent && (len < 5)) { + if (connectable && (len < 5)) { *max_len = 5; return 255; } else if (len < 3) { @@ -135,12 +136,16 @@ isofs_export_encode_fh(struct inode *inode, fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ fh32[2] = inode->i_generation; - if (parent) { + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; struct iso_inode_info *eparent; + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; eparent = ISOFS_I(parent); fh32[3] = eparent->i_iget5_block; fh16[3] = (__u16)eparent->i_iget5_offset; /* fh16 [sic] */ fh32[4] = parent->i_generation; + spin_unlock(&dentry->d_lock); len = 5; type = 2; } diff --git a/trunk/fs/jbd2/Kconfig b/trunk/fs/jbd2/Kconfig index 69a48c2944da..f32f346f4b0a 100644 --- a/trunk/fs/jbd2/Kconfig +++ b/trunk/fs/jbd2/Kconfig @@ -1,8 +1,6 @@ config JBD2 tristate select CRC32 - select CRYPTO - select CRYPTO_CRC32C help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. 
It is currently used by diff --git a/trunk/fs/jbd2/commit.c b/trunk/fs/jbd2/commit.c index 216f4299f65e..840f70f50792 100644 --- a/trunk/fs/jbd2/commit.c +++ b/trunk/fs/jbd2/commit.c @@ -85,24 +85,6 @@ static void release_buffer_page(struct buffer_head *bh) __brelse(bh); } -static void jbd2_commit_block_csum_set(journal_t *j, - struct journal_head *descriptor) -{ - struct commit_header *h; - __u32 csum; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return; - - h = (struct commit_header *)(jh2bh(descriptor)->b_data); - h->h_chksum_type = 0; - h->h_chksum_size = 0; - h->h_chksum[0] = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); - h->h_chksum[0] = cpu_to_be32(csum); -} - /* * Done it all: now submit the commit record. We should have * cleaned up our previous buffers by now, so if we are in abort @@ -146,7 +128,6 @@ static int journal_submit_commit_record(journal_t *journal, tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; tmp->h_chksum[0] = cpu_to_be32(crc32_sum); } - jbd2_commit_block_csum_set(journal, descriptor); JBUFFER_TRACE(descriptor, "submit commit block"); lock_buffer(bh); @@ -320,44 +301,6 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } -static void jbd2_descr_block_csum_set(journal_t *j, - struct journal_head *descriptor) -{ - struct jbd2_journal_block_tail *tail; - __u32 csum; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return; - - tail = (struct jbd2_journal_block_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - - sizeof(struct jbd2_journal_block_tail)); - tail->t_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); - tail->t_checksum = cpu_to_be32(csum); -} - -static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, - struct buffer_head *bh, __u32 sequence) -{ - struct page *page = bh->b_page; - __u8 *addr; - 
__u32 csum; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return; - - sequence = cpu_to_be32(sequence); - addr = kmap_atomic(page, KM_USER0); - csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, - sizeof(sequence)); - csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), - bh->b_size); - kunmap_atomic(addr, KM_USER0); - - tag->t_checksum = cpu_to_be32(csum); -} /* * jbd2_journal_commit_transaction * @@ -391,10 +334,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) unsigned long first_block; tid_t first_tid; int update_tail; - int csum_size = 0; - - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - csum_size = sizeof(struct jbd2_journal_block_tail); /* * First job: lock down the current transaction and wait for @@ -688,9 +627,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag = (journal_block_tag_t *) tagp; write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); - tag->t_flags = cpu_to_be16(tag_flag); - jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), - commit_transaction->t_tid); + tag->t_flags = cpu_to_be32(tag_flag); tagp += tag_bytes; space_left -= tag_bytes; @@ -706,7 +643,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (bufs == journal->j_wbufsize || commit_transaction->t_buffers == NULL || - space_left < tag_bytes + 16 + csum_size) { + space_left < tag_bytes + 16) { jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); @@ -714,9 +651,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) submitting the IOs. "tag" still points to the last tag we set up. 
*/ - tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); + tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); - jbd2_descr_block_csum_set(journal, descriptor); start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; diff --git a/trunk/fs/jbd2/journal.c b/trunk/fs/jbd2/journal.c index e9a3c4c85594..1afb701622b0 100644 --- a/trunk/fs/jbd2/journal.c +++ b/trunk/fs/jbd2/journal.c @@ -97,43 +97,6 @@ EXPORT_SYMBOL(jbd2_inode_cache); static void __journal_abort_soft (journal_t *journal, int errno); static int jbd2_journal_create_slab(size_t slab_size); -/* Checksumming functions */ -int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) -{ - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; - - return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; -} - -static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) -{ - __u32 csum, old_csum; - - old_csum = sb->s_checksum; - sb->s_checksum = 0; - csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t)); - sb->s_checksum = old_csum; - - return cpu_to_be32(csum); -} - -int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) -{ - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; - - return sb->s_checksum == jbd2_superblock_csum(j, sb); -} - -void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) -{ - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return; - - sb->s_checksum = jbd2_superblock_csum(j, sb); -} - /* * Helper function used to manage commit timeouts */ @@ -1385,7 +1348,6 @@ static void jbd2_journal_update_sb_errno(journal_t *journal) jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno); - jbd2_superblock_csum_set(journal, sb); read_unlock(&journal->j_state_lock); jbd2_write_superblock(journal, WRITE_SYNC); @@ -1414,9 +1376,6 @@ static int journal_get_superblock(journal_t *journal) } } - if 
(buffer_verified(bh)) - return 0; - sb = journal->j_superblock; err = -EINVAL; @@ -1454,43 +1413,6 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { - /* Can't have checksum v1 and v2 on at the same time! */ - printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " - "at the same time!\n"); - goto out; - } - - if (!jbd2_verify_csum_type(journal, sb)) { - printk(KERN_ERR "JBD: Unknown checksum type\n"); - goto out; - } - - /* Load the checksum driver */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { - journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); - err = PTR_ERR(journal->j_chksum_driver); - journal->j_chksum_driver = NULL; - goto out; - } - } - - /* Check superblock checksum */ - if (!jbd2_superblock_csum_verify(journal, sb)) { - printk(KERN_ERR "JBD: journal checksum error\n"); - goto out; - } - - /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, - sizeof(sb->s_uuid)); - - set_buffer_verified(bh); - return 0; out: @@ -1642,8 +1564,6 @@ int jbd2_journal_destroy(journal_t *journal) iput(journal->j_inode); if (journal->j_revoke) jbd2_journal_destroy_revoke(journal); - if (journal->j_chksum_driver) - crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_wbuf); kfree(journal); @@ -1733,10 +1653,6 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com int jbd2_journal_set_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { -#define INCOMPAT_FEATURE_ON(f) \ - ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f))) -#define COMPAT_FEATURE_ON(f) \ - 
((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f))) journal_superblock_t *sb; if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) @@ -1745,54 +1661,16 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; - /* Asking for checksumming v2 and v1? Only give them v2. */ - if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && - compat & JBD2_FEATURE_COMPAT_CHECKSUM) - compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; - jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", compat, ro, incompat); sb = journal->j_superblock; - /* If enabling v2 checksums, update superblock */ - if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { - sb->s_checksum_type = JBD2_CRC32C_CHKSUM; - sb->s_feature_compat &= - ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); - - /* Load the checksum driver */ - if (journal->j_chksum_driver == NULL) { - journal->j_chksum_driver = crypto_alloc_shash("crc32c", - 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c " - "driver.\n"); - journal->j_chksum_driver = NULL; - return 0; - } - } - - /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) - journal->j_csum_seed = jbd2_chksum(journal, ~0, - sb->s_uuid, - sizeof(sb->s_uuid)); - } - - /* If enabling v1 checksums, downgrade superblock */ - if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) - sb->s_feature_incompat &= - ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); - sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_ro_compat |= cpu_to_be32(ro); sb->s_feature_incompat |= cpu_to_be32(incompat); return 1; -#undef COMPAT_FEATURE_ON -#undef INCOMPAT_FEATURE_ON } /* @@ -2097,16 +1975,10 @@ int jbd2_journal_blocks_per_page(struct inode *inode) */ size_t journal_tag_bytes(journal_t *journal) { - journal_block_tag_t tag; - size_t x = 0; - - if 
(JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - x += sizeof(tag.t_checksum); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return x + JBD2_TAG_SIZE64; + return JBD2_TAG_SIZE64; else - return x + JBD2_TAG_SIZE32; + return JBD2_TAG_SIZE32; } /* diff --git a/trunk/fs/jbd2/recovery.c b/trunk/fs/jbd2/recovery.c index 0131e4362534..c1a03354a22f 100644 --- a/trunk/fs/jbd2/recovery.c +++ b/trunk/fs/jbd2/recovery.c @@ -174,25 +174,6 @@ static int jread(struct buffer_head **bhp, journal_t *journal, return 0; } -static int jbd2_descr_block_csum_verify(journal_t *j, - void *buf) -{ - struct jbd2_journal_block_tail *tail; - __u32 provided, calculated; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; - - tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - - sizeof(struct jbd2_journal_block_tail)); - provided = tail->t_checksum; - tail->t_checksum = 0; - calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); - tail->t_checksum = provided; - - provided = be32_to_cpu(provided); - return provided == calculated; -} /* * Count the number of in-use tags in a journal descriptor block. 
@@ -205,9 +186,6 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) int nr = 0, size = journal->j_blocksize; int tag_bytes = journal_tag_bytes(journal); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - size -= sizeof(struct jbd2_journal_block_tail); - tagp = &bh->b_data[sizeof(journal_header_t)]; while ((tagp - bh->b_data + tag_bytes) <= size) { @@ -215,10 +193,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) nr++; tagp += tag_bytes; - if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) + if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) tagp += 16; - if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) + if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) break; } @@ -375,41 +353,6 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, return 0; } -static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) -{ - struct commit_header *h; - __u32 provided, calculated; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; - - h = buf; - provided = h->h_chksum[0]; - h->h_chksum[0] = 0; - calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); - h->h_chksum[0] = provided; - - provided = be32_to_cpu(provided); - return provided == calculated; -} - -static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, - void *buf, __u32 sequence) -{ - __u32 provided, calculated; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; - - sequence = cpu_to_be32(sequence); - calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, - sizeof(sequence)); - calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); - provided = be32_to_cpu(tag->t_checksum); - - return provided == cpu_to_be32(calculated); -} - static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass) { @@ -423,7 +366,6 @@ static int do_one_pass(journal_t *journal, int blocktype; int tag_bytes = 
journal_tag_bytes(journal); __u32 crc32_sum = ~0; /* Transactional Checksums */ - int descr_csum_size = 0; /* * First thing is to establish what we expect to find in the log @@ -509,18 +451,6 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: - /* Verify checksum first */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) - descr_csum_size = - sizeof(struct jbd2_journal_block_tail); - if (descr_csum_size > 0 && - !jbd2_descr_block_csum_verify(journal, - bh->b_data)) { - err = -EIO; - goto failed; - } - /* If it is a valid descriptor block, replay it * in pass REPLAY; if journal_checksums enabled, then * calculate checksums in PASS_SCAN, otherwise, @@ -551,11 +481,11 @@ static int do_one_pass(journal_t *journal, tagp = &bh->b_data[sizeof(journal_header_t)]; while ((tagp - bh->b_data + tag_bytes) - <= journal->j_blocksize - descr_csum_size) { + <= journal->j_blocksize) { unsigned long io_block; tag = (journal_block_tag_t *) tagp; - flags = be16_to_cpu(tag->t_flags); + flags = be32_to_cpu(tag->t_flags); io_block = next_log_block++; wrap(journal, next_log_block); @@ -586,19 +516,6 @@ static int do_one_pass(journal_t *journal, goto skip_write; } - /* Look for block corruption */ - if (!jbd2_block_tag_csum_verify( - journal, tag, obh->b_data, - be32_to_cpu(tmp->h_sequence))) { - brelse(obh); - success = -EIO; - printk(KERN_ERR "JBD: Invalid " - "checksum recovering " - "block %llu in log\n", - blocknr); - continue; - } - /* Find a buffer for the new * data being restored */ nbh = __getblk(journal->j_fs_dev, @@ -733,19 +650,6 @@ static int do_one_pass(journal_t *journal, } crc32_sum = ~0; } - if (pass == PASS_SCAN && - !jbd2_commit_block_csum_verify(journal, - bh->b_data)) { - info->end_transaction = next_commit_ID; - - if (!JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - journal->j_failed_commit = - next_commit_ID; - brelse(bh); - break; - } - } brelse(bh); next_commit_ID++; 
continue; @@ -802,25 +706,6 @@ static int do_one_pass(journal_t *journal, return err; } -static int jbd2_revoke_block_csum_verify(journal_t *j, - void *buf) -{ - struct jbd2_journal_revoke_tail *tail; - __u32 provided, calculated; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return 1; - - tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - - sizeof(struct jbd2_journal_revoke_tail)); - provided = tail->r_checksum; - tail->r_checksum = 0; - calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); - tail->r_checksum = provided; - - provided = be32_to_cpu(provided); - return provided == calculated; -} /* Scan a revoke record, marking all blocks mentioned as revoked. */ @@ -835,9 +720,6 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, offset = sizeof(jbd2_journal_revoke_header_t); max = be32_to_cpu(header->r_count); - if (!jbd2_revoke_block_csum_verify(journal, header)) - return -EINVAL; - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) record_len = 8; diff --git a/trunk/fs/jbd2/revoke.c b/trunk/fs/jbd2/revoke.c index f30b80b4ce8b..6973705d6a3d 100644 --- a/trunk/fs/jbd2/revoke.c +++ b/trunk/fs/jbd2/revoke.c @@ -578,7 +578,6 @@ static void write_one_revoke_record(journal_t *journal, struct jbd2_revoke_record_s *record, int write_op) { - int csum_size = 0; struct journal_head *descriptor; int offset; journal_header_t *header; @@ -593,13 +592,9 @@ static void write_one_revoke_record(journal_t *journal, descriptor = *descriptorp; offset = *offsetp; - /* Do we need to leave space at the end for a checksum? 
*/ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - csum_size = sizeof(struct jbd2_journal_revoke_tail); - /* Make sure we have a descriptor with space left for the record */ if (descriptor) { - if (offset >= journal->j_blocksize - csum_size) { + if (offset == journal->j_blocksize) { flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } @@ -636,24 +631,6 @@ static void write_one_revoke_record(journal_t *journal, *offsetp = offset; } -static void jbd2_revoke_csum_set(journal_t *j, - struct journal_head *descriptor) -{ - struct jbd2_journal_revoke_tail *tail; - __u32 csum; - - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - return; - - tail = (struct jbd2_journal_revoke_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - - sizeof(struct jbd2_journal_revoke_tail)); - tail->r_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); - tail->r_checksum = cpu_to_be32(csum); -} - /* * Flush a revoke descriptor out to the journal. 
If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to @@ -675,8 +652,6 @@ static void flush_descriptor(journal_t *journal, header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; header->r_count = cpu_to_be32(offset); - jbd2_revoke_csum_set(journal, descriptor); - set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); diff --git a/trunk/fs/jbd2/transaction.c b/trunk/fs/jbd2/transaction.c index fb1ab9533b67..ddcd3549c6c2 100644 --- a/trunk/fs/jbd2/transaction.c +++ b/trunk/fs/jbd2/transaction.c @@ -162,8 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle, alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kmem_cache_zalloc(transaction_cache, - gfp_mask); + new_transaction = kmem_cache_alloc(transaction_cache, + gfp_mask | __GFP_ZERO); if (!new_transaction) { /* * If __GFP_FS is not present, then we may be diff --git a/trunk/fs/jffs2/jffs2_fs_sb.h b/trunk/fs/jffs2/jffs2_fs_sb.h index 44dca1f041c5..55a0c1dceadf 100644 --- a/trunk/fs/jffs2/jffs2_fs_sb.h +++ b/trunk/fs/jffs2/jffs2_fs_sb.h @@ -126,10 +126,6 @@ struct jffs2_sb_info { struct jffs2_inodirty *wbuf_inodes; struct rw_semaphore wbuf_sem; /* Protects the write buffer */ - struct delayed_work wbuf_dwork; /* write-buffer write-out work */ - int wbuf_queued; /* non-zero delayed work is queued */ - spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */ - unsigned char *oobbuf; int oobavail; /* How many bytes are available for JFFS2 in OOB */ #endif diff --git a/trunk/fs/jffs2/os-linux.h b/trunk/fs/jffs2/os-linux.h index bcd983d7e7f9..1cd3aec9d9ae 100644 --- a/trunk/fs/jffs2/os-linux.h +++ b/trunk/fs/jffs2/os-linux.h @@ -95,7 +95,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #define jffs2_ubivol(c) (0) #define jffs2_ubivol_setup(c) (0) #define jffs2_ubivol_cleanup(c) do {} while (0) -#define jffs2_dirty_trigger(c) do {} while (0) #else /* NAND and/or 
ECC'd NOR support present */ @@ -136,10 +135,14 @@ void jffs2_ubivol_cleanup(struct jffs2_sb_info *c); #define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && ! (c->mtd->flags & MTD_BIT_WRITEABLE)) int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c); void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c); -void jffs2_dirty_trigger(struct jffs2_sb_info *c); #endif /* WRITEBUFFER */ +static inline void jffs2_dirty_trigger(struct jffs2_sb_info *c) +{ + OFNI_BS_2SFFJ(c)->s_dirt = 1; +} + /* background.c */ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c); void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c); diff --git a/trunk/fs/jffs2/super.c b/trunk/fs/jffs2/super.c index bc586f204228..f9916f312bd8 100644 --- a/trunk/fs/jffs2/super.c +++ b/trunk/fs/jffs2/super.c @@ -63,6 +63,21 @@ static void jffs2_i_init_once(void *foo) inode_init_once(&f->vfs_inode); } +static void jffs2_write_super(struct super_block *sb) +{ + struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); + + lock_super(sb); + sb->s_dirt = 0; + + if (!(sb->s_flags & MS_RDONLY)) { + jffs2_dbg(1, "%s()\n", __func__); + jffs2_flush_wbuf_gc(c, 0); + } + + unlock_super(sb); +} + static const char *jffs2_compr_name(unsigned int compr) { switch (compr) { @@ -98,6 +113,8 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); + jffs2_write_super(sb); + mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); @@ -234,6 +251,7 @@ static const struct super_operations jffs2_super_operations = .alloc_inode = jffs2_alloc_inode, .destroy_inode =jffs2_destroy_inode, .put_super = jffs2_put_super, + .write_super = jffs2_write_super, .statfs = jffs2_statfs, .remount_fs = jffs2_remount_fs, .evict_inode = jffs2_evict_inode, @@ -301,6 +319,9 @@ static void jffs2_put_super (struct super_block *sb) jffs2_dbg(2, "%s()\n", __func__); + if (sb->s_dirt) + jffs2_write_super(sb); + mutex_lock(&c->alloc_sem); 
jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); diff --git a/trunk/fs/jffs2/wbuf.c b/trunk/fs/jffs2/wbuf.c index 6f4529d3697f..74d9be19df3f 100644 --- a/trunk/fs/jffs2/wbuf.c +++ b/trunk/fs/jffs2/wbuf.c @@ -20,7 +20,6 @@ #include #include #include -#include #include "nodelist.h" @@ -86,7 +85,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino) { struct jffs2_inodirty *new; - /* Schedule delayed write-buffer write-out */ + /* Mark the superblock dirty so that kupdated will flush... */ jffs2_dirty_trigger(c); if (jffs2_wbuf_pending_for_ino(c, ino)) @@ -1149,47 +1148,6 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * return 1; } -static struct jffs2_sb_info *work_to_sb(struct work_struct *work) -{ - struct delayed_work *dwork; - - dwork = container_of(work, struct delayed_work, work); - return container_of(dwork, struct jffs2_sb_info, wbuf_dwork); -} - -static void delayed_wbuf_sync(struct work_struct *work) -{ - struct jffs2_sb_info *c = work_to_sb(work); - struct super_block *sb = OFNI_BS_2SFFJ(c); - - spin_lock(&c->wbuf_dwork_lock); - c->wbuf_queued = 0; - spin_unlock(&c->wbuf_dwork_lock); - - if (!(sb->s_flags & MS_RDONLY)) { - jffs2_dbg(1, "%s()\n", __func__); - jffs2_flush_wbuf_gc(c, 0); - } -} - -void jffs2_dirty_trigger(struct jffs2_sb_info *c) -{ - struct super_block *sb = OFNI_BS_2SFFJ(c); - unsigned long delay; - - if (sb->s_flags & MS_RDONLY) - return; - - spin_lock(&c->wbuf_dwork_lock); - if (!c->wbuf_queued) { - jffs2_dbg(1, "%s()\n", __func__); - delay = msecs_to_jiffies(dirty_writeback_interval * 10); - queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay); - c->wbuf_queued = 1; - } - spin_unlock(&c->wbuf_dwork_lock); -} - int jffs2_nand_flash_setup(struct jffs2_sb_info *c) { struct nand_ecclayout *oinfo = c->mtd->ecclayout; @@ -1211,8 +1169,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); - 
spin_lock_init(&c->wbuf_dwork_lock); - INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1251,8 +1207,8 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); - INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); + + c->wbuf_pagesize = c->mtd->erasesize; /* Find a suitable c->sector_size @@ -1311,9 +1267,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); - INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); - c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1346,8 +1299,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) { return 0; init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); - INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; diff --git a/trunk/fs/lockd/svc.c b/trunk/fs/lockd/svc.c index 80938fda67e0..1ead0750cdbb 100644 --- a/trunk/fs/lockd/svc.c +++ b/trunk/fs/lockd/svc.c @@ -251,40 +251,39 @@ static int make_socks(struct svc_serv *serv, struct net *net) return err; } -static int lockd_up_net(struct svc_serv *serv, struct net *net) +static int lockd_up_net(struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); + struct svc_serv *serv = nlmsvc_rqst->rq_server; int error; - if (ln->nlmsvc_users++) + if (ln->nlmsvc_users) return 0; - error = svc_bind(serv, net); + error = svc_rpcb_setup(serv, net); if (error) - goto err_bind; + goto err_rpcb; error = make_socks(serv, net); if (error < 0) goto err_socks; - dprintk("lockd_up_net: per-net data created; net=%p\n", net); return 0; err_socks: svc_rpcb_cleanup(serv, net); -err_bind: - ln->nlmsvc_users--; +err_rpcb: return error; } -static void lockd_down_net(struct svc_serv *serv, struct net *net) +static void 
lockd_down_net(struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); + struct svc_serv *serv = nlmsvc_rqst->rq_server; if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); svc_shutdown_net(serv, net); - dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); } } else { printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", @@ -293,60 +292,21 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) } } -static int lockd_start_svc(struct svc_serv *serv) -{ - int error; - - if (nlmsvc_rqst) - return 0; - - /* - * Create the kernel thread and wait for it to start. - */ - nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - if (IS_ERR(nlmsvc_rqst)) { - error = PTR_ERR(nlmsvc_rqst); - printk(KERN_WARNING - "lockd_up: svc_rqst allocation failed, error=%d\n", - error); - goto out_rqst; - } - - svc_sock_update_bufs(serv); - serv->sv_maxconn = nlm_max_connections; - - nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); - if (IS_ERR(nlmsvc_task)) { - error = PTR_ERR(nlmsvc_task); - printk(KERN_WARNING - "lockd_up: kthread_run failed, error=%d\n", error); - goto out_task; - } - dprintk("lockd_up: service started\n"); - return 0; - -out_task: - svc_exit_thread(nlmsvc_rqst); - nlmsvc_task = NULL; -out_rqst: - nlmsvc_rqst = NULL; - return error; -} - -static struct svc_serv *lockd_create_svc(void) +/* + * Bring up the lockd process if it's not already up. + */ +int lockd_up(struct net *net) { struct svc_serv *serv; + int error = 0; + mutex_lock(&nlmsvc_mutex); /* * Check whether we're already up and running. */ if (nlmsvc_rqst) { - /* - * Note: increase service usage, because later in case of error - * svc_destroy() will be called. 
- */ - svc_get(nlmsvc_rqst->rq_server); - return nlmsvc_rqst->rq_server; + error = lockd_up_net(net); + goto out; } /* @@ -357,53 +317,59 @@ static struct svc_serv *lockd_create_svc(void) printk(KERN_WARNING "lockd_up: no pid, %d users??\n", nlmsvc_users); + error = -ENOMEM; serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); - return ERR_PTR(-ENOMEM); + goto out; } - dprintk("lockd_up: service created\n"); - return serv; -} -/* - * Bring up the lockd process if it's not already up. - */ -int lockd_up(struct net *net) -{ - struct svc_serv *serv; - int error; - - mutex_lock(&nlmsvc_mutex); + error = make_socks(serv, net); + if (error < 0) + goto destroy_and_out; - serv = lockd_create_svc(); - if (IS_ERR(serv)) { - error = PTR_ERR(serv); - goto err_create; + /* + * Create the kernel thread and wait for it to start. + */ + nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); + if (IS_ERR(nlmsvc_rqst)) { + error = PTR_ERR(nlmsvc_rqst); + nlmsvc_rqst = NULL; + printk(KERN_WARNING + "lockd_up: svc_rqst allocation failed, error=%d\n", + error); + goto destroy_and_out; } - error = lockd_up_net(serv, net); - if (error < 0) - goto err_net; + svc_sock_update_bufs(serv); + serv->sv_maxconn = nlm_max_connections; - error = lockd_start_svc(serv); - if (error < 0) - goto err_start; + nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); + if (IS_ERR(nlmsvc_task)) { + error = PTR_ERR(nlmsvc_task); + svc_exit_thread(nlmsvc_rqst); + nlmsvc_task = NULL; + nlmsvc_rqst = NULL; + printk(KERN_WARNING + "lockd_up: kthread_run failed, error=%d\n", error); + goto destroy_and_out; + } - nlmsvc_users++; /* * Note: svc_serv structures have an initial use count of 1, * so we exit through here on both success and failure. 
*/ -err_net: +destroy_and_out: svc_destroy(serv); -err_create: +out: + if (!error) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + ln->nlmsvc_users++; + nlmsvc_users++; + } mutex_unlock(&nlmsvc_mutex); return error; - -err_start: - lockd_down_net(serv, net); - goto err_net; } EXPORT_SYMBOL_GPL(lockd_up); @@ -414,10 +380,11 @@ void lockd_down(struct net *net) { mutex_lock(&nlmsvc_mutex); - lockd_down_net(nlmsvc_rqst->rq_server, net); if (nlmsvc_users) { - if (--nlmsvc_users) + if (--nlmsvc_users) { + lockd_down_net(net); goto out; + } } else { printk(KERN_ERR "lockd_down: no users! task=%p\n", nlmsvc_task); @@ -429,9 +396,7 @@ lockd_down(struct net *net) BUG(); } kthread_stop(nlmsvc_task); - dprintk("lockd_down: service stopped\n"); svc_exit_thread(nlmsvc_rqst); - dprintk("lockd_down: service destroyed\n"); nlmsvc_task = NULL; nlmsvc_rqst = NULL; out: diff --git a/trunk/fs/locks.c b/trunk/fs/locks.c index 814c51d0de47..4f441e46cef4 100644 --- a/trunk/fs/locks.c +++ b/trunk/fs/locks.c @@ -1636,13 +1636,12 @@ EXPORT_SYMBOL(flock_lock_file_wait); SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { struct file *filp; - int fput_needed; struct file_lock *lock; int can_sleep, unlock; int error; error = -EBADF; - filp = fget_light(fd, &fput_needed); + filp = fget(fd); if (!filp) goto out; @@ -1675,7 +1674,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) locks_free_lock(lock); out_putf: - fput_light(filp, fput_needed); + fput(filp); out: return error; } diff --git a/trunk/fs/namei.c b/trunk/fs/namei.c index 7d694194024a..c651f02c9fec 100644 --- a/trunk/fs/namei.c +++ b/trunk/fs/namei.c @@ -449,7 +449,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; @@ -507,14 +507,14 @@ static int complete_walk(struct nameidata *nd) if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) 
{ spin_unlock(&dentry->d_lock); rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -681,15 +681,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); parent = mnt->mnt_parent; if (&parent->mnt == path->mnt) { - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -947,7 +947,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return -ECHILD; } @@ -1125,8 +1125,8 @@ static struct dentry *__lookup_hash(struct qstr *name, * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. 
*/ -static int lookup_fast(struct nameidata *nd, struct qstr *name, - struct path *path, struct inode **inode) +static int do_lookup(struct nameidata *nd, struct qstr *name, + struct path *path, struct inode **inode) { struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; @@ -1208,7 +1208,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name, goto need_lookup; } } - +done: path->mnt = mnt; path->dentry = dentry; err = follow_managed(path, nd->flags); @@ -1222,17 +1222,6 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name, return 0; need_lookup: - return 1; -} - -/* Fast lookup failed, do it the slow way */ -static int lookup_slow(struct nameidata *nd, struct qstr *name, - struct path *path) -{ - struct dentry *dentry, *parent; - int err; - - parent = nd->path.dentry; BUG_ON(nd->inode != parent->d_inode); mutex_lock(&parent->d_inode->i_mutex); @@ -1240,16 +1229,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name, mutex_unlock(&parent->d_inode->i_mutex); if (IS_ERR(dentry)) return PTR_ERR(dentry); - path->mnt = nd->path.mnt; - path->dentry = dentry; - err = follow_managed(path, nd->flags); - if (unlikely(err < 0)) { - path_put_conditional(path, nd); - return err; - } - if (err) - nd->flags |= LOOKUP_JUMPED; - return 0; + goto done; } static inline int may_lookup(struct nameidata *nd) @@ -1285,7 +1265,7 @@ static void terminate_walk(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); } } @@ -1321,26 +1301,21 @@ static inline int walk_component(struct nameidata *nd, struct path *path, */ if (unlikely(type != LAST_NORM)) return handle_dots(nd, type); - err = lookup_fast(nd, name, path, &inode); + err = do_lookup(nd, name, path, &inode); if (unlikely(err)) { - if (err < 0) - goto out_err; - - err = lookup_slow(nd, name, path); - if (err < 0) - goto out_err; - - inode = 
path->dentry->d_inode; + terminate_walk(nd); + return err; + } + if (!inode) { + path_to_nameidata(path, nd); + terminate_walk(nd); + return -ENOENT; } - err = -ENOENT; - if (!inode) - goto out_path_put; - if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { - err = -ECHILD; - goto out_err; + terminate_walk(nd); + return -ECHILD; } } BUG_ON(inode != path->dentry->d_inode); @@ -1349,12 +1324,6 @@ static inline int walk_component(struct nameidata *nd, struct path *path, path_to_nameidata(path, nd); nd->inode = inode; return 0; - -out_path_put: - path_to_nameidata(path, nd); -out_err: - terminate_walk(nd); - return err; } /* @@ -1651,7 +1620,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -1664,7 +1633,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); rcu_read_lock(); set_root_rcu(nd); } else { @@ -1677,7 +1646,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); rcu_read_lock(); do { @@ -1713,7 +1682,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); rcu_read_lock(); } else { path_get(&file->f_path); @@ -2200,10 +2169,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int want_write = 0; int acc_mode = op->acc_mode; struct file *filp; - struct inode *inode; - int symlink_ok = 0; - struct path save_parent = { .dentry = 
NULL, .mnt = NULL }; - bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2235,23 +2200,30 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } if (!(open_flag & O_CREAT)) { + int symlink_ok = 0; if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) symlink_ok = 1; /* we _can_ be in RCU mode here */ - error = lookup_fast(nd, &nd->last, path, &inode); - if (unlikely(error)) { - if (error < 0) - goto exit; + error = walk_component(nd, path, &nd->last, LAST_NORM, + !symlink_ok); + if (error < 0) + return ERR_PTR(error); + if (error) /* symlink */ + return NULL; + /* sayonara */ + error = complete_walk(nd); + if (error) + return ERR_PTR(error); - error = lookup_slow(nd, &nd->last, path); - if (error < 0) + error = -ENOTDIR; + if (nd->flags & LOOKUP_DIRECTORY) { + if (!nd->inode->i_op->lookup) goto exit; - - inode = path->dentry->d_inode; } - goto finish_lookup; + audit_inode(pathname, nd->path.dentry); + goto ok; } /* create side of things */ @@ -2269,7 +2241,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) goto exit; -retry_lookup: mutex_lock(&dir->d_inode->i_mutex); dentry = lookup_hash(nd); @@ -2331,49 +2302,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) nd->flags |= LOOKUP_JUMPED; - BUG_ON(nd->flags & LOOKUP_RCU); - inode = path->dentry->d_inode; -finish_lookup: - /* we _can_ be in RCU mode here */ error = -ENOENT; - if (!inode) { - path_to_nameidata(path, nd); - goto exit; - } + if (!path->dentry->d_inode) + goto exit_dput; - if (should_follow_link(inode, !symlink_ok)) { - if (nd->flags & LOOKUP_RCU) { - if (unlikely(unlazy_walk(nd, path->dentry))) { - error = -ECHILD; - goto exit; - } - } - BUG_ON(inode != path->dentry->d_inode); + if (path->dentry->d_inode->i_op->follow_link) return NULL; - } - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != 
path->mnt) { - path_to_nameidata(path, nd); - } else { - save_parent.dentry = nd->path.dentry; - save_parent.mnt = mntget(path->mnt); - nd->path.dentry = path->dentry; - - } - nd->inode = inode; + path_to_nameidata(path, nd); + nd->inode = path->dentry->d_inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ error = complete_walk(nd); - if (error) { - path_put(&save_parent); + if (error) return ERR_PTR(error); - } error = -EISDIR; - if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) - goto exit; - error = -ENOTDIR; - if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) + if (S_ISDIR(nd->inode->i_mode)) goto exit; - audit_inode(pathname, nd->path.dentry); ok: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; @@ -2389,20 +2333,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) goto exit; filp = nameidata_to_filp(nd); - if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (want_write) { - mnt_drop_write(nd->path.mnt); - want_write = 0; - } - retried = true; - goto retry_lookup; - } if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { @@ -2422,8 +2352,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, out: if (want_write) mnt_drop_write(nd->path.mnt); - path_put(&save_parent); - terminate_walk(nd); + path_put(&nd->path); return filp; exit_mutex_unlock: @@ -2486,12 +2415,6 @@ static struct file *path_openat(int dfd, const char *pathname, if (base) fput(base); release_open_intent(nd); - if (filp == ERR_PTR(-EOPENSTALE)) { - if (flags & LOOKUP_RCU) - filp = ERR_PTR(-ECHILD); - else - filp = ERR_PTR(-ESTALE); - } return filp; out_filp: diff --git a/trunk/fs/namespace.c b/trunk/fs/namespace.c index 1e4a5fe3d7b7..e6081996c9a2 100644 --- a/trunk/fs/namespace.c +++ 
b/trunk/fs/namespace.c @@ -397,7 +397,7 @@ static int mnt_make_readonly(struct mount *mnt) { int ret = 0; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -431,15 +431,15 @@ static int mnt_make_readonly(struct mount *mnt) */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return err; } @@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path *path) { struct mount *child_mnt; - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); if (child_mnt) { mnt_add_count(child_mnt, 1); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return &child_mnt->mnt; } else { - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return NULL; } } @@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - 
br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -803,36 +803,35 @@ static void mntput_no_expire(struct mount *mnt) { put_again: #ifdef CONFIG_SMP - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (likely(atomic_read(&mnt->mnt_longterm))) { mnt_add_count(mnt, -1); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return; } - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return; } #else mnt_add_count(mnt, -1); if (likely(mnt_get_count(mnt))) return; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); #endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } - list_del(&mnt->mnt_instance); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); mntfree(mnt); } @@ -858,21 +857,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); real_mount(mnt)->mnt_pinned++; - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } 
EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -989,12 +988,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock needed for mnt_get_count */ - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1021,10 +1020,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1041,13 +1040,13 @@ void release_mounts(struct list_head *head) struct dentry *dentry; struct mount *m; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); dput(dentry); mntput(&m->mnt); } @@ -1074,9 +1073,8 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); - if (p->mnt_ns) - __mnt_make_shortterm(p); p->mnt_ns = NULL; + __mnt_make_shortterm(p); list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { p->mnt_parent->mnt_ghosts++; @@ -1114,12 +1112,12 @@ static int do_umount(struct mount *mnt, int flags) * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. 
*/ - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (mnt_get_count(mnt) != 2) { - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return -EBUSY; } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1161,7 +1159,7 @@ static int do_umount(struct mount *mnt, int flags) } down_write(&namespace_sem); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1173,7 +1171,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1288,19 +1286,19 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt.mnt_root, flag); if (!q) goto Enomem; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(res, 0, &umount_list); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1320,9 +1318,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(real_mount(mnt), 0, &umount_list); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1450,7 +1448,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out_cleanup_ids; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = 
next_mnt(p, source_mnt)) @@ -1469,7 +1467,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return 0; @@ -1567,10 +1565,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1619,9 +1617,9 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } out2: unlock_mount(path); @@ -1679,16 +1677,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } return err; } @@ -1895,9 +1893,9 @@ int finish_automount(struct vfsmount *m, struct path *path) /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { down_write(&namespace_sem); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); } mntput(m); @@ -1913,11 +1911,11 @@ int finish_automount(struct vfsmount *m, struct path 
*path) void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { down_write(&namespace_sem); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); } EXPORT_SYMBOL(mnt_set_expiry); @@ -1937,7 +1935,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1956,7 +1954,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2220,9 +2218,9 @@ void mnt_make_shortterm(struct vfsmount *m) struct mount *mnt = real_mount(m); if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); atomic_dec(&mnt->mnt_longterm); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); #endif } @@ -2252,9 +2250,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, return ERR_PTR(-ENOMEM); } new_ns->root = new; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2418,9 +2416,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } 
EXPORT_SYMBOL(path_is_under); @@ -2507,7 +2505,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* make sure we can reach put_old from new_root */ if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) goto out4; - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ @@ -2515,7 +2513,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; out4: @@ -2578,7 +2576,7 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(&vfsmount_lock); + br_lock_init(vfsmount_lock); err = sysfs_init(); if (err) @@ -2598,9 +2596,9 @@ void put_mnt_ns(struct mnt_namespace *ns) if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/trunk/fs/ncpfs/file.c b/trunk/fs/ncpfs/file.c index 122e260247f5..3ff5fcc1528f 100644 --- a/trunk/fs/ncpfs/file.c +++ b/trunk/fs/ncpfs/file.c @@ -221,10 +221,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * already_written = 0; - errno = file_update_time(file); - if (errno) - goto outrel; - bouncebuffer = vmalloc(bufsize); if (!bouncebuffer) { errno = -EIO; /* -ENOMEM */ @@ -256,6 +252,8 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } vfree(bouncebuffer); + file_update_time(file); + *ppos = pos; if (pos > i_size_read(inode)) { diff --git a/trunk/fs/ncpfs/ncp_fs_sb.h b/trunk/fs/ncpfs/ncp_fs_sb.h index 
54cc0cdb3dcb..4af803f13516 100644 --- a/trunk/fs/ncpfs/ncp_fs_sb.h +++ b/trunk/fs/ncpfs/ncp_fs_sb.h @@ -23,17 +23,17 @@ struct ncp_mount_data_kernel { unsigned long flags; /* NCP_MOUNT_* flags */ unsigned int int_flags; /* internal flags */ #define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 - uid_t mounted_uid; /* Who may umount() this filesystem? */ + __kernel_uid32_t mounted_uid; /* Who may umount() this filesystem? */ struct pid *wdog_pid; /* Who cares for our watchdog packets? */ unsigned int ncp_fd; /* The socket to the ncp port */ unsigned int time_out; /* How long should I wait after sending a NCP request? */ unsigned int retry_count; /* And how often should I retry? */ unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; - uid_t uid; - gid_t gid; - umode_t file_mode; - umode_t dir_mode; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_mode_t file_mode; + __kernel_mode_t dir_mode; int info_fd; }; diff --git a/trunk/fs/nfs/callback.c b/trunk/fs/nfs/callback.c index 970659daa323..eb95f5091c1a 100644 --- a/trunk/fs/nfs/callback.c +++ b/trunk/fs/nfs/callback.c @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -254,7 +253,6 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) char svc_name[12]; int ret = 0; int minorversion_setup; - struct net *net = current->nsproxy->net_ns; mutex_lock(&nfs_callback_mutex); if (cb_info->users++ || cb_info->task != NULL) { @@ -267,12 +265,6 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto out_err; } - ret = svc_bind(serv, net); - if (ret < 0) { - printk(KERN_WARNING "NFS: bind callback service failed\n"); - goto out_err; - } - minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, serv, xprt, &rqstp, &callback_svc); if (!minorversion_setup) { @@ -314,8 +306,6 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) dprintk("NFS: Couldn't create callback socket or server thread; " "err = %d\n", ret); cb_info->users--; - if (serv) - svc_shutdown_net(serv, net); 
goto out; } @@ -330,7 +320,6 @@ void nfs_callback_down(int minorversion) cb_info->users--; if (cb_info->users == 0 && cb_info->task != NULL) { kthread_stop(cb_info->task); - svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns); svc_exit_thread(cb_info->rqst); cb_info->serv = NULL; cb_info->rqst = NULL; @@ -343,7 +332,7 @@ void nfs_callback_down(int minorversion) int check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) { - char *p = rqstp->rq_cred.cr_principal; + char *p = svc_gss_principal(rqstp); if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) return 1; diff --git a/trunk/fs/nfs/dir.c b/trunk/fs/nfs/dir.c index f430057ff3b3..0989a2099688 100644 --- a/trunk/fs/nfs/dir.c +++ b/trunk/fs/nfs/dir.c @@ -1354,10 +1354,10 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru } #ifdef CONFIG_NFS_V4 -static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); +static int nfs_open_revalidate(struct dentry *, struct nameidata *); const struct dentry_operations nfs4_dentry_operations = { - .d_revalidate = nfs4_lookup_revalidate, + .d_revalidate = nfs_open_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, @@ -1519,11 +1519,13 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry return nfs_lookup(dir, dentry, nd); } -static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *parent = NULL; struct inode *inode; struct inode *dir; + struct nfs_open_context *ctx; + struct iattr attr; int openflags, ret = 0; if (nd->flags & LOOKUP_RCU) @@ -1552,13 +1554,57 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; + /* We can't create new files here */ + openflags &= 
~(O_CREAT|O_EXCL); + + ctx = create_nfs_open_context(dentry, openflags); + ret = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; - /* Let f_op->open() actually open (and revalidate) the file */ - ret = 1; + attr.ia_valid = ATTR_OPEN; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + + /* + * Note: we're not holding inode->i_mutex and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + switch (ret) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + ret = nfs_intent_set_file(nd, ctx); + if (ret >= 0) + ret = 1; out: dput(parent); return ret; +out_drop: + d_drop(dentry); + ret = 0; +out_put_ctx: + put_nfs_open_context(ctx); + goto out; no_open_dput: dput(parent); diff --git a/trunk/fs/nfs/file.c b/trunk/fs/nfs/file.c index a6708e6b438d..56311ca5f9f8 100644 --- a/trunk/fs/nfs/file.c +++ b/trunk/fs/nfs/file.c @@ -879,81 +879,12 @@ const struct file_operations nfs_file_operations = { static int nfs4_file_open(struct inode *inode, struct file *filp) { - struct nfs_open_context *ctx; - struct dentry *dentry = filp->f_path.dentry; - struct dentry *parent = NULL; - struct inode *dir; - unsigned openflags = filp->f_flags; - struct iattr attr; - int err; - - BUG_ON(inode != dentry->d_inode); /* - * If no cached dentry exists or if it's negative, NFSv4 handled the - * opens in ->lookup() or ->create(). - * - * We only get this far for a cached positive dentry. We skipped - * revalidation, so handle it here by dropping the dentry and returning - * -EOPENSTALE. The VFS will retry the lookup/create/open. 
+ * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to + * this point, then something is very wrong */ - - dprintk("NFS: open file(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); - - if ((openflags & O_ACCMODE) == 3) - openflags--; - - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); - - parent = dget_parent(dentry); - dir = parent->d_inode; - - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); - err = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out; - - attr.ia_valid = ATTR_OPEN; - if (openflags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; - nfs_wb_all(inode); - } - - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - switch (err) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - goto out_put_ctx; - default: - goto out_drop; - } - } - iput(inode); - if (inode != dentry->d_inode) - goto out_drop; - - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - nfs_file_set_open_context(filp, ctx); - err = 0; - -out_put_ctx: - put_nfs_open_context(ctx); -out: - dput(parent); - return err; - -out_drop: - d_drop(dentry); - err = -EOPENSTALE; - goto out_put_ctx; + dprintk("NFS: %s called! 
inode=%p filp=%p\n", __func__, inode, filp); + return -ENOTDIR; } const struct file_operations nfs4_file_operations = { diff --git a/trunk/fs/nfsd/auth.c b/trunk/fs/nfsd/auth.c index 34a10d78b839..204438cc914e 100644 --- a/trunk/fs/nfsd/auth.c +++ b/trunk/fs/nfsd/auth.c @@ -11,7 +11,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) + if (f->pseudoflavor == rqstp->rq_flavor) return f->flags; } return exp->ex_flags; diff --git a/trunk/fs/nfsd/export.c b/trunk/fs/nfsd/export.c index ba233499b9a5..dcb52b884519 100644 --- a/trunk/fs/nfsd/export.c +++ b/trunk/fs/nfsd/export.c @@ -706,7 +706,7 @@ static struct cache_head *svc_export_alloc(void) return NULL; } -static struct cache_detail svc_export_cache_template = { +struct cache_detail svc_export_cache_template = { .owner = THIS_MODULE, .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", @@ -904,13 +904,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) return 0; /* ip-address based client; check sec= export option: */ for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) + if (f->pseudoflavor == rqstp->rq_flavor) return 0; } /* defaults in absence of sec= options: */ if (exp->ex_nflavors == 0) { - if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || - rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX) + if (rqstp->rq_flavor == RPC_AUTH_NULL || + rqstp->rq_flavor == RPC_AUTH_UNIX) return 0; } return nfserr_wrongsec; diff --git a/trunk/fs/nfsd/fault_inject.c b/trunk/fs/nfsd/fault_inject.c index e6c38159622f..9559ce468732 100644 --- a/trunk/fs/nfsd/fault_inject.c +++ b/trunk/fs/nfsd/fault_inject.c @@ -58,7 +58,6 @@ static int nfsd_inject_set(void *op_ptr, u64 val) static int nfsd_inject_get(void *data, u64 *val) { - *val = 0; return 0; } diff --git a/trunk/fs/nfsd/nfs4callback.c 
b/trunk/fs/nfsd/nfs4callback.c index a5fd6b982f27..c8e9f637153a 100644 --- a/trunk/fs/nfsd/nfs4callback.c +++ b/trunk/fs/nfsd/nfs4callback.c @@ -650,10 +650,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c struct rpc_clnt *client; if (clp->cl_minorversion == 0) { - if (!clp->cl_cred.cr_principal && - (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - args.client_name = clp->cl_cred.cr_principal; + args.client_name = clp->cl_principal; args.prognumber = conn->cb_prog, args.protocol = XPRT_TRANSPORT_TCP; args.authflavor = clp->cl_flavor; diff --git a/trunk/fs/nfsd/nfs4idmap.c b/trunk/fs/nfsd/nfs4idmap.c index dae36f1dee95..286a7f8f2024 100644 --- a/trunk/fs/nfsd/nfs4idmap.c +++ b/trunk/fs/nfsd/nfs4idmap.c @@ -605,7 +605,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel static __be32 do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) { - if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) + if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS) if (numeric_name_to_id(rqstp, type, name, namelen, id)) return 0; /* @@ -618,7 +618,7 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u static int do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) { - if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) + if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS) return sprintf(name, "%u", id); return idmap_id_to_name(rqstp, type, id, name); } diff --git a/trunk/fs/nfsd/nfs4recover.c b/trunk/fs/nfsd/nfs4recover.c index 5ff0b7b9fc08..ed3f9206a0ee 100644 --- a/trunk/fs/nfsd/nfs4recover.c +++ b/trunk/fs/nfsd/nfs4recover.c @@ -570,7 +570,7 @@ static ssize_t cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct cld_upcall *tmp, *cup; - struct cld_msg __user *cmsg = 
(struct cld_msg __user *)src; + struct cld_msg *cmsg = (struct cld_msg *)src; uint32_t xid; struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, nfsd_net_id); @@ -1029,7 +1029,7 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) return ret; } -static struct notifier_block nfsd4_cld_block = { +struct notifier_block nfsd4_cld_block = { .notifier_call = rpc_pipefs_event, }; diff --git a/trunk/fs/nfsd/nfs4state.c b/trunk/fs/nfsd/nfs4state.c index 8fdc9ec5c5d3..03f82c0bc35d 100644 --- a/trunk/fs/nfsd/nfs4state.c +++ b/trunk/fs/nfsd/nfs4state.c @@ -42,7 +42,6 @@ #include #include "xdr4.h" #include "vfs.h" -#include "current_stateid.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -448,69 +447,37 @@ static struct list_head close_lru; * * which we should reject. */ -static unsigned int -bmap_to_share_mode(unsigned long bmap) { +static void +set_access(unsigned int *access, unsigned long bmap) { int i; - unsigned int access = 0; + *access = 0; for (i = 1; i < 4; i++) { if (test_bit(i, &bmap)) - access |= i; + *access |= i; } - return access; -} - -static bool -test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { - unsigned int access, deny; - - access = bmap_to_share_mode(stp->st_access_bmap); - deny = bmap_to_share_mode(stp->st_deny_bmap); - if ((access & open->op_share_deny) || (deny & open->op_share_access)) - return false; - return true; -} - -/* set share access for a given stateid */ -static inline void -set_access(u32 access, struct nfs4_ol_stateid *stp) -{ - __set_bit(access, &stp->st_access_bmap); } -/* clear share access for a given stateid */ -static inline void -clear_access(u32 access, struct nfs4_ol_stateid *stp) -{ - __clear_bit(access, &stp->st_access_bmap); -} - -/* test whether a given stateid has access */ -static inline bool -test_access(u32 access, struct nfs4_ol_stateid *stp) -{ - return test_bit(access, &stp->st_access_bmap); -} +static void +set_deny(unsigned int *deny, unsigned long bmap) { + int i; 
-/* set share deny for a given stateid */ -static inline void -set_deny(u32 access, struct nfs4_ol_stateid *stp) -{ - __set_bit(access, &stp->st_deny_bmap); + *deny = 0; + for (i = 0; i < 4; i++) { + if (test_bit(i, &bmap)) + *deny |= i ; + } } -/* clear share deny for a given stateid */ -static inline void -clear_deny(u32 access, struct nfs4_ol_stateid *stp) -{ - __clear_bit(access, &stp->st_deny_bmap); -} +static int +test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { + unsigned int access, deny; -/* test whether a given stateid is denying specific access */ -static inline bool -test_deny(u32 access, struct nfs4_ol_stateid *stp) -{ - return test_bit(access, &stp->st_deny_bmap); + set_access(&access, stp->st_access_bmap); + set_deny(&deny, stp->st_deny_bmap); + if ((access & open->op_share_deny) || (deny & open->op_share_access)) + return 0; + return 1; } static int nfs4_access_to_omode(u32 access) @@ -526,20 +493,6 @@ static int nfs4_access_to_omode(u32 access) BUG(); } -/* release all access and file references for a given stateid */ -static void -release_all_access(struct nfs4_ol_stateid *stp) -{ - int i; - - for (i = 1; i < 4; i++) { - if (test_access(i, stp)) - nfs4_file_put_access(stp->st_file, - nfs4_access_to_omode(i)); - clear_access(i, stp); - } -} - static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) { list_del(&stp->st_perfile); @@ -548,7 +501,16 @@ static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) static void close_generic_stateid(struct nfs4_ol_stateid *stp) { - release_all_access(stp); + int i; + + if (stp->st_access_bmap) { + for (i = 1; i < 4; i++) { + if (test_bit(i, &stp->st_access_bmap)) + nfs4_file_put_access(stp->st_file, + nfs4_access_to_omode(i)); + __clear_bit(i, &stp->st_access_bmap); + } + } put_nfs4_file(stp->st_file); stp->st_file = NULL; } @@ -923,7 +885,7 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n struct nfsd4_session *new; struct nfsd4_channel_attrs 
*fchan = &cses->fore_channel; int numslots, slotsize; - __be32 status; + int status; int idx; /* @@ -1022,8 +984,7 @@ static inline void renew_client_locked(struct nfs4_client *clp) { if (is_client_expired(clp)) { - WARN_ON(1); - printk("%s: client (clientid %08x/%08x) already expired\n", + dprintk("%s: client (clientid %08x/%08x) already expired\n", __func__, clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); @@ -1088,7 +1049,9 @@ free_client(struct nfs4_client *clp) list_del(&ses->se_perclnt); nfsd4_put_session_locked(ses); } - free_svc_cred(&clp->cl_cred); + if (clp->cl_cred.cr_group_info) + put_group_info(clp->cl_cred.cr_group_info); + kfree(clp->cl_principal); kfree(clp->cl_name.data); kfree(clp); } @@ -1169,21 +1132,12 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) target->cl_clientid.cl_id = source->cl_clientid.cl_id; } -static int copy_cred(struct svc_cred *target, struct svc_cred *source) +static void copy_cred(struct svc_cred *target, struct svc_cred *source) { - if (source->cr_principal) { - target->cr_principal = - kstrdup(source->cr_principal, GFP_KERNEL); - if (target->cr_principal == NULL) - return -ENOMEM; - } else - target->cr_principal = NULL; - target->cr_flavor = source->cr_flavor; target->cr_uid = source->cr_uid; target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); - return 0; } static int same_name(const char *n1, const char *n2) @@ -1203,31 +1157,11 @@ same_clid(clientid_t *cl1, clientid_t *cl2) return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id); } -static bool groups_equal(struct group_info *g1, struct group_info *g2) -{ - int i; - - if (g1->ngroups != g2->ngroups) - return false; - for (i=0; ingroups; i++) - if (GROUP_AT(g1, i) != GROUP_AT(g2, i)) - return false; - return true; -} - +/* XXX what about NGROUP */ static int same_creds(struct svc_cred *cr1, struct svc_cred *cr2) { - if ((cr1->cr_flavor != cr2->cr_flavor) - || 
(cr1->cr_uid != cr2->cr_uid) - || (cr1->cr_gid != cr2->cr_gid) - || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) - return false; - if (cr1->cr_principal == cr2->cr_principal) - return true; - if (!cr1->cr_principal || !cr2->cr_principal) - return false; - return 0 == strcmp(cr1->cr_principal, cr1->cr_principal); + return cr1->cr_uid == cr2->cr_uid; } static void gen_clid(struct nfs4_client *clp) @@ -1270,20 +1204,25 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, { struct nfs4_client *clp; struct sockaddr *sa = svc_addr(rqstp); - int ret; + char *princ; clp = alloc_client(name); if (clp == NULL) return NULL; INIT_LIST_HEAD(&clp->cl_sessions); - ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); - if (ret) { - spin_lock(&client_lock); - free_client(clp); - spin_unlock(&client_lock); - return NULL; + + princ = svc_gss_principal(rqstp); + if (princ) { + clp->cl_principal = kstrdup(princ, GFP_KERNEL); + if (clp->cl_principal == NULL) { + spin_lock(&client_lock); + free_client(clp); + spin_unlock(&client_lock); + return NULL; + } } + idr_init(&clp->cl_stateids); memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_refcount, 0); @@ -1301,6 +1240,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); + clp->cl_flavor = rqstp->rq_flavor; + copy_cred(&clp->cl_cred, &rqstp->rq_cred); gen_confirm(clp); clp->cl_cb_session = NULL; return clp; @@ -1529,32 +1470,18 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) clid->flags = new->cl_exchange_flags; } -static bool client_has_state(struct nfs4_client *clp) -{ - /* - * Note clp->cl_openowners check isn't quite right: there's no - * need to count owners without stateid's. - * - * Also note we should probably be using this in 4.0 case too. 
- */ - return !list_empty(&clp->cl_openowners) - || !list_empty(&clp->cl_delegations) - || !list_empty(&clp->cl_sessions); -} - __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_exchange_id *exid) { struct nfs4_client *unconf, *conf, *new; - __be32 status; + int status; unsigned int strhashval; char dname[HEXDIR_LEN]; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; struct sockaddr *sa = svc_addr(rqstp); - bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A; rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " @@ -1580,63 +1507,71 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfs4_make_rec_clidname(dname, &exid->clname); if (status) - return status; + goto error; strhashval = clientstr_hashval(dname); - /* Cases below refer to rfc 5661 section 18.35.4: */ nfs4_lock_state(); + status = nfs_ok; + conf = find_confirmed_client_by_str(dname, strhashval); if (conf) { - bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); - bool verfs_match = same_verf(&verf, &conf->cl_verifier); - - if (update) { - if (!clp_used_exchangeid(conf)) { /* buggy client */ - status = nfserr_inval; - goto out; - } - if (!creds_match) { /* case 9 */ - status = nfserr_perm; - goto out; - } - if (!verfs_match) { /* case 8 */ + if (!clp_used_exchangeid(conf)) { + status = nfserr_clid_inuse; /* XXX: ? 
*/ + goto out; + } + if (!same_verf(&verf, &conf->cl_verifier)) { + /* 18.35.4 case 8 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { status = nfserr_not_same; goto out; } - /* case 6 */ - exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; - goto out_copy; + /* Client reboot: destroy old state */ + expire_client(conf); + goto out_new; } - if (!creds_match) { /* case 3 */ - if (client_has_state(conf)) { - status = nfserr_clid_inuse; + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { + /* 18.35.4 case 9 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_perm; goto out; } expire_client(conf); goto out_new; } - if (verfs_match) { /* case 2 */ - conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; - goto out_copy; - } - /* case 5, client reboot */ - goto out_new; + /* + * Set bit when the owner id and verifier map to an already + * confirmed client id (18.35.3). + */ + exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; + + /* + * Falling into 18.35.4 case 2, possible router replay. + * Leave confirmed record intact and return same result. + */ + copy_verf(conf, &verf); + new = conf; + goto out_copy; } - if (update) { /* case 7 */ + /* 18.35.4 case 7 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { status = nfserr_noent; goto out; } unconf = find_unconfirmed_client_by_str(dname, strhashval); - if (unconf) /* case 4, possible retry or client restart */ + if (unconf) { + /* + * Possible retry or client restart. Per 18.35.4 case 4, + * a new unconfirmed record should be generated regardless + * of whether any properties have changed. 
+ */ expire_client(unconf); + } - /* case 1 (normal case) */ out_new: + /* Normal case */ new = create_client(exid->clname, dname, rqstp, &verf); if (new == NULL) { status = nfserr_jukebox; @@ -1649,7 +1584,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - exid->seqid = new->cl_cs_slot.sl_seqid + 1; + exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", @@ -1658,10 +1593,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, out: nfs4_unlock_state(); +error: + dprintk("nfsd4_exchange_id returns %d\n", ntohl(status)); return status; } -static __be32 +static int check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) { dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, @@ -1689,7 +1626,7 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) */ static void nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, - struct nfsd4_clid_slot *slot, __be32 nfserr) + struct nfsd4_clid_slot *slot, int nfserr) { slot->sl_status = nfserr; memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); @@ -1720,7 +1657,7 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, /* seqid, slotID, slotID, slotID, status */ \ 5 ) * sizeof(__be32)) -static bool check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) { return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; @@ -1736,7 +1673,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_session *new; struct nfsd4_clid_slot *cs_slot = NULL; bool confirm_me = false; - __be32 status = 0; + int status = 0; if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; @@ -1749,10 +1686,16 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot = &conf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if 
(status == nfserr_replay_cache) { + dprintk("Got a create_session replay! seqid= %d\n", + cs_slot->sl_seqid); + /* Return the cached reply status */ status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out; } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; + dprintk("Sequence misordered!\n"); + dprintk("Expected seqid= %d but got seqid= %d\n", + cs_slot->sl_seqid, cr_ses->seqid); goto out; } } else if (unconf) { @@ -1761,6 +1704,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out; } + cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { @@ -1768,6 +1712,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfserr_seq_misordered; goto out; } + confirm_me = true; conf = unconf; } else { @@ -1804,14 +1749,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); - if (confirm_me) { - unsigned int hash = clientstr_hashval(unconf->cl_recdir); - struct nfs4_client *old = - find_confirmed_client_by_str(conf->cl_recdir, hash); - if (old) - expire_client(old); + if (confirm_me) move_to_confirmed(conf); - } out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); @@ -1879,7 +1818,7 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_destroy_session *sessionid) { struct nfsd4_session *ses; - __be32 status = nfserr_badsession; + u32 status = nfserr_badsession; /* Notes: * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid @@ -1975,7 +1914,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_session *session; struct nfsd4_slot *slot; struct nfsd4_conn *conn; - __be32 status; + int status; if (resp->opcnt != 1) return nfserr_sequence_pos; @@ -2069,11 +2008,18 @@ nfsd4_sequence(struct svc_rqst *rqstp, return status; } +static inline bool has_resources(struct nfs4_client *clp) +{ + return 
!list_empty(&clp->cl_openowners) + || !list_empty(&clp->cl_delegations) + || !list_empty(&clp->cl_sessions); +} + __be32 nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) { struct nfs4_client *conf, *unconf, *clp; - __be32 status = 0; + int status = 0; nfs4_lock_state(); unconf = find_unconfirmed_client(&dc->clientid); @@ -2082,7 +2028,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta if (conf) { clp = conf; - if (!is_client_expired(conf) && client_has_state(conf)) { + if (!is_client_expired(conf) && has_resources(conf)) { status = nfserr_clientid_busy; goto out; } @@ -2109,7 +2055,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta __be32 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) { - __be32 status = 0; + int status = 0; if (rc->rca_one_fs) { if (!cstate->current_fh.fh_dentry) @@ -2160,13 +2106,17 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. + */ + strhashval = clientstr_hashval(dname); - /* Cases below refer to rfc 3530 section 14.2.33: */ nfs4_lock_state(); conf = find_confirmed_client_by_str(dname, strhashval); if (conf) { - /* case 0: */ + /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) goto out; @@ -2179,18 +2129,63 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } } + /* + * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION") + * has a description of SETCLIENTID request processing consisting + * of 5 bullet points, labeled as CASE0 - CASE4 below. 
+ */ unconf = find_unconfirmed_client_by_str(dname, strhashval); - if (unconf) - expire_client(unconf); status = nfserr_jukebox; - new = create_client(clname, dname, rqstp, &clverifier); - if (new == NULL) - goto out; - if (conf && same_verf(&conf->cl_verifier, &clverifier)) - /* case 1: probable callback update */ + if (!conf) { + /* + * RFC 3530 14.2.33 CASE 4: + * placed first, because it is the normal case + */ + if (unconf) + expire_client(unconf); + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; + gen_clid(new); + } else if (same_verf(&conf->cl_verifier, &clverifier)) { + /* + * RFC 3530 14.2.33 CASE 1: + * probable callback update + */ + if (unconf) { + /* Note this is removing unconfirmed {*x***}, + * which is stronger than RFC recommended {vxc**}. + * This has the advantage that there is at most + * one {*x***} in either list at any time. + */ + expire_client(unconf); + } + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; copy_clid(new, conf); - else /* case 4 (new client) or cases 2, 3 (client reboot): */ + } else if (!unconf) { + /* + * RFC 3530 14.2.33 CASE 2: + * probable client reboot; state will be removed if + * confirmed. + */ + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; + gen_clid(new); + } else { + /* + * RFC 3530 14.2.33 CASE 3: + * probable client reboot; state will be removed if + * confirmed. 
+ */ + expire_client(unconf); + new = create_client(clname, dname, rqstp, &clverifier); + if (new == NULL) + goto out; gen_clid(new); + } /* * XXX: we should probably set this at creation time, and check * for consistent minorversion use throughout: @@ -2208,11 +2203,17 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } +/* + * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has + * a description of SETCLIENTID_CONFIRM request processing consisting of 4 + * bullets, labeled as CASE1 - CASE4 below. + */ __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; @@ -2220,44 +2221,84 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, if (STALE_CLIENTID(clid)) return nfserr_stale_clientid; + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. + */ + nfs4_lock_state(); conf = find_confirmed_client(clid); unconf = find_unconfirmed_client(clid); - /* - * We try hard to give out unique clientid's, so if we get an - * attempt to confirm the same clientid with a different cred, - * there's a bug somewhere. Let's charitably assume it's our - * bug. 
- */ - status = nfserr_serverfault; - if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) + + status = nfserr_clid_inuse; + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) goto out; - if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) goto out; - /* cases below refer to rfc 3530 section 14.2.34: */ - if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { - if (conf && !unconf) /* case 2: probable retransmit */ + + /* + * section 14.2.34 of RFC 3530 has a description of + * SETCLIENTID_CONFIRM request processing consisting + * of 4 bullet points, labeled as CASE1 - CASE4 below. + */ + if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) { + /* + * RFC 3530 14.2.34 CASE 1: + * callback update + */ + if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) + status = nfserr_clid_inuse; + else { + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + nfsd4_probe_callback(conf); + expire_client(unconf); status = nfs_ok; - else /* case 4: client hasn't noticed we rebooted yet? */ - status = nfserr_stale_clientid; - goto out; - } - status = nfs_ok; - if (conf) { /* case 1: callback update */ - nfsd4_change_callback(conf, &unconf->cl_cb_conn); - nfsd4_probe_callback(conf); - expire_client(unconf); - } else { /* case 3: normal case; new or rebooted client */ - unsigned int hash = clientstr_hashval(unconf->cl_recdir); - conf = find_confirmed_client_by_str(unconf->cl_recdir, hash); - if (conf) { - nfsd4_client_record_remove(conf); - expire_client(conf); } - move_to_confirmed(unconf); - nfsd4_probe_callback(unconf); + } else if (conf && !unconf) { + /* + * RFC 3530 14.2.34 CASE 2: + * probable retransmitted request; play it safe and + * do nothing. 
+ */ + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) + status = nfserr_clid_inuse; + else + status = nfs_ok; + } else if (!conf && unconf + && same_verf(&unconf->cl_confirm, &confirm)) { + /* + * RFC 3530 14.2.34 CASE 3: + * Normal case; new or rebooted client: + */ + if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { + status = nfserr_clid_inuse; + } else { + unsigned int hash = + clientstr_hashval(unconf->cl_recdir); + conf = find_confirmed_client_by_str(unconf->cl_recdir, + hash); + if (conf) { + nfsd4_client_record_remove(conf); + expire_client(conf); + } + move_to_confirmed(unconf); + conf = unconf; + nfsd4_probe_callback(conf); + status = nfs_ok; + } + } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) + && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, + &confirm)))) { + /* + * RFC 3530 14.2.34 CASE 4: + * Client probably hasn't noticed that we rebooted yet. + */ + status = nfserr_stale_clientid; + } else { + /* check that we have hit one of the cases...*/ + status = nfserr_clid_inuse; } out: nfs4_unlock_state(); @@ -2413,8 +2454,8 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - set_access(open->op_share_access, stp); - set_deny(open->op_share_deny, stp); + __set_bit(open->op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); stp->st_openstp = NULL; } @@ -2493,8 +2534,8 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) ret = nfserr_locked; /* Search for conflicting share reservations */ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { - if (test_deny(deny_type, stp) || - test_deny(NFS4_SHARE_DENY_BOTH, stp)) + if (test_bit(deny_type, &stp->st_deny_bmap) || + test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) goto out; } ret = nfs_ok; @@ -2750,7 +2791,7 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c bool 
new_access; __be32 status; - new_access = !test_access(op_share_access, stp); + new_access = !test_bit(op_share_access, &stp->st_access_bmap); if (new_access) { status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); if (status) @@ -2765,8 +2806,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c return status; } /* remember the open */ - set_access(op_share_access, stp); - set_deny(open->op_share_deny, stp); + __set_bit(op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); return nfs_ok; } @@ -3241,18 +3282,18 @@ STALE_STATEID(stateid_t *stateid) } static inline int -access_permit_read(struct nfs4_ol_stateid *stp) +access_permit_read(unsigned long access_bmap) { - return test_access(NFS4_SHARE_ACCESS_READ, stp) || - test_access(NFS4_SHARE_ACCESS_BOTH, stp) || - test_access(NFS4_SHARE_ACCESS_WRITE, stp); + return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap); } static inline int -access_permit_write(struct nfs4_ol_stateid *stp) +access_permit_write(unsigned long access_bmap) { - return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || - test_access(NFS4_SHARE_ACCESS_BOTH, stp); + return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); } static @@ -3263,9 +3304,9 @@ __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) /* For lock stateid's, we test the parent open, not the lock: */ if (stp->st_openstp) stp = stp->st_openstp; - if ((flags & WR_STATE) && !access_permit_write(stp)) + if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) goto out; - if ((flags & RD_STATE) && !access_permit_read(stp)) + if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) goto out; status = nfs_ok; out: @@ -3305,7 +3346,7 @@ static bool stateid_generation_after(stateid_t *a, stateid_t *b) return (s32)a->si_generation - 
(s32)b->si_generation > 0; } -static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) +static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) { /* * When sessions are used the stateid generation number is ignored @@ -3614,10 +3655,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) { - if (!test_access(access, stp)) + if (!test_bit(access, &stp->st_access_bmap)) return; nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); - clear_access(access, stp); + __clear_bit(access, &stp->st_access_bmap); } static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) @@ -3639,12 +3680,12 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac } static void -reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp) +reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) { int i; for (i = 0; i < 4; i++) { if ((i & deny) != i) - clear_deny(i, stp); + __clear_bit(i, bmap); } } @@ -3671,19 +3712,19 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, if (status) goto out; status = nfserr_inval; - if (!test_access(od->od_share_access, stp)) { - dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", + if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { + dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); goto out; } - if (!test_deny(od->od_share_deny, stp)) { + if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); goto out; } nfs4_stateid_downgrade(stp, od->od_share_access); - reset_union_bmap_deny(od->od_share_deny, stp); + reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); 
update_stateid(&stp->st_stid.sc_stateid); memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); @@ -3973,13 +4014,13 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) struct nfs4_file *fp = lock_stp->st_file; int oflag = nfs4_access_to_omode(access); - if (test_access(access, lock_stp)) + if (test_bit(access, &lock_stp->st_access_bmap)) return; nfs4_file_get_access(fp, oflag); - set_access(access, lock_stp); + __set_bit(access, &lock_stp->st_access_bmap); } -static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) { struct nfs4_file *fi = ost->st_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); diff --git a/trunk/fs/nfsd/nfs4xdr.c b/trunk/fs/nfsd/nfs4xdr.c index 4949667c84ea..74c00bc92b9a 100644 --- a/trunk/fs/nfsd/nfs4xdr.c +++ b/trunk/fs/nfsd/nfs4xdr.c @@ -1674,12 +1674,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) static void write32(__be32 **p, u32 n) { - *(*p)++ = htonl(n); + *(*p)++ = n; } static void write64(__be32 **p, u64 n) { - write32(p, (n >> 32)); + write32(p, (u32)(n >> 32)); write32(p, (u32)n); } @@ -1744,16 +1744,15 @@ static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, _ } /* Encode as an array of strings the string given with components - * separated @sep, escaped with esc_enter and esc_exit. + * separated @sep. 
*/ -static __be32 nfsd4_encode_components_esc(char sep, char *components, - __be32 **pp, int *buflen, - char esc_enter, char esc_exit) +static __be32 nfsd4_encode_components(char sep, char *components, + __be32 **pp, int *buflen) { __be32 *p = *pp; __be32 *countp = p; int strlen, count=0; - char *str, *end, *next; + char *str, *end; dprintk("nfsd4_encode_components(%s)\n", components); if ((*buflen -= 4) < 0) @@ -1761,23 +1760,8 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, WRITE32(0); /* We will fill this in with @count later */ end = str = components; while (*end) { - bool found_esc = false; - - /* try to parse as esc_start, ..., esc_end, sep */ - if (*str == esc_enter) { - for (; *end && (*end != esc_exit); end++) - /* find esc_exit or end of string */; - next = end + 1; - if (*end && (!*next || *next == sep)) { - str++; - found_esc = true; - } - } - - if (!found_esc) - for (; *end && (*end != sep); end++) - /* find sep or end of string */; - + for (; *end && (*end != sep); end++) + ; /* Point to end of component */ strlen = end - str; if (strlen) { if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) @@ -1796,15 +1780,6 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, return 0; } -/* Encode as an array of strings the string given with components - * separated @sep. 
- */ -static __be32 nfsd4_encode_components(char sep, char *components, - __be32 **pp, int *buflen) -{ - return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0); -} - /* * encode a location element of a fs_locations structure */ @@ -1814,8 +1789,7 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, __be32 status; __be32 *p = *pp; - status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen, - '[', ']'); + status = nfsd4_encode_components(':', location->hosts, &p, buflen); if (status) return status; status = nfsd4_encode_components('/', location->path, &p, buflen); @@ -3277,7 +3251,7 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w } static __be32 -nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, +nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_exchange_id *exid) { __be32 *p; @@ -3332,7 +3306,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, +nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create_session *sess) { __be32 *p; @@ -3381,14 +3355,14 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, __be32 nfserr, +nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_destroy_session *destroy_session) { return nfserr; } static __be32 -nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, +nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_free_stateid *free_stateid) { __be32 *p; @@ -3397,13 +3371,13 @@ nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; RESERVE_SPACE(4); - *p++ = nfserr; + WRITE32(nfserr); ADJUST_ARGS(); return nfserr; } static __be32 
-nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, +nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { __be32 *p; @@ -3425,8 +3399,8 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -static __be32 -nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, +__be32 +nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_test_stateid *test_stateid) { struct nfsd4_test_stateid_id *stateid, *next; @@ -3529,7 +3503,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so * will be at least a page and will therefore hold the xdr_buf head. */ -__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) +int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) { struct xdr_buf *xb = &resp->rqstp->rq_res; struct nfsd4_session *session = NULL; diff --git a/trunk/fs/nfsd/nfsctl.c b/trunk/fs/nfsd/nfsctl.c index c55298ed5772..72699885ac48 100644 --- a/trunk/fs/nfsd/nfsctl.c +++ b/trunk/fs/nfsd/nfsctl.c @@ -661,7 +661,6 @@ static ssize_t __write_ports_addfd(char *buf) { char *mesg = buf; int fd, err; - struct net *net = &init_net; err = get_int(&mesg, &fd); if (err != 0 || fd < 0) @@ -673,8 +672,6 @@ static ssize_t __write_ports_addfd(char *buf) err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) { - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); svc_destroy(nfsd_serv); return err; } @@ -712,7 +709,6 @@ static ssize_t __write_ports_addxprt(char *buf) char transport[16]; struct svc_xprt *xprt; int port, err; - struct net *net = &init_net; if (sscanf(buf, "%15s %4u", transport, &port) != 2) return -EINVAL; @@ -724,12 +720,12 @@ static ssize_t __write_ports_addxprt(char *buf) if (err != 0) return err; - err = svc_create_xprt(nfsd_serv, transport, net, + err = svc_create_xprt(nfsd_serv, transport, &init_net, PF_INET, 
port, SVC_SOCK_ANONYMOUS); if (err < 0) goto out_err; - err = svc_create_xprt(nfsd_serv, transport, net, + err = svc_create_xprt(nfsd_serv, transport, &init_net, PF_INET6, port, SVC_SOCK_ANONYMOUS); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; @@ -738,14 +734,12 @@ static ssize_t __write_ports_addxprt(char *buf) nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port); + xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port); if (xprt != NULL) { svc_close_xprt(xprt); svc_xprt_put(xprt); } out_err: - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); svc_destroy(nfsd_serv); return err; } diff --git a/trunk/fs/nfsd/nfssvc.c b/trunk/fs/nfsd/nfssvc.c index ee709fc8f58b..cb4d51d8cbdb 100644 --- a/trunk/fs/nfsd/nfssvc.c +++ b/trunk/fs/nfsd/nfssvc.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -331,8 +330,6 @@ static int nfsd_get_default_max_blksize(void) int nfsd_create_serv(void) { - int error; - WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nfsd_serv) { svc_get(nfsd_serv); @@ -346,12 +343,6 @@ int nfsd_create_serv(void) if (nfsd_serv == NULL) return -ENOMEM; - error = svc_bind(nfsd_serv, current->nsproxy->net_ns); - if (error < 0) { - svc_destroy(nfsd_serv); - return error; - } - set_max_drc(); do_gettimeofday(&nfssvc_boot); /* record boot time */ return 0; @@ -382,7 +373,6 @@ int nfsd_set_nrthreads(int n, int *nthreads) int i = 0; int tot = 0; int err = 0; - struct net *net = &init_net; WARN_ON(!mutex_is_locked(&nfsd_mutex)); @@ -427,9 +417,6 @@ int nfsd_set_nrthreads(int n, int *nthreads) if (err) break; } - - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); svc_destroy(nfsd_serv); return err; @@ -445,7 +432,6 @@ nfsd_svc(unsigned short port, int nrservs) { int error; bool nfsd_up_before; - struct net *net = &init_net; mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -478,8 +464,6 @@ nfsd_svc(unsigned short 
port, int nrservs) if (error < 0 && !nfsd_up_before) nfsd_shutdown(); out_destroy: - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); svc_destroy(nfsd_serv); /* Release server */ out: mutex_unlock(&nfsd_mutex); @@ -563,9 +547,6 @@ nfsd(void *vrqstp) nfsdstats.th_cnt --; out: - if (rqstp->rq_server->sv_nrthreads == 1) - svc_shutdown_net(rqstp->rq_server, &init_net); - /* Release the thread */ svc_exit_thread(rqstp); @@ -678,12 +659,8 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { int ret = seq_release(inode, file); - struct net *net = &init_net; - mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); svc_destroy(nfsd_serv); mutex_unlock(&nfsd_mutex); return ret; diff --git a/trunk/fs/nfsd/state.h b/trunk/fs/nfsd/state.h index 849091e16ea6..89ab137d379a 100644 --- a/trunk/fs/nfsd/state.h +++ b/trunk/fs/nfsd/state.h @@ -232,6 +232,7 @@ struct nfs4_client { time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ u32 cl_flavor; /* setclientid pseudoflavor */ + char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ diff --git a/trunk/fs/nfsd/xdr4.h b/trunk/fs/nfsd/xdr4.h index acd127d4ee82..1b3501598ab5 100644 --- a/trunk/fs/nfsd/xdr4.h +++ b/trunk/fs/nfsd/xdr4.h @@ -60,7 +60,7 @@ struct nfsd4_compound_state { __be32 *datap; size_t iovlen; u32 minorversion; - __be32 status; + u32 status; stateid_t current_stateid; stateid_t save_stateid; /* to indicate current and saved state id presents */ @@ -364,7 +364,7 @@ struct nfsd4_test_stateid_id { }; struct nfsd4_test_stateid { - u32 ts_num_ids; + __be32 ts_num_ids; struct list_head ts_stateid_list; }; @@ 
-549,7 +549,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, struct nfsd4_compoundargs *); int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); -__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); +int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, diff --git a/trunk/fs/nilfs2/namei.c b/trunk/fs/nilfs2/namei.c index b72847988b78..0bb2c2010b95 100644 --- a/trunk/fs/nilfs2/namei.c +++ b/trunk/fs/nilfs2/namei.c @@ -508,29 +508,31 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); } -static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp, - struct inode *parent) +static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, + int connectable) { struct nilfs_fid *fid = (struct nilfs_fid *)fh; + struct inode *inode = dentry->d_inode; struct nilfs_root *root = NILFS_I(inode)->i_root; int type; - if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { - *lenp = NILFS_FID_SIZE_CONNECTABLE; - return 255; - } - if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { - *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; + if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || + (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) return 255; - } fid->cno = root->cno; fid->ino = inode->i_ino; fid->gen = inode->i_generation; - if (parent) { + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; fid->parent_ino = parent->i_ino; fid->parent_gen = parent->i_generation; + spin_unlock(&dentry->d_lock); + type = FILEID_NILFS_WITH_PARENT; *lenp = NILFS_FID_SIZE_CONNECTABLE; } else { diff --git 
a/trunk/fs/notify/fsnotify.c b/trunk/fs/notify/fsnotify.c index b39c5c161adb..ccb14d3fc0de 100644 --- a/trunk/fs/notify/fsnotify.c +++ b/trunk/fs/notify/fsnotify.c @@ -123,7 +123,7 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) } EXPORT_SYMBOL_GPL(__fsnotify_parent); -static int send_to_group(struct inode *to_tell, +static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, __u32 mask, void *data, @@ -168,10 +168,10 @@ static int send_to_group(struct inode *to_tell, vfsmount_test_mask &= ~inode_mark->ignored_mask; } - pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" + pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p" " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" " data=%p data_is=%d cookie=%d event=%p\n", - __func__, group, to_tell, mask, inode_mark, + __func__, group, to_tell, mnt, mask, inode_mark, inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, data_is, cookie, *event); @@ -258,16 +258,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, if (inode_group > vfsmount_group) { /* handle inode */ - ret = send_to_group(to_tell, inode_mark, NULL, mask, data, + ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, data_is, cookie, file_name, &event); /* we didn't use the vfsmount_mark */ vfsmount_group = NULL; } else if (vfsmount_group > inode_group) { - ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, + ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); inode_group = NULL; } else { - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, + ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); } diff --git a/trunk/fs/ntfs/file.c b/trunk/fs/ntfs/file.c index 7389d2d5e51d..8639169221c7 100644 --- a/trunk/fs/ntfs/file.c +++ b/trunk/fs/ntfs/file.c 
@@ -2096,9 +2096,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, err = file_remove_suid(file); if (err) goto out; - err = file_update_time(file); - if (err) - goto out; + file_update_time(file); written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count); out: diff --git a/trunk/fs/ocfs2/blockcheck.c b/trunk/fs/ocfs2/blockcheck.c index 0725e6054650..c7ee03c22226 100644 --- a/trunk/fs/ocfs2/blockcheck.c +++ b/trunk/fs/ocfs2/blockcheck.c @@ -422,46 +422,45 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, struct ocfs2_blockcheck_stats *stats) { int rc = 0; - u32 bc_crc32e; - u16 bc_ecc; + struct ocfs2_block_check check; u32 crc, ecc; ocfs2_blockcheck_inc_check(stats); - bc_crc32e = le32_to_cpu(bc->bc_crc32e); - bc_ecc = le16_to_cpu(bc->bc_ecc); + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); + check.bc_ecc = le16_to_cpu(bc->bc_ecc); memset(bc, 0, sizeof(struct ocfs2_block_check)); /* Fast path - if the crc32 validates, we're good to go */ crc = crc32_le(~0, data, blocksize); - if (crc == bc_crc32e) + if (crc == check.bc_crc32e) goto out; ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: 0x%x, computed 0x%x. 
Applying ECC.\n", - (unsigned int)bc_crc32e, (unsigned int)crc); + (unsigned int)check.bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ ecc = ocfs2_hamming_encode_block(data, blocksize); - ocfs2_hamming_fix_block(data, blocksize, ecc ^ bc_ecc); + ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc); /* And check the crc32 again */ crc = crc32_le(~0, data, blocksize); - if (crc == bc_crc32e) { + if (crc == check.bc_crc32e) { ocfs2_blockcheck_inc_recover(stats); goto out; } mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", - (unsigned int)bc_crc32e, (unsigned int)crc); + (unsigned int)check.bc_crc32e, (unsigned int)crc); rc = -EIO; out: - bc->bc_crc32e = cpu_to_le32(bc_crc32e); - bc->bc_ecc = cpu_to_le16(bc_ecc); + bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); + bc->bc_ecc = cpu_to_le16(check.bc_ecc); return rc; } @@ -529,8 +528,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, struct ocfs2_blockcheck_stats *stats) { int i, rc = 0; - u32 bc_crc32e; - u16 bc_ecc; + struct ocfs2_block_check check; u32 crc, ecc, fix; BUG_ON(nr < 0); @@ -540,21 +538,21 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, ocfs2_blockcheck_inc_check(stats); - bc_crc32e = le32_to_cpu(bc->bc_crc32e); - bc_ecc = le16_to_cpu(bc->bc_ecc); + check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); + check.bc_ecc = le16_to_cpu(bc->bc_ecc); memset(bc, 0, sizeof(struct ocfs2_block_check)); /* Fast path - if the crc32 validates, we're good to go */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == bc_crc32e) + if (crc == check.bc_crc32e) goto out; ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: %u, computed %u. 
Applying ECC.\n", - (unsigned int)bc_crc32e, (unsigned int)crc); + (unsigned int)check.bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ for (i = 0, ecc = 0; i < nr; i++) { @@ -567,7 +565,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, bhs[i]->b_size * 8, bhs[i]->b_size * 8 * i); } - fix = ecc ^ bc_ecc; + fix = ecc ^ check.bc_ecc; for (i = 0; i < nr; i++) { /* * Try the fix against each buffer. It will only affect @@ -580,19 +578,19 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, /* And check the crc32 again */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == bc_crc32e) { + if (crc == check.bc_crc32e) { ocfs2_blockcheck_inc_recover(stats); goto out; } mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", - (unsigned int)bc_crc32e, (unsigned int)crc); + (unsigned int)check.bc_crc32e, (unsigned int)crc); rc = -EIO; out: - bc->bc_crc32e = cpu_to_le32(bc_crc32e); - bc->bc_ecc = cpu_to_le16(bc_ecc); + bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); + bc->bc_ecc = cpu_to_le16(check.bc_ecc); return rc; } diff --git a/trunk/fs/ocfs2/dlm/dlmast.c b/trunk/fs/ocfs2/dlm/dlmast.c index fbec0be62326..3a3ed4bb794b 100644 --- a/trunk/fs/ocfs2/dlm/dlmast.c +++ b/trunk/fs/ocfs2/dlm/dlmast.c @@ -293,7 +293,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; char *name; struct list_head *iter, *head=NULL; - __be64 cookie; + u64 cookie; u32 flags; u8 node; diff --git a/trunk/fs/ocfs2/dlm/dlmcommon.h b/trunk/fs/ocfs2/dlm/dlmcommon.h index de854cca12a2..a5952ceecba5 100644 --- a/trunk/fs/ocfs2/dlm/dlmcommon.h +++ b/trunk/fs/ocfs2/dlm/dlmcommon.h @@ -679,7 +679,7 @@ struct dlm_query_join_packet { }; union dlm_query_join_response { - __be32 intval; + u32 intval; struct dlm_query_join_packet packet; }; @@ -755,8 +755,8 @@ struct dlm_query_region { struct dlm_node_info { u8 ni_nodenum; u8 
pad1; - __be16 ni_ipv4_port; - __be32 ni_ipv4_address; + u16 ni_ipv4_port; + u32 ni_ipv4_address; }; struct dlm_query_nodeinfo { diff --git a/trunk/fs/ocfs2/dlm/dlmdomain.c b/trunk/fs/ocfs2/dlm/dlmdomain.c index 9e89d70df337..92f2ead0fab6 100644 --- a/trunk/fs/ocfs2/dlm/dlmdomain.c +++ b/trunk/fs/ocfs2/dlm/dlmdomain.c @@ -818,7 +818,7 @@ static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet, union dlm_query_join_response response; response.packet = *packet; - *wire = be32_to_cpu(response.intval); + *wire = cpu_to_be32(response.intval); } static void dlm_query_join_wire_to_packet(u32 wire, diff --git a/trunk/fs/ocfs2/export.c b/trunk/fs/ocfs2/export.c index 322216a5f0dd..745db42528d5 100644 --- a/trunk/fs/ocfs2/export.c +++ b/trunk/fs/ocfs2/export.c @@ -177,23 +177,21 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) return parent; } -static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len, - struct inode *parent) +static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, + int connectable) { + struct inode *inode = dentry->d_inode; int len = *max_len; int type = 1; u64 blkno; u32 generation; __le32 *fh = (__force __le32 *) fh_in; -#ifdef TRACE_HOOKS_ARE_NOT_BRAINDEAD_IN_YOUR_OPINION -#error "You go ahead and fix that mess, then. 
Somehow" trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len, dentry->d_name.name, fh, len, connectable); -#endif - if (parent && (len < 6)) { + if (connectable && (len < 6)) { *max_len = 6; type = 255; goto bail; @@ -213,7 +211,12 @@ static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len, fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff)); fh[2] = cpu_to_le32(generation); - if (parent) { + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + + parent = dentry->d_parent->d_inode; blkno = OCFS2_I(parent)->ip_blkno; generation = parent->i_generation; @@ -221,6 +224,8 @@ static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len, fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff)); fh[5] = cpu_to_le32(generation); + spin_unlock(&dentry->d_lock); + len = 6; type = 2; diff --git a/trunk/fs/ocfs2/inode.c b/trunk/fs/ocfs2/inode.c index d89e08a81eda..735514ca400f 100644 --- a/trunk/fs/ocfs2/inode.c +++ b/trunk/fs/ocfs2/inode.c @@ -273,13 +273,11 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_gid = le32_to_cpu(fe->i_gid); /* Fast symlinks will have i_size but no allocated clusters. 
*/ - if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { + if (S_ISLNK(inode->i_mode) && !fe->i_clusters) inode->i_blocks = 0; - inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; - } else { + else inode->i_blocks = ocfs2_inode_sector_count(inode); - inode->i_mapping->a_ops = &ocfs2_aops; - } + inode->i_mapping->a_ops = &ocfs2_aops; inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); @@ -333,7 +331,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, OCFS2_I(inode)->ip_dir_lock_gen = 1; break; case S_IFLNK: - inode->i_op = &ocfs2_symlink_inode_operations; + if (ocfs2_inode_is_fast_symlink(inode)) + inode->i_op = &ocfs2_fast_symlink_inode_operations; + else + inode->i_op = &ocfs2_symlink_inode_operations; i_size_write(inode, le64_to_cpu(fe->i_size)); break; default: diff --git a/trunk/fs/ocfs2/ioctl.c b/trunk/fs/ocfs2/ioctl.c index d96f7f81d8dd..a1a1bfd652c9 100644 --- a/trunk/fs/ocfs2/ioctl.c +++ b/trunk/fs/ocfs2/ioctl.c @@ -864,7 +864,7 @@ int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, if (status) break; - reqp = (struct ocfs2_info_request __user *)(unsigned long)req_addr; + reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; if (!reqp) { status = -EINVAL; goto bail; @@ -888,11 +888,9 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct ocfs2_space_resv sr; struct ocfs2_new_group_input input; struct reflink_arguments args; - const char __user *old_path; - const char __user *new_path; + const char *old_path, *new_path; bool preserve; struct ocfs2_info info; - void __user *argp = (void __user *)arg; switch (cmd) { case OCFS2_IOC_GETFLAGS: @@ -939,15 +937,17 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ocfs2_group_add(inode, &input); case OCFS2_IOC_REFLINK: - if (copy_from_user(&args, argp, sizeof(args))) + if (copy_from_user(&args, (struct 
reflink_arguments *)arg, + sizeof(args))) return -EFAULT; - old_path = (const char __user *)(unsigned long)args.old_path; - new_path = (const char __user *)(unsigned long)args.new_path; + old_path = (const char *)(unsigned long)args.old_path; + new_path = (const char *)(unsigned long)args.new_path; preserve = (args.preserve != 0); return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); case OCFS2_IOC_INFO: - if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) + if (copy_from_user(&info, (struct ocfs2_info __user *)arg, + sizeof(struct ocfs2_info))) return -EFAULT; return ocfs2_info_handle(inode, &info, 0); @@ -960,20 +960,22 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&range, argp, sizeof(range))) + if (copy_from_user(&range, (struct fstrim_range *)arg, + sizeof(range))) return -EFAULT; ret = ocfs2_trim_fs(sb, &range); if (ret < 0) return ret; - if (copy_to_user(argp, &range, sizeof(range))) + if (copy_to_user((struct fstrim_range *)arg, &range, + sizeof(range))) return -EFAULT; return 0; } case OCFS2_IOC_MOVE_EXT: - return ocfs2_ioctl_move_extents(filp, argp); + return ocfs2_ioctl_move_extents(filp, (void __user *)arg); default: return -ENOTTY; } @@ -986,7 +988,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct reflink_arguments args; struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_info info; - void __user *argp = (void __user *)arg; switch (cmd) { case OCFS2_IOC32_GETFLAGS: @@ -1005,14 +1006,16 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case FITRIM: break; case OCFS2_IOC_REFLINK: - if (copy_from_user(&args, argp, sizeof(args))) + if (copy_from_user(&args, (struct reflink_arguments *)arg, + sizeof(args))) return -EFAULT; preserve = (args.preserve != 0); return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), compat_ptr(args.new_path), preserve); case 
OCFS2_IOC_INFO: - if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) + if (copy_from_user(&info, (struct ocfs2_info __user *)arg, + sizeof(struct ocfs2_info))) return -EFAULT; return ocfs2_info_handle(inode, &info, 1); diff --git a/trunk/fs/ocfs2/move_extents.c b/trunk/fs/ocfs2/move_extents.c index 6083432f667e..b1e3fce72ea4 100644 --- a/trunk/fs/ocfs2/move_extents.c +++ b/trunk/fs/ocfs2/move_extents.c @@ -1082,7 +1082,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) context->file = filp; if (argp) { - if (copy_from_user(&range, argp, sizeof(range))) { + if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, + sizeof(range))) { status = -EFAULT; goto out; } @@ -1137,7 +1138,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) * length and new_offset even if failure happens somewhere. */ if (argp) { - if (copy_to_user(argp, &range, sizeof(range))) + if (copy_to_user((struct ocfs2_move_extents *)argp, &range, + sizeof(range))) status = -EFAULT; } diff --git a/trunk/fs/ocfs2/namei.c b/trunk/fs/ocfs2/namei.c index 9f39c640cddf..a9856e3eaaf0 100644 --- a/trunk/fs/ocfs2/namei.c +++ b/trunk/fs/ocfs2/namei.c @@ -1724,16 +1724,15 @@ static int ocfs2_symlink(struct inode *dir, fe = (struct ocfs2_dinode *) new_fe_bh->b_data; inode->i_rdev = 0; newsize = l - 1; - inode->i_op = &ocfs2_symlink_inode_operations; if (l > ocfs2_fast_symlink_chars(sb)) { u32 offset = 0; + inode->i_op = &ocfs2_symlink_inode_operations; status = dquot_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status) goto bail; did_quota = 1; - inode->i_mapping->a_ops = &ocfs2_aops; status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, new_fe_bh, handle, data_ac, NULL, @@ -1751,7 +1750,7 @@ static int ocfs2_symlink(struct inode *dir, i_size_write(inode, newsize); inode->i_blocks = ocfs2_inode_sector_count(inode); } else { - inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; + inode->i_op = &ocfs2_fast_symlink_inode_operations; 
memcpy((char *) fe->id2.i_symlink, symname, l); i_size_write(inode, newsize); inode->i_blocks = 0; diff --git a/trunk/fs/ocfs2/symlink.c b/trunk/fs/ocfs2/symlink.c index f1fbb4b552ad..5d22872e2bb3 100644 --- a/trunk/fs/ocfs2/symlink.c +++ b/trunk/fs/ocfs2/symlink.c @@ -54,40 +54,101 @@ #include "buffer_head_io.h" -static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) +static char *ocfs2_fast_symlink_getlink(struct inode *inode, + struct buffer_head **bh) { - struct inode *inode = page->mapping->host; - struct buffer_head *bh; - int status = ocfs2_read_inode_block(inode, &bh); + int status; + char *link = NULL; struct ocfs2_dinode *fe; - const char *link; - void *kaddr; - size_t len; + status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); - return status; + link = ERR_PTR(status); + goto bail; } - fe = (struct ocfs2_dinode *) bh->b_data; + fe = (struct ocfs2_dinode *) (*bh)->b_data; link = (char *) fe->id2.i_symlink; - /* will be less than a page size */ - len = strnlen(link, ocfs2_fast_symlink_chars(inode->i_sb)); - kaddr = kmap_atomic(page); - memcpy(kaddr, link, len + 1); - kunmap_atomic(kaddr); - SetPageUptodate(page); - unlock_page(page); +bail: + + return link; +} + +static int ocfs2_readlink(struct dentry *dentry, + char __user *buffer, + int buflen) +{ + int ret; + char *link; + struct buffer_head *bh = NULL; + struct inode *inode = dentry->d_inode; + + link = ocfs2_fast_symlink_getlink(inode, &bh); + if (IS_ERR(link)) { + ret = PTR_ERR(link); + goto out; + } + + /* + * Without vfsmount we can't update atime now, + * but we will update atime here ultimately. 
+ */ + ret = vfs_readlink(dentry, buffer, buflen, link); + brelse(bh); - return 0; +out: + if (ret < 0) + mlog_errno(ret); + return ret; } -const struct address_space_operations ocfs2_fast_symlink_aops = { - .readpage = ocfs2_fast_symlink_readpage, -}; +static void *ocfs2_fast_follow_link(struct dentry *dentry, + struct nameidata *nd) +{ + int status = 0; + int len; + char *target, *link = ERR_PTR(-ENOMEM); + struct inode *inode = dentry->d_inode; + struct buffer_head *bh = NULL; + + BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); + target = ocfs2_fast_symlink_getlink(inode, &bh); + if (IS_ERR(target)) { + status = PTR_ERR(target); + mlog_errno(status); + goto bail; + } + + /* Fast symlinks can't be large */ + len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); + link = kzalloc(len + 1, GFP_NOFS); + if (!link) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + memcpy(link, target, len); + +bail: + nd_set_link(nd, status ? ERR_PTR(status) : link); + brelse(bh); + + if (status) + mlog_errno(status); + return NULL; +} + +static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +{ + char *link = nd_get_link(nd); + if (!IS_ERR(link)) + kfree(link); +} const struct inode_operations ocfs2_symlink_inode_operations = { - .readlink = generic_readlink, + .readlink = page_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, .getattr = ocfs2_getattr, @@ -98,3 +159,15 @@ const struct inode_operations ocfs2_symlink_inode_operations = { .removexattr = generic_removexattr, .fiemap = ocfs2_fiemap, }; +const struct inode_operations ocfs2_fast_symlink_inode_operations = { + .readlink = ocfs2_readlink, + .follow_link = ocfs2_fast_follow_link, + .put_link = ocfs2_fast_put_link, + .getattr = ocfs2_getattr, + .setattr = ocfs2_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ocfs2_listxattr, + .removexattr = generic_removexattr, + .fiemap = ocfs2_fiemap, +}; diff --git 
a/trunk/fs/ocfs2/symlink.h b/trunk/fs/ocfs2/symlink.h index 71ee4245e919..65a6c9c6ad51 100644 --- a/trunk/fs/ocfs2/symlink.h +++ b/trunk/fs/ocfs2/symlink.h @@ -27,7 +27,7 @@ #define OCFS2_SYMLINK_H extern const struct inode_operations ocfs2_symlink_inode_operations; -extern const struct address_space_operations ocfs2_fast_symlink_aops; +extern const struct inode_operations ocfs2_fast_symlink_inode_operations; /* * Test whether an inode is a fast symlink. diff --git a/trunk/fs/open.c b/trunk/fs/open.c index d6c79a0dffc7..d54301219d04 100644 --- a/trunk/fs/open.c +++ b/trunk/fs/open.c @@ -654,23 +654,10 @@ static inline int __get_file_write_access(struct inode *inode, return error; } -int open_check_o_direct(struct file *f) -{ - /* NB: we're sure to have correct a_ops only after f_op->open */ - if (f->f_flags & O_DIRECT) { - if (!f->f_mapping->a_ops || - ((!f->f_mapping->a_ops->direct_IO) && - (!f->f_mapping->a_ops->get_xip_mem))) { - return -EINVAL; - } - } - return 0; -} - -static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) +static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) { static const struct file_operations empty_fops = {}; struct inode *inode; @@ -726,6 +713,16 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); + /* NB: we're sure to have correct a_ops only after f_op->open */ + if (f->f_flags & O_DIRECT) { + if (!f->f_mapping->a_ops || + ((!f->f_mapping->a_ops->direct_IO) && + (!f->f_mapping->a_ops->get_xip_mem))) { + fput(f); + f = ERR_PTR(-EINVAL); + } + } + return f; cleanup_all: @@ -747,29 +744,12 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.dentry = NULL; f->f_path.mnt = NULL; 
cleanup_file: + put_filp(f); dput(dentry); mntput(mnt); return ERR_PTR(error); } -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) -{ - struct file *res = do_dentry_open(dentry, mnt, f, open, cred); - if (!IS_ERR(res)) { - int error = open_check_o_direct(f); - if (error) { - fput(res); - res = ERR_PTR(error); - } - } else { - put_filp(f); - } - return res; -} - /** * lookup_instantiate_filp - instantiates the open intent filp * @nd: pointer to nameidata @@ -824,31 +804,13 @@ struct file *nameidata_to_filp(struct nameidata *nd) /* Pick up the filp from the open intent */ filp = nd->intent.open.file; + nd->intent.open.file = NULL; /* Has the filesystem initialised the file for us? */ - if (filp->f_path.dentry != NULL) { - nd->intent.open.file = NULL; - } else { - struct file *res; - + if (filp->f_path.dentry == NULL) { path_get(&nd->path); - res = do_dentry_open(nd->path.dentry, nd->path.mnt, - filp, NULL, cred); - if (!IS_ERR(res)) { - int error; - - nd->intent.open.file = NULL; - BUG_ON(res != filp); - - error = open_check_o_direct(filp); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } else { - /* Allow nd->intent.open.file to be recycled */ - filp = res; - } + filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, + NULL, cred); } return filp; } diff --git a/trunk/fs/pipe.c b/trunk/fs/pipe.c index 49c1065256fd..95ebb56de494 100644 --- a/trunk/fs/pipe.c +++ b/trunk/fs/pipe.c @@ -654,11 +654,8 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - if (ret > 0) { - int err = file_update_time(filp); - if (err) - ret = err; - } + if (ret > 0) + file_update_time(filp); return ret; } diff --git a/trunk/fs/pnode.c b/trunk/fs/pnode.c index bed378db0758..ab5fa9e1a79a 100644 --- a/trunk/fs/pnode.c +++ 
b/trunk/fs/pnode.c @@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - br_write_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0, &umount_list); } - br_write_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); return ret; } diff --git a/trunk/fs/proc_namespace.c b/trunk/fs/proc_namespace.c index 5e289a7cbad1..12412852d88a 100644 --- a/trunk/fs/proc_namespace.c +++ b/trunk/fs/proc_namespace.c @@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) poll_wait(file, &p->ns->poll, wait); - br_read_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (p->m.poll_event != ns->event) { p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } - br_read_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } diff --git a/trunk/fs/readdir.c b/trunk/fs/readdir.c index 39e3370d79cf..cc0a8227cddf 100644 --- a/trunk/fs/readdir.c +++ b/trunk/fs/readdir.c @@ -108,11 +108,11 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, int error; struct file * file; struct readdir_callback buf; - int fput_needed; - file = fget_light(fd, &fput_needed); + error = -EBADF; + file = fget(fd); if (!file) - return -EBADF; + goto out; buf.result = 0; buf.dirent = dirent; @@ -121,7 +121,8 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (buf.result) error = buf.result; - fput_light(file, fput_needed); + fput(file); +out: return error; } @@ -194,15 +195,16 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, struct file * file; struct linux_dirent __user * lastdirent; struct getdents_callback buf; - int fput_needed; int error; + error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - return -EFAULT; + goto out; - file = fget_light(fd, &fput_needed); + error = -EBADF; + file = fget(fd); if (!file) - return -EBADF; + goto out; 
buf.current_dir = dirent; buf.previous = NULL; @@ -219,7 +221,8 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, else error = count - buf.count; } - fput_light(file, fput_needed); + fput(file); +out: return error; } @@ -275,15 +278,16 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, struct file * file; struct linux_dirent64 __user * lastdirent; struct getdents_callback64 buf; - int fput_needed; int error; + error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - return -EFAULT; + goto out; - file = fget_light(fd, &fput_needed); + error = -EBADF; + file = fget(fd); if (!file) - return -EBADF; + goto out; buf.current_dir = dirent; buf.previous = NULL; @@ -301,6 +305,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, else error = count - buf.count; } - fput_light(file, fput_needed); + fput(file); +out: return error; } diff --git a/trunk/fs/reiserfs/inode.c b/trunk/fs/reiserfs/inode.c index a6d4268fb6c1..59d06871a850 100644 --- a/trunk/fs/reiserfs/inode.c +++ b/trunk/fs/reiserfs/inode.c @@ -1592,12 +1592,13 @@ struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, (fh_type == 6) ? fid->raw[5] : 0); } -int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, - struct inode *parent) +int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, + int need_parent) { + struct inode *inode = dentry->d_inode; int maxlen = *lenp; - if (parent && (maxlen < 5)) { + if (need_parent && (maxlen < 5)) { *lenp = 5; return 255; } else if (maxlen < 3) { @@ -1609,15 +1610,20 @@ int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); data[2] = inode->i_generation; *lenp = 3; - if (parent) { - data[3] = parent->i_ino; - data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id); - *lenp = 5; - if (maxlen >= 6) { - data[5] = parent->i_generation; - *lenp = 6; - } - } + /* no room for directory info? 
return what we've stored so far */ + if (maxlen < 5 || !need_parent) + return 3; + + spin_lock(&dentry->d_lock); + inode = dentry->d_parent->d_inode; + data[3] = inode->i_ino; + data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); + *lenp = 5; + if (maxlen >= 6) { + data[5] = inode->i_generation; + *lenp = 6; + } + spin_unlock(&dentry->d_lock); return *lenp; } diff --git a/trunk/fs/reiserfs/journal.c b/trunk/fs/reiserfs/journal.c index afcadcc03e8a..b1a08573fe14 100644 --- a/trunk/fs/reiserfs/journal.c +++ b/trunk/fs/reiserfs/journal.c @@ -1923,8 +1923,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * the workqueue job (flush_async_commit) needs this lock */ reiserfs_write_unlock(sb); - - cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); flush_workqueue(commit_wq); if (!reiserfs_mounted_fs_count) { @@ -3233,6 +3231,8 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, th->t_trans_id, journal->j_trans_id); } + sb->s_dirt = 1; + prepared = test_clear_buffer_journal_prepared(bh); clear_buffer_journal_restore_dirty(bh); /* already in this transaction, we are done */ @@ -3316,7 +3316,6 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, journal->j_first = cn; journal->j_last = cn; } - reiserfs_schedule_old_flush(sb); return 0; } @@ -3493,7 +3492,7 @@ static void flush_async_commits(struct work_struct *work) ** flushes any old transactions to disk ** ends the current transaction if it is too old */ -void reiserfs_flush_old_commits(struct super_block *sb) +int reiserfs_flush_old_commits(struct super_block *sb) { time_t now; struct reiserfs_transaction_handle th; @@ -3503,8 +3502,9 @@ void reiserfs_flush_old_commits(struct super_block *sb) /* safety check so we don't flush while we are replaying the log during * mount */ - if (list_empty(&journal->j_journal_list)) - return; + if (list_empty(&journal->j_journal_list)) { + return 0; + } /* check the current transaction. 
If there are no writers, and it is * too old, finish it, and force the commit blocks to disk @@ -3526,6 +3526,7 @@ void reiserfs_flush_old_commits(struct super_block *sb) do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); } } + return sb->s_dirt; } /* @@ -3954,7 +3955,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, ** it tells us if we should continue with the journal_end, or just return */ if (!check_journal_end(th, sb, nblocks, flags)) { - reiserfs_schedule_old_flush(sb); + sb->s_dirt = 1; wake_queued_writers(sb); reiserfs_async_progress_wait(sb); goto out; diff --git a/trunk/fs/reiserfs/reiserfs.h b/trunk/fs/reiserfs/reiserfs.h index 33215f57ea06..a59d27126338 100644 --- a/trunk/fs/reiserfs/reiserfs.h +++ b/trunk/fs/reiserfs/reiserfs.h @@ -480,11 +480,6 @@ struct reiserfs_sb_info { struct dentry *priv_root; /* root of /.reiserfs_priv */ struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ int j_errno; - - int work_queued; /* non-zero delayed work is queued */ - struct delayed_work old_work; /* old transactions flush delayed work */ - spinlock_t old_work_lock; /* protects old_work and work_queued */ - #ifdef CONFIG_QUOTA char *s_qf_names[MAXQUOTAS]; int s_jquota_fmt; @@ -2457,7 +2452,7 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to); -void reiserfs_flush_old_commits(struct super_block *); +int reiserfs_flush_old_commits(struct super_block *); int reiserfs_commit_for_inode(struct inode *); int reiserfs_inode_needs_commit(struct inode *); void reiserfs_update_inode_transaction(struct inode *); @@ -2492,7 +2487,6 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); -void reiserfs_schedule_old_flush(struct 
super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); @@ -2617,8 +2611,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); -int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, - struct inode *parent); +int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, + int connectable); int reiserfs_truncate_file(struct inode *, int update_timestamps); void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, diff --git a/trunk/fs/reiserfs/resize.c b/trunk/fs/reiserfs/resize.c index 3ce02cff5e90..9a17f63c3fd7 100644 --- a/trunk/fs/reiserfs/resize.c +++ b/trunk/fs/reiserfs/resize.c @@ -200,6 +200,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) (bmap_nr_new - bmap_nr))); PUT_SB_BLOCK_COUNT(s, block_count_new); PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); + s->s_dirt = 1; journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); diff --git a/trunk/fs/reiserfs/super.c b/trunk/fs/reiserfs/super.c index 651ce767b55d..c07b7d709447 100644 --- a/trunk/fs/reiserfs/super.c +++ b/trunk/fs/reiserfs/super.c @@ -72,58 +72,20 @@ static int reiserfs_sync_fs(struct super_block *s, int wait) if (!journal_begin(&th, s, 1)) if (!journal_end_sync(&th, s, 1)) reiserfs_flush_old_commits(s); + s->s_dirt = 0; /* Even if it's not true. 
+ * We'll loop forever in sync_supers otherwise */ reiserfs_write_unlock(s); return 0; } -static void flush_old_commits(struct work_struct *work) +static void reiserfs_write_super(struct super_block *s) { - struct reiserfs_sb_info *sbi; - struct super_block *s; - - sbi = container_of(work, struct reiserfs_sb_info, old_work.work); - s = sbi->s_journal->j_work_sb; - - spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; - spin_unlock(&sbi->old_work_lock); - reiserfs_sync_fs(s, 1); } -void reiserfs_schedule_old_flush(struct super_block *s) -{ - struct reiserfs_sb_info *sbi = REISERFS_SB(s); - unsigned long delay; - - if (s->s_flags & MS_RDONLY) - return; - - spin_lock(&sbi->old_work_lock); - if (!sbi->work_queued) { - delay = msecs_to_jiffies(dirty_writeback_interval * 10); - queue_delayed_work(system_long_wq, &sbi->old_work, delay); - sbi->work_queued = 1; - } - spin_unlock(&sbi->old_work_lock); -} - -static void cancel_old_flush(struct super_block *s) -{ - struct reiserfs_sb_info *sbi = REISERFS_SB(s); - - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); - spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; - spin_unlock(&sbi->old_work_lock); -} - static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; - - cancel_old_flush(s); - reiserfs_write_lock(s); if (!(s->s_flags & MS_RDONLY)) { int err = journal_begin(&th, s, 1); @@ -137,6 +99,7 @@ static int reiserfs_freeze(struct super_block *s) journal_end_sync(&th, s, 1); } } + s->s_dirt = 0; reiserfs_write_unlock(s); return 0; } @@ -520,6 +483,9 @@ static void reiserfs_put_super(struct super_block *s) reiserfs_write_lock(s); + if (s->s_dirt) + reiserfs_write_super(s); + /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { if (!journal_begin(&th, s, 10)) { @@ -726,6 +692,7 @@ static const struct super_operations reiserfs_sops = { .dirty_inode = reiserfs_dirty_inode, .evict_inode = reiserfs_evict_inode, 
.put_super = reiserfs_put_super, + .write_super = reiserfs_write_super, .sync_fs = reiserfs_sync_fs, .freeze_fs = reiserfs_freeze, .unfreeze_fs = reiserfs_unfreeze, @@ -1433,6 +1400,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) err = journal_end(&th, s, 10); if (err) goto out_err; + s->s_dirt = 0; if (!(*mount_flags & MS_RDONLY)) { dquot_resume(s, -1); @@ -1762,21 +1730,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) return -ENOMEM; s->s_fs_info = sbi; /* Set default values for options: non-aggressive tails, RO on errors */ - sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); - sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); - sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); /* no preallocation minimum, be smart in reiserfs_file_write instead */ - sbi->s_alloc_options.preallocmin = 0; + REISERFS_SB(s)->s_alloc_options.preallocmin = 0; /* Preallocate by 16 blocks (17-1) at once */ - sbi->s_alloc_options.preallocsize = 17; + REISERFS_SB(s)->s_alloc_options.preallocsize = 17; /* setup default block allocator options */ reiserfs_init_alloc_options(s); - spin_lock_init(&sbi->old_work_lock); - INIT_DELAYED_WORK(&sbi->old_work, flush_old_commits); - mutex_init(&sbi->lock); - sbi->lock_depth = -1; + mutex_init(&REISERFS_SB(s)->lock); + REISERFS_SB(s)->lock_depth = -1; jdev_name = NULL; if (reiserfs_parse_options @@ -1785,8 +1751,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error_unlocked; } if (jdev_name && jdev_name[0]) { - sbi->s_jdev = kstrdup(jdev_name, GFP_KERNEL); - if (!sbi->s_jdev) { + REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); + if (!REISERFS_SB(s)->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); goto error; @@ -1844,7 
+1810,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) /* make data=ordered the default */ if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && !reiserfs_data_writeback(s)) { - sbi->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); } if (reiserfs_data_log(s)) { @@ -2037,8 +2003,6 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_write_unlock(s); } - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); - reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); diff --git a/trunk/fs/select.c b/trunk/fs/select.c index 17d33d09fc16..bae321569dfa 100644 --- a/trunk/fs/select.c +++ b/trunk/fs/select.c @@ -614,7 +614,6 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, return ret; } -#ifdef HAVE_SET_RESTORE_SIGMASK static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -686,7 +685,6 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize); } -#endif /* HAVE_SET_RESTORE_SIGMASK */ #ifdef __ARCH_WANT_SYS_OLD_SELECT struct sel_arg_struct { @@ -941,7 +939,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, return ret; } -#ifdef HAVE_SET_RESTORE_SIGMASK SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, struct timespec __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) @@ -992,4 +989,3 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, return ret; } -#endif /* HAVE_SET_RESTORE_SIGMASK */ diff --git a/trunk/fs/signalfd.c b/trunk/fs/signalfd.c index 9f35a37173de..7ae2a574cb25 100644 --- a/trunk/fs/signalfd.c +++ b/trunk/fs/signalfd.c @@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t 
__user *, user_mask, if (ufd < 0) kfree(ctx); } else { - int fput_needed; - struct file *file = fget_light(ufd, &fput_needed); + struct file *file = fget(ufd); if (!file) return -EBADF; ctx = file->private_data; if (file->f_op != &signalfd_fops) { - fput_light(file, fput_needed); + fput(file); return -EINVAL; } spin_lock_irq(&current->sighand->siglock); @@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, spin_unlock_irq(&current->sighand->siglock); wake_up(&current->sighand->signalfd_wqh); - fput_light(file, fput_needed); + fput(file); } return ufd; diff --git a/trunk/fs/splice.c b/trunk/fs/splice.c index c9f1318a3b82..406ef2b792c2 100644 --- a/trunk/fs/splice.c +++ b/trunk/fs/splice.c @@ -1003,10 +1003,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); if (!ret) { - ret = file_update_time(out); - if (!ret) - ret = splice_from_pipe_feed(pipe, &sd, - pipe_to_file); + file_update_time(out); + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); } mutex_unlock(&inode->i_mutex); } while (ret > 0); diff --git a/trunk/fs/statfs.c b/trunk/fs/statfs.c index 95ad5c0e586c..43e6b6fe4e85 100644 --- a/trunk/fs/statfs.c +++ b/trunk/fs/statfs.c @@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) int fd_statfs(int fd, struct kstatfs *st) { - int fput_needed; - struct file *file = fget_light(fd, &fput_needed); + struct file *file = fget(fd); int error = -EBADF; if (file) { error = vfs_statfs(&file->f_path, st); - fput_light(file, fput_needed); + fput(file); } return error; } diff --git a/trunk/fs/sync.c b/trunk/fs/sync.c index 11e3d1c44901..0e8db939d96f 100644 --- a/trunk/fs/sync.c +++ b/trunk/fs/sync.c @@ -188,12 +188,11 @@ static int do_fsync(unsigned int fd, int datasync) { struct file *file; int ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); + file = fget(fd); if (file) { ret = vfs_fsync(file,
datasync); - fput_light(file, fput_needed); + fput(file); } return ret; } diff --git a/trunk/fs/ubifs/dir.c b/trunk/fs/ubifs/dir.c index a6d42efc76d2..62a2727f4ecf 100644 --- a/trunk/fs/ubifs/dir.c +++ b/trunk/fs/ubifs/dir.c @@ -1127,7 +1127,16 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - generic_fillattr(inode, stat); + stat->dev = inode->i_sb->s_dev; + stat->ino = inode->i_ino; + stat->mode = inode->i_mode; + stat->nlink = inode->i_nlink; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; + stat->rdev = inode->i_rdev; + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; stat->blksize = UBIFS_BLOCK_SIZE; stat->size = ui->ui_size; diff --git a/trunk/fs/udf/namei.c b/trunk/fs/udf/namei.c index 18024178ac4c..a165c66e3eef 100644 --- a/trunk/fs/udf/namei.c +++ b/trunk/fs/udf/namei.c @@ -1260,15 +1260,16 @@ static struct dentry *udf_fh_to_parent(struct super_block *sb, fid->udf.parent_partref, fid->udf.parent_generation); } -static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp, - struct inode *parent) +static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, + int connectable) { int len = *lenp; + struct inode *inode = de->d_inode; struct kernel_lb_addr location = UDF_I(inode)->i_location; struct fid *fid = (struct fid *)fh; int type = FILEID_UDF_WITHOUT_PARENT; - if (parent && (len < 5)) { + if (connectable && (len < 5)) { *lenp = 5; return 255; } else if (len < 3) { @@ -1281,11 +1282,14 @@ static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp, fid->udf.partref = location.partitionReferenceNum; fid->udf.generation = inode->i_generation; - if (parent) { - location = UDF_I(parent)->i_location; + if (connectable && !S_ISDIR(inode->i_mode)) { + spin_lock(&de->d_lock); + inode = de->d_parent->d_inode; + location = UDF_I(inode)->i_location; fid->udf.parent_block = location.logicalBlockNum; 
fid->udf.parent_partref = location.partitionReferenceNum; fid->udf.parent_generation = inode->i_generation; + spin_unlock(&de->d_lock); *lenp = 5; type = FILEID_UDF_WITH_PARENT; } diff --git a/trunk/fs/utimes.c b/trunk/fs/utimes.c index fa4dbe451e27..ba653f3dc1bc 100644 --- a/trunk/fs/utimes.c +++ b/trunk/fs/utimes.c @@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, goto out; if (filename == NULL && dfd != AT_FDCWD) { - int fput_needed; struct file *file; if (flags & AT_SYMLINK_NOFOLLOW) goto out; - file = fget_light(dfd, &fput_needed); + file = fget(dfd); error = -EBADF; if (!file) goto out; error = utimes_common(&file->f_path, times); - fput_light(file, fput_needed); + fput(file); } else { struct path path; int lookup_flags = 0; diff --git a/trunk/fs/xattr.c b/trunk/fs/xattr.c index 1d7ac3790458..3c8c1cc333c7 100644 --- a/trunk/fs/xattr.c +++ b/trunk/fs/xattr.c @@ -399,12 +399,11 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { - int fput_needed; struct file *f; struct dentry *dentry; int error = -EBADF; - f = fget_light(fd, &fput_needed); + f = fget(fd); if (!f) return error; dentry = f->f_path.dentry; @@ -414,7 +413,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, error = setxattr(dentry, name, value, size, flags); mnt_drop_write_file(f); } - fput_light(f, fput_needed); + fput(f); return error; } @@ -487,16 +486,15 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { - int fput_needed; struct file *f; ssize_t error = -EBADF; - f = fget_light(fd, &fput_needed); + f = fget(fd); if (!f) return error; audit_inode(NULL, f->f_path.dentry); error = getxattr(f->f_path.dentry, name, value, size); - fput_light(f, fput_needed); + fput(f); return error; } @@ -568,16 
+566,15 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { - int fput_needed; struct file *f; ssize_t error = -EBADF; - f = fget_light(fd, &fput_needed); + f = fget(fd); if (!f) return error; audit_inode(NULL, f->f_path.dentry); error = listxattr(f->f_path.dentry, list, size); - fput_light(f, fput_needed); + fput(f); return error; } @@ -637,12 +634,11 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { - int fput_needed; struct file *f; struct dentry *dentry; int error = -EBADF; - f = fget_light(fd, &fput_needed); + f = fget(fd); if (!f) return error; dentry = f->f_path.dentry; @@ -652,7 +648,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) error = removexattr(dentry, name); mnt_drop_write_file(f); } - fput_light(f, fput_needed); + fput(f); return error; } diff --git a/trunk/fs/xfs/kmem.c b/trunk/fs/xfs/kmem.c index 4a7286c1dc80..a907de565db3 100644 --- a/trunk/fs/xfs/kmem.c +++ b/trunk/fs/xfs/kmem.c @@ -46,7 +46,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) } void * -kmem_alloc(size_t size, xfs_km_flags_t flags) +kmem_alloc(size_t size, unsigned int __nocast flags) { int retries = 0; gfp_t lflags = kmem_flags_convert(flags); @@ -65,7 +65,7 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) } void * -kmem_zalloc(size_t size, xfs_km_flags_t flags) +kmem_zalloc(size_t size, unsigned int __nocast flags) { void *ptr; @@ -87,7 +87,7 @@ kmem_free(const void *ptr) void * kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, - xfs_km_flags_t flags) + unsigned int __nocast flags) { void *new; @@ -102,7 +102,7 @@ kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) +kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) { int retries = 0; gfp_t lflags = 
kmem_flags_convert(flags); @@ -121,7 +121,7 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) +kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) { void *ptr; diff --git a/trunk/fs/xfs/kmem.h b/trunk/fs/xfs/kmem.h index b2f2620f9a87..ab7c53fe346e 100644 --- a/trunk/fs/xfs/kmem.h +++ b/trunk/fs/xfs/kmem.h @@ -27,11 +27,10 @@ * General memory allocation interfaces */ -typedef unsigned __bitwise xfs_km_flags_t; -#define KM_SLEEP ((__force xfs_km_flags_t)0x0001u) -#define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u) -#define KM_NOFS ((__force xfs_km_flags_t)0x0004u) -#define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) +#define KM_SLEEP 0x0001u +#define KM_NOSLEEP 0x0002u +#define KM_NOFS 0x0004u +#define KM_MAYFAIL 0x0008u /* * We use a special process flag to avoid recursive callbacks into @@ -39,7 +38,7 @@ typedef unsigned __bitwise xfs_km_flags_t; * warnings, so we explicitly skip any generic ones (silly of us). 
*/ static inline gfp_t -kmem_flags_convert(xfs_km_flags_t flags) +kmem_flags_convert(unsigned int __nocast flags) { gfp_t lflags; @@ -55,9 +54,9 @@ kmem_flags_convert(xfs_km_flags_t flags) return lflags; } -extern void *kmem_alloc(size_t, xfs_km_flags_t); -extern void *kmem_zalloc(size_t, xfs_km_flags_t); -extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); extern void kmem_free(const void *); static inline void *kmem_zalloc_large(size_t size) @@ -108,7 +107,7 @@ kmem_zone_destroy(kmem_zone_t *zone) kmem_cache_destroy(zone); } -extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); -extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t); +extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/trunk/fs/xfs/xfs_export.c b/trunk/fs/xfs/xfs_export.c index 42679223a0fd..2d25d19c4ea1 100644 --- a/trunk/fs/xfs/xfs_export.c +++ b/trunk/fs/xfs/xfs_export.c @@ -52,18 +52,19 @@ static int xfs_fileid_length(int fileid_type) STATIC int xfs_fs_encode_fh( - struct inode *inode, - __u32 *fh, - int *max_len, - struct inode *parent) + struct dentry *dentry, + __u32 *fh, + int *max_len, + int connectable) { struct fid *fid = (struct fid *)fh; struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; + struct inode *inode = dentry->d_inode; int fileid_type; int len; /* Directories don't need their parent encoded, they have ".." 
*/ - if (!parent) + if (S_ISDIR(inode->i_mode) || !connectable) fileid_type = FILEID_INO32_GEN; else fileid_type = FILEID_INO32_GEN_PARENT; @@ -95,16 +96,20 @@ xfs_fs_encode_fh( switch (fileid_type) { case FILEID_INO32_GEN_PARENT: - fid->i32.parent_ino = XFS_I(parent)->i_ino; - fid->i32.parent_gen = parent->i_generation; + spin_lock(&dentry->d_lock); + fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; + fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; + spin_unlock(&dentry->d_lock); /*FALLTHRU*/ case FILEID_INO32_GEN: fid->i32.ino = XFS_I(inode)->i_ino; fid->i32.gen = inode->i_generation; break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - fid64->parent_ino = XFS_I(parent)->i_ino; - fid64->parent_gen = parent->i_generation; + spin_lock(&dentry->d_lock); + fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; + fid64->parent_gen = dentry->d_parent->d_inode->i_generation; + spin_unlock(&dentry->d_lock); /*FALLTHRU*/ case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: fid64->ino = XFS_I(inode)->i_ino; diff --git a/trunk/fs/xfs/xfs_file.c b/trunk/fs/xfs/xfs_file.c index 9f7ec15a6522..8d214b87f6bb 100644 --- a/trunk/fs/xfs/xfs_file.c +++ b/trunk/fs/xfs/xfs_file.c @@ -586,11 +586,8 @@ xfs_file_aio_write_checks( * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. 
*/ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) { - error = file_update_time(file); - if (error) - return error; - } + if (likely(!(file->f_mode & FMODE_NOCMTIME))) + file_update_time(file); /* * If we're writing the file then make sure to clear the setuid and diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index f30d9807dc48..6b965bf450e4 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -3152,7 +3152,7 @@ xlog_ticket_alloc( int cnt, char client, bool permanent, - xfs_km_flags_t alloc_flags) + int alloc_flags) { struct xlog_ticket *tic; uint num_headers; diff --git a/trunk/fs/xfs/xfs_log_priv.h b/trunk/fs/xfs/xfs_log_priv.h index 5bc33261f5be..735ff1ee53da 100644 --- a/trunk/fs/xfs/xfs_log_priv.h +++ b/trunk/fs/xfs/xfs_log_priv.h @@ -555,7 +555,7 @@ extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); extern kmem_zone_t *xfs_log_ticket_zone; struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, int count, char client, bool permanent, - xfs_km_flags_t alloc_flags); + int alloc_flags); static inline void diff --git a/trunk/fs/xfs/xfs_trans.c b/trunk/fs/xfs/xfs_trans.c index fdf324508c5e..cdf896fcbfa4 100644 --- a/trunk/fs/xfs/xfs_trans.c +++ b/trunk/fs/xfs/xfs_trans.c @@ -584,7 +584,7 @@ xfs_trans_t * _xfs_trans_alloc( xfs_mount_t *mp, uint type, - xfs_km_flags_t memflags) + uint memflags) { xfs_trans_t *tp; diff --git a/trunk/fs/xfs/xfs_trans.h b/trunk/fs/xfs/xfs_trans.h index 7c37b533aa8e..7ab99e1898c8 100644 --- a/trunk/fs/xfs/xfs_trans.h +++ b/trunk/fs/xfs/xfs_trans.h @@ -443,7 +443,7 @@ typedef struct xfs_trans { * XFS transaction mechanism exported interfaces. 
*/ xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); -xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); +xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); diff --git a/trunk/include/asm-generic/posix_types.h b/trunk/include/asm-generic/posix_types.h index fe74fccf18db..91d44bd4dde3 100644 --- a/trunk/include/asm-generic/posix_types.h +++ b/trunk/include/asm-generic/posix_types.h @@ -23,6 +23,10 @@ typedef __kernel_ulong_t __kernel_ino_t; typedef unsigned int __kernel_mode_t; #endif +#ifndef __kernel_nlink_t +typedef __kernel_ulong_t __kernel_nlink_t; +#endif + #ifndef __kernel_pid_t typedef int __kernel_pid_t; #endif diff --git a/trunk/include/linux/errno.h b/trunk/include/linux/errno.h index e0de516374da..2d09bfa5c262 100644 --- a/trunk/include/linux/errno.h +++ b/trunk/include/linux/errno.h @@ -17,7 +17,6 @@ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define EPROBE_DEFER 517 /* Driver requests probe retry */ -#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ diff --git a/trunk/include/linux/exportfs.h b/trunk/include/linux/exportfs.h index 12291a7ee275..3a4cef5322dc 100644 --- a/trunk/include/linux/exportfs.h +++ b/trunk/include/linux/exportfs.h @@ -165,8 +165,8 @@ struct fid { */ struct export_operations { - int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, - struct inode *parent); + int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, + int connectable); struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index 
51978ed43e97..40887afaaca7 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -1692,7 +1692,6 @@ struct inode_operations { int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); - int (*update_time)(struct inode *, struct timespec *, int); } ____cacheline_aligned; struct seq_file; @@ -1851,13 +1850,6 @@ static inline void inode_inc_iversion(struct inode *inode) spin_unlock(&inode->i_lock); } -enum file_time_flags { - S_ATIME = 1, - S_MTIME = 2, - S_CTIME = 4, - S_VERSION = 8, -}; - extern void touch_atime(struct path *); static inline void file_accessed(struct file *file) { @@ -2591,7 +2583,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); -extern int file_update_time(struct file *file); +extern void file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); diff --git a/trunk/include/linux/fsnotify_backend.h b/trunk/include/linux/fsnotify_backend.h index 63d966d5c2ea..91d0e0a34ef3 100644 --- a/trunk/include/linux/fsnotify_backend.h +++ b/trunk/include/linux/fsnotify_backend.h @@ -60,7 +60,7 @@ #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) + FS_DELETE) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) diff --git a/trunk/include/linux/jbd2.h b/trunk/include/linux/jbd2.h index f334c7fab967..912c30a8ddb1 100644 --- a/trunk/include/linux/jbd2.h +++ b/trunk/include/linux/jbd2.h @@ -31,7 +31,6 @@ #include #include #include -#include #endif #define journal_oom_retry 1 @@ -148,24 +147,12 @@ typedef struct journal_header_s #define JBD2_CRC32_CHKSUM 
1 #define JBD2_MD5_CHKSUM 2 #define JBD2_SHA1_CHKSUM 3 -#define JBD2_CRC32C_CHKSUM 4 #define JBD2_CRC32_CHKSUM_SIZE 4 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) /* * Commit block header for storing transactional checksums: - * - * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum* - * fields are used to store a checksum of the descriptor and data blocks. - * - * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum - * field is used to store crc32c(uuid+commit_block). Each journal metadata - * block gets its own checksum, and data block checksums are stored in - * journal_block_tag (in the descriptor). The other h_chksum* fields are - * not used. - * - * Checksum v1 and v2 are mutually exclusive features. */ struct commit_header { __be32 h_magic; @@ -188,19 +175,13 @@ struct commit_header { typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ - __be16 t_checksum; /* truncated crc32c(uuid+seq+block) */ - __be16 t_flags; /* See below */ + __be32 t_flags; /* See below */ __be32 t_blocknr_high; /* most-significant high 32bits. 
*/ } journal_block_tag_t; #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) -/* Tail of descriptor block, for checksumming */ -struct jbd2_journal_block_tail { - __be32 t_checksum; /* crc32c(uuid+descr_block) */ -}; - /* * The revoke descriptor: used on disk to describe a series of blocks to * be revoked from the log @@ -211,10 +192,6 @@ typedef struct jbd2_journal_revoke_header_s __be32 r_count; /* Count of bytes used in the block */ } jbd2_journal_revoke_header_t; -/* Tail of revoke block, for checksumming */ -struct jbd2_journal_revoke_tail { - __be32 r_checksum; /* crc32c(uuid+revoke_block) */ -}; /* Definitions for the journal tag flags word: */ #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ @@ -264,10 +241,7 @@ typedef struct journal_superblock_s __be32 s_max_trans_data; /* Limit of data blocks per trans. */ /* 0x0050 */ - __u8 s_checksum_type; /* checksum type */ - __u8 s_padding2[3]; - __u32 s_padding[42]; - __be32 s_checksum; /* crc32c(superblock) */ + __u32 s_padding[44]; /* 0x0100 */ __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ @@ -289,15 +263,13 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 -#define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ JBD2_FEATURE_INCOMPAT_64BIT | \ - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ - JBD2_FEATURE_INCOMPAT_CSUM_V2) + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) #ifdef __KERNEL__ @@ -967,12 +939,6 @@ struct journal_s * superblock pointer here */ void *j_private; - - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *j_chksum_driver; - - /* 
Precomputed journal UUID checksum for seeding other checksums */ - __u32 j_csum_seed; }; /* @@ -1302,25 +1268,6 @@ static inline int jbd_space_needed(journal_t *journal) extern int jbd_blocks_per_page(struct inode *inode); -static inline u32 jbd2_chksum(journal_t *journal, u32 crc, - const void *address, unsigned int length) -{ - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(journal->j_chksum_driver)]; - } desc; - int err; - - desc.shash.tfm = journal->j_chksum_driver; - desc.shash.flags = 0; - *(u32 *)desc.ctx = crc; - - err = crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; -} - #ifdef __KERNEL__ #define buffer_trace_init(bh) do {} while (0) diff --git a/trunk/include/linux/jbd_common.h b/trunk/include/linux/jbd_common.h index 6133679bc4c0..6230f8556a4e 100644 --- a/trunk/include/linux/jbd_common.h +++ b/trunk/include/linux/jbd_common.h @@ -12,7 +12,6 @@ enum jbd_state_bits { BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ - BH_Verified, /* Metadata block has been verified ok */ BH_JBDPrivateStart, /* First bit available for private use by FS */ }; @@ -25,7 +24,6 @@ TAS_BUFFER_FNS(Revoked, revoked) BUFFER_FNS(RevokeValid, revokevalid) TAS_BUFFER_FNS(RevokeValid, revokevalid) BUFFER_FNS(Freed, freed) -BUFFER_FNS(Verified, verified) static inline struct buffer_head *jh2bh(struct journal_head *jh) { diff --git a/trunk/include/linux/lglock.h b/trunk/include/linux/lglock.h index f01e5f6d1f07..87f402ccec55 100644 --- a/trunk/include/linux/lglock.h +++ b/trunk/include/linux/lglock.h @@ -23,17 +23,28 @@ #include #include #include -#include /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) lg_lock_init(name, #name) -#define br_read_lock(name) lg_local_lock(name) -#define br_read_unlock(name) lg_local_unlock(name) -#define 
br_write_lock(name) lg_global_lock(name) -#define br_write_unlock(name) lg_global_unlock(name) +#define br_lock_init(name) name##_lock_init() +#define br_read_lock(name) name##_local_lock() +#define br_read_unlock(name) name##_local_unlock() +#define br_write_lock(name) name##_global_lock_online() +#define br_write_unlock(name) name##_global_unlock_online() +#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) + +#define lg_lock_init(name) name##_lock_init() +#define lg_local_lock(name) name##_local_lock() +#define lg_local_unlock(name) name##_local_unlock() +#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) +#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) +#define lg_global_lock(name) name##_global_lock() +#define lg_global_unlock(name) name##_global_unlock() +#define lg_global_lock_online(name) name##_global_lock_online() +#define lg_global_unlock_online(name) name##_global_unlock_online() + #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -48,26 +59,142 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif -struct lglock { - arch_spinlock_t __percpu *lock; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lock_key; - struct lockdep_map lock_dep_map; -#endif -}; -#define DEFINE_LGLOCK(name) \ - DEFINE_LGLOCK_LOCKDEP(name); \ - DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ - = __ARCH_SPIN_LOCK_UNLOCKED; \ - struct lglock name = { .lock = &name ## _lock } - -void lg_lock_init(struct lglock *lg, char *name); -void lg_local_lock(struct lglock *lg); -void lg_local_unlock(struct lglock *lg); -void lg_local_lock_cpu(struct lglock *lg, int cpu); -void lg_local_unlock_cpu(struct lglock *lg, int cpu); -void lg_global_lock(struct lglock *lg); -void lg_global_unlock(struct lglock *lg); +#define DECLARE_LGLOCK(name) \ + extern void name##_lock_init(void); \ + extern void name##_local_lock(void); \ + extern void name##_local_unlock(void); \ + extern void 
name##_local_lock_cpu(int cpu); \ + extern void name##_local_unlock_cpu(int cpu); \ + extern void name##_global_lock(void); \ + extern void name##_global_unlock(void); \ + extern void name##_global_lock_online(void); \ + extern void name##_global_unlock_online(void); \ +#define DEFINE_LGLOCK(name) \ + \ + DEFINE_SPINLOCK(name##_cpu_lock); \ + cpumask_t name##_cpus __read_mostly; \ + DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ + DEFINE_LGLOCK_LOCKDEP(name); \ + \ + static int \ + name##_lg_cpu_callback(struct notifier_block *nb, \ + unsigned long action, void *hcpu) \ + { \ + switch (action & ~CPU_TASKS_FROZEN) { \ + case CPU_UP_PREPARE: \ + spin_lock(&name##_cpu_lock); \ + cpu_set((unsigned long)hcpu, name##_cpus); \ + spin_unlock(&name##_cpu_lock); \ + break; \ + case CPU_UP_CANCELED: case CPU_DEAD: \ + spin_lock(&name##_cpu_lock); \ + cpu_clear((unsigned long)hcpu, name##_cpus); \ + spin_unlock(&name##_cpu_lock); \ + } \ + return NOTIFY_OK; \ + } \ + static struct notifier_block name##_lg_cpu_notifier = { \ + .notifier_call = name##_lg_cpu_callback, \ + }; \ + void name##_lock_init(void) { \ + int i; \ + LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ + } \ + register_hotcpu_notifier(&name##_lg_cpu_notifier); \ + get_online_cpus(); \ + for_each_online_cpu(i) \ + cpu_set(i, name##_cpus); \ + put_online_cpus(); \ + } \ + EXPORT_SYMBOL(name##_lock_init); \ + \ + void name##_local_lock(void) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock); \ + \ + void name##_local_unlock(void) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + 
arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock); \ + \ + void name##_local_lock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock_cpu); \ + \ + void name##_local_unlock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock_cpu); \ + \ + void name##_global_lock_online(void) { \ + int i; \ + spin_lock(&name##_cpu_lock); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_cpu(i, &name##_cpus) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock_online); \ + \ + void name##_global_unlock_online(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_cpu(i, &name##_cpus) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + spin_unlock(&name##_cpu_lock); \ + } \ + EXPORT_SYMBOL(name##_global_unlock_online); \ + \ + void name##_global_lock(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock); \ + \ + void name##_global_unlock(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock); #endif diff --git a/trunk/include/linux/mm.h b/trunk/include/linux/mm.h index 
b36d08ce5c57..ce26716238c3 100644 --- a/trunk/include/linux/mm.h +++ b/trunk/include/linux/mm.h @@ -1392,7 +1392,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap_pgoff(struct file *, unsigned long, +extern unsigned long do_mmap(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern int do_munmap(struct mm_struct *, unsigned long, size_t); diff --git a/trunk/include/linux/security.h b/trunk/include/linux/security.h index 4e5a73cdbbef..ab0e091ce5fa 100644 --- a/trunk/include/linux/security.h +++ b/trunk/include/linux/security.h @@ -86,9 +86,9 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); -extern int cap_mmap_addr(unsigned long addr); -extern int cap_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); +extern int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -586,17 +586,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * simple integer value. When @arg represents a user space pointer, it * should never be used by the security module. * Return 0 if permission is granted. - * @mmap_addr : - * Check permissions for a mmap operation at @addr. - * @addr contains virtual address that will be used for the operation. 
- * Return 0 if permission is granted. - * @mmap_file : + * @file_mmap : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). * @reqprot contains the protection requested by the application. * @prot contains the protection that will be applied by the kernel. * @flags contains the operational flags. + * @addr contains virtual address that will be used for the operation. + * @addr_only contains a boolean: 0 if file-backed VMA, otherwise 1. * Return 0 if permission is granted. * @file_mprotect: * Check permissions before changing memory access permissions. @@ -1483,10 +1481,10 @@ struct security_operations { void (*file_free_security) (struct file *file); int (*file_ioctl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*mmap_addr) (unsigned long addr); - int (*mmap_file) (struct file *file, + int (*file_mmap) (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags); + unsigned long flags, unsigned long addr, + unsigned long addr_only); int (*file_mprotect) (struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1745,9 +1743,9 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_mmap_file(struct file *file, unsigned long prot, - unsigned long flags); -int security_mmap_addr(unsigned long addr); +int security_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); @@ -2183,15 +2181,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int 
cmd, return 0; } -static inline int security_mmap_file(struct file *file, unsigned long prot, - unsigned long flags) -{ - return 0; -} - -static inline int security_mmap_addr(unsigned long addr) +static inline int security_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, + unsigned long flags, + unsigned long addr, + unsigned long addr_only) { - return cap_mmap_addr(addr); + return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/trunk/include/linux/sunrpc/svc.h b/trunk/include/linux/sunrpc/svc.h index 40e0a273faea..51b29ac45a8e 100644 --- a/trunk/include/linux/sunrpc/svc.h +++ b/trunk/include/linux/sunrpc/svc.h @@ -232,6 +232,7 @@ struct svc_rqst { struct svc_pool * rq_pool; /* thread pool */ struct svc_procedure * rq_procinfo; /* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ + u32 rq_flavor; /* pseudoflavor */ struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ @@ -415,7 +416,6 @@ struct svc_procedure { */ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); -int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv *, struct net *net)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, diff --git a/trunk/include/linux/sunrpc/svcauth.h b/trunk/include/linux/sunrpc/svcauth.h index dd74084a9799..2c54683b91de 100644 --- a/trunk/include/linux/sunrpc/svcauth.h +++ b/trunk/include/linux/sunrpc/svcauth.h @@ -15,23 +15,13 @@ #include #include #include -#include struct svc_cred { uid_t cr_uid; gid_t cr_gid; struct group_info *cr_group_info; - u32 cr_flavor; /* pseudoflavor */ - char *cr_principal; /* for gss */ }; -static inline void 
free_svc_cred(struct svc_cred *cred) -{ - if (cred->cr_group_info) - put_group_info(cred->cr_group_info); - kfree(cred->cr_principal); -} - struct svc_rqst; /* forward decl */ struct in6_addr; diff --git a/trunk/include/linux/sunrpc/svcauth_gss.h b/trunk/include/linux/sunrpc/svcauth_gss.h index 726aff1a5201..7c32daa025eb 100644 --- a/trunk/include/linux/sunrpc/svcauth_gss.h +++ b/trunk/include/linux/sunrpc/svcauth_gss.h @@ -22,6 +22,7 @@ int gss_svc_init_net(struct net *net); void gss_svc_shutdown_net(struct net *net); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); u32 svcauth_gss_flavor(struct auth_domain *dom); +char *svc_gss_principal(struct svc_rqst *); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/trunk/include/linux/thread_info.h b/trunk/include/linux/thread_info.h index db78775eff3b..eee729428683 100644 --- a/trunk/include/linux/thread_info.h +++ b/trunk/include/linux/thread_info.h @@ -129,6 +129,10 @@ static inline void set_restore_sigmask(void) } #endif /* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */ +#ifndef HAVE_SET_RESTORE_SIGMASK +#error "no set_restore_sigmask() provided and default one won't work" +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff --git a/trunk/include/linux/types.h b/trunk/include/linux/types.h index 9c1bd539ea70..7f480db60231 100644 --- a/trunk/include/linux/types.h +++ b/trunk/include/linux/types.h @@ -25,7 +25,7 @@ typedef __kernel_dev_t dev_t; typedef __kernel_ino_t ino_t; typedef __kernel_mode_t mode_t; typedef unsigned short umode_t; -typedef __u32 nlink_t; +typedef __kernel_nlink_t nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; typedef __kernel_daddr_t daddr_t; diff --git a/trunk/ipc/shm.c b/trunk/ipc/shm.c index 5e2cbfdab6fc..406c5b208193 100644 --- a/trunk/ipc/shm.c +++ b/trunk/ipc/shm.c @@ -1036,10 +1036,6 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) sfd->file = shp->shm_file; sfd->vm_ops 
= NULL; - err = security_mmap_file(file, prot, flags); - if (err) - goto out_fput; - down_write(¤t->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { err = -EINVAL; @@ -1054,7 +1050,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) goto invalid; } - user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); + user_addr = do_mmap (file, addr, size, prot, flags, 0); *raddr = user_addr; err = 0; if (IS_ERR_VALUE(user_addr)) @@ -1062,7 +1058,6 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) invalid: up_write(¤t->mm->mmap_sem); -out_fput: fput(file); out_nattch: diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile index c0cc67ad764c..6f3d0ae044b2 100644 --- a/trunk/kernel/Makefile +++ b/trunk/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o lglock.o + async.o range.o groups.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/trunk/kernel/lglock.c b/trunk/kernel/lglock.c deleted file mode 100644 index 6535a667a5a7..000000000000 --- a/trunk/kernel/lglock.c +++ /dev/null @@ -1,89 +0,0 @@ -/* See include/linux/lglock.h for description */ -#include -#include -#include -#include - -/* - * Note there is no uninit, so lglocks cannot be defined in - * modules (but it's fine to use them from there) - * Could be added though, just undo lg_lock_init - */ - -void lg_lock_init(struct lglock *lg, char *name) -{ - LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); -} -EXPORT_SYMBOL(lg_lock_init); - -void lg_local_lock(struct lglock *lg) -{ - arch_spinlock_t *lock; - - preempt_disable(); - rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); - lock = this_cpu_ptr(lg->lock); - arch_spin_lock(lock); -} -EXPORT_SYMBOL(lg_local_lock); - -void lg_local_unlock(struct lglock 
*lg) -{ - arch_spinlock_t *lock; - - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); - lock = this_cpu_ptr(lg->lock); - arch_spin_unlock(lock); - preempt_enable(); -} -EXPORT_SYMBOL(lg_local_unlock); - -void lg_local_lock_cpu(struct lglock *lg, int cpu) -{ - arch_spinlock_t *lock; - - preempt_disable(); - rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); - lock = per_cpu_ptr(lg->lock, cpu); - arch_spin_lock(lock); -} -EXPORT_SYMBOL(lg_local_lock_cpu); - -void lg_local_unlock_cpu(struct lglock *lg, int cpu) -{ - arch_spinlock_t *lock; - - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); - lock = per_cpu_ptr(lg->lock, cpu); - arch_spin_unlock(lock); - preempt_enable(); -} -EXPORT_SYMBOL(lg_local_unlock_cpu); - -void lg_global_lock(struct lglock *lg) -{ - int i; - - preempt_disable(); - rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); - for_each_possible_cpu(i) { - arch_spinlock_t *lock; - lock = per_cpu_ptr(lg->lock, i); - arch_spin_lock(lock); - } -} -EXPORT_SYMBOL(lg_global_lock); - -void lg_global_unlock(struct lglock *lg) -{ - int i; - - rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); - for_each_possible_cpu(i) { - arch_spinlock_t *lock; - lock = per_cpu_ptr(lg->lock, i); - arch_spin_unlock(lock); - } - preempt_enable(); -} -EXPORT_SYMBOL(lg_global_unlock); diff --git a/trunk/kernel/signal.c b/trunk/kernel/signal.c index 08dfbd748cd2..95a9d9d8122b 100644 --- a/trunk/kernel/signal.c +++ b/trunk/kernel/signal.c @@ -3235,7 +3235,6 @@ SYSCALL_DEFINE0(pause) #endif -#ifdef HAVE_SET_RESTORE_SIGMASK int sigsuspend(sigset_t *set) { sigdelsetmask(set, sigmask(SIGKILL)|sigmask(SIGSTOP)); @@ -3248,7 +3247,6 @@ int sigsuspend(sigset_t *set) set_restore_sigmask(); return -ERESTARTNOHAND; } -#endif #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND /** diff --git a/trunk/mm/cleancache.c b/trunk/mm/cleancache.c index 32e6f4136fa2..5646c740f613 100644 --- a/trunk/mm/cleancache.c +++ b/trunk/mm/cleancache.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL(__cleancache_init_shared_fs); static int 
cleancache_get_key(struct inode *inode, struct cleancache_filekey *key) { - int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *); + int (*fhfn)(struct dentry *, __u32 *fh, int *, int); int len = 0, maxlen = CLEANCACHE_KEY_MAX; struct super_block *sb = inode->i_sb; @@ -88,7 +88,9 @@ static int cleancache_get_key(struct inode *inode, if (sb->s_export_op != NULL) { fhfn = sb->s_export_op->encode_fh; if (fhfn) { - len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); + struct dentry d; + d.d_inode = inode; + len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); if (len <= 0 || len == 255) return -1; if (maxlen > CLEANCACHE_KEY_MAX) diff --git a/trunk/mm/filemap.c b/trunk/mm/filemap.c index a4a5260b0279..64b48f934b89 100644 --- a/trunk/mm/filemap.c +++ b/trunk/mm/filemap.c @@ -1899,6 +1899,71 @@ struct page *read_cache_page(struct address_space *mapping, } EXPORT_SYMBOL(read_cache_page); +/* + * The logic we want is + * + * if suid or (sgid and xgrp) + * remove privs + */ +int should_remove_suid(struct dentry *dentry) +{ + umode_t mode = dentry->d_inode->i_mode; + int kill = 0; + + /* suid always must be killed */ + if (unlikely(mode & S_ISUID)) + kill = ATTR_KILL_SUID; + + /* + * sgid without any exec bits is just a mandatory locking mark; leave + * it alone. If some exec bits are set, it's a real sgid; kill it. 
+ */ + if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) + kill |= ATTR_KILL_SGID; + + if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) + return kill; + + return 0; +} +EXPORT_SYMBOL(should_remove_suid); + +static int __remove_suid(struct dentry *dentry, int kill) +{ + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; + return notify_change(dentry, &newattrs); +} + +int file_remove_suid(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + int killsuid; + int killpriv; + int error = 0; + + /* Fast path for nothing security related */ + if (IS_NOSEC(inode)) + return 0; + + killsuid = should_remove_suid(dentry); + killpriv = security_inode_need_killpriv(dentry); + + if (killpriv < 0) + return killpriv; + if (killpriv) + error = security_inode_killpriv(dentry); + if (!error && killsuid) + error = __remove_suid(dentry, killsuid); + if (!error && (inode->i_sb->s_flags & MS_NOSEC)) + inode->i_flags |= S_NOSEC; + + return error; +} +EXPORT_SYMBOL(file_remove_suid); + static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { @@ -2424,9 +2489,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - err = file_update_time(file); - if (err) - goto out; + file_update_time(file); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/trunk/mm/filemap_xip.c b/trunk/mm/filemap_xip.c index 213ca1f53409..a4eb31132229 100644 --- a/trunk/mm/filemap_xip.c +++ b/trunk/mm/filemap_xip.c @@ -426,9 +426,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (ret) goto out_backing; - ret = file_update_time(filp); - if (ret) - goto out_backing; + file_update_time(filp); ret = __xip_file_write (filp, buf, count, pos, ppos); diff --git a/trunk/mm/internal.h b/trunk/mm/internal.h index 5cbb78190041..4194ab9dc19b 100644 --- 
a/trunk/mm/internal.h +++ b/trunk/mm/internal.h @@ -350,7 +350,3 @@ extern u64 hwpoison_filter_flags_mask; extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; - -extern unsigned long vm_mmap_pgoff(struct file *, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); diff --git a/trunk/mm/mmap.c b/trunk/mm/mmap.c index 3edfcdfa42d9..4a9c2a391e28 100644 --- a/trunk/mm/mmap.c +++ b/trunk/mm/mmap.c @@ -971,13 +971,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint) * The caller must hold down_write(¤t->mm->mmap_sem). */ -unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { struct mm_struct * mm = current->mm; struct inode *inode; vm_flags_t vm_flags; + int error; + unsigned long reqprot = prot; /* * Does the application expect PROT_READ to imply PROT_EXEC? 
@@ -1099,9 +1101,39 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } + error = security_file_mmap(file, reqprot, prot, flags, addr, 0); + if (error) + return error; + return mmap_region(file, addr, len, flags, vm_flags, pgoff); } +unsigned long do_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + if (unlikely(offset + PAGE_ALIGN(len) < offset)) + return -EINVAL; + if (unlikely(offset & ~PAGE_MASK)) + return -EINVAL; + return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +} +EXPORT_SYMBOL(do_mmap); + +unsigned long vm_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + unsigned long ret; + struct mm_struct *mm = current->mm; + + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + return ret; +} +EXPORT_SYMBOL(vm_mmap); + SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -1133,7 +1165,10 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); + down_write(¤t->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + if (file) fput(file); out: @@ -1594,9 +1629,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (addr & ~PAGE_MASK) return -EINVAL; - addr = arch_rebalance_pgtables(addr, len); - error = security_mmap_addr(addr); - return error ? 
error : addr; + return arch_rebalance_pgtables(addr, len); } EXPORT_SYMBOL(get_unmapped_area); @@ -1786,7 +1819,7 @@ int expand_downwards(struct vm_area_struct *vma, return -ENOMEM; address &= PAGE_MASK; - error = security_mmap_addr(address); + error = security_file_mmap(NULL, 0, 0, 0, address, 1); if (error) return error; @@ -2126,6 +2159,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) return 0; } +EXPORT_SYMBOL(do_munmap); int vm_munmap(unsigned long start, size_t len) { @@ -2173,6 +2207,10 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (!len) return addr; + error = security_file_mmap(NULL, 0, 0, 0, addr, 1); + if (error) + return error; + flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); @@ -2525,6 +2563,10 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; + ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); + if (ret) + goto out; + ret = insert_vm_struct(mm, vma); if (ret) goto out; diff --git a/trunk/mm/mremap.c b/trunk/mm/mremap.c index 21fed202ddad..db8d983b5a7d 100644 --- a/trunk/mm/mremap.c +++ b/trunk/mm/mremap.c @@ -371,6 +371,10 @@ static unsigned long mremap_to(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; + ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + if (ret) + goto out; + ret = do_munmap(mm, new_addr, new_len); if (ret) goto out; @@ -428,17 +432,15 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise * This option implies MREMAP_MAYMOVE. 
*/ -SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, - unsigned long, new_len, unsigned long, flags, - unsigned long, new_addr) +unsigned long do_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long ret = -EINVAL; unsigned long charged = 0; - down_write(¤t->mm->mmap_sem); - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -528,11 +530,25 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } + ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + if (ret) + goto out; ret = move_vma(vma, addr, old_len, new_len, new_addr); } out: if (ret & ~PAGE_MASK) vm_unacct_memory(charged); + return ret; +} + +SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, + unsigned long, new_len, unsigned long, flags, + unsigned long, new_addr) +{ + unsigned long ret; + + down_write(¤t->mm->mmap_sem); + ret = do_mremap(addr, old_len, new_len, flags, new_addr); up_write(¤t->mm->mmap_sem); return ret; } diff --git a/trunk/mm/nommu.c b/trunk/mm/nommu.c index c4acfbc09972..bb8f4f004a82 100644 --- a/trunk/mm/nommu.c +++ b/trunk/mm/nommu.c @@ -889,6 +889,7 @@ static int validate_mmap_request(struct file *file, unsigned long *_capabilities) { unsigned long capabilities, rlen; + unsigned long reqprot = prot; int ret; /* do the simple checks first */ @@ -1046,7 +1047,7 @@ static int validate_mmap_request(struct file *file, } /* allow the security API to have its say */ - ret = security_mmap_addr(addr); + ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); if (ret < 0) return ret; @@ -1232,7 +1233,7 @@ static int do_mmap_private(struct vm_area_struct *vma, /* * handle mapping creation for uClinux */ -unsigned long do_mmap_pgoff(struct file *file, +static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, @@ 
-1470,6 +1471,32 @@ unsigned long do_mmap_pgoff(struct file *file, return -ENOMEM; } +unsigned long do_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + if (unlikely(offset + PAGE_ALIGN(len) < offset)) + return -EINVAL; + if (unlikely(offset & ~PAGE_MASK)) + return -EINVAL; + return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +} +EXPORT_SYMBOL(do_mmap); + +unsigned long vm_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + unsigned long ret; + struct mm_struct *mm = current->mm; + + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + return ret; +} +EXPORT_SYMBOL(vm_mmap); + SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -1486,7 +1513,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); + down_write(¤t->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); if (file) fput(file); diff --git a/trunk/mm/shmem.c b/trunk/mm/shmem.c index 585bd220a21e..d576b84d913c 100644 --- a/trunk/mm/shmem.c +++ b/trunk/mm/shmem.c @@ -2439,9 +2439,11 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, return dentry; } -static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, - struct inode *parent) +static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, + int connectable) { + struct inode *inode = dentry->d_inode; + if (*len < 3) { *len = 3; return 255; diff --git a/trunk/mm/util.c b/trunk/mm/util.c index 8c7265afa29f..ae962b31de88 100644 --- a/trunk/mm/util.c +++ b/trunk/mm/util.c @@ -4,7 +4,6 @@ #include #include 
#include -#include #include #include "internal.h" @@ -342,35 +341,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, } EXPORT_SYMBOL_GPL(get_user_pages_fast); -unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long pgoff) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; - - ret = security_mmap_file(file, prot, flag); - if (!ret) { - down_write(&mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); - up_write(&mm->mmap_sem); - } - return ret; -} - -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - if (unlikely(offset + PAGE_ALIGN(len) < offset)) - return -EINVAL; - if (unlikely(offset & ~PAGE_MASK)) - return -EINVAL; - - return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -} -EXPORT_SYMBOL(vm_mmap); - /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); diff --git a/trunk/net/sched/sch_atm.c b/trunk/net/sched/sch_atm.c index ca8e0a57d945..8522a4793374 100644 --- a/trunk/net/sched/sch_atm.c +++ b/trunk/net/sched/sch_atm.c @@ -16,6 +16,8 @@ #include #include +extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ + /* * The ATM queuing discipline provides a framework for invoking classifiers * (aka "filters"), which in turn select classes of this queuing discipline. diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c b/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c index 107c4528654f..38f388c39dce 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -381,53 +381,21 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) } /* - * We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need - * to do more than that, we shift repeatedly. 
Kevin Coffman reports - * seeing 28 bytes as the value used by Microsoft clients and servers - * with AES, so this constant is chosen to allow handling 28 in one pass - * without using too much stack space. - * - * If that proves to a problem perhaps we could use a more clever - * algorithm. + * We cannot currently handle tokens with rotated data. We need a + * generalized routine to rotate the data in place. It is anticipated + * that we won't encounter rotated data in the general case. */ -#define LOCAL_BUF_LEN 32u - -static void rotate_buf_a_little(struct xdr_buf *buf, unsigned int shift) -{ - char head[LOCAL_BUF_LEN]; - char tmp[LOCAL_BUF_LEN]; - unsigned int this_len, i; - - BUG_ON(shift > LOCAL_BUF_LEN); - - read_bytes_from_xdr_buf(buf, 0, head, shift); - for (i = 0; i + shift < buf->len; i += LOCAL_BUF_LEN) { - this_len = min(LOCAL_BUF_LEN, buf->len - (i + shift)); - read_bytes_from_xdr_buf(buf, i+shift, tmp, this_len); - write_bytes_to_xdr_buf(buf, i, tmp, this_len); - } - write_bytes_to_xdr_buf(buf, buf->len - shift, head, shift); -} - -static void _rotate_left(struct xdr_buf *buf, unsigned int shift) +static u32 +rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc) { - int shifted = 0; - int this_shift; - - shift %= buf->len; - while (shifted < shift) { - this_shift = min(shift - shifted, LOCAL_BUF_LEN); - rotate_buf_a_little(buf, this_shift); - shifted += this_shift; - } -} + unsigned int realrrc = rrc % (buf->len - offset - GSS_KRB5_TOK_HDR_LEN); -static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift) -{ - struct xdr_buf subbuf; + if (realrrc == 0) + return 0; - xdr_buf_subsegment(buf, &subbuf, base, buf->len - base); - _rotate_left(&subbuf, shift); + dprintk("%s: cannot process token with rotated data: " + "rrc %u, realrrc %u\n", __func__, rrc, realrrc); + return 1; } static u32 @@ -527,8 +495,11 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) seqnum = be64_to_cpup((__be64 
*)(ptr + 8)); - if (rrc != 0) - rotate_left(offset + 16, buf, rrc); + if (rrc != 0) { + err = rotate_left(kctx, offset, buf, rrc); + if (err) + return GSS_S_FAILURE; + } err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf, &headskip, &tailskip); diff --git a/trunk/net/sunrpc/auth_gss/svcauth_gss.c b/trunk/net/sunrpc/auth_gss/svcauth_gss.c index 73e957386600..3089de37c433 100644 --- a/trunk/net/sunrpc/auth_gss/svcauth_gss.c +++ b/trunk/net/sunrpc/auth_gss/svcauth_gss.c @@ -336,6 +336,7 @@ struct rsc { struct svc_cred cred; struct gss_svc_seq_data seqdata; struct gss_ctx *mechctx; + char *client_name; }; static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); @@ -346,7 +347,9 @@ static void rsc_free(struct rsc *rsci) kfree(rsci->handle.data); if (rsci->mechctx) gss_delete_sec_context(&rsci->mechctx); - free_svc_cred(&rsci->cred); + if (rsci->cred.cr_group_info) + put_group_info(rsci->cred.cr_group_info); + kfree(rsci->client_name); } static void rsc_put(struct kref *ref) @@ -384,7 +387,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp) tmp->handle.data = NULL; new->mechctx = NULL; new->cred.cr_group_info = NULL; - new->cred.cr_principal = NULL; + new->client_name = NULL; } static void @@ -399,8 +402,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; tmp->cred.cr_group_info = NULL; - new->cred.cr_principal = tmp->cred.cr_principal; - tmp->cred.cr_principal = NULL; + new->client_name = tmp->client_name; + tmp->client_name = NULL; } static struct cache_head * @@ -498,8 +501,8 @@ static int rsc_parse(struct cache_detail *cd, /* get client name */ len = qword_get(&mesg, buf, mlen); if (len > 0) { - rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL); - if (!rsci.cred.cr_principal) + rsci.client_name = kstrdup(buf, GFP_KERNEL); + if (!rsci.client_name) goto out; } @@ -929,6 +932,16 @@ struct gss_svc_data { struct rsc *rsci; }; +char 
*svc_gss_principal(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gd = (struct gss_svc_data *)rqstp->rq_auth_data; + + if (gd && gd->rsci) + return gd->rsci->client_name; + return NULL; +} +EXPORT_SYMBOL_GPL(svc_gss_principal); + static int svcauth_gss_set_client(struct svc_rqst *rqstp) { @@ -1207,7 +1220,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } svcdata->rsci = rsci; cache_get(&rsci->h); - rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor( + rqstp->rq_flavor = gss_svc_to_pseudoflavor( rsci->mechctx->mech_type, gc->gc_svc); ret = SVC_OK; goto out; diff --git a/trunk/net/sunrpc/rpcb_clnt.c b/trunk/net/sunrpc/rpcb_clnt.c index 92509ffe15fc..3c0653439f3d 100644 --- a/trunk/net/sunrpc/rpcb_clnt.c +++ b/trunk/net/sunrpc/rpcb_clnt.c @@ -180,16 +180,14 @@ void rpcb_put_local(struct net *net) struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_clnt *clnt = sn->rpcb_local_clnt; struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4; - int shutdown = 0; + int shutdown; spin_lock(&sn->rpcb_clnt_lock); - if (sn->rpcb_users) { - if (--sn->rpcb_users == 0) { - sn->rpcb_local_clnt = NULL; - sn->rpcb_local_clnt4 = NULL; - } - shutdown = !sn->rpcb_users; + if (--sn->rpcb_users == 0) { + sn->rpcb_local_clnt = NULL; + sn->rpcb_local_clnt4 = NULL; } + shutdown = !sn->rpcb_users; spin_unlock(&sn->rpcb_clnt_lock); if (shutdown) { diff --git a/trunk/net/sunrpc/svc.c b/trunk/net/sunrpc/svc.c index 7e9baaa1e543..017c0117d154 100644 --- a/trunk/net/sunrpc/svc.c +++ b/trunk/net/sunrpc/svc.c @@ -407,14 +407,6 @@ static int svc_uses_rpcbind(struct svc_serv *serv) return 0; } -int svc_bind(struct svc_serv *serv, struct net *net) -{ - if (!svc_uses_rpcbind(serv)) - return 0; - return svc_rpcb_setup(serv, net); -} -EXPORT_SYMBOL_GPL(svc_bind); - /* * Create an RPC service */ @@ -479,8 +471,15 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, spin_lock_init(&pool->sp_lock); } - if (svc_uses_rpcbind(serv) && (!serv->sv_shutdown)) - 
serv->sv_shutdown = svc_rpcb_cleanup; + if (svc_uses_rpcbind(serv)) { + if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) { + kfree(serv->sv_pools); + kfree(serv); + return NULL; + } + if (!serv->sv_shutdown) + serv->sv_shutdown = svc_rpcb_cleanup; + } return serv; } @@ -537,6 +536,8 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net); void svc_destroy(struct svc_serv *serv) { + struct net *net = current->nsproxy->net_ns; + dprintk("svc: svc_destroy(%s, %d)\n", serv->sv_program->pg_name, serv->sv_nrthreads); @@ -551,6 +552,8 @@ svc_destroy(struct svc_serv *serv) del_timer_sync(&serv->sv_temptimer); + svc_shutdown_net(serv, net); + /* * The last user is gone and thus all sockets have to be destroyed to * the point. Check this. diff --git a/trunk/net/sunrpc/svc_xprt.c b/trunk/net/sunrpc/svc_xprt.c index 88f2bf671960..b98ee3514912 100644 --- a/trunk/net/sunrpc/svc_xprt.c +++ b/trunk/net/sunrpc/svc_xprt.c @@ -598,7 +598,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) /* now allocate needed pages. If we get a failure, sleep briefly */ pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; - BUG_ON(pages >= RPCSVC_MAXPAGES); for (i = 0; i < pages ; i++) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); @@ -613,6 +612,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) rqstp->rq_pages[i] = p; } rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ + BUG_ON(pages >= RPCSVC_MAXPAGES); /* Make arg->head point to first page and arg->pages point to rest */ arg = &rqstp->rq_arg; @@ -973,7 +973,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net) svc_clear_pools(serv, net); /* * At this point the sp_sockets lists will stay empty, since - * svc_xprt_enqueue will not add new entries without taking the + * svc_enqueue will not add new entries without taking the * sp_lock and checking XPT_BUSY. 
*/ svc_clear_list(&serv->sv_tempsocks, net); diff --git a/trunk/net/sunrpc/svcauth_unix.c b/trunk/net/sunrpc/svcauth_unix.c index 2777fa896645..6138c925923d 100644 --- a/trunk/net/sunrpc/svcauth_unix.c +++ b/trunk/net/sunrpc/svcauth_unix.c @@ -746,7 +746,6 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) struct svc_cred *cred = &rqstp->rq_cred; cred->cr_group_info = NULL; - cred->cr_principal = NULL; rqstp->rq_client = NULL; if (argv->iov_len < 3*4) @@ -774,7 +773,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, RPC_AUTH_NULL); svc_putnl(resv, 0); - rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL; + rqstp->rq_flavor = RPC_AUTH_NULL; return SVC_OK; } @@ -812,7 +811,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) int len = argv->iov_len; cred->cr_group_info = NULL; - cred->cr_principal = NULL; rqstp->rq_client = NULL; if ((len -= 3*4) < 0) @@ -849,7 +847,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, RPC_AUTH_NULL); svc_putnl(resv, 0); - rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX; + rqstp->rq_flavor = RPC_AUTH_UNIX; return SVC_OK; badcred: diff --git a/trunk/security/apparmor/lsm.c b/trunk/security/apparmor/lsm.c index 8ea39aabe948..032daab449b0 100644 --- a/trunk/security/apparmor/lsm.c +++ b/trunk/security/apparmor/lsm.c @@ -490,9 +490,17 @@ static int common_mmap(int op, struct file *file, unsigned long prot, return common_file_perm(op, file, mask); } -static int apparmor_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) +static int apparmor_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) { + int rc = 0; + + /* do DAC check */ + rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + if (rc || addr_only) + return rc; + return common_mmap(OP_FMMAP, file, prot, flags); } @@ -638,8 +646,7 @@ static struct security_operations apparmor_ops 
= { .file_permission = apparmor_file_permission, .file_alloc_security = apparmor_file_alloc_security, .file_free_security = apparmor_file_free_security, - .mmap_file = apparmor_mmap_file, - .mmap_addr = cap_mmap_addr, + .file_mmap = apparmor_file_mmap, .file_mprotect = apparmor_file_mprotect, .file_lock = apparmor_file_lock, diff --git a/trunk/security/capability.c b/trunk/security/capability.c index 61095df8b89a..fca889676c5e 100644 --- a/trunk/security/capability.c +++ b/trunk/security/capability.c @@ -949,8 +949,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_alloc_security); set_to_cap_if_null(ops, file_free_security); set_to_cap_if_null(ops, file_ioctl); - set_to_cap_if_null(ops, mmap_addr); - set_to_cap_if_null(ops, mmap_file); + set_to_cap_if_null(ops, file_mmap); set_to_cap_if_null(ops, file_mprotect); set_to_cap_if_null(ops, file_lock); set_to_cap_if_null(ops, file_fcntl); diff --git a/trunk/security/commoncap.c b/trunk/security/commoncap.c index 6dbae4650abe..e771cb1b2d79 100644 --- a/trunk/security/commoncap.c +++ b/trunk/security/commoncap.c @@ -958,15 +958,22 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) } /* - * cap_mmap_addr - check if able to map given addr + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused * @addr: address attempting to be mapped + * @addr_only: unused * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. Returns 0 if this mapping should be allowed * -EPERM if not. 
*/ -int cap_mmap_addr(unsigned long addr) +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) { int ret = 0; @@ -979,9 +986,3 @@ int cap_mmap_addr(unsigned long addr) } return ret; } - -int cap_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) -{ - return 0; -} diff --git a/trunk/security/security.c b/trunk/security/security.c index 3efc9b12aef4..5497a57fba01 100644 --- a/trunk/security/security.c +++ b/trunk/security/security.c @@ -20,9 +20,6 @@ #include #include #include -#include -#include -#include #include #define MAX_LSM_EVM_XATTR 2 @@ -660,56 +657,18 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -static inline unsigned long mmap_prot(struct file *file, unsigned long prot) -{ - /* - * Does we have PROT_READ and does the application expect - * it to imply PROT_EXEC? If not, nothing to talk about... - */ - if ((prot & (PROT_READ | PROT_EXEC)) != PROT_READ) - return prot; - if (!(current->personality & READ_IMPLIES_EXEC)) - return prot; - /* - * if that's an anonymous mapping, let it. 
- */ - if (!file) - return prot | PROT_EXEC; - /* - * ditto if it's not on noexec mount, except that on !MMU we need - * BDI_CAP_EXEC_MMAP (== VM_MAYEXEC) in this case - */ - if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { -#ifndef CONFIG_MMU - unsigned long caps = 0; - struct address_space *mapping = file->f_mapping; - if (mapping && mapping->backing_dev_info) - caps = mapping->backing_dev_info->capabilities; - if (!(caps & BDI_CAP_EXEC_MAP)) - return prot; -#endif - return prot | PROT_EXEC; - } - /* anything on noexec mount won't get PROT_EXEC */ - return prot; -} - -int security_mmap_file(struct file *file, unsigned long prot, - unsigned long flags) +int security_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) { int ret; - ret = security_ops->mmap_file(file, prot, - mmap_prot(file, prot), flags); + + ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); if (ret) return ret; return ima_file_mmap(file, prot); } -int security_mmap_addr(unsigned long addr) -{ - return security_ops->mmap_addr(addr); -} - int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/trunk/security/selinux/hooks.c b/trunk/security/selinux/hooks.c index 372ec6502aa8..fa2341b68331 100644 --- a/trunk/security/selinux/hooks.c +++ b/trunk/security/selinux/hooks.c @@ -3083,7 +3083,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared return rc; } -static int selinux_mmap_addr(unsigned long addr) +static int selinux_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) { int rc = 0; u32 sid = current_sid(); @@ -3102,12 +3104,10 @@ static int selinux_mmap_addr(unsigned long addr) } /* do DAC check on address space usage */ - return cap_mmap_addr(addr); -} + rc = cap_file_mmap(file, reqprot, prot, flags, addr, 
addr_only); + if (rc || addr_only) + return rc; -static int selinux_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) -{ if (selinux_checkreqprot) prot = reqprot; @@ -5570,8 +5570,7 @@ static struct security_operations selinux_ops = { .file_alloc_security = selinux_file_alloc_security, .file_free_security = selinux_file_free_security, .file_ioctl = selinux_file_ioctl, - .mmap_file = selinux_mmap_file, - .mmap_addr = selinux_mmap_addr, + .file_mmap = selinux_file_mmap, .file_mprotect = selinux_file_mprotect, .file_lock = selinux_file_lock, .file_fcntl = selinux_file_fcntl, diff --git a/trunk/security/selinux/selinuxfs.c b/trunk/security/selinux/selinuxfs.c index 3ad290251288..4e93f9ef970b 100644 --- a/trunk/security/selinux/selinuxfs.c +++ b/trunk/security/selinux/selinuxfs.c @@ -1259,8 +1259,12 @@ static int sel_make_bools(void) if (!inode) goto out; - ret = -ENAMETOOLONG; + ret = -EINVAL; len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]); + if (len < 0) + goto out; + + ret = -ENAMETOOLONG; if (len >= PAGE_SIZE) goto out; @@ -1553,10 +1557,19 @@ static inline u32 sel_ino_to_perm(unsigned long ino) static ssize_t sel_read_class(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + ssize_t rc, len; + char *page; unsigned long ino = file->f_path.dentry->d_inode->i_ino; - char res[TMPBUFLEN]; - ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_class(ino)); - return simple_read_from_buffer(buf, count, ppos, res, len); + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino)); + rc = simple_read_from_buffer(buf, count, ppos, page, len); + free_page((unsigned long)page); + + return rc; } static const struct file_operations sel_class_ops = { @@ -1567,10 +1580,19 @@ static const struct file_operations sel_class_ops = { static ssize_t sel_read_perm(struct file *file, char __user *buf, size_t count, loff_t 
*ppos) { + ssize_t rc, len; + char *page; unsigned long ino = file->f_path.dentry->d_inode->i_ino; - char res[TMPBUFLEN]; - ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_perm(ino)); - return simple_read_from_buffer(buf, count, ppos, res, len); + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_perm(ino)); + rc = simple_read_from_buffer(buf, count, ppos, page, len); + free_page((unsigned long)page); + + return rc; } static const struct file_operations sel_perm_ops = { diff --git a/trunk/security/smack/smack_lsm.c b/trunk/security/smack/smack_lsm.c index ee0bb5735f35..d583c0545808 100644 --- a/trunk/security/smack/smack_lsm.c +++ b/trunk/security/smack/smack_lsm.c @@ -1171,7 +1171,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, } /** - * smack_mmap_file : + * smack_file_mmap : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). @@ -1180,9 +1180,10 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, * @flags contains the operational flags. * Return 0 if permission is granted. 
*/ -static int smack_mmap_file(struct file *file, +static int smack_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags) + unsigned long flags, unsigned long addr, + unsigned long addr_only) { struct smack_known *skp; struct smack_rule *srp; @@ -1197,6 +1198,11 @@ static int smack_mmap_file(struct file *file, int tmay; int rc; + /* do DAC check on address space usage */ + rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + if (rc || addr_only) + return rc; + if (file == NULL || file->f_dentry == NULL) return 0; @@ -3476,8 +3482,7 @@ struct security_operations smack_ops = { .file_ioctl = smack_file_ioctl, .file_lock = smack_file_lock, .file_fcntl = smack_file_fcntl, - .mmap_file = smack_mmap_file, - .mmap_addr = cap_mmap_addr, + .file_mmap = smack_file_mmap, .file_set_fowner = smack_file_set_fowner, .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive,