From 31b2b02240925ce4fc7682783f4065b2f0aec46d Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 14 Aug 2009 17:18:44 +0300 Subject: [PATCH] --- yaml --- r: 158580 b: refs/heads/master c: 9f162d2a810b4db48f7b8d7e734d0932c81ec2a1 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Documentation/ioctl/ioctl-number.txt | 1 + trunk/Documentation/kernel-parameters.txt | 25 +- trunk/Documentation/lockdep-design.txt | 6 +- trunk/Makefile | 2 +- trunk/arch/ia64/Makefile | 5 - trunk/arch/ia64/include/asm/bitops.h | 2 +- trunk/arch/ia64/include/asm/pgtable.h | 1 - trunk/arch/ia64/kernel/ia64_ksyms.c | 4 +- trunk/arch/ia64/kernel/iosapic.c | 4 + trunk/arch/ia64/kernel/pci-dma.c | 5 - trunk/arch/ia64/kernel/topology.c | 6 +- trunk/arch/ia64/kvm/mmio.c | 6 +- trunk/arch/ia64/kvm/vcpu.c | 6 +- trunk/arch/ia64/kvm/vcpu.h | 13 +- trunk/arch/mn10300/include/asm/pci.h | 1 - trunk/arch/powerpc/include/asm/kvm_host.h | 2 +- trunk/arch/powerpc/kernel/dma.c | 6 +- trunk/arch/powerpc/kernel/perf_counter.c | 8 + trunk/arch/s390/kvm/interrupt.c | 2 +- trunk/arch/sh/boards/board-ap325rxa.c | 2 +- trunk/arch/sh/boards/mach-migor/setup.c | 2 +- trunk/arch/sh/kernel/cpu/sh2/setup-sh7619.c | 2 +- trunk/arch/sh/kernel/cpu/sh2a/setup-mxg.c | 2 +- trunk/arch/sh/kernel/cpu/sh2a/setup-sh7201.c | 2 +- trunk/arch/sh/kernel/cpu/sh2a/setup-sh7203.c | 2 +- trunk/arch/sh/kernel/cpu/sh2a/setup-sh7206.c | 2 +- trunk/arch/sh/kernel/cpu/sh3/setup-sh7705.c | 2 +- trunk/arch/sh/kernel/cpu/sh3/setup-sh770x.c | 2 +- trunk/arch/sh/kernel/cpu/sh3/setup-sh7710.c | 2 +- trunk/arch/sh/kernel/cpu/sh3/setup-sh7720.c | 2 +- trunk/arch/sh/kernel/cpu/sh4/setup-sh4-202.c | 2 +- trunk/arch/sh/kernel/cpu/sh4/setup-sh7750.c | 2 +- trunk/arch/sh/kernel/cpu/sh4/setup-sh7760.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7343.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7723.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7724.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7763.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7770.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7780.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7785.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-sh7786.c | 2 +- trunk/arch/sh/kernel/cpu/sh4a/setup-shx3.c | 2 +- trunk/arch/sh/kernel/cpu/sh5/setup-sh5.c | 2 +- trunk/arch/x86/Kconfig | 2 +- trunk/arch/x86/kernel/apic/x2apic_cluster.c | 8 +- trunk/arch/x86/kernel/apic/x2apic_phys.c | 8 +- trunk/arch/x86/kernel/cpu/amd.c | 7 + trunk/arch/x86/kernel/cpu/common.c | 48 +- .../arch/x86/kernel/cpu/mcheck/therm_throt.c | 18 +- trunk/arch/x86/kernel/cpu/perf_counter.c | 40 +- trunk/arch/x86/kernel/efi.c | 2 +- trunk/arch/x86/kernel/reboot.c | 16 +- trunk/arch/x86/kernel/tsc.c | 29 +- trunk/arch/x86/kernel/vmi_32.c | 2 +- trunk/arch/x86/kvm/i8254.c | 3 + trunk/arch/x86/kvm/mmu.c | 48 +- trunk/arch/x86/kvm/svm.c | 6 +- trunk/arch/x86/kvm/vmx.c | 6 +- trunk/arch/x86/kvm/x86.c | 44 +- trunk/drivers/ata/ahci.c | 79 +- trunk/drivers/ata/libata-core.c | 3 + trunk/drivers/ata/pata_at91.c | 17 +- trunk/drivers/ata/pata_atiixp.c | 19 +- trunk/drivers/ata/sata_nv.c | 8 + trunk/drivers/base/platform.c | 3 - trunk/drivers/char/pty.c | 2 + trunk/drivers/gpu/drm/drm_irq.c | 2 +- trunk/drivers/gpu/drm/drm_modes.c | 2 + trunk/drivers/gpu/drm/i915/i915_irq.c | 4 +- trunk/drivers/md/md.c | 32 +- trunk/drivers/md/md.h | 10 + trunk/drivers/md/raid5.c | 34 +- trunk/drivers/mtd/maps/sbc8240.c | 0 trunk/drivers/mtd/ubi/eba.c | 1 + trunk/drivers/mtd/ubi/scan.c | 13 +- trunk/drivers/pci/hotplug/sgi_hotplug.c | 7 +- trunk/fs/lockd/host.c | 14 +- trunk/fs/lockd/mon.c | 44 +- trunk/fs/nfs/callback.c | 26 +- trunk/fs/nfs/client.c | 15 +- trunk/fs/nfs/direct.c | 20 +- trunk/fs/nfs/file.c | 49 +- trunk/fs/nfs/idmap.c | 6 +- trunk/fs/nfs/inode.c | 92 +-- trunk/fs/nfs/internal.h | 29 +- trunk/fs/nfs/mount_clnt.c | 83 +-- trunk/fs/nfs/nfs4namespace.c | 8 +- trunk/fs/nfs/nfs4proc.c | 40 +- trunk/fs/nfs/nfs4xdr.c | 86 +-- trunk/fs/nfs/read.c | 6 +- trunk/fs/nfs/super.c | 247 ++++--- trunk/fs/nfs/write.c | 97 +-- trunk/fs/nfsd/export.c | 14 +- trunk/fs/nfsd/nfs4idmap.c | 20 +- trunk/fs/nfsd/nfsctl.c | 21 +- trunk/fs/ocfs2/alloc.c | 47 +- trunk/fs/ocfs2/aops.c | 69 +- trunk/fs/ocfs2/dcache.c | 35 +- trunk/fs/ocfs2/dcache.h | 3 + trunk/fs/ocfs2/dlm/dlmast.c | 1 - trunk/fs/ocfs2/dlm/dlmrecovery.c | 2 +- trunk/fs/ocfs2/file.c | 5 +- trunk/fs/ocfs2/journal.c | 8 +- trunk/fs/ocfs2/journal.h | 19 +- trunk/fs/ocfs2/ocfs2.h | 22 +- trunk/fs/ocfs2/quota.h | 1 - trunk/fs/ocfs2/quota_global.c | 134 ++-- trunk/fs/ocfs2/quota_local.c | 110 ++- trunk/fs/ocfs2/stack_o2cb.c | 3 +- trunk/fs/ocfs2/super.c | 30 +- trunk/fs/ocfs2/xattr.c | 3 +- trunk/fs/proc/base.c | 27 +- trunk/fs/proc/task_mmu.c | 1 + trunk/fs/proc/task_nommu.c | 1 + trunk/fs/xfs/linux-2.6/xfs_buf.c | 2 +- trunk/fs/xfs/xfs_attr.c | 8 +- trunk/fs/xfs/xfs_bmap.c | 2 +- trunk/fs/xfs/xfs_btree.c | 4 +- trunk/fs/xfs/xfs_da_btree.c | 6 +- trunk/fs/xfs/xfs_dir2.c | 2 +- trunk/fs/xfs/xfs_fsops.c | 20 +- trunk/fs/xfs/xfs_inode.c | 10 + trunk/fs/xfs/xfs_log.c | 2 +- trunk/fs/xfs/xfs_vnodeops.c | 4 +- trunk/include/linux/ftrace_event.h | 4 +- trunk/include/linux/kvm_host.h | 1 + trunk/include/linux/nfs_fs.h | 5 +- trunk/include/linux/nfs_fs_sb.h | 9 - trunk/include/linux/perf_counter.h | 60 +- trunk/include/linux/sunrpc/cache.h | 40 +- trunk/include/linux/sunrpc/clnt.h | 43 +- trunk/include/linux/sunrpc/msg_prot.h | 17 +- trunk/include/linux/sunrpc/rpc_pipe_fs.h | 18 +- trunk/include/linux/sunrpc/xdr.h | 5 +- trunk/include/linux/sunrpc/xprt.h | 2 + trunk/include/linux/wait.h | 9 +- trunk/include/trace/ftrace.h | 183 ++++- trunk/kernel/futex.c | 28 +- trunk/kernel/futex_compat.c | 6 +- trunk/kernel/irq/manage.c | 17 +- trunk/kernel/irq/numa_migrate.c | 4 +- trunk/kernel/lockdep_proc.c | 3 +- trunk/kernel/perf_counter.c | 581 ++++++++++----- trunk/kernel/posix-cpu-timers.c | 7 +- trunk/kernel/rtmutex.c | 4 +- trunk/kernel/trace/blktrace.c | 12 +- trunk/kernel/trace/ring_buffer.c | 15 +- trunk/kernel/trace/trace.c | 1 + trunk/kernel/trace/trace.h | 4 - trunk/kernel/trace/trace_events_filter.c | 20 +- trunk/kernel/wait.c | 5 +- trunk/mm/mempool.c | 4 +- trunk/net/socket.c | 2 +- trunk/net/sunrpc/Makefile | 2 +- trunk/net/sunrpc/addr.c | 364 ---------- trunk/net/sunrpc/auth_gss/auth_gss.c | 12 +- trunk/net/sunrpc/auth_gss/svcauth_gss.c | 7 +- trunk/net/sunrpc/cache.c | 550 ++++---------- trunk/net/sunrpc/clnt.c | 60 +- trunk/net/sunrpc/rpc_pipe.c | 678 ++++++++---------- trunk/net/sunrpc/rpcb_clnt.c | 420 ++++------- trunk/net/sunrpc/svcauth_unix.c | 14 +- trunk/net/sunrpc/timer.c | 45 +- trunk/net/sunrpc/xdr.c | 6 +- trunk/net/sunrpc/xprtrdma/transport.c | 48 +- trunk/net/sunrpc/xprtsock.c | 287 ++++---- trunk/scripts/recordmcount.pl | 9 +- trunk/security/selinux/hooks.c | 3 +- trunk/sound/pci/hda/patch_realtek.c | 20 +- trunk/sound/soc/fsl/efika-audio-fabric.c | 2 + trunk/sound/soc/fsl/pcm030-audio-fabric.c | 2 + .../perf/Documentation/perf-examples.txt | 225 ++++++ trunk/tools/perf/Documentation/perf-stat.txt | 2 +- trunk/tools/perf/Documentation/perf-top.txt | 112 ++- trunk/tools/perf/Makefile | 25 +- trunk/tools/perf/builtin-list.c | 3 +- trunk/tools/perf/builtin-record.c | 108 +-- trunk/tools/perf/builtin-report.c | 111 ++- trunk/tools/perf/builtin-stat.c | 2 +- trunk/tools/perf/builtin-top.c | 552 ++++++++++++-- trunk/tools/perf/util/callchain.c | 32 +- trunk/tools/perf/util/callchain.h | 8 +- trunk/tools/perf/util/header.c | 5 +- trunk/tools/perf/util/parse-events.c | 36 +- trunk/tools/perf/util/parse-events.h | 1 + trunk/tools/perf/util/symbol.c | 72 +- trunk/tools/perf/util/symbol.h | 26 + trunk/virt/kvm/ioapic.c | 10 +- trunk/virt/kvm/irq_comm.c | 4 +- 192 files changed, 3926 insertions(+), 3226 deletions(-) delete mode 100644 trunk/drivers/mtd/maps/sbc8240.c delete mode 100644 trunk/net/sunrpc/addr.c create mode 100644 trunk/tools/perf/Documentation/perf-examples.txt diff --git a/[refs] b/[refs] index 5cffe825420b..baee2ba9bb17 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: f884dcaead5f17bf586ac5fe6a3ad07b5203616a +refs/heads/master: 9f162d2a810b4db48f7b8d7e734d0932c81ec2a1 diff --git a/trunk/Documentation/ioctl/ioctl-number.txt b/trunk/Documentation/ioctl/ioctl-number.txt index 7bb0d934b6d8..dbea4f95fc85 100644 --- a/trunk/Documentation/ioctl/ioctl-number.txt +++ b/trunk/Documentation/ioctl/ioctl-number.txt @@ -139,6 +139,7 @@ Code Seq# Include File Comments 'm' all linux/synclink.h conflict! 'm' 00-1F net/irda/irmod.h conflict! 'n' 00-7F linux/ncp_fs.h +'n' 80-8F linux/nilfs2_fs.h NILFS2 'n' E0-FF video/matrox.h matroxfb 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps) diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 2f1820683b69..7936b801fe6a 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1115,6 +1115,10 @@ and is between 256 and 4096 characters. It is defined in the file libata.dma=4 Compact Flash DMA only Combinations also work, so libata.dma=3 enables DMA for disks and CDROMs, but not CFs. + + libata.ignore_hpa= [LIBATA] Ignore HPA limit + libata.ignore_hpa=0 keep BIOS limits (default) + libata.ignore_hpa=1 ignore limits, using full disk libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume when set. @@ -2391,18 +2395,6 @@ and is between 256 and 4096 characters. It is defined in the file stifb= [HW] Format: bpp:[:[:...]] - sunrpc.min_resvport= - sunrpc.max_resvport= - [NFS,SUNRPC] - SunRPC servers often require that client requests - originate from a privileged port (i.e. a port in the - range 0 < portnr < 1024). - An administrator who wishes to reserve some of these - ports for other uses may adjust the range that the - kernel's sunrpc client considers to be privileged - using these two parameters to set the minimum and - maximum port values. - sunrpc.pool_mode= [NFS] Control how the NFS server code allocates CPUs to @@ -2419,15 +2411,6 @@ and is between 256 and 4096 characters. It is defined in the file pernode one pool for each NUMA node (equivalent to global on non-NUMA machines) - sunrpc.tcp_slot_table_entries= - sunrpc.udp_slot_table_entries= - [NFS,SUNRPC] - Sets the upper limit on the number of simultaneous - RPC calls that can be sent from the client to a - server. Increasing these values may allow you to - improve throughput, but will also increase the - amount of memory reserved for use by the client. - swiotlb= [IA-64] Number of I/O TLB slabs switches= [HW,M68k] diff --git a/trunk/Documentation/lockdep-design.txt b/trunk/Documentation/lockdep-design.txt index e20d913d5914..abf768c681e2 100644 --- a/trunk/Documentation/lockdep-design.txt +++ b/trunk/Documentation/lockdep-design.txt @@ -30,9 +30,9 @@ State The validator tracks lock-class usage history into 4n + 1 separate state bits: - 'ever held in STATE context' -- 'ever head as readlock in STATE context' -- 'ever head with STATE enabled' -- 'ever head as readlock with STATE enabled' +- 'ever held as readlock in STATE context' +- 'ever held with STATE enabled' +- 'ever held as readlock with STATE enabled' Where STATE can be either one of (kernel/lockdep_states.h) - hardirq diff --git a/trunk/Makefile b/trunk/Makefile index 0d46615bffe5..abcfa85f8f82 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 31 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/trunk/arch/ia64/Makefile b/trunk/arch/ia64/Makefile index 58a7e46affda..e7cbaa02cd0b 100644 --- a/trunk/arch/ia64/Makefile +++ b/trunk/arch/ia64/Makefile @@ -41,11 +41,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) endif -ifeq ($(call cc-version),0304) - cflags-$(CONFIG_ITANIUM) += -mtune=merced - cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley -endif - KBUILD_CFLAGS += $(cflags-y) head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o diff --git a/trunk/arch/ia64/include/asm/bitops.h b/trunk/arch/ia64/include/asm/bitops.h index e2ca80037335..57a2787bc9fb 100644 --- a/trunk/arch/ia64/include/asm/bitops.h +++ b/trunk/arch/ia64/include/asm/bitops.h @@ -286,7 +286,7 @@ __test_and_clear_bit(int nr, volatile void * addr) { __u32 *p = (__u32 *) addr + (nr >> 5); __u32 m = 1 << (nr & 31); - int oldbitset = *p & m; + int oldbitset = (*p & m) != 0; *p &= ~m; return oldbitset; diff --git a/trunk/arch/ia64/include/asm/pgtable.h b/trunk/arch/ia64/include/asm/pgtable.h index 0a9cc73d35c7..8840a690d1e7 100644 --- a/trunk/arch/ia64/include/asm/pgtable.h +++ b/trunk/arch/ia64/include/asm/pgtable.h @@ -155,7 +155,6 @@ #include #include #include -#include /* * Next come the mappings that determine how mmap() protection bits diff --git a/trunk/arch/ia64/kernel/ia64_ksyms.c b/trunk/arch/ia64/kernel/ia64_ksyms.c index 2d311864e359..8ebccb589e1c 100644 --- a/trunk/arch/ia64/kernel/ia64_ksyms.c +++ b/trunk/arch/ia64/kernel/ia64_ksyms.c @@ -21,6 +21,7 @@ EXPORT_SYMBOL(csum_ipv6_magic); #include EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); #ifdef CONFIG_VIRTUAL_MEM_MAP #include @@ -60,9 +61,6 @@ EXPORT_SYMBOL(__udivdi3); EXPORT_SYMBOL(__moddi3); EXPORT_SYMBOL(__umoddi3); -#include -EXPORT_SYMBOL(copy_page); - #if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE) extern void xor_ia64_2(void); extern void xor_ia64_3(void); diff --git a/trunk/arch/ia64/kernel/iosapic.c b/trunk/arch/ia64/kernel/iosapic.c index c48b03f2b61d..dab4d393908c 100644 --- a/trunk/arch/ia64/kernel/iosapic.c +++ b/trunk/arch/ia64/kernel/iosapic.c @@ -1072,6 +1072,10 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) } addr = ioremap(phys_addr, 0); + if (addr == NULL) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -ENOMEM; + } ver = iosapic_version(addr); if ((err = iosapic_check_gsi_range(gsi_base, ver))) { iounmap(addr); diff --git a/trunk/arch/ia64/kernel/pci-dma.c b/trunk/arch/ia64/kernel/pci-dma.c index 05695962fe44..f6b1ff0aea76 100644 --- a/trunk/arch/ia64/kernel/pci-dma.c +++ b/trunk/arch/ia64/kernel/pci-dma.c @@ -69,11 +69,6 @@ iommu_dma_init(void) int iommu_dma_supported(struct device *dev, u64 mask) { - struct dma_map_ops *ops = platform_dma_get_ops(dev); - - if (ops->dma_supported) - return ops->dma_supported(dev, mask); - /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. The caller just has to use GFP_DMA in this case. */ diff --git a/trunk/arch/ia64/kernel/topology.c b/trunk/arch/ia64/kernel/topology.c index bc80dff1df7a..8f060352e129 100644 --- a/trunk/arch/ia64/kernel/topology.c +++ b/trunk/arch/ia64/kernel/topology.c @@ -372,6 +372,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj, &cache_ktype_percpu_entry, &sys_dev->kobj, "%s", "cache"); + if (unlikely(retval < 0)) { + cpu_cache_sysfs_exit(cpu); + return retval; + } for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { this_object = LEAF_KOBJECT_PTR(cpu,i); @@ -385,7 +389,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_put(&all_cpu_cache_info[cpu].kobj); cpu_cache_sysfs_exit(cpu); - break; + return retval; } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } diff --git a/trunk/arch/ia64/kvm/mmio.c b/trunk/arch/ia64/kvm/mmio.c index 21f63fffc379..9bf55afd08d0 100644 --- a/trunk/arch/ia64/kvm/mmio.c +++ b/trunk/arch/ia64/kvm/mmio.c @@ -247,7 +247,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) vcpu_get_fpreg(vcpu, inst.M9.f2, &v); /* Write high word. FIXME: this is a kludge! */ v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8, + ma, IOREQ_WRITE); data = v.u.bits[0]; size = 3; } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { @@ -265,7 +266,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) /* Write high word.FIXME: this is a kludge! */ v.u.bits[1] &= 0x3ffff; - mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], + 8, ma, IOREQ_WRITE); data = v.u.bits[0]; size = 3; } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { diff --git a/trunk/arch/ia64/kvm/vcpu.c b/trunk/arch/ia64/kvm/vcpu.c index 46b02cbcc874..cc406d064a09 100644 --- a/trunk/arch/ia64/kvm/vcpu.c +++ b/trunk/arch/ia64/kvm/vcpu.c @@ -461,7 +461,7 @@ void setreg(unsigned long regnum, unsigned long val, u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) { struct kvm_pt_regs *regs = vcpu_regs(vcpu); - u64 val; + unsigned long val; if (!reg) return 0; @@ -469,7 +469,7 @@ u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) return val; } -void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) +void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat) { struct kvm_pt_regs *regs = vcpu_regs(vcpu); long sof = (regs->cr_ifs) & 0x7f; @@ -1072,7 +1072,7 @@ void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); } -int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) +int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr) { struct thash_data *data; union ia64_isr visr, pt_isr; diff --git a/trunk/arch/ia64/kvm/vcpu.h b/trunk/arch/ia64/kvm/vcpu.h index 042af92ced83..360724d3ae69 100644 --- a/trunk/arch/ia64/kvm/vcpu.h +++ b/trunk/arch/ia64/kvm/vcpu.h @@ -686,14 +686,15 @@ static inline int highest_inservice_irq(struct kvm_vcpu *vcpu) return highest_bits((int *)&(VMX(vcpu, insvc[0]))); } -extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg, +extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, struct ia64_fpreg *val); -extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg, +extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, struct ia64_fpreg *val); -extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg); -extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat); -extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu); -extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val); +extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg); +extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, + u64 val, int nat); +extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu); +extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val); extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); extern void vcpu_bsw0(struct kvm_vcpu *vcpu); extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, diff --git a/trunk/arch/mn10300/include/asm/pci.h b/trunk/arch/mn10300/include/asm/pci.h index 35d2ed6396f6..19aecc90f7a4 100644 --- a/trunk/arch/mn10300/include/asm/pci.h +++ b/trunk/arch/mn10300/include/asm/pci.h @@ -59,7 +59,6 @@ void pcibios_penalize_isa_irq(int irq); #include #include #include -#include #include struct pci_dev; diff --git a/trunk/arch/powerpc/include/asm/kvm_host.h b/trunk/arch/powerpc/include/asm/kvm_host.h index dfdf13c9fefd..fddc3ed715fa 100644 --- a/trunk/arch/powerpc/include/asm/kvm_host.h +++ b/trunk/arch/powerpc/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* We don't currently support large pages. */ -#define KVM_PAGES_PER_HPAGE (1<<31) +#define KVM_PAGES_PER_HPAGE (1UL << 31) struct kvm; struct kvm_run; diff --git a/trunk/arch/powerpc/kernel/dma.c b/trunk/arch/powerpc/kernel/dma.c index 20a60d661ba8..ccf129d47d84 100644 --- a/trunk/arch/powerpc/kernel/dma.c +++ b/trunk/arch/powerpc/kernel/dma.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -90,11 +91,10 @@ static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, static int dma_direct_dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PPC64 - /* Could be improved to check for memory though it better be - * done via some global so platforms can set the limit in case + /* Could be improved so platforms can set the limit in case * they have limited DMA windows */ - return mask >= DMA_BIT_MASK(32); + return mask >= (lmb_end_of_DRAM() - 1); #else return 1; #endif diff --git a/trunk/arch/powerpc/kernel/perf_counter.c b/trunk/arch/powerpc/kernel/perf_counter.c index 809fdf94b95f..70e1f57f7dd8 100644 --- a/trunk/arch/powerpc/kernel/perf_counter.c +++ b/trunk/arch/powerpc/kernel/perf_counter.c @@ -518,6 +518,8 @@ void hw_perf_disable(void) struct cpu_hw_counters *cpuhw; unsigned long flags; + if (!ppmu) + return; local_irq_save(flags); cpuhw = &__get_cpu_var(cpu_hw_counters); @@ -572,6 +574,8 @@ void hw_perf_enable(void) int n_lim; int idx; + if (!ppmu) + return; local_irq_save(flags); cpuhw = &__get_cpu_var(cpu_hw_counters); if (!cpuhw->disabled) { @@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, long i, n, n0; struct perf_counter *sub; + if (!ppmu) + return 0; cpuhw = &__get_cpu_var(cpu_hw_counters); n0 = cpuhw->n_counters; n = collect_events(group_leader, ppmu->n_counter - n0, @@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu) { struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); + if (!ppmu) + return; memset(cpuhw, 0, sizeof(*cpuhw)); cpuhw->mmcr[0] = MMCR0_FC; } diff --git a/trunk/arch/s390/kvm/interrupt.c b/trunk/arch/s390/kvm/interrupt.c index f04f5301b1b4..4d613415c435 100644 --- a/trunk/arch/s390/kvm/interrupt.c +++ b/trunk/arch/s390/kvm/interrupt.c @@ -386,7 +386,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->wq, &wait); + remove_wait_queue(&vcpu->arch.local_int.wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); diff --git a/trunk/arch/sh/boards/board-ap325rxa.c b/trunk/arch/sh/boards/board-ap325rxa.c index 7ffd1b4315bd..b9c88cc519e2 100644 --- a/trunk/arch/sh/boards/board-ap325rxa.c +++ b/trunk/arch/sh/boards/board-ap325rxa.c @@ -547,7 +547,7 @@ static int __init ap325rxa_devices_setup(void) return platform_add_devices(ap325rxa_devices, ARRAY_SIZE(ap325rxa_devices)); } -device_initcall(ap325rxa_devices_setup); +arch_initcall(ap325rxa_devices_setup); /* Return the board specific boot mode pin configuration */ static int ap325rxa_mode_pins(void) diff --git a/trunk/arch/sh/boards/mach-migor/setup.c b/trunk/arch/sh/boards/mach-migor/setup.c index f70f4644deb4..f9b2e4df35b9 100644 --- a/trunk/arch/sh/boards/mach-migor/setup.c +++ b/trunk/arch/sh/boards/mach-migor/setup.c @@ -608,7 +608,7 @@ static int __init migor_devices_setup(void) return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices)); } -__initcall(migor_devices_setup); +arch_initcall(migor_devices_setup); /* Return the board specific boot mode pin configuration */ static int migor_mode_pins(void) diff --git a/trunk/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/trunk/arch/sh/kernel/cpu/sh2/setup-sh7619.c index 13798733f2db..8555c05e8667 100644 --- a/trunk/arch/sh/kernel/cpu/sh2/setup-sh7619.c +++ b/trunk/arch/sh/kernel/cpu/sh2/setup-sh7619.c @@ -187,7 +187,7 @@ static int __init sh7619_devices_setup(void) return platform_add_devices(sh7619_devices, ARRAY_SIZE(sh7619_devices)); } -__initcall(sh7619_devices_setup); +arch_initcall(sh7619_devices_setup); void __init plat_irq_setup(void) { diff --git a/trunk/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/trunk/arch/sh/kernel/cpu/sh2a/setup-mxg.c index 869c2da4820b..b67376445315 100644 --- a/trunk/arch/sh/kernel/cpu/sh2a/setup-mxg.c +++ b/trunk/arch/sh/kernel/cpu/sh2a/setup-mxg.c @@ -238,7 +238,7 @@ static int __init mxg_devices_setup(void) return platform_add_devices(mxg_devices, ARRAY_SIZE(mxg_devices)); } -__initcall(mxg_devices_setup); +arch_initcall(mxg_devices_setup); void __init plat_irq_setup(void) { diff --git a/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7201.c index d8febe128066..fbde5b75deb9 100644 --- a/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7201.c +++ b/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7201.c @@ -357,7 +357,7 @@ static int __init sh7201_devices_setup(void) return platform_add_devices(sh7201_devices, ARRAY_SIZE(sh7201_devices)); } -__initcall(sh7201_devices_setup); +arch_initcall(sh7201_devices_setup); void __init plat_irq_setup(void) { diff --git a/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7203.c index 62e3039d2398..d3fd536c9a84 100644 --- a/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7203.c +++ b/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7203.c @@ -367,7 +367,7 @@ static int __init sh7203_devices_setup(void) return platform_add_devices(sh7203_devices, ARRAY_SIZE(sh7203_devices)); } -__initcall(sh7203_devices_setup); +arch_initcall(sh7203_devices_setup); void __init plat_irq_setup(void) { diff --git a/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7206.c index 3e6f3d7a58be..a9ccc5e8d9e9 100644 --- a/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7206.c +++ b/trunk/arch/sh/kernel/cpu/sh2a/setup-sh7206.c @@ -338,7 +338,7 @@ static int __init sh7206_devices_setup(void) return platform_add_devices(sh7206_devices, ARRAY_SIZE(sh7206_devices)); } -__initcall(sh7206_devices_setup); +arch_initcall(sh7206_devices_setup); void __init plat_irq_setup(void) { diff --git a/trunk/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/trunk/arch/sh/kernel/cpu/sh3/setup-sh7705.c index 88f742fed9ed..c23105983878 100644 --- a/trunk/arch/sh/kernel/cpu/sh3/setup-sh7705.c +++ b/trunk/arch/sh/kernel/cpu/sh3/setup-sh7705.c @@ -222,7 +222,7 @@ static int __init sh7705_devices_setup(void) return platform_add_devices(sh7705_devices, ARRAY_SIZE(sh7705_devices)); } -__initcall(sh7705_devices_setup); +arch_initcall(sh7705_devices_setup); static struct platform_device *sh7705_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/trunk/arch/sh/kernel/cpu/sh3/setup-sh770x.c index c56306798584..347ab35d0697 100644 --- a/trunk/arch/sh/kernel/cpu/sh3/setup-sh770x.c +++ b/trunk/arch/sh/kernel/cpu/sh3/setup-sh770x.c @@ -250,7 +250,7 @@ static int __init sh770x_devices_setup(void) return platform_add_devices(sh770x_devices, ARRAY_SIZE(sh770x_devices)); } -__initcall(sh770x_devices_setup); +arch_initcall(sh770x_devices_setup); static struct platform_device *sh770x_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/trunk/arch/sh/kernel/cpu/sh3/setup-sh7710.c index efa76c8148f4..717e90ae1097 100644 --- a/trunk/arch/sh/kernel/cpu/sh3/setup-sh7710.c +++ b/trunk/arch/sh/kernel/cpu/sh3/setup-sh7710.c @@ -226,7 +226,7 @@ static int __init sh7710_devices_setup(void) return platform_add_devices(sh7710_devices, ARRAY_SIZE(sh7710_devices)); } -__initcall(sh7710_devices_setup); +arch_initcall(sh7710_devices_setup); static struct platform_device *sh7710_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/trunk/arch/sh/kernel/cpu/sh3/setup-sh7720.c index 5b2107798edb..74d8baaf8e96 100644 --- a/trunk/arch/sh/kernel/cpu/sh3/setup-sh7720.c +++ b/trunk/arch/sh/kernel/cpu/sh3/setup-sh7720.c @@ -388,7 +388,7 @@ static int __init sh7720_devices_setup(void) return platform_add_devices(sh7720_devices, ARRAY_SIZE(sh7720_devices)); } -__initcall(sh7720_devices_setup); +arch_initcall(sh7720_devices_setup); static struct platform_device *sh7720_early_devices[] __initdata = { &cmt0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/trunk/arch/sh/kernel/cpu/sh4/setup-sh4-202.c index 6d088d123591..de4827df19aa 100644 --- a/trunk/arch/sh/kernel/cpu/sh4/setup-sh4-202.c +++ b/trunk/arch/sh/kernel/cpu/sh4/setup-sh4-202.c @@ -138,7 +138,7 @@ static int __init sh4202_devices_setup(void) return platform_add_devices(sh4202_devices, ARRAY_SIZE(sh4202_devices)); } -__initcall(sh4202_devices_setup); +arch_initcall(sh4202_devices_setup); static struct platform_device *sh4202_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/trunk/arch/sh/kernel/cpu/sh4/setup-sh7750.c index 851672d15cf4..1b8b122e8f3d 100644 --- a/trunk/arch/sh/kernel/cpu/sh4/setup-sh7750.c +++ b/trunk/arch/sh/kernel/cpu/sh4/setup-sh7750.c @@ -239,7 +239,7 @@ static int __init sh7750_devices_setup(void) return platform_add_devices(sh7750_devices, ARRAY_SIZE(sh7750_devices)); } -__initcall(sh7750_devices_setup); +arch_initcall(sh7750_devices_setup); static struct platform_device *sh7750_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/trunk/arch/sh/kernel/cpu/sh4/setup-sh7760.c index 5b822519bd90..7fbb7be9284c 100644 --- a/trunk/arch/sh/kernel/cpu/sh4/setup-sh7760.c +++ b/trunk/arch/sh/kernel/cpu/sh4/setup-sh7760.c @@ -265,7 +265,7 @@ static int __init sh7760_devices_setup(void) return platform_add_devices(sh7760_devices, ARRAY_SIZE(sh7760_devices)); } -__initcall(sh7760_devices_setup); +arch_initcall(sh7760_devices_setup); static struct platform_device *sh7760_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7343.c index 6307e087c864..ac4d5672ec1a 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7343.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7343.c @@ -325,7 +325,7 @@ static int __init sh7343_devices_setup(void) return platform_add_devices(sh7343_devices, ARRAY_SIZE(sh7343_devices)); } -__initcall(sh7343_devices_setup); +arch_initcall(sh7343_devices_setup); static struct platform_device *sh7343_early_devices[] __initdata = { &cmt_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index c18f7d09281b..1a956b1beccc 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -318,7 +318,7 @@ static int __init sh7366_devices_setup(void) return platform_add_devices(sh7366_devices, ARRAY_SIZE(sh7366_devices)); } -__initcall(sh7366_devices_setup); +arch_initcall(sh7366_devices_setup); static struct platform_device *sh7366_early_devices[] __initdata = { &cmt_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index ea524a2da3e4..cda76ebf87c3 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -359,7 +359,7 @@ static int __init sh7722_devices_setup(void) return platform_add_devices(sh7722_devices, ARRAY_SIZE(sh7722_devices)); } -__initcall(sh7722_devices_setup); +arch_initcall(sh7722_devices_setup); static struct platform_device *sh7722_early_devices[] __initdata = { &cmt_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index e1bb80b2a27b..b45dace9539f 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -473,7 +473,7 @@ static int __init sh7723_devices_setup(void) return platform_add_devices(sh7723_devices, ARRAY_SIZE(sh7723_devices)); } -__initcall(sh7723_devices_setup); +arch_initcall(sh7723_devices_setup); static struct platform_device *sh7723_early_devices[] __initdata = { &cmt_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index e5ac9eb11c63..a04edaab9a29 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -508,7 +508,7 @@ static int __init sh7724_devices_setup(void) return platform_add_devices(sh7724_devices, ARRAY_SIZE(sh7724_devices)); } -device_initcall(sh7724_devices_setup); +arch_initcall(sh7724_devices_setup); static struct platform_device *sh7724_early_devices[] __initdata = { &cmt_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7763.c index f1e0c0d36da7..4659fff6b842 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7763.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7763.c @@ -314,7 +314,7 @@ static int __init sh7763_devices_setup(void) return platform_add_devices(sh7763_devices, ARRAY_SIZE(sh7763_devices)); } -__initcall(sh7763_devices_setup); +arch_initcall(sh7763_devices_setup); static struct platform_device *sh7763_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7770.c index 1e86209db284..eead08d89d32 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7770.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7770.c @@ -368,7 +368,7 @@ static int __init sh7770_devices_setup(void) return platform_add_devices(sh7770_devices, ARRAY_SIZE(sh7770_devices)); } -__initcall(sh7770_devices_setup); +arch_initcall(sh7770_devices_setup); static struct platform_device *sh7770_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7780.c index 715e05b431e5..2c901f446959 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7780.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7780.c @@ -256,7 +256,7 @@ static int __init sh7780_devices_setup(void) return platform_add_devices(sh7780_devices, ARRAY_SIZE(sh7780_devices)); } -__initcall(sh7780_devices_setup); +arch_initcall(sh7780_devices_setup); static struct platform_device *sh7780_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7785.c index af561402570b..7f6c718b6c36 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7785.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7785.c @@ -263,7 +263,7 @@ static int __init sh7785_devices_setup(void) return platform_add_devices(sh7785_devices, ARRAY_SIZE(sh7785_devices)); } -__initcall(sh7785_devices_setup); +arch_initcall(sh7785_devices_setup); static struct platform_device *sh7785_early_devices[] __initdata = { &tmu0_device, diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7786.c index b70049470a0b..0104a8ec5369 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7786.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-sh7786.c @@ -547,7 +547,7 @@ static int __init sh7786_devices_setup(void) return platform_add_devices(sh7786_devices, ARRAY_SIZE(sh7786_devices)); } -device_initcall(sh7786_devices_setup); +arch_initcall(sh7786_devices_setup); void __init plat_early_device_setup(void) { diff --git a/trunk/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/trunk/arch/sh/kernel/cpu/sh4a/setup-shx3.c index 53c65fd9ccef..07f078961c71 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/setup-shx3.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/setup-shx3.c @@ -256,7 +256,7 @@ static int __init shx3_devices_setup(void) return platform_add_devices(shx3_devices, ARRAY_SIZE(shx3_devices)); } -__initcall(shx3_devices_setup); +arch_initcall(shx3_devices_setup); void __init plat_early_device_setup(void) { diff --git a/trunk/arch/sh/kernel/cpu/sh5/setup-sh5.c b/trunk/arch/sh/kernel/cpu/sh5/setup-sh5.c index f5ff1ac57fc2..6a0f82f70032 100644 --- a/trunk/arch/sh/kernel/cpu/sh5/setup-sh5.c +++ b/trunk/arch/sh/kernel/cpu/sh5/setup-sh5.c @@ -186,7 +186,7 @@ static int __init sh5_devices_setup(void) return platform_add_devices(sh5_devices, ARRAY_SIZE(sh5_devices)); } -__initcall(sh5_devices_setup); +arch_initcall(sh5_devices_setup); void __init plat_early_device_setup(void) { diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 738bdc6b0f8b..13ffa5df37d7 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -24,6 +24,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE + select HAVE_PERF_COUNTERS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB @@ -742,7 +743,6 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC - select HAVE_PERF_COUNTERS if (!M386 && !M486) config X86_IO_APIC def_bool y diff --git a/trunk/arch/x86/kernel/apic/x2apic_cluster.c b/trunk/arch/x86/kernel/apic/x2apic_cluster.c index 2ed4e2bb3b32..a5371ec36776 100644 --- a/trunk/arch/x86/kernel/apic/x2apic_cluster.c +++ b/trunk/arch/x86/kernel/apic/x2apic_cluster.c @@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled(); } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } /* diff --git a/trunk/arch/x86/kernel/apic/x2apic_phys.c b/trunk/arch/x86/kernel/apic/x2apic_phys.c index 0b631c6a2e00..a8989aadc99a 100644 --- a/trunk/arch/x86/kernel/apic/x2apic_phys.c +++ b/trunk/arch/x86/kernel/apic/x2apic_phys.c @@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) diff --git a/trunk/arch/x86/kernel/cpu/amd.c b/trunk/arch/x86/kernel/cpu/amd.c index e2485b03f1cf..63fddcd082cd 100644 --- a/trunk/arch/x86/kernel/cpu/amd.c +++ b/trunk/arch/x86/kernel/cpu/amd.c @@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* + * Some BIOSes incorrectly force this feature, but only K8 + * revision D (model = 0x14) and later actually support it. + */ + if (c->x86_model < 0x14) + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c index f1961c07af9a..5ce60a88027b 100644 --- a/trunk/arch/x86/kernel/cpu/common.c +++ b/trunk/arch/x86/kernel/cpu/common.c @@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void) alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } -static const struct cpu_dev *this_cpu __cpuinitdata; +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif +} + +static const struct cpu_dev __cpuinitconst default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, +}; + +static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 @@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu) static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static const struct cpu_dev __cpuinitconst default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; diff --git a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c index bff8dd191dd5..8bc64cfbe936 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -36,6 +36,7 @@ static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +static DEFINE_PER_CPU(bool, thermal_throttle_active); static atomic_t therm_throt_en = ATOMIC_INIT(0); @@ -96,24 +97,27 @@ static int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); + bool was_throttled = __get_cpu_var(thermal_throttle_active); + bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; - if (curr) + if (is_throttled) __get_cpu_var(thermal_throttle_count)++; - if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + if (!(was_throttled ^ is_throttled) && + time_before64(tmp_jiffs, __get_cpu_var(next_check))) return 0; __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; /* if we just entered the thermal event */ - if (curr) { + if (is_throttled) { printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", cpu, - __get_cpu_var(thermal_throttle_count)); + "cpu clock throttled (total events = %lu)\n", + cpu, __get_cpu_var(thermal_throttle_count)); add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } else if (was_throttled) { + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); } return 1; diff --git a/trunk/arch/x86/kernel/cpu/perf_counter.c b/trunk/arch/x86/kernel/cpu/perf_counter.c index a7aa8f900954..900332b800f8 100644 --- a/trunk/arch/x86/kernel/cpu/perf_counter.c +++ b/trunk/arch/x86/kernel/cpu/perf_counter.c @@ -55,6 +55,7 @@ struct x86_pmu { int num_counters_fixed; int counter_bits; u64 counter_mask; + int apic; u64 max_period; u64 intel_ctrl; }; @@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, @@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); static bool reserve_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void) if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } +#endif return true; +#ifdef CONFIG_X86_LOCAL_APIC eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); @@ -644,10 +648,12 @@ static bool reserve_pmc_hardware(void) enable_lapic_nmi_watchdog(); return false; +#endif } static void release_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; for (i = 0; i < x86_pmu.num_counters; i++) { @@ -657,6 +663,7 @@ static void release_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); +#endif } static void hw_perf_counter_destroy(struct perf_counter *counter) @@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * counters (user-space has to fall back and + * sample via a hrtimer based software counter): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; } counter->destroy = hw_perf_counter_destroy; @@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) void set_perf_counter_pending(void) { +#ifdef CONFIG_X86_LOCAL_APIC apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif } void perf_counters_lapic_init(void) { - if (!x86_pmu_initialized()) +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) return; /* * Always use NMI for PMU */ apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif } static int __kprobes @@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self, regs = args->regs; +#ifdef CONFIG_X86_LOCAL_APIC apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif /* * Can't rely on the handled return value to say it was our NMI, two * counters could trigger 'simultaneously' raising two back-to-back NMIs. @@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = { .event_map = p6_pmu_event_map, .raw_event = p6_pmu_raw_event, .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, .max_period = (1ULL << 31) - 1, .version = 0, .num_counters = 2, @@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .raw_event = intel_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = { .num_counters = 4, .counter_bits = 48, .counter_mask = (1ULL << 48) - 1, + .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, }; @@ -1589,13 +1614,14 @@ static int p6_pmu_init(void) return -ENODEV; } + x86_pmu = p6_pmu; + if (!cpu_has_apic) { - pr_info("no Local APIC, try rebooting with lapic"); - return -ENODEV; + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); + x86_pmu.apic = 0; } - x86_pmu = p6_pmu; - return 0; } diff --git a/trunk/arch/x86/kernel/efi.c b/trunk/arch/x86/kernel/efi.c index 19ccf6d0dccf..fe26ba3e3451 100644 --- a/trunk/arch/x86/kernel/efi.c +++ b/trunk/arch/x86/kernel/efi.c @@ -354,7 +354,7 @@ void __init efi_init(void) */ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); if (c16) { - for (i = 0; i < sizeof(vendor) && *c16; ++i) + for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index 834c9da8bf9d..a06e8d101844 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -405,7 +405,7 @@ EXPORT_SYMBOL(machine_real_restart); #endif /* CONFIG_X86_32 */ /* - * Apple MacBook5,2 (2009 MacBook) needs reboot=p + * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot */ static int __init set_pci_reboot(const struct dmi_system_id *d) { @@ -418,12 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d) } static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { - { /* Handle problems with rebooting on Apple MacBook5,2 */ + { /* Handle problems with rebooting on Apple MacBook5 */ .callback = set_pci_reboot, - .ident = "Apple MacBook", + .ident = "Apple MacBook5", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), + }, + }, + { /* Handle problems with rebooting on Apple MacBookPro5 */ + .callback = set_pci_reboot, + .ident = "Apple MacBookPro5", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), }, }, { } diff --git a/trunk/arch/x86/kernel/tsc.c b/trunk/arch/x86/kernel/tsc.c index 6e1a368d21d4..71f4368b357e 100644 --- a/trunk/arch/x86/kernel/tsc.c +++ b/trunk/arch/x86/kernel/tsc.c @@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) * use the TSC value at the transitions to calculate a pretty * good value for the TSC frequencty. */ +static inline int pit_verify_msb(unsigned char val) +{ + /* Ignore LSB */ + inb(0x42); + return inb(0x42) == val; +} + static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) { int count; u64 tsc = 0; for (count = 0; count < 50000; count++) { - /* Ignore LSB */ - inb(0x42); - if (inb(0x42) != val) + if (!pit_verify_msb(val)) break; tsc = get_cycles(); } @@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void) * to do that is to just read back the 16-bit counter * once from the PIT. */ - inb(0x42); - inb(0x42); + pit_verify_msb(0); if (pit_expect_msb(0xff, &tsc, &d1)) { for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { @@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void) * Iterate until the error is less than 500 ppm */ delta -= tsc; - if (d1+d2 < delta >> 11) - goto success; + if (d1+d2 >= delta >> 11) + continue; + + /* + * Check the PIT one more time to verify that + * all TSC reads were stable wrt the PIT. + * + * This also guarantees serialization of the + * last cycle read ('d2') in pit_expect_msb. + */ + if (!pit_verify_msb(0xfe - i)) + break; + goto success; } } printk("Fast TSC calibration failed\n"); diff --git a/trunk/arch/x86/kernel/vmi_32.c b/trunk/arch/x86/kernel/vmi_32.c index b263423fbe2a..95a7289e4b0c 100644 --- a/trunk/arch/x86/kernel/vmi_32.c +++ b/trunk/arch/x86/kernel/vmi_32.c @@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, ap.ds = __USER_DS; ap.es = __USER_DS; ap.fs = __KERNEL_PERCPU; - ap.gs = 0; + ap.gs = __KERNEL_STACK_CANARY; ap.eflags = 0; diff --git a/trunk/arch/x86/kvm/i8254.c b/trunk/arch/x86/kvm/i8254.c index 4d6f0d293ee2..21f68e00524f 100644 --- a/trunk/arch/x86/kvm/i8254.c +++ b/trunk/arch/x86/kvm/i8254.c @@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm) ktime_t remaining; struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + if (!ps->pit_timer.period) + return 0; + /* * The Counter does not stop when it reaches zero. In * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to diff --git a/trunk/arch/x86/kvm/mmu.c b/trunk/arch/x86/kvm/mmu.c index 7030b5f911bf..0ef5bb2b4043 100644 --- a/trunk/arch/x86/kvm/mmu.c +++ b/trunk/arch/x86/kvm/mmu.c @@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage) * * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc * containing more mappings. + * + * Returns the number of rmap entries before the spte was added or zero if + * the spte was not added. + * */ -static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) +static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) { struct kvm_mmu_page *sp; struct kvm_rmap_desc *desc; unsigned long *rmapp; - int i; + int i, count = 0; if (!is_rmap_pte(*spte)) - return; + return count; gfn = unalias_gfn(vcpu->kvm, gfn); sp = page_header(__pa(spte)); sp->gfns[spte - sp->spt] = gfn; @@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) } else { rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); - while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) + while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) { desc = desc->more; + count += RMAP_EXT; + } if (desc->shadow_ptes[RMAP_EXT-1]) { desc->more = mmu_alloc_rmap_desc(vcpu); desc = desc->more; @@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) ; desc->shadow_ptes[i] = spte; } + return count; } static void rmap_desc_remove_entry(unsigned long *rmapp, @@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) return young; } +#define RMAP_RECYCLE_THRESHOLD 1000 + +static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage) +{ + unsigned long *rmapp; + + gfn = unalias_gfn(vcpu->kvm, gfn); + rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); + + kvm_unmap_rmapp(vcpu->kvm, rmapp); + kvm_flush_remote_tlbs(vcpu->kvm); +} + int kvm_age_hva(struct kvm *kvm, unsigned long hva) { return kvm_handle_hva(kvm, hva, kvm_age_rmapp); @@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) */ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) { + int used_pages; + + used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; + used_pages = max(0, used_pages); + /* * If we set the number of mmu pages to be smaller be than the * number of actived pages , we must to free some mmu pages before we * change the value */ - if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > - kvm_nr_mmu_pages) { - int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages - - kvm->arch.n_free_mmu_pages; - - while (n_used_mmu_pages > kvm_nr_mmu_pages) { + if (used_pages > kvm_nr_mmu_pages) { + while (used_pages > kvm_nr_mmu_pages) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); kvm_mmu_zap_page(kvm, page); - n_used_mmu_pages--; + used_pages--; } kvm->arch.n_free_mmu_pages = 0; } @@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); + int rmap_count; pgprintk("%s: spte %llx access %x write_fault %d" " user_fault %d gfn %lx\n", @@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, page_header_update_slot(vcpu->kvm, shadow_pte, gfn); if (!was_rmapped) { - rmap_add(vcpu, shadow_pte, gfn, largepage); + rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage); if (!is_rmap_pte(*shadow_pte)) kvm_release_pfn_clean(pfn); + if (rmap_count > RMAP_RECYCLE_THRESHOLD) + rmap_recycle(vcpu, gfn, largepage); } else { if (was_writeble) kvm_release_pfn_dirty(pfn); diff --git a/trunk/arch/x86/kvm/svm.c b/trunk/arch/x86/kvm/svm.c index 71510e07e69e..b1f658ad2f06 100644 --- a/trunk/arch/x86/kvm/svm.c +++ b/trunk/arch/x86/kvm/svm.c @@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; kvm_migrate_timers(vcpu); + svm->asid_generation = 0; } for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) @@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } - svm->vcpu.cpu = svm_data->cpu; svm->asid_generation = svm_data->asid_generation; svm->vmcb->control.asid = svm_data->next_asid++; } @@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm) struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; - if (svm->vcpu.cpu != cpu || - svm->asid_generation != svm_data->asid_generation) + /* FIXME: handle wraparound of asid_generation */ + if (svm->asid_generation != svm_data->asid_generation) new_asid(svm, svm_data); } diff --git a/trunk/arch/x86/kvm/vmx.c b/trunk/arch/x86/kvm/vmx.c index 356a0ce85c68..29f912927a58 100644 --- a/trunk/arch/x86/kvm/vmx.c +++ b/trunk/arch/x86/kvm/vmx.c @@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); enum emulation_result err = EMULATE_DONE; - preempt_enable(); local_irq_enable(); + preempt_enable(); while (!guest_state_valid(vcpu)) { err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); @@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, if (err != EMULATE_DONE) { kvm_report_emulation_failure(vcpu, "emulation failure"); - return; + break; } if (signal_pending(current)) @@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, schedule(); } - local_irq_disable(); preempt_disable(); + local_irq_disable(); vmx->invalid_state_emulation_result = err; } diff --git a/trunk/arch/x86/kvm/x86.c b/trunk/arch/x86/kvm/x86.c index fe5474aec41a..3d4529011828 100644 --- a/trunk/arch/x86/kvm/x86.c +++ b/trunk/arch/x86/kvm/x86.c @@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr) return false; } +static bool valid_pat_type(unsigned t) +{ + return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ +} + +static bool valid_mtrr_type(unsigned t) +{ + return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ +} + +static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + int i; + + if (!msr_mtrr_valid(msr)) + return false; + + if (msr == MSR_IA32_CR_PAT) { + for (i = 0; i < 8; i++) + if (!valid_pat_type((data >> (i * 8)) & 0xff)) + return false; + return true; + } else if (msr == MSR_MTRRdefType) { + if (data & ~0xcff) + return false; + return valid_mtrr_type(data & 0xff); + } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { + for (i = 0; i < 8 ; i++) + if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) + return false; + return true; + } + + /* variable MTRRs */ + return valid_mtrr_type(data & 0xff); +} + static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!msr_mtrr_valid(msr)) + if (!mtrr_valid(vcpu, msr, data)) return 1; if (msr == MSR_MTRRdefType) { @@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp, if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) goto out; r = -E2BIG; - if (n < num_msrs_to_save) + if (n < msr_list.nmsrs) goto out; r = -EFAULT; if (copy_to_user(user_msr_list->indices, &msrs_to_save, num_msrs_to_save * sizeof(u32))) goto out; - if (copy_to_user(user_msr_list->indices - + num_msrs_to_save * sizeof(u32), + if (copy_to_user(user_msr_list->indices + num_msrs_to_save, &emulated_msrs, ARRAY_SIZE(emulated_msrs) * sizeof(u32))) goto out; diff --git a/trunk/drivers/ata/ahci.c b/trunk/drivers/ata/ahci.c index 958c1fa41900..fe3eba5d6b3e 100644 --- a/trunk/drivers/ata/ahci.c +++ b/trunk/drivers/ata/ahci.c @@ -219,6 +219,8 @@ enum { AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */ AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */ AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */ + AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as + link offline */ /* ap->flags bits */ @@ -1663,6 +1665,7 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class, int (*check_ready)(struct ata_link *link)) { struct ata_port *ap = link->ap; + struct ahci_host_priv *hpriv = ap->host->private_data; const char *reason = NULL; unsigned long now, msecs; struct ata_taskfile tf; @@ -1701,12 +1704,21 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class, /* wait for link to become ready */ rc = ata_wait_after_reset(link, deadline, check_ready); - /* link occupied, -ENODEV too is an error */ - if (rc) { + if (rc == -EBUSY && hpriv->flags & AHCI_HFLAG_SRST_TOUT_IS_OFFLINE) { + /* + * Workaround for cases where link online status can't + * be trusted. Treat device readiness timeout as link + * offline. + */ + ata_link_printk(link, KERN_INFO, + "device not ready, treating as offline\n"); + *class = ATA_DEV_NONE; + } else if (rc) { + /* link occupied, -ENODEV too is an error */ reason = "device not ready"; goto fail; - } - *class = ahci_dev_classify(ap); + } else + *class = ahci_dev_classify(ap); DPRINTK("EXIT, class=%u\n", *class); return 0; @@ -1773,7 +1785,8 @@ static int ahci_sb600_softreset(struct ata_link *link, unsigned int *class, irq_sts = readl(port_mmio + PORT_IRQ_STAT); if (irq_sts & PORT_IRQ_BAD_PMP) { ata_link_printk(link, KERN_WARNING, - "failed due to HW bug, retry pmp=0\n"); + "applying SB600 PMP SRST workaround " + "and retrying\n"); rc = ahci_do_softreset(link, class, 0, deadline, ahci_check_ready); } @@ -2726,6 +2739,56 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) return !ver || strcmp(ver, dmi->driver_data) < 0; } +static bool ahci_broken_online(struct pci_dev *pdev) +{ +#define ENCODE_BUSDEVFN(bus, slot, func) \ + (void *)(unsigned long)(((bus) << 8) | PCI_DEVFN((slot), (func))) + static const struct dmi_system_id sysids[] = { + /* + * There are several gigabyte boards which use + * SIMG5723s configured as hardware RAID. Certain + * 5723 firmware revisions shipped there keep the link + * online but fail to answer properly to SRST or + * IDENTIFY when no device is attached downstream + * causing libata to retry quite a few times leading + * to excessive detection delay. + * + * As these firmwares respond to the second reset try + * with invalid device signature, considering unknown + * sig as offline works around the problem acceptably. + */ + { + .ident = "EP45-DQ6", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, + "Gigabyte Technology Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "EP45-DQ6"), + }, + .driver_data = ENCODE_BUSDEVFN(0x0a, 0x00, 0), + }, + { + .ident = "EP45-DS5", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, + "Gigabyte Technology Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "EP45-DS5"), + }, + .driver_data = ENCODE_BUSDEVFN(0x03, 0x00, 0), + }, + { } /* terminate list */ + }; +#undef ENCODE_BUSDEVFN + const struct dmi_system_id *dmi = dmi_first_match(sysids); + unsigned int val; + + if (!dmi) + return false; + + val = (unsigned long)dmi->driver_data; + + return pdev->bus->number == (val >> 8) && pdev->devfn == (val & 0xff); +} + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version; @@ -2841,6 +2904,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "BIOS update required for suspend/resume\n"); } + if (ahci_broken_online(pdev)) { + hpriv->flags |= AHCI_HFLAG_SRST_TOUT_IS_OFFLINE; + dev_info(&pdev->dev, + "online status unreliable, applying workaround\n"); + } + /* CAP.NP sometimes indicate the index of the last enabled * port, at other times, that of the last possible port, so * determining the maximum port number requires looking at diff --git a/trunk/drivers/ata/libata-core.c b/trunk/drivers/ata/libata-core.c index 8ac98ff16d7d..072ba5ea138f 100644 --- a/trunk/drivers/ata/libata-core.c +++ b/trunk/drivers/ata/libata-core.c @@ -4302,6 +4302,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA }, { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA }, + /* this one allows HPA unlocking but fails IOs on the area */ + { "OCZ-VERTEX", "1.30", ATA_HORKAGE_BROKEN_HPA }, + /* Devices which report 1 sector over size HPA */ { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, }, { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, }, diff --git a/trunk/drivers/ata/pata_at91.c b/trunk/drivers/ata/pata_at91.c index 5702affcb325..41c94b1ae493 100644 --- a/trunk/drivers/ata/pata_at91.c +++ b/trunk/drivers/ata/pata_at91.c @@ -250,7 +250,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) ata_port_desc(ap, "no IRQ, using PIO polling"); } - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); if (!info) { dev_err(dev, "failed to allocate memory for private data\n"); @@ -275,7 +275,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) if (!info->ide_addr) { dev_err(dev, "failed to map IO base\n"); ret = -ENOMEM; - goto err_ide_ioremap; + goto err_put; } info->alt_addr = devm_ioremap(dev, @@ -284,7 +284,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) if (!info->alt_addr) { dev_err(dev, "failed to map CTL base\n"); ret = -ENOMEM; - goto err_alt_ioremap; + goto err_put; } ap->ioaddr.cmd_addr = info->ide_addr; @@ -303,13 +303,8 @@ static int __devinit pata_at91_probe(struct platform_device *pdev) irq ? ata_sff_interrupt : NULL, irq_flags, &pata_at91_sht); -err_alt_ioremap: - devm_iounmap(dev, info->ide_addr); - -err_ide_ioremap: +err_put: clk_put(info->mck); - kfree(info); - return ret; } @@ -317,7 +312,6 @@ static int __devexit pata_at91_remove(struct platform_device *pdev) { struct ata_host *host = dev_get_drvdata(&pdev->dev); struct at91_ide_info *info; - struct device *dev = &pdev->dev; if (!host) return 0; @@ -328,11 +322,8 @@ static int __devexit pata_at91_remove(struct platform_device *pdev) if (!info) return 0; - devm_iounmap(dev, info->ide_addr); - devm_iounmap(dev, info->alt_addr); clk_put(info->mck); - kfree(info); return 0; } diff --git a/trunk/drivers/ata/pata_atiixp.c b/trunk/drivers/ata/pata_atiixp.c index bec0b8ade66d..45915566e4e9 100644 --- a/trunk/drivers/ata/pata_atiixp.c +++ b/trunk/drivers/ata/pata_atiixp.c @@ -1,6 +1,7 @@ /* * pata_atiixp.c - ATI PATA for new ATA layer * (C) 2005 Red Hat Inc + * (C) 2009 Bartlomiej Zolnierkiewicz * * Based on * @@ -61,20 +62,19 @@ static void atiixp_set_pio_timing(struct ata_port *ap, struct ata_device *adev, struct pci_dev *pdev = to_pci_dev(ap->host->dev); int dn = 2 * ap->port_no + adev->devno; - - /* Check this is correct - the order is odd in both drivers */ int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); - u16 pio_mode_data, pio_timing_data; + u32 pio_timing_data; + u16 pio_mode_data; pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data); pio_mode_data &= ~(0x7 << (4 * dn)); pio_mode_data |= pio << (4 * dn); pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data); - pci_read_config_word(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data); + pci_read_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data); pio_timing_data &= ~(0xFF << timing_shift); pio_timing_data |= (pio_timings[pio] << timing_shift); - pci_write_config_word(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); + pci_write_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); } /** @@ -119,16 +119,17 @@ static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev) udma_mode_data |= dma << (4 * dn); pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data); } else { - u16 mwdma_timing_data; - /* Check this is correct - the order is odd in both drivers */ int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); + u32 mwdma_timing_data; dma -= XFER_MW_DMA_0; - pci_read_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, &mwdma_timing_data); + pci_read_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING, + &mwdma_timing_data); mwdma_timing_data &= ~(0xFF << timing_shift); mwdma_timing_data |= (mwdma_timings[dma] << timing_shift); - pci_write_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, mwdma_timing_data); + pci_write_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING, + mwdma_timing_data); } /* * We must now look at the PIO mode situation. We may need to diff --git a/trunk/drivers/ata/sata_nv.c b/trunk/drivers/ata/sata_nv.c index b2d11f300c39..86a40582999c 100644 --- a/trunk/drivers/ata/sata_nv.c +++ b/trunk/drivers/ata/sata_nv.c @@ -602,6 +602,7 @@ MODULE_VERSION(DRV_VERSION); static int adma_enabled; static int swncq_enabled = 1; +static int msi_enabled; static void nv_adma_register_mode(struct ata_port *ap) { @@ -2459,6 +2460,11 @@ static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } else if (type == SWNCQ) nv_swncq_host_init(host); + if (msi_enabled) { + dev_printk(KERN_NOTICE, &pdev->dev, "Using MSI\n"); + pci_enable_msi(pdev); + } + pci_set_master(pdev); return ata_host_activate(host, pdev->irq, ipriv->irq_handler, IRQF_SHARED, ipriv->sht); @@ -2558,4 +2564,6 @@ module_param_named(adma, adma_enabled, bool, 0444); MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)"); module_param_named(swncq, swncq_enabled, bool, 0444); MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)"); +module_param_named(msi, msi_enabled, bool, 0444); +MODULE_PARM_DESC(msi, "Enable use of MSI (Default: false)"); diff --git a/trunk/drivers/base/platform.c b/trunk/drivers/base/platform.c index 81cb01bfc356..456594bd97bc 100644 --- a/trunk/drivers/base/platform.c +++ b/trunk/drivers/base/platform.c @@ -483,9 +483,6 @@ int platform_driver_register(struct platform_driver *drv) drv->driver.remove = platform_drv_remove; if (drv->shutdown) drv->driver.shutdown = platform_drv_shutdown; - if (drv->suspend || drv->resume) - pr_warning("Platform driver '%s' needs updating - please use " - "dev_pm_ops\n", drv->driver.name); return driver_register(&drv->driver); } diff --git a/trunk/drivers/char/pty.c b/trunk/drivers/char/pty.c index 6e6942c45f5b..d083c73d784a 100644 --- a/trunk/drivers/char/pty.c +++ b/trunk/drivers/char/pty.c @@ -144,6 +144,8 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf, static int pty_write_room(struct tty_struct *tty) { + if (tty->stopped) + return 0; return pty_space(tty->link); } diff --git a/trunk/drivers/gpu/drm/drm_irq.c b/trunk/drivers/gpu/drm/drm_irq.c index b4a3dbcebe9b..f85aaf21e783 100644 --- a/trunk/drivers/gpu/drm/drm_irq.c +++ b/trunk/drivers/gpu/drm/drm_irq.c @@ -566,7 +566,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data, ret = drm_vblank_get(dev, crtc); if (ret) { - DRM_ERROR("failed to acquire vblank counter, %d\n", ret); + DRM_DEBUG("failed to acquire vblank counter, %d\n", ret); return ret; } seq = drm_vblank_count(dev, crtc); diff --git a/trunk/drivers/gpu/drm/drm_modes.c b/trunk/drivers/gpu/drm/drm_modes.c index 54f492a488a9..7914097b09c6 100644 --- a/trunk/drivers/gpu/drm/drm_modes.c +++ b/trunk/drivers/gpu/drm/drm_modes.c @@ -566,6 +566,8 @@ void drm_mode_connector_list_update(struct drm_connector *connector) found_it = 1; /* if equal delete the probed mode */ mode->status = pmode->status; + /* Merge type bits together */ + mode->type |= pmode->type; list_del(&pmode->head); drm_mode_destroy(connector->dev, pmode); break; diff --git a/trunk/drivers/gpu/drm/i915/i915_irq.c b/trunk/drivers/gpu/drm/i915/i915_irq.c index 83aee80e77a6..7ebc84c2881e 100644 --- a/trunk/drivers/gpu/drm/i915/i915_irq.c +++ b/trunk/drivers/gpu/drm/i915/i915_irq.c @@ -190,7 +190,7 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe) low_frame = pipe ? PIPEBFRAMEPIXEL : PIPEAFRAMEPIXEL; if (!i915_pipe_enabled(dev, pipe)) { - DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); + DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe); return 0; } @@ -219,7 +219,7 @@ u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe) int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45; if (!i915_pipe_enabled(dev, pipe)) { - DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); + DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe); return 0; } diff --git a/trunk/drivers/md/md.c b/trunk/drivers/md/md.c index 5b98bea4ff9b..103f2d33fa89 100644 --- a/trunk/drivers/md/md.c +++ b/trunk/drivers/md/md.c @@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) else new->md_minor = MINOR(unit) >> MdpMinorShift; + mutex_init(&new->open_mutex); mutex_init(&new->reconfig_mutex); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); @@ -1974,17 +1975,14 @@ static void md_update_sb(mddev_t * mddev, int force_change) /* otherwise we have to go forward and ... */ mddev->events ++; if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ - /* .. if the array isn't clean, insist on an odd 'events' */ - if ((mddev->events&1)==0) { - mddev->events++; + /* .. if the array isn't clean, an 'even' event must also go + * to spares. */ + if ((mddev->events&1)==0) nospares = 0; - } } else { - /* otherwise insist on an even 'events' (for clean states) */ - if ((mddev->events&1)) { - mddev->events++; + /* otherwise an 'odd' event must go to spares */ + if ((mddev->events&1)) nospares = 0; - } } } @@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) if (max < mddev->resync_min) return -EINVAL; if (max < mddev->resync_max && + mddev->ro == 0 && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return -EBUSY; @@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) struct gendisk *disk = mddev->gendisk; mdk_rdev_t *rdev; + mutex_lock(&mddev->open_mutex); if (atomic_read(&mddev->openers) > is_open) { printk("md: %s still in use.\n",mdname(mddev)); - return -EBUSY; - } - - if (mddev->pers) { + err = -EBUSY; + } else if (mddev->pers) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) set_disk_ro(disk, 1); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); } - +out: + mutex_unlock(&mddev->open_mutex); + if (err) + return err; /* * Free resources if final stop */ @@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) blk_integrity_unregister(disk); md_new_event(mddev); sysfs_notify_dirent(mddev->sysfs_state); -out: return err; } @@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) } BUG_ON(mddev != bdev->bd_disk->private_data); - if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) + if ((err = mutex_lock_interruptible(&mddev->open_mutex))) goto out; err = 0; atomic_inc(&mddev->openers); - mddev_unlock(mddev); + mutex_unlock(&mddev->open_mutex); check_disk_change(bdev); out: diff --git a/trunk/drivers/md/md.h b/trunk/drivers/md/md.h index 78f03168baf9..f8fc188bc762 100644 --- a/trunk/drivers/md/md.h +++ b/trunk/drivers/md/md.h @@ -223,6 +223,16 @@ struct mddev_s * so we don't loop trying */ int in_sync; /* know to not need resync */ + /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so + * that we are never stopping an array while it is open. + * 'reconfig_mutex' protects all other reconfiguration. + * These locks are separate due to conflicting interactions + * with bdev->bd_mutex. + * Lock ordering is: + * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk + * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open + */ + struct mutex open_mutex; struct mutex reconfig_mutex; atomic_t active; /* general refcount */ atomic_t openers; /* number of active opens */ diff --git a/trunk/drivers/md/raid5.c b/trunk/drivers/md/raid5.c index 2b521ee67dfa..b8a2c5dc67ba 100644 --- a/trunk/drivers/md/raid5.c +++ b/trunk/drivers/md/raid5.c @@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_progress < raid5_size(mddev, 0, 0)) { sector_nr = raid5_size(mddev, 0, 0) - conf->reshape_progress; - } else if (mddev->delta_disks > 0 && + } else if (mddev->delta_disks >= 0 && conf->reshape_progress > 0) sector_nr = conf->reshape_progress; sector_div(sector_nr, new_data_disks); @@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev) (old_disks-max_degraded)); /* here_old is the first stripe that we might need to read * from */ - if (here_new >= here_old) { + if (mddev->delta_disks == 0) { + /* We cannot be sure it is safe to start an in-place + * reshape. It is only safe if user-space if monitoring + * and taking constant backups. + * mdadm always starts a situation like this in + * readonly mode so it can take control before + * allowing any writes. So just check for that. + */ + if ((here_new * mddev->new_chunk_sectors != + here_old * mddev->chunk_sectors) || + mddev->ro == 0) { + printk(KERN_ERR "raid5: in-place reshape must be started" + " in read-only mode - aborting\n"); + return -EINVAL; + } + } else if (mddev->delta_disks < 0 + ? (here_new * mddev->new_chunk_sectors <= + here_old * mddev->chunk_sectors) + : (here_new * mddev->new_chunk_sectors >= + here_old * mddev->chunk_sectors)) { /* Reading from the same stripe as writing to - bad */ printk(KERN_ERR "raid5: reshape_position too early for " "auto-recovery - aborting.\n"); @@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev) mddev->degraded--; for (d = conf->raid_disks ; d < conf->raid_disks - mddev->delta_disks; - d++) - raid5_remove_disk(mddev, d); + d++) { + mdk_rdev_t *rdev = conf->disks[d].rdev; + if (rdev && raid5_remove_disk(mddev, d) == 0) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } } mddev->layout = conf->algorithm; mddev->chunk_sectors = conf->chunk_sectors; diff --git a/trunk/drivers/mtd/maps/sbc8240.c b/trunk/drivers/mtd/maps/sbc8240.c deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/trunk/drivers/mtd/ubi/eba.c b/trunk/drivers/mtd/ubi/eba.c index 0f2034c3ed2f..e4d9ef0c965a 100644 --- a/trunk/drivers/mtd/ubi/eba.c +++ b/trunk/drivers/mtd/ubi/eba.c @@ -1254,6 +1254,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (!ubi->volumes[i]) continue; kfree(ubi->volumes[i]->eba_tbl); + ubi->volumes[i]->eba_tbl = NULL; } return err; } diff --git a/trunk/drivers/mtd/ubi/scan.c b/trunk/drivers/mtd/ubi/scan.c index a423131b6171..b847745394b4 100644 --- a/trunk/drivers/mtd/ubi/scan.c +++ b/trunk/drivers/mtd/ubi/scan.c @@ -781,11 +781,22 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, return -EINVAL; } + /* + * Make sure that all PEBs have the same image sequence number. + * This allows us to detect situations when users flash UBI + * images incorrectly, so that the flash has the new UBI image + * and leftovers from the old one. This feature was added + * relatively recently, and the sequence number was always + * zero, because old UBI implementations always set it to zero. + * For this reasons, we do not panic if some PEBs have zero + * sequence number, while other PEBs have non-zero sequence + * number. + */ image_seq = be32_to_cpu(ech->image_seq); if (!si->image_seq_set) { ubi->image_seq = image_seq; si->image_seq_set = 1; - } else if (ubi->image_seq != image_seq) { + } else if (ubi->image_seq && ubi->image_seq != image_seq) { ubi_err("bad image sequence number %d in PEB %d, " "expected %d", image_seq, pnum, ubi->image_seq); ubi_dbg_dump_ec_hdr(ech); diff --git a/trunk/drivers/pci/hotplug/sgi_hotplug.c b/trunk/drivers/pci/hotplug/sgi_hotplug.c index a4494d78e7c2..8aebe1e9d3d6 100644 --- a/trunk/drivers/pci/hotplug/sgi_hotplug.c +++ b/trunk/drivers/pci/hotplug/sgi_hotplug.c @@ -90,11 +90,10 @@ static struct hotplug_slot_ops sn_hotplug_slot_ops = { static DEFINE_MUTEX(sn_hotplug_mutex); -static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot, - char *buf) +static ssize_t path_show(struct pci_slot *pci_slot, char *buf) { int retval = -ENOENT; - struct slot *slot = bss_hotplug_slot->private; + struct slot *slot = pci_slot->hotplug->private; if (!slot) return retval; @@ -103,7 +102,7 @@ static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot, return retval; } -static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path); +static struct pci_slot_attribute sn_slot_path_attr = __ATTR_RO(path); static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device) { diff --git a/trunk/fs/lockd/host.c b/trunk/fs/lockd/host.c index 7cb076ac6b45..99d737bd4325 100644 --- a/trunk/fs/lockd/host.c +++ b/trunk/fs/lockd/host.c @@ -87,6 +87,18 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap) return hash & (NLM_HOST_NRHASH - 1); } +static void nlm_clear_port(struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = 0; + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = 0; + break; + } +} + /* * Common host lookup routine for server & client */ @@ -165,7 +177,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) host->h_addrbuf = nsm->sm_addrbuf; memcpy(nlm_addr(host), ni->sap, ni->salen); host->h_addrlen = ni->salen; - rpc_set_port(nlm_addr(host), 0); + nlm_clear_port(nlm_addr(host)); memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); host->h_version = ni->version; host->h_proto = ni->protocol; diff --git a/trunk/fs/lockd/mon.c b/trunk/fs/lockd/mon.c index 30c933188dd7..7fce1b525849 100644 --- a/trunk/fs/lockd/mon.c +++ b/trunk/fs/lockd/mon.c @@ -61,6 +61,43 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) return (struct sockaddr *)&nsm->sm_addr; } +static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf, + const size_t len) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr); +} + +static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf, + const size_t len) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) + snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]); + else if (sin6->sin6_scope_id != 0) + snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr, + sin6->sin6_scope_id); + else + snprintf(buf, len, "%pI6", &sin6->sin6_addr); +} + +static void nsm_display_address(const struct sockaddr *sap, + char *buf, const size_t len) +{ + switch (sap->sa_family) { + case AF_INET: + nsm_display_ipv4_address(sap, buf, len); + break; + case AF_INET6: + nsm_display_ipv6_address(sap, buf, len); + break; + default: + snprintf(buf, len, "unsupported address family"); + break; + } +} + static struct rpc_clnt *nsm_create(void) { struct sockaddr_in sin = { @@ -270,11 +307,8 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, memcpy(nsm_addr(new), sap, salen); new->sm_addrlen = salen; nsm_init_private(new); - - if (rpc_ntop(nsm_addr(new), new->sm_addrbuf, - sizeof(new->sm_addrbuf)) == 0) - (void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf), - "unsupported address family"); + nsm_display_address((const struct sockaddr *)&new->sm_addr, + new->sm_addrbuf, sizeof(new->sm_addrbuf)); memcpy(new->sm_name, hostname, hostname_len); new->sm_name[hostname_len] = '\0'; diff --git a/trunk/fs/nfs/callback.c b/trunk/fs/nfs/callback.c index 293fa0528a6e..7f604c7941fb 100644 --- a/trunk/fs/nfs/callback.c +++ b/trunk/fs/nfs/callback.c @@ -43,29 +43,21 @@ static struct svc_program nfs4_callback_program; unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; -#define NFS_CALLBACK_MAXPORTNR (65535U) +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; -static int param_set_portnr(const char *val, struct kernel_param *kp) +static int param_set_port(const char *val, struct kernel_param *kp) { - unsigned long num; - int ret; - - if (!val) - return -EINVAL; - ret = strict_strtoul(val, 0, &num); - if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) return -EINVAL; - *((unsigned int *)kp->arg) = num; + *((int *)kp->arg) = num; return 0; } -static int param_get_portnr(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} -#define param_check_portnr(name, p) __param_check(name, p, unsigned int); - -module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); /* * This is the NFSv4 callback kernel thread. diff --git a/trunk/fs/nfs/client.c b/trunk/fs/nfs/client.c index d36925f9b952..8d25ccb2d51d 100644 --- a/trunk/fs/nfs/client.c +++ b/trunk/fs/nfs/client.c @@ -809,9 +809,6 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; server->options = data->options; - server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| - NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); @@ -1077,6 +1074,10 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + spin_lock(&nfs_client_lock); list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); list_add_tail(&server->master_link, &nfs_volume_list); @@ -1273,7 +1274,7 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + server->caps |= NFS_CAP_ATOMIC_OPEN; server->options = data->options; /* Get a client record */ @@ -1358,6 +1359,10 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + spin_lock(&nfs_client_lock); list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); list_add_tail(&server->master_link, &nfs_volume_list); @@ -1395,7 +1400,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + server->caps |= NFS_CAP_ATOMIC_OPEN; /* Get a client representation. * Note: NFSv4 always uses TCP, */ diff --git a/trunk/fs/nfs/direct.c b/trunk/fs/nfs/direct.c index 489fc01a3204..e4e089a8f294 100644 --- a/trunk/fs/nfs/direct.c +++ b/trunk/fs/nfs/direct.c @@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata) if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_release(calldata); + nfs_readdata_free(data); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->npages, 1, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_readdata_release(data); + nfs_readdata_free(data); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_release(data); + nfs_readdata_free(data); break; } bytes -= pgbase; @@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); + data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); list_del(&data->pages); nfs_direct_release_pages(data->pagevec, data->npages); - nfs_writedata_release(data); + nfs_writedata_free(data); } } @@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata) dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); nfs_direct_write_complete(dreq, data->inode); - nfs_commitdata_release(calldata); + nfs_commit_free(data); } static const struct rpc_call_ops nfs_commit_direct_ops = { @@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; - data->args.context = get_nfs_open_context(dreq->ctx); + data->args.context = dreq->ctx; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->npages, 0, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_writedata_release(data); + nfs_writedata_free(data); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_release(data); + nfs_writedata_free(data); break; } bytes -= pgbase; @@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); + data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; diff --git a/trunk/fs/nfs/file.c b/trunk/fs/nfs/file.c index 5021b75d2d1e..05062329b678 100644 --- a/trunk/fs/nfs/file.c +++ b/trunk/fs/nfs/file.c @@ -327,42 +327,6 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) return nfs_do_fsync(ctx, inode); } -/* - * Decide whether a read/modify/write cycle may be more efficient - * then a modify/write/read cycle when writing to a page in the - * page cache. - * - * The modify/write/read cycle may occur if a page is read before - * being completely filled by the writer. In this situation, the - * page must be completely written to stable storage on the server - * before it can be refilled by reading in the page from the server. - * This can lead to expensive, small, FILE_SYNC mode writes being - * done. - * - * It may be more efficient to read the page first if the file is - * open for reading in addition to writing, the page is not marked - * as Uptodate, it is not dirty or waiting to be committed, - * indicating that it was previously allocated and then modified, - * that there were valid bytes of data in that range of the file, - * and that the new data won't completely replace the old data in - * that range of the file. - */ -static int nfs_want_read_modify_write(struct file *file, struct page *page, - loff_t pos, unsigned len) -{ - unsigned int pglen = nfs_page_length(page); - unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); - unsigned int end = offset + len; - - if ((file->f_mode & FMODE_READ) && /* open for read? */ - !PageUptodate(page) && /* Uptodate? */ - !PagePrivate(page) && /* i/o request already? */ - pglen && /* valid bytes of file? */ - (end < pglen || offset)) /* replace all valid bytes? */ - return 1; - return 0; -} - /* * This does the "real" work of the write. We must allocate and lock the * page to be sent back to the generic routine, which then copies the @@ -376,16 +340,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { int ret; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; + pgoff_t index; struct page *page; - int once_thru = 0; + index = pos >> PAGE_CACHE_SHIFT; dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); -start: /* * Prevent starvation issues if someone is doing a consistency * sync-to-disk @@ -404,13 +367,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, if (ret) { unlock_page(page); page_cache_release(page); - } else if (!once_thru && - nfs_want_read_modify_write(file, page, pos, len)) { - once_thru = 1; - ret = nfs_readpage(file, page); - page_cache_release(page); - if (!ret) - goto start; } return ret; } @@ -523,7 +479,6 @@ const struct address_space_operations nfs_file_aops = { .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, .direct_IO = nfs_direct_IO, - .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, }; diff --git a/trunk/fs/nfs/idmap.c b/trunk/fs/nfs/idmap.c index 21a84d45916f..86147b0ab2cf 100644 --- a/trunk/fs/nfs/idmap.c +++ b/trunk/fs/nfs/idmap.c @@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static unsigned int fnvhash32(const void *, size_t); -static const struct rpc_pipe_ops idmap_upcall_ops = { +static struct rpc_pipe_ops idmap_upcall_ops = { .upcall = idmap_pipe_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, @@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, - "idmap", idmap, &idmap_upcall_ops, 0); + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", + idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); diff --git a/trunk/fs/nfs/inode.c b/trunk/fs/nfs/inode.c index fe5a8b45d867..bd7938eda6a8 100644 --- a/trunk/fs/nfs/inode.c +++ b/trunk/fs/nfs/inode.c @@ -286,11 +286,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; - if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 - && nfs_server_capable(inode, NFS_CAP_MODE)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -335,46 +330,20 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; - else if (nfs_server_capable(inode, NFS_CAP_ATIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; - else if (nfs_server_capable(inode, NFS_CAP_MTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; - else if (nfs_server_capable(inode, NFS_CAP_CTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_CHANGE) nfsi->change_attr = fattr->change_attr; - else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); - else - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA - | NFS_INO_REVAL_PAGECACHE; if (fattr->valid & NFS_ATTR_FATTR_NLINK) inode->i_nlink = fattr->nlink; - else if (nfs_server_capable(inode, NFS_CAP_NLINK)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; - else if (nfs_server_capable(inode, NFS_CAP_OWNER)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; - else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -1176,7 +1145,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) loff_t cur_isize, new_isize; unsigned long invalid = 0; unsigned long now = jiffies; - unsigned long save_cache_validity; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1203,11 +1171,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; - save_cache_validity = nfsi->cache_validity; - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_FORCED - | NFS_INO_REVAL_PAGECACHE); + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -1222,8 +1189,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); nfsi->change_attr = fattr->change_attr; } - } else if (server->caps & NFS_CAP_CHANGE_ATTR) - invalid |= save_cache_validity; + } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { /* NFSv2/v3: Check if the mtime agrees */ @@ -1235,12 +1201,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } - } else if (server->caps & NFS_CAP_MTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA - | NFS_INO_REVAL_PAGECACHE - | NFS_INO_REVAL_FORCED); - + } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { @@ -1254,11 +1215,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } - } else if (server->caps & NFS_CAP_CTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_FORCED); + } /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1274,50 +1231,30 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } - } else - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_REVAL_PAGECACHE - | NFS_INO_REVAL_FORCED); + } if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - else if (server->caps & NFS_CAP_ATIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; } - } else if (server->caps & NFS_CAP_MODE) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_FORCED); - + } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (inode->i_uid != fattr->uid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } else if (server->caps & NFS_CAP_OWNER) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_FORCED); - + } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (inode->i_gid != fattr->gid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } else if (server->caps & NFS_CAP_OWNER_GROUP) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_FORCED); + } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1326,9 +1263,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; inode->i_nlink = fattr->nlink; } - } else if (server->caps & NFS_CAP_NLINK) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_REVAL_FORCED); + } if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* @@ -1358,8 +1293,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; if (!nfs_have_delegation(inode, FMODE_READ) || - (save_cache_validity & NFS_INO_REVAL_FORCED)) + (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; out_changed: diff --git a/trunk/fs/nfs/internal.h b/trunk/fs/nfs/internal.h index 2e485677019c..7dd90a6769d0 100644 --- a/trunk/fs/nfs/internal.h +++ b/trunk/fs/nfs/internal.h @@ -102,7 +102,6 @@ struct nfs_mount_request { }; extern int nfs_mount(struct nfs_mount_request *info); -extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern struct rpc_program nfs_program; @@ -214,6 +213,7 @@ void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ +void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 extern struct file_system_type nfs4_xdev_fs_type; @@ -248,12 +248,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ extern void nfs_write_prepare(struct rpc_task *task, void *calldata); -#ifdef CONFIG_MIGRATION -extern int nfs_migrate_page(struct address_space *, - struct page *, struct page *); -#else -#define nfs_migrate_page NULL -#endif /* nfs4proc.c */ extern int _nfs4_call_sync(struct nfs_server *server, @@ -374,3 +368,24 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >> PAGE_SHIFT; } + +#define IPV6_SCOPE_DELIMITER '%' + +/* + * Set the port number in an address. Be agnostic about the address + * family. + */ +static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) +{ + struct sockaddr_in *ap = (struct sockaddr_in *)sap; + struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; + + switch (sap->sa_family) { + case AF_INET: + ap->sin_port = htons(port); + break; + case AF_INET6: + ap6->sin6_port = htons(port); + break; + } +} diff --git a/trunk/fs/nfs/mount_clnt.c b/trunk/fs/nfs/mount_clnt.c index 0adefc40cc89..38ef9eaec407 100644 --- a/trunk/fs/nfs/mount_clnt.c +++ b/trunk/fs/nfs/mount_clnt.c @@ -209,71 +209,6 @@ int nfs_mount(struct nfs_mount_request *info) goto out; } -/** - * nfs_umount - Notify a server that we have unmounted this export - * @info: pointer to umount request arguments - * - * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always - * use UDP. - */ -void nfs_umount(const struct nfs_mount_request *info) -{ - static const struct rpc_timeout nfs_umnt_timeout = { - .to_initval = 1 * HZ, - .to_maxval = 3 * HZ, - .to_retries = 2, - }; - struct rpc_create_args args = { - .protocol = IPPROTO_UDP, - .address = info->sap, - .addrsize = info->salen, - .timeout = &nfs_umnt_timeout, - .servername = info->hostname, - .program = &mnt_program, - .version = info->version, - .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, - }; - struct mountres result; - struct rpc_message msg = { - .rpc_argp = info->dirpath, - .rpc_resp = &result, - }; - struct rpc_clnt *clnt; - int status; - - if (info->noresvport) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; - - clnt = rpc_create(&args); - if (unlikely(IS_ERR(clnt))) - goto out_clnt_err; - - dprintk("NFS: sending UMNT request for %s:%s\n", - (info->hostname ? info->hostname : "server"), info->dirpath); - - if (info->version == NFS_MNT3_VERSION) - msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT]; - else - msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT]; - - status = rpc_call_sync(clnt, &msg, 0); - rpc_shutdown_client(clnt); - - if (unlikely(status < 0)) - goto out_call_err; - - return; - -out_clnt_err: - dprintk("NFS: failed to create UMNT RPC client, status=%ld\n", - PTR_ERR(clnt)); - return; - -out_call_err: - dprintk("NFS: UMNT request failed, status=%d\n", status); -} - /* * XDR encode/decode functions for MOUNT */ @@ -323,7 +258,7 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res) return -EIO; status = ntohl(*p); - for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) { + for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) { if (mnt_errtbl[i].status == status) { res->errno = mnt_errtbl[i].errno; return 0; @@ -374,7 +309,7 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) return -EIO; status = ntohl(*p); - for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) { + for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) { if (mnt3_errtbl[i].status == status) { res->errno = mnt3_errtbl[i].errno; return 0; @@ -472,13 +407,6 @@ static struct rpc_procinfo mnt_procedures[] = { .p_statidx = MOUNTPROC_MNT, .p_name = "MOUNT", }, - [MOUNTPROC_UMNT] = { - .p_proc = MOUNTPROC_UMNT, - .p_encode = (kxdrproc_t)mnt_enc_dirpath, - .p_arglen = MNT_enc_dirpath_sz, - .p_statidx = MOUNTPROC_UMNT, - .p_name = "UMOUNT", - }, }; static struct rpc_procinfo mnt3_procedures[] = { @@ -491,13 +419,6 @@ static struct rpc_procinfo mnt3_procedures[] = { .p_statidx = MOUNTPROC3_MNT, .p_name = "MOUNT", }, - [MOUNTPROC3_UMNT] = { - .p_proc = MOUNTPROC3_UMNT, - .p_encode = (kxdrproc_t)mnt_enc_dirpath, - .p_arglen = MNT_enc_dirpath_sz, - .p_statidx = MOUNTPROC3_UMNT, - .p_name = "UMOUNT", - }, }; diff --git a/trunk/fs/nfs/nfs4namespace.c b/trunk/fs/nfs/nfs4namespace.c index ef22ee89aa77..2a2a0a7143ad 100644 --- a/trunk/fs/nfs/nfs4namespace.c +++ b/trunk/fs/nfs/nfs4namespace.c @@ -121,11 +121,11 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) continue; - mountdata->addrlen = rpc_pton(buf->data, buf->len, - mountdata->addr, mountdata->addrlen); - if (mountdata->addrlen == 0) + nfs_parse_ip_address(buf->data, buf->len, + mountdata->addr, &mountdata->addrlen); + if (mountdata->addr->sa_family == AF_UNSPEC) continue; - rpc_set_port(mountdata->addr, NFS_PORT); + nfs_set_port(mountdata->addr, NFS_PORT); memcpy(page2, buf->data, buf->len); page2[buf->len] = '\0'; diff --git a/trunk/fs/nfs/nfs4proc.c b/trunk/fs/nfs/nfs4proc.c index be6544aef41f..6917311f201c 100644 --- a/trunk/fs/nfs/nfs4proc.c +++ b/trunk/fs/nfs/nfs4proc.c @@ -61,8 +61,6 @@ #define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) -#define NFS4_MAX_LOOP_ON_RECOVER (10) - struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -428,19 +426,17 @@ nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task) static int nfs4_recover_session(struct nfs4_session *session) { struct nfs_client *clp = session->clp; - unsigned int loop; int ret; - for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { + for (;;) { ret = nfs4_wait_clnt_recover(clp); if (ret != 0) - break; + return ret; if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) break; nfs4_schedule_state_manager(clp); - ret = -EIO; } - return ret; + return 0; } static int nfs41_setup_sequence(struct nfs4_session *session, @@ -1448,20 +1444,18 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - unsigned int loop; int ret; - for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { + for (;;) { ret = nfs4_wait_clnt_recover(clp); if (ret != 0) - break; + return ret; if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) break; nfs4_schedule_state_recovery(clp); - ret = -EIO; } - return ret; + return 0; } /* @@ -2003,34 +1997,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server, &msg, &args, &res, 0); if (status == 0) { memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); - server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| - NFS_CAP_SYMLINKS|NFS_CAP_FILEID| - NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| - NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| - NFS_CAP_CTIME|NFS_CAP_MTIME); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; - if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) - server->caps |= NFS_CAP_FILEID; - if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) - server->caps |= NFS_CAP_MODE; - if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) - server->caps |= NFS_CAP_NLINK; - if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) - server->caps |= NFS_CAP_OWNER; - if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) - server->caps |= NFS_CAP_OWNER_GROUP; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) - server->caps |= NFS_CAP_ATIME; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) - server->caps |= NFS_CAP_CTIME; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) - server->caps |= NFS_CAP_MTIME; - memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; diff --git a/trunk/fs/nfs/nfs4xdr.c b/trunk/fs/nfs/nfs4xdr.c index e65cc2e650c8..617273e7d47f 100644 --- a/trunk/fs/nfs/nfs4xdr.c +++ b/trunk/fs/nfs/nfs4xdr.c @@ -3075,8 +3075,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t return ret; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, - struct nfs_client *clp, uint32_t *uid, int may_sleep) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) { uint32_t len; __be32 *p; @@ -3089,9 +3088,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, READ_BUF(4); READ32(len); READ_BUF(len); - if (!may_sleep) { - /* do nothing */ - } else if (len < XDR_MAX_NETOBJ) { + if (len < XDR_MAX_NETOBJ) { if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) ret = NFS_ATTR_FATTR_OWNER; else @@ -3106,8 +3103,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, return ret; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, - struct nfs_client *clp, uint32_t *gid, int may_sleep) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) { uint32_t len; __be32 *p; @@ -3120,9 +3116,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, READ_BUF(4); READ32(len); READ_BUF(len); - if (!may_sleep) { - /* do nothing */ - } else if (len < XDR_MAX_NETOBJ) { + if (len < XDR_MAX_NETOBJ) { if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) ret = NFS_ATTR_FATTR_GROUP; else @@ -3472,8 +3466,7 @@ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf return status; } -static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, - const struct nfs_server *server, int may_sleep) +static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) { __be32 *savep; uint32_t attrlen, @@ -3545,14 +3538,12 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, goto xdr_error; fattr->valid |= status; - status = decode_attr_owner(xdr, bitmap, server->nfs_client, - &fattr->uid, may_sleep); + status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid); if (status < 0) goto xdr_error; fattr->valid |= status; - status = decode_attr_group(xdr, bitmap, server->nfs_client, - &fattr->gid, may_sleep); + status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4379,8 +4370,7 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct status = decode_open_downgrade(&xdr, res); if (status != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -4407,8 +4397,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac status = decode_access(&xdr, res); if (status != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -4435,8 +4424,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo goto out; if ((status = decode_getfh(&xdr, res->fh)) != 0) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server - ,!RPC_IS_ASYNC(rqstp->rq_task)); + status = decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -4460,8 +4448,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf if ((status = decode_putrootfh(&xdr)) != 0) goto out; if ((status = decode_getfh(&xdr, res->fh)) == 0) - status = decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + status = decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -4486,8 +4473,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem goto out; if ((status = decode_remove(&xdr, &res->cinfo)) != 0) goto out; - decode_getfattr(&xdr, &res->dir_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, &res->dir_attr, res->server); out: return status; } @@ -4517,13 +4503,11 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) goto out; /* Current FH is target directory */ - if (decode_getfattr(&xdr, res->new_fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0) goto out; if ((status = decode_restorefh(&xdr)) != 0) goto out; - decode_getfattr(&xdr, res->old_fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->old_fattr, res->server); out: return status; } @@ -4556,13 +4540,11 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link * Note order: OP_LINK leaves the directory as the current * filehandle. */ - if (decode_getfattr(&xdr, res->dir_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0) goto out; if ((status = decode_restorefh(&xdr)) != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -4591,13 +4573,11 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr goto out; if ((status = decode_getfh(&xdr, res->fh)) != 0) goto out; - if (decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(&xdr, res->fattr, res->server) != 0) goto out; if ((status = decode_restorefh(&xdr)) != 0) goto out; - decode_getfattr(&xdr, res->dir_fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->dir_fattr, res->server); out: return status; } @@ -4629,8 +4609,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g status = decode_putfh(&xdr); if (status) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + status = decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -4737,8 +4716,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos * an ESTALE error. Shouldn't be a problem, * though, since fattr->valid will remain unset. */ - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -4770,13 +4748,11 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr goto out; if (decode_getfh(&xdr, &res->fh) != 0) goto out; - if (decode_getfattr(&xdr, res->f_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)) != 0) + if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) goto out; if (decode_restorefh(&xdr) != 0) goto out; - decode_getfattr(&xdr, res->dir_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->dir_attr, res->server); out: return status; } @@ -4824,8 +4800,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf status = decode_open(&xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->f_attr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->f_attr, res->server); out: return status; } @@ -4852,8 +4827,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se status = decode_setattr(&xdr); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -5027,8 +5001,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ status = decode_write(&xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); if (!status) status = res->count; out: @@ -5057,8 +5030,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri status = decode_commit(&xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -5222,8 +5194,7 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf if (status != 0) goto out; status = decode_delegreturn(&xdr); - decode_getfattr(&xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -5251,8 +5222,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, goto out; xdr_enter_page(&xdr, PAGE_SIZE); status = decode_getfattr(&xdr, &res->fs_locations->fattr, - res->fs_locations->server, - !RPC_IS_ASYNC(req->rq_task)); + res->fs_locations->server); out: return status; } diff --git a/trunk/fs/nfs/read.c b/trunk/fs/nfs/read.c index 73ea5e8d66ce..12c9e66d3f1d 100644 --- a/trunk/fs/nfs/read.c +++ b/trunk/fs/nfs/read.c @@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -static void nfs_readdata_free(struct nfs_read_data *p) +void nfs_readdata_free(struct nfs_read_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } -void nfs_readdata_release(void *data) +static void nfs_readdata_release(struct nfs_read_data *rdata) { - struct nfs_read_data *rdata = data; - put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } diff --git a/trunk/fs/nfs/super.c b/trunk/fs/nfs/super.c index 9c85cdb353aa..0b4cbdc60abd 100644 --- a/trunk/fs/nfs/super.c +++ b/trunk/fs/nfs/super.c @@ -158,7 +158,7 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_mountvers, "mountvers=%s" }, { Opt_nfsvers, "nfsvers=%s" }, { Opt_nfsvers, "vers=%s" }, - { Opt_minorversion, "minorversion=%s" }, + { Opt_minorversion, "minorversion=%u" }, { Opt_sec, "sec=%s" }, { Opt_proto, "proto=%s" }, @@ -742,10 +742,129 @@ static int nfs_verify_server_address(struct sockaddr *addr) } } - dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); return 0; } +static void nfs_parse_ipv4_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; + + if (str_len <= INET_ADDRSTRLEN) { + dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n", + (int)str_len, string); + + sin->sin_family = AF_INET; + *addr_len = sizeof(*sin); + if (in4_pton(string, str_len, addr, '\0', NULL)) + return; + } + + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, + const char *delim, + struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if ((string + str_len) == delim) + return 1; + + if (*delim != IPV6_SCOPE_DELIMITER) + return 0; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) + return 0; + + len = (string + str_len) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (strict_strtoul(p, 10, &scope_id) == 0) { + kfree(p); + return 0; + } + } + + kfree(p); + + sin6->sin6_scope_id = scope_id; + dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); + return 1; + } + + return 0; +} + +static void nfs_parse_ipv6_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; + + if (str_len <= INET6_ADDRSTRLEN) { + dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", + (int)str_len, string); + + sin6->sin6_family = AF_INET6; + *addr_len = sizeof(*sin6); + if (in6_pton(string, str_len, addr, + IPV6_SCOPE_DELIMITER, &delim) != 0) { + if (nfs_parse_ipv6_scope_id(string, str_len, + delim, sin6) != 0) + return; + } + } + + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} +#else +static void nfs_parse_ipv6_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + sap->sa_family = AF_UNSPEC; + *addr_len = 0; +} +#endif + +/* + * Construct a sockaddr based on the contents of a string that contains + * an IP address in presentation format. + * + * If there is a problem constructing the new sockaddr, set the address + * family to AF_UNSPEC. + */ +void nfs_parse_ip_address(char *string, size_t str_len, + struct sockaddr *sap, size_t *addr_len) +{ + unsigned int i, colons; + + colons = 0; + for (i = 0; i < str_len; i++) + if (string[i] == ':') + colons++; + + if (colons >= 2) + nfs_parse_ipv6_address(string, str_len, sap, addr_len); + else + nfs_parse_ipv4_address(string, str_len, sap, addr_len); +} + /* * Sanity check the NFS transport protocol. * @@ -785,6 +904,8 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) /* * Parse the value of the 'sec=' option. + * + * The flavor_len setting is for v4 mounts. */ static int nfs_parse_security_flavors(char *value, struct nfs_parsed_mount_data *mnt) @@ -795,43 +916,53 @@ static int nfs_parse_security_flavors(char *value, switch (match_token(value, nfs_secflavor_tokens, args)) { case Opt_sec_none: + mnt->auth_flavor_len = 0; mnt->auth_flavors[0] = RPC_AUTH_NULL; break; case Opt_sec_sys: + mnt->auth_flavor_len = 0; mnt->auth_flavors[0] = RPC_AUTH_UNIX; break; case Opt_sec_krb5: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; break; case Opt_sec_krb5i: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; break; case Opt_sec_krb5p: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; break; case Opt_sec_lkey: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; break; case Opt_sec_lkeyi: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; break; case Opt_sec_lkeyp: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; break; case Opt_sec_spkm: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; break; case Opt_sec_spkmi: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; break; case Opt_sec_spkmp: + mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; break; default: return 0; } - mnt->auth_flavor_len = 1; return 1; } @@ -870,6 +1001,7 @@ static int nfs_parse_mount_options(char *raw, while ((p = strsep(&raw, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; unsigned long option; + int int_option; int token; if (!*p) @@ -1141,16 +1273,11 @@ static int nfs_parse_mount_options(char *raw, } break; case Opt_minorversion: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) - goto out_invalid_value; - if (option > NFS4_MAX_MINOR_VERSION) - goto out_invalid_value; - mnt->minorversion = option; + if (match_int(args, &int_option)) + return 0; + if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) + return 0; + mnt->minorversion = int_option; break; /* @@ -1225,14 +1352,11 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - mnt->nfs_server.addrlen = - rpc_pton(string, strlen(string), - (struct sockaddr *) - &mnt->nfs_server.address, - sizeof(mnt->nfs_server.address)); + nfs_parse_ip_address(string, strlen(string), + (struct sockaddr *) + &mnt->nfs_server.address, + &mnt->nfs_server.addrlen); kfree(string); - if (mnt->nfs_server.addrlen == 0) - goto out_invalid_address; break; case Opt_clientaddr: string = match_strdup(args); @@ -1252,14 +1376,11 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - mnt->mount_server.addrlen = - rpc_pton(string, strlen(string), - (struct sockaddr *) - &mnt->mount_server.address, - sizeof(mnt->mount_server.address)); + nfs_parse_ip_address(string, strlen(string), + (struct sockaddr *) + &mnt->mount_server.address, + &mnt->mount_server.addrlen); kfree(string); - if (mnt->mount_server.addrlen == 0) - goto out_invalid_address; break; case Opt_lookupcache: string = match_strdup(args); @@ -1311,11 +1432,8 @@ static int nfs_parse_mount_options(char *raw, return 1; -out_invalid_address: - printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); - return 0; out_invalid_value: - printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); + printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); @@ -1326,42 +1444,6 @@ static int nfs_parse_mount_options(char *raw, return 0; } -/* - * Match the requested auth flavors with the list returned by - * the server. Returns zero and sets the mount's authentication - * flavor on success; returns -EACCES if server does not support - * the requested flavor. - */ -static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, - struct nfs_mount_request *request) -{ - unsigned int i, j, server_authlist_len = *(request->auth_flav_len); - - /* - * We avoid sophisticated negotiating here, as there are - * plenty of cases where we can get it wrong, providing - * either too little or too much security. - * - * RFC 2623, section 2.7 suggests we SHOULD prefer the - * flavor listed first. However, some servers list - * AUTH_NULL first. Our caller plants AUTH_SYS, the - * preferred default, in args->auth_flavors[0] if user - * didn't specify sec= mount option. - */ - for (i = 0; i < args->auth_flavor_len; i++) - for (j = 0; j < server_authlist_len; j++) - if (args->auth_flavors[i] == request->auth_flavs[j]) { - dfprintk(MOUNT, "NFS: using auth flavor %d\n", - request->auth_flavs[j]); - args->auth_flavors[0] = request->auth_flavs[j]; - return 0; - } - - dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n"); - nfs_umount(request); - return -EACCES; -} - /* * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. @@ -1369,8 +1451,7 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, static int nfs_try_mount(struct nfs_parsed_mount_data *args, struct nfs_fh *root_fh) { - rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; - unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); + unsigned int auth_flavor_len = 0; struct nfs_mount_request request = { .sap = (struct sockaddr *) &args->mount_server.address, @@ -1378,8 +1459,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .protocol = args->mount_server.protocol, .fh = root_fh, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, - .auth_flav_len = &server_authlist_len, - .auth_flavs = server_authlist, + .auth_flav_len = &auth_flavor_len, }; int status; @@ -1409,25 +1489,19 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, /* * autobind will be used if mount_server.port == 0 */ - rpc_set_port(request.sap, args->mount_server.port); + nfs_set_port(request.sap, args->mount_server.port); /* * Now ask the mount server to map our export path * to a file handle. */ status = nfs_mount(&request); - if (status != 0) { - dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", - request.hostname, status); - return status; - } - - /* - * MNTv1 (NFSv2) does not support auth flavor negotiation. - */ - if (args->mount_server.version != NFS_MNT3_VERSION) + if (status == 0) return 0; - return nfs_walk_authlist(args, &request); + + dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", + request.hostname, status); + return status; } static int nfs_parse_simple_hostname(const char *dev_name, @@ -1602,7 +1676,6 @@ static int nfs_validate_mount_data(void *options, args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; args->auth_flavors[0] = RPC_AUTH_UNIX; - args->auth_flavor_len = 1; switch (data->version) { case 1: @@ -1703,7 +1776,7 @@ static int nfs_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - rpc_set_port((struct sockaddr *)&args->nfs_server.address, + nfs_set_port((struct sockaddr *)&args->nfs_server.address, args->nfs_server.port); nfs_set_mount_transport_protocol(args); @@ -2266,7 +2339,7 @@ static int nfs4_validate_mount_data(void *options, args->acdirmax = NFS_DEF_ACDIRMAX; args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ args->auth_flavors[0] = RPC_AUTH_UNIX; - args->auth_flavor_len = 1; + args->auth_flavor_len = 0; args->minorversion = 0; switch (data->version) { @@ -2336,7 +2409,7 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; - rpc_set_port((struct sockaddr *)&args->nfs_server.address, + nfs_set_port((struct sockaddr *)&args->nfs_server.address, args->nfs_server.port); nfs_validate_transport_protocol(args); diff --git a/trunk/fs/nfs/write.c b/trunk/fs/nfs/write.c index 6240e644f249..a34fae21fe10 100644 --- a/trunk/fs/nfs/write.c +++ b/trunk/fs/nfs/write.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -27,7 +26,6 @@ #include "internal.h" #include "iostat.h" #include "nfs4_fs.h" -#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -89,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -static void nfs_writedata_free(struct nfs_write_data *p) +void nfs_writedata_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } -void nfs_writedata_release(void *data) +static void nfs_writedata_release(struct nfs_write_data *wdata) { - struct nfs_write_data *wdata = data; - put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -222,17 +218,24 @@ static void nfs_end_page_writeback(struct page *page) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } -static struct nfs_page *nfs_find_and_lock_request(struct page *page) +/* + * Find an associated nfs write request, and prepare to flush it out + * May return an error if the user signalled nfs_wait_on_request(). + */ +static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, + struct page *page) { struct inode *inode = page->mapping->host; struct nfs_page *req; int ret; spin_lock(&inode->i_lock); - for (;;) { + for(;;) { req = nfs_page_find_request_locked(page); - if (req == NULL) - break; + if (req == NULL) { + spin_unlock(&inode->i_lock); + return 0; + } if (nfs_set_page_tag_locked(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, @@ -244,40 +247,23 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page) ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) - return ERR_PTR(ret); + return ret; spin_lock(&inode->i_lock); } + if (test_bit(PG_CLEAN, &req->wb_flags)) { + spin_unlock(&inode->i_lock); + BUG(); + } + if (nfs_set_page_writeback(page) != 0) { + spin_unlock(&inode->i_lock); + BUG(); + } spin_unlock(&inode->i_lock); - return req; -} - -/* - * Find an associated nfs write request, and prepare to flush it out - * May return an error if the user signalled nfs_wait_on_request(). - */ -static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page) -{ - struct nfs_page *req; - int ret = 0; - - req = nfs_find_and_lock_request(page); - if (!req) - goto out; - ret = PTR_ERR(req); - if (IS_ERR(req)) - goto out; - - ret = nfs_set_page_writeback(page); - BUG_ON(ret != 0); - BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); - if (!nfs_pageio_add_request(pgio, req)) { nfs_redirty_request(req); - ret = pgio->pg_error; + return pgio->pg_error; } -out: - return ret; + return 0; } static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) @@ -1594,41 +1580,6 @@ int nfs_wb_page(struct inode *inode, struct page* page) return nfs_wb_page_priority(inode, page, FLUSH_STABLE); } -#ifdef CONFIG_MIGRATION -int nfs_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page) -{ - struct nfs_page *req; - int ret; - - if (PageFsCache(page)) - nfs_fscache_release_page(page, GFP_KERNEL); - - req = nfs_find_and_lock_request(page); - ret = PTR_ERR(req); - if (IS_ERR(req)) - goto out; - - ret = migrate_page(mapping, newpage, page); - if (!req) - goto out; - if (ret) - goto out_unlock; - page_cache_get(newpage); - req->wb_page = newpage; - SetPagePrivate(newpage); - set_page_private(newpage, page_private(page)); - ClearPagePrivate(page); - set_page_private(page, 0); - page_cache_release(page); -out_unlock: - nfs_clear_page_tag_locked(req); - nfs_release_request(req); -out: - return ret; -} -#endif - int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", diff --git a/trunk/fs/nfsd/export.c b/trunk/fs/nfsd/export.c index d9462643155c..b92a27629fb7 100644 --- a/trunk/fs/nfsd/export.c +++ b/trunk/fs/nfsd/export.c @@ -85,11 +85,6 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); -} - static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); static struct cache_detail svc_expkey_cache; @@ -264,7 +259,7 @@ static struct cache_detail svc_expkey_cache = { .hash_table = expkey_table, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -360,11 +355,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); -} - static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -734,7 +724,7 @@ struct cache_detail svc_export_cache = { .hash_table = export_table, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/trunk/fs/nfsd/nfs4idmap.c b/trunk/fs/nfsd/nfs4idmap.c index cdfa86fa1471..5b398421b051 100644 --- a/trunk/fs/nfsd/nfs4idmap.c +++ b/trunk/fs/nfsd/nfs4idmap.c @@ -145,12 +145,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } -static int -idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); -} - static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { @@ -181,10 +175,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) } static void -warn_no_idmapd(struct cache_detail *detail, int has_died) +warn_no_idmapd(struct cache_detail *detail) { printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", - has_died ? "died" : "not been started"); + detail->last_close? "died" : "not been started"); } @@ -198,7 +192,7 @@ static struct cache_detail idtoname_cache = { .hash_table = idtoname_table, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -330,12 +324,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } -static int -nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); -} - static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { @@ -375,7 +363,7 @@ static struct cache_detail nametoid_cache = { .hash_table = nametoid_table, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/trunk/fs/nfsd/nfsctl.c b/trunk/fs/nfsd/nfsctl.c index 7e906c5b7671..6d0847562d87 100644 --- a/trunk/fs/nfsd/nfsctl.c +++ b/trunk/fs/nfsd/nfsctl.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -491,18 +490,22 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) * * Input: * buf: '\n'-terminated C string containing a - * presentation format IP address + * presentation format IPv4 address * size: length of C string in @buf * Output: * On success: returns zero if all specified locks were released; * returns one if one or more locks were not released * On error: return code is negative errno value + * + * Note: Only AF_INET client addresses are passed in */ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) { - struct sockaddr_storage address; - struct sockaddr *sap = (struct sockaddr *)&address; - size_t salen = sizeof(address); + struct sockaddr_in sin = { + .sin_family = AF_INET, + }; + int b1, b2, b3, b4; + char c; char *fo_path; /* sanity check */ @@ -516,10 +519,14 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(fo_path, size, sap, salen) == 0) + /* get ipv4 address */ + if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) + return -EINVAL; + if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255) return -EINVAL; + sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4); - return nlmsvc_unlock_all_by_ip(sap); + return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin); } /** diff --git a/trunk/fs/ocfs2/alloc.c b/trunk/fs/ocfs2/alloc.c index 9edcde4974aa..f9a3e8942669 100644 --- a/trunk/fs/ocfs2/alloc.c +++ b/trunk/fs/ocfs2/alloc.c @@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, * immediately to their right. */ left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); - if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { + if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) { + BUG_ON(right_child_el->l_tree_depth); BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); } @@ -2476,15 +2477,37 @@ static int ocfs2_rotate_tree_right(struct inode *inode, return ret; } -static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, - struct ocfs2_path *path) +static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, + int subtree_index, struct ocfs2_path *path) { - int i, idx; + int i, idx, ret; struct ocfs2_extent_rec *rec; struct ocfs2_extent_list *el; struct ocfs2_extent_block *eb; u32 range; + /* + * In normal tree rotation process, we will never touch the + * tree branch above subtree_index and ocfs2_extend_rotate_transaction + * doesn't reserve the credits for them either. + * + * But we do have a special case here which will update the rightmost + * records for all the bh in the path. + * So we have to allocate extra credits and access them. + */ + ret = ocfs2_extend_trans(handle, + handle->h_buffer_credits + subtree_index); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access_path(inode, handle, path); + if (ret) { + mlog_errno(ret); + goto out; + } + /* Path should always be rightmost. */ eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; BUG_ON(eb->h_next_leaf_blk != 0ULL); @@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, ocfs2_journal_dirty(handle, path->p_node[i].bh); } +out: + return ret; } static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, @@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, if (del_right_subtree) { ocfs2_unlink_subtree(inode, handle, left_path, right_path, subtree_index, dealloc); - ocfs2_update_edge_lengths(inode, handle, left_path); + ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, + left_path); + if (ret) { + mlog_errno(ret); + goto out; + } eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); @@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, ocfs2_unlink_subtree(inode, handle, left_path, path, subtree_index, dealloc); - ocfs2_update_edge_lengths(inode, handle, left_path); + ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, + left_path); + if (ret) { + mlog_errno(ret); + goto out; + } eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); diff --git a/trunk/fs/ocfs2/aops.c b/trunk/fs/ocfs2/aops.c index b2c52b3a1484..b401654011a2 100644 --- a/trunk/fs/ocfs2/aops.c +++ b/trunk/fs/ocfs2/aops.c @@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock, (unsigned long long)OCFS2_I(inode)->ip_blkno); mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); dump_stack(); + goto bail; } past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); @@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc { */ unsigned c_new; unsigned c_unwritten; + unsigned c_needs_zero; }; -static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) -{ - return d->c_new || d->c_unwritten; -} - struct ocfs2_write_ctxt { /* Logical cluster position / len of write */ u32 w_cpos; u32 w_clen; + /* First cluster allocated in a nonsparse extend */ + u32 w_first_new_cpos; + struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; /* @@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, return -ENOMEM; wc->w_cpos = pos >> osb->s_clustersize_bits; + wc->w_first_new_cpos = UINT_MAX; cend = (pos + len - 1) >> osb->s_clustersize_bits; wc->w_clen = cend - wc->w_cpos + 1; get_bh(di_bh); @@ -1217,20 +1218,18 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, */ static int ocfs2_write_cluster(struct address_space *mapping, u32 phys, unsigned int unwritten, + unsigned int should_zero, struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *meta_ac, struct ocfs2_write_ctxt *wc, u32 cpos, loff_t user_pos, unsigned user_len) { - int ret, i, new, should_zero = 0; + int ret, i, new; u64 v_blkno, p_blkno; struct inode *inode = mapping->host; struct ocfs2_extent_tree et; new = phys == 0 ? 1 : 0; - if (new || unwritten) - should_zero = 1; - if (new) { u32 tmp_pos; @@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, if (tmpret) { mlog_errno(tmpret); if (ret == 0) - tmpret = ret; + ret = tmpret; } } @@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping, local_len = osb->s_clustersize - cluster_off; ret = ocfs2_write_cluster(mapping, desc->c_phys, - desc->c_unwritten, data_ac, meta_ac, + desc->c_unwritten, + desc->c_needs_zero, + data_ac, meta_ac, wc, desc->c_cpos, pos, local_len); if (ret) { mlog_errno(ret); @@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, * newly allocated cluster. */ desc = &wc->w_desc[0]; - if (ocfs2_should_zero_cluster(desc)) + if (desc->c_needs_zero) ocfs2_figure_cluster_boundaries(osb, desc->c_cpos, &wc->w_target_from, NULL); desc = &wc->w_desc[wc->w_clen - 1]; - if (ocfs2_should_zero_cluster(desc)) + if (desc->c_needs_zero) ocfs2_figure_cluster_boundaries(osb, desc->c_cpos, NULL, @@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode, phys++; } + /* + * If w_first_new_cpos is < UINT_MAX, we have a non-sparse + * file that got extended. w_first_new_cpos tells us + * where the newly allocated clusters are so we can + * zero them. + */ + if (desc->c_cpos >= wc->w_first_new_cpos) { + BUG_ON(phys == 0); + desc->c_needs_zero = 1; + } + desc->c_phys = phys; if (phys == 0) { desc->c_new = 1; + desc->c_needs_zero = 1; *clusters_to_alloc = *clusters_to_alloc + 1; } - if (ext_flags & OCFS2_EXT_UNWRITTEN) + + if (ext_flags & OCFS2_EXT_UNWRITTEN) { desc->c_unwritten = 1; + desc->c_needs_zero = 1; + } num_clusters--; } @@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, if (newsize <= i_size_read(inode)) return 0; - ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); + ret = ocfs2_extend_no_holes(inode, newsize, pos); if (ret) mlog_errno(ret); + wc->w_first_new_cpos = + ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); + return ret; } @@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, struct page **pagep, void **fsdata, struct buffer_head *di_bh, struct page *mmap_page) { - int ret, credits = OCFS2_INODE_UPDATE_CREDITS; + int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; unsigned int clusters_to_alloc, extents_to_split; struct ocfs2_write_ctxt *wc; struct inode *inode = mapping->host; @@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, } - ocfs2_set_target_boundaries(osb, wc, pos, len, - clusters_to_alloc + extents_to_split); + /* + * We have to zero sparse allocated clusters, unwritten extent clusters, + * and non-sparse clusters we just extended. For non-sparse writes, + * we know zeros will only be needed in the first and/or last cluster. + */ + if (clusters_to_alloc || extents_to_split || + wc->w_desc[0].c_needs_zero || + wc->w_desc[wc->w_clen - 1].c_needs_zero) + cluster_of_pages = 1; + else + cluster_of_pages = 0; + + ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { @@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, * extent. */ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, - clusters_to_alloc + extents_to_split, - mmap_page); + cluster_of_pages, mmap_page); if (ret) { mlog_errno(ret); goto out_quota; diff --git a/trunk/fs/ocfs2/dcache.c b/trunk/fs/ocfs2/dcache.c index b574431a031d..2f28b7de2c8d 100644 --- a/trunk/fs/ocfs2/dcache.c +++ b/trunk/fs/ocfs2/dcache.c @@ -310,22 +310,19 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, return ret; } -static DEFINE_SPINLOCK(dentry_list_lock); +DEFINE_SPINLOCK(dentry_list_lock); /* We limit the number of dentry locks to drop in one go. We have * this limit so that we don't starve other users of ocfs2_wq. */ #define DL_INODE_DROP_COUNT 64 /* Drop inode references from dentry locks */ -void ocfs2_drop_dl_inodes(struct work_struct *work) +static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count) { - struct ocfs2_super *osb = container_of(work, struct ocfs2_super, - dentry_lock_work); struct ocfs2_dentry_lock *dl; - int drop_count = DL_INODE_DROP_COUNT; spin_lock(&dentry_list_lock); - while (osb->dentry_lock_list && drop_count--) { + while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) { dl = osb->dentry_lock_list; osb->dentry_lock_list = dl->dl_next; spin_unlock(&dentry_list_lock); @@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work) kfree(dl); spin_lock(&dentry_list_lock); } - if (osb->dentry_lock_list) + spin_unlock(&dentry_list_lock); +} + +void ocfs2_drop_dl_inodes(struct work_struct *work) +{ + struct ocfs2_super *osb = container_of(work, struct ocfs2_super, + dentry_lock_work); + + __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT); + /* + * Don't queue dropping if umount is in progress. We flush the + * list in ocfs2_dismount_volume + */ + spin_lock(&dentry_list_lock); + if (osb->dentry_lock_list && + !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) queue_work(ocfs2_wq, &osb->dentry_lock_work); spin_unlock(&dentry_list_lock); } +/* Flush the whole work queue */ +void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) +{ + __ocfs2_drop_dl_inodes(osb, -1); +} + /* * ocfs2_dentry_iput() and friends. * @@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, /* We leave dropping of inode reference to ocfs2_wq as that can * possibly lead to inode deletion which gets tricky */ spin_lock(&dentry_list_lock); - if (!osb->dentry_lock_list) + if (!osb->dentry_lock_list && + !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) queue_work(ocfs2_wq, &osb->dentry_lock_work); dl->dl_next = osb->dentry_lock_list; osb->dentry_lock_list = dl; diff --git a/trunk/fs/ocfs2/dcache.h b/trunk/fs/ocfs2/dcache.h index faa12e75f98d..f5dd1789acf1 100644 --- a/trunk/fs/ocfs2/dcache.h +++ b/trunk/fs/ocfs2/dcache.h @@ -49,10 +49,13 @@ struct ocfs2_dentry_lock { int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, u64 parent_blkno); +extern spinlock_t dentry_list_lock; + void ocfs2_dentry_lock_put(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl); void ocfs2_drop_dl_inodes(struct work_struct *work); +void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb); struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed); diff --git a/trunk/fs/ocfs2/dlm/dlmast.c b/trunk/fs/ocfs2/dlm/dlmast.c index d07ddbe4b283..81eff8e58322 100644 --- a/trunk/fs/ocfs2/dlm/dlmast.c +++ b/trunk/fs/ocfs2/dlm/dlmast.c @@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) lock->ast_pending, lock->ml.type); BUG(); } - BUG_ON(!list_empty(&lock->ast_list)); if (lock->ast_pending) mlog(0, "lock has an ast getting flushed right now\n"); diff --git a/trunk/fs/ocfs2/dlm/dlmrecovery.c b/trunk/fs/ocfs2/dlm/dlmrecovery.c index bcb9260c3735..43e6e3280569 100644 --- a/trunk/fs/ocfs2/dlm/dlmrecovery.c +++ b/trunk/fs/ocfs2/dlm/dlmrecovery.c @@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", dlm->name, res->lockname.len, res->lockname.name, - orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", + orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery", send_to); /* send it */ diff --git a/trunk/fs/ocfs2/file.c b/trunk/fs/ocfs2/file.c index 62442e413a00..aa501d3f93f1 100644 --- a/trunk/fs/ocfs2/file.c +++ b/trunk/fs/ocfs2/file.c @@ -1851,6 +1851,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (ret) goto out_dio; + count = ocount; ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode)); if (ret) @@ -1918,8 +1919,10 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, mutex_unlock(&inode->i_mutex); + if (written) + ret = written; mlog_exit(ret); - return written ? written : ret; + return ret; } static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, diff --git a/trunk/fs/ocfs2/journal.c b/trunk/fs/ocfs2/journal.c index f033760ecbea..c48b93ac6b65 100644 --- a/trunk/fs/ocfs2/journal.c +++ b/trunk/fs/ocfs2/journal.c @@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb) os->os_osb = osb; os->os_count = 0; os->os_seqno = 0; - os->os_scantime = CURRENT_TIME; mutex_init(&os->os_lock); INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); +} +void ocfs2_orphan_scan_start(struct ocfs2_super *osb) +{ + struct ocfs2_orphan_scan *os; + + os = &osb->osb_orphan_scan; + os->os_scantime = CURRENT_TIME; if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); else { diff --git a/trunk/fs/ocfs2/journal.h b/trunk/fs/ocfs2/journal.h index 5432c7f79cc6..2c3222aec622 100644 --- a/trunk/fs/ocfs2/journal.h +++ b/trunk/fs/ocfs2/journal.h @@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, /* Exported only for the journal struct init code in super.c. Do not call. */ void ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_start(struct ocfs2_super *osb); void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); @@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle, /* extended attribute block update */ #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 +/* Update of a single quota block */ +#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1 + /* global quotafile inode update, data block */ -#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) +#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \ + OCFS2_QUOTA_BLOCK_UPDATE_CREDITS) +#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS /* * The two writes below can accidentally see global info dirty due * to set_info() quotactl so make them prepared for the writes. */ /* quota data block, global info */ /* Write to local quota file */ -#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) +#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \ + OCFS2_QUOTA_BLOCK_UPDATE_CREDITS) /* global quota data block, local quota data block, global quota inode, * global quota info */ -#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) +#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \ + 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS) static inline int ocfs2_quota_trans_credits(struct super_block *sb) { @@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb) return credits; } -/* Number of credits needed for removing quota structure from file */ -int ocfs2_calc_qdel_credits(struct super_block *sb, int type); -/* Number of credits needed for initialization of new quota structure */ -int ocfs2_calc_qinit_credits(struct super_block *sb, int type); - /* group extend. inode update and last group update. */ #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) diff --git a/trunk/fs/ocfs2/ocfs2.h b/trunk/fs/ocfs2/ocfs2.h index c9345ebb8493..39e1d5a39505 100644 --- a/trunk/fs/ocfs2/ocfs2.h +++ b/trunk/fs/ocfs2/ocfs2.h @@ -224,10 +224,12 @@ enum ocfs2_mount_options OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ }; -#define OCFS2_OSB_SOFT_RO 0x0001 -#define OCFS2_OSB_HARD_RO 0x0002 -#define OCFS2_OSB_ERROR_FS 0x0004 -#define OCFS2_DEFAULT_ATIME_QUANTUM 60 +#define OCFS2_OSB_SOFT_RO 0x0001 +#define OCFS2_OSB_HARD_RO 0x0002 +#define OCFS2_OSB_ERROR_FS 0x0004 +#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 + +#define OCFS2_DEFAULT_ATIME_QUANTUM 60 struct ocfs2_journal; struct ocfs2_slot_info; @@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, spin_unlock(&osb->osb_lock); } + +static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, + unsigned long flag) +{ + unsigned long ret; + + spin_lock(&osb->osb_lock); + ret = osb->osb_flags & flag; + spin_unlock(&osb->osb_lock); + return ret; +} + static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, int hard) { diff --git a/trunk/fs/ocfs2/quota.h b/trunk/fs/ocfs2/quota.h index 7365e2e08706..3fb96fcd4c81 100644 --- a/trunk/fs/ocfs2/quota.h +++ b/trunk/fs/ocfs2/quota.h @@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo { unsigned int dqi_chunks; /* Number of chunks in local quota file */ unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ unsigned int dqi_syncms; /* How often should we sync with other nodes */ - unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */ struct list_head dqi_chunk; /* List of chunks */ struct inode *dqi_gqinode; /* Global quota file inode */ struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ diff --git a/trunk/fs/ocfs2/quota_global.c b/trunk/fs/ocfs2/quota_global.c index edfa60cd155c..bf7742d0ee3b 100644 --- a/trunk/fs/ocfs2/quota_global.c +++ b/trunk/fs/ocfs2/quota_global.c @@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot) d->dqb_curspace = cpu_to_le64(m->dqb_curspace); d->dqb_btime = cpu_to_le64(m->dqb_btime); d->dqb_itime = cpu_to_le64(m->dqb_itime); + d->dqb_pad1 = d->dqb_pad2 = 0; } static int ocfs2_global_is_id(void *dp, struct dquot *dquot) @@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); if (gqinode->i_size < off + len) { - down_write(&OCFS2_I(gqinode)->ip_alloc_sem); - err = ocfs2_extend_no_holes(gqinode, off + len, off); - up_write(&OCFS2_I(gqinode)->ip_alloc_sem); - if (err < 0) - goto out; + loff_t rounded_end = + ocfs2_align_bytes_to_blocks(sb, off + len); + + /* Space is already allocated in ocfs2_global_read_dquot() */ err = ocfs2_simple_size_update(gqinode, oinfo->dqi_gqi_bh, - off + len); + rounded_end); if (err < 0) goto out; new = 1; @@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, } if (err) { mlog_errno(err); - return err; + goto out; } lock_buffer(bh); if (new) @@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type) info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); - oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms); oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); @@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type) oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, - oinfo->dqi_syncjiff); + msecs_to_jiffies(oinfo->dqi_syncms)); out_err: mlog_exit(status); @@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type) return err; } +static int ocfs2_global_qinit_alloc(struct super_block *sb, int type) +{ + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + + /* + * We may need to allocate tree blocks and a leaf block but not the + * root block + */ + return oinfo->dqi_gi.dqi_qtree_depth; +} + +static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) +{ + /* We modify all the allocated blocks, tree root, and info block */ + return (ocfs2_global_qinit_alloc(sb, type) + 2) * + OCFS2_QUOTA_BLOCK_UPDATE_CREDITS; +} + /* Read in information from global quota file and acquire a reference to it. * dquot_acquire() has already started the transaction and locked quota file */ int ocfs2_global_read_dquot(struct dquot *dquot) { int err, err2, ex = 0; - struct ocfs2_mem_dqinfo *info = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + struct super_block *sb = dquot->dq_sb; + int type = dquot->dq_type; + struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; + struct ocfs2_super *osb = OCFS2_SB(sb); + struct inode *gqinode = info->dqi_gqinode; + int need_alloc = ocfs2_global_qinit_alloc(sb, type); + handle_t *handle = NULL; err = ocfs2_qinfo_lock(info, 0); if (err < 0) @@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot) OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; + ocfs2_qinfo_unlock(info, 0); + if (!dquot->dq_off) { /* No real quota entry? */ - /* Upgrade to exclusive lock for allocation */ - ocfs2_qinfo_unlock(info, 0); - err = ocfs2_qinfo_lock(info, 1); - if (err < 0) - goto out_qlock; ex = 1; + /* + * Add blocks to quota file before we start a transaction since + * locking allocators ranks above a transaction start + */ + WARN_ON(journal_current_handle()); + down_write(&OCFS2_I(gqinode)->ip_alloc_sem); + err = ocfs2_extend_no_holes(gqinode, + gqinode->i_size + (need_alloc << sb->s_blocksize_bits), + gqinode->i_size); + up_write(&OCFS2_I(gqinode)->ip_alloc_sem); + if (err < 0) + goto out; } + + handle = ocfs2_start_trans(osb, + ocfs2_calc_global_qinit_credits(sb, type)); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out; + } + err = ocfs2_qinfo_lock(info, ex); + if (err < 0) + goto out_trans; err = qtree_write_dquot(&info->dqi_gi, dquot); if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); @@ -438,6 +479,9 @@ int ocfs2_global_read_dquot(struct dquot *dquot) ocfs2_qinfo_unlock(info, 1); else ocfs2_qinfo_unlock(info, 0); +out_trans: + if (handle) + ocfs2_commit_trans(osb, handle); out: if (err < 0) mlog_errno(err); @@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work) dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, - oinfo->dqi_syncjiff); + msecs_to_jiffies(oinfo->dqi_syncms)); } /* @@ -635,20 +679,18 @@ static int ocfs2_write_dquot(struct dquot *dquot) return status; } -int ocfs2_calc_qdel_credits(struct super_block *sb, int type) +static int ocfs2_calc_qdel_credits(struct super_block *sb, int type) { - struct ocfs2_mem_dqinfo *oinfo; - int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, - OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; - - if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) - return 0; - - oinfo = sb_dqinfo(sb, type)->dqi_priv; - /* We modify tree, leaf block, global info, local chunk header, - * global and local inode */ - return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 + - 2 * OCFS2_INODE_UPDATE_CREDITS; + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + /* + * We modify tree, leaf block, global info, local chunk header, + * global and local inode; OCFS2_QINFO_WRITE_CREDITS already + * accounts for inode update + */ + return (oinfo->dqi_gi.dqi_qtree_depth + 2) * + OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + + OCFS2_QINFO_WRITE_CREDITS + + OCFS2_INODE_UPDATE_CREDITS; } static int ocfs2_release_dquot(struct dquot *dquot) @@ -680,33 +722,10 @@ static int ocfs2_release_dquot(struct dquot *dquot) return status; } -int ocfs2_calc_qinit_credits(struct super_block *sb, int type) -{ - struct ocfs2_mem_dqinfo *oinfo; - int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, - OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; - struct ocfs2_dinode *lfe, *gfe; - - if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) - return 0; - - oinfo = sb_dqinfo(sb, type)->dqi_priv; - gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data; - lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data; - /* We can extend local file + global file. In local file we - * can modify info, chunk header block and dquot block. In - * global file we can modify info, tree and leaf block */ - return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) + - ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) + - 3 + oinfo->dqi_gi.dqi_qtree_depth + 2; -} - static int ocfs2_acquire_dquot(struct dquot *dquot) { - handle_t *handle; struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; - struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); int status = 0; mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); @@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) goto out; - handle = ocfs2_start_trans(osb, - ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type)); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto out_ilock; - } status = dquot_acquire(dquot); - ocfs2_commit_trans(osb, handle); -out_ilock: ocfs2_unlock_global_qf(oinfo, 1); out: mlog_exit(status); diff --git a/trunk/fs/ocfs2/quota_local.c b/trunk/fs/ocfs2/quota_local.c index 5a460fa82553..bdb09cb6e1fe 100644 --- a/trunk/fs/ocfs2/quota_local.c +++ b/trunk/fs/ocfs2/quota_local.c @@ -20,6 +20,7 @@ #include "sysfile.h" #include "dlmglue.h" #include "quota.h" +#include "uptodate.h" /* Number of local quota structures per block */ static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) @@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, handle_t *handle; int status; - handle = ocfs2_start_trans(OCFS2_SB(sb), 1); + handle = ocfs2_start_trans(OCFS2_SB(sb), + OCFS2_QUOTA_BLOCK_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, goto out_bh; /* Mark quota file as clean if we are recovering quota file of * some other node. */ - handle = ocfs2_start_trans(osb, 1); + handle = ocfs2_start_trans(osb, + OCFS2_LOCAL_QINFO_WRITE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( struct ocfs2_local_disk_chunk *dchunk; int status; handle_t *handle; - struct buffer_head *bh = NULL; + struct buffer_head *bh = NULL, *dbh = NULL; u64 p_blkno; /* We are protected by dqio_sem so no locking needed */ @@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( mlog_errno(status); goto out; } + /* Local quota info and two new blocks we initialize */ + handle = ocfs2_start_trans(OCFS2_SB(sb), + OCFS2_LOCAL_QINFO_WRITE_CREDITS + + 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out; + } + /* Initialize chunk header */ down_read(&OCFS2_I(lqinode)->ip_alloc_sem); status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, &p_blkno, NULL, NULL); up_read(&OCFS2_I(lqinode)->ip_alloc_sem); if (status < 0) { mlog_errno(status); - goto out; + goto out_trans; } bh = sb_getblk(sb, p_blkno); if (!bh) { status = -ENOMEM; mlog_errno(status); - goto out; + goto out_trans; } dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; - - handle = ocfs2_start_trans(OCFS2_SB(sb), 2); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto out; - } - + ocfs2_set_new_buffer_uptodate(lqinode, bh); status = ocfs2_journal_access_dq(handle, lqinode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + OCFS2_JOURNAL_ACCESS_CREATE); if (status < 0) { mlog_errno(status); goto out_trans; @@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( memset(dchunk->dqc_bitmap, 0, sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - OCFS2_QBLK_RESERVED_SPACE); - set_buffer_uptodate(bh); unlock_buffer(bh); status = ocfs2_journal_dirty(handle, bh); if (status < 0) { @@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( goto out_trans; } + /* Initialize new block with structures */ + down_read(&OCFS2_I(lqinode)->ip_alloc_sem); + status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1, + &p_blkno, NULL, NULL); + up_read(&OCFS2_I(lqinode)->ip_alloc_sem); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + dbh = sb_getblk(sb, p_blkno); + if (!dbh) { + status = -ENOMEM; + mlog_errno(status); + goto out_trans; + } + ocfs2_set_new_buffer_uptodate(lqinode, dbh); + status = ocfs2_journal_access_dq(handle, lqinode, dbh, + OCFS2_JOURNAL_ACCESS_CREATE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + lock_buffer(dbh); + memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE); + unlock_buffer(dbh); + status = ocfs2_journal_dirty(handle, dbh); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + + /* Update local quotafile info */ oinfo->dqi_blocks += 2; oinfo->dqi_chunks++; status = ocfs2_local_write_info(sb, type); @@ -1031,6 +1068,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( ocfs2_commit_trans(OCFS2_SB(sb), handle); out: brelse(bh); + brelse(dbh); kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); return ERR_PTR(status); } @@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( struct ocfs2_local_disk_chunk *dchunk; int epb = ol_quota_entries_per_block(sb); unsigned int chunk_blocks; + struct buffer_head *bh; + u64 p_blkno; int status; handle_t *handle; @@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( mlog_errno(status); goto out; } - handle = ocfs2_start_trans(OCFS2_SB(sb), 2); + + /* Get buffer from the just added block */ + down_read(&OCFS2_I(lqinode)->ip_alloc_sem); + status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, + &p_blkno, NULL, NULL); + up_read(&OCFS2_I(lqinode)->ip_alloc_sem); + if (status < 0) { + mlog_errno(status); + goto out; + } + bh = sb_getblk(sb, p_blkno); + if (!bh) { + status = -ENOMEM; + mlog_errno(status); + goto out; + } + ocfs2_set_new_buffer_uptodate(lqinode, bh); + + /* Local quota info, chunk header and the new block we initialize */ + handle = ocfs2_start_trans(OCFS2_SB(sb), + OCFS2_LOCAL_QINFO_WRITE_CREDITS + + 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto out; } + /* Zero created block */ + status = ocfs2_journal_access_dq(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_CREATE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + lock_buffer(bh); + memset(bh->b_data, 0, sb->s_blocksize); + unlock_buffer(bh); + status = ocfs2_journal_dirty(handle, bh); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + /* Update chunk header */ status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { @@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( mlog_errno(status); goto out_trans; } + /* Update file header */ oinfo->dqi_blocks++; status = ocfs2_local_write_info(sb, type); if (status < 0) { diff --git a/trunk/fs/ocfs2/stack_o2cb.c b/trunk/fs/ocfs2/stack_o2cb.c index 3f661376a2de..e49c41050264 100644 --- a/trunk/fs/ocfs2/stack_o2cb.c +++ b/trunk/fs/ocfs2/stack_o2cb.c @@ -17,6 +17,7 @@ * General Public License for more details. */ +#include #include #include @@ -153,7 +154,7 @@ static int status_map[] = { static int dlm_status_to_errno(enum dlm_status status) { - BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); + BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map)); return status_map[status]; } diff --git a/trunk/fs/ocfs2/super.c b/trunk/fs/ocfs2/super.c index 7efb349fb9bd..b0ee0fdf799a 100644 --- a/trunk/fs/ocfs2/super.c +++ b/trunk/fs/ocfs2/super.c @@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb, } di = (struct ocfs2_dinode *) (*bh)->b_data; memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); + spin_lock_init(&stats->b_lock); status = ocfs2_verify_volume(di, *bh, blksize, stats); if (status >= 0) goto bail; @@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) wake_up(&osb->osb_mount_event); /* Start this when the mount is almost sure of being successful */ - ocfs2_orphan_scan_init(osb); + ocfs2_orphan_scan_start(osb); mlog_exit(status); return status; @@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type, mnt); } +static void ocfs2_kill_sb(struct super_block *sb) +{ + struct ocfs2_super *osb = OCFS2_SB(sb); + + /* Prevent further queueing of inode drop events */ + spin_lock(&dentry_list_lock); + ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); + spin_unlock(&dentry_list_lock); + /* Wait for work to finish and/or remove it */ + cancel_work_sync(&osb->dentry_lock_work); + + kill_block_super(sb); +} + static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", .get_sb = ocfs2_get_sb, /* is this called when we mount * the fs? */ - .kill_sb = kill_block_super, /* set to the generic one - * right now, but do we - * need to change that? */ + .kill_sb = ocfs2_kill_sb, + .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, .next = NULL }; @@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) debugfs_remove(osb->osb_ctxt); + /* + * Flush inode dropping work queue so that deletes are + * performed while the filesystem is still working + */ + ocfs2_drop_all_dl_inodes(osb); + /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); @@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb, snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); + ocfs2_orphan_scan_init(osb); + status = ocfs2_recovery_init(osb); if (status) { mlog(ML_ERROR, "Unable to initialize recovery state\n"); diff --git a/trunk/fs/ocfs2/xattr.c b/trunk/fs/ocfs2/xattr.c index ba320e250747..d1a27cda984f 100644 --- a/trunk/fs/ocfs2/xattr.c +++ b/trunk/fs/ocfs2/xattr.c @@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode, struct ocfs2_xattr_block *xb; struct ocfs2_xattr_value_root *xv; size_t size; - int ret = -ENODATA, name_offset, name_len, block_off, i; + int ret = -ENODATA, name_offset, name_len, i; + int uninitialized_var(block_off); xs->bucket = ocfs2_xattr_bucket_new(inode); if (!xs->bucket) { diff --git a/trunk/fs/proc/base.c b/trunk/fs/proc/base.c index 3ce5ae9e3d2d..175db258942f 100644 --- a/trunk/fs/proc/base.c +++ b/trunk/fs/proc/base.c @@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task) struct mm_struct *mm_for_maps(struct task_struct *task) { - struct mm_struct *mm = get_task_mm(task); - if (!mm) + struct mm_struct *mm; + + if (mutex_lock_killable(&task->cred_guard_mutex)) return NULL; - down_read(&mm->mmap_sem); - task_lock(task); - if (task->mm != mm) - goto out; - if (task->mm != current->mm && - __ptrace_may_access(task, PTRACE_MODE_READ) < 0) - goto out; - task_unlock(task); + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, PTRACE_MODE_READ)) { + mmput(mm); + mm = NULL; + } + mutex_unlock(&task->cred_guard_mutex); + return mm; -out: - task_unlock(task); - up_read(&mm->mmap_sem); - mmput(mm); - return NULL; } static int proc_pid_cmdline(struct task_struct *task, char * buffer) diff --git a/trunk/fs/proc/task_mmu.c b/trunk/fs/proc/task_mmu.c index 6f61b7cc32e0..9bd8be1d235c 100644 --- a/trunk/fs/proc/task_mmu.c +++ b/trunk/fs/proc/task_mmu.c @@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) mm = mm_for_maps(priv->task); if (!mm) return NULL; + down_read(&mm->mmap_sem); tail_vma = get_gate_vma(priv->task); priv->tail_vma = tail_vma; diff --git a/trunk/fs/proc/task_nommu.c b/trunk/fs/proc/task_nommu.c index 64a72e2e7650..8f5c05d3dbd3 100644 --- a/trunk/fs/proc/task_nommu.c +++ b/trunk/fs/proc/task_nommu.c @@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) priv->task = NULL; return NULL; } + down_read(&mm->mmap_sem); /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index 0c93c7ef3d18..965df1227d64 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -770,7 +770,7 @@ xfs_buf_associate_memory( bp->b_pages = NULL; bp->b_addr = mem; - rval = _xfs_buf_get_pages(bp, page_count, 0); + rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); if (rval) return rval; diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c index db15feb906ff..4ece1906bd41 100644 --- a/trunk/fs/xfs/xfs_attr.c +++ b/trunk/fs/xfs/xfs_attr.c @@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, - blkcnt, XFS_BUF_LOCK, &bp); + blkcnt, + XFS_BUF_LOCK | XBF_DONT_BLOCK, + &bp); if (error) return(error); @@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, - blkcnt, XFS_BUF_LOCK); + bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt, + XFS_BUF_LOCK | XBF_DONT_BLOCK); ASSERT(bp); ASSERT(!XFS_BUF_GETERROR(bp)); diff --git a/trunk/fs/xfs/xfs_bmap.c b/trunk/fs/xfs/xfs_bmap.c index 7928b9983c1d..8ee5b5a76a2a 100644 --- a/trunk/fs/xfs/xfs_bmap.c +++ b/trunk/fs/xfs/xfs_bmap.c @@ -6009,7 +6009,7 @@ xfs_getbmap( */ error = ENOMEM; subnex = 16; - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); + map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); if (!map) goto out_unlock_ilock; diff --git a/trunk/fs/xfs/xfs_btree.c b/trunk/fs/xfs/xfs_btree.c index e9df99574829..26717388acf5 100644 --- a/trunk/fs/xfs/xfs_btree.c +++ b/trunk/fs/xfs/xfs_btree.c @@ -120,8 +120,8 @@ xfs_btree_check_sblock( XFS_RANDOM_BTREE_CHECK_SBLOCK))) { if (bp) xfs_buftrace("SBTREE ERROR", bp); - XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, - cur->bc_mp); + XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", + XFS_ERRLEVEL_LOW, cur->bc_mp, block); return XFS_ERROR(EFSCORRUPTED); } return 0; diff --git a/trunk/fs/xfs/xfs_da_btree.c b/trunk/fs/xfs/xfs_da_btree.c index 9ff6e57a5075..2847bbc1c534 100644 --- a/trunk/fs/xfs/xfs_da_btree.c +++ b/trunk/fs/xfs/xfs_da_btree.c @@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */ xfs_da_state_t * xfs_da_state_alloc(void) { - return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); + return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); } /* @@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) int off; if (nbuf == 1) - dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); + dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS); else - dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); + dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); dabuf->dirty = 0; #ifdef XFS_DABUF_DEBUG dabuf->ra = ra; diff --git a/trunk/fs/xfs/xfs_dir2.c b/trunk/fs/xfs/xfs_dir2.c index c657bec6d951..bb1d58eb3982 100644 --- a/trunk/fs/xfs/xfs_dir2.c +++ b/trunk/fs/xfs/xfs_dir2.c @@ -256,7 +256,7 @@ xfs_dir_cilookup_result( !(args->op_flags & XFS_DA_OP_CILOOKUP)) return EEXIST; - args->value = kmem_alloc(len, KM_MAYFAIL); + args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); if (!args->value) return ENOMEM; diff --git a/trunk/fs/xfs/xfs_fsops.c b/trunk/fs/xfs/xfs_fsops.c index cbd451bb4848..2d0b3e1da9e6 100644 --- a/trunk/fs/xfs/xfs_fsops.c +++ b/trunk/fs/xfs/xfs_fsops.c @@ -167,17 +167,25 @@ xfs_growfs_data_private( new = nb - mp->m_sb.sb_dblocks; oagcount = mp->m_sb.sb_agcount; if (nagcount > oagcount) { + void *new_perag, *old_perag; + xfs_filestream_flush(mp); + + new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount, + KM_MAYFAIL); + if (!new_perag) + return XFS_ERROR(ENOMEM); + down_write(&mp->m_peraglock); - mp->m_perag = kmem_realloc(mp->m_perag, - sizeof(xfs_perag_t) * nagcount, - sizeof(xfs_perag_t) * oagcount, - KM_SLEEP); - memset(&mp->m_perag[oagcount], 0, - (nagcount - oagcount) * sizeof(xfs_perag_t)); + memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount); + old_perag = mp->m_perag; + mp->m_perag = new_perag; + mp->m_flags |= XFS_MOUNT_32BITINODES; nagimax = xfs_initialize_perag(mp, nagcount); up_write(&mp->m_peraglock); + + kmem_free(old_perag); } tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); tp->t_flags |= XFS_TRANS_RESERVE; diff --git a/trunk/fs/xfs/xfs_inode.c b/trunk/fs/xfs/xfs_inode.c index 1f22d65fed0a..da428b3fe0f5 100644 --- a/trunk/fs/xfs/xfs_inode.c +++ b/trunk/fs/xfs/xfs_inode.c @@ -343,6 +343,16 @@ xfs_iformat( return XFS_ERROR(EFSCORRUPTED); } + if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && + !ip->i_mount->m_rtdev_targp)) { + xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + "corrupt dinode %Lu, has realtime flag set.", + ip->i_ino); + XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", + XFS_ERRLEVEL_LOW, ip->i_mount, dip); + return XFS_ERROR(EFSCORRUPTED); + } + switch (ip->i_d.di_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 3750f04ede0b..9dbdff3ea484 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -3180,7 +3180,7 @@ xlog_state_sync(xlog_t *log, STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) { - ASSERT(spin_is_locked(&log->l_icloglock)); + assert_spin_locked(&log->l_icloglock); if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index c4eca5ed5dab..492d75bae2bf 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -538,7 +538,9 @@ xfs_readlink_bmap( d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); + bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), + XBF_LOCK | XBF_MAPPED | + XBF_DONT_BLOCK); error = XFS_BUF_GETERROR(bp); if (error) { xfs_ioerror_alert("xfs_readlink", diff --git a/trunk/include/linux/ftrace_event.h b/trunk/include/linux/ftrace_event.h index d7cd193c2277..a81170de7f6b 100644 --- a/trunk/include/linux/ftrace_event.h +++ b/trunk/include/linux/ftrace_event.h @@ -89,7 +89,9 @@ enum print_line_t { TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; - +void tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags, + int pc); struct ring_buffer_event * trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); diff --git a/trunk/include/linux/kvm_host.h b/trunk/include/linux/kvm_host.h index 16713dc672e4..3060bdc35ffe 100644 --- a/trunk/include/linux/kvm_host.h +++ b/trunk/include/linux/kvm_host.h @@ -110,6 +110,7 @@ struct kvm_memory_slot { struct kvm_kernel_irq_routing_entry { u32 gsi; + u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level); union { diff --git a/trunk/include/linux/nfs_fs.h b/trunk/include/linux/nfs_fs.h index fdffb413b192..f6b90240dd41 100644 --- a/trunk/include/linux/nfs_fs.h +++ b/trunk/include/linux/nfs_fs.h @@ -473,7 +473,6 @@ extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); -extern void nfs_writedata_release(void *); /* * Try to write back everything synchronously (but check the @@ -488,7 +487,6 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); -extern void nfs_commitdata_release(void *wdata); #else static inline int nfs_commit_inode(struct inode *inode, int how) @@ -507,6 +505,7 @@ nfs_have_writebacks(struct inode *inode) * Allocate nfs_write_data structures */ extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); +extern void nfs_writedata_free(struct nfs_write_data *); /* * linux/fs/nfs/read.c @@ -515,7 +514,6 @@ extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); -extern void nfs_readdata_release(void *data); extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); @@ -523,6 +521,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, * Allocate nfs_read_data structures */ extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); +extern void nfs_readdata_free(struct nfs_read_data *); /* * linux/fs/nfs3proc.c diff --git a/trunk/include/linux/nfs_fs_sb.h b/trunk/include/linux/nfs_fs_sb.h index 320569eabe3b..19fe15d12042 100644 --- a/trunk/include/linux/nfs_fs_sb.h +++ b/trunk/include/linux/nfs_fs_sb.h @@ -167,15 +167,6 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -#define NFS_CAP_CHANGE_ATTR (1U << 5) -#define NFS_CAP_FILEID (1U << 6) -#define NFS_CAP_MODE (1U << 7) -#define NFS_CAP_NLINK (1U << 8) -#define NFS_CAP_OWNER (1U << 9) -#define NFS_CAP_OWNER_GROUP (1U << 10) -#define NFS_CAP_ATIME (1U << 11) -#define NFS_CAP_CTIME (1U << 12) -#define NFS_CAP_MTIME (1U << 13) /* maximum number of slots to use */ diff --git a/trunk/include/linux/perf_counter.h b/trunk/include/linux/perf_counter.h index e604e6ef72dd..b53f7006cc4e 100644 --- a/trunk/include/linux/perf_counter.h +++ b/trunk/include/linux/perf_counter.h @@ -115,27 +115,44 @@ enum perf_counter_sample_format { PERF_SAMPLE_TID = 1U << 1, PERF_SAMPLE_TIME = 1U << 2, PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_READ = 1U << 4, PERF_SAMPLE_CALLCHAIN = 1U << 5, PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_RAW = 1U << 10, - PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ }; /* - * Bits that can be set in attr.read_format to request that - * reads on the counter should return the indicated quantities, - * in increasing order of bit value, after the counter value. + * The format of the data returned by read() on a perf counter fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; */ enum perf_counter_read_format { PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, - PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ @@ -342,10 +359,8 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u32 pid, tid; - * u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 parent_id; } && PERF_FORMAT_ID + * + * struct read_format values; * }; */ PERF_EVENT_READ = 8, @@ -363,11 +378,24 @@ enum perf_event_type { * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 period; } && PERF_SAMPLE_PERIOD * - * { u64 nr; - * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP + * { struct read_format values; } && PERF_SAMPLE_READ * * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. + * # + * + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW * }; */ PERF_EVENT_SAMPLE = 9, @@ -413,6 +441,11 @@ struct perf_callchain_entry { __u64 ip[PERF_MAX_STACK_DEPTH]; }; +struct perf_raw_record { + u32 size; + void *data; +}; + struct task_struct; /** @@ -681,10 +714,13 @@ struct perf_sample_data { struct pt_regs *regs; u64 addr; u64 period; + struct perf_raw_record *raw; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, struct perf_sample_data *data); +extern void perf_counter_output(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); /* * Return 1 for a software counter, 0 for a hardware counter diff --git a/trunk/include/linux/sunrpc/cache.h b/trunk/include/linux/sunrpc/cache.h index 6f52b4d7c447..2d8b211b9324 100644 --- a/trunk/include/linux/sunrpc/cache.h +++ b/trunk/include/linux/sunrpc/cache.h @@ -59,15 +59,6 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ -struct cache_detail_procfs { - struct proc_dir_entry *proc_ent; - struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; -}; - -struct cache_detail_pipefs { - struct dentry *dir; -}; - struct cache_detail { struct module * owner; int hash_size; @@ -79,17 +70,15 @@ struct cache_detail { char *name; void (*cache_put)(struct kref *); - int (*cache_upcall)(struct cache_detail *, - struct cache_head *); - + void (*cache_request)(struct cache_detail *cd, + struct cache_head *h, + char **bpp, int *blen); int (*cache_parse)(struct cache_detail *, char *buf, int len); int (*cache_show)(struct seq_file *m, struct cache_detail *cd, struct cache_head *h); - void (*warn_no_listener)(struct cache_detail *cd, - int has_died); struct cache_head * (*alloc)(void); int (*match)(struct cache_head *orig, struct cache_head *new); @@ -107,15 +96,13 @@ struct cache_detail { /* fields for communication over channel */ struct list_head queue; + struct proc_dir_entry *proc_ent; + struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; atomic_t readers; /* how many time is /chennel open */ time_t last_close; /* if no readers, when did last close */ time_t last_warn; /* when we last warned about no readers */ - - union { - struct cache_detail_procfs procfs; - struct cache_detail_pipefs pipefs; - } u; + void (*warn_no_listener)(struct cache_detail *cd); }; @@ -140,10 +127,6 @@ struct cache_deferred_req { }; -extern const struct file_operations cache_file_operations_pipefs; -extern const struct file_operations content_file_operations_pipefs; -extern const struct file_operations cache_flush_operations_pipefs; - extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash); @@ -151,13 +134,6 @@ extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); -extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)); - extern void cache_clean_deferred(void *owner); @@ -195,10 +171,6 @@ extern void cache_purge(struct cache_detail *detail); extern int cache_register(struct cache_detail *cd); extern void cache_unregister(struct cache_detail *cd); -extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, - mode_t, struct cache_detail *); -extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); - extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); extern int qword_get(char **bpp, char *dest, int bufsize); diff --git a/trunk/include/linux/sunrpc/clnt.h b/trunk/include/linux/sunrpc/clnt.h index ab3f6e90caa5..37881f1a0bd7 100644 --- a/trunk/include/linux/sunrpc/clnt.h +++ b/trunk/include/linux/sunrpc/clnt.h @@ -9,10 +9,6 @@ #ifndef _LINUX_SUNRPC_CLNT_H #define _LINUX_SUNRPC_CLNT_H -#include -#include -#include - #include #include #include @@ -21,7 +17,6 @@ #include #include #include -#include struct rpc_inode; @@ -55,7 +50,9 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - struct path cl_path; + char cl_pathname[30];/* Path in rpc_pipe_fs */ + struct vfsmount * cl_vfsmnt; + struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; @@ -154,39 +151,5 @@ void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(const char *, const size_t, - struct sockaddr *, const size_t); -char * rpc_sockaddr2uaddr(const struct sockaddr *); -size_t rpc_uaddr2sockaddr(const char *, const size_t, - struct sockaddr *, const size_t); - -static inline unsigned short rpc_get_port(const struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: - return ntohs(((struct sockaddr_in *)sap)->sin_port); - case AF_INET6: - return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - } - return 0; -} - -static inline void rpc_set_port(struct sockaddr *sap, - const unsigned short port) -{ - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); - break; - } -} - -#define IPV6_SCOPE_DELIMITER '%' -#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/trunk/include/linux/sunrpc/msg_prot.h b/trunk/include/linux/sunrpc/msg_prot.h index 77e624883393..70df4f1d8847 100644 --- a/trunk/include/linux/sunrpc/msg_prot.h +++ b/trunk/include/linux/sunrpc/msg_prot.h @@ -189,22 +189,7 @@ typedef __be32 rpc_fraghdr; * Additionally, the two alternative forms specified in Section 2.2 of * [RFC2373] are also acceptable. */ - -#include - -/* Maximum size of the port number part of a universal address */ -#define RPCBIND_MAXUADDRPLEN sizeof(".255.255") - -/* Maximum size of an IPv4 universal address */ -#define RPCBIND_MAXUADDR4LEN \ - (INET_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) - -/* Maximum size of an IPv6 universal address */ -#define RPCBIND_MAXUADDR6LEN \ - (INET6_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) - -/* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... */ -#define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN +#define RPCBIND_MAXUADDRLEN (56u) #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/trunk/include/linux/sunrpc/rpc_pipe_fs.h b/trunk/include/linux/sunrpc/rpc_pipe_fs.h index a92571a34556..cea764c2359f 100644 --- a/trunk/include/linux/sunrpc/rpc_pipe_fs.h +++ b/trunk/include/linux/sunrpc/rpc_pipe_fs.h @@ -32,8 +32,8 @@ struct rpc_inode { wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; + struct rpc_pipe_ops *ops; struct delayed_work queue_timeout; - const struct rpc_pipe_ops *ops; }; static inline struct rpc_inode * @@ -44,19 +44,9 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -struct rpc_clnt; -extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); -extern int rpc_remove_client_dir(struct dentry *); - -struct cache_detail; -extern struct dentry *rpc_create_cache_dir(struct dentry *, - struct qstr *, - mode_t umode, - struct cache_detail *); -extern void rpc_remove_cache_dir(struct dentry *); - -extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, - const struct rpc_pipe_ops *, int flags); +extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); +extern int rpc_rmdir(struct dentry *); +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); diff --git a/trunk/include/linux/sunrpc/xdr.h b/trunk/include/linux/sunrpc/xdr.h index b99c625fddfe..f94bbdc75c4b 100644 --- a/trunk/include/linux/sunrpc/xdr.h +++ b/trunk/include/linux/sunrpc/xdr.h @@ -117,9 +117,8 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le static inline __be32 * xdr_encode_hyper(__be32 *p, __u64 val) { - *p++ = htonl(val >> 32); - *p++ = htonl(val & 0xFFFFFFFF); - return p; + *(__be64 *)p = cpu_to_be64(val); + return p + 2; } static inline __be32 * diff --git a/trunk/include/linux/sunrpc/xprt.h b/trunk/include/linux/sunrpc/xprt.h index c090df442572..1175d58efc2e 100644 --- a/trunk/include/linux/sunrpc/xprt.h +++ b/trunk/include/linux/sunrpc/xprt.h @@ -38,8 +38,10 @@ enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, + RPC_DISPLAY_ALL, RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, + RPC_DISPLAY_UNIVERSAL_ADDR, RPC_DISPLAY_NETID, RPC_DISPLAY_MAX, }; diff --git a/trunk/include/linux/wait.h b/trunk/include/linux/wait.h index 6788e1a4d4ca..cf3c2f5dba51 100644 --- a/trunk/include/linux/wait.h +++ b/trunk/include/linux/wait.h @@ -77,7 +77,14 @@ struct task_struct; #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } -extern void init_waitqueue_head(wait_queue_head_t *q); +extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *); + +#define init_waitqueue_head(q) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_waitqueue_head((q), &__key); \ + } while (0) #ifdef CONFIG_LOCKDEP # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ diff --git a/trunk/include/trace/ftrace.h b/trunk/include/trace/ftrace.h index 1867553c61e5..f64fbaae781a 100644 --- a/trunk/include/trace/ftrace.h +++ b/trunk/include/trace/ftrace.h @@ -144,6 +144,9 @@ #undef TP_fast_assign #define TP_fast_assign(args...) args +#undef TP_perf_assign +#define TP_perf_assign(args...) + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ @@ -345,6 +348,56 @@ static inline int ftrace_get_offsets_##call( \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#ifdef CONFIG_EVENT_PROFILE + +/* + * Generate the functions needed for tracepoint perf_counter support. + * + * NOTE: The insertion profile callback (ftrace_profile_) is defined later + * + * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) + * { + * int ret = 0; + * + * if (!atomic_inc_return(&event_call->profile_count)) + * ret = register_trace_(ftrace_profile_); + * + * return ret; + * } + * + * static void ftrace_profile_disable_(struct ftrace_event_call *event_call) + * { + * if (atomic_add_negative(-1, &event->call->profile_count)) + * unregister_trace_(ftrace_profile_); + * } + * + */ + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + \ +static void ftrace_profile_##call(proto); \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&event_call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#endif + /* * Stage 4 of the trace events. * @@ -447,28 +500,6 @@ static inline int ftrace_get_offsets_##call( \ #define TP_FMT(fmt, args...) fmt "\n", ##args #ifdef CONFIG_EVENT_PROFILE -#define _TRACE_PROFILE(call, proto, args) \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int); \ - perf_tpcounter_event(event_##call.id); \ -} \ - \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ -{ \ - int ret = 0; \ - \ - if (!atomic_inc_return(&event_call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ -} \ - \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ -{ \ - if (atomic_add_negative(-1, &event_call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ -} #define _TRACE_PROFILE_INIT(call) \ .profile_count = ATOMIC_INIT(-1), \ @@ -476,7 +507,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ .profile_disable = ftrace_profile_disable_##call, #else -#define _TRACE_PROFILE(call, proto, args) #define _TRACE_PROFILE_INIT(call) #endif @@ -502,7 +532,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ \ static struct ftrace_event_call event_##call; \ \ @@ -586,6 +615,110 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef _TRACE_PROFILE +/* + * Define the insertion callback to profile events + * + * The job is very similar to ftrace_raw_event_ except that we don't + * insert in the ring buffer but in a perf counter. + * + * static void ftrace_profile_(proto) + * { + * struct ftrace_data_offsets_ __maybe_unused __data_offsets; + * struct ftrace_event_call *event_call = &event_; + * extern void perf_tpcounter_event(int, u64, u64, void *, int); + * struct ftrace_raw_##call *entry; + * u64 __addr = 0, __count = 1; + * unsigned long irq_flags; + * int __entry_size; + * int __data_size; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * __data_size = ftrace_get_offsets_(&__data_offsets, args); + * + * // Below we want to get the aligned size by taking into account + * // the u32 field that will later store the buffer size + * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), + * sizeof(u64)); + * __entry_size -= sizeof(u32); + * + * do { + * char raw_data[__entry_size]; <- allocate our sample in the stack + * struct trace_entry *ent; + * + * zero dead bytes from alignment to avoid stack leak to userspace: + * + * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; + * entry = (struct ftrace_raw_ *)raw_data; + * ent = &entry->ent; + * tracing_generic_entry_update(ent, irq_flags, pc); + * ent->type = event_call->id; + * + * <- do some jobs with dynamic arrays + * + * <- affect our values + * + * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * __entry_size); <- submit them to perf counter + * } while (0); + * + * } + */ + +#ifdef CONFIG_EVENT_PROFILE + +#undef __perf_addr +#define __perf_addr(a) __addr = (a) + +#undef __perf_count +#define __perf_count(c) __count = (c) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + struct ftrace_event_call *event_call = &event_##call; \ + extern void perf_tpcounter_event(int, u64, u64, void *, int); \ + struct ftrace_raw_##call *entry; \ + u64 __addr = 0, __count = 1; \ + unsigned long irq_flags; \ + int __entry_size; \ + int __data_size; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ + __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ + sizeof(u64)); \ + __entry_size -= sizeof(u32); \ + \ + do { \ + char raw_data[__entry_size]; \ + struct trace_entry *ent; \ + \ + *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ + entry = (struct ftrace_raw_##call *)raw_data; \ + ent = &entry->ent; \ + tracing_generic_entry_update(ent, irq_flags, pc); \ + ent->type = event_call->id; \ + \ + tstruct \ + \ + { assign; } \ + \ + perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + __entry_size); \ + } while (0); \ + \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#endif /* CONFIG_EVENT_PROFILE */ + #undef _TRACE_PROFILE_INIT diff --git a/trunk/kernel/futex.c b/trunk/kernel/futex.c index 0672ff88f159..e18cfbdc7190 100644 --- a/trunk/kernel/futex.c +++ b/trunk/kernel/futex.c @@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue * q: the futex_q * key: the key of the requeue target futex + * hb: the hash_bucket of the requeue target futex * * During futex_requeue, with requeue_pi=1, it is possible to acquire the * target futex if it is uncontended or via a lock steal. Set the futex_q key * to the requeue target futex so the waiter can detect the wakeup on the right * futex, but remove it from the hb and NULL the rt_waiter so it can detect - * atomic lock acquisition. Must be called with the q->lock_ptr held. + * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock + * to protect access to the pi_state to fixup the owner later. Must be called + * with both q->lock_ptr and hb->lock held. */ static inline -void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) +void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, + struct futex_hash_bucket *hb) { drop_futex_key_refs(&q->key); get_futex_key_refs(key); @@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) WARN_ON(!q->rt_waiter); q->rt_waiter = NULL; + q->lock_ptr = &hb->lock; +#ifdef CONFIG_DEBUG_PI_LIST + q->list.plist.lock = &hb->lock; +#endif + wake_up_state(q->task, TASK_NORMAL); } @@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, set_waiters); if (ret == 1) - requeue_pi_wake_futex(top_waiter, key2); + requeue_pi_wake_futex(top_waiter, key2, hb2); return ret; } @@ -1247,8 +1256,15 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, if (!match_futex(&this->key, &key1)) continue; - WARN_ON(!requeue_pi && this->rt_waiter); - WARN_ON(requeue_pi && !this->rt_waiter); + /* + * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always + * be paired with each other and no other futex ops. + */ + if ((requeue_pi && !this->rt_waiter) || + (!requeue_pi && this->rt_waiter)) { + ret = -EINVAL; + break; + } /* * Wake nr_wake waiters. For requeue_pi, if we acquired the @@ -1273,7 +1289,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, this->task, 1); if (ret == 1) { /* We got the lock. */ - requeue_pi_wake_futex(this, &key2); + requeue_pi_wake_futex(this, &key2, hb2); continue; } else if (ret) { /* -EDEADLK */ diff --git a/trunk/kernel/futex_compat.c b/trunk/kernel/futex_compat.c index d607a5b9ee29..235716556bf1 100644 --- a/trunk/kernel/futex_compat.c +++ b/trunk/kernel/futex_compat.c @@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, int cmd = op & FUTEX_CMD_MASK; if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET)) { + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { if (get_compat_timespec(&ts, utime)) return -EFAULT; if (!timespec_valid(&ts)) @@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, t = ktime_add_safe(ktime_get(), t); tp = &t; } - if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (int) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); diff --git a/trunk/kernel/irq/manage.c b/trunk/kernel/irq/manage.c index 61c679db4687..d222515a5a06 100644 --- a/trunk/kernel/irq/manage.c +++ b/trunk/kernel/irq/manage.c @@ -761,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action, **action_ptr; - struct task_struct *irqthread; unsigned long flags; WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); @@ -809,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) desc->chip->disable(irq); } - irqthread = action->thread; - action->thread = NULL; - spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); @@ -819,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) /* Make sure it's not being used on another CPU: */ synchronize_irq(irq); - if (irqthread) { - if (!test_bit(IRQTF_DIED, &action->thread_flags)) - kthread_stop(irqthread); - put_task_struct(irqthread); - } - #ifdef CONFIG_DEBUG_SHIRQ /* * It's a shared IRQ -- the driver ought to be prepared for an IRQ @@ -840,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) local_irq_restore(flags); } #endif + + if (action->thread) { + if (!test_bit(IRQTF_DIED, &action->thread_flags)) + kthread_stop(action->thread); + put_task_struct(action->thread); + } + return action; } diff --git a/trunk/kernel/irq/numa_migrate.c b/trunk/kernel/irq/numa_migrate.c index 2f69bee57bf2..3fd30197da2e 100644 --- a/trunk/kernel/irq/numa_migrate.c +++ b/trunk/kernel/irq/numa_migrate.c @@ -107,8 +107,8 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) { - /* those all static, do move them */ - if (desc->irq < NR_IRQS_LEGACY) + /* those static or target node is -1, do not move them */ + if (desc->irq < NR_IRQS_LEGACY || node == -1) return desc; if (desc->node != node) diff --git a/trunk/kernel/lockdep_proc.c b/trunk/kernel/lockdep_proc.c index d7135aa2d2c4..e94caa666dba 100644 --- a/trunk/kernel/lockdep_proc.c +++ b/trunk/kernel/lockdep_proc.c @@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void) &proc_lockdep_stats_operations); #ifdef CONFIG_LOCK_STAT - proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations); + proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL, + &proc_lock_stat_operations); #endif return 0; diff --git a/trunk/kernel/perf_counter.c b/trunk/kernel/perf_counter.c index 673c1aaf7332..534e20d14d63 100644 --- a/trunk/kernel/perf_counter.c +++ b/trunk/kernel/perf_counter.c @@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } void __weak hw_perf_counter_setup(int cpu) { barrier(); } +void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, @@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter, return; counter->state = PERF_COUNTER_STATE_INACTIVE; + if (counter->pending_disable) { + counter->pending_disable = 0; + counter->state = PERF_COUNTER_STATE_OFF; + } counter->tstamp_stopped = ctx->time; counter->pmu->disable(counter); counter->oncpu = -1; @@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } -static u64 perf_counter_read_tree(struct perf_counter *counter) +static int perf_counter_read_size(struct perf_counter *counter) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_GROUP) { + nr += counter->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + + return size; +} + +static u64 perf_counter_read_value(struct perf_counter *counter) { struct perf_counter *child; u64 total = 0; @@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter) return total; } +static int perf_counter_read_entry(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + int n = 0, count = 0; + u64 values[2]; + + values[n++] = perf_counter_read_value(counter); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; +} + +static int perf_counter_read_group(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + struct perf_counter *leader = counter->group_leader, *sub; + int n = 0, size = 0, err = -EFAULT; + u64 values[3]; + + values[n++] = 1 + leader->nr_siblings; + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } + + size = n * sizeof(u64); + + if (copy_to_user(buf, values, size)) + return -EFAULT; + + err = perf_counter_read_entry(leader, read_format, buf + size); + if (err < 0) + return err; + + size += err; + + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + err = perf_counter_read_entry(counter, read_format, + buf + size); + if (err < 0) + return err; + + size += err; + } + + return size; +} + +static int perf_counter_read_one(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + u64 values[4]; + int n = 0; + + values[n++] = perf_counter_read_value(counter); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + if (copy_to_user(buf, values, n * sizeof(u64))) + return -EFAULT; + + return n * sizeof(u64); +} + /* * Read the performance counter - simple non blocking version for now */ static ssize_t perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { - u64 values[4]; - int n; + u64 read_format = counter->attr.read_format; + int ret; /* * Return end-of-file for a read on a counter that is in @@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (counter->state == PERF_COUNTER_STATE_ERROR) return 0; + if (count < perf_counter_read_size(counter)) + return -ENOSPC; + WARN_ON_ONCE(counter->ctx->parent_ctx); mutex_lock(&counter->child_mutex); - values[0] = perf_counter_read_tree(counter); - n = 1; - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - if (counter->attr.read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); + if (read_format & PERF_FORMAT_GROUP) + ret = perf_counter_read_group(counter, read_format, buf); + else + ret = perf_counter_read_one(counter, read_format, buf); mutex_unlock(&counter->child_mutex); - if (count < n * sizeof(u64)) - return -EINVAL; - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; + return ret; } static ssize_t @@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry) if (counter->pending_disable) { counter->pending_disable = 0; - perf_counter_disable(counter); + __perf_counter_disable(counter); } if (counter->pending_wakeup) { @@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } -static void perf_counter_output(struct perf_counter *counter, int nmi, +static void perf_output_read_one(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + u64 read_format = counter->attr.read_format; + u64 values[4]; + int n = 0; + + values[n++] = atomic64_read(&counter->count); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + perf_output_copy(handle, values, n * sizeof(u64)); +} + +/* + * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult. + */ +static void perf_output_read_group(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + struct perf_counter *leader = counter->group_leader, *sub; + u64 read_format = counter->attr.read_format; + u64 values[5]; + int n = 0; + + values[n++] = 1 + leader->nr_siblings; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = leader->total_time_enabled; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = leader->total_time_running; + + if (leader != counter) + leader->pmu->read(leader); + + values[n++] = atomic64_read(&leader->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(leader); + + perf_output_copy(handle, values, n * sizeof(u64)); + + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + n = 0; + + if (sub != counter) + sub->pmu->read(sub); + + values[n++] = atomic64_read(&sub->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(sub); + + perf_output_copy(handle, values, n * sizeof(u64)); + } +} + +static void perf_output_read(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + if (counter->attr.read_format & PERF_FORMAT_GROUP) + perf_output_read_group(handle, counter); + else + perf_output_read_one(handle, counter); +} + +void perf_counter_output(struct perf_counter *counter, int nmi, struct perf_sample_data *data) { int ret; @@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, struct { u32 pid, tid; } tid_entry; - struct { - u64 id; - u64 counter; - } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; u64 time; @@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_PERIOD) header.size += sizeof(u64); - if (sample_type & PERF_SAMPLE_GROUP) { - header.size += sizeof(u64) + - counter->nr_siblings * sizeof(group_entry); - } + if (sample_type & PERF_SAMPLE_READ) + header.size += perf_counter_read_size(counter); if (sample_type & PERF_SAMPLE_CALLCHAIN) { callchain = perf_callchain(data->regs); @@ -2714,6 +2883,18 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, header.size += sizeof(u64); } + if (sample_type & PERF_SAMPLE_RAW) { + int size = sizeof(u32); + + if (data->raw) + size += data->raw->size; + else + size += sizeof(u32); + + WARN_ON_ONCE(size & (sizeof(u64)-1)); + header.size += size; + } + ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -2747,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_PERIOD) perf_output_put(&handle, data->period); - /* - * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. - */ - if (sample_type & PERF_SAMPLE_GROUP) { - struct perf_counter *leader, *sub; - u64 nr = counter->nr_siblings; - - perf_output_put(&handle, nr); - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->pmu->read(sub); - - group_entry.id = primary_counter_id(sub); - group_entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, group_entry); - } - } + if (sample_type & PERF_SAMPLE_READ) + perf_output_read(&handle, counter); if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (callchain) @@ -2777,6 +2940,22 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } + if (sample_type & PERF_SAMPLE_RAW) { + if (data->raw) { + perf_output_put(&handle, data->raw->size); + perf_output_copy(&handle, data->raw->data, data->raw->size); + } else { + struct { + u32 size; + u32 data; + } raw = { + .size = sizeof(u32), + .data = 0, + }; + perf_output_put(&handle, raw); + } + } + perf_output_end(&handle); } @@ -2789,8 +2968,6 @@ struct perf_read_event { u32 pid; u32 tid; - u64 value; - u64 format[3]; }; static void @@ -2802,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter, .header = { .type = PERF_EVENT_READ, .misc = 0, - .size = sizeof(event) - sizeof(event.format), + .size = sizeof(event) + perf_counter_read_size(counter), }, .pid = perf_counter_pid(counter, task), .tid = perf_counter_tid(counter, task), - .value = atomic64_read(&counter->count), }; - int ret, i = 0; - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - event.header.size += sizeof(u64); - event.format[i++] = counter->total_time_enabled; - } - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - event.header.size += sizeof(u64); - event.format[i++] = counter->total_time_running; - } - - if (counter->attr.read_format & PERF_FORMAT_ID) { - event.header.size += sizeof(u64); - event.format[i++] = primary_counter_id(counter); - } + int ret; ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); if (ret) return; - perf_output_copy(&handle, &event, event.header.size); + perf_output_put(&handle, event); + perf_output_read(&handle, counter); + perf_output_end(&handle); } @@ -2840,7 +3003,8 @@ perf_counter_read_event(struct perf_counter *counter, */ struct perf_task_event { - struct task_struct *task; + struct task_struct *task; + struct perf_counter_context *task_ctx; struct { struct perf_event_header header; @@ -2864,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter, return; task_event->event.pid = perf_counter_pid(counter, task); - task_event->event.ppid = perf_counter_pid(counter, task->real_parent); + task_event->event.ppid = perf_counter_pid(counter, current); task_event->event.tid = perf_counter_tid(counter, task); - task_event->event.ptid = perf_counter_tid(counter, task->real_parent); + task_event->event.ptid = perf_counter_tid(counter, current); perf_output_put(&handle, task_event->event); perf_output_end(&handle); @@ -2900,24 +3064,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx, static void perf_counter_task_event(struct perf_task_event *task_event) { struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; + struct perf_counter_context *ctx = task_event->task_ctx; cpuctx = &get_cpu_var(perf_cpu_context); perf_counter_task_ctx(&cpuctx->ctx, task_event); put_cpu_var(perf_cpu_context); rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); + if (!ctx) + ctx = rcu_dereference(task_event->task->perf_counter_ctxp); if (ctx) perf_counter_task_ctx(ctx, task_event); rcu_read_unlock(); } -static void perf_counter_task(struct task_struct *task, int new) +static void perf_counter_task(struct task_struct *task, + struct perf_counter_context *task_ctx, + int new) { struct perf_task_event task_event; @@ -2927,8 +3090,9 @@ static void perf_counter_task(struct task_struct *task, int new) return; task_event = (struct perf_task_event){ - .task = task, - .event = { + .task = task, + .task_ctx = task_ctx, + .event = { .header = { .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, .misc = 0, @@ -2946,7 +3110,7 @@ static void perf_counter_task(struct task_struct *task, int new) void perf_counter_fork(struct task_struct *task) { - perf_counter_task(task, 1); + perf_counter_task(task, NULL, 1); } /* @@ -3335,125 +3499,111 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi, * Generic software counter infrastructure */ -static void perf_swcounter_update(struct perf_counter *counter) +/* + * We directly increment counter->count and keep a second value in + * counter->hw.period_left to count intervals. This period counter + * is kept in the range [-sample_period, 0] so that we can use the + * sign as trigger. + */ + +static u64 perf_swcounter_set_period(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; - u64 prev, now; - s64 delta; + u64 period = hwc->last_period; + u64 nr, offset; + s64 old, val; + + hwc->last_period = hwc->sample_period; again: - prev = atomic64_read(&hwc->prev_count); - now = atomic64_read(&hwc->count); - if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) - goto again; + old = val = atomic64_read(&hwc->period_left); + if (val < 0) + return 0; - delta = now - prev; + nr = div64_u64(period + val, period); + offset = nr * period; + val -= offset; + if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) + goto again; - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); + return nr; } -static void perf_swcounter_set_period(struct perf_counter *counter) +static void perf_swcounter_overflow(struct perf_counter *counter, + int nmi, struct perf_sample_data *data) { struct hw_perf_counter *hwc = &counter->hw; - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; + u64 overflow; - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - } + data->period = counter->hw.last_period; + overflow = perf_swcounter_set_period(counter); - if (unlikely(left <= 0)) { - left += period; - atomic64_add(period, &hwc->period_left); - hwc->last_period = period; - } + if (hwc->interrupts == MAX_INTERRUPTS) + return; - atomic64_set(&hwc->prev_count, -left); - atomic64_set(&hwc->count, -left); + for (; overflow; overflow--) { + if (perf_counter_overflow(counter, nmi, data)) { + /* + * We inhibit the overflow from happening when + * hwc->interrupts == MAX_INTERRUPTS. + */ + break; + } + } } -static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) +static void perf_swcounter_unthrottle(struct perf_counter *counter) { - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct perf_counter *counter; - u64 period; - - counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->pmu->read(counter); - - data.addr = 0; - data.regs = get_irq_regs(); /* - * In case we exclude kernel IPs or are somehow not in interrupt - * context, provide the next best thing, the user IP. + * Nothing to do, we already reset hwc->interrupts. */ - if ((counter->attr.exclude_kernel || !data.regs) && - !counter->attr.exclude_user) - data.regs = task_pt_regs(current); +} - if (data.regs) { - if (perf_counter_overflow(counter, 0, &data)) - ret = HRTIMER_NORESTART; - } +static void perf_swcounter_add(struct perf_counter *counter, u64 nr, + int nmi, struct perf_sample_data *data) +{ + struct hw_perf_counter *hwc = &counter->hw; - period = max_t(u64, 10000, counter->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); + atomic64_add(nr, &counter->count); - return ret; -} + if (!hwc->sample_period) + return; -static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct perf_sample_data *data) -{ - data->period = counter->hw.last_period; + if (!data->regs) + return; - perf_swcounter_update(counter); - perf_swcounter_set_period(counter); - if (perf_counter_overflow(counter, nmi, data)) - /* soft-disable the counter */ - ; + if (!atomic64_add_negative(nr, &hwc->period_left)) + perf_swcounter_overflow(counter, nmi, data); } static int perf_swcounter_is_counting(struct perf_counter *counter) { - struct perf_counter_context *ctx; - unsigned long flags; - int count; - + /* + * The counter is active, we're good! + */ if (counter->state == PERF_COUNTER_STATE_ACTIVE) return 1; + /* + * The counter is off/error, not counting. + */ if (counter->state != PERF_COUNTER_STATE_INACTIVE) return 0; /* - * If the counter is inactive, it could be just because - * its task is scheduled out, or because it's in a group - * which could not go on the PMU. We want to count in - * the first case but not the second. If the context is - * currently active then an inactive software counter must - * be the second case. If it's not currently active then - * we need to know whether the counter was active when the - * context was last active, which we can determine by - * comparing counter->tstamp_stopped with ctx->time. - * - * We are within an RCU read-side critical section, - * which protects the existence of *ctx. + * The counter is inactive, if the context is active + * we're part of a group that didn't make it on the 'pmu', + * not counting. */ - ctx = counter->ctx; - spin_lock_irqsave(&ctx->lock, flags); - count = 1; - /* Re-check state now we have the lock */ - if (counter->state < PERF_COUNTER_STATE_INACTIVE || - counter->ctx->is_active || - counter->tstamp_stopped < ctx->time) - count = 0; - spin_unlock_irqrestore(&ctx->lock, flags); - return count; + if (counter->ctx->is_active) + return 0; + + /* + * We're inactive and the context is too, this means the + * task is scheduled out, we're counting events that happen + * to us, like migration events. + */ + return 1; } static int perf_swcounter_match(struct perf_counter *counter, @@ -3479,15 +3629,6 @@ static int perf_swcounter_match(struct perf_counter *counter, return 1; } -static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct perf_sample_data *data) -{ - int neg = atomic64_add_negative(nr, &counter->hw.count); - - if (counter->hw.sample_period && !neg && data->regs) - perf_swcounter_overflow(counter, nmi, data); -} - static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, enum perf_type_id type, u32 event, u64 nr, int nmi, @@ -3566,26 +3707,65 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi, static void perf_swcounter_read(struct perf_counter *counter) { - perf_swcounter_update(counter); } static int perf_swcounter_enable(struct perf_counter *counter) { - perf_swcounter_set_period(counter); + struct hw_perf_counter *hwc = &counter->hw; + + if (hwc->sample_period) { + hwc->last_period = hwc->sample_period; + perf_swcounter_set_period(counter); + } return 0; } static void perf_swcounter_disable(struct perf_counter *counter) { - perf_swcounter_update(counter); } static const struct pmu perf_ops_generic = { .enable = perf_swcounter_enable, .disable = perf_swcounter_disable, .read = perf_swcounter_read, + .unthrottle = perf_swcounter_unthrottle, }; +/* + * hrtimer based swcounter callback + */ + +static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) +{ + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct perf_counter *counter; + u64 period; + + counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); + counter->pmu->read(counter); + + data.addr = 0; + data.regs = get_irq_regs(); + /* + * In case we exclude kernel IPs or are somehow not in interrupt + * context, provide the next best thing, the user IP. + */ + if ((counter->attr.exclude_kernel || !data.regs) && + !counter->attr.exclude_user) + data.regs = task_pt_regs(current); + + if (data.regs) { + if (perf_counter_overflow(counter, 0, &data)) + ret = HRTIMER_NORESTART; + } + + period = max_t(u64, 10000, counter->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); + + return ret; +} + /* * Software counter: cpu wall time clock */ @@ -3703,17 +3883,24 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id) +void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, + int entry_size) { + struct perf_raw_record raw = { + .size = entry_size, + .data = record, + }; + struct perf_sample_data data = { .regs = get_irq_regs(), - .addr = 0, + .addr = addr, + .raw = &raw, }; if (!data.regs) data.regs = task_pt_regs(current); - do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); + do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data); } EXPORT_SYMBOL_GPL(perf_tpcounter_event); @@ -3727,6 +3914,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter) static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) { + /* + * Raw tracepoint data is a severe data leak, only allow root to + * have these. + */ + if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && + !capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + if (ftrace_profile_enable(counter->attr.config)) return NULL; @@ -3860,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr, atomic64_set(&hwc->period_left, hwc->sample_period); /* - * we currently do not support PERF_SAMPLE_GROUP on inherited counters + * we currently do not support PERF_FORMAT_GROUP on inherited counters */ - if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) + if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; switch (attr->type) { @@ -4269,7 +4464,7 @@ void perf_counter_exit_task(struct task_struct *child) unsigned long flags; if (likely(!child->perf_counter_ctxp)) { - perf_counter_task(child, 0); + perf_counter_task(child, NULL, 0); return; } @@ -4289,6 +4484,7 @@ void perf_counter_exit_task(struct task_struct *child) * incremented the context's refcount before we do put_ctx below. */ spin_lock(&child_ctx->lock); + child->perf_counter_ctxp = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all @@ -4302,9 +4498,7 @@ void perf_counter_exit_task(struct task_struct *child) * won't get any samples after PERF_EVENT_EXIT. We can however still * get a few PERF_EVENT_READ events. */ - perf_counter_task(child, 0); - - child->perf_counter_ctxp = NULL; + perf_counter_task(child, child_ctx, 0); /* * We can recurse on the same lock type through: @@ -4525,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) perf_counter_init_cpu(cpu); break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hw_perf_counter_setup_online(cpu); + break; + case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: perf_counter_exit_cpu(cpu); @@ -4549,6 +4748,8 @@ void __init perf_counter_init(void) { perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, + (void *)(long)smp_processor_id()); register_cpu_notifier(&perf_cpu_nb); } diff --git a/trunk/kernel/posix-cpu-timers.c b/trunk/kernel/posix-cpu-timers.c index bece7c0b67b2..e33a21cb9407 100644 --- a/trunk/kernel/posix-cpu-timers.c +++ b/trunk/kernel/posix-cpu-timers.c @@ -521,11 +521,12 @@ void posix_cpu_timers_exit(struct task_struct *tsk) } void posix_cpu_timers_exit_group(struct task_struct *tsk) { - struct task_cputime cputime; + struct signal_struct *const sig = tsk->signal; - thread_group_cputimer(tsk, &cputime); cleanup_timers(tsk->signal->cpu_timers, - cputime.utime, cputime.stime, cputime.sum_exec_runtime); + cputime_add(tsk->utime, sig->utime), + cputime_add(tsk->stime, sig->stime), + tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) diff --git a/trunk/kernel/rtmutex.c b/trunk/kernel/rtmutex.c index fcd107a78c5a..29bd4baf9e75 100644 --- a/trunk/kernel/rtmutex.c +++ b/trunk/kernel/rtmutex.c @@ -1039,16 +1039,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { /* We got the lock for task. */ debug_rt_mutex_lock(lock); - rt_mutex_set_owner(lock, task, 0); - + spin_unlock(&lock->wait_lock); rt_mutex_deadlock_account_lock(lock, task); return 1; } ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); - if (ret && !waiter->task) { /* * Reset the return value. We might have diff --git a/trunk/kernel/trace/blktrace.c b/trunk/kernel/trace/blktrace.c index 1090b0aed9ba..7a34cb563fec 100644 --- a/trunk/kernel/trace/blktrace.c +++ b/trunk/kernel/trace/blktrace.c @@ -267,8 +267,8 @@ static void blk_trace_free(struct blk_trace *bt) { debugfs_remove(bt->msg_file); debugfs_remove(bt->dropped_file); - debugfs_remove(bt->dir); relay_close(bt->rchan); + debugfs_remove(bt->dir); free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); @@ -378,18 +378,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, static int blk_remove_buf_file_callback(struct dentry *dentry) { - struct dentry *parent = dentry->d_parent; debugfs_remove(dentry); - /* - * this will fail for all but the last file, but that is ok. what we - * care about is the top level buts->name directory going away, when - * the last trace file is gone. Then we don't have to rmdir() that - * manually on trace stop, so it nicely solves the issue with - * force killing of running traces. - */ - - debugfs_remove(parent); return 0; } diff --git a/trunk/kernel/trace/ring_buffer.c b/trunk/kernel/trace/ring_buffer.c index bf27bb7a63e2..a330513d96ce 100644 --- a/trunk/kernel/trace/ring_buffer.c +++ b/trunk/kernel/trace/ring_buffer.c @@ -735,6 +735,7 @@ ring_buffer_free(struct ring_buffer *buffer) put_online_cpus(); + kfree(buffer->buffers); free_cpumask_var(buffer->cpumask); kfree(buffer); @@ -1785,7 +1786,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, */ RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); - if (!rb_try_to_discard(cpu_buffer, event)) + if (rb_try_to_discard(cpu_buffer, event)) goto out; /* @@ -2383,7 +2384,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) * the box. Return the padding, and we will release * the current locks, and try again. */ - rb_advance_reader(cpu_buffer); return event; case RINGBUF_TYPE_TIME_EXTEND: @@ -2486,7 +2486,7 @@ static inline int rb_ok_to_lock(void) * buffer too. A one time deal is all you get from reading * the ring buffer from an NMI. */ - if (likely(!in_nmi() && !oops_in_progress)) + if (likely(!in_nmi())) return 1; tracing_off_permanent(); @@ -2519,6 +2519,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) if (dolock) spin_lock(&cpu_buffer->reader_lock); event = rb_buffer_peek(buffer, cpu, ts); + if (event && event->type_len == RINGBUF_TYPE_PADDING) + rb_advance_reader(cpu_buffer); if (dolock) spin_unlock(&cpu_buffer->reader_lock); local_irq_restore(flags); @@ -2590,12 +2592,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) spin_lock(&cpu_buffer->reader_lock); event = rb_buffer_peek(buffer, cpu, ts); - if (!event) - goto out_unlock; - - rb_advance_reader(cpu_buffer); + if (event) + rb_advance_reader(cpu_buffer); - out_unlock: if (dolock) spin_unlock(&cpu_buffer->reader_lock); local_irq_restore(flags); diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c index 8930e39b9d8c..c22b40f8f576 100644 --- a/trunk/kernel/trace/trace.c +++ b/trunk/kernel/trace/trace.c @@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); } +EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, int type, diff --git a/trunk/kernel/trace/trace.h b/trunk/kernel/trace/trace.h index 3548ae5cc780..8b9f4f6e9559 100644 --- a/trunk/kernel/trace/trace.h +++ b/trunk/kernel/trace/trace.h @@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags, - int pc); - void default_wait_pipe(struct trace_iterator *iter); void poll_wait_pipe(struct trace_iterator *iter); diff --git a/trunk/kernel/trace/trace_events_filter.c b/trunk/kernel/trace/trace_events_filter.c index 936c621bbf46..f32dc9d1ea7b 100644 --- a/trunk/kernel/trace/trace_events_filter.c +++ b/trunk/kernel/trace/trace_events_filter.c @@ -624,9 +624,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, return -ENOSPC; } - filter->preds[filter->n_preds] = pred; - filter->n_preds++; - list_for_each_entry(call, &ftrace_events, list) { if (!call->define_fields) @@ -643,6 +640,9 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, } replace_filter_string(call->filter, filter_string); } + + filter->preds[filter->n_preds] = pred; + filter->n_preds++; out: return err; } @@ -1029,12 +1029,17 @@ static int replace_preds(struct event_subsystem *system, if (elt->op == OP_AND || elt->op == OP_OR) { pred = create_logical_pred(elt->op); + if (!pred) + return -ENOMEM; if (call) { err = filter_add_pred(ps, call, pred); filter_free_pred(pred); - } else + } else { err = filter_add_subsystem_pred(ps, system, pred, filter_string); + if (err) + filter_free_pred(pred); + } if (err) return err; @@ -1048,12 +1053,17 @@ static int replace_preds(struct event_subsystem *system, } pred = create_pred(elt->op, operand1, operand2); + if (!pred) + return -ENOMEM; if (call) { err = filter_add_pred(ps, call, pred); filter_free_pred(pred); - } else + } else { err = filter_add_subsystem_pred(ps, system, pred, filter_string); + if (err) + filter_free_pred(pred); + } if (err) return err; diff --git a/trunk/kernel/wait.c b/trunk/kernel/wait.c index ea7c3b4275cf..c4bd3d825f35 100644 --- a/trunk/kernel/wait.c +++ b/trunk/kernel/wait.c @@ -10,13 +10,14 @@ #include #include -void init_waitqueue_head(wait_queue_head_t *q) +void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key) { spin_lock_init(&q->lock); + lockdep_set_class(&q->lock, key); INIT_LIST_HEAD(&q->task_list); } -EXPORT_SYMBOL(init_waitqueue_head); +EXPORT_SYMBOL(__init_waitqueue_head); void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { diff --git a/trunk/mm/mempool.c b/trunk/mm/mempool.c index a46eb1b4bb66..32e75d400503 100644 --- a/trunk/mm/mempool.c +++ b/trunk/mm/mempool.c @@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab); */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) { - size_t size = (size_t)(long)pool_data; + size_t size = (size_t)pool_data; return kmalloc(size, gfp_mask); } EXPORT_SYMBOL(mempool_kmalloc); void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data) { - size_t size = (size_t) pool_data; + size_t size = (size_t)pool_data; return kzalloc(size, gfp_mask); } EXPORT_SYMBOL(mempool_kzalloc); diff --git a/trunk/net/socket.c b/trunk/net/socket.c index 791d71a36a93..6d4716559047 100644 --- a/trunk/net/socket.c +++ b/trunk/net/socket.c @@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, if (more) flags |= MSG_MORE; - return sock->ops->sendpage(sock, page, offset, size, flags); + return kernel_sendpage(sock, page, offset, size, flags); } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, diff --git a/trunk/net/sunrpc/Makefile b/trunk/net/sunrpc/Makefile index 9d2fca5ad14a..db73fd2a3f0e 100644 --- a/trunk/net/sunrpc/Makefile +++ b/trunk/net/sunrpc/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - addr.o rpcb_clnt.o timer.o xdr.o \ + rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o diff --git a/trunk/net/sunrpc/addr.c b/trunk/net/sunrpc/addr.c deleted file mode 100644 index 22e8fd89477f..000000000000 --- a/trunk/net/sunrpc/addr.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright 2009, Oracle. All rights reserved. - * - * Convert socket addresses to presentation addresses and universal - * addresses, and vice versa. - * - * Universal addresses are introduced by RFC 1833 and further refined by - * recent RFCs describing NFSv4. The universal address format is part - * of the external (network) interface provided by rpcbind version 3 - * and 4, and by NFSv4. Such an address is a string containing a - * presentation format IP address followed by a port number in - * "hibyte.lobyte" format. - * - * IPv6 addresses can also include a scope ID, typically denoted by - * a '%' followed by a device name or a non-negative integer. Refer to - * RFC 4291, Section 2.2 for details on IPv6 presentation formats. - */ - -#include -#include - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - -static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, - char *buf, const int buflen) -{ - const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - const struct in6_addr *addr = &sin6->sin6_addr; - - /* - * RFC 4291, Section 2.2.2 - * - * Shorthanded ANY address - */ - if (ipv6_addr_any(addr)) - return snprintf(buf, buflen, "::"); - - /* - * RFC 4291, Section 2.2.2 - * - * Shorthanded loopback address - */ - if (ipv6_addr_loopback(addr)) - return snprintf(buf, buflen, "::1"); - - /* - * RFC 4291, Section 2.2.3 - * - * Special presentation address format for mapped v4 - * addresses. - */ - if (ipv6_addr_v4mapped(addr)) - return snprintf(buf, buflen, "::ffff:%pI4", - &addr->s6_addr32[3]); - - /* - * RFC 4291, Section 2.2.1 - * - * To keep the result as short as possible, especially - * since we don't shorthand, we don't want leading zeros - * in each halfword, so avoid %pI6. - */ - return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", - ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), - ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), - ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), - ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); -} - -static size_t rpc_ntop6(const struct sockaddr *sap, - char *buf, const size_t buflen) -{ - const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - char scopebuf[IPV6_SCOPE_ID_LEN]; - size_t len; - int rc; - - len = rpc_ntop6_noscopeid(sap, buf, buflen); - if (unlikely(len == 0)) - return len; - - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && - !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) - return len; - - rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", - IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); - if (unlikely((size_t)rc > sizeof(scopebuf))) - return 0; - - len += rc; - if (unlikely(len > buflen)) - return 0; - - strcat(buf, scopebuf); - return len; -} - -#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ - -static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, - char *buf, const int buflen) -{ - return 0; -} - -static size_t rpc_ntop6(const struct sockaddr *sap, - char *buf, const size_t buflen) -{ - return 0; -} - -#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ - -static int rpc_ntop4(const struct sockaddr *sap, - char *buf, const size_t buflen) -{ - const struct sockaddr_in *sin = (struct sockaddr_in *)sap; - - return snprintf(buf, buflen, "%pI4", &sin->sin_addr); -} - -/** - * rpc_ntop - construct a presentation address in @buf - * @sap: socket address - * @buf: construction area - * @buflen: size of @buf, in bytes - * - * Plants a %NUL-terminated string in @buf and returns the length - * of the string, excluding the %NUL. Otherwise zero is returned. - */ -size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen) -{ - switch (sap->sa_family) { - case AF_INET: - return rpc_ntop4(sap, buf, buflen); - case AF_INET6: - return rpc_ntop6(sap, buf, buflen); - } - - return 0; -} -EXPORT_SYMBOL_GPL(rpc_ntop); - -static size_t rpc_pton4(const char *buf, const size_t buflen, - struct sockaddr *sap, const size_t salen) -{ - struct sockaddr_in *sin = (struct sockaddr_in *)sap; - u8 *addr = (u8 *)&sin->sin_addr.s_addr; - - if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in)) - return 0; - - memset(sap, 0, sizeof(struct sockaddr_in)); - - if (in4_pton(buf, buflen, addr, '\0', NULL) == 0) - return 0; - - sin->sin_family = AF_INET; - return sizeof(struct sockaddr_in);; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static int rpc_parse_scope_id(const char *buf, const size_t buflen, - const char *delim, struct sockaddr_in6 *sin6) -{ - char *p; - size_t len; - - if ((buf + buflen) == delim) - return 1; - - if (*delim != IPV6_SCOPE_DELIMITER) - return 0; - - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && - !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) - return 0; - - len = (buf + buflen) - delim - 1; - p = kstrndup(delim + 1, len, GFP_KERNEL); - if (p) { - unsigned long scope_id = 0; - struct net_device *dev; - - dev = dev_get_by_name(&init_net, p); - if (dev != NULL) { - scope_id = dev->ifindex; - dev_put(dev); - } else { - if (strict_strtoul(p, 10, &scope_id) == 0) { - kfree(p); - return 0; - } - } - - kfree(p); - - sin6->sin6_scope_id = scope_id; - return 1; - } - - return 0; -} - -static size_t rpc_pton6(const char *buf, const size_t buflen, - struct sockaddr *sap, const size_t salen) -{ - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; - const char *delim; - - if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) || - salen < sizeof(struct sockaddr_in6)) - return 0; - - memset(sap, 0, sizeof(struct sockaddr_in6)); - - if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) - return 0; - - if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) - return 0; - - sin6->sin6_family = AF_INET6; - return sizeof(struct sockaddr_in6); -} -#else -static size_t rpc_pton6(const char *buf, const size_t buflen, - struct sockaddr *sap, const size_t salen) -{ - return 0; -} -#endif - -/** - * rpc_pton - Construct a sockaddr in @sap - * @buf: C string containing presentation format IP address - * @buflen: length of presentation address in bytes - * @sap: buffer into which to plant socket address - * @salen: size of buffer in bytes - * - * Returns the size of the socket address if successful; otherwise - * zero is returned. - * - * Plants a socket address in @sap and returns the size of the - * socket address, if successful. Returns zero if an error - * occurred. - */ -size_t rpc_pton(const char *buf, const size_t buflen, - struct sockaddr *sap, const size_t salen) -{ - unsigned int i; - - for (i = 0; i < buflen; i++) - if (buf[i] == ':') - return rpc_pton6(buf, buflen, sap, salen); - return rpc_pton4(buf, buflen, sap, salen); -} -EXPORT_SYMBOL_GPL(rpc_pton); - -/** - * rpc_sockaddr2uaddr - Construct a universal address string from @sap. - * @sap: socket address - * - * Returns a %NUL-terminated string in dynamically allocated memory; - * otherwise NULL is returned if an error occurred. Caller must - * free the returned string. - */ -char *rpc_sockaddr2uaddr(const struct sockaddr *sap) -{ - char portbuf[RPCBIND_MAXUADDRPLEN]; - char addrbuf[RPCBIND_MAXUADDRLEN]; - unsigned short port; - - switch (sap->sa_family) { - case AF_INET: - if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) - return NULL; - port = ntohs(((struct sockaddr_in *)sap)->sin_port); - break; - case AF_INET6: - if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) - return NULL; - port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - break; - default: - return NULL; - } - - if (snprintf(portbuf, sizeof(portbuf), - ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) - return NULL; - - if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) - return NULL; - - return kstrdup(addrbuf, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); - -/** - * rpc_uaddr2sockaddr - convert a universal address to a socket address. - * @uaddr: C string containing universal address to convert - * @uaddr_len: length of universal address string - * @sap: buffer into which to plant socket address - * @salen: size of buffer - * - * Returns the size of the socket address if successful; otherwise - * zero is returned. - */ -size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, - struct sockaddr *sap, const size_t salen) -{ - char *c, buf[RPCBIND_MAXUADDRLEN]; - unsigned long portlo, porthi; - unsigned short port; - - if (uaddr_len > sizeof(buf)) - return 0; - - memcpy(buf, uaddr, uaddr_len); - - buf[uaddr_len] = '\n'; - buf[uaddr_len + 1] = '\0'; - - c = strrchr(buf, '.'); - if (unlikely(c == NULL)) - return 0; - if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) - return 0; - if (unlikely(portlo > 255)) - return 0; - - c[0] = '\n'; - c[1] = '\0'; - - c = strrchr(buf, '.'); - if (unlikely(c == NULL)) - return 0; - if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) - return 0; - if (unlikely(porthi > 255)) - return 0; - - port = (unsigned short)((porthi << 8) | portlo); - - c[0] = '\0'; - - if (rpc_pton(buf, strlen(buf), sap, salen) == 0) - return 0; - - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = htons(port); - return sizeof(struct sockaddr_in); - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); - return sizeof(struct sockaddr_in6); - } - - return 0; -} -EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr); diff --git a/trunk/net/sunrpc/auth_gss/auth_gss.c b/trunk/net/sunrpc/auth_gss/auth_gss.c index fc6a43ccd950..66d458fc6920 100644 --- a/trunk/net/sunrpc/auth_gss/auth_gss.c +++ b/trunk/net/sunrpc/auth_gss/auth_gss.c @@ -89,8 +89,8 @@ static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_free_ctx(struct gss_cl_ctx *); -static const struct rpc_pipe_ops gss_upcall_ops_v0; -static const struct rpc_pipe_ops gss_upcall_ops_v1; +static struct rpc_pipe_ops gss_upcall_ops_v0; +static struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) @@ -777,7 +777,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. */ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, + gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_dentry, "gssd", clnt, &gss_upcall_ops_v1, RPC_PIPE_WAIT_FOR_OPEN); @@ -786,7 +786,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, + gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops_v0, RPC_PIPE_WAIT_FOR_OPEN); @@ -1507,7 +1507,7 @@ static const struct rpc_credops gss_nullops = { .crunwrap_resp = gss_unwrap_resp, }; -static const struct rpc_pipe_ops gss_upcall_ops_v0 = { +static struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, @@ -1515,7 +1515,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .release_pipe = gss_pipe_release, }; -static const struct rpc_pipe_ops gss_upcall_ops_v1 = { +static struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, diff --git a/trunk/net/sunrpc/auth_gss/svcauth_gss.c b/trunk/net/sunrpc/auth_gss/svcauth_gss.c index 2e6a148d277c..2278a50c6444 100644 --- a/trunk/net/sunrpc/auth_gss/svcauth_gss.c +++ b/trunk/net/sunrpc/auth_gss/svcauth_gss.c @@ -181,11 +181,6 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, rsi_request); -} - static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -275,7 +270,7 @@ static struct cache_detail rsi_cache = { .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_upcall = rsi_upcall, + .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, diff --git a/trunk/net/sunrpc/cache.c b/trunk/net/sunrpc/cache.c index db7720e453c3..ff0c23053d2f 100644 --- a/trunk/net/sunrpc/cache.c +++ b/trunk/net/sunrpc/cache.c @@ -27,12 +27,10 @@ #include #include #include -#include #include #include #include #include -#include #define RPCDBG_FACILITY RPCDBG_CACHE @@ -177,13 +175,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_update); -static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) -{ - if (!cd->cache_upcall) - return -EINVAL; - return cd->cache_upcall(cd, h); -} - +static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); /* * This is the generic cache management routine for all * the authentication caches. @@ -292,11 +284,76 @@ static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; +static const struct file_operations cache_file_operations; +static const struct file_operations content_file_operations; +static const struct file_operations cache_flush_operations; + static void do_cache_clean(struct work_struct *work); static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); -static void sunrpc_init_cache_detail(struct cache_detail *cd) +static void remove_cache_proc_entries(struct cache_detail *cd) +{ + if (cd->proc_ent == NULL) + return; + if (cd->flush_ent) + remove_proc_entry("flush", cd->proc_ent); + if (cd->channel_ent) + remove_proc_entry("channel", cd->proc_ent); + if (cd->content_ent) + remove_proc_entry("content", cd->proc_ent); + cd->proc_ent = NULL; + remove_proc_entry(cd->name, proc_net_rpc); +} + +#ifdef CONFIG_PROC_FS +static int create_cache_proc_entries(struct cache_detail *cd) +{ + struct proc_dir_entry *p; + + cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); + if (cd->proc_ent == NULL) + goto out_nomem; + cd->channel_ent = cd->content_ent = NULL; + + p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + cd->proc_ent, &cache_flush_operations, cd); + cd->flush_ent = p; + if (p == NULL) + goto out_nomem; + + if (cd->cache_request || cd->cache_parse) { + p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->proc_ent, &cache_file_operations, cd); + cd->channel_ent = p; + if (p == NULL) + goto out_nomem; + } + if (cd->cache_show) { + p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + cd->proc_ent, &content_file_operations, cd); + cd->content_ent = p; + if (p == NULL) + goto out_nomem; + } + return 0; +out_nomem: + remove_cache_proc_entries(cd); + return -ENOMEM; +} +#else /* CONFIG_PROC_FS */ +static int create_cache_proc_entries(struct cache_detail *cd) +{ + return 0; +} +#endif + +int cache_register(struct cache_detail *cd) { + int ret; + + ret = create_cache_proc_entries(cd); + if (ret) + return ret; rwlock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); @@ -310,9 +367,11 @@ static void sunrpc_init_cache_detail(struct cache_detail *cd) /* start the cleaning process */ schedule_delayed_work(&cache_cleaner, 0); + return 0; } +EXPORT_SYMBOL_GPL(cache_register); -static void sunrpc_destroy_cache_detail(struct cache_detail *cd) +void cache_unregister(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); @@ -327,6 +386,7 @@ static void sunrpc_destroy_cache_detail(struct cache_detail *cd) list_del_init(&cd->others); write_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); + remove_cache_proc_entries(cd); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); @@ -335,6 +395,7 @@ static void sunrpc_destroy_cache_detail(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } +EXPORT_SYMBOL_GPL(cache_unregister); /* clean cache tries to find something to clean * and cleans it. @@ -626,18 +687,18 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; -static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, - loff_t *ppos, struct cache_detail *cd) +static ssize_t +cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct inode *inode = filp->f_path.dentry->d_inode; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; int err; if (count == 0) return 0; - mutex_lock(&inode->i_mutex); /* protect against multiple concurrent + mutex_lock(&queue_io_mutex); /* protect against multiple concurrent * readers on this file */ again: spin_lock(&queue_lock); @@ -650,7 +711,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, } if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); - mutex_unlock(&inode->i_mutex); + mutex_unlock(&queue_io_mutex); BUG_ON(rp->offset); return 0; } @@ -697,90 +758,49 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, } if (err == -EAGAIN) goto again; - mutex_unlock(&inode->i_mutex); + mutex_unlock(&queue_io_mutex); return err ? err : count; } -static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, - size_t count, struct cache_detail *cd) -{ - ssize_t ret; - - if (copy_from_user(kaddr, buf, count)) - return -EFAULT; - kaddr[count] = '\0'; - ret = cd->cache_parse(cd, kaddr, count); - if (!ret) - ret = count; - return ret; -} +static char write_buf[8192]; /* protected by queue_io_mutex */ -static ssize_t cache_slow_downcall(const char __user *buf, - size_t count, struct cache_detail *cd) +static ssize_t +cache_write(struct file *filp, const char __user *buf, size_t count, + loff_t *ppos) { - static char write_buf[8192]; /* protected by queue_io_mutex */ - ssize_t ret = -EINVAL; + int err; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + if (count == 0) + return 0; if (count >= sizeof(write_buf)) - goto out; - mutex_lock(&queue_io_mutex); - ret = cache_do_downcall(write_buf, buf, count, cd); - mutex_unlock(&queue_io_mutex); -out: - return ret; -} - -static ssize_t cache_downcall(struct address_space *mapping, - const char __user *buf, - size_t count, struct cache_detail *cd) -{ - struct page *page; - char *kaddr; - ssize_t ret = -ENOMEM; - - if (count >= PAGE_CACHE_SIZE) - goto out_slow; - - page = find_or_create_page(mapping, 0, GFP_KERNEL); - if (!page) - goto out_slow; - - kaddr = kmap(page); - ret = cache_do_downcall(kaddr, buf, count, cd); - kunmap(page); - unlock_page(page); - page_cache_release(page); - return ret; -out_slow: - return cache_slow_downcall(buf, count, cd); -} + return -EINVAL; -static ssize_t cache_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos, - struct cache_detail *cd) -{ - struct address_space *mapping = filp->f_mapping; - struct inode *inode = filp->f_path.dentry->d_inode; - ssize_t ret = -EINVAL; + mutex_lock(&queue_io_mutex); - if (!cd->cache_parse) - goto out; + if (copy_from_user(write_buf, buf, count)) { + mutex_unlock(&queue_io_mutex); + return -EFAULT; + } + write_buf[count] = '\0'; + if (cd->cache_parse) + err = cd->cache_parse(cd, write_buf, count); + else + err = -EINVAL; - mutex_lock(&inode->i_mutex); - ret = cache_downcall(mapping, buf, count, cd); - mutex_unlock(&inode->i_mutex); -out: - return ret; + mutex_unlock(&queue_io_mutex); + return err ? err : count; } static DECLARE_WAIT_QUEUE_HEAD(queue_wait); -static unsigned int cache_poll(struct file *filp, poll_table *wait, - struct cache_detail *cd) +static unsigned int +cache_poll(struct file *filp, poll_table *wait) { unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -802,13 +822,14 @@ static unsigned int cache_poll(struct file *filp, poll_table *wait, return mask; } -static int cache_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg, - struct cache_detail *cd) +static int +cache_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; + struct cache_detail *cd = PDE(ino)->data; if (cmd != FIONREAD || !rp) return -EINVAL; @@ -831,13 +852,15 @@ static int cache_ioctl(struct inode *ino, struct file *filp, return put_user(len, (int __user *)arg); } -static int cache_open(struct inode *inode, struct file *filp, - struct cache_detail *cd) +static int +cache_open(struct inode *inode, struct file *filp) { struct cache_reader *rp = NULL; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { + struct cache_detail *cd = PDE(inode)->data; + rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) return -ENOMEM; @@ -852,10 +875,11 @@ static int cache_open(struct inode *inode, struct file *filp, return 0; } -static int cache_release(struct inode *inode, struct file *filp, - struct cache_detail *cd) +static int +cache_release(struct inode *inode, struct file *filp) { struct cache_reader *rp = filp->private_data; + struct cache_detail *cd = PDE(inode)->data; if (rp) { spin_lock(&queue_lock); @@ -884,6 +908,18 @@ static int cache_release(struct inode *inode, struct file *filp, +static const struct file_operations cache_file_operations = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read, + .write = cache_write, + .poll = cache_poll, + .ioctl = cache_ioctl, /* for FIONREAD */ + .open = cache_open, + .release = cache_release, +}; + + static void queue_loose(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq; @@ -984,21 +1020,15 @@ static void warn_no_listener(struct cache_detail *detail) if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) - detail->warn_no_listener(detail, detail->last_close != 0); + detail->warn_no_listener(detail); } } /* - * register an upcall request to user-space and queue it up for read() by the - * upcall daemon. - * + * register an upcall request to user-space. * Each request is at most one page long. */ -int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)) +static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; @@ -1006,6 +1036,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, char *bp; int len; + if (detail->cache_request == NULL) + return -EINVAL; + if (atomic_read(&detail->readers) == 0 && detail->last_close < get_seconds() - 30) { warn_no_listener(detail); @@ -1024,7 +1057,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, bp = buf; len = PAGE_SIZE; - cache_request(detail, h, &bp, &len); + detail->cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); @@ -1042,7 +1075,6 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, wake_up(&queue_wait); return 0; } -EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); /* * parse a message from user-space and pass it @@ -1210,10 +1242,10 @@ static const struct seq_operations cache_content_op = { .show = c_show, }; -static int content_open(struct inode *inode, struct file *file, - struct cache_detail *cd) +static int content_open(struct inode *inode, struct file *file) { struct handle *han; + struct cache_detail *cd = PDE(inode)->data; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) @@ -1223,10 +1255,17 @@ static int content_open(struct inode *inode, struct file *file, return 0; } +static const struct file_operations content_file_operations = { + .open = content_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + static ssize_t read_flush(struct file *file, char __user *buf, - size_t count, loff_t *ppos, - struct cache_detail *cd) + size_t count, loff_t *ppos) { + struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; size_t len; @@ -1244,10 +1283,10 @@ static ssize_t read_flush(struct file *file, char __user *buf, return len; } -static ssize_t write_flush(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, - struct cache_detail *cd) +static ssize_t write_flush(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) { + struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; @@ -1268,299 +1307,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf, return count; } -static ssize_t cache_read_procfs(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; - - return cache_read(filp, buf, count, ppos, cd); -} - -static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; - - return cache_write(filp, buf, count, ppos, cd); -} - -static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) -{ - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; - - return cache_poll(filp, wait, cd); -} - -static int cache_ioctl_procfs(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct cache_detail *cd = PDE(inode)->data; - - return cache_ioctl(inode, filp, cmd, arg, cd); -} - -static int cache_open_procfs(struct inode *inode, struct file *filp) -{ - struct cache_detail *cd = PDE(inode)->data; - - return cache_open(inode, filp, cd); -} - -static int cache_release_procfs(struct inode *inode, struct file *filp) -{ - struct cache_detail *cd = PDE(inode)->data; - - return cache_release(inode, filp, cd); -} - -static const struct file_operations cache_file_operations_procfs = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = cache_read_procfs, - .write = cache_write_procfs, - .poll = cache_poll_procfs, - .ioctl = cache_ioctl_procfs, /* for FIONREAD */ - .open = cache_open_procfs, - .release = cache_release_procfs, -}; - -static int content_open_procfs(struct inode *inode, struct file *filp) -{ - struct cache_detail *cd = PDE(inode)->data; - - return content_open(inode, filp, cd); -} - -static const struct file_operations content_file_operations_procfs = { - .open = content_open_procfs, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -static ssize_t read_flush_procfs(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; - - return read_flush(filp, buf, count, ppos, cd); -} - -static ssize_t write_flush_procfs(struct file *filp, - const char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; - - return write_flush(filp, buf, count, ppos, cd); -} - -static const struct file_operations cache_flush_operations_procfs = { - .open = nonseekable_open, - .read = read_flush_procfs, - .write = write_flush_procfs, -}; - -static void remove_cache_proc_entries(struct cache_detail *cd) -{ - if (cd->u.procfs.proc_ent == NULL) - return; - if (cd->u.procfs.flush_ent) - remove_proc_entry("flush", cd->u.procfs.proc_ent); - if (cd->u.procfs.channel_ent) - remove_proc_entry("channel", cd->u.procfs.proc_ent); - if (cd->u.procfs.content_ent) - remove_proc_entry("content", cd->u.procfs.proc_ent); - cd->u.procfs.proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); -} - -#ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) -{ - struct proc_dir_entry *p; - - cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); - if (cd->u.procfs.proc_ent == NULL) - goto out_nomem; - cd->u.procfs.channel_ent = NULL; - cd->u.procfs.content_ent = NULL; - - p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->u.procfs.proc_ent, - &cache_flush_operations_procfs, cd); - cd->u.procfs.flush_ent = p; - if (p == NULL) - goto out_nomem; - - if (cd->cache_upcall || cd->cache_parse) { - p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->u.procfs.proc_ent, - &cache_file_operations_procfs, cd); - cd->u.procfs.channel_ent = p; - if (p == NULL) - goto out_nomem; - } - if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, - cd->u.procfs.proc_ent, - &content_file_operations_procfs, cd); - cd->u.procfs.content_ent = p; - if (p == NULL) - goto out_nomem; - } - return 0; -out_nomem: - remove_cache_proc_entries(cd); - return -ENOMEM; -} -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) -{ - return 0; -} -#endif - -int cache_register(struct cache_detail *cd) -{ - int ret; - - sunrpc_init_cache_detail(cd); - ret = create_cache_proc_entries(cd); - if (ret) - sunrpc_destroy_cache_detail(cd); - return ret; -} -EXPORT_SYMBOL_GPL(cache_register); - -void cache_unregister(struct cache_detail *cd) -{ - remove_cache_proc_entries(cd); - sunrpc_destroy_cache_detail(cd); -} -EXPORT_SYMBOL_GPL(cache_unregister); - -static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; - - return cache_read(filp, buf, count, ppos, cd); -} - -static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; - - return cache_write(filp, buf, count, ppos, cd); -} - -static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) -{ - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; - - return cache_poll(filp, wait, cd); -} - -static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct cache_detail *cd = RPC_I(inode)->private; - - return cache_ioctl(inode, filp, cmd, arg, cd); -} - -static int cache_open_pipefs(struct inode *inode, struct file *filp) -{ - struct cache_detail *cd = RPC_I(inode)->private; - - return cache_open(inode, filp, cd); -} - -static int cache_release_pipefs(struct inode *inode, struct file *filp) -{ - struct cache_detail *cd = RPC_I(inode)->private; - - return cache_release(inode, filp, cd); -} - -const struct file_operations cache_file_operations_pipefs = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = cache_read_pipefs, - .write = cache_write_pipefs, - .poll = cache_poll_pipefs, - .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ - .open = cache_open_pipefs, - .release = cache_release_pipefs, -}; - -static int content_open_pipefs(struct inode *inode, struct file *filp) -{ - struct cache_detail *cd = RPC_I(inode)->private; - - return content_open(inode, filp, cd); -} - -const struct file_operations content_file_operations_pipefs = { - .open = content_open_pipefs, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; - - return read_flush(filp, buf, count, ppos, cd); -} - -static ssize_t write_flush_pipefs(struct file *filp, - const char __user *buf, - size_t count, loff_t *ppos) -{ - struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; - - return write_flush(filp, buf, count, ppos, cd); -} - -const struct file_operations cache_flush_operations_pipefs = { +static const struct file_operations cache_flush_operations = { .open = nonseekable_open, - .read = read_flush_pipefs, - .write = write_flush_pipefs, + .read = read_flush, + .write = write_flush, }; - -int sunrpc_cache_register_pipefs(struct dentry *parent, - const char *name, mode_t umode, - struct cache_detail *cd) -{ - struct qstr q; - struct dentry *dir; - int ret = 0; - - sunrpc_init_cache_detail(cd); - q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len); - dir = rpc_create_cache_dir(parent, &q, umode, cd); - if (!IS_ERR(dir)) - cd->u.pipefs.dir = dir; - else { - sunrpc_destroy_cache_detail(cd); - ret = PTR_ERR(dir); - } - return ret; -} -EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); - -void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) -{ - rpc_remove_cache_dir(cd->u.pipefs.dir); - cd->u.pipefs.dir = NULL; - sunrpc_destroy_cache_detail(cd); -} -EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); - diff --git a/trunk/net/sunrpc/clnt.c b/trunk/net/sunrpc/clnt.c index c1e467e1b07d..ebfcf9b89909 100644 --- a/trunk/net/sunrpc/clnt.c +++ b/trunk/net/sunrpc/clnt.c @@ -27,8 +27,6 @@ #include #include #include -#include -#include #include #include #include @@ -99,49 +97,33 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; - struct nameidata nd; - struct path path; - char name[15]; - struct qstr q = { - .name = name, - }; int error; - clnt->cl_path.mnt = ERR_PTR(-ENOENT); - clnt->cl_path.dentry = ERR_PTR(-ENOENT); + clnt->cl_vfsmnt = ERR_PTR(-ENOENT); + clnt->cl_dentry = ERR_PTR(-ENOENT); if (dir_name == NULL) return 0; - path.mnt = rpc_get_mount(); - if (IS_ERR(path.mnt)) - return PTR_ERR(path.mnt); - error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); - if (error) - goto err; + clnt->cl_vfsmnt = rpc_get_mount(); + if (IS_ERR(clnt->cl_vfsmnt)) + return PTR_ERR(clnt->cl_vfsmnt); for (;;) { - q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); - name[sizeof(name) - 1] = '\0'; - q.hash = full_name_hash(q.name, q.len); - path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); - if (!IS_ERR(path.dentry)) - break; - error = PTR_ERR(path.dentry); + snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), + "%s/clnt%x", dir_name, + (unsigned int)clntid++); + clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; + clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); + if (!IS_ERR(clnt->cl_dentry)) + return 0; + error = PTR_ERR(clnt->cl_dentry); if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry" - " %s/%s, error %d\n", - dir_name, name, error); - goto err_path_put; + printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", + clnt->cl_pathname, error); + rpc_put_mount(); + return error; } } - path_put(&nd.path); - clnt->cl_path = path; - return 0; -err_path_put: - path_put(&nd.path); -err: - rpc_put_mount(); - return error; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) @@ -249,8 +231,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_path.dentry)) { - rpc_remove_client_dir(clnt->cl_path.dentry); + if (!IS_ERR(clnt->cl_dentry)) { + rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } out_no_path: @@ -441,8 +423,8 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_path.dentry)) { - rpc_remove_client_dir(clnt->cl_path.dentry); + if (!IS_ERR(clnt->cl_dentry)) { + rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { diff --git a/trunk/net/sunrpc/rpc_pipe.c b/trunk/net/sunrpc/rpc_pipe.c index 8dd81535e08f..9ced0628d69c 100644 --- a/trunk/net/sunrpc/rpc_pipe.c +++ b/trunk/net/sunrpc/rpc_pipe.c @@ -26,7 +26,6 @@ #include #include #include -#include static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -126,7 +125,7 @@ static void rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - const struct rpc_pipe_ops *ops; + struct rpc_pipe_ops *ops; int need_release; mutex_lock(&inode->i_mutex); @@ -398,13 +397,67 @@ static const struct file_operations rpc_info_operations = { }; +/* + * We have a single directory with 1 node in it. + */ +enum { + RPCAUTH_Root = 1, + RPCAUTH_lockd, + RPCAUTH_mount, + RPCAUTH_nfs, + RPCAUTH_portmap, + RPCAUTH_statd, + RPCAUTH_nfsd4_cb, + RPCAUTH_RootEOF +}; + /* * Description of fs contents. */ struct rpc_filelist { - const char *name; + char *name; const struct file_operations *i_fop; - umode_t mode; + int mode; +}; + +static struct rpc_filelist files[] = { + [RPCAUTH_lockd] = { + .name = "lockd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_mount] = { + .name = "mount", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfs] = { + .name = "nfs", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_portmap] = { + .name = "portmap", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_statd] = { + .name = "statd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfsd4_cb] = { + .name = "nfsd4_cb", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, +}; + +enum { + RPCAUTH_info = 2, + RPCAUTH_EOF +}; + +static struct rpc_filelist authfiles[] = { + [RPCAUTH_info] = { + .name = "info", + .i_fop = &rpc_info_operations, + .mode = S_IFREG | S_IRUSR, + }, }; struct vfsmount *rpc_get_mount(void) @@ -431,8 +484,39 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; +static int +rpc_lookup_parent(char *path, struct nameidata *nd) +{ + struct vfsmount *mnt; + + if (path[0] == '\0') + return -ENOENT; + + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) { + printk(KERN_WARNING "%s: %s failed to mount " + "pseudofilesystem \n", __FILE__, __func__); + return PTR_ERR(mnt); + } + + if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) { + printk(KERN_WARNING "%s: %s failed to find path %s\n", + __FILE__, __func__, path); + rpc_put_mount(); + return -ENOENT; + } + return 0; +} + +static void +rpc_release_path(struct nameidata *nd) +{ + path_put(&nd->path); + rpc_put_mount(); +} + static struct inode * -rpc_get_inode(struct super_block *sb, umode_t mode) +rpc_get_inode(struct super_block *sb, int mode) { struct inode *inode = new_inode(sb); if (!inode) @@ -450,274 +534,212 @@ rpc_get_inode(struct super_block *sb, umode_t mode) return inode; } -static int __rpc_create_common(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private) +/* + * FIXME: This probably has races. + */ +static void rpc_depopulate(struct dentry *parent, + unsigned long start, unsigned long eof) { - struct inode *inode; + struct inode *dir = parent->d_inode; + struct list_head *pos, *next; + struct dentry *dentry, *dvec[10]; + int n = 0; - BUG_ON(!d_unhashed(dentry)); - inode = rpc_get_inode(dir->i_sb, mode); - if (!inode) - goto out_err; - inode->i_ino = iunique(dir->i_sb, 100); - if (i_fop) - inode->i_fop = i_fop; - if (private) - rpc_inode_setowner(inode, private); - d_add(dentry, inode); - return 0; -out_err: - printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", - __FILE__, __func__, dentry->d_name.name); - dput(dentry); - return -ENOMEM; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); +repeat: + spin_lock(&dcache_lock); + list_for_each_safe(pos, next, &parent->d_subdirs) { + dentry = list_entry(pos, struct dentry, d_u.d_child); + if (!dentry->d_inode || + dentry->d_inode->i_ino < start || + dentry->d_inode->i_ino >= eof) + continue; + spin_lock(&dentry->d_lock); + if (!d_unhashed(dentry)) { + dget_locked(dentry); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + dvec[n++] = dentry; + if (n == ARRAY_SIZE(dvec)) + break; + } else + spin_unlock(&dentry->d_lock); + } + spin_unlock(&dcache_lock); + if (n) { + do { + dentry = dvec[--n]; + if (S_ISREG(dentry->d_inode->i_mode)) + simple_unlink(dir, dentry); + else if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(dir, dentry); + d_delete(dentry); + dput(dentry); + } while (n); + goto repeat; + } + mutex_unlock(&dir->i_mutex); } -static int __rpc_create(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private) +static int +rpc_populate(struct dentry *parent, + struct rpc_filelist *files, + int start, int eof) { - int err; + struct inode *inode, *dir = parent->d_inode; + void *private = RPC_I(dir)->private; + struct dentry *dentry; + int mode, i; - err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private); - if (err) - return err; - fsnotify_create(dir, dentry); + mutex_lock(&dir->i_mutex); + for (i = start; i < eof; i++) { + dentry = d_alloc_name(parent, files[i].name); + if (!dentry) + goto out_bad; + dentry->d_op = &rpc_dentry_operations; + mode = files[i].mode; + inode = rpc_get_inode(dir->i_sb, mode); + if (!inode) { + dput(dentry); + goto out_bad; + } + inode->i_ino = i; + if (files[i].i_fop) + inode->i_fop = files[i].i_fop; + if (private) + rpc_inode_setowner(inode, private); + if (S_ISDIR(mode)) + inc_nlink(dir); + d_add(dentry, inode); + fsnotify_create(dir, dentry); + } + mutex_unlock(&dir->i_mutex); return 0; +out_bad: + mutex_unlock(&dir->i_mutex); + printk(KERN_WARNING "%s: %s failed to populate directory %s\n", + __FILE__, __func__, parent->d_name.name); + return -ENOMEM; } -static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private) +static int +__rpc_mkdir(struct inode *dir, struct dentry *dentry) { - int err; + struct inode *inode; - err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private); - if (err) - return err; + inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO); + if (!inode) + goto out_err; + inode->i_ino = iunique(dir->i_sb, 100); + d_instantiate(dentry, inode); inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; -} - -static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, - umode_t mode, - const struct file_operations *i_fop, - void *private, - const struct rpc_pipe_ops *ops, - int flags) -{ - struct rpc_inode *rpci; - int err; - - err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); - if (err) - return err; - rpci = RPC_I(dentry->d_inode); - rpci->nkern_readwriters = 1; - rpci->private = private; - rpci->flags = flags; - rpci->ops = ops; - fsnotify_create(dir, dentry); - return 0; -} - -static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) -{ - int ret; - - dget(dentry); - ret = simple_rmdir(dir, dentry); - d_delete(dentry); - dput(dentry); - return ret; -} - -static int __rpc_unlink(struct inode *dir, struct dentry *dentry) -{ - int ret; - - dget(dentry); - ret = simple_unlink(dir, dentry); - d_delete(dentry); - dput(dentry); - return ret; -} - -static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); - - rpci->nkern_readwriters--; - if (rpci->nkern_readwriters != 0) - return 0; - rpc_close_pipes(inode); - return __rpc_unlink(dir, dentry); -} - -static struct dentry *__rpc_lookup_create(struct dentry *parent, - struct qstr *name) -{ - struct dentry *dentry; - - dentry = d_lookup(parent, name); - if (!dentry) { - dentry = d_alloc(parent, name); - if (!dentry) { - dentry = ERR_PTR(-ENOMEM); - goto out_err; - } - } - if (!dentry->d_inode) - dentry->d_op = &rpc_dentry_operations; out_err: - return dentry; + printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", + __FILE__, __func__, dentry->d_name.name); + return -ENOMEM; } -static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, - struct qstr *name) +static int +__rpc_rmdir(struct inode *dir, struct dentry *dentry) { - struct dentry *dentry; - - dentry = __rpc_lookup_create(parent, name); - if (dentry->d_inode == NULL) - return dentry; - dput(dentry); - return ERR_PTR(-EEXIST); + int error; + error = simple_rmdir(dir, dentry); + if (!error) + d_delete(dentry); + return error; } -/* - * FIXME: This probably has races. - */ -static void __rpc_depopulate(struct dentry *parent, - const struct rpc_filelist *files, - int start, int eof) +static struct dentry * +rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) { struct inode *dir = parent->d_inode; struct dentry *dentry; - struct qstr name; - int i; - - for (i = start; i < eof; i++) { - name.name = files[i].name; - name.len = strlen(files[i].name); - name.hash = full_name_hash(name.name, name.len); - dentry = d_lookup(parent, &name); - if (dentry == NULL) - continue; - if (dentry->d_inode == NULL) - goto next; - switch (dentry->d_inode->i_mode & S_IFMT) { - default: - BUG(); - case S_IFREG: - __rpc_unlink(dir, dentry); - break; - case S_IFDIR: - __rpc_rmdir(dir, dentry); - } -next: + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(name, parent, len); + if (IS_ERR(dentry)) + goto out_err; + if (!dentry->d_inode) + dentry->d_op = &rpc_dentry_operations; + else if (exclusive) { dput(dentry); + dentry = ERR_PTR(-EEXIST); + goto out_err; } -} - -static void rpc_depopulate(struct dentry *parent, - const struct rpc_filelist *files, - int start, int eof) -{ - struct inode *dir = parent->d_inode; - - mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); - __rpc_depopulate(parent, files, start, eof); + return dentry; +out_err: mutex_unlock(&dir->i_mutex); + return dentry; } -static int rpc_populate(struct dentry *parent, - const struct rpc_filelist *files, - int start, int eof, - void *private) +static struct dentry * +rpc_lookup_negative(char *path, struct nameidata *nd) { - struct inode *dir = parent->d_inode; struct dentry *dentry; - int i, err; + int error; - mutex_lock(&dir->i_mutex); - for (i = start; i < eof; i++) { - struct qstr q; - - q.name = files[i].name; - q.len = strlen(files[i].name); - q.hash = full_name_hash(q.name, q.len); - dentry = __rpc_lookup_create_exclusive(parent, &q); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_bad; - switch (files[i].mode & S_IFMT) { - default: - BUG(); - case S_IFREG: - err = __rpc_create(dir, dentry, - files[i].mode, - files[i].i_fop, - private); - break; - case S_IFDIR: - err = __rpc_mkdir(dir, dentry, - files[i].mode, - NULL, - private); - } - if (err != 0) - goto out_bad; - } - mutex_unlock(&dir->i_mutex); - return 0; -out_bad: - __rpc_depopulate(parent, files, start, eof); - mutex_unlock(&dir->i_mutex); - printk(KERN_WARNING "%s: %s failed to populate directory %s\n", - __FILE__, __func__, parent->d_name.name); - return err; + if ((error = rpc_lookup_parent(path, nd)) != 0) + return ERR_PTR(error); + dentry = rpc_lookup_create(nd->path.dentry, nd->last.name, nd->last.len, + 1); + if (IS_ERR(dentry)) + rpc_release_path(nd); + return dentry; } -static struct dentry *rpc_mkdir_populate(struct dentry *parent, - struct qstr *name, umode_t mode, void *private, - int (*populate)(struct dentry *, void *), void *args_populate) +/** + * rpc_mkdir - Create a new directory in rpc_pipefs + * @path: path from the rpc_pipefs root to the new directory + * @rpc_client: rpc client to associate with this directory + * + * This creates a directory at the given @path associated with + * @rpc_clnt, which will contain a file named "info" with some basic + * information about the client, together with any "pipes" that may + * later be created using rpc_mkpipe(). + */ +struct dentry * +rpc_mkdir(char *path, struct rpc_clnt *rpc_client) { + struct nameidata nd; struct dentry *dentry; - struct inode *dir = parent->d_inode; + struct inode *dir; int error; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, name); + dentry = rpc_lookup_negative(path, &nd); if (IS_ERR(dentry)) - goto out; - error = __rpc_mkdir(dir, dentry, mode, NULL, private); - if (error != 0) - goto out_err; - if (populate != NULL) { - error = populate(dentry, args_populate); - if (error) - goto err_rmdir; - } + return dentry; + dir = nd.path.dentry->d_inode; + if ((error = __rpc_mkdir(dir, dentry)) != 0) + goto err_dput; + RPC_I(dentry->d_inode)->private = rpc_client; + error = rpc_populate(dentry, authfiles, + RPCAUTH_info, RPCAUTH_EOF); + if (error) + goto err_depopulate; + dget(dentry); out: mutex_unlock(&dir->i_mutex); + rpc_release_path(&nd); return dentry; -err_rmdir: +err_depopulate: + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); __rpc_rmdir(dir, dentry); -out_err: +err_dput: + dput(dentry); + printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", + __FILE__, __func__, path, error); dentry = ERR_PTR(error); goto out; } -static int rpc_rmdir_depopulate(struct dentry *dentry, - void (*depopulate)(struct dentry *)) +/** + * rpc_rmdir - Remove a directory created with rpc_mkdir() + * @dentry: directory to remove + */ +int +rpc_rmdir(struct dentry *dentry) { struct dentry *parent; struct inode *dir; @@ -726,9 +748,9 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (depopulate != NULL) - depopulate(dentry); + rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); error = __rpc_rmdir(dir, dentry); + dput(dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; @@ -754,54 +776,50 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, - void *private, const struct rpc_pipe_ops *ops, - int flags) +struct dentry * +rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) { struct dentry *dentry; - struct inode *dir = parent->d_inode; - umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; - struct qstr q; - int err; - - if (ops->upcall == NULL) - umode &= ~S_IRUGO; - if (ops->downcall == NULL) - umode &= ~S_IWUGO; - - q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len), + struct inode *dir, *inode; + struct rpc_inode *rpci; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create(parent, &q); + dentry = rpc_lookup_create(parent, name, strlen(name), 0); if (IS_ERR(dentry)) - goto out; + return dentry; + dir = parent->d_inode; if (dentry->d_inode) { - struct rpc_inode *rpci = RPC_I(dentry->d_inode); + rpci = RPC_I(dentry->d_inode); if (rpci->private != private || rpci->ops != ops || rpci->flags != flags) { dput (dentry); - err = -EBUSY; - goto out_err; + dentry = ERR_PTR(-EBUSY); } rpci->nkern_readwriters++; goto out; } - - err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, - private, ops, flags); - if (err) - goto out_err; + inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); + if (!inode) + goto err_dput; + inode->i_ino = iunique(dir->i_sb, 100); + inode->i_fop = &rpc_pipe_fops; + d_instantiate(dentry, inode); + rpci = RPC_I(inode); + rpci->private = private; + rpci->flags = flags; + rpci->ops = ops; + rpci->nkern_readwriters = 1; + fsnotify_create(dir, dentry); + dget(dentry); out: mutex_unlock(&dir->i_mutex); return dentry; -out_err: - dentry = ERR_PTR(err); +err_dput: + dput(dentry); + dentry = ERR_PTR(-ENOMEM); printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", __FILE__, __func__, parent->d_name.name, name, - err); + -ENOMEM); goto out; } EXPORT_SYMBOL_GPL(rpc_mkpipe); @@ -824,107 +842,19 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - error = __rpc_rmpipe(dir, dentry); + if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + if (!error) + d_delete(dentry); + } + dput(dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; } EXPORT_SYMBOL_GPL(rpc_unlink); -enum { - RPCAUTH_info, - RPCAUTH_EOF -}; - -static const struct rpc_filelist authfiles[] = { - [RPCAUTH_info] = { - .name = "info", - .i_fop = &rpc_info_operations, - .mode = S_IFREG | S_IRUSR, - }, -}; - -static int rpc_clntdir_populate(struct dentry *dentry, void *private) -{ - return rpc_populate(dentry, - authfiles, RPCAUTH_info, RPCAUTH_EOF, - private); -} - -static void rpc_clntdir_depopulate(struct dentry *dentry) -{ - rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); -} - -/** - * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs - * @path: path from the rpc_pipefs root to the new directory - * @rpc_client: rpc client to associate with this directory - * - * This creates a directory at the given @path associated with - * @rpc_clnt, which will contain a file named "info" with some basic - * information about the client, together with any "pipes" that may - * later be created using rpc_mkpipe(). - */ -struct dentry *rpc_create_client_dir(struct dentry *dentry, - struct qstr *name, - struct rpc_clnt *rpc_client) -{ - return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, - rpc_clntdir_populate, rpc_client); -} - -/** - * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() - * @dentry: directory to remove - */ -int rpc_remove_client_dir(struct dentry *dentry) -{ - return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); -} - -static const struct rpc_filelist cache_pipefs_files[3] = { - [0] = { - .name = "channel", - .i_fop = &cache_file_operations_pipefs, - .mode = S_IFIFO|S_IRUSR|S_IWUSR, - }, - [1] = { - .name = "content", - .i_fop = &content_file_operations_pipefs, - .mode = S_IFREG|S_IRUSR, - }, - [2] = { - .name = "flush", - .i_fop = &cache_flush_operations_pipefs, - .mode = S_IFREG|S_IRUSR|S_IWUSR, - }, -}; - -static int rpc_cachedir_populate(struct dentry *dentry, void *private) -{ - return rpc_populate(dentry, - cache_pipefs_files, 0, 3, - private); -} - -static void rpc_cachedir_depopulate(struct dentry *dentry) -{ - rpc_depopulate(dentry, cache_pipefs_files, 0, 3); -} - -struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, - mode_t umode, struct cache_detail *cd) -{ - return rpc_mkdir_populate(parent, name, umode, NULL, - rpc_cachedir_populate, cd); -} - -void rpc_remove_cache_dir(struct dentry *dentry) -{ - rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); -} - /* * populate the filesystem */ @@ -936,46 +866,6 @@ static struct super_operations s_ops = { #define RPCAUTH_GSSMAGIC 0x67596969 -/* - * We have a single directory with 1 node in it. - */ -enum { - RPCAUTH_lockd, - RPCAUTH_mount, - RPCAUTH_nfs, - RPCAUTH_portmap, - RPCAUTH_statd, - RPCAUTH_nfsd4_cb, - RPCAUTH_RootEOF -}; - -static const struct rpc_filelist files[] = { - [RPCAUTH_lockd] = { - .name = "lockd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_mount] = { - .name = "mount", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfs] = { - .name = "nfs", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_portmap] = { - .name = "portmap", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_statd] = { - .name = "statd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfsd4_cb] = { - .name = "nfsd4_cb", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, -}; - static int rpc_fill_super(struct super_block *sb, void *data, int silent) { @@ -996,7 +886,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) iput(inode); return -ENOMEM; } - if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) + if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF)) goto out; sb->s_root = root; return 0; diff --git a/trunk/net/sunrpc/rpcb_clnt.c b/trunk/net/sunrpc/rpcb_clnt.c index 830faf4d9997..beee6da33035 100644 --- a/trunk/net/sunrpc/rpcb_clnt.c +++ b/trunk/net/sunrpc/rpcb_clnt.c @@ -75,37 +75,6 @@ enum { #define RPCB_OWNER_STRING "0" #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) -/* - * XDR data type sizes - */ -#define RPCB_program_sz (1) -#define RPCB_version_sz (1) -#define RPCB_protocol_sz (1) -#define RPCB_port_sz (1) -#define RPCB_boolean_sz (1) - -#define RPCB_netid_sz (1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) -#define RPCB_addr_sz (1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) -#define RPCB_ownerstring_sz (1 + XDR_QUADLEN(RPCB_MAXOWNERLEN)) - -/* - * XDR argument and result sizes - */ -#define RPCB_mappingargs_sz (RPCB_program_sz + RPCB_version_sz + \ - RPCB_protocol_sz + RPCB_port_sz) -#define RPCB_getaddrargs_sz (RPCB_program_sz + RPCB_version_sz + \ - RPCB_netid_sz + RPCB_addr_sz + \ - RPCB_ownerstring_sz) - -#define RPCB_getportres_sz RPCB_port_sz -#define RPCB_setres_sz RPCB_boolean_sz - -/* - * Note that RFC 1833 does not put any size restrictions on the - * address string returned by the remote rpcbind database. - */ -#define RPCB_getaddrres_sz RPCB_addr_sz - static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; @@ -153,7 +122,6 @@ static void rpcb_map_release(void *data) rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); - kfree(map->r_addr); kfree(map); } @@ -300,9 +268,12 @@ static int rpcb_register_inet4(const struct sockaddr *sap, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); - int result; + char buf[32]; - map->r_addr = rpc_sockaddr2uaddr(sap); + /* Construct AF_INET universal address */ + snprintf(buf, sizeof(buf), "%pI4.%u.%u", + &sin->sin_addr.s_addr, port >> 8, port & 0xff); + map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -313,9 +284,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); - kfree(map->r_addr); - return result; + return rpcb_register_call(RPCBVERS_4, msg); } /* @@ -327,9 +296,16 @@ static int rpcb_register_inet6(const struct sockaddr *sap, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); - int result; + char buf[64]; - map->r_addr = rpc_sockaddr2uaddr(sap); + /* Construct AF_INET6 universal address */ + if (ipv6_addr_any(&sin6->sin6_addr)) + snprintf(buf, sizeof(buf), "::.%u.%u", + port >> 8, port & 0xff); + else + snprintf(buf, sizeof(buf), "%pI6.%u.%u", + &sin6->sin6_addr, port >> 8, port & 0xff); + map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -340,9 +316,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); - kfree(map->r_addr); - return result; + return rpcb_register_call(RPCBVERS_4, msg); } static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) @@ -454,7 +428,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) struct rpc_message msg = { .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], .rpc_argp = &map, - .rpc_resp = &map, + .rpc_resp = &map.r_port, }; struct rpc_clnt *rpcb_clnt; int status; @@ -484,7 +458,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi struct rpc_message msg = { .rpc_proc = proc, .rpc_argp = map, - .rpc_resp = map, + .rpc_resp = &map->r_port, }; struct rpc_task_setup task_setup_data = { .rpc_client = rpcb_clnt, @@ -565,7 +539,6 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - /* Parent transport's destination address */ salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ @@ -616,22 +589,11 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); + map->r_owner = ""; map->r_status = -EIO; - switch (bind_version) { - case RPCBVERS_4: - case RPCBVERS_3: - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_sockaddr2uaddr(sap); - map->r_owner = ""; - break; - case RPCBVERS_2: - map->r_addr = NULL; - break; - default: - BUG(); - } - child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { @@ -694,278 +656,176 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) * XDR functions for rpcbind */ -static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p, - const struct rpcbind_args *rpcb) +static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) { - struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; - - dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); - - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - - p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz); - if (unlikely(p == NULL)) - return -EIO; - *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); *p++ = htonl(rpcb->r_prot); - *p = htonl(rpcb->r_port); + *p++ = htonl(rpcb->r_port); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } -static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, + unsigned short *portp) { - struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; - unsigned long port; - - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - - rpcb->r_port = 0; - - p = xdr_inline_decode(&xdr, sizeof(__be32)); - if (unlikely(p == NULL)) - return -EIO; - - port = ntohl(*p); - dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, - task->tk_msg.rpc_proc->p_name, port); - if (unlikely(port > USHORT_MAX)) - return -EIO; - - rpcb->r_port = port; + *portp = (unsigned short) ntohl(*p++); + dprintk("RPC: rpcb getport result: %u\n", + *portp); return 0; } -static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, - unsigned int *boolp) +static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, + unsigned int *boolp) { - struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; - - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - - p = xdr_inline_decode(&xdr, sizeof(__be32)); - if (unlikely(p == NULL)) - return -EIO; - - *boolp = 0; - if (*p) - *boolp = 1; - - dprintk("RPC: %5u RPCB_%s call %s\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, + *boolp = (unsigned int) ntohl(*p++); + dprintk("RPC: rpcb set/unset call %s\n", (*boolp ? "succeeded" : "failed")); return 0; } -static int encode_rpcb_string(struct xdr_stream *xdr, const char *string, - const u32 maxstrlen) +static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) { - u32 len; - __be32 *p; - - if (unlikely(string == NULL)) - return -EIO; - len = strlen(string); - if (unlikely(len > maxstrlen)) - return -EIO; - - p = xdr_reserve_space(xdr, sizeof(__be32) + len); - if (unlikely(p == NULL)) - return -EIO; - xdr_encode_opaque(p, string, len); - - return 0; -} - -static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, - const struct rpcbind_args *rpcb) -{ - struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; - - dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name, - rpcb->r_prog, rpcb->r_vers, - rpcb->r_netid, rpcb->r_addr); - - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - - p = xdr_reserve_space(&xdr, - sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz)); - if (unlikely(p == NULL)) - return -EIO; + dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", + rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); *p++ = htonl(rpcb->r_prog); - *p = htonl(rpcb->r_vers); + *p++ = htonl(rpcb->r_vers); + + p = xdr_encode_string(p, rpcb->r_netid); + p = xdr_encode_string(p, rpcb->r_addr); + p = xdr_encode_string(p, rpcb->r_owner); - if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN)) - return -EIO; - if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN)) - return -EIO; - if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN)) - return -EIO; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } -static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, + unsigned short *portp) { - struct sockaddr_storage address; - struct sockaddr *sap = (struct sockaddr *)&address; - struct rpc_task *task = req->rq_task; - struct xdr_stream xdr; - u32 len; + char *addr; + u32 addr_len; + int c, i, f, first, val; - rpcb->r_port = 0; + *portp = 0; + addr_len = ntohl(*p++); - xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + if (addr_len == 0) { + dprintk("RPC: rpcb_decode_getaddr: " + "service is not registered\n"); + return 0; + } - p = xdr_inline_decode(&xdr, sizeof(__be32)); - if (unlikely(p == NULL)) - goto out_fail; - len = ntohl(*p); + /* + * Simple sanity check. + */ + if (addr_len > RPCBIND_MAXUADDRLEN) + goto out_err; /* - * If the returned universal address is a null string, - * the requested RPC service was not registered. + * Start at the end and walk backwards until the first dot + * is encountered. When the second dot is found, we have + * both parts of the port number. */ - if (len == 0) { - dprintk("RPC: %5u RPCB reply: program not registered\n", - task->tk_pid); - return 0; + addr = (char *)p; + val = 0; + first = 1; + f = 1; + for (i = addr_len - 1; i > 0; i--) { + c = addr[i]; + if (c >= '0' && c <= '9') { + val += (c - '0') * f; + f *= 10; + } else if (c == '.') { + if (first) { + *portp = val; + val = first = 0; + f = 1; + } else { + *portp |= (val << 8); + break; + } + } } - if (unlikely(len > RPCBIND_MAXUADDRLEN)) - goto out_fail; - - p = xdr_inline_decode(&xdr, len); - if (unlikely(p == NULL)) - goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, - task->tk_msg.rpc_proc->p_name, (char *)p); - - if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0) - goto out_fail; - rpcb->r_port = rpc_get_port(sap); + /* + * Simple sanity check. If we never saw a dot in the reply, + * then this was probably just garbage. + */ + if (first) + goto out_err; + dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; -out_fail: - dprintk("RPC: %5u malformed RPCB_%s reply\n", - task->tk_pid, task->tk_msg.rpc_proc->p_name); +out_err: + dprintk("RPC: rpcbind server returned malformed reply\n"); return -EIO; } +#define RPCB_program_sz (1u) +#define RPCB_version_sz (1u) +#define RPCB_protocol_sz (1u) +#define RPCB_port_sz (1u) +#define RPCB_boolean_sz (1u) + +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) +#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) +#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) + +#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ + RPCB_protocol_sz+RPCB_port_sz +#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \ + RPCB_netid_sz+RPCB_addr_sz+ \ + RPCB_ownerstring_sz + +#define RPCB_setres_sz RPCB_boolean_sz +#define RPCB_getportres_sz RPCB_port_sz + +/* + * Note that RFC 1833 does not put any size restrictions on the + * address string returned by the remote rpcbind database. + */ +#define RPCB_getaddrres_sz RPCB_addr_sz + +#define PROC(proc, argtype, restype) \ + [RPCBPROC_##proc] = { \ + .p_proc = RPCBPROC_##proc, \ + .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \ + .p_decode = (kxdrproc_t) rpcb_decode_##restype, \ + .p_arglen = RPCB_##argtype##args_sz, \ + .p_replen = RPCB_##restype##res_sz, \ + .p_statidx = RPCBPROC_##proc, \ + .p_timer = 0, \ + .p_name = #proc, \ + } + /* * Not all rpcbind procedures described in RFC 1833 are implemented * since the Linux kernel RPC code requires only these. */ - static struct rpc_procinfo rpcb_procedures2[] = { - [RPCBPROC_SET] = { - .p_proc = RPCBPROC_SET, - .p_encode = (kxdrproc_t)rpcb_enc_mapping, - .p_decode = (kxdrproc_t)rpcb_dec_set, - .p_arglen = RPCB_mappingargs_sz, - .p_replen = RPCB_setres_sz, - .p_statidx = RPCBPROC_SET, - .p_timer = 0, - .p_name = "SET", - }, - [RPCBPROC_UNSET] = { - .p_proc = RPCBPROC_UNSET, - .p_encode = (kxdrproc_t)rpcb_enc_mapping, - .p_decode = (kxdrproc_t)rpcb_dec_set, - .p_arglen = RPCB_mappingargs_sz, - .p_replen = RPCB_setres_sz, - .p_statidx = RPCBPROC_UNSET, - .p_timer = 0, - .p_name = "UNSET", - }, - [RPCBPROC_GETPORT] = { - .p_proc = RPCBPROC_GETPORT, - .p_encode = (kxdrproc_t)rpcb_enc_mapping, - .p_decode = (kxdrproc_t)rpcb_dec_getport, - .p_arglen = RPCB_mappingargs_sz, - .p_replen = RPCB_getportres_sz, - .p_statidx = RPCBPROC_GETPORT, - .p_timer = 0, - .p_name = "GETPORT", - }, + PROC(SET, mapping, set), + PROC(UNSET, mapping, set), + PROC(GETPORT, mapping, getport), }; static struct rpc_procinfo rpcb_procedures3[] = { - [RPCBPROC_SET] = { - .p_proc = RPCBPROC_SET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, - .p_arglen = RPCB_getaddrargs_sz, - .p_replen = RPCB_setres_sz, - .p_statidx = RPCBPROC_SET, - .p_timer = 0, - .p_name = "SET", - }, - [RPCBPROC_UNSET] = { - .p_proc = RPCBPROC_UNSET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, - .p_arglen = RPCB_getaddrargs_sz, - .p_replen = RPCB_setres_sz, - .p_statidx = RPCBPROC_UNSET, - .p_timer = 0, - .p_name = "UNSET", - }, - [RPCBPROC_GETADDR] = { - .p_proc = RPCBPROC_GETADDR, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_getaddr, - .p_arglen = RPCB_getaddrargs_sz, - .p_replen = RPCB_getaddrres_sz, - .p_statidx = RPCBPROC_GETADDR, - .p_timer = 0, - .p_name = "GETADDR", - }, + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), + PROC(GETADDR, getaddr, getaddr), }; static struct rpc_procinfo rpcb_procedures4[] = { - [RPCBPROC_SET] = { - .p_proc = RPCBPROC_SET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, - .p_arglen = RPCB_getaddrargs_sz, - .p_replen = RPCB_setres_sz, - .p_statidx = RPCBPROC_SET, - .p_timer = 0, - .p_name = "SET", - }, - [RPCBPROC_UNSET] = { - .p_proc = RPCBPROC_UNSET, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_set, - .p_arglen = RPCB_getaddrargs_sz, - .p_replen = RPCB_setres_sz, - .p_statidx = RPCBPROC_UNSET, - .p_timer = 0, - .p_name = "UNSET", - }, - [RPCBPROC_GETADDR] = { - .p_proc = RPCBPROC_GETADDR, - .p_encode = (kxdrproc_t)rpcb_enc_getaddr, - .p_decode = (kxdrproc_t)rpcb_dec_getaddr, - .p_arglen = RPCB_getaddrargs_sz, - .p_replen = RPCB_getaddrres_sz, - .p_statidx = RPCBPROC_GETADDR, - .p_timer = 0, - .p_name = "GETADDR", - }, + PROC(SET, getaddr, set), + PROC(UNSET, getaddr, set), + PROC(GETADDR, getaddr, getaddr), + PROC(GETVERSADDR, getaddr, getaddr), }; static struct rpcb_info rpcb_next_version[] = { diff --git a/trunk/net/sunrpc/svcauth_unix.c b/trunk/net/sunrpc/svcauth_unix.c index 6caffa34ac01..5c865e2d299e 100644 --- a/trunk/net/sunrpc/svcauth_unix.c +++ b/trunk/net/sunrpc/svcauth_unix.c @@ -171,11 +171,6 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); -} - static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -294,7 +289,7 @@ struct cache_detail ip_map_cache = { .hash_table = ip_table, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, + .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, @@ -528,11 +523,6 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); -} - static struct unix_gid *unix_gid_lookup(uid_t uid); extern struct cache_detail unix_gid_cache; @@ -632,7 +622,7 @@ struct cache_detail unix_gid_cache = { .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_upcall = unix_gid_upcall, + .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, diff --git a/trunk/net/sunrpc/timer.c b/trunk/net/sunrpc/timer.c index dd824341c349..31becbf09263 100644 --- a/trunk/net/sunrpc/timer.c +++ b/trunk/net/sunrpc/timer.c @@ -25,13 +25,8 @@ #define RPC_RTO_INIT (HZ/5) #define RPC_RTO_MIN (HZ/10) -/** - * rpc_init_rtt - Initialize an RPC RTT estimator context - * @rt: context to initialize - * @timeo: initial timeout value, in jiffies - * - */ -void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) +void +rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) { unsigned long init = 0; unsigned i; @@ -48,16 +43,12 @@ void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) } EXPORT_SYMBOL_GPL(rpc_init_rtt); -/** - * rpc_update_rtt - Update an RPC RTT estimator context - * @rt: context to update - * @timer: timer array index (request type) - * @m: recent actual RTT, in jiffies - * +/* * NB: When computing the smoothed RTT and standard deviation, * be careful not to produce negative intermediate results. */ -void rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) +void +rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) { long *srtt, *sdrtt; @@ -88,25 +79,21 @@ void rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) } EXPORT_SYMBOL_GPL(rpc_update_rtt); -/** - * rpc_calc_rto - Provide an estimated timeout value - * @rt: context to use for calculation - * @timer: timer array index (request type) - * - * Estimate RTO for an NFS RPC sent via an unreliable datagram. Use - * the mean and mean deviation of RTT for the appropriate type of RPC - * for frequently issued RPCs, and a fixed default for the others. - * - * The justification for doing "other" this way is that these RPCs - * happen so infrequently that timer estimation would probably be - * stale. Also, since many of these RPCs are non-idempotent, a - * conservative timeout is desired. - * +/* + * Estimate rto for an nfs rpc sent via. an unreliable datagram. + * Use the mean and mean deviation of rtt for the appropriate type of rpc + * for the frequent rpcs and a default for the others. + * The justification for doing "other" this way is that these rpcs + * happen so infrequently that timer est. would probably be stale. + * Also, since many of these rpcs are + * non-idempotent, a conservative timeout is desired. * getattr, lookup, * read, write, commit - A+4D * other - timeo */ -unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) + +unsigned long +rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) { unsigned long res; diff --git a/trunk/net/sunrpc/xdr.c b/trunk/net/sunrpc/xdr.c index 406e26de584e..0d05d2554b42 100644 --- a/trunk/net/sunrpc/xdr.c +++ b/trunk/net/sunrpc/xdr.c @@ -24,7 +24,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) unsigned int quadlen = XDR_QUADLEN(obj->len); p[quadlen] = 0; /* zero trailing bytes */ - *p++ = htonl(obj->len); + *p++ = cpu_to_be32(obj->len); memcpy(p, obj->data, obj->len); return p + XDR_QUADLEN(obj->len); } @@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed); */ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { - *p++ = htonl(nbytes); + *p++ = cpu_to_be32(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL_GPL(xdr_encode_opaque); @@ -779,7 +779,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_word); int xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) { - __be32 raw = htonl(obj); + __be32 raw = cpu_to_be32(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } diff --git a/trunk/net/sunrpc/xprtrdma/transport.c b/trunk/net/sunrpc/xprtrdma/transport.c index 9a63f669ece4..1dd6123070e9 100644 --- a/trunk/net/sunrpc/xprtrdma/transport.c +++ b/trunk/net/sunrpc/xprtrdma/transport.c @@ -168,25 +168,47 @@ static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ static void xprt_rdma_format_addresses(struct rpc_xprt *xprt) { - struct sockaddr *sap = (struct sockaddr *) + struct sockaddr_in *addr = (struct sockaddr_in *) &rpcx_to_rdmad(xprt).addr; - struct sockaddr_in *sin = (struct sockaddr_in *)sap; - char buf[64]; + char *buf; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); + buf = kzalloc(20, GFP_KERNEL); + if (buf) + snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); - xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%u", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_PORT] = buf; xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); - - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); + buf = kzalloc(48, GFP_KERNEL); + if (buf) + snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", + &addr->sin_addr.s_addr, + ntohs(addr->sin_port), "rdma"); + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) + snprintf(buf, 30, "%pI4.%u.%u", + &addr->sin_addr.s_addr, + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; /* netid */ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; diff --git a/trunk/net/sunrpc/xprtsock.c b/trunk/net/sunrpc/xprtsock.c index 62438f3a914d..83c73c4d017a 100644 --- a/trunk/net/sunrpc/xprtsock.c +++ b/trunk/net/sunrpc/xprtsock.c @@ -248,8 +248,8 @@ struct sock_xprt { * Connection of transports */ struct delayed_work connect_worker; - struct sockaddr_storage srcaddr; - unsigned short srcport; + struct sockaddr_storage addr; + unsigned short port; /* * UDP socket buffer size parameters @@ -296,60 +296,117 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) return (struct sockaddr_in6 *) &xprt->addr; } -static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) { - struct sockaddr *sap = xs_addr(xprt); - struct sockaddr_in6 *sin6; - struct sockaddr_in *sin; - char buf[128]; + struct sockaddr_in *addr = xs_addr_in(xprt); + char *buf; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); + buf = kzalloc(20, GFP_KERNEL); + if (buf) { + snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; - switch (sap->sa_family) { - case AF_INET: - sin = xs_addr_in(xprt); - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); - break; - case AF_INET6: - sin6 = xs_addr_in6(xprt); - (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); - break; - default: - BUG(); + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin_port)); } - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); -} + xprt->address_strings[RPC_DISPLAY_PORT] = buf; -static void xs_format_common_peer_ports(struct rpc_xprt *xprt) -{ - struct sockaddr *sap = xs_addr(xprt); - char buf[128]; + xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); - xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); + buf = kzalloc(48, GFP_KERNEL); + if (buf) { + snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", + &addr->sin_addr.s_addr, + ntohs(addr->sin_port), + protocol); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); -} + buf = kzalloc(10, GFP_KERNEL); + if (buf) { + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) { + snprintf(buf, 30, "%pI4.%u.%u", + &addr->sin_addr.s_addr, + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; -static void xs_format_peer_addresses(struct rpc_xprt *xprt, - const char *protocol, - const char *netid) -{ - xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; xprt->address_strings[RPC_DISPLAY_NETID] = netid; - xs_format_common_peer_addresses(xprt); - xs_format_common_peer_ports(xprt); } -static void xs_update_peer_port(struct rpc_xprt *xprt) +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) { - kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + struct sockaddr_in6 *addr = xs_addr_in6(xprt); + char *buf; + + buf = kzalloc(40, GFP_KERNEL); + if (buf) { + snprintf(buf, 40, "%pI6",&addr->sin6_addr); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; + + buf = kzalloc(64, GFP_KERNEL); + if (buf) { + snprintf(buf, 64, "addr=%pI6 port=%u proto=%s", + &addr->sin6_addr, + ntohs(addr->sin6_port), + protocol); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(36, GFP_KERNEL); + if (buf) + snprintf(buf, 36, "%pi6", &addr->sin6_addr); + + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(50, GFP_KERNEL); + if (buf) { + snprintf(buf, 50, "%pI6.%u.%u", + &addr->sin6_addr, + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xs_format_common_peer_ports(xprt); + xprt->address_strings[RPC_DISPLAY_NETID] = netid; } static void xs_free_peer_addresses(struct rpc_xprt *xprt) @@ -1530,15 +1587,25 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { + struct sockaddr *addr = xs_addr(xprt); + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - rpc_set_port(xs_addr(xprt), port); - xs_update_peer_port(xprt); + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + break; + default: + BUG(); + } } static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) { - unsigned short port = transport->srcport; + unsigned short port = transport->port; if (port == 0 && transport->xprt.resvport) port = xs_get_random_port(); @@ -1547,8 +1614,8 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) { - if (transport->srcport != 0) - transport->srcport = 0; + if (transport->port != 0) + transport->port = 0; if (!transport->xprt.resvport) return 0; if (port <= xprt_min_resvport || port > xprt_max_resvport) @@ -1566,7 +1633,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in *)&transport->srcaddr; + sa = (struct sockaddr_in *)&transport->addr; myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); @@ -1575,7 +1642,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->srcport = port; + transport->port = port; break; } last = port; @@ -1599,7 +1666,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in6 *)&transport->srcaddr; + sa = (struct sockaddr_in6 *)&transport->addr; myaddr.sin6_addr = sa->sin6_addr; do { myaddr.sin6_port = htons(port); @@ -1608,7 +1675,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->srcport = port; + transport->port = port; break; } last = port; @@ -1713,11 +1780,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p via %s to " - "%s (port %s)\n", xprt, - xprt->address_strings[RPC_DISPLAY_PROTO], - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1758,11 +1822,8 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p via %s to " - "%s (port %s)\n", xprt, - xprt->address_strings[RPC_DISPLAY_PROTO], - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1887,11 +1948,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, goto out_eagain; } - dprintk("RPC: worker connecting xprt %p via %s to " - "%s (port %s)\n", xprt, - xprt->address_strings[RPC_DISPLAY_PROTO], - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", @@ -2062,7 +2120,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", - transport->srcport, + transport->port, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, @@ -2086,7 +2144,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", - transport->srcport, + transport->port, xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -2165,7 +2223,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) - memcpy(&new->srcaddr, args->srcaddr, args->addrlen); + memcpy(&new->addr, args->srcaddr, args->addrlen); return xprt; } @@ -2214,7 +2272,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker4); - xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); + xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) @@ -2222,22 +2280,15 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker6); - xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); + xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - if (xprt_bound(xprt)) - dprintk("RPC: set up xprt to %s (port %s) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - xprt->address_strings[RPC_DISPLAY_PROTO]); - else - dprintk("RPC: set up xprt to %s (autobind) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PROTO]); + dprintk("RPC: set up transport to address %s\n", + xprt->address_strings[RPC_DISPLAY_ALL]); if (try_module_get(THIS_MODULE)) return xprt; @@ -2286,33 +2337,23 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_connect_worker4); - xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_connect_worker6); - xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - if (xprt_bound(xprt)) - dprintk("RPC: set up xprt to %s (port %s) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - xprt->address_strings[RPC_DISPLAY_PROTO]); - else - dprintk("RPC: set up xprt to %s (autobind) via %s\n", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PROTO]); - + dprintk("RPC: set up transport to address %s\n", + xprt->address_strings[RPC_DISPLAY_ALL]); if (try_module_get(THIS_MODULE)) return xprt; @@ -2371,55 +2412,3 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); } - -static int param_set_uint_minmax(const char *val, struct kernel_param *kp, - unsigned int min, unsigned int max) -{ - unsigned long num; - int ret; - - if (!val) - return -EINVAL; - ret = strict_strtoul(val, 0, &num); - if (ret == -EINVAL || num < min || num > max) - return -EINVAL; - *((unsigned int *)kp->arg) = num; - return 0; -} - -static int param_set_portnr(const char *val, struct kernel_param *kp) -{ - return param_set_uint_minmax(val, kp, - RPC_MIN_RESVPORT, - RPC_MAX_RESVPORT); -} - -static int param_get_portnr(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} -#define param_check_portnr(name, p) \ - __param_check(name, p, unsigned int); - -module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); -module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); - -static int param_set_slot_table_size(const char *val, struct kernel_param *kp) -{ - return param_set_uint_minmax(val, kp, - RPC_MIN_SLOT_TABLE, - RPC_MAX_SLOT_TABLE); -} - -static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} -#define param_check_slot_table_size(name, p) \ - __param_check(name, p, unsigned int); - -module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, - slot_table_size, 0644); -module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, - slot_table_size, 0644); - diff --git a/trunk/scripts/recordmcount.pl b/trunk/scripts/recordmcount.pl index d29baa2e063a..911ba7ffab84 100755 --- a/trunk/scripts/recordmcount.pl +++ b/trunk/scripts/recordmcount.pl @@ -393,7 +393,7 @@ sub update_funcs $read_function = 0; } # print out any recorded offsets - update_funcs() if ($text_found); + update_funcs() if (defined($ref_func)); # reset all markers and arrays $text_found = 0; @@ -414,7 +414,10 @@ sub update_funcs $offset = hex $1; } else { # if we already have a function, and this is weak, skip it - if (!defined($ref_func) && !defined($weak{$text})) { + if (!defined($ref_func) && !defined($weak{$text}) && + # PPC64 can have symbols that start with .L and + # gcc considers these special. Don't use them! + $text !~ /^\.L/) { $ref_func = $text; $offset = hex $1; } @@ -441,7 +444,7 @@ sub update_funcs } # dump out anymore offsets that may have been found -update_funcs() if ($text_found); +update_funcs() if (defined($ref_func)); # If we did not find any mcount callers, we are done (do nothing). if (!$opened) { diff --git a/trunk/security/selinux/hooks.c b/trunk/security/selinux/hooks.c index 15c2a08a66f1..1e8cfc4c2ed6 100644 --- a/trunk/security/selinux/hooks.c +++ b/trunk/security/selinux/hooks.c @@ -1285,6 +1285,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, context, len); if (rc == -ERANGE) { + kfree(context); + /* Need a larger buffer. Query for the right size. */ rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, NULL, 0); @@ -1292,7 +1294,6 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent dput(dentry); goto out_unlock; } - kfree(context); len = rc; context = kmalloc(len+1, GFP_NOFS); if (!context) { diff --git a/trunk/sound/pci/hda/patch_realtek.c b/trunk/sound/pci/hda/patch_realtek.c index 51c44fdbc0f0..fea976793ae5 100644 --- a/trunk/sound/pci/hda/patch_realtek.c +++ b/trunk/sound/pci/hda/patch_realtek.c @@ -13563,6 +13563,8 @@ static int patch_alc269(struct hda_codec *codec) set_capture_mixer(spec); set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT); + spec->vmaster_nid = 0x02; + codec->patch_ops = alc_patch_ops; if (board_config == ALC269_AUTO) spec->init_hook = alc269_auto_init; @@ -15577,9 +15579,12 @@ static int patch_alc861vd(struct hda_codec *codec) spec->stream_digital_playback = &alc861vd_pcm_digital_playback; spec->stream_digital_capture = &alc861vd_pcm_digital_capture; - spec->adc_nids = alc861vd_adc_nids; - spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids); - spec->capsrc_nids = alc861vd_capsrc_nids; + if (!spec->adc_nids) { + spec->adc_nids = alc861vd_adc_nids; + spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids); + } + if (!spec->capsrc_nids) + spec->capsrc_nids = alc861vd_capsrc_nids; set_capture_mixer(spec); set_beep_amp(spec, 0x0b, 0x05, HDA_INPUT); @@ -17496,9 +17501,12 @@ static int patch_alc662(struct hda_codec *codec) spec->stream_digital_playback = &alc662_pcm_digital_playback; spec->stream_digital_capture = &alc662_pcm_digital_capture; - spec->adc_nids = alc662_adc_nids; - spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids); - spec->capsrc_nids = alc662_capsrc_nids; + if (!spec->adc_nids) { + spec->adc_nids = alc662_adc_nids; + spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids); + } + if (!spec->capsrc_nids) + spec->capsrc_nids = alc662_capsrc_nids; if (!spec->cap_mixer) set_capture_mixer(spec); diff --git a/trunk/sound/soc/fsl/efika-audio-fabric.c b/trunk/sound/soc/fsl/efika-audio-fabric.c index 85b0e7569504..3326e2a1e863 100644 --- a/trunk/sound/soc/fsl/efika-audio-fabric.c +++ b/trunk/sound/soc/fsl/efika-audio-fabric.c @@ -30,6 +30,8 @@ #include "mpc5200_psc_ac97.h" #include "../codecs/stac9766.h" +#define DRV_NAME "efika-audio-fabric" + static struct snd_soc_device device; static struct snd_soc_card card; diff --git a/trunk/sound/soc/fsl/pcm030-audio-fabric.c b/trunk/sound/soc/fsl/pcm030-audio-fabric.c index 8766f7a3893d..b928ef7d28eb 100644 --- a/trunk/sound/soc/fsl/pcm030-audio-fabric.c +++ b/trunk/sound/soc/fsl/pcm030-audio-fabric.c @@ -30,6 +30,8 @@ #include "mpc5200_psc_ac97.h" #include "../codecs/wm9712.h" +#define DRV_NAME "pcm030-audio-fabric" + static struct snd_soc_device device; static struct snd_soc_card card; diff --git a/trunk/tools/perf/Documentation/perf-examples.txt b/trunk/tools/perf/Documentation/perf-examples.txt new file mode 100644 index 000000000000..8eb6c489fb15 --- /dev/null +++ b/trunk/tools/perf/Documentation/perf-examples.txt @@ -0,0 +1,225 @@ + + ------------------------------ + ****** perf by examples ****** + ------------------------------ + +[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] + + +First, discovery/enumeration of available counters can be done via +'perf list': + +titan:~> perf list + [...] + kmem:kmalloc [Tracepoint event] + kmem:kmem_cache_alloc [Tracepoint event] + kmem:kmalloc_node [Tracepoint event] + kmem:kmem_cache_alloc_node [Tracepoint event] + kmem:kfree [Tracepoint event] + kmem:kmem_cache_free [Tracepoint event] + kmem:mm_page_free_direct [Tracepoint event] + kmem:mm_pagevec_free [Tracepoint event] + kmem:mm_page_alloc [Tracepoint event] + kmem:mm_page_alloc_zone_locked [Tracepoint event] + kmem:mm_page_pcpu_drain [Tracepoint event] + kmem:mm_page_alloc_extfrag [Tracepoint event] + +Then any (or all) of the above event sources can be activated and +measured. For example the page alloc/free properties of a 'hackbench +run' are: + + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc + -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 + Time: 0.575 + + Performance counter stats for './hackbench 10': + + 13857 kmem:mm_page_pcpu_drain + 27576 kmem:mm_page_alloc + 6025 kmem:mm_pagevec_free + 20934 kmem:mm_page_free_direct + + 0.613972165 seconds time elapsed + +You can observe the statistical properties as well, by using the +'repeat the workload N times' feature of perf stat: + + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct ./hackbench 10 + Time: 0.627 + Time: 0.644 + Time: 0.564 + Time: 0.559 + Time: 0.626 + + Performance counter stats for './hackbench 10' (5 runs): + + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) + 25035 kmem:mm_page_alloc ( +- 3.783% ) + 6104 kmem:mm_pagevec_free ( +- 0.934% ) + 18376 kmem:mm_page_free_direct ( +- 4.941% ) + + 0.643954516 seconds time elapsed ( +- 2.363% ) + +Furthermore, these tracepoints can be used to sample the workload as +well. For example the page allocations done by a 'git gc' can be +captured the following way: + + titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] + +To check which functions generated page allocations: + + titan:~/git> perf report + # Samples: 10646 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.57% git-repack /lib64/libc-2.5.so + 21.81% git /lib64/libc-2.5.so + 14.59% git ./git + 11.79% git-repack ./git + 7.12% git /lib64/ld-2.5.so + 3.16% git-repack /lib64/libpthread-2.5.so + 2.09% git-repack /bin/bash + 1.97% rm /lib64/libc-2.5.so + 1.39% mv /lib64/ld-2.5.so + 1.37% mv /lib64/libc-2.5.so + 1.12% git-repack /lib64/ld-2.5.so + 0.95% rm /lib64/ld-2.5.so + 0.90% git-update-serv /lib64/libc-2.5.so + 0.73% git-update-serv /lib64/ld-2.5.so + 0.68% perf /lib64/libpthread-2.5.so + 0.64% git-repack /usr/lib64/libz.so.1.2.3 + +Or to see it on a more finegrained level: + +titan:~/git> perf report --sort comm,dso,symbol +# Samples: 10646 +# +# Overhead Command Shared Object Symbol +# ........ ............... .......................... ...... +# + 9.35% git-repack ./git [.] insert_obj_hash + 9.12% git ./git [.] insert_obj_hash + 7.31% git /lib64/libc-2.5.so [.] memcpy + 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork + 5.47% git /lib64/libc-2.5.so [.] _int_malloc + 2.99% git /lib64/libc-2.5.so [.] memset + +Furthermore, call-graph sampling can be done too, of page +allocations - to see precisely what kind of page allocations there +are: + + titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] + + titan:~/git> perf report -g + # Samples: 10686 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.25% git-repack /lib64/libc-2.5.so + | + |--50.00%-- _int_free + | + |--37.50%-- __GI___fork + | make_child + | + |--12.50%-- ptmalloc_unlock_all2 + | make_child + | + --6.25%-- __GI_strcpy + 21.61% git /lib64/libc-2.5.so + | + |--30.00%-- __GI_read + | | + | --83.33%-- git_config_from_file + | git_config + | | + [...] + +Or you can observe the whole system's page allocations for 10 +seconds: + +titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e +kmem:mm_page_alloc -e kmem:mm_pagevec_free -e +kmem:mm_page_free_direct sleep 10 + + Performance counter stats for 'sleep 10': + + 171585 kmem:mm_page_pcpu_drain + 322114 kmem:mm_page_alloc + 73623 kmem:mm_pagevec_free + 254115 kmem:mm_page_free_direct + + 10.000591410 seconds time elapsed + +Or observe how fluctuating the page allocations are, via statistical +analysis done over ten 1-second intervals: + + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct sleep 1 + + Performance counter stats for 'sleep 1' (10 runs): + + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) + 34394 kmem:mm_page_alloc ( +- 4.617% ) + 7509 kmem:mm_pagevec_free ( +- 4.820% ) + 25653 kmem:mm_page_free_direct ( +- 3.672% ) + + 1.058135029 seconds time elapsed ( +- 3.089% ) + +Or you can annotate the recorded 'git gc' run on a per symbol basis +and check which instructions/source-code generated page allocations: + + titan:~/git> perf annotate __GI___fork + ------------------------------------------------ + Percent | Source code & Disassembly of libc-2.5.so + ------------------------------------------------ + : + : + : Disassembly of section .plt: + : Disassembly of section .text: + : + : 00000031a2e95560 <__fork>: + [...] + 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax + 0.00 : 31a2e95607: 0f 05 syscall + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> + 0.00 : 31a2e95615: 85 c0 test %eax,%eax + +( this shows that 83.42% of __GI___fork's page allocations come from + the 0x38 system call it performs. ) + +etc. etc. - a lot more is possible. I could list a dozen of +other different usecases straight away - neither of which is +possible via /proc/vmstat. + +/proc/vmstat is not in the same league really, in terms of +expressive power of system analysis and performance +analysis. + +All that the above results needed were those new tracepoints +in include/tracing/events/kmem.h. + + Ingo + + diff --git a/trunk/tools/perf/Documentation/perf-stat.txt b/trunk/tools/perf/Documentation/perf-stat.txt index 0d74346d21ab..484080dd5b6f 100644 --- a/trunk/tools/perf/Documentation/perf-stat.txt +++ b/trunk/tools/perf/Documentation/perf-stat.txt @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --S:: +-c:: scale counter values EXAMPLES diff --git a/trunk/tools/perf/Documentation/perf-top.txt b/trunk/tools/perf/Documentation/perf-top.txt index 539d01289725..4a7d558dc309 100644 --- a/trunk/tools/perf/Documentation/perf-top.txt +++ b/trunk/tools/perf/Documentation/perf-top.txt @@ -3,36 +3,122 @@ perf-top(1) NAME ---- -perf-top - Run a command and profile it +perf-top - System profiling tool. SYNOPSIS -------- [verse] -'perf top' [-e | --event=EVENT] [-l] [-a] +'perf top' [-e | --event=EVENT] [] DESCRIPTION ----------- -This command runs a command and gathers a performance counter profile -from it. +This command generates and displays a performance counter profile in realtime. OPTIONS ------- -...:: - Any command you can specify in a shell. +-a:: +--all-cpus:: + System-wide collection. (default) + +-c :: +--count=:: + Event period to sample. + +-C :: +--CPU=:: + CPU to profile. + +-d :: +--delay=:: + Number of seconds to delay between refreshes. --e:: ---event=:: +-e :: +--event=:: Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. --a:: - system-wide collection +-E :: +--entries=:: + Display this many functions. + +-f :: +--count-filter=:: + Only display functions with more events than this. + +-F :: +--freq=:: + Profile at this frequency. + +-i:: +--inherit:: + Child tasks inherit counters, only makes sens with -p option. + +-k :: +--vmlinux=:: + Path to vmlinux. Required for annotation functionality. + +-m :: +--mmap-pages=:: + Number of mmapped data pages. + +-p :: +--pid=:: + Profile events on existing pid. + +-r :: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. + +-s :: +--sym-annotate=:: + Annotate this symbol. Requires -k option. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-z:: +--zero:: + Zero history across display updates. + +INTERACTIVE PROMPTING KEYS +-------------------------- + +[d]:: + Display refresh delay. + +[e]:: + Number of entries to display. + +[E]:: + Event to display when multiple counters are active. + +[f]:: + Profile display filter (>= hit count). + +[F]:: + Annotation display filter (>= % of total). + +[s]:: + Annotate symbol. + +[S]:: + Stop annotation, return to full profile display. + +[w]:: + Toggle between weighted sum and individual count[E]r profile. + +[z]:: + Toggle event count zeroing across display updates. + +[qQ]:: + Quit. + +Pressing any unmapped key displays a menu, and prompts for input. --l:: - scale counter values SEE ALSO -------- diff --git a/trunk/tools/perf/Makefile b/trunk/tools/perf/Makefile index 1916e44b9bb0..c045b4271e57 100644 --- a/trunk/tools/perf/Makefile +++ b/trunk/tools/perf/Makefile @@ -382,18 +382,29 @@ endif ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else - has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") - has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") - ifeq ($(has_bfd),y) EXTLIBS += -lbfd - else ifeq ($(has_bfd_iberty),y) - EXTLIBS += -lbfd -liberty else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) - BASIC_CFLAGS += -DNO_DEMANGLE + has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") + ifeq ($(has_bfd_iberty),y) + EXTLIBS += -lbfd -liberty + else + has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") + ifeq ($(has_bfd_iberty_z),y) + EXTLIBS += -lbfd -liberty -lz + else + has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y") + ifeq ($(has_cplus_demangle),y) + EXTLIBS += -liberty + BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE + else + msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) + BASIC_CFLAGS += -DNO_DEMANGLE + endif + endif + endif endif endif diff --git a/trunk/tools/perf/builtin-list.c b/trunk/tools/perf/builtin-list.c index f990fa8a35c9..d88c6961274c 100644 --- a/trunk/tools/perf/builtin-list.c +++ b/trunk/tools/perf/builtin-list.c @@ -10,11 +10,12 @@ #include "perf.h" -#include "util/parse-options.h" #include "util/parse-events.h" +#include "util/cache.h" int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) { + setup_pager(); print_events(); return 0; } diff --git a/trunk/tools/perf/builtin-record.c b/trunk/tools/perf/builtin-record.c index 6da09928130f..3d051b9cf25f 100644 --- a/trunk/tools/perf/builtin-record.c +++ b/trunk/tools/perf/builtin-record.c @@ -34,7 +34,9 @@ static int output; static const char *output_name = "perf.data"; static int group = 0; static unsigned int realtime_prio = 0; +static int raw_samples = 0; static int system_wide = 0; +static int profile_cpu = -1; static pid_t target_pid = -1; static int inherit = 1; static int force = 0; @@ -203,46 +205,48 @@ static void sig_atexit(void) kill(getpid(), signr); } -static void pid_synthesize_comm_event(pid_t pid, int full) +static pid_t pid_synthesize_comm_event(pid_t pid, int full) { struct comm_event comm_ev; char filename[PATH_MAX]; char bf[BUFSIZ]; - int fd; - size_t size; - char *field, *sep; + FILE *fp; + size_t size = 0; DIR *tasks; struct dirent dirent, *next; + pid_t tgid = 0; - snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); + snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - fd = open(filename, O_RDONLY); - if (fd < 0) { + fp = fopen(filename, "r"); + if (fd == NULL) { /* * We raced with a task exiting - just return: */ if (verbose) fprintf(stderr, "couldn't open %s\n", filename); - return; + return 0; } - if (read(fd, bf, sizeof(bf)) < 0) { - fprintf(stderr, "couldn't read %s\n", filename); - exit(EXIT_FAILURE); - } - close(fd); - /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */ memset(&comm_ev, 0, sizeof(comm_ev)); - field = strchr(bf, '('); - if (field == NULL) - goto out_failure; - sep = strchr(++field, ')'); - if (sep == NULL) - goto out_failure; - size = sep - field; - memcpy(comm_ev.comm, field, size++); - - comm_ev.pid = pid; + while (!comm_ev.comm[0] || !comm_ev.pid) { + if (fgets(bf, sizeof(bf), fp) == NULL) + goto out_failure; + + if (memcmp(bf, "Name:", 5) == 0) { + char *name = bf + 5; + while (*name && isspace(*name)) + ++name; + size = strlen(name) - 1; + memcpy(comm_ev.comm, name, size++); + } else if (memcmp(bf, "Tgid:", 5) == 0) { + char *tgids = bf + 5; + while (*tgids && isspace(*tgids)) + ++tgids; + tgid = comm_ev.pid = atoi(tgids); + } + } + comm_ev.header.type = PERF_EVENT_COMM; size = ALIGN(size, sizeof(u64)); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); @@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) comm_ev.tid = pid; write_output(&comm_ev, comm_ev.header.size); - return; + goto out_fclose; } snprintf(filename, sizeof(filename), "/proc/%d/task", pid); @@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full) write_output(&comm_ev, comm_ev.header.size); } closedir(tasks); - return; + +out_fclose: + fclose(fp); + return tgid; out_failure: fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", @@ -276,7 +283,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) exit(EXIT_FAILURE); } -static void pid_synthesize_mmap_samples(pid_t pid) +static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid) { char filename[PATH_MAX]; FILE *fp; @@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) mmap_ev.len -= mmap_ev.start; mmap_ev.header.size = (sizeof(mmap_ev) - (sizeof(mmap_ev.filename) - size)); - mmap_ev.pid = pid; + mmap_ev.pid = tgid; mmap_ev.tid = pid; write_output(&mmap_ev, mmap_ev.header.size); @@ -347,14 +354,14 @@ static void synthesize_all(void) while (!readdir_r(proc, &dirent, &next) && next) { char *end; - pid_t pid; + pid_t pid, tgid; pid = strtol(dirent.d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; - pid_synthesize_comm_event(pid, 1); - pid_synthesize_mmap_samples(pid); + tgid = pid_synthesize_comm_event(pid, 1); + pid_synthesize_mmap_samples(pid, tgid); } closedir(proc); @@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid) PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; @@ -412,6 +419,9 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + if (raw_samples) + attr->sample_type |= PERF_SAMPLE_RAW; + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; @@ -425,6 +435,8 @@ static void create_counter(int counter, int cpu, pid_t pid) if (err == EPERM) die("Permission error - are you root?\n"); + else if (err == ENODEV && profile_cpu != -1) + die("No such device - did you specify an out-of-range profile CPU?\n"); /* * If it's cycles then fall back to hrtimer @@ -524,10 +536,14 @@ static int __cmd_record(int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); - if (!stat(output_name, &st) && !force && !append_file) { - fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", - output_name); - exit(-1); + if (!stat(output_name, &st) && st.st_size) { + if (!force && !append_file) { + fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", + output_name); + exit(-1); + } + } else { + append_file = 0; } flags = O_CREAT|O_RDWR; @@ -554,16 +570,22 @@ static int __cmd_record(int argc, const char **argv) if (pid == -1) pid = getpid(); - open_counters(-1, pid); - } else for (i = 0; i < nr_cpus; i++) - open_counters(i, target_pid); + open_counters(profile_cpu, pid); + } else { + if (profile_cpu != -1) { + open_counters(profile_cpu, target_pid); + } else { + for (i = 0; i < nr_cpus; i++) + open_counters(i, target_pid); + } + } if (file_new) perf_header__write(header, output); if (!system_wide) { - pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid); + pid_t tgid = pid_synthesize_comm_event(pid, 0); + pid_synthesize_mmap_samples(pid, tgid); } else synthesize_all(); @@ -631,10 +653,14 @@ static const struct option options[] = { "record events on existing pid"), OPT_INTEGER('r', "realtime", &realtime_prio, "collect data with this RT SCHED_FIFO priority"), + OPT_BOOLEAN('R', "raw-samples", &raw_samples, + "collect raw sample records from all opened counters"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_BOOLEAN('A', "append", &append_file, "append to the output file to do incremental profiling"), + OPT_INTEGER('C', "profile_cpu", &profile_cpu, + "CPU to profile on"), OPT_BOOLEAN('f', "force", &force, "overwrite existing data file"), OPT_LONG('c', "count", &default_interval, diff --git a/trunk/tools/perf/builtin-report.c b/trunk/tools/perf/builtin-report.c index 8cb58d68a006..b53a60fc12de 100644 --- a/trunk/tools/perf/builtin-report.c +++ b/trunk/tools/perf/builtin-report.c @@ -68,7 +68,7 @@ static int callchain; static struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_ABS, + .mode = CHAIN_GRAPH_REL, .min_percent = 0.5 }; @@ -112,7 +112,9 @@ struct read_event { struct perf_event_header header; u32 pid,tid; u64 value; - u64 format[3]; + u64 time_enabled; + u64 time_running; + u64 id; }; typedef union event_union { @@ -698,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) size_t ret = 0; if (verbose) - ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); + ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, + dso__symtab_origin(self->dso)); ret += repsep_fprintf(fp, "[%c] ", self->level); if (self->sym) { @@ -888,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, return ret; } +static struct symbol *rem_sq_bracket; +static struct callchain_list rem_hits; + +static void init_rem_hits(void) +{ + rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); + if (!rem_sq_bracket) { + fprintf(stderr, "Not enough memory to display remaining hits\n"); + return; + } + + strcpy(rem_sq_bracket->name, "[...]"); + rem_hits.sym = rem_sq_bracket; +} + static size_t callchain__fprintf_graph(FILE *fp, struct callchain_node *self, u64 total_samples, int depth, int depth_mask) @@ -897,25 +915,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, struct callchain_list *chain; int new_depth_mask = depth_mask; u64 new_total; + u64 remaining; size_t ret = 0; int i; if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = self->cumul_hit; + new_total = self->children_hit; else new_total = total_samples; + remaining = new_total; + node = rb_first(&self->rb_root); while (node) { + u64 cumul; + child = rb_entry(node, struct callchain_node, rb_node); + cumul = cumul_hits(child); + remaining -= cumul; /* * The depth mask manages the output of pipes that show * the depth. We don't want to keep the pipes of the current - * level for the last child of this depth + * level for the last child of this depth. + * Except if we have remaining filtered hits. They will + * supersede the last child */ next = rb_next(node); - if (!next) + if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) new_depth_mask &= ~(1 << (depth - 1)); /* @@ -930,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, new_total, - child->cumul_hit); + cumul); } ret += callchain__fprintf_graph(fp, child, new_total, depth + 1, @@ -938,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, node = next; } + if (callchain_param.mode == CHAIN_GRAPH_REL && + remaining && remaining != new_total) { + + if (!rem_sq_bracket) + return ret; + + new_depth_mask &= ~(1 << (depth - 1)); + + ret += ipchain__fprintf_graph(fp, &rem_hits, depth, + new_depth_mask, 0, new_total, + remaining); + } + return ret; } @@ -1358,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) unsigned int width; char *col_width = col_width_list_str; + init_rem_hits(); + fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); fprintf(fp, "#\n"); @@ -1429,6 +1471,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) } fprintf(fp, "\n"); + free(rem_sq_bracket); + return ret; } @@ -1482,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, - event->ip.pid, + event->ip.pid, event->ip.tid, (void *)(long)ip, (long long)period); @@ -1546,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (show & show_mask) { struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); - if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) + if (dso_list && (!dso || !dso->name || + !strlist__has_entry(dso_list, dso->name))) return 0; - if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) + if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name))) return 0; if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { @@ -1568,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) struct thread *thread = threads__findnew(event->mmap.pid); struct map *map = map__new(&event->mmap); - dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", + dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, + event->mmap.tid, (void *)(long)event->mmap.start, (void *)(long)event->mmap.len, (void *)(long)event->mmap.pgoff, @@ -1690,14 +1736,37 @@ static void trace_event(event_t *event) dprintf(".\n"); } +static struct perf_header *header; + +static struct perf_counter_attr *perf_header__find_attr(u64 id) +{ + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + int j; + + for (j = 0; j < attr->ids; j++) { + if (attr->id[j] == id) + return &attr->attr; + } + } + + return NULL; +} + static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { - dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", + struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); + + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), event->read.pid, event->read.tid, + attr ? __event_name(attr->type, attr->config) + : "FAIL", event->read.value); return 0; @@ -1743,8 +1812,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static struct perf_header *header; - static u64 perf_header__sample_type(void) { u64 sample_type = 0; @@ -1812,6 +1879,13 @@ static int __cmd_report(void) " -g?\n"); exit(-1); } + } else if (callchain_param.mode != CHAIN_NONE && !callchain) { + callchain = 1; + if (register_callchain_param(&callchain_param) < 0) { + fprintf(stderr, "Can't register callchain" + " params\n"); + exit(-1); + } } if (load_kernel() < 0) { @@ -1950,6 +2024,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, else if (!strncmp(tok, "fractal", strlen(arg))) callchain_param.mode = CHAIN_GRAPH_REL; + else if (!strncmp(tok, "none", strlen(arg))) { + callchain_param.mode = CHAIN_NONE; + callchain = 0; + + return 0; + } + else return -1; diff --git a/trunk/tools/perf/builtin-stat.c b/trunk/tools/perf/builtin-stat.c index f9510eeeb6c7..b4b06c7903e1 100644 --- a/trunk/tools/perf/builtin-stat.c +++ b/trunk/tools/perf/builtin-stat.c @@ -496,7 +496,7 @@ static const struct option options[] = { "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('S', "scale", &scale, + OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), diff --git a/trunk/tools/perf/builtin-top.c b/trunk/tools/perf/builtin-top.c index f139f1ab9333..7de28ce9ca26 100644 --- a/trunk/tools/perf/builtin-top.c +++ b/trunk/tools/perf/builtin-top.c @@ -31,6 +31,8 @@ #include #include +#include +#include #include #include @@ -54,7 +56,7 @@ static int system_wide = 0; static int default_interval = 100000; -static u64 count_filter = 5; +static int count_filter = 5; static int print_entries = 15; static int target_pid = -1; @@ -69,14 +71,27 @@ static int freq = 0; static int verbose = 0; static char *vmlinux = NULL; -static char *sym_filter; -static unsigned long filter_start; -static unsigned long filter_end; - static int delay_secs = 2; static int zero; static int dump_symtab; +/* + * Source + */ + +struct source_line { + u64 eip; + unsigned long count[MAX_COUNTERS]; + char *line; + struct source_line *next; +}; + +static char *sym_filter = NULL; +struct sym_entry *sym_filter_entry = NULL; +static int sym_pcnt_filter = 5; +static int sym_counter = 0; +static int display_weighted = -1; + /* * Symbols */ @@ -91,9 +106,237 @@ struct sym_entry { unsigned long snap_count; double weight; int skip; + struct source_line *source; + struct source_line *lines; + struct source_line **lines_tail; + pthread_mutex_t source_lock; }; -struct sym_entry *sym_filter_entry; +/* + * Source functions + */ + +static void parse_source(struct sym_entry *syme) +{ + struct symbol *sym; + struct module *module; + struct section *section = NULL; + FILE *file; + char command[PATH_MAX*2], *path = vmlinux; + u64 start, end, len; + + if (!syme) + return; + + if (syme->lines) { + pthread_mutex_lock(&syme->source_lock); + goto out_assign; + } + + sym = (struct symbol *)(syme + 1); + module = sym->module; + + if (module) + path = module->path; + if (!path) + return; + + start = sym->obj_start; + if (!start) + start = sym->start; + + if (module) { + section = module->sections->find_section(module->sections, ".text"); + if (section) + start -= section->vma; + } + + end = start + sym->end - sym->start + 1; + len = sym->end - sym->start; + + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); + + file = popen(command, "r"); + if (!file) + return; + + pthread_mutex_lock(&syme->source_lock); + syme->lines_tail = &syme->lines; + while (!feof(file)) { + struct source_line *src; + size_t dummy = 0; + char *c; + + src = malloc(sizeof(struct source_line)); + assert(src != NULL); + memset(src, 0, sizeof(struct source_line)); + + if (getline(&src->line, &dummy, file) < 0) + break; + if (!src->line) + break; + + c = strchr(src->line, '\n'); + if (c) + *c = 0; + + src->next = NULL; + *syme->lines_tail = src; + syme->lines_tail = &src->next; + + if (strlen(src->line)>8 && src->line[8] == ':') { + src->eip = strtoull(src->line, NULL, 16); + if (section) + src->eip += section->vma; + } + if (strlen(src->line)>8 && src->line[16] == ':') { + src->eip = strtoull(src->line, NULL, 16); + if (section) + src->eip += section->vma; + } + } + pclose(file); +out_assign: + sym_filter_entry = syme; + pthread_mutex_unlock(&syme->source_lock); +} + +static void __zero_source_counters(struct sym_entry *syme) +{ + int i; + struct source_line *line; + + line = syme->lines; + while (line) { + for (i = 0; i < nr_counters; i++) + line->count[i] = 0; + line = line->next; + } +} + +static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) +{ + struct source_line *line; + + if (syme != sym_filter_entry) + return; + + if (pthread_mutex_trylock(&syme->source_lock)) + return; + + if (!syme->source) + goto out_unlock; + + for (line = syme->lines; line; line = line->next) { + if (line->eip == ip) { + line->count[counter]++; + break; + } + if (line->eip > ip) + break; + } +out_unlock: + pthread_mutex_unlock(&syme->source_lock); +} + +static void lookup_sym_source(struct sym_entry *syme) +{ + struct symbol *symbol = (struct symbol *)(syme + 1); + struct source_line *line; + char pattern[PATH_MAX]; + char *idx; + + sprintf(pattern, "<%s>:", symbol->name); + + if (symbol->module) { + idx = strstr(pattern, "\t"); + if (idx) + *idx = 0; + } + + pthread_mutex_lock(&syme->source_lock); + for (line = syme->lines; line; line = line->next) { + if (strstr(line->line, pattern)) { + syme->source = line; + break; + } + } + pthread_mutex_unlock(&syme->source_lock); +} + +static void show_lines(struct source_line *queue, int count, int total) +{ + int i; + struct source_line *line; + + line = queue; + for (i = 0; i < count; i++) { + float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; + + printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); + line = line->next; + } +} + +#define TRACE_COUNT 3 + +static void show_details(struct sym_entry *syme) +{ + struct symbol *symbol; + struct source_line *line; + struct source_line *line_queue = NULL; + int displayed = 0; + int line_queue_count = 0, total = 0, more = 0; + + if (!syme) + return; + + if (!syme->source) + lookup_sym_source(syme); + + if (!syme->source) + return; + + symbol = (struct symbol *)(syme + 1); + printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); + printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); + + pthread_mutex_lock(&syme->source_lock); + line = syme->source; + while (line) { + total += line->count[sym_counter]; + line = line->next; + } + + line = syme->source; + while (line) { + float pcnt = 0.0; + + if (!line_queue_count) + line_queue = line; + line_queue_count++; + + if (line->count[sym_counter]) + pcnt = 100.0 * line->count[sym_counter] / (float)total; + if (pcnt >= (float)sym_pcnt_filter) { + if (displayed <= print_entries) + show_lines(line_queue, line_queue_count, total); + else more++; + displayed += line_queue_count; + line_queue_count = 0; + line_queue = NULL; + } else if (line_queue_count > TRACE_COUNT) { + line_queue = line_queue->next; + line_queue_count--; + } + + line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; + line = line->next; + } + pthread_mutex_unlock(&syme->source_lock); + if (more) + printf("%d lines not displayed, maybe increase display entries [e]\n", more); +} struct dso *kernel_dso; @@ -112,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym) double weight = sym->snap_count; int counter; + if (!display_weighted) + return weight; + for (counter = 1; counter < nr_counters-1; counter++) weight *= sym->count[counter]; @@ -159,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) static void print_sym_table(void) { int printed = 0, j; - int counter; + int counter, snap = !display_weighted ? sym_counter : 0; float samples_per_sec = samples/delay_secs; float ksamples_per_sec = (samples-userspace_samples)/delay_secs; float sum_ksamples = 0.0; @@ -175,7 +421,7 @@ static void print_sym_table(void) pthread_mutex_unlock(&active_symbols_lock); list_for_each_entry_safe_from(syme, n, &active_symbols, node) { - syme->snap_count = syme->count[0]; + syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { syme->weight = sym_weight(syme); rb_insert_active_sym(&tmp, syme); @@ -195,7 +441,7 @@ static void print_sym_table(void) samples_per_sec, 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); - if (nr_counters == 1) { + if (nr_counters == 1 || !display_weighted) { printf("%Ld", (u64)attrs[0].sample_period); if (freq) printf("Hz "); @@ -203,7 +449,9 @@ static void print_sym_table(void) printf(" "); } - for (counter = 0; counter < nr_counters; counter++) { + if (!display_weighted) + printf("%s", event_name(sym_counter)); + else for (counter = 0; counter < nr_counters; counter++) { if (counter) printf("/"); @@ -228,6 +476,11 @@ static void print_sym_table(void) printf("------------------------------------------------------------------------------\n\n"); + if (sym_filter_entry) { + show_details(sym_filter_entry); + return; + } + if (nr_counters == 1) printf(" samples pcnt"); else @@ -242,13 +495,13 @@ static void print_sym_table(void) struct symbol *sym = (struct symbol *)(syme + 1); double pcnt; - if (++printed > print_entries || syme->snap_count < count_filter) + if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); - if (nr_counters == 1) + if (nr_counters == 1 || !display_weighted) printf("%20.2f - ", syme->weight); else printf("%9.1f %10ld - ", syme->weight, syme->snap_count); @@ -261,19 +514,250 @@ static void print_sym_table(void) } } +static void prompt_integer(int *target, const char *msg) +{ + char *buf = malloc(0), *p; + size_t dummy = 0; + int tmp; + + fprintf(stdout, "\n%s: ", msg); + if (getline(&buf, &dummy, stdin) < 0) + return; + + p = strchr(buf, '\n'); + if (p) + *p = 0; + + p = buf; + while(*p) { + if (!isdigit(*p)) + goto out_free; + p++; + } + tmp = strtoul(buf, NULL, 10); + *target = tmp; +out_free: + free(buf); +} + +static void prompt_percent(int *target, const char *msg) +{ + int tmp = 0; + + prompt_integer(&tmp, msg); + if (tmp >= 0 && tmp <= 100) + *target = tmp; +} + +static void prompt_symbol(struct sym_entry **target, const char *msg) +{ + char *buf = malloc(0), *p; + struct sym_entry *syme = *target, *n, *found = NULL; + size_t dummy = 0; + + /* zero counters of active symbol */ + if (syme) { + pthread_mutex_lock(&syme->source_lock); + __zero_source_counters(syme); + *target = NULL; + pthread_mutex_unlock(&syme->source_lock); + } + + fprintf(stdout, "\n%s: ", msg); + if (getline(&buf, &dummy, stdin) < 0) + goto out_free; + + p = strchr(buf, '\n'); + if (p) + *p = 0; + + pthread_mutex_lock(&active_symbols_lock); + syme = list_entry(active_symbols.next, struct sym_entry, node); + pthread_mutex_unlock(&active_symbols_lock); + + list_for_each_entry_safe_from(syme, n, &active_symbols, node) { + struct symbol *sym = (struct symbol *)(syme + 1); + + if (!strcmp(buf, sym->name)) { + found = syme; + break; + } + } + + if (!found) { + fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); + sleep(1); + return; + } else + parse_source(found); + +out_free: + free(buf); +} + +static void print_mapped_keys(void) +{ + char *name = NULL; + + if (sym_filter_entry) { + struct symbol *sym = (struct symbol *)(sym_filter_entry+1); + name = sym->name; + } + + fprintf(stdout, "\nMapped keys:\n"); + fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); + fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); + + if (nr_counters > 1) + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); + + fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); + + if (vmlinux) { + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); + fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); + fprintf(stdout, "\t[S] stop annotation.\n"); + } + + if (nr_counters > 1) + fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); + fprintf(stdout, "\t[qQ] quit.\n"); +} + +static int key_mapped(int c) +{ + switch (c) { + case 'd': + case 'e': + case 'f': + case 'z': + case 'q': + case 'Q': + return 1; + case 'E': + case 'w': + return nr_counters > 1 ? 1 : 0; + case 'F': + case 's': + case 'S': + return vmlinux ? 1 : 0; + } + + return 0; +} + +static void handle_keypress(int c) +{ + if (!key_mapped(c)) { + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; + struct termios tc, save; + + print_mapped_keys(); + fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); + fflush(stdout); + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; + tcsetattr(0, TCSANOW, &tc); + + poll(&stdin_poll, 1, -1); + c = getc(stdin); + + tcsetattr(0, TCSAFLUSH, &save); + if (!key_mapped(c)) + return; + } + + switch (c) { + case 'd': + prompt_integer(&delay_secs, "Enter display delay"); + break; + case 'e': + prompt_integer(&print_entries, "Enter display entries (lines)"); + break; + case 'E': + if (nr_counters > 1) { + int i; + + fprintf(stderr, "\nAvailable events:"); + for (i = 0; i < nr_counters; i++) + fprintf(stderr, "\n\t%d %s", i, event_name(i)); + + prompt_integer(&sym_counter, "Enter details event counter"); + + if (sym_counter >= nr_counters) { + fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); + sym_counter = 0; + sleep(1); + } + } else sym_counter = 0; + break; + case 'f': + prompt_integer(&count_filter, "Enter display event count filter"); + break; + case 'F': + prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); + break; + case 'q': + case 'Q': + printf("exiting.\n"); + exit(0); + case 's': + prompt_symbol(&sym_filter_entry, "Enter details symbol"); + break; + case 'S': + if (!sym_filter_entry) + break; + else { + struct sym_entry *syme = sym_filter_entry; + + pthread_mutex_lock(&syme->source_lock); + sym_filter_entry = NULL; + __zero_source_counters(syme); + pthread_mutex_unlock(&syme->source_lock); + } + break; + case 'w': + display_weighted = ~display_weighted; + break; + case 'z': + zero = ~zero; + break; + } +} + static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; - int delay_msecs = delay_secs * 1000; + struct termios tc, save; + int delay_msecs, c; + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; - printf("PerfTop refresh period: %d seconds\n", delay_secs); +repeat: + delay_msecs = delay_secs * 1000; + tcsetattr(0, TCSANOW, &tc); + /* trash return*/ + getc(stdin); do { print_sym_table(); } while (!poll(&stdin_poll, 1, delay_msecs) == 1); - printf("key pressed - exiting.\n"); - exit(0); + c = getc(stdin); + tcsetattr(0, TCSAFLUSH, &save); + + handle_keypress(c); + goto repeat; return NULL; } @@ -293,7 +777,6 @@ static const char *skip_symbols[] = { static int symbol_filter(struct dso *self, struct symbol *sym) { - static int filter_match; struct sym_entry *syme; const char *name = sym->name; int i; @@ -315,6 +798,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym) return 1; syme = dso__sym_priv(self, sym); + pthread_mutex_init(&syme->source_lock, NULL); + if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) + sym_filter_entry = syme; + for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { syme->skip = 1; @@ -322,29 +809,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym) } } - if (filter_match == 1) { - filter_end = sym->start; - filter_match = -1; - if (filter_end - filter_start > 10000) { - fprintf(stderr, - "hm, too large filter symbol <%s> - skipping.\n", - sym_filter); - fprintf(stderr, "symbol filter start: %016lx\n", - filter_start); - fprintf(stderr, " end: %016lx\n", - filter_end); - filter_end = filter_start = 0; - sym_filter = NULL; - sleep(1); - } - } - - if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) { - filter_match = 1; - filter_start = sym->start; - } - - return 0; } @@ -380,8 +844,6 @@ static int parse_symbols(void) return -1; } -#define TRACE_COUNT 3 - /* * Binary search in the histogram table and record the hit: */ @@ -394,6 +856,7 @@ static void record_ip(u64 ip, int counter) if (!syme->skip) { syme->count[counter]++; + record_precise_ip(syme, counter, ip); pthread_mutex_lock(&active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) __list_insert_active_sym(syme); @@ -690,8 +1153,8 @@ static const struct option options[] = { "put the counters into a counter group"), OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), - OPT_STRING('s', "sym-filter", &sym_filter, "pattern", - "only display symbols matchig this pattern"), + OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", + "symbol to annotate - requires -k option"), OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, @@ -734,6 +1197,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) delay_secs = 1; parse_symbols(); + parse_source(sym_filter_entry); /* * Fill in the ones not specifically initialized via -c: diff --git a/trunk/tools/perf/util/callchain.c b/trunk/tools/perf/util/callchain.c index 9d3c8141b8c1..011473411642 100644 --- a/trunk/tools/perf/util/callchain.c +++ b/trunk/tools/perf/util/callchain.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "callchain.h" @@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct callchain_node *rnode; + u64 chain_cumul = cumul_hits(chain); while (*p) { + u64 rnode_cumul; + parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node); + rnode_cumul = cumul_hits(rnode); switch (mode) { case CHAIN_FLAT: @@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, break; case CHAIN_GRAPH_ABS: /* Falldown */ case CHAIN_GRAPH_REL: - if (rnode->cumul_hit < chain->cumul_hit) + if (rnode_cumul < chain_cumul) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node, chain_for_each_child(child, node) { __sort_chain_graph_abs(child, min_hit); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_ABS); } @@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, u64 min_hit; node->rb_root = RB_ROOT; - min_hit = node->cumul_hit * min_percent / 100.0; + min_hit = ceil(node->children_hit * min_percent); chain_for_each_child(child, node) { __sort_chain_graph_rel(child, min_percent); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_REL); } @@ -122,7 +127,7 @@ static void sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, u64 min_hit __used, struct callchain_param *param) { - __sort_chain_graph_rel(chain_root, param->min_percent); + __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); rb_root->rb_node = chain_root->rb_root.rb_node; } @@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain, new = create_child(parent, false); fill_node(new, chain, start, syms); - new->cumul_hit = new->hit = 1; + new->children_hit = 0; + new->hit = 1; } /* @@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, /* split the hits */ new->hit = parent->hit; - new->cumul_hit = parent->cumul_hit; + new->children_hit = parent->children_hit; + parent->children_hit = cumul_hits(new); new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; @@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, if (idx_total < chain->nr) { parent->hit = 0; add_child(parent, chain, idx_total, syms); + parent->children_hit++; } else { parent->hit = 1; } @@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) - goto cumul; + goto inc_children_hit; } /* nothing in children, add to the current node */ add_child(root, chain, start, syms); -cumul: - root->cumul_hit++; +inc_children_hit: + root->children_hit++; } static int @@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match 100% of the path, increment the hit */ if (i - start == root->val_nr && i == chain->nr) { root->hit++; - root->cumul_hit++; - return 0; } @@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms) { + if (!chain->nr) + return; __append_chain_children(root, chain, syms, 0); } diff --git a/trunk/tools/perf/util/callchain.h b/trunk/tools/perf/util/callchain.h index 7812122bea1d..a926ae4f5a16 100644 --- a/trunk/tools/perf/util/callchain.h +++ b/trunk/tools/perf/util/callchain.h @@ -7,6 +7,7 @@ #include "symbol.h" enum chain_mode { + CHAIN_NONE, CHAIN_FLAT, CHAIN_GRAPH_ABS, CHAIN_GRAPH_REL @@ -21,7 +22,7 @@ struct callchain_node { struct rb_root rb_root; /* sorted tree of children */ unsigned int val_nr; u64 hit; - u64 cumul_hit; /* hit + hits of children */ + u64 children_hit; }; struct callchain_param; @@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } +static inline u64 cumul_hits(struct callchain_node *node) +{ + return node->hit + node->children_hit; +} + int register_callchain_param(struct callchain_param *param); void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); diff --git a/trunk/tools/perf/util/header.c b/trunk/tools/perf/util/header.c index 450384b3bbe5..b92a457ca32e 100644 --- a/trunk/tools/perf/util/header.c +++ b/trunk/tools/perf/util/header.c @@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size) if (ret < 0) die("failed to read"); + if (ret == 0) + die("failed to read: missing data"); size -= ret; buf += ret; @@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd) for (i = 0; i < nr_attrs; i++) { struct perf_header_attr *attr; - off_t tmp = lseek(fd, 0, SEEK_CUR); + off_t tmp; do_read(fd, &f_attr, sizeof(f_attr)); + tmp = lseek(fd, 0, SEEK_CUR); attr = perf_header_attr__new(&f_attr.attr); diff --git a/trunk/tools/perf/util/parse-events.c b/trunk/tools/perf/util/parse-events.c index 7bdad8df22a6..044178408783 100644 --- a/trunk/tools/perf/util/parse-events.c +++ b/trunk/tools/perf/util/parse-events.c @@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = { (strcmp(sys_dirent.d_name, ".")) && \ (strcmp(sys_dirent.d_name, ".."))) +static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) +{ + char evt_path[MAXPATHLEN]; + int fd; + + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, + sys_dir->d_name, evt_dir->d_name); + fd = open(evt_path, O_RDONLY); + if (fd < 0) + return -EINVAL; + close(fd); + + return 0; +} + #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ sys_dirent.d_name, evt_dirent.d_name) && \ (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ (strcmp(evt_dirent.d_name, ".")) && \ - (strcmp(evt_dirent.d_name, ".."))) + (strcmp(evt_dirent.d_name, "..")) && \ + (!tp_event_has_id(&sys_dirent, &evt_dirent))) #define MAX_EVENT_LENGTH 30 @@ -223,9 +239,15 @@ char *event_name(int counter) { u64 config = attrs[counter].config; int type = attrs[counter].type; + + return __event_name(type, config); +} + +char *__event_name(int type, u64 config) +{ static char buf[32]; - if (attrs[counter].type == PERF_TYPE_RAW) { + if (type == PERF_TYPE_RAW) { sprintf(buf, "raw 0x%llx", config); return buf; } @@ -357,6 +379,7 @@ static int parse_tracepoint_event(const char **strp, struct perf_counter_attr *attr) { const char *evt_name; + char *flags; char sys_name[MAX_EVENT_LENGTH]; char id_buf[4]; int fd; @@ -378,6 +401,15 @@ static int parse_tracepoint_event(const char **strp, strncpy(sys_name, *strp, sys_length); sys_name[sys_length] = '\0'; evt_name = evt_name + 1; + + flags = strchr(evt_name, ':'); + if (flags) { + *flags = '\0'; + flags++; + if (!strncmp(flags, "record", strlen(flags))) + attr->sample_type |= PERF_SAMPLE_RAW; + } + evt_length = strlen(evt_name); if (evt_length >= MAX_EVENT_LENGTH) return 0; diff --git a/trunk/tools/perf/util/parse-events.h b/trunk/tools/perf/util/parse-events.h index 1ea5d09b6eb1..192a962e3a0f 100644 --- a/trunk/tools/perf/util/parse-events.h +++ b/trunk/tools/perf/util/parse-events.h @@ -10,6 +10,7 @@ extern int nr_counters; extern struct perf_counter_attr attrs[MAX_COUNTERS]; extern char *event_name(int ctr); +extern char *__event_name(int type, u64 config); extern int parse_events(const struct option *opt, const char *str, int unset); diff --git a/trunk/tools/perf/util/symbol.c b/trunk/tools/perf/util/symbol.c index 16ddca202948..5c0f42e6b33b 100644 --- a/trunk/tools/perf/util/symbol.c +++ b/trunk/tools/perf/util/symbol.c @@ -7,22 +7,17 @@ #include #include -#ifndef NO_DEMANGLE -#include -#else -static inline -char *bfd_demangle(void __used *v, const char __used *c, int __used i) -{ - return NULL; -} -#endif - const char *sym_hist_filter; -#ifndef DMGL_PARAMS -#define DMGL_PARAMS (1 << 0) /* Include function args */ -#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ -#endif +enum dso_origin { + DSO__ORIG_KERNEL = 0, + DSO__ORIG_JAVA_JIT, + DSO__ORIG_FEDORA, + DSO__ORIG_UBUNTU, + DSO__ORIG_BUILDID, + DSO__ORIG_DSO, + DSO__ORIG_NOT_FOUND, +}; static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, @@ -81,6 +76,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size) self->sym_priv_size = sym_priv_size; self->find_symbol = dso__find_symbol; self->slen_calculated = 0; + self->origin = DSO__ORIG_NOT_FOUND; } return self; @@ -710,7 +706,7 @@ static char *dso__read_build_id(struct dso *self, int verbose) ++raw; bid += 2; } - if (verbose) + if (verbose >= 2) printf("%s(%s): %s\n", __func__, self->name, build_id); out_elf_end: elf_end(elf); @@ -720,11 +716,26 @@ static char *dso__read_build_id(struct dso *self, int verbose) return build_id; } +char dso__symtab_origin(const struct dso *self) +{ + static const char origin[] = { + [DSO__ORIG_KERNEL] = 'k', + [DSO__ORIG_JAVA_JIT] = 'j', + [DSO__ORIG_FEDORA] = 'f', + [DSO__ORIG_UBUNTU] = 'u', + [DSO__ORIG_BUILDID] = 'b', + [DSO__ORIG_DSO] = 'd', + }; + + if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) + return '!'; + return origin[self->origin]; +} + int dso__load(struct dso *self, symbol_filter_t filter, int verbose) { int size = PATH_MAX; char *name = malloc(size), *build_id = NULL; - int variant = 0; int ret = -1; int fd; @@ -733,19 +744,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) self->adjust_symbols = 0; - if (strncmp(self->name, "/tmp/perf-", 10) == 0) - return dso__load_perf_map(self, filter, verbose); + if (strncmp(self->name, "/tmp/perf-", 10) == 0) { + ret = dso__load_perf_map(self, filter, verbose); + self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : + DSO__ORIG_NOT_FOUND; + return ret; + } + + self->origin = DSO__ORIG_FEDORA - 1; more: do { - switch (variant) { - case 0: /* Fedora */ + self->origin++; + switch (self->origin) { + case DSO__ORIG_FEDORA: snprintf(name, size, "/usr/lib/debug%s.debug", self->name); break; - case 1: /* Ubuntu */ + case DSO__ORIG_UBUNTU: snprintf(name, size, "/usr/lib/debug%s", self->name); break; - case 2: + case DSO__ORIG_BUILDID: build_id = dso__read_build_id(self, verbose); if (build_id != NULL) { snprintf(name, size, @@ -754,16 +772,15 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) free(build_id); break; } - variant++; + self->origin++; /* Fall thru */ - case 3: /* Sane people */ + case DSO__ORIG_DSO: snprintf(name, size, "%s", self->name); break; default: goto out; } - variant++; fd = open(name, O_RDONLY); } while (fd < 0); @@ -784,6 +801,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) } out: free(name); + if (ret < 0 && strstr(self->name, " (deleted)") != NULL) + return 0; return ret; } @@ -899,6 +918,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (err <= 0) err = dso__load_kallsyms(self, filter, verbose); + if (err > 0) + self->origin = DSO__ORIG_KERNEL; + return err; } diff --git a/trunk/tools/perf/util/symbol.h b/trunk/tools/perf/util/symbol.h index 2f92b21c712d..b53bf0125c1b 100644 --- a/trunk/tools/perf/util/symbol.h +++ b/trunk/tools/perf/util/symbol.h @@ -7,6 +7,30 @@ #include #include "module.h" +#ifdef HAVE_CPLUS_DEMANGLE +extern char *cplus_demangle(const char *, int); + +static inline char *bfd_demangle(void __used *v, const char *c, int i) +{ + return cplus_demangle(c, i); +} +#else +#ifdef NO_DEMANGLE +static inline char *bfd_demangle(void __used *v, const char __used *c, + int __used i) +{ + return NULL; +} +#else +#include +#endif +#endif + +#ifndef DMGL_PARAMS +#define DMGL_PARAMS (1 << 0) /* Include function args */ +#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ +#endif + struct symbol { struct rb_node rb_node; u64 start; @@ -26,6 +50,7 @@ struct dso { unsigned int sym_priv_size; unsigned char adjust_symbols; unsigned char slen_calculated; + unsigned char origin; char name[0]; }; @@ -49,6 +74,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); int dso__load(struct dso *self, symbol_filter_t filter, int verbose); size_t dso__fprintf(struct dso *self, FILE *fp); +char dso__symtab_origin(const struct dso *self); void symbol__init(void); #endif /* _PERF_SYMBOL_ */ diff --git a/trunk/virt/kvm/ioapic.c b/trunk/virt/kvm/ioapic.c index 1eddae94bab3..1150c6d5c7b8 100644 --- a/trunk/virt/kvm/ioapic.c +++ b/trunk/virt/kvm/ioapic.c @@ -95,8 +95,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) pent->fields.remote_irr = 1; } - if (!pent->fields.trig_mode) - ioapic->irr &= ~(1 << idx); return injected; } @@ -136,7 +134,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) mask_after = ioapic->redirtbl[index].fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); - if (ioapic->irr & (1 << index)) + if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG + && ioapic->irr & (1 << index)) ioapic_service(ioapic, index); break; } @@ -184,9 +183,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) if (!level) ioapic->irr &= ~mask; else { + int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); ioapic->irr |= mask; - if ((!entry.fields.trig_mode && old_irr != ioapic->irr) - || !entry.fields.remote_irr) + if ((edge && old_irr != ioapic->irr) || + (!edge && !entry.fields.remote_irr)) ret = ioapic_service(ioapic, irq); } } diff --git a/trunk/virt/kvm/irq_comm.c b/trunk/virt/kvm/irq_comm.c index a8bd466d00cc..ddc17f0e2f35 100644 --- a/trunk/virt/kvm/irq_comm.c +++ b/trunk/virt/kvm/irq_comm.c @@ -160,7 +160,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) unsigned gsi = pin; list_for_each_entry(e, &kvm->irq_routing, link) - if (e->irqchip.irqchip == irqchip && + if (e->type == KVM_IRQ_ROUTING_IRQCHIP && + e->irqchip.irqchip == irqchip && e->irqchip.pin == pin) { gsi = e->gsi; break; @@ -259,6 +260,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, int delta; e->gsi = ue->gsi; + e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0;