diff --git a/[refs] b/[refs]
index 8b8500103a41..267f75619439 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 026f149ca38adf96118d3b5fdba6977797861ce6
+refs/heads/master: 32068f6527b8f1822a30671dedaf59c567325026
diff --git a/trunk/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/trunk/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
deleted file mode 100644
index 0adeb524c0d4..000000000000
--- a/trunk/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ /dev/null
@@ -1,62 +0,0 @@
-What:		/sys/devices/cpu/events/
-		/sys/devices/cpu/events/branch-misses
-		/sys/devices/cpu/events/cache-references
-		/sys/devices/cpu/events/cache-misses
-		/sys/devices/cpu/events/stalled-cycles-frontend
-		/sys/devices/cpu/events/branch-instructions
-		/sys/devices/cpu/events/stalled-cycles-backend
-		/sys/devices/cpu/events/instructions
-		/sys/devices/cpu/events/cpu-cycles
-
-Date:		2013/01/08
-
-Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
-
-Description:	Generic performance monitoring events
-
-		A collection of performance monitoring events that may be
-		supported by many/most CPUs. These events can be monitored
-		using the 'perf(1)' tool.
-
-		The contents of each file would look like:
-
-			event=0xNNNN
-
-		where 'N' is a hex digit and the number '0xNNNN' shows the
-		"raw code" for the perf event identified by the file's
-		"basename".
-
-
-What: 		/sys/devices/cpu/events/PM_LD_MISS_L1
-		/sys/devices/cpu/events/PM_LD_REF_L1
-		/sys/devices/cpu/events/PM_CYC
-		/sys/devices/cpu/events/PM_BRU_FIN
-		/sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
-		/sys/devices/cpu/events/PM_BRU_MPRED
-		/sys/devices/cpu/events/PM_INST_CMPL
-		/sys/devices/cpu/events/PM_CMPLU_STALL
-
-Date:		2013/01/08
-
-Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
-		Linux Powerpc mailing list <linuxppc-dev@ozlabs.org>
-
-Description:	POWER-systems specific performance monitoring events
-
-		A collection of performance monitoring events that may be
-		supported by the POWER CPU. These events can be monitored
-		using the 'perf(1)' tool.
-
-		These events may not be supported by other CPUs.
-
-		The contents of each file would look like:
-
-			event=0xNNNN
-
-		where 'N' is a hex digit and the number '0xNNNN' shows the
-		"raw code" for the perf event identified by the file's
-		"basename".
-
-		Further, multiple terms like 'event=0xNNNN' can be specified
-		and separated with comma. All available terms are defined in
-		the /sys/bus/event_source/devices/<dev>/format file.
diff --git a/trunk/Documentation/PCI/MSI-HOWTO.txt b/trunk/Documentation/PCI/MSI-HOWTO.txt
index a09178086c30..53e6fca146d7 100644
--- a/trunk/Documentation/PCI/MSI-HOWTO.txt
+++ b/trunk/Documentation/PCI/MSI-HOWTO.txt
@@ -127,42 +127,15 @@ on the number of vectors that can be allocated; pci_enable_msi_block()
 returns as soon as it finds any constraint that doesn't allow the
 call to succeed.
 
-4.2.3 pci_enable_msi_block_auto
-
-int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count)
-
-This variation on pci_enable_msi() call allows a device driver to request
-the maximum possible number of MSIs.  The MSI specification only allows
-interrupts to be allocated in powers of two, up to a maximum of 2^5 (32).
-
-If this function returns a positive number, it indicates that it has
-succeeded and the returned value is the number of allocated interrupts. In
-this case, the function enables MSI on this device and updates dev->irq to
-be the lowest of the new interrupts assigned to it.  The other interrupts
-assigned to the device are in the range dev->irq to dev->irq + returned
-value - 1.
-
-If this function returns a negative number, it indicates an error and
-the driver should not attempt to request any more MSI interrupts for
-this device.
-
-If the device driver needs to know the number of interrupts the device
-supports it can pass the pointer count where that number is stored. The
-device driver must decide what action to take if pci_enable_msi_block_auto()
-succeeds, but returns a value less than the number of interrupts supported.
-If the device driver does not need to know the number of interrupts
-supported, it can set the pointer count to NULL.
-
-4.2.4 pci_disable_msi
+4.2.3 pci_disable_msi
 
 void pci_disable_msi(struct pci_dev *dev)
 
 This function should be used to undo the effect of pci_enable_msi() or
-pci_enable_msi_block() or pci_enable_msi_block_auto().  Calling it restores
-dev->irq to the pin-based interrupt number and frees the previously
-allocated message signaled interrupt(s).  The interrupt may subsequently be
-assigned to another device, so drivers should not cache the value of
-dev->irq.
+pci_enable_msi_block().  Calling it restores dev->irq to the pin-based
+interrupt number and frees the previously allocated message signaled
+interrupt(s).  The interrupt may subsequently be assigned to another
+device, so drivers should not cache the value of dev->irq.
 
 Before calling this function, a device driver must always call free_irq()
 on any interrupt for which it previously called request_irq().
diff --git a/trunk/Documentation/atomic_ops.txt b/trunk/Documentation/atomic_ops.txt
index d9ca5be9b471..27f2b21a9d5c 100644
--- a/trunk/Documentation/atomic_ops.txt
+++ b/trunk/Documentation/atomic_ops.txt
@@ -253,8 +253,6 @@ This performs an atomic exchange operation on the atomic variable v, setting
 the given new value.  It returns the old value that the atomic variable v had
 just before the operation.
 
-atomic_xchg requires explicit memory barriers around the operation.
-
 	int atomic_cmpxchg(atomic_t *v, int old, int new);
 
 This performs an atomic compare exchange operation on the atomic value v,
diff --git a/trunk/Documentation/memory-barriers.txt b/trunk/Documentation/memory-barriers.txt
index fa5d8a9ae205..3c4e1b3b80a1 100644
--- a/trunk/Documentation/memory-barriers.txt
+++ b/trunk/Documentation/memory-barriers.txt
@@ -1685,7 +1685,6 @@ explicit lock operations, described later).  These include:
 
 	xchg();
 	cmpxchg();
-	atomic_xchg();
 	atomic_cmpxchg();
 	atomic_inc_return();
 	atomic_dec_return();
diff --git a/trunk/Documentation/trace/ftrace.txt b/trunk/Documentation/trace/ftrace.txt
index 53d6a3c51d87..6f51fed45f2d 100644
--- a/trunk/Documentation/trace/ftrace.txt
+++ b/trunk/Documentation/trace/ftrace.txt
@@ -1842,89 +1842,6 @@ an error.
  # cat buffer_size_kb
 85
 
-Snapshot
---------
-CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature
-available to all non latency tracers. (Latency tracers which
-record max latency, such as "irqsoff" or "wakeup", can't use
-this feature, since those are already using the snapshot
-mechanism internally.)
-
-Snapshot preserves a current trace buffer at a particular point
-in time without stopping tracing. Ftrace swaps the current
-buffer with a spare buffer, and tracing continues in the new
-current (=previous spare) buffer.
-
-The following debugfs files in "tracing" are related to this
-feature:
-
-  snapshot:
-
-	This is used to take a snapshot and to read the output
-	of the snapshot. Echo 1 into this file to allocate a
-	spare buffer and to take a snapshot (swap), then read
-	the snapshot from this file in the same format as
-	"trace" (described above in the section "The File
-	System"). Both reads snapshot and tracing are executable
-	in parallel. When the spare buffer is allocated, echoing
-	0 frees it, and echoing else (positive) values clear the
-	snapshot contents.
-	More details are shown in the table below.
-
-	status\input  |     0      |     1      |    else    |
-	--------------+------------+------------+------------+
-	not allocated |(do nothing)| alloc+swap |   EINVAL   |
-	--------------+------------+------------+------------+
-	allocated     |    free    |    swap    |   clear    |
-	--------------+------------+------------+------------+
-
-Here is an example of using the snapshot feature.
-
- # echo 1 > events/sched/enable
- # echo 1 > snapshot
- # cat snapshot
-# tracer: nop
-#
-# entries-in-buffer/entries-written: 71/71   #P:8
-#
-#                              _-----=> irqs-off
-#                             / _----=> need-resched
-#                            | / _---=> hardirq/softirq
-#                            || / _--=> preempt-depth
-#                            ||| /     delay
-#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-#              | |       |   ||||       |         |
-          <idle>-0     [005] d...  2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120
-           sleep-2242  [005] d...  2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120
-[...]
-          <idle>-0     [002] d...  2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120
-
- # cat trace
-# tracer: nop
-#
-# entries-in-buffer/entries-written: 77/77   #P:8
-#
-#                              _-----=> irqs-off
-#                             / _----=> need-resched
-#                            | / _---=> hardirq/softirq
-#                            || / _--=> preempt-depth
-#                            ||| /     delay
-#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-#              | |       |   ||||       |         |
-          <idle>-0     [007] d...  2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120
- snapshot-test-2-2229  [002] d...  2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120
-[...]
-
-
-If you try to use this snapshot feature when current tracer is
-one of the latency tracers, you will get the following results.
-
- # echo wakeup > current_tracer
- # echo 1 > snapshot
-bash: echo: write error: Device or resource busy
- # cat snapshot
-cat: snapshot: Device or resource busy
-
 -----------
 
 More details can be found in the source code, in the
diff --git a/trunk/Documentation/x86/boot.txt b/trunk/Documentation/x86/boot.txt
index b443f1de0e5a..e540fd67f767 100644
--- a/trunk/Documentation/x86/boot.txt
+++ b/trunk/Documentation/x86/boot.txt
@@ -390,7 +390,6 @@ Protocol:	2.00+
 	F  Special		(0xFF = undefined)
        10  Reserved
        11  Minimal Linux Bootloader <http://sebastian-plotz.blogspot.de>
-       12  OVMF UEFI virtualization stack
 
   Please contact <hpa@zytor.com> if you need a bootloader ID
   value assigned.
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 168590fc0d5d..35a56bcd5e75 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -1303,7 +1303,7 @@ F:	include/linux/dmaengine.h
 F:	include/linux/async_tx.h
 
 AT24 EEPROM DRIVER
-M:	Wolfram Sang <wsa@the-dreams.de>
+M:	Wolfram Sang <w.sang@pengutronix.de>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/misc/eeprom/at24.c
@@ -3757,11 +3757,12 @@ S:	Maintained
 F:	drivers/i2c/i2c-stub.c
 
 I2C SUBSYSTEM
-M:	Wolfram Sang <wsa@the-dreams.de>
+M:	Wolfram Sang <w.sang@pengutronix.de>
 M:	"Ben Dooks (embedded platforms)" <ben-linux@fluff.org>
 L:	linux-i2c@vger.kernel.org
 W:	http://i2c.wiki.kernel.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
+T:	quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-i2c/
+T:	git git://git.pengutronix.de/git/wsa/linux.git
 S:	Maintained
 F:	Documentation/i2c/
 F:	drivers/i2c/
@@ -5777,6 +5778,15 @@ L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/muxes/i2c-mux-pca9541.c
 
+PCA9564/PCA9665 I2C BUS DRIVER
+M:	Wolfram Sang <w.sang@pengutronix.de>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	drivers/i2c/algos/i2c-algo-pca.c
+F:	drivers/i2c/busses/i2c-pca-*
+F:	include/linux/i2c-algo-pca.h
+F:	include/linux/i2c-pca-platform.h
+
 PCDP - PRIMARY CONSOLE AND DEBUG PORT
 M:	Khalid Aziz <khalid@gonehiking.org>
 S:	Maintained
@@ -6588,7 +6598,7 @@ F:	drivers/dma/dw_dmac_regs.h
 F:	drivers/dma/dw_dmac.c
 
 TIMEKEEPING, NTP
-M:	John Stultz <john.stultz@linaro.org>
+M:	John Stultz <johnstul@us.ibm.com>
 M:	Thomas Gleixner <tglx@linutronix.de>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:	Supported
diff --git a/trunk/Makefile b/trunk/Makefile
index 6fccf6531770..08ef9bdb80c7 100644
--- a/trunk/Makefile
+++ b/trunk/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc7
 NAME = Unicycling Gorilla
 
 # *DOCUMENTATION*
@@ -165,8 +165,7 @@ export srctree objtree VPATH
 # then ARCH is assigned, getting whatever value it gets normally, and 
 # SUBARCH is subsequently ignored.
 
-SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
-				  -e s/sun4u/sparc64/ \
+SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
 				  -e s/arm.*/arm/ -e s/sa110/arm/ \
 				  -e s/s390x/s390/ -e s/parisc64/parisc/ \
 				  -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig
index 97fb7d0365d1..7f8f281f2585 100644
--- a/trunk/arch/Kconfig
+++ b/trunk/arch/Kconfig
@@ -76,15 +76,6 @@ config OPTPROBES
 	depends on KPROBES && HAVE_OPTPROBES
 	depends on !PREEMPT
 
-config KPROBES_ON_FTRACE
-	def_bool y
-	depends on KPROBES && HAVE_KPROBES_ON_FTRACE
-	depends on DYNAMIC_FTRACE_WITH_REGS
-	help
-	 If function tracer is enabled and the arch supports full
-	 passing of pt_regs to function tracing, then kprobes can
-	 optimize on top of function tracing.
-
 config UPROBES
 	bool "Transparent user-space probes (EXPERIMENTAL)"
 	depends on UPROBE_EVENT && PERF_EVENTS
@@ -167,9 +158,6 @@ config HAVE_KRETPROBES
 config HAVE_OPTPROBES
 	bool
 
-config HAVE_KPROBES_ON_FTRACE
-	bool
-
 config HAVE_NMI_WATCHDOG
 	bool
 #
diff --git a/trunk/arch/alpha/Kconfig b/trunk/arch/alpha/Kconfig
index 9b504af2e966..9d5904cc7712 100644
--- a/trunk/arch/alpha/Kconfig
+++ b/trunk/arch/alpha/Kconfig
@@ -5,6 +5,7 @@ config ALPHA
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS
+	select HAVE_IRQ_WORK
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_DMA_ATTRS
diff --git a/trunk/arch/alpha/kernel/osf_sys.c b/trunk/arch/alpha/kernel/osf_sys.c
index dbc1760f418b..14db93e4c8a8 100644
--- a/trunk/arch/alpha/kernel/osf_sys.c
+++ b/trunk/arch/alpha/kernel/osf_sys.c
@@ -1139,7 +1139,6 @@ struct rusage32 {
 SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 {
 	struct rusage32 r;
-	cputime_t utime, stime;
 
 	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
 		return -EINVAL;
@@ -1147,9 +1146,8 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 	memset(&r, 0, sizeof(r));
 	switch (who) {
 	case RUSAGE_SELF:
-		task_cputime(current, &utime, &stime);
-		jiffies_to_timeval32(utime, &r.ru_utime);
-		jiffies_to_timeval32(stime, &r.ru_stime);
+		jiffies_to_timeval32(current->utime, &r.ru_utime);
+		jiffies_to_timeval32(current->stime, &r.ru_stime);
 		r.ru_minflt = current->min_flt;
 		r.ru_majflt = current->maj_flt;
 		break;
diff --git a/trunk/arch/arm/Kconfig b/trunk/arch/arm/Kconfig
index 9bbe760f2352..67874b82a4ed 100644
--- a/trunk/arch/arm/Kconfig
+++ b/trunk/arch/arm/Kconfig
@@ -36,6 +36,7 @@ config ARM
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7))
 	select HAVE_IDE if PCI || ISA || PCMCIA
+	select HAVE_IRQ_WORK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
diff --git a/trunk/arch/arm/include/asm/smp_scu.h b/trunk/arch/arm/include/asm/smp_scu.h
index 86dff32a0737..4eb6d005ffaa 100644
--- a/trunk/arch/arm/include/asm/smp_scu.h
+++ b/trunk/arch/arm/include/asm/smp_scu.h
@@ -7,14 +7,8 @@
 
 #ifndef __ASSEMBLER__
 unsigned int scu_get_core_count(void __iomem *);
+void scu_enable(void __iomem *);
 int scu_power_mode(void __iomem *, unsigned int);
-
-#ifdef CONFIG_SMP
-void scu_enable(void __iomem *scu_base);
-#else
-static inline void scu_enable(void __iomem *scu_base) {}
-#endif
-
 #endif
 
 #endif
diff --git a/trunk/arch/arm/kernel/smp_scu.c b/trunk/arch/arm/kernel/smp_scu.c
index 45eac87ed66a..b9f015e843d8 100644
--- a/trunk/arch/arm/kernel/smp_scu.c
+++ b/trunk/arch/arm/kernel/smp_scu.c
@@ -75,7 +75,7 @@ void scu_enable(void __iomem *scu_base)
 int scu_power_mode(void __iomem *scu_base, unsigned int mode)
 {
 	unsigned int val;
-	int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0);
+	int cpu = cpu_logical_map(smp_processor_id());
 
 	if (mode > 3 || mode == 1 || cpu > 3)
 		return -EINVAL;
diff --git a/trunk/arch/arm/mach-highbank/highbank.c b/trunk/arch/arm/mach-highbank/highbank.c
index e6c061282939..981dc1e1da51 100644
--- a/trunk/arch/arm/mach-highbank/highbank.c
+++ b/trunk/arch/arm/mach-highbank/highbank.c
@@ -28,7 +28,6 @@
 
 #include <asm/arch_timer.h>
 #include <asm/cacheflush.h>
-#include <asm/cputype.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_twd.h>
 #include <asm/hardware/arm_timer.h>
@@ -60,7 +59,7 @@ static void __init highbank_scu_map_io(void)
 
 void highbank_set_cpu_jump(int cpu, void *jump_addr)
 {
-	cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 0);
+	cpu = cpu_logical_map(cpu);
 	writel(virt_to_phys(jump_addr), HB_JUMP_TABLE_VIRT(cpu));
 	__cpuc_flush_dcache_area(HB_JUMP_TABLE_VIRT(cpu), 16);
 	outer_clean_range(HB_JUMP_TABLE_PHYS(cpu),
diff --git a/trunk/arch/arm/mach-highbank/sysregs.h b/trunk/arch/arm/mach-highbank/sysregs.h
index 5995df7f2622..70af9d13fcef 100644
--- a/trunk/arch/arm/mach-highbank/sysregs.h
+++ b/trunk/arch/arm/mach-highbank/sysregs.h
@@ -37,7 +37,7 @@ extern void __iomem *sregs_base;
 
 static inline void highbank_set_core_pwr(void)
 {
-	int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0);
+	int cpu = cpu_logical_map(smp_processor_id());
 	if (scu_base_addr)
 		scu_power_mode(scu_base_addr, SCU_PM_POWEROFF);
 	else
@@ -46,7 +46,7 @@ static inline void highbank_set_core_pwr(void)
 
 static inline void highbank_clear_core_pwr(void)
 {
-	int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0);
+	int cpu = cpu_logical_map(smp_processor_id());
 	if (scu_base_addr)
 		scu_power_mode(scu_base_addr, SCU_PM_NORMAL);
 	else
diff --git a/trunk/arch/arm64/Kconfig b/trunk/arch/arm64/Kconfig
index 75e915b72471..f8f362aafee9 100644
--- a/trunk/arch/arm64/Kconfig
+++ b/trunk/arch/arm64/Kconfig
@@ -21,6 +21,7 @@ config ARM64
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
+	select HAVE_IRQ_WORK
 	select HAVE_MEMBLOCK
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
diff --git a/trunk/arch/blackfin/Kconfig b/trunk/arch/blackfin/Kconfig
index 67e4aaad78f5..b6f3ad5441c5 100644
--- a/trunk/arch/blackfin/Kconfig
+++ b/trunk/arch/blackfin/Kconfig
@@ -24,6 +24,7 @@ config BLACKFIN
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_IDE
+	select HAVE_IRQ_WORK
 	select HAVE_KERNEL_GZIP if RAMKERNEL
 	select HAVE_KERNEL_BZIP2 if RAMKERNEL
 	select HAVE_KERNEL_LZMA if RAMKERNEL
@@ -37,6 +38,7 @@ config BLACKFIN
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_PROBE
+	select IRQ_PER_CPU if SMP
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_NMI_WATCHDOG if NMI_WATCHDOG
 	select GENERIC_SMP_IDLE_THREAD
diff --git a/trunk/arch/frv/Kconfig b/trunk/arch/frv/Kconfig
index 17df48fc8f44..9d262645f667 100644
--- a/trunk/arch/frv/Kconfig
+++ b/trunk/arch/frv/Kconfig
@@ -3,6 +3,7 @@ config FRV
 	default y
 	select HAVE_IDE
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_UID16
 	select HAVE_GENERIC_HARDIRQS
diff --git a/trunk/arch/hexagon/Kconfig b/trunk/arch/hexagon/Kconfig
index e4decc6b8947..0744f7d7b1fd 100644
--- a/trunk/arch/hexagon/Kconfig
+++ b/trunk/arch/hexagon/Kconfig
@@ -12,7 +12,9 @@ config HEXAGON
 	# select ARCH_WANT_OPTIONAL_GPIOLIB
 	# select ARCH_REQUIRE_GPIOLIB
 	# select HAVE_CLK
+	# select IRQ_PER_CPU
 	# select GENERIC_PENDING_IRQ if SMP
+	select HAVE_IRQ_WORK
 	select GENERIC_ATOMIC64
 	select HAVE_PERF_EVENTS
 	select HAVE_GENERIC_HARDIRQS
diff --git a/trunk/arch/ia64/Kconfig b/trunk/arch/ia64/Kconfig
index 00c2e88f7755..3279646120e3 100644
--- a/trunk/arch/ia64/Kconfig
+++ b/trunk/arch/ia64/Kconfig
@@ -29,6 +29,7 @@ config IA64
 	select ARCH_DISCARD_MEMBLOCK
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
+	select IRQ_PER_CPU
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/trunk/arch/ia64/include/asm/cputime.h b/trunk/arch/ia64/include/asm/cputime.h
index e2d3f5baf265..7fcf7f08ab06 100644
--- a/trunk/arch/ia64/include/asm/cputime.h
+++ b/trunk/arch/ia64/include/asm/cputime.h
@@ -11,19 +11,99 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
  * Otherwise we measure cpu time in jiffies using the generic definitions.
  */
 
 #ifndef __IA64_CPUTIME_H
 #define __IA64_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-# include <asm-generic/cputime.h>
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#include <asm-generic/cputime.h>
 #else
-# include <asm/processor.h>
-# include <asm-generic/cputime_nsecs.h>
+
+#include <linux/time.h>
+#include <linux/jiffies.h>
+#include <asm/processor.h>
+
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
+
+#define cputime_one_jiffy		jiffies_to_cputime(1)
+
+/*
+ * Convert cputime <-> jiffies (HZ)
+ */
+#define cputime_to_jiffies(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif)	\
+	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)	\
+	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
+
+/*
+ * Convert cputime <-> microseconds
+ */
+#define cputime_to_usecs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs)	\
+	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
+#define usecs_to_cputime64(__usecs)	\
+	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
+
+/*
+ * Convert cputime <-> seconds
+ */
+#define cputime_to_secs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)		\
+	(__force cputime_t)((__secs) * NSEC_PER_SEC)
+
+/*
+ * Convert cputime <-> timespec (nsec)
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *val)
+{
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+	return (__force cputime_t) ret;
+}
+static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
+{
+	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
+}
+
+/*
+ * Convert cputime <-> timeval (usec)
+ */
+static inline cputime_t timeval_to_cputime(struct timeval *val)
+{
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+	return (__force cputime_t) ret;
+}
+static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
+{
+	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+}
+
+/*
+ * Convert cputime <-> clock (USER_HZ)
+ */
+#define cputime_to_clock_t(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)		\
+	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct)	\
+	cputime_to_clock_t((__force cputime_t)__ct)
+
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */
diff --git a/trunk/arch/ia64/include/asm/thread_info.h b/trunk/arch/ia64/include/asm/thread_info.h
index 020d655ed082..ff2ae4136584 100644
--- a/trunk/arch/ia64/include/asm/thread_info.h
+++ b/trunk/arch/ia64/include/asm/thread_info.h
@@ -31,7 +31,7 @@ struct thread_info {
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	__u64 ac_stamp;
 	__u64 ac_leave;
 	__u64 ac_stime;
@@ -69,7 +69,7 @@ struct thread_info {
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 #define setup_thread_stack(p, org)			\
 	*task_thread_info(p) = *task_thread_info(org);	\
 	task_thread_info(p)->ac_stime = 0;		\
diff --git a/trunk/arch/ia64/include/asm/xen/minstate.h b/trunk/arch/ia64/include/asm/xen/minstate.h
index 00cf03e0cb82..c57fa910f2c9 100644
--- a/trunk/arch/ia64/include/asm/xen/minstate.h
+++ b/trunk/arch/ia64/include/asm/xen/minstate.h
@@ -1,5 +1,5 @@
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define XEN_ACCOUNT_GET_STAMP		\
 	MOV_FROM_ITC(pUStk, p6, r20, r2);
diff --git a/trunk/arch/ia64/kernel/asm-offsets.c b/trunk/arch/ia64/kernel/asm-offsets.c
index 46c9e3007315..a48bd9a9927b 100644
--- a/trunk/arch/ia64/kernel/asm-offsets.c
+++ b/trunk/arch/ia64/kernel/asm-offsets.c
@@ -41,7 +41,7 @@ void foo(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
 	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
 	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
diff --git a/trunk/arch/ia64/kernel/entry.S b/trunk/arch/ia64/kernel/entry.S
index 7a53530f22c2..6bfd8429ee0f 100644
--- a/trunk/arch/ia64/kernel/entry.S
+++ b/trunk/arch/ia64/kernel/entry.S
@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 #endif
 .global __paravirt_work_processed_syscall;
 __paravirt_work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	adds r2=PT(LOADRS)+16,r12
 	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 (pUStk) st1 [r15]=r17				// M2|3
 #else
 (pUStk) st1 [r14]=r17				// M2|3
@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	COVER				// B    add current frame into dirty partition & set cr.ifs
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	st8 [r14]=r22			// M	save time at leave
 	mov f10=f0			// F    clear f10
@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	.pred.rel.mutex pUStk,pKStk
 	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 (pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
 #else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
 #endif
 	;;
@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
 	//  mib  :  mov add br        ->  mib  : ld8 add br
 	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
diff --git a/trunk/arch/ia64/kernel/fsys.S b/trunk/arch/ia64/kernel/fsys.S
index c4cd45d97749..e662f178b990 100644
--- a/trunk/arch/ia64/kernel/fsys.S
+++ b/trunk/arch/ia64/kernel/fsys.S
@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
 #else
 	nop.m 0
@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	// mov.m r30=ar.itc is called in advance
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
diff --git a/trunk/arch/ia64/kernel/head.S b/trunk/arch/ia64/kernel/head.S
index 9be4e497f3d3..4738ff7bd66a 100644
--- a/trunk/arch/ia64/kernel/head.S
+++ b/trunk/arch/ia64/kernel/head.S
@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
 sched_clock = ia64_native_sched_clock
 #endif
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 GLOBAL_ENTRY(cycle_to_cputime)
 	alloc r16=ar.pfs,1,0,0,0
 	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
 END(cycle_to_cputime)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_IA64_BRL_EMU
 
diff --git a/trunk/arch/ia64/kernel/ivt.S b/trunk/arch/ia64/kernel/ivt.S
index 689ffcaa284e..fa25689fc453 100644
--- a/trunk/arch/ia64/kernel/ivt.S
+++ b/trunk/arch/ia64/kernel/ivt.S
@@ -784,7 +784,7 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	;;
 	mov b6=r30				// I0   setup syscall handler branch reg early
 #else
@@ -801,7 +801,7 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
 #else
 	mov b6=r30				// I0   setup syscall handler branch reg early
@@ -817,7 +817,7 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	// mov.m r30=ar.itc is called in advance, and r13 is current
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
 	DBG_FAULT(16)
 	FAULT(16)
 
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
 	/*
 	 * There is no particular reason for this code to be here, other than
 	 * that there happens to be space here that would go unused otherwise.
diff --git a/trunk/arch/ia64/kernel/minstate.h b/trunk/arch/ia64/kernel/minstate.h
index cc82a7d744c9..d56753a11636 100644
--- a/trunk/arch/ia64/kernel/minstate.h
+++ b/trunk/arch/ia64/kernel/minstate.h
@@ -4,7 +4,7 @@
 #include "entry.h"
 #include "paravirt_inst.h"
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define ACCOUNT_GET_STAMP				\
 (pUStk) mov.m r20=ar.itc;
diff --git a/trunk/arch/ia64/kernel/time.c b/trunk/arch/ia64/kernel/time.c
index fbaac1afb844..88a794536bc0 100644
--- a/trunk/arch/ia64/kernel/time.c
+++ b/trunk/arch/ia64/kernel/time.c
@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 
 #include <linux/kernel_stat.h>
 
@@ -136,14 +136,13 @@ void vtime_account_system(struct task_struct *tsk)
 
 	account_system_time(tsk, 0, delta, delta);
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
 	account_idle_time(vtime_delta(tsk));
 }
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
diff --git a/trunk/arch/m68k/include/asm/processor.h b/trunk/arch/m68k/include/asm/processor.h
index b0768a657920..ae700f49e51d 100644
--- a/trunk/arch/m68k/include/asm/processor.h
+++ b/trunk/arch/m68k/include/asm/processor.h
@@ -130,6 +130,7 @@ extern int handle_kernel_fault(struct pt_regs *regs);
 #define start_thread(_regs, _pc, _usp)                  \
 do {                                                    \
 	(_regs)->pc = (_pc);                            \
+	((struct switch_stack *)(_regs))[-1].a6 = 0;    \
 	setframeformat(_regs);                          \
 	if (current->mm)                                \
 		(_regs)->d5 = current->mm->start_data;  \
diff --git a/trunk/arch/mips/Kconfig b/trunk/arch/mips/Kconfig
index 9becc44d9d7a..2ac626ab9d43 100644
--- a/trunk/arch/mips/Kconfig
+++ b/trunk/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select HAVE_ARCH_KGDB
@@ -2160,6 +2161,7 @@ source "mm/Kconfig"
 config SMP
 	bool "Multi-Processing support"
 	depends on SYS_SUPPORTS_SMP
+	select IRQ_PER_CPU
 	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
diff --git a/trunk/arch/parisc/Kconfig b/trunk/arch/parisc/Kconfig
index a32e34ecda9e..b77feffbadea 100644
--- a/trunk/arch/parisc/Kconfig
+++ b/trunk/arch/parisc/Kconfig
@@ -9,12 +9,14 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select BUG
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
 	select HAVE_GENERIC_HARDIRQS
 	select BROKEN_RODATA
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PCI_IOMAP
+	select IRQ_PER_CPU
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/trunk/arch/powerpc/Kconfig b/trunk/arch/powerpc/Kconfig
index 561ccca7b1a7..17903f1f356b 100644
--- a/trunk/arch/powerpc/Kconfig
+++ b/trunk/arch/powerpc/Kconfig
@@ -118,12 +118,14 @@ config PPC
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
 	select HAVE_GENERIC_HARDIRQS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select SPARSE_IRQ
+	select IRQ_PER_CPU
 	select IRQ_DOMAIN
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
diff --git a/trunk/arch/powerpc/configs/chroma_defconfig b/trunk/arch/powerpc/configs/chroma_defconfig
index 4f35fc462385..29bb11ec6c64 100644
--- a/trunk/arch/powerpc/configs/chroma_defconfig
+++ b/trunk/arch/powerpc/configs/chroma_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
+# CONFIG_VIRT_CPU_ACCOUNTING is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=256
 CONFIG_EXPERIMENTAL=y
diff --git a/trunk/arch/powerpc/configs/corenet64_smp_defconfig b/trunk/arch/powerpc/configs/corenet64_smp_defconfig
index f7df8362911f..88fa5c46f66f 100644
--- a/trunk/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/trunk/arch/powerpc/configs/corenet64_smp_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
+# CONFIG_VIRT_CPU_ACCOUNTING is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/trunk/arch/powerpc/configs/pasemi_defconfig b/trunk/arch/powerpc/configs/pasemi_defconfig
index bcedeea0df89..840a2c2d0430 100644
--- a/trunk/arch/powerpc/configs/pasemi_defconfig
+++ b/trunk/arch/powerpc/configs/pasemi_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_ALTIVEC=y
-# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
+# CONFIG_VIRT_CPU_ACCOUNTING is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/trunk/arch/powerpc/include/asm/cputime.h b/trunk/arch/powerpc/include/asm/cputime.h
index 607559ab271f..483733bd06d4 100644
--- a/trunk/arch/powerpc/include/asm/cputime.h
+++ b/trunk/arch/powerpc/include/asm/cputime.h
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
  * the same units as the timebase.  Otherwise we measure cpu time
  * in jiffies using the generic definitions.
  */
@@ -16,7 +16,7 @@
 #ifndef __POWERPC_CPUTIME_H
 #define __POWERPC_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 #include <asm-generic/cputime.h>
 #ifdef __KERNEL__
 static inline void setup_cputime_one_jiffy(void) { }
@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
 
 #endif /* __KERNEL__ */
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __POWERPC_CPUTIME_H */
diff --git a/trunk/arch/powerpc/include/asm/lppaca.h b/trunk/arch/powerpc/include/asm/lppaca.h
index b1e7f2af1016..531fe0c3108f 100644
--- a/trunk/arch/powerpc/include/asm/lppaca.h
+++ b/trunk/arch/powerpc/include/asm/lppaca.h
@@ -145,7 +145,7 @@ struct dtl_entry {
 extern struct kmem_cache *dtl_cache;
 
 /*
- * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
  * DTL entries, it can set this pointer to a function that will get
  * called once for each DTL entry that gets processed.
diff --git a/trunk/arch/powerpc/include/asm/perf_event_server.h b/trunk/arch/powerpc/include/asm/perf_event_server.h
index 136bba62efa4..9710be3a2d17 100644
--- a/trunk/arch/powerpc/include/asm/perf_event_server.h
+++ b/trunk/arch/powerpc/include/asm/perf_event_server.h
@@ -11,7 +11,6 @@
 
 #include <linux/types.h>
 #include <asm/hw_irq.h>
-#include <linux/device.h>
 
 #define MAX_HWEVENTS		8
 #define MAX_EVENT_ALTERNATIVES	8
@@ -36,7 +35,6 @@ struct power_pmu {
 	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
 	int		(*limited_pmc_event)(u64 event_id);
 	u32		flags;
-	const struct attribute_group	**attr_groups;
 	int		n_generic;
 	int		*generic_events;
 	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
@@ -111,27 +109,3 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
  * If an event_id is not subject to the constraint expressed by a particular
  * field, then it will have 0 in both the mask and value for that field.
  */
-
-extern ssize_t power_events_sysfs_show(struct device *dev,
-				struct device_attribute *attr, char *page);
-
-/*
- * EVENT_VAR() is same as PMU_EVENT_VAR with a suffix.
- *
- * Having a suffix allows us to have aliases in sysfs - eg: the generic
- * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
- * 'PM_CYC' where the latter is the name by which the event is known in
- * POWER CPU specification.
- */
-#define	EVENT_VAR(_id, _suffix)		event_attr_##_id##_suffix
-#define	EVENT_PTR(_id, _suffix)		&EVENT_VAR(_id, _suffix).attr.attr
-
-#define	EVENT_ATTR(_name, _id, _suffix)					\
-	PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id,	\
-			power_events_sysfs_show)
-
-#define	GENERIC_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _g)
-#define	GENERIC_EVENT_PTR(_id)		EVENT_PTR(_id, _g)
-
-#define	POWER_EVENT_ATTR(_name, _id)	EVENT_ATTR(PM_##_name, _id, _p)
-#define	POWER_EVENT_PTR(_id)		EVENT_PTR(_id, _p)
diff --git a/trunk/arch/powerpc/include/asm/ppc_asm.h b/trunk/arch/powerpc/include/asm/ppc_asm.h
index 2d0e1f5d8339..ea2a86e8ff95 100644
--- a/trunk/arch/powerpc/include/asm/ppc_asm.h
+++ b/trunk/arch/powerpc/include/asm/ppc_asm.h
@@ -24,7 +24,7 @@
  * user_time and system_time fields in the paca.
  */
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)
 #define ACCOUNT_STOLEN_TIME
@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 
 #endif /* CONFIG_PPC_SPLPAR */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 /*
  * Macros for storing registers into and loading registers from
diff --git a/trunk/arch/powerpc/kernel/entry_64.S b/trunk/arch/powerpc/kernel/entry_64.S
index ac057013f9fd..3d990d3bd8ba 100644
--- a/trunk/arch/powerpc/kernel/entry_64.S
+++ b/trunk/arch/powerpc/kernel/entry_64.S
@@ -94,7 +94,7 @@ system_call_common:
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
 	std	r11,-16(r9)		/* "regshere" marker */
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
 BEGIN_FW_FTR_SECTION
 	beq	33f
 	/* if from user, see if there are any DTL entries to process */
@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 33:
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
 
 	/*
 	 * A syscall should always be called with interrupts enabled
diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c
index f77fa22754bc..127361e093f4 100644
--- a/trunk/arch/powerpc/kernel/time.c
+++ b/trunk/arch/powerpc/kernel/time.c
@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /*
  * Factors for converting from cputime_t (timebase ticks) to
  * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
@@ -347,7 +347,6 @@ void vtime_account_system(struct task_struct *tsk)
 	if (stolen)
 		account_steal_time(stolen);
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -378,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk)
 	account_user_time(tsk, utime, utimescaled);
 }
 
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
 #define calc_cputime_factors()
 #endif
 
@@ -669,7 +668,7 @@ int update_persistent_clock(struct timespec now)
 	struct rtc_time tm;
 
 	if (!ppc_md.set_rtc_time)
-		return -ENODEV;
+		return 0;
 
 	to_tm(now.tv_sec + 1 + timezone_offset, &tm);
 	tm.tm_year -= 1900;
diff --git a/trunk/arch/powerpc/perf/core-book3s.c b/trunk/arch/powerpc/perf/core-book3s.c
index fa476d50791f..aa2465e21f1a 100644
--- a/trunk/arch/powerpc/perf/core-book3s.c
+++ b/trunk/arch/powerpc/perf/core-book3s.c
@@ -1305,16 +1305,6 @@ static int power_pmu_event_idx(struct perf_event *event)
 	return event->hw.idx;
 }
 
-ssize_t power_events_sysfs_show(struct device *dev,
-				struct device_attribute *attr, char *page)
-{
-	struct perf_pmu_events_attr *pmu_attr;
-
-	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-
-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
-}
-
 struct pmu power_pmu = {
 	.pmu_enable	= power_pmu_enable,
 	.pmu_disable	= power_pmu_disable,
@@ -1547,8 +1537,6 @@ int __cpuinit register_power_pmu(struct power_pmu *pmu)
 	pr_info("%s performance monitor hardware support registered\n",
 		pmu->name);
 
-	power_pmu.attr_groups = ppmu->attr_groups;
-
 #ifdef MSR_HV
 	/*
 	 * Use FCHV to ignore kernel events if MSR.HV is set.
diff --git a/trunk/arch/powerpc/perf/power7-pmu.c b/trunk/arch/powerpc/perf/power7-pmu.c
index b554879bd31e..2ee01e38d5e2 100644
--- a/trunk/arch/powerpc/perf/power7-pmu.c
+++ b/trunk/arch/powerpc/perf/power7-pmu.c
@@ -50,18 +50,6 @@
 #define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
 #define MMCR1_PMCSEL_MSK	0xff
 
-/*
- * Power7 event codes.
- */
-#define	PME_PM_CYC			0x1e
-#define	PME_PM_GCT_NOSLOT_CYC		0x100f8
-#define	PME_PM_CMPLU_STALL		0x4000a
-#define	PME_PM_INST_CMPL		0x2
-#define	PME_PM_LD_REF_L1		0xc880
-#define	PME_PM_LD_MISS_L1		0x400f0
-#define	PME_PM_BRU_FIN			0x10068
-#define	PME_PM_BRU_MPRED		0x400f6
-
 /*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
@@ -319,14 +307,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 }
 
 static int power7_generic_events[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] =			PME_PM_CYC,
-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PME_PM_GCT_NOSLOT_CYC,
-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PME_PM_CMPLU_STALL,
-	[PERF_COUNT_HW_INSTRUCTIONS] =			PME_PM_INST_CMPL,
-	[PERF_COUNT_HW_CACHE_REFERENCES] =		PME_PM_LD_REF_L1,
-	[PERF_COUNT_HW_CACHE_MISSES] =			PME_PM_LD_MISS_L1,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PME_PM_BRU_FIN,
-	[PERF_COUNT_HW_BRANCH_MISSES] =			PME_PM_BRU_MPRED,
+	[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a,  /* CMPLU_STALL */
+	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880,	/* LD_REF_L1	*/
+	[PERF_COUNT_HW_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1	*/
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN	*/
+	[PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6,	/* BR_MPRED	*/
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
@@ -374,57 +362,6 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 };
 
-
-GENERIC_EVENT_ATTR(cpu-cycles,			CYC);
-GENERIC_EVENT_ATTR(stalled-cycles-frontend,	GCT_NOSLOT_CYC);
-GENERIC_EVENT_ATTR(stalled-cycles-backend,	CMPLU_STALL);
-GENERIC_EVENT_ATTR(instructions,		INST_CMPL);
-GENERIC_EVENT_ATTR(cache-references,		LD_REF_L1);
-GENERIC_EVENT_ATTR(cache-misses,		LD_MISS_L1);
-GENERIC_EVENT_ATTR(branch-instructions,		BRU_FIN);
-GENERIC_EVENT_ATTR(branch-misses,		BRU_MPRED);
-
-POWER_EVENT_ATTR(CYC,				CYC);
-POWER_EVENT_ATTR(GCT_NOSLOT_CYC,		GCT_NOSLOT_CYC);
-POWER_EVENT_ATTR(CMPLU_STALL,			CMPLU_STALL);
-POWER_EVENT_ATTR(INST_CMPL,			INST_CMPL);
-POWER_EVENT_ATTR(LD_REF_L1,			LD_REF_L1);
-POWER_EVENT_ATTR(LD_MISS_L1,			LD_MISS_L1);
-POWER_EVENT_ATTR(BRU_FIN,			BRU_FIN)
-POWER_EVENT_ATTR(BRU_MPRED,			BRU_MPRED);
-
-static struct attribute *power7_events_attr[] = {
-	GENERIC_EVENT_PTR(CYC),
-	GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
-	GENERIC_EVENT_PTR(CMPLU_STALL),
-	GENERIC_EVENT_PTR(INST_CMPL),
-	GENERIC_EVENT_PTR(LD_REF_L1),
-	GENERIC_EVENT_PTR(LD_MISS_L1),
-	GENERIC_EVENT_PTR(BRU_FIN),
-	GENERIC_EVENT_PTR(BRU_MPRED),
-
-	POWER_EVENT_PTR(CYC),
-	POWER_EVENT_PTR(GCT_NOSLOT_CYC),
-	POWER_EVENT_PTR(CMPLU_STALL),
-	POWER_EVENT_PTR(INST_CMPL),
-	POWER_EVENT_PTR(LD_REF_L1),
-	POWER_EVENT_PTR(LD_MISS_L1),
-	POWER_EVENT_PTR(BRU_FIN),
-	POWER_EVENT_PTR(BRU_MPRED),
-	NULL
-};
-
-
-static struct attribute_group power7_pmu_events_group = {
-	.name = "events",
-	.attrs = power7_events_attr,
-};
-
-static const struct attribute_group *power7_pmu_attr_groups[] = {
-	&power7_pmu_events_group,
-	NULL,
-};
-
 static struct power_pmu power7_pmu = {
 	.name			= "POWER7",
 	.n_counter		= 6,
@@ -436,7 +373,6 @@ static struct power_pmu power7_pmu = {
 	.get_alternatives	= power7_get_alternatives,
 	.disable_pmc		= power7_disable_pmc,
 	.flags			= PPMU_ALT_SIPR,
-	.attr_groups		= power7_pmu_attr_groups,
 	.n_generic		= ARRAY_SIZE(power7_generic_events),
 	.generic_events		= power7_generic_events,
 	.cache_events		= &power7_cache_events,
diff --git a/trunk/arch/powerpc/platforms/cell/spufs/sched.c b/trunk/arch/powerpc/platforms/cell/spufs/sched.c
index 49318385d4fa..25db92a8e1cf 100644
--- a/trunk/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/trunk/arch/powerpc/platforms/cell/spufs/sched.c
@@ -24,7 +24,6 @@
 
 #include <linux/errno.h>
 #include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
diff --git a/trunk/arch/powerpc/platforms/pseries/dtl.c b/trunk/arch/powerpc/platforms/pseries/dtl.c
index 0cc0ac07a55d..a7648543c59e 100644
--- a/trunk/arch/powerpc/platforms/pseries/dtl.c
+++ b/trunk/arch/powerpc/platforms/pseries/dtl.c
@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
  */
 static int dtl_buf_entries = N_DISPATCH_LOG;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 struct dtl_ring {
 	u64	write_index;
 	struct dtl_entry *write_ptr;
@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 	return per_cpu(dtl_rings, dtl->cpu).write_index;
 }
 
-#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 static int dtl_start(struct dtl *dtl)
 {
@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 {
 	return lppaca_of(dtl->cpu).dtl_idx;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 static int dtl_enable(struct dtl *dtl)
 {
diff --git a/trunk/arch/powerpc/platforms/pseries/setup.c b/trunk/arch/powerpc/platforms/pseries/setup.c
index 527e12c9573b..ca55882465d6 100644
--- a/trunk/arch/powerpc/platforms/pseries/setup.c
+++ b/trunk/arch/powerpc/platforms/pseries/setup.c
@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
 
 struct kmem_cache *dtl_cache;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /*
  * Allocate space for the dispatch trace log for all possible cpus
  * and register the buffers with the hypervisor.  This is used for
@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 static inline int alloc_dispatch_logs(void)
 {
 	return 0;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 static int alloc_dispatch_log_kmem_cache(void)
 {
diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig
index c15ba7d1be64..b5ea38c25647 100644
--- a/trunk/arch/s390/Kconfig
+++ b/trunk/arch/s390/Kconfig
@@ -78,6 +78,7 @@ config S390
 	select HAVE_KVM if 64BIT
 	select HAVE_ARCH_TRACEHOOK
 	select INIT_ALL_POSSIBLE
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/trunk/arch/s390/kernel/time.c b/trunk/arch/s390/kernel/time.c
index 0aa98db8a80d..a5f4f5a1d24b 100644
--- a/trunk/arch/s390/kernel/time.c
+++ b/trunk/arch/s390/kernel/time.c
@@ -120,9 +120,6 @@ static int s390_next_ktime(ktime_t expires,
 	nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires));
 	do_div(nsecs, 125);
 	S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9);
-	/* Program the maximum value if we have an overflow (== year 2042) */
-	if (unlikely(S390_lowcore.clock_comparator < sched_clock_base_cc))
-		S390_lowcore.clock_comparator = -1ULL;
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	return 0;
 }
diff --git a/trunk/arch/s390/kernel/vtime.c b/trunk/arch/s390/kernel/vtime.c
index ce9cc5aa2033..e84b8b68444a 100644
--- a/trunk/arch/s390/kernel/vtime.c
+++ b/trunk/arch/s390/kernel/vtime.c
@@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-void vtime_account_irq_enter(struct task_struct *tsk)
+void vtime_account(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, system;
@@ -145,10 +145,10 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 
 	virt_timer_forward(system);
 }
-EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+EXPORT_SYMBOL_GPL(vtime_account);
 
 void vtime_account_system(struct task_struct *tsk)
-__attribute__((alias("vtime_account_irq_enter")));
+__attribute__((alias("vtime_account")));
 EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void __kprobes vtime_stop_cpu(void)
diff --git a/trunk/arch/sh/Kconfig b/trunk/arch/sh/Kconfig
index 9c833c585871..babc2b826c5c 100644
--- a/trunk/arch/sh/Kconfig
+++ b/trunk/arch/sh/Kconfig
@@ -11,6 +11,7 @@ config SUPERH
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
+	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_DEBUG_BUGVERBOSE
 	select ARCH_HAVE_CUSTOM_GPIO_H
@@ -90,6 +91,9 @@ config GENERIC_CSUM
 config GENERIC_HWEIGHT
 	def_bool y
 
+config IRQ_PER_CPU
+	def_bool y
+
 config GENERIC_GPIO
 	def_bool n
 
diff --git a/trunk/arch/sparc/Kconfig b/trunk/arch/sparc/Kconfig
index 9bff3db17c8c..9f2edb5c5551 100644
--- a/trunk/arch/sparc/Kconfig
+++ b/trunk/arch/sparc/Kconfig
@@ -23,6 +23,7 @@ config SPARC
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select RTC_CLASS
 	select RTC_DRV_M48T59
+	select HAVE_IRQ_WORK
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select HAVE_ARCH_JUMP_LABEL
@@ -60,7 +61,6 @@ config SPARC64
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_SYSCALL_WRAPPERS
-	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_SYSCALL_TRACEPOINTS
diff --git a/trunk/arch/sparc/include/asm/pgtable_64.h b/trunk/arch/sparc/include/asm/pgtable_64.h
index 08fcce90316b..7870be0f5adc 100644
--- a/trunk/arch/sparc/include/asm/pgtable_64.h
+++ b/trunk/arch/sparc/include/asm/pgtable_64.h
@@ -71,6 +71,7 @@
 #define PMD_PADDR	_AC(0xfffffffe,UL)
 #define PMD_PADDR_SHIFT	_AC(11,UL)
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define PMD_ISHUGE	_AC(0x00000001,UL)
 
 /* This is the PMD layout when PMD_ISHUGE is set.  With 4MB huge
@@ -85,6 +86,7 @@
 #define PMD_HUGE_ACCESSED	_AC(0x00000080,UL)
 #define PMD_HUGE_EXEC		_AC(0x00000040,UL)
 #define PMD_HUGE_SPLITTING	_AC(0x00000020,UL)
+#endif
 
 /* PGDs point to PMD tables which are 8K aligned.  */
 #define PGD_PADDR	_AC(0xfffffffc,UL)
@@ -626,12 +628,6 @@ static inline unsigned long pte_special(pte_t pte)
 	return pte_val(pte) & _PAGE_SPECIAL;
 }
 
-static inline int pmd_large(pmd_t pmd)
-{
-	return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
-		(PMD_ISHUGE | PMD_HUGE_PRESENT);
-}
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_young(pmd_t pmd)
 {
@@ -650,6 +646,12 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
 }
 
+static inline int pmd_large(pmd_t pmd)
+{
+	return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
+		(PMD_ISHUGE | PMD_HUGE_PRESENT);
+}
+
 static inline int pmd_trans_splitting(pmd_t pmd)
 {
 	return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
diff --git a/trunk/arch/sparc/kernel/sbus.c b/trunk/arch/sparc/kernel/sbus.c
index be5bdf93c767..1271b3a27d4e 100644
--- a/trunk/arch/sparc/kernel/sbus.c
+++ b/trunk/arch/sparc/kernel/sbus.c
@@ -554,8 +554,10 @@ static void __init sbus_iommu_init(struct platform_device *op)
 	regs = pr->phys_addr;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC);
+	if (!iommu)
+		goto fatal_memory_error;
 	strbuf = kzalloc(sizeof(*strbuf), GFP_ATOMIC);
-	if (!iommu || !strbuf)
+	if (!strbuf)
 		goto fatal_memory_error;
 
 	op->dev.archdata.iommu = iommu;
@@ -654,8 +656,6 @@ static void __init sbus_iommu_init(struct platform_device *op)
 	return;
 
 fatal_memory_error:
-	kfree(iommu);
-	kfree(strbuf);
 	prom_printf("sbus_iommu_init: Fatal memory allocation error.\n");
 }
 
diff --git a/trunk/arch/sparc/mm/gup.c b/trunk/arch/sparc/mm/gup.c
index 01ee23dd724d..42c55df3aec3 100644
--- a/trunk/arch/sparc/mm/gup.c
+++ b/trunk/arch/sparc/mm/gup.c
@@ -66,56 +66,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 	return 1;
 }
 
-static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
-			unsigned long end, int write, struct page **pages,
-			int *nr)
-{
-	struct page *head, *page, *tail;
-	u32 mask;
-	int refs;
-
-	mask = PMD_HUGE_PRESENT;
-	if (write)
-		mask |= PMD_HUGE_WRITE;
-	if ((pmd_val(pmd) & mask) != mask)
-		return 0;
-
-	refs = 0;
-	head = pmd_page(pmd);
-	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-	tail = page;
-	do {
-		VM_BUG_ON(compound_head(page) != head);
-		pages[*nr] = page;
-		(*nr)++;
-		page++;
-		refs++;
-	} while (addr += PAGE_SIZE, addr != end);
-
-	if (!page_cache_add_speculative(head, refs)) {
-		*nr -= refs;
-		return 0;
-	}
-
-	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
-		*nr -= refs;
-		while (refs--)
-			put_page(head);
-		return 0;
-	}
-
-	/* Any tail page need their mapcount reference taken before we
-	 * return.
-	 */
-	while (refs--) {
-		if (PageTail(tail))
-			get_huge_page_tail(tail);
-		tail++;
-	}
-
-	return 1;
-}
-
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		int write, struct page **pages, int *nr)
 {
@@ -127,14 +77,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		pmd_t pmd = *pmdp;
 
 		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+		if (pmd_none(pmd))
 			return 0;
-		if (unlikely(pmd_large(pmd))) {
-			if (!gup_huge_pmd(pmdp, pmd, addr, next,
-					  write, pages, nr))
-				return 0;
-		} else if (!gup_pte_range(pmd, addr, next, write,
-					  pages, nr))
+		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
 			return 0;
 	} while (pmdp++, addr = next, addr != end);
 
diff --git a/trunk/arch/tile/Kconfig b/trunk/arch/tile/Kconfig
index 1bb7ad4aeff4..875d008828b8 100644
--- a/trunk/arch/tile/Kconfig
+++ b/trunk/arch/tile/Kconfig
@@ -140,8 +140,6 @@ config ARCH_DEFCONFIG
 
 source "init/Kconfig"
 
-source "kernel/Kconfig.freezer"
-
 menu "Tilera-specific configuration"
 
 config NR_CPUS
diff --git a/trunk/arch/tile/include/asm/io.h b/trunk/arch/tile/include/asm/io.h
index 31672918064c..2a9b293fece6 100644
--- a/trunk/arch/tile/include/asm/io.h
+++ b/trunk/arch/tile/include/asm/io.h
@@ -250,9 +250,7 @@ static inline void writeq(u64 val, unsigned long addr)
 #define iowrite32 writel
 #define iowrite64 writeq
 
-#if CHIP_HAS_MMIO() || defined(CONFIG_PCI)
-
-static inline void memset_io(volatile void *dst, int val, size_t len)
+static inline void memset_io(void *dst, int val, size_t len)
 {
 	int x;
 	BUG_ON((unsigned long)dst & 0x3);
@@ -279,8 +277,6 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
 		writel(*(u32 *)(src + x), dst + x);
 }
 
-#endif
-
 /*
  * The Tile architecture does not support IOPORT, even with PCI.
  * Unfortunately we can't yet simply not declare these methods,
diff --git a/trunk/arch/tile/include/asm/irqflags.h b/trunk/arch/tile/include/asm/irqflags.h
index 241c0bb60b12..b4e96fef2cf8 100644
--- a/trunk/arch/tile/include/asm/irqflags.h
+++ b/trunk/arch/tile/include/asm/irqflags.h
@@ -18,20 +18,32 @@
 #include <arch/interrupts.h>
 #include <arch/chip.h>
 
+#if !defined(__tilegx__) && defined(__ASSEMBLY__)
+
 /*
  * The set of interrupts we want to allow when interrupts are nominally
  * disabled.  The remainder are effectively "NMI" interrupts from
  * the point of view of the generic Linux code.  Note that synchronous
  * interrupts (aka "non-queued") are not blocked by the mask in any case.
  */
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+	(~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+	(~(INT_MASK_HI(INT_PERF_COUNT)))
+#endif
+
+#else
+
+#if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS \
+	(~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
+#else
 #define LINUX_MASKABLE_INTERRUPTS \
-	(~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT)))
+	(~(INT_MASK(INT_PERF_COUNT)))
+#endif
 
-#if CHIP_HAS_SPLIT_INTR_MASK()
-/* The same macro, but for the two 32-bit SPRs separately. */
-#define LINUX_MASKABLE_INTERRUPTS_LO (-1)
-#define LINUX_MASKABLE_INTERRUPTS_HI \
-	(~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32))))
 #endif
 
 #ifndef __ASSEMBLY__
@@ -114,7 +126,7 @@
  * to know our current state.
  */
 DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
-#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR)
+#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
 
 /* Disable interrupts. */
 #define arch_local_irq_disable() \
@@ -153,7 +165,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 
 /* Prevent the given interrupt from being enabled next time we enable irqs. */
 #define arch_local_irq_mask(interrupt) \
-	(__get_cpu_var(interrupts_enabled_mask) &= ~(1ULL << (interrupt)))
+	(__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
 
 /* Prevent the given interrupt from being enabled immediately. */
 #define arch_local_irq_mask_now(interrupt) do { \
@@ -163,7 +175,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 
 /* Allow the given interrupt to be enabled next time we enable irqs. */
 #define arch_local_irq_unmask(interrupt) \
-	(__get_cpu_var(interrupts_enabled_mask) |= (1ULL << (interrupt)))
+	(__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
 
 /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
 #define arch_local_irq_unmask_now(interrupt) do { \
@@ -238,7 +250,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 /* Disable interrupts. */
 #define IRQ_DISABLE(tmp0, tmp1)					\
 	{							\
-	 movei  tmp0, LINUX_MASKABLE_INTERRUPTS_LO;		\
+	 movei  tmp0, -1;					\
 	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI)	\
 	};							\
 	{							\
diff --git a/trunk/arch/tile/include/uapi/arch/interrupts_32.h b/trunk/arch/tile/include/uapi/arch/interrupts_32.h
index 2efe3f68b2d6..96b5710505b6 100644
--- a/trunk/arch/tile/include/uapi/arch/interrupts_32.h
+++ b/trunk/arch/tile/include/uapi/arch/interrupts_32.h
@@ -15,7 +15,6 @@
 #ifndef __ARCH_INTERRUPTS_H__
 #define __ARCH_INTERRUPTS_H__
 
-#ifndef __KERNEL__
 /** Mask for an interrupt. */
 /* Note: must handle breaking interrupts into high and low words manually. */
 #define INT_MASK_LO(intno) (1 << (intno))
@@ -24,7 +23,6 @@
 #ifndef __ASSEMBLER__
 #define INT_MASK(intno) (1ULL << (intno))
 #endif
-#endif
 
 
 /** Where a given interrupt executes */
@@ -94,216 +92,216 @@
 
 #ifndef __ASSEMBLER__
 #define QUEUED_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_DMATLB_MISS) | \
-    (1ULL << INT_DMATLB_ACCESS) | \
-    (1ULL << INT_SNITLB_MISS) | \
-    (1ULL << INT_SN_NOTIFY) | \
-    (1ULL << INT_SN_FIREWALL) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_DMA_NOTIFY) | \
-    (1ULL << INT_IDN_CA) | \
-    (1ULL << INT_UDN_CA) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DMA_ASID) | \
-    (1ULL << INT_SNI_ASID) | \
-    (1ULL << INT_DMA_CPL) | \
-    (1ULL << INT_SN_CPL) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
     0)
 #define NONQUEUED_INTERRUPTS ( \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_SN_ACCESS) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_IDN_REFILL) | \
-    (1ULL << INT_UDN_REFILL) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
-    (1ULL << INT_SN_STATIC_ACCESS) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
     0)
 #define CRITICAL_MASKED_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_DMATLB_MISS) | \
-    (1ULL << INT_DMATLB_ACCESS) | \
-    (1ULL << INT_SNITLB_MISS) | \
-    (1ULL << INT_SN_NOTIFY) | \
-    (1ULL << INT_SN_FIREWALL) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_DMA_NOTIFY) | \
-    (1ULL << INT_IDN_CA) | \
-    (1ULL << INT_UDN_CA) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
     0)
 #define CRITICAL_UNMASKED_INTERRUPTS ( \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_SN_ACCESS) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_IDN_REFILL) | \
-    (1ULL << INT_UDN_REFILL) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DMA_ASID) | \
-    (1ULL << INT_SNI_ASID) | \
-    (1ULL << INT_DMA_CPL) | \
-    (1ULL << INT_SN_CPL) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
-    (1ULL << INT_SN_STATIC_ACCESS) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
     0)
 #define MASKABLE_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_IDN_REFILL) | \
-    (1ULL << INT_UDN_REFILL) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_DMATLB_MISS) | \
-    (1ULL << INT_DMATLB_ACCESS) | \
-    (1ULL << INT_SNITLB_MISS) | \
-    (1ULL << INT_SN_NOTIFY) | \
-    (1ULL << INT_SN_FIREWALL) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_DMA_NOTIFY) | \
-    (1ULL << INT_IDN_CA) | \
-    (1ULL << INT_UDN_CA) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
     0)
 #define UNMASKABLE_INTERRUPTS ( \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_SN_ACCESS) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DMA_ASID) | \
-    (1ULL << INT_SNI_ASID) | \
-    (1ULL << INT_DMA_CPL) | \
-    (1ULL << INT_SN_CPL) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
-    (1ULL << INT_SN_STATIC_ACCESS) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
     0)
 #define SYNC_INTERRUPTS ( \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_SN_ACCESS) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_IDN_REFILL) | \
-    (1ULL << INT_UDN_REFILL) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
-    (1ULL << INT_SN_STATIC_ACCESS) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_SN_ACCESS) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_IDN_REFILL) | \
+    INT_MASK(INT_UDN_REFILL) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SN_STATIC_ACCESS) | \
     0)
 #define NON_SYNC_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_DMATLB_MISS) | \
-    (1ULL << INT_DMATLB_ACCESS) | \
-    (1ULL << INT_SNITLB_MISS) | \
-    (1ULL << INT_SN_NOTIFY) | \
-    (1ULL << INT_SN_FIREWALL) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_DMA_NOTIFY) | \
-    (1ULL << INT_IDN_CA) | \
-    (1ULL << INT_UDN_CA) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DMA_ASID) | \
-    (1ULL << INT_SNI_ASID) | \
-    (1ULL << INT_DMA_CPL) | \
-    (1ULL << INT_SN_CPL) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_DMATLB_MISS) | \
+    INT_MASK(INT_DMATLB_ACCESS) | \
+    INT_MASK(INT_SNITLB_MISS) | \
+    INT_MASK(INT_SN_NOTIFY) | \
+    INT_MASK(INT_SN_FIREWALL) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_DMA_NOTIFY) | \
+    INT_MASK(INT_IDN_CA) | \
+    INT_MASK(INT_UDN_CA) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DMA_ASID) | \
+    INT_MASK(INT_SNI_ASID) | \
+    INT_MASK(INT_DMA_CPL) | \
+    INT_MASK(INT_SN_CPL) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
     0)
 #endif /* !__ASSEMBLER__ */
 #endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/trunk/arch/tile/include/uapi/arch/interrupts_64.h b/trunk/arch/tile/include/uapi/arch/interrupts_64.h
index 13c9f9182348..5bb58b2e4e6f 100644
--- a/trunk/arch/tile/include/uapi/arch/interrupts_64.h
+++ b/trunk/arch/tile/include/uapi/arch/interrupts_64.h
@@ -15,7 +15,6 @@
 #ifndef __ARCH_INTERRUPTS_H__
 #define __ARCH_INTERRUPTS_H__
 
-#ifndef __KERNEL__
 /** Mask for an interrupt. */
 #ifdef __ASSEMBLER__
 /* Note: must handle breaking interrupts into high and low words manually. */
@@ -23,7 +22,6 @@
 #else
 #define INT_MASK(intno) (1ULL << (intno))
 #endif
-#endif
 
 
 /** Where a given interrupt executes */
@@ -87,192 +85,192 @@
 
 #ifndef __ASSEMBLER__
 #define QUEUED_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_AUX_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_IPI_3) | \
-    (1ULL << INT_IPI_2) | \
-    (1ULL << INT_IPI_1) | \
-    (1ULL << INT_IPI_0) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
     0)
 #define NONQUEUED_INTERRUPTS ( \
-    (1ULL << INT_SINGLE_STEP_3) | \
-    (1ULL << INT_SINGLE_STEP_2) | \
-    (1ULL << INT_SINGLE_STEP_1) | \
-    (1ULL << INT_SINGLE_STEP_0) | \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_ILL_TRANS) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
     0)
 #define CRITICAL_MASKED_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_SINGLE_STEP_3) | \
-    (1ULL << INT_SINGLE_STEP_2) | \
-    (1ULL << INT_SINGLE_STEP_1) | \
-    (1ULL << INT_SINGLE_STEP_0) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_AUX_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_IPI_3) | \
-    (1ULL << INT_IPI_2) | \
-    (1ULL << INT_IPI_1) | \
-    (1ULL << INT_IPI_0) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
     0)
 #define CRITICAL_UNMASKED_INTERRUPTS ( \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_ILL_TRANS) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
     0)
 #define MASKABLE_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_SINGLE_STEP_3) | \
-    (1ULL << INT_SINGLE_STEP_2) | \
-    (1ULL << INT_SINGLE_STEP_1) | \
-    (1ULL << INT_SINGLE_STEP_0) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_AUX_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_IPI_3) | \
-    (1ULL << INT_IPI_2) | \
-    (1ULL << INT_IPI_1) | \
-    (1ULL << INT_IPI_0) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
     0)
 #define UNMASKABLE_INTERRUPTS ( \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_ILL_TRANS) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
     0)
 #define SYNC_INTERRUPTS ( \
-    (1ULL << INT_SINGLE_STEP_3) | \
-    (1ULL << INT_SINGLE_STEP_2) | \
-    (1ULL << INT_SINGLE_STEP_1) | \
-    (1ULL << INT_SINGLE_STEP_0) | \
-    (1ULL << INT_IDN_COMPLETE) | \
-    (1ULL << INT_UDN_COMPLETE) | \
-    (1ULL << INT_ITLB_MISS) | \
-    (1ULL << INT_ILL) | \
-    (1ULL << INT_GPV) | \
-    (1ULL << INT_IDN_ACCESS) | \
-    (1ULL << INT_UDN_ACCESS) | \
-    (1ULL << INT_SWINT_3) | \
-    (1ULL << INT_SWINT_2) | \
-    (1ULL << INT_SWINT_1) | \
-    (1ULL << INT_SWINT_0) | \
-    (1ULL << INT_ILL_TRANS) | \
-    (1ULL << INT_UNALIGN_DATA) | \
-    (1ULL << INT_DTLB_MISS) | \
-    (1ULL << INT_DTLB_ACCESS) | \
+    INT_MASK(INT_SINGLE_STEP_3) | \
+    INT_MASK(INT_SINGLE_STEP_2) | \
+    INT_MASK(INT_SINGLE_STEP_1) | \
+    INT_MASK(INT_SINGLE_STEP_0) | \
+    INT_MASK(INT_IDN_COMPLETE) | \
+    INT_MASK(INT_UDN_COMPLETE) | \
+    INT_MASK(INT_ITLB_MISS) | \
+    INT_MASK(INT_ILL) | \
+    INT_MASK(INT_GPV) | \
+    INT_MASK(INT_IDN_ACCESS) | \
+    INT_MASK(INT_UDN_ACCESS) | \
+    INT_MASK(INT_SWINT_3) | \
+    INT_MASK(INT_SWINT_2) | \
+    INT_MASK(INT_SWINT_1) | \
+    INT_MASK(INT_SWINT_0) | \
+    INT_MASK(INT_ILL_TRANS) | \
+    INT_MASK(INT_UNALIGN_DATA) | \
+    INT_MASK(INT_DTLB_MISS) | \
+    INT_MASK(INT_DTLB_ACCESS) | \
     0)
 #define NON_SYNC_INTERRUPTS ( \
-    (1ULL << INT_MEM_ERROR) | \
-    (1ULL << INT_IDN_FIREWALL) | \
-    (1ULL << INT_UDN_FIREWALL) | \
-    (1ULL << INT_TILE_TIMER) | \
-    (1ULL << INT_AUX_TILE_TIMER) | \
-    (1ULL << INT_IDN_TIMER) | \
-    (1ULL << INT_UDN_TIMER) | \
-    (1ULL << INT_IDN_AVAIL) | \
-    (1ULL << INT_UDN_AVAIL) | \
-    (1ULL << INT_IPI_3) | \
-    (1ULL << INT_IPI_2) | \
-    (1ULL << INT_IPI_1) | \
-    (1ULL << INT_IPI_0) | \
-    (1ULL << INT_PERF_COUNT) | \
-    (1ULL << INT_AUX_PERF_COUNT) | \
-    (1ULL << INT_INTCTRL_3) | \
-    (1ULL << INT_INTCTRL_2) | \
-    (1ULL << INT_INTCTRL_1) | \
-    (1ULL << INT_INTCTRL_0) | \
-    (1ULL << INT_BOOT_ACCESS) | \
-    (1ULL << INT_WORLD_ACCESS) | \
-    (1ULL << INT_I_ASID) | \
-    (1ULL << INT_D_ASID) | \
-    (1ULL << INT_DOUBLE_FAULT) | \
+    INT_MASK(INT_MEM_ERROR) | \
+    INT_MASK(INT_IDN_FIREWALL) | \
+    INT_MASK(INT_UDN_FIREWALL) | \
+    INT_MASK(INT_TILE_TIMER) | \
+    INT_MASK(INT_AUX_TILE_TIMER) | \
+    INT_MASK(INT_IDN_TIMER) | \
+    INT_MASK(INT_UDN_TIMER) | \
+    INT_MASK(INT_IDN_AVAIL) | \
+    INT_MASK(INT_UDN_AVAIL) | \
+    INT_MASK(INT_IPI_3) | \
+    INT_MASK(INT_IPI_2) | \
+    INT_MASK(INT_IPI_1) | \
+    INT_MASK(INT_IPI_0) | \
+    INT_MASK(INT_PERF_COUNT) | \
+    INT_MASK(INT_AUX_PERF_COUNT) | \
+    INT_MASK(INT_INTCTRL_3) | \
+    INT_MASK(INT_INTCTRL_2) | \
+    INT_MASK(INT_INTCTRL_1) | \
+    INT_MASK(INT_INTCTRL_0) | \
+    INT_MASK(INT_BOOT_ACCESS) | \
+    INT_MASK(INT_WORLD_ACCESS) | \
+    INT_MASK(INT_I_ASID) | \
+    INT_MASK(INT_D_ASID) | \
+    INT_MASK(INT_DOUBLE_FAULT) | \
     0)
 #endif /* !__ASSEMBLER__ */
 #endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/trunk/arch/tile/kernel/intvec_64.S b/trunk/arch/tile/kernel/intvec_64.S
index 4ea080902654..54bc9a6678e8 100644
--- a/trunk/arch/tile/kernel/intvec_64.S
+++ b/trunk/arch/tile/kernel/intvec_64.S
@@ -1035,9 +1035,7 @@ handle_syscall:
 	/* Ensure that the syscall number is within the legal range. */
 	{
 	 moveli r20, hw2(sys_call_table)
-#ifdef CONFIG_COMPAT
 	 blbs   r30, .Lcompat_syscall
-#endif
 	}
 	{
 	 cmpltu r21, TREG_SYSCALL_NR_NAME, r21
@@ -1095,7 +1093,6 @@ handle_syscall:
 	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
 	}
 
-#ifdef CONFIG_COMPAT
 .Lcompat_syscall:
 	/*
 	 * Load the base of the compat syscall table in r20, and
@@ -1120,7 +1117,6 @@ handle_syscall:
 	{ move r15, r4; addxi r4, r4, 0 }
 	{ move r16, r5; addxi r5, r5, 0 }
 	j .Lload_syscall_pointer
-#endif
 
 .Linvalid_syscall:
 	/* Report an invalid syscall back to the user program */
diff --git a/trunk/arch/tile/kernel/process.c b/trunk/arch/tile/kernel/process.c
index caf93ae11793..0e5661e7d00d 100644
--- a/trunk/arch/tile/kernel/process.c
+++ b/trunk/arch/tile/kernel/process.c
@@ -159,7 +159,7 @@ static void save_arch_state(struct thread_struct *t);
 int copy_thread(unsigned long clone_flags, unsigned long sp,
 		unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs = task_pt_regs(p);
+	struct pt_regs *childregs = task_pt_regs(p), *regs = current_pt_regs();
 	unsigned long ksp;
 	unsigned long *callee_regs;
 
diff --git a/trunk/arch/tile/kernel/reboot.c b/trunk/arch/tile/kernel/reboot.c
index d1b5c913ae72..baa3d905fee2 100644
--- a/trunk/arch/tile/kernel/reboot.c
+++ b/trunk/arch/tile/kernel/reboot.c
@@ -16,7 +16,6 @@
 #include <linux/reboot.h>
 #include <linux/smp.h>
 #include <linux/pm.h>
-#include <linux/export.h>
 #include <asm/page.h>
 #include <asm/setup.h>
 #include <hv/hypervisor.h>
@@ -50,4 +49,3 @@ void machine_restart(char *cmd)
 
 /* No interesting distinction to be made here. */
 void (*pm_power_off)(void) = NULL;
-EXPORT_SYMBOL(pm_power_off);
diff --git a/trunk/arch/tile/kernel/setup.c b/trunk/arch/tile/kernel/setup.c
index d1e15f7b59c6..6a649a4462d3 100644
--- a/trunk/arch/tile/kernel/setup.c
+++ b/trunk/arch/tile/kernel/setup.c
@@ -31,7 +31,6 @@
 #include <linux/timex.h>
 #include <linux/hugetlb.h>
 #include <linux/start_kernel.h>
-#include <linux/screen_info.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
@@ -50,10 +49,6 @@ static inline int ABS(int x) { return x >= 0 ? x : -x; }
 /* Chip information */
 char chip_model[64] __write_once;
 
-#ifdef CONFIG_VT
-struct screen_info screen_info;
-#endif
-
 struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
diff --git a/trunk/arch/tile/kernel/stack.c b/trunk/arch/tile/kernel/stack.c
index ed258b8ae320..b2f44c28dda6 100644
--- a/trunk/arch/tile/kernel/stack.c
+++ b/trunk/arch/tile/kernel/stack.c
@@ -112,7 +112,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 		       p->pc, p->sp, p->ex1);
 		p = NULL;
 	}
-	if (!kbt->profile || ((1ULL << p->faultnum) & QUEUED_INTERRUPTS) == 0)
+	if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0)
 		return p;
 	return NULL;
 }
@@ -484,7 +484,6 @@ void save_stack_trace(struct stack_trace *trace)
 {
 	save_stack_trace_tsk(NULL, trace);
 }
-EXPORT_SYMBOL_GPL(save_stack_trace);
 
 #endif
 
diff --git a/trunk/arch/tile/lib/cacheflush.c b/trunk/arch/tile/lib/cacheflush.c
index 8f8ad814b139..db4fb89e12d8 100644
--- a/trunk/arch/tile/lib/cacheflush.c
+++ b/trunk/arch/tile/lib/cacheflush.c
@@ -12,7 +12,6 @@
  *   more details.
  */
 
-#include <linux/export.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <arch/icache.h>
@@ -166,4 +165,3 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 	__insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
 #endif
 }
-EXPORT_SYMBOL_GPL(finv_buffer_remote);
diff --git a/trunk/arch/tile/lib/cpumask.c b/trunk/arch/tile/lib/cpumask.c
index 75947edccb26..fdc403614d12 100644
--- a/trunk/arch/tile/lib/cpumask.c
+++ b/trunk/arch/tile/lib/cpumask.c
@@ -16,7 +16,6 @@
 #include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
-#include <linux/export.h>
 
 /*
  * Allow cropping out bits beyond the end of the array.
@@ -51,4 +50,3 @@ int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits)
 	} while (*bp != '\0' && *bp != '\n');
 	return 0;
 }
-EXPORT_SYMBOL(bitmap_parselist_crop);
diff --git a/trunk/arch/tile/lib/exports.c b/trunk/arch/tile/lib/exports.c
index 4385cb6fa00a..dd5f0a33fdaf 100644
--- a/trunk/arch/tile/lib/exports.c
+++ b/trunk/arch/tile/lib/exports.c
@@ -55,8 +55,6 @@ EXPORT_SYMBOL(hv_dev_poll_cancel);
 EXPORT_SYMBOL(hv_dev_close);
 EXPORT_SYMBOL(hv_sysconf);
 EXPORT_SYMBOL(hv_confstr);
-EXPORT_SYMBOL(hv_get_rtc);
-EXPORT_SYMBOL(hv_set_rtc);
 
 /* libgcc.a */
 uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
diff --git a/trunk/arch/tile/mm/homecache.c b/trunk/arch/tile/mm/homecache.c
index 1ae911939a18..5f7868dcd6d4 100644
--- a/trunk/arch/tile/mm/homecache.c
+++ b/trunk/arch/tile/mm/homecache.c
@@ -408,7 +408,6 @@ void homecache_change_page_home(struct page *page, int order, int home)
 		__set_pte(ptep, pte_set_home(pteval, home));
 	}
 }
-EXPORT_SYMBOL(homecache_change_page_home);
 
 struct page *homecache_alloc_pages(gfp_t gfp_mask,
 				   unsigned int order, int home)
diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig
index 5e05ee3a9810..225543bf45a5 100644
--- a/trunk/arch/x86/Kconfig
+++ b/trunk/arch/x86/Kconfig
@@ -1,7 +1,7 @@
 # Select 32 or 64 bit
 config 64BIT
 	bool "64-bit kernel" if ARCH = "x86"
-	default ARCH != "i386"
+	default ARCH = "x86_64"
 	---help---
 	  Say yes to build a 64-bit kernel - formerly known as x86_64
 	  Say no to build a 32-bit kernel - formerly known as i386
@@ -28,6 +28,7 @@ config X86
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
+	select HAVE_IRQ_WORK
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_MEMBLOCK
@@ -39,12 +40,10 @@ config X86
 	select HAVE_DMA_CONTIGUOUS if !SWIOTLB
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
-	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_FP_TEST
@@ -107,7 +106,6 @@ config X86
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
 	select GENERIC_TIME_VSYSCALL if X86_64
 	select KTIME_SCALAR if X86_32
-	select ALWAYS_USE_PERSISTENT_CLOCK
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HAVE_CONTEXT_TRACKING if X86_64
@@ -116,7 +114,6 @@ config X86
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
 	select GENERIC_SIGALTSTACK
-	select ARCH_USE_BUILTIN_BSWAP
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/trunk/arch/x86/Makefile b/trunk/arch/x86/Makefile
index 5c477260294f..e71fc4279aab 100644
--- a/trunk/arch/x86/Makefile
+++ b/trunk/arch/x86/Makefile
@@ -2,11 +2,7 @@
 
 # select defconfig based on actual architecture
 ifeq ($(ARCH),x86)
-  ifeq ($(shell uname -m),x86_64)
-        KBUILD_DEFCONFIG := x86_64_defconfig
-  else
         KBUILD_DEFCONFIG := i386_defconfig
-  endif
 else
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif
diff --git a/trunk/arch/x86/boot/compressed/misc.c b/trunk/arch/x86/boot/compressed/misc.c
index 7cb56c6ca351..88f7ff6da404 100644
--- a/trunk/arch/x86/boot/compressed/misc.c
+++ b/trunk/arch/x86/boot/compressed/misc.c
@@ -325,8 +325,6 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 {
 	real_mode = rmode;
 
-	sanitize_boot_params(real_mode);
-
 	if (real_mode->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
diff --git a/trunk/arch/x86/boot/compressed/misc.h b/trunk/arch/x86/boot/compressed/misc.h
index 674019d8e235..0e6dc0ee0eea 100644
--- a/trunk/arch/x86/boot/compressed/misc.h
+++ b/trunk/arch/x86/boot/compressed/misc.h
@@ -18,7 +18,6 @@
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
-#include <asm/bootparam_utils.h>
 
 #define BOOT_BOOT_H
 #include "../ctype.h"
diff --git a/trunk/arch/x86/configs/i386_defconfig b/trunk/arch/x86/configs/i386_defconfig
index 94447086e551..5598547281a7 100644
--- a/trunk/arch/x86/configs/i386_defconfig
+++ b/trunk/arch/x86/configs/i386_defconfig
@@ -1,4 +1,3 @@
-# CONFIG_64BIT is not set
 CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
diff --git a/trunk/arch/x86/include/asm/bootparam_utils.h b/trunk/arch/x86/include/asm/bootparam_utils.h
deleted file mode 100644
index 5b5e9cb774b5..000000000000
--- a/trunk/arch/x86/include/asm/bootparam_utils.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _ASM_X86_BOOTPARAM_UTILS_H
-#define _ASM_X86_BOOTPARAM_UTILS_H
-
-#include <asm/bootparam.h>
-
-/*
- * This file is included from multiple environments.  Do not
- * add completing #includes to make it standalone.
- */
-
-/*
- * Deal with bootloaders which fail to initialize unknown fields in
- * boot_params to zero.  The list fields in this list are taken from
- * analysis of kexec-tools; if other broken bootloaders initialize a
- * different set of fields we will need to figure out how to disambiguate.
- *
- */
-static void sanitize_boot_params(struct boot_params *boot_params)
-{
-	if (boot_params->sentinel) {
-		/*fields in boot_params are not valid, clear them */
-		memset(&boot_params->olpc_ofw_header, 0,
-		       (char *)&boot_params->alt_mem_k -
-			(char *)&boot_params->olpc_ofw_header);
-		memset(&boot_params->kbd_status, 0,
-		       (char *)&boot_params->hdr -
-		       (char *)&boot_params->kbd_status);
-		memset(&boot_params->_pad7[0], 0,
-		       (char *)&boot_params->edd_mbr_sig_buffer[0] -
-			(char *)&boot_params->_pad7[0]);
-		memset(&boot_params->_pad8[0], 0,
-		       (char *)&boot_params->eddbuf[0] -
-			(char *)&boot_params->_pad8[0]);
-		memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
-	}
-}
-
-#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
diff --git a/trunk/arch/x86/include/asm/cpufeature.h b/trunk/arch/x86/include/asm/cpufeature.h
index 93fe929d1cee..2d9075e863a0 100644
--- a/trunk/arch/x86/include/asm/cpufeature.h
+++ b/trunk/arch/x86/include/asm/cpufeature.h
@@ -167,7 +167,6 @@
 #define X86_FEATURE_TBM		(6*32+21) /* trailing bit manipulations */
 #define X86_FEATURE_TOPOEXT	(6*32+22) /* topology extensions CPUID leafs */
 #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB  (6*32+24) /* NB performance counter extensions */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -310,7 +309,6 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
-#define cpu_has_perfctr_nb	boot_cpu_has(X86_FEATURE_PERFCTR_NB)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
 #define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/trunk/arch/x86/include/asm/ftrace.h b/trunk/arch/x86/include/asm/ftrace.h
index 86cb51e1ca96..9a25b522d377 100644
--- a/trunk/arch/x86/include/asm/ftrace.h
+++ b/trunk/arch/x86/include/asm/ftrace.h
@@ -44,6 +44,7 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
+#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/trunk/arch/x86/include/asm/hpet.h b/trunk/arch/x86/include/asm/hpet.h
index b18df579c0e9..434e2106cc87 100644
--- a/trunk/arch/x86/include/asm/hpet.h
+++ b/trunk/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
 extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
 
 #ifdef CONFIG_PCI_MSI
-extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
+extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);
 #else
-static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 {
 	return -EINVAL;
 }
@@ -111,7 +111,6 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
 static inline int hpet_enable(void) { return 0; }
 static inline int is_hpet_enabled(void) { return 0; }
 #define hpet_readl(a) 0
-#define default_setup_hpet_msi	NULL
 
 #endif
 #endif /* _ASM_X86_HPET_H */
diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h
index 10a78c3d3d5a..eb92a6ed2be7 100644
--- a/trunk/arch/x86/include/asm/hw_irq.h
+++ b/trunk/arch/x86/include/asm/hw_irq.h
@@ -101,7 +101,6 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
 	irq_attr->polarity	= polarity;
 }
 
-/* Intel specific interrupt remapping information */
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
 	u16 irte_index;
@@ -109,12 +108,6 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-/* AMD specific interrupt remapping information */
-struct irq_2_irte {
-	u16 devid; /* Device ID for IRTE table */
-	u16 index; /* Index into IRTE table*/
-};
-
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -127,11 +120,7 @@ struct irq_cfg {
 	u8			vector;
 	u8			move_in_progress : 1;
 #ifdef CONFIG_IRQ_REMAP
-	u8			remapped : 1;
-	union {
-		struct irq_2_iommu irq_2_iommu;
-		struct irq_2_irte  irq_2_irte;
-	};
+	struct irq_2_iommu	irq_2_iommu;
 #endif
 };
 
diff --git a/trunk/arch/x86/include/asm/hypervisor.h b/trunk/arch/x86/include/asm/hypervisor.h
index 86095ed14135..b518c7509933 100644
--- a/trunk/arch/x86/include/asm/hypervisor.h
+++ b/trunk/arch/x86/include/asm/hypervisor.h
@@ -25,7 +25,6 @@
 
 extern void init_hypervisor(struct cpuinfo_x86 *c);
 extern void init_hypervisor_platform(void);
-extern bool hypervisor_x2apic_available(void);
 
 /*
  * x86 hypervisor information
@@ -42,9 +41,6 @@ struct hypervisor_x86 {
 
 	/* Platform setup (run once per boot) */
 	void		(*init_platform)(void);
-
-	/* X2APIC detection (run once per boot) */
-	bool		(*x2apic_available)(void);
 };
 
 extern const struct hypervisor_x86 *x86_hyper;
@@ -55,4 +51,13 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
 extern const struct hypervisor_x86 x86_hyper_xen_hvm;
 extern const struct hypervisor_x86 x86_hyper_kvm;
 
+static inline bool hypervisor_x2apic_available(void)
+{
+	if (kvm_para_available())
+		return true;
+	if (xen_x2apic_para_available())
+		return true;
+	return false;
+}
+
 #endif
diff --git a/trunk/arch/x86/include/asm/io_apic.h b/trunk/arch/x86/include/asm/io_apic.h
index 459e50a424d1..73d8c5398ea9 100644
--- a/trunk/arch/x86/include/asm/io_apic.h
+++ b/trunk/arch/x86/include/asm/io_apic.h
@@ -144,24 +144,11 @@ extern int timer_through_8259;
 	(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
 
 struct io_apic_irq_attr;
-struct irq_cfg;
 extern int io_apic_set_pci_routing(struct device *dev, int irq,
 		 struct io_apic_irq_attr *irq_attr);
 void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_insert_resources(void);
 
-extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
-				     unsigned int, int,
-				     struct io_apic_irq_attr *);
-extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
-				     unsigned int, int,
-				     struct io_apic_irq_attr *);
-extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
-
-extern void native_compose_msi_msg(struct pci_dev *pdev,
-				   unsigned int irq, unsigned int dest,
-				   struct msi_msg *msg, u8 hpet_id);
-extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
 int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
 extern int save_ioapic_entries(void);
@@ -192,12 +179,6 @@ extern void __init native_io_apic_init_mappings(void);
 extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
 extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
 extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
-extern void native_disable_io_apic(void);
-extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
-extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
-extern int native_ioapic_set_affinity(struct irq_data *,
-				      const struct cpumask *,
-				      bool);
 
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
 {
@@ -212,9 +193,6 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 {
 	x86_io_apic_ops.modify(apic, reg, value);
 }
-
-extern void io_apic_eoi(unsigned int apic, unsigned int vector);
-
 #else  /* !CONFIG_X86_IO_APIC */
 
 #define io_apic_assign_pci_irqs 0
@@ -245,12 +223,6 @@ static inline void disable_ioapic_support(void) { }
 #define native_io_apic_read		NULL
 #define native_io_apic_write		NULL
 #define native_io_apic_modify		NULL
-#define native_disable_io_apic		NULL
-#define native_io_apic_print_entries	NULL
-#define native_ioapic_set_affinity	NULL
-#define native_setup_ioapic_entry	NULL
-#define native_compose_msi_msg		NULL
-#define native_eoi_ioapic_pin		NULL
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/trunk/arch/x86/include/asm/irq_remapping.h b/trunk/arch/x86/include/asm/irq_remapping.h
index 95fd3527f632..5fb9bbbd2f14 100644
--- a/trunk/arch/x86/include/asm/irq_remapping.h
+++ b/trunk/arch/x86/include/asm/irq_remapping.h
@@ -26,6 +26,8 @@
 
 #ifdef CONFIG_IRQ_REMAP
 
+extern int irq_remapping_enabled;
+
 extern void setup_irq_remapping_ops(void);
 extern int irq_remapping_supported(void);
 extern int irq_remapping_prepare(void);
@@ -38,20 +40,22 @@ extern int setup_ioapic_remapped_entry(int irq,
 				       unsigned int destination,
 				       int vector,
 				       struct io_apic_irq_attr *attr);
+extern int set_remapped_irq_affinity(struct irq_data *data,
+				     const struct cpumask *mask,
+				     bool force);
 extern void free_remapped_irq(int irq);
 extern void compose_remapped_msi_msg(struct pci_dev *pdev,
 				     unsigned int irq, unsigned int dest,
 				     struct msi_msg *msg, u8 hpet_id);
+extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
+extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+				  int index, int sub_handle);
 extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
-extern void panic_if_irq_remap(const char *msg);
-extern bool setup_remapped_irq(int irq,
-			       struct irq_cfg *cfg,
-			       struct irq_chip *chip);
-
-void irq_remap_modify_chip_defaults(struct irq_chip *chip);
 
 #else  /* CONFIG_IRQ_REMAP */
 
+#define irq_remapping_enabled	0
+
 static inline void setup_irq_remapping_ops(void) { }
 static inline int irq_remapping_supported(void) { return 0; }
 static inline int irq_remapping_prepare(void) { return -ENODEV; }
@@ -67,30 +71,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
 {
 	return -ENODEV;
 }
+static inline int set_remapped_irq_affinity(struct irq_data *data,
+					    const struct cpumask *mask,
+					    bool force)
+{
+	return 0;
+}
 static inline void free_remapped_irq(int irq) { }
 static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
 					    unsigned int irq, unsigned int dest,
 					    struct msi_msg *msg, u8 hpet_id)
 {
 }
-static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
+static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
 {
 	return -ENODEV;
 }
-
-static inline void panic_if_irq_remap(const char *msg)
-{
-}
-
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+					 int index, int sub_handle)
 {
+	return -ENODEV;
 }
-
-static inline bool setup_remapped_irq(int irq,
-				      struct irq_cfg *cfg,
-				      struct irq_chip *chip)
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
 {
-	return false;
+	return -ENODEV;
 }
 #endif /* CONFIG_IRQ_REMAP */
 
diff --git a/trunk/arch/x86/include/asm/kvm_para.h b/trunk/arch/x86/include/asm/kvm_para.h
index 65231e173baf..5ed1f16187be 100644
--- a/trunk/arch/x86/include/asm/kvm_para.h
+++ b/trunk/arch/x86/include/asm/kvm_para.h
@@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 	return ret;
 }
 
-static inline bool kvm_para_available(void)
+static inline int kvm_para_available(void)
 {
 	unsigned int eax, ebx, ecx, edx;
 	char signature[13];
 
 	if (boot_cpu_data.cpuid_level < 0)
-		return false;	/* So we don't blow up on old processors */
+		return 0;	/* So we don't blow up on old processors */
 
 	if (cpu_has_hypervisor) {
 		cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -101,10 +101,10 @@ static inline bool kvm_para_available(void)
 		signature[12] = 0;
 
 		if (strcmp(signature, "KVMKVMKVM") == 0)
-			return true;
+			return 1;
 	}
 
-	return false;
+	return 0;
 }
 
 static inline unsigned int kvm_arch_para_features(void)
diff --git a/trunk/arch/x86/include/asm/linkage.h b/trunk/arch/x86/include/asm/linkage.h
index 79327e9483a3..48142971b25d 100644
--- a/trunk/arch/x86/include/asm/linkage.h
+++ b/trunk/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
 #define __asmlinkage_protect0(ret) \
 	__asmlinkage_protect_n(ret)
 #define __asmlinkage_protect1(ret, arg1) \
-	__asmlinkage_protect_n(ret, "m" (arg1))
+	__asmlinkage_protect_n(ret, "g" (arg1))
 #define __asmlinkage_protect2(ret, arg1, arg2) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
+	__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2))
 #define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
+	__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3))
 #define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4))
+	__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
+			      "g" (arg4))
 #define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4), "m" (arg5))
+	__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
+			      "g" (arg4), "g" (arg5))
 #define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4), "m" (arg5), "m" (arg6))
+	__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
+			      "g" (arg4), "g" (arg5), "g" (arg6))
 
 #endif /* CONFIG_X86_32 */
 
diff --git a/trunk/arch/x86/include/asm/mce.h b/trunk/arch/x86/include/asm/mce.h
index f4076af1f4ed..ecdfee60ee4a 100644
--- a/trunk/arch/x86/include/asm/mce.h
+++ b/trunk/arch/x86/include/asm/mce.h
@@ -3,90 +3,6 @@
 
 #include <uapi/asm/mce.h>
 
-/*
- * Machine Check support for x86
- */
-
-/* MCG_CAP register defines */
-#define MCG_BANKCNT_MASK	0xff         /* Number of Banks */
-#define MCG_CTL_P		(1ULL<<8)    /* MCG_CTL register available */
-#define MCG_EXT_P		(1ULL<<9)    /* Extended registers available */
-#define MCG_CMCI_P		(1ULL<<10)   /* CMCI supported */
-#define MCG_EXT_CNT_MASK	0xff0000     /* Number of Extended registers */
-#define MCG_EXT_CNT_SHIFT	16
-#define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
-#define MCG_SER_P		(1ULL<<24)   /* MCA recovery/new status bits */
-
-/* MCG_STATUS register defines */
-#define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
-#define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
-#define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */
-
-/* MCi_STATUS register defines */
-#define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
-#define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
-#define MCI_STATUS_UC    (1ULL<<61)  /* uncorrected error */
-#define MCI_STATUS_EN    (1ULL<<60)  /* error enabled */
-#define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
-#define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
-#define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
-#define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
-#define MCACOD		  0xffff     /* MCA Error Code */
-
-/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
-#define MCACOD_SCRUB	0x00C0	/* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK	0xfff0
-#define MCACOD_L3WB	0x017A	/* L3 Explicit Writeback */
-#define MCACOD_DATA	0x0134	/* Data Load */
-#define MCACOD_INSTR	0x0150	/* Instruction Fetch */
-
-/* MCi_MISC register defines */
-#define MCI_MISC_ADDR_LSB(m)	((m) & 0x3f)
-#define MCI_MISC_ADDR_MODE(m)	(((m) >> 6) & 7)
-#define  MCI_MISC_ADDR_SEGOFF	0	/* segment offset */
-#define  MCI_MISC_ADDR_LINEAR	1	/* linear address */
-#define  MCI_MISC_ADDR_PHYS	2	/* physical address */
-#define  MCI_MISC_ADDR_MEM	3	/* memory address */
-#define  MCI_MISC_ADDR_GENERIC	7	/* generic */
-
-/* CTL2 register defines */
-#define MCI_CTL2_CMCI_EN		(1ULL << 30)
-#define MCI_CTL2_CMCI_THRESHOLD_MASK	0x7fffULL
-
-#define MCJ_CTX_MASK		3
-#define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
-#define MCJ_CTX_RANDOM		0    /* inject context: random */
-#define MCJ_CTX_PROCESS		0x1  /* inject context: process */
-#define MCJ_CTX_IRQ		0x2  /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST	0x4  /* do NMI broadcasting */
-#define MCJ_EXCEPTION		0x8  /* raise as exception */
-#define MCJ_IRQ_BRAODCAST	0x10 /* do IRQ broadcasting */
-
-#define MCE_OVERFLOW 0		/* bit 0 in flags means overflow */
-
-/* Software defined banks */
-#define MCE_EXTENDED_BANK	128
-#define MCE_THERMAL_BANK	(MCE_EXTENDED_BANK + 0)
-#define K8_MCE_THRESHOLD_BASE   (MCE_EXTENDED_BANK + 1)
-
-#define MCE_LOG_LEN 32
-#define MCE_LOG_SIGNATURE	"MACHINECHECK"
-
-/*
- * This structure contains all data related to the MCE log.  Also
- * carries a signature to make it easier to find from external
- * debugging tools.  Each entry is only valid when its finished flag
- * is set.
- */
-struct mce_log {
-	char signature[12]; /* "MACHINECHECK" */
-	unsigned len;	    /* = MCE_LOG_LEN */
-	unsigned next;
-	unsigned flags;
-	unsigned recordlen;	/* length of struct mce */
-	struct mce entry[MCE_LOG_LEN];
-};
 
 struct mca_config {
 	bool dont_log_ce;
diff --git a/trunk/arch/x86/include/asm/pci.h b/trunk/arch/x86/include/asm/pci.h
index c28fd02f4bf7..dba7805176bf 100644
--- a/trunk/arch/x86/include/asm/pci.h
+++ b/trunk/arch/x86/include/asm/pci.h
@@ -121,12 +121,9 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
 #define arch_teardown_msi_irq x86_teardown_msi_irq
 #define arch_restore_msi_irqs x86_restore_msi_irqs
 /* implemented in arch/x86/kernel/apic/io_apic. */
-struct msi_desc;
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev, int irq);
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-		  unsigned int irq_base, unsigned int irq_offset);
 /* default to the implementation in drivers/lib/msi.c */
 #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
 #define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/trunk/arch/x86/include/asm/perf_event.h b/trunk/arch/x86/include/asm/perf_event.h
index 57cb63402213..4fabcdf1cfa7 100644
--- a/trunk/arch/x86/include/asm/perf_event.h
+++ b/trunk/arch/x86/include/asm/perf_event.h
@@ -29,13 +29,8 @@
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
 
-#define AMD64_EVENTSEL_INT_CORE_ENABLE			(1ULL << 36)
-#define AMD64_EVENTSEL_GUESTONLY			(1ULL << 40)
-#define AMD64_EVENTSEL_HOSTONLY				(1ULL << 41)
-
-#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT		37
-#define AMD64_EVENTSEL_INT_CORE_SEL_MASK		\
-	(0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
+#define AMD_PERFMON_EVENTSEL_GUESTONLY			(1ULL << 40)
+#define AMD_PERFMON_EVENTSEL_HOSTONLY			(1ULL << 41)
 
 #define AMD64_EVENTSEL_EVENT	\
 	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -51,12 +46,8 @@
 #define AMD64_RAW_EVENT_MASK		\
 	(X86_RAW_EVENT_MASK          |  \
 	 AMD64_EVENTSEL_EVENT)
-#define AMD64_RAW_EVENT_MASK_NB		\
-	(AMD64_EVENTSEL_EVENT        |  \
-	 ARCH_PERFMON_EVENTSEL_UMASK)
 #define AMD64_NUM_COUNTERS				4
 #define AMD64_NUM_COUNTERS_CORE				6
-#define AMD64_NUM_COUNTERS_NB				4
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/trunk/arch/x86/include/asm/pgtable.h b/trunk/arch/x86/include/asm/pgtable.h
index fc304279b559..5199db2923d3 100644
--- a/trunk/arch/x86/include/asm/pgtable.h
+++ b/trunk/arch/x86/include/asm/pgtable.h
@@ -142,11 +142,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
-static inline unsigned long pud_pfn(pud_t pud)
-{
-	return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
-}
-
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
@@ -786,18 +781,6 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
        memcpy(dst, src, count * sizeof(pgd_t));
 }
 
-/*
- * The x86 doesn't have any external MMU info: the kernel page
- * tables contain all the necessary information.
- */
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-		unsigned long addr, pte_t *ptep)
-{
-}
-static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
-		unsigned long addr, pmd_t *pmd)
-{
-}
 
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
diff --git a/trunk/arch/x86/include/asm/pgtable_32.h b/trunk/arch/x86/include/asm/pgtable_32.h
index 9ee322103c6d..8faa215a503e 100644
--- a/trunk/arch/x86/include/asm/pgtable_32.h
+++ b/trunk/arch/x86/include/asm/pgtable_32.h
@@ -66,6 +66,13 @@ do {						\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
+/*
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/trunk/arch/x86/include/asm/pgtable_64.h b/trunk/arch/x86/include/asm/pgtable_64.h
index 615b0c78449f..47356f9df82e 100644
--- a/trunk/arch/x86/include/asm/pgtable_64.h
+++ b/trunk/arch/x86/include/asm/pgtable_64.h
@@ -142,6 +142,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/trunk/arch/x86/include/asm/required-features.h b/trunk/arch/x86/include/asm/required-features.h
index 5c6e4fb370f5..6c7fc25f2c34 100644
--- a/trunk/arch/x86/include/asm/required-features.h
+++ b/trunk/arch/x86/include/asm/required-features.h
@@ -47,12 +47,6 @@
 # define NEED_NOPL	0
 #endif
 
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE	(1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE	0
-#endif
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_PARAVIRT
 /* Paravirtualized systems may not have PSE or PGE available */
@@ -86,7 +80,7 @@
 
 #define REQUIRED_MASK2	0
 #define REQUIRED_MASK3	(NEED_NOPL)
-#define REQUIRED_MASK4	(NEED_MOVBE)
+#define REQUIRED_MASK4	0
 #define REQUIRED_MASK5	0
 #define REQUIRED_MASK6	0
 #define REQUIRED_MASK7	0
diff --git a/trunk/arch/x86/include/asm/x86_init.h b/trunk/arch/x86/include/asm/x86_init.h
index 7669941cc9d2..57693498519c 100644
--- a/trunk/arch/x86/include/asm/x86_init.h
+++ b/trunk/arch/x86/include/asm/x86_init.h
@@ -181,38 +181,19 @@ struct x86_platform_ops {
 };
 
 struct pci_dev;
-struct msi_msg;
 
 struct x86_msi_ops {
 	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
-	void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
-				unsigned int dest, struct msi_msg *msg,
-			       u8 hpet_id);
 	void (*teardown_msi_irq)(unsigned int irq);
 	void (*teardown_msi_irqs)(struct pci_dev *dev);
 	void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
-	int  (*setup_hpet_msi)(unsigned int irq, unsigned int id);
 };
 
-struct IO_APIC_route_entry;
-struct io_apic_irq_attr;
-struct irq_data;
-struct cpumask;
-
 struct x86_io_apic_ops {
-	void		(*init)   (void);
-	unsigned int	(*read)   (unsigned int apic, unsigned int reg);
-	void		(*write)  (unsigned int apic, unsigned int reg, unsigned int value);
-	void		(*modify) (unsigned int apic, unsigned int reg, unsigned int value);
-	void		(*disable)(void);
-	void		(*print_entries)(unsigned int apic, unsigned int nr_entries);
-	int		(*set_affinity)(struct irq_data *data,
-					const struct cpumask *mask,
-					bool force);
-	int		(*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
-				       unsigned int destination, int vector,
-				       struct io_apic_irq_attr *attr);
-	void		(*eoi_ioapic_pin)(int apic, int pin, int vector);
+	void		(*init)  (void);
+	unsigned int	(*read)  (unsigned int apic, unsigned int reg);
+	void		(*write) (unsigned int apic, unsigned int reg, unsigned int value);
+	void		(*modify)(unsigned int apic, unsigned int reg, unsigned int value);
 };
 
 extern struct x86_init_ops x86_init;
diff --git a/trunk/arch/x86/include/asm/xor.h b/trunk/arch/x86/include/asm/xor.h
index d8829751b3f8..f8fde90bc45e 100644
--- a/trunk/arch/x86/include/asm/xor.h
+++ b/trunk/arch/x86/include/asm/xor.h
@@ -1,499 +1,10 @@
 #ifdef CONFIG_KMEMCHECK
 /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
 # include <asm-generic/xor.h>
-#elif !defined(_ASM_X86_XOR_H)
-#define _ASM_X86_XOR_H
-
-/*
- * Optimized RAID-5 checksumming functions for SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-/*
- * Based on
- * High-speed RAID5 checksumming functions utilizing SSE instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-/*
- * x86-64 changes / gcc fixes from Andi Kleen.
- * Copyright 2002 Andi Kleen, SuSE Labs.
- *
- * This hasn't been optimized for the hammer yet, but there are likely
- * no advantages to be gotten from x86-64 here anyways.
- */
-
-#include <asm/i387.h>
-
-#ifdef CONFIG_X86_32
-/* reduce register pressure */
-# define XOR_CONSTANT_CONSTRAINT "i"
 #else
-# define XOR_CONSTANT_CONSTRAINT "re"
-#endif
-
-#define OFFS(x)		"16*("#x")"
-#define PF_OFFS(x)	"256+16*("#x")"
-#define PF0(x)		"	prefetchnta "PF_OFFS(x)"(%[p1])		;\n"
-#define LD(x, y)	"	movaps "OFFS(x)"(%[p1]), %%xmm"#y"	;\n"
-#define ST(x, y)	"	movaps %%xmm"#y", "OFFS(x)"(%[p1])	;\n"
-#define PF1(x)		"	prefetchnta "PF_OFFS(x)"(%[p2])		;\n"
-#define PF2(x)		"	prefetchnta "PF_OFFS(x)"(%[p3])		;\n"
-#define PF3(x)		"	prefetchnta "PF_OFFS(x)"(%[p4])		;\n"
-#define PF4(x)		"	prefetchnta "PF_OFFS(x)"(%[p5])		;\n"
-#define XO1(x, y)	"	xorps "OFFS(x)"(%[p2]), %%xmm"#y"	;\n"
-#define XO2(x, y)	"	xorps "OFFS(x)"(%[p3]), %%xmm"#y"	;\n"
-#define XO3(x, y)	"	xorps "OFFS(x)"(%[p4]), %%xmm"#y"	;\n"
-#define XO4(x, y)	"	xorps "OFFS(x)"(%[p5]), %%xmm"#y"	;\n"
-#define NOP(x)
-
-#define BLK64(pf, op, i)				\
-		pf(i)					\
-		op(i, 0)				\
-			op(i + 1, 1)			\
-				op(i + 2, 2)		\
-					op(i + 3, 3)
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i)					\
-		LD(i, 0)				\
-			LD(i + 1, 1)			\
-		PF1(i)					\
-				PF1(i + 2)		\
-				LD(i + 2, 2)		\
-					LD(i + 3, 3)	\
-		PF0(i + 4)				\
-				PF0(i + 6)		\
-		XO1(i, 0)				\
-			XO1(i + 1, 1)			\
-				XO1(i + 2, 2)		\
-					XO1(i + 3, 3)	\
-		ST(i, 0)				\
-			ST(i + 1, 1)			\
-				ST(i + 2, 2)		\
-					ST(i + 3, 3)	\
-
-
-		PF0(0)
-				PF0(2)
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines),
-	  [p1] "+r" (p1), [p2] "+r" (p2)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i)			\
-		BLK64(PF0, LD, i)	\
-		BLK64(PF1, XO1, i)	\
-		BLK64(NOP, ST, i)	\
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines),
-	  [p1] "+r" (p1), [p2] "+r" (p2)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-		PF1(i)					\
-				PF1(i + 2)		\
-		LD(i, 0)				\
-			LD(i + 1, 1)			\
-				LD(i + 2, 2)		\
-					LD(i + 3, 3)	\
-		PF2(i)					\
-				PF2(i + 2)		\
-		PF0(i + 4)				\
-				PF0(i + 6)		\
-		XO1(i, 0)				\
-			XO1(i + 1, 1)			\
-				XO1(i + 2, 2)		\
-					XO1(i + 3, 3)	\
-		XO2(i, 0)				\
-			XO2(i + 1, 1)			\
-				XO2(i + 2, 2)		\
-					XO2(i + 3, 3)	\
-		ST(i, 0)				\
-			ST(i + 1, 1)			\
-				ST(i + 2, 2)		\
-					ST(i + 3, 3)	\
-
-
-		PF0(0)
-				PF0(2)
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       add %[inc], %[p3]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines),
-	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i)			\
-		BLK64(PF0, LD, i)	\
-		BLK64(PF1, XO1, i)	\
-		BLK64(PF2, XO2, i)	\
-		BLK64(NOP, ST, i)	\
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       add %[inc], %[p3]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines),
-	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3, unsigned long *p4)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-		PF1(i)					\
-				PF1(i + 2)		\
-		LD(i, 0)				\
-			LD(i + 1, 1)			\
-				LD(i + 2, 2)		\
-					LD(i + 3, 3)	\
-		PF2(i)					\
-				PF2(i + 2)		\
-		XO1(i, 0)				\
-			XO1(i + 1, 1)			\
-				XO1(i + 2, 2)		\
-					XO1(i + 3, 3)	\
-		PF3(i)					\
-				PF3(i + 2)		\
-		PF0(i + 4)				\
-				PF0(i + 6)		\
-		XO2(i, 0)				\
-			XO2(i + 1, 1)			\
-				XO2(i + 2, 2)		\
-					XO2(i + 3, 3)	\
-		XO3(i, 0)				\
-			XO3(i + 1, 1)			\
-				XO3(i + 2, 2)		\
-					XO3(i + 3, 3)	\
-		ST(i, 0)				\
-			ST(i + 1, 1)			\
-				ST(i + 2, 2)		\
-					ST(i + 3, 3)	\
-
-
-		PF0(0)
-				PF0(2)
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       add %[inc], %[p3]       ;\n"
-	"       add %[inc], %[p4]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines), [p1] "+r" (p1),
-	  [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3, unsigned long *p4)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i)			\
-		BLK64(PF0, LD, i)	\
-		BLK64(PF1, XO1, i)	\
-		BLK64(PF2, XO2, i)	\
-		BLK64(PF3, XO3, i)	\
-		BLK64(NOP, ST, i)	\
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       add %[inc], %[p3]       ;\n"
-	"       add %[inc], %[p4]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines), [p1] "+r" (p1),
-	  [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-		PF1(i)					\
-				PF1(i + 2)		\
-		LD(i, 0)				\
-			LD(i + 1, 1)			\
-				LD(i + 2, 2)		\
-					LD(i + 3, 3)	\
-		PF2(i)					\
-				PF2(i + 2)		\
-		XO1(i, 0)				\
-			XO1(i + 1, 1)			\
-				XO1(i + 2, 2)		\
-					XO1(i + 3, 3)	\
-		PF3(i)					\
-				PF3(i + 2)		\
-		XO2(i, 0)				\
-			XO2(i + 1, 1)			\
-				XO2(i + 2, 2)		\
-					XO2(i + 3, 3)	\
-		PF4(i)					\
-				PF4(i + 2)		\
-		PF0(i + 4)				\
-				PF0(i + 6)		\
-		XO3(i, 0)				\
-			XO3(i + 1, 1)			\
-				XO3(i + 2, 2)		\
-					XO3(i + 3, 3)	\
-		XO4(i, 0)				\
-			XO4(i + 1, 1)			\
-				XO4(i + 2, 2)		\
-					XO4(i + 3, 3)	\
-		ST(i, 0)				\
-			ST(i + 1, 1)			\
-				ST(i + 2, 2)		\
-					ST(i + 3, 3)	\
-
-
-		PF0(0)
-				PF0(2)
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       add %[inc], %[p3]       ;\n"
-	"       add %[inc], %[p4]       ;\n"
-	"       add %[inc], %[p5]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
-	  [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
-	unsigned long lines = bytes >> 8;
-
-	kernel_fpu_begin();
-
-	asm volatile(
-#undef BLOCK
-#define BLOCK(i)			\
-		BLK64(PF0, LD, i)	\
-		BLK64(PF1, XO1, i)	\
-		BLK64(PF2, XO2, i)	\
-		BLK64(PF3, XO3, i)	\
-		BLK64(PF4, XO4, i)	\
-		BLK64(NOP, ST, i)	\
-
-	" .align 32			;\n"
-	" 1:                            ;\n"
-
-		BLOCK(0)
-		BLOCK(4)
-		BLOCK(8)
-		BLOCK(12)
-
-	"       add %[inc], %[p1]       ;\n"
-	"       add %[inc], %[p2]       ;\n"
-	"       add %[inc], %[p3]       ;\n"
-	"       add %[inc], %[p4]       ;\n"
-	"       add %[inc], %[p5]       ;\n"
-	"       dec %[cnt]              ;\n"
-	"       jnz 1b                  ;\n"
-	: [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
-	  [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
-	: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
-	: "memory");
-
-	kernel_fpu_end();
-}
-
-static struct xor_block_template xor_block_sse_pf64 = {
-	.name = "prefetch64-sse",
-	.do_2 = xor_sse_2_pf64,
-	.do_3 = xor_sse_3_pf64,
-	.do_4 = xor_sse_4_pf64,
-	.do_5 = xor_sse_5_pf64,
-};
-
-#undef LD
-#undef XO1
-#undef XO2
-#undef XO3
-#undef XO4
-#undef ST
-#undef NOP
-#undef BLK64
-#undef BLOCK
-
-#undef XOR_CONSTANT_CONSTRAINT
-
 #ifdef CONFIG_X86_32
 # include <asm/xor_32.h>
 #else
 # include <asm/xor_64.h>
 #endif
-
-#define XOR_SELECT_TEMPLATE(FASTEST) \
-	AVX_SELECT(FASTEST)
-
-#endif /* _ASM_X86_XOR_H */
+#endif
diff --git a/trunk/arch/x86/include/asm/xor_32.h b/trunk/arch/x86/include/asm/xor_32.h
index ce05722e3c68..f79cb7ec0e06 100644
--- a/trunk/arch/x86/include/asm/xor_32.h
+++ b/trunk/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
 #define _ASM_X86_XOR_32_H
 
 /*
- * Optimized RAID-5 checksumming functions for MMX.
+ * Optimized RAID-5 checksumming functions for MMX and SSE.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -529,6 +529,290 @@ static struct xor_block_template xor_block_p5_mmx = {
 	.do_5 = xor_p5_mmx_5,
 };
 
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+#define OFFS(x)		"16*("#x")"
+#define PF_OFFS(x)	"256+16*("#x")"
+#define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"
+#define LD(x, y)	"       movaps   "OFFS(x)"(%1), %%xmm"#y"	;\n"
+#define ST(x, y)	"       movaps %%xmm"#y",   "OFFS(x)"(%1)	;\n"
+#define PF1(x)		"	prefetchnta "PF_OFFS(x)"(%2)		;\n"
+#define PF2(x)		"	prefetchnta "PF_OFFS(x)"(%3)		;\n"
+#define PF3(x)		"	prefetchnta "PF_OFFS(x)"(%4)		;\n"
+#define PF4(x)		"	prefetchnta "PF_OFFS(x)"(%5)		;\n"
+#define PF5(x)		"	prefetchnta "PF_OFFS(x)"(%6)		;\n"
+#define XO1(x, y)	"       xorps   "OFFS(x)"(%2), %%xmm"#y"	;\n"
+#define XO2(x, y)	"       xorps   "OFFS(x)"(%3), %%xmm"#y"	;\n"
+#define XO3(x, y)	"       xorps   "OFFS(x)"(%4), %%xmm"#y"	;\n"
+#define XO4(x, y)	"       xorps   "OFFS(x)"(%5), %%xmm"#y"	;\n"
+#define XO5(x, y)	"       xorps   "OFFS(x)"(%6), %%xmm"#y"	;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	unsigned long lines = bytes >> 8;
+
+	kernel_fpu_begin();
+
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i)					\
+		LD(i, 0)				\
+			LD(i + 1, 1)			\
+		PF1(i)					\
+				PF1(i + 2)		\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO1(i, 0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		ST(i, 0)				\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addl $256, %1           ;\n"
+	"       addl $256, %2           ;\n"
+	"       decl %0                 ;\n"
+	"       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2)
+	:
+	: "memory");
+
+	kernel_fpu_end();
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3)
+{
+	unsigned long lines = bytes >> 8;
+
+	kernel_fpu_begin();
+
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i + 2)		\
+		LD(i,0)					\
+			LD(i + 1, 1)			\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF2(i)					\
+				PF2(i + 2)		\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO1(i,0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		XO2(i,0)				\
+			XO2(i + 1, 1)			\
+				XO2(i + 2, 2)		\
+					XO2(i + 3, 3)	\
+		ST(i,0)					\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addl $256, %1           ;\n"
+	"       addl $256, %2           ;\n"
+	"       addl $256, %3           ;\n"
+	"       decl %0                 ;\n"
+	"       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r"(p2), "+r"(p3)
+	:
+	: "memory" );
+
+	kernel_fpu_end();
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4)
+{
+	unsigned long lines = bytes >> 8;
+
+	kernel_fpu_begin();
+
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i + 2)		\
+		LD(i,0)					\
+			LD(i + 1, 1)			\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF2(i)					\
+				PF2(i + 2)		\
+		XO1(i,0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		PF3(i)					\
+				PF3(i + 2)		\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO2(i,0)				\
+			XO2(i + 1, 1)			\
+				XO2(i + 2, 2)		\
+					XO2(i + 3, 3)	\
+		XO3(i,0)				\
+			XO3(i + 1, 1)			\
+				XO3(i + 2, 2)		\
+					XO3(i + 3, 3)	\
+		ST(i,0)					\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addl $256, %1           ;\n"
+	"       addl $256, %2           ;\n"
+	"       addl $256, %3           ;\n"
+	"       addl $256, %4           ;\n"
+	"       decl %0                 ;\n"
+	"       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+	:
+	: "memory" );
+
+	kernel_fpu_end();
+}
+
+/*
+ * xor_sse_5 - XOR four source buffers into p1 using SSE.
+ * @bytes: length of each buffer; processed in 256-byte chunks (bytes >> 8)
+ * @p1:    destination, also the first source (read by LD, written by ST)
+ * @p2..@p5: additional sources XORed into the p1 data
+ */
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	unsigned long lines = bytes >> 8;
+
+	kernel_fpu_begin();
+
+	/*
+	 * The loop advances all five pointers, so every one of them is a
+	 * read/write ("+r") operand.  p4 and p5 used to be passed as plain
+	 * inputs and modified anyway to stay under the 10-operand limit
+	 * of GCC < 3.1; GCC documents modifying an input-only operand as
+	 * invalid, and current compilers accept up to 30 operands.
+	 */
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i + 2)		\
+		LD(i,0)					\
+			LD(i + 1, 1)			\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF2(i)					\
+				PF2(i + 2)		\
+		XO1(i,0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		PF3(i)					\
+				PF3(i + 2)		\
+		XO2(i,0)				\
+			XO2(i + 1, 1)			\
+				XO2(i + 2, 2)		\
+					XO2(i + 3, 3)	\
+		PF4(i)					\
+				PF4(i + 2)		\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO3(i,0)				\
+			XO3(i + 1, 1)			\
+				XO3(i + 2, 2)		\
+					XO3(i + 3, 3)	\
+		XO4(i,0)				\
+			XO4(i + 1, 1)			\
+				XO4(i + 2, 2)		\
+					XO4(i + 3, 3)	\
+		ST(i,0)					\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addl $256, %1           ;\n"
+	"       addl $256, %2           ;\n"
+	"       addl $256, %3           ;\n"
+	"       addl $256, %4           ;\n"
+	"       addl $256, %5           ;\n"
+	"       decl %0                 ;\n"
+	"       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4), "+r" (p5)
+	:
+	: "memory");
+
+	kernel_fpu_end();
+}
+
 static struct xor_block_template xor_block_pIII_sse = {
 	.name = "pIII_sse",
 	.do_2 = xor_sse_2,
@@ -543,25 +827,26 @@ static struct xor_block_template xor_block_pIII_sse = {
 /* Also try the generic routines.  */
 #include <asm-generic/xor.h>
 
-/* We force the use of the SSE xor block because it can write around L2.
-   We may also be able to load into the L1 only depending on how the cpu
-   deals with a load to a line that is being prefetched.  */
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES				\
 do {							\
+	xor_speed(&xor_block_8regs);			\
+	xor_speed(&xor_block_8regs_p);			\
+	xor_speed(&xor_block_32regs);			\
+	xor_speed(&xor_block_32regs_p);			\
 	AVX_XOR_SPEED;					\
-	if (cpu_has_xmm) {				\
+	if (cpu_has_xmm)				\
 		xor_speed(&xor_block_pIII_sse);		\
-		xor_speed(&xor_block_sse_pf64);		\
-	} else if (cpu_has_mmx) {			\
+	if (cpu_has_mmx) {				\
 		xor_speed(&xor_block_pII_mmx);		\
 		xor_speed(&xor_block_p5_mmx);		\
-	} else {					\
-		xor_speed(&xor_block_8regs);		\
-		xor_speed(&xor_block_8regs_p);		\
-		xor_speed(&xor_block_32regs);		\
-		xor_speed(&xor_block_32regs_p);		\
 	}						\
 } while (0)
 
+/* We force the use of the SSE xor block because it can write around L2.
+   We may also be able to load into the L1 only depending on how the cpu
+   deals with a load to a line that is being prefetched.  */
+#define XOR_SELECT_TEMPLATE(FASTEST)			\
+	AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+
 #endif /* _ASM_X86_XOR_32_H */
diff --git a/trunk/arch/x86/include/asm/xor_64.h b/trunk/arch/x86/include/asm/xor_64.h
index 546f1e3b87cc..87ac522c4af5 100644
--- a/trunk/arch/x86/include/asm/xor_64.h
+++ b/trunk/arch/x86/include/asm/xor_64.h
@@ -1,6 +1,301 @@
 #ifndef _ASM_X86_XOR_64_H
 #define _ASM_X86_XOR_64_H
 
+/*
+ * Optimized RAID-5 checksumming functions for SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+/*
+ * Based on
+ * High-speed RAID5 checksumming functions utilizing SSE instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+/*
+ * x86-64 changes / gcc fixes from Andi Kleen.
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyways.
+ */
+
+#include <asm/i387.h>
+
+#define OFFS(x)		"16*("#x")"
+#define PF_OFFS(x)	"256+16*("#x")"
+#define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%[p1])		;\n"
+#define LD(x, y)	"       movaps   "OFFS(x)"(%[p1]), %%xmm"#y"	;\n"
+#define ST(x, y)	"       movaps %%xmm"#y",   "OFFS(x)"(%[p1])	;\n"
+#define PF1(x)		"	prefetchnta "PF_OFFS(x)"(%[p2])		;\n"
+#define PF2(x)		"	prefetchnta "PF_OFFS(x)"(%[p3])		;\n"
+#define PF3(x)		"	prefetchnta "PF_OFFS(x)"(%[p4])		;\n"
+#define PF4(x)		"	prefetchnta "PF_OFFS(x)"(%[p5])		;\n"
+#define PF5(x)		"	prefetchnta "PF_OFFS(x)"(%[p6])		;\n"
+#define XO1(x, y)	"       xorps   "OFFS(x)"(%[p2]), %%xmm"#y"	;\n"
+#define XO2(x, y)	"       xorps   "OFFS(x)"(%[p3]), %%xmm"#y"	;\n"
+#define XO3(x, y)	"       xorps   "OFFS(x)"(%[p4]), %%xmm"#y"	;\n"
+#define XO4(x, y)	"       xorps   "OFFS(x)"(%[p5]), %%xmm"#y"	;\n"
+#define XO5(x, y)	"       xorps   "OFFS(x)"(%[p6]), %%xmm"#y"	;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	unsigned int lines = bytes >> 8;
+
+	kernel_fpu_begin();
+
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+		LD(i, 0)				\
+			LD(i + 1, 1)			\
+		PF1(i)					\
+				PF1(i + 2)		\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO1(i, 0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		ST(i, 0)				\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addq %[inc], %[p1]           ;\n"
+	"       addq %[inc], %[p2]           ;\n"
+		"		decl %[cnt] ; jnz 1b"
+	: [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
+	: [inc] "r" (256UL)
+	: "memory");
+
+	kernel_fpu_end();
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3)
+{
+	unsigned int lines = bytes >> 8;
+
+	kernel_fpu_begin();
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i + 2)		\
+		LD(i, 0)					\
+			LD(i + 1, 1)			\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF2(i)					\
+				PF2(i + 2)		\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO1(i, 0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		XO2(i, 0)				\
+			XO2(i + 1, 1)			\
+				XO2(i + 2, 2)		\
+					XO2(i + 3, 3)	\
+		ST(i, 0)				\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addq %[inc], %[p1]           ;\n"
+	"       addq %[inc], %[p2]          ;\n"
+	"       addq %[inc], %[p3]           ;\n"
+		"		decl %[cnt] ; jnz 1b"
+	: [cnt] "+r" (lines),
+	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+	: [inc] "r" (256UL)
+	: "memory");
+	kernel_fpu_end();
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4)
+{
+	unsigned int lines = bytes >> 8;
+
+	kernel_fpu_begin();
+
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i + 2)		\
+		LD(i, 0)				\
+			LD(i + 1, 1)			\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF2(i)					\
+				PF2(i + 2)		\
+		XO1(i, 0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		PF3(i)					\
+				PF3(i + 2)		\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO2(i, 0)				\
+			XO2(i + 1, 1)			\
+				XO2(i + 2, 2)		\
+					XO2(i + 3, 3)	\
+		XO3(i, 0)				\
+			XO3(i + 1, 1)			\
+				XO3(i + 2, 2)		\
+					XO3(i + 3, 3)	\
+		ST(i, 0)				\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addq %[inc], %[p1]           ;\n"
+	"       addq %[inc], %[p2]           ;\n"
+	"       addq %[inc], %[p3]           ;\n"
+	"       addq %[inc], %[p4]           ;\n"
+	"	decl %[cnt] ; jnz 1b"
+	: [cnt] "+c" (lines),
+	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+	: [inc] "r" (256UL)
+	: "memory" );
+
+	kernel_fpu_end();
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	unsigned int lines = bytes >> 8;
+
+	kernel_fpu_begin();
+
+	asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+		PF1(i)					\
+				PF1(i + 2)		\
+		LD(i, 0)				\
+			LD(i + 1, 1)			\
+				LD(i + 2, 2)		\
+					LD(i + 3, 3)	\
+		PF2(i)					\
+				PF2(i + 2)		\
+		XO1(i, 0)				\
+			XO1(i + 1, 1)			\
+				XO1(i + 2, 2)		\
+					XO1(i + 3, 3)	\
+		PF3(i)					\
+				PF3(i + 2)		\
+		XO2(i, 0)				\
+			XO2(i + 1, 1)			\
+				XO2(i + 2, 2)		\
+					XO2(i + 3, 3)	\
+		PF4(i)					\
+				PF4(i + 2)		\
+		PF0(i + 4)				\
+				PF0(i + 6)		\
+		XO3(i, 0)				\
+			XO3(i + 1, 1)			\
+				XO3(i + 2, 2)		\
+					XO3(i + 3, 3)	\
+		XO4(i, 0)				\
+			XO4(i + 1, 1)			\
+				XO4(i + 2, 2)		\
+					XO4(i + 3, 3)	\
+		ST(i, 0)				\
+			ST(i + 1, 1)			\
+				ST(i + 2, 2)		\
+					ST(i + 3, 3)	\
+
+
+		PF0(0)
+				PF0(2)
+
+	" .align 32			;\n"
+	" 1:                            ;\n"
+
+		BLOCK(0)
+		BLOCK(4)
+		BLOCK(8)
+		BLOCK(12)
+
+	"       addq %[inc], %[p1]           ;\n"
+	"       addq %[inc], %[p2]           ;\n"
+	"       addq %[inc], %[p3]           ;\n"
+	"       addq %[inc], %[p4]           ;\n"
+	"       addq %[inc], %[p5]           ;\n"
+	"	decl %[cnt] ; jnz 1b"
+	: [cnt] "+c" (lines),
+	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
+	  [p5] "+r" (p5)
+	: [inc] "r" (256UL)
+	: "memory");
+
+	kernel_fpu_end();
+}
+
 static struct xor_block_template xor_block_sse = {
 	.name = "generic_sse",
 	.do_2 = xor_sse_2,
@@ -13,15 +308,17 @@ static struct xor_block_template xor_block_sse = {
 /* Also try the AVX routines */
 #include <asm/xor_avx.h>
 
-/* We force the use of the SSE xor block because it can write around L2.
-   We may also be able to load into the L1 only depending on how the cpu
-   deals with a load to a line that is being prefetched.  */
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES			\
 do {						\
 	AVX_XOR_SPEED;				\
-	xor_speed(&xor_block_sse_pf64);		\
 	xor_speed(&xor_block_sse);		\
 } while (0)
 
+/* We force the use of the SSE xor block because it can write around L2.
+   We may also be able to load into the L1 only depending on how the cpu
+   deals with a load to a line that is being prefetched.  */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+	AVX_SELECT(&xor_block_sse)
+
 #endif /* _ASM_X86_XOR_64_H */
diff --git a/trunk/arch/x86/include/uapi/asm/mce.h b/trunk/arch/x86/include/uapi/asm/mce.h
index a0eab85ce7b8..58c829871c31 100644
--- a/trunk/arch/x86/include/uapi/asm/mce.h
+++ b/trunk/arch/x86/include/uapi/asm/mce.h
@@ -4,6 +4,66 @@
 #include <linux/types.h>
 #include <asm/ioctls.h>
 
+/*
+ * Machine Check support for x86
+ */
+
+/* MCG_CAP register defines */
+#define MCG_BANKCNT_MASK	0xff         /* Number of Banks */
+#define MCG_CTL_P		(1ULL<<8)    /* MCG_CTL register available */
+#define MCG_EXT_P		(1ULL<<9)    /* Extended registers available */
+#define MCG_CMCI_P		(1ULL<<10)   /* CMCI supported */
+#define MCG_EXT_CNT_MASK	0xff0000     /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT	16
+#define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P	 	(1ULL<<24)   /* MCA recovery/new status bits */
+
+/* MCG_STATUS register defines */
+#define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
+#define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
+#define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */
+
+/* MCi_STATUS register defines */
+#define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
+#define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
+#define MCI_STATUS_UC    (1ULL<<61)  /* uncorrected error */
+#define MCI_STATUS_EN    (1ULL<<60)  /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
+#define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
+#define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
+#define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
+#define MCACOD		  0xffff     /* MCA Error Code */
+
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB	0x00C0	/* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK	0xfff0
+#define MCACOD_L3WB	0x017A	/* L3 Explicit Writeback */
+#define MCACOD_DATA	0x0134	/* Data Load */
+#define MCACOD_INSTR	0x0150	/* Instruction Fetch */
+
+/* MCi_MISC register defines */
+#define MCI_MISC_ADDR_LSB(m)	((m) & 0x3f)
+#define MCI_MISC_ADDR_MODE(m)	(((m) >> 6) & 7)
+#define  MCI_MISC_ADDR_SEGOFF	0	/* segment offset */
+#define  MCI_MISC_ADDR_LINEAR	1	/* linear address */
+#define  MCI_MISC_ADDR_PHYS	2	/* physical address */
+#define  MCI_MISC_ADDR_MEM	3	/* memory address */
+#define  MCI_MISC_ADDR_GENERIC	7	/* generic */
+
+/* CTL2 register defines */
+#define MCI_CTL2_CMCI_EN		(1ULL << 30)
+#define MCI_CTL2_CMCI_THRESHOLD_MASK	0x7fffULL
+
+#define MCJ_CTX_MASK		3
+#define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
+#define MCJ_CTX_RANDOM		0    /* inject context: random */
+#define MCJ_CTX_PROCESS		0x1  /* inject context: process */
+#define MCJ_CTX_IRQ		0x2  /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST	0x4  /* do NMI broadcasting */
+#define MCJ_EXCEPTION		0x8  /* raise as exception */
+#define MCJ_IRQ_BRAODCAST	0x10 /* do IRQ broadcasting */
+
 /* Fields are zero when not available */
 struct mce {
 	__u64 status;
@@ -27,8 +87,35 @@ struct mce {
 	__u64 mcgcap;	/* MCGCAP MSR: machine check capabilities of CPU */
 };
 
+/*
+ * This structure contains all data related to the MCE log.  Also
+ * carries a signature to make it easier to find from external
+ * debugging tools.  Each entry is only valid when its finished flag
+ * is set.
+ */
+
+#define MCE_LOG_LEN 32
+
+struct mce_log {
+	char signature[12]; /* "MACHINECHECK" */
+	unsigned len;	    /* = MCE_LOG_LEN */
+	unsigned next;
+	unsigned flags;
+	unsigned recordlen;	/* length of struct mce */
+	struct mce entry[MCE_LOG_LEN];
+};
+
+#define MCE_OVERFLOW 0		/* bit 0 in flags means overflow */
+
+#define MCE_LOG_SIGNATURE	"MACHINECHECK"
+
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
 #define MCE_GET_LOG_LEN      _IOR('M', 2, int)
 #define MCE_GETCLEAR_FLAGS   _IOR('M', 3, int)
 
+/* Software defined banks */
+#define MCE_EXTENDED_BANK	128
+#define MCE_THERMAL_BANK	(MCE_EXTENDED_BANK + 0)
+#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)
+
 #endif /* _UAPI_ASM_X86_MCE_H */
diff --git a/trunk/arch/x86/include/uapi/asm/msr-index.h b/trunk/arch/x86/include/uapi/asm/msr-index.h
index 075a40255591..433a59fb1a74 100644
--- a/trunk/arch/x86/include/uapi/asm/msr-index.h
+++ b/trunk/arch/x86/include/uapi/asm/msr-index.h
@@ -194,8 +194,6 @@
 /* Fam 15h MSRs */
 #define MSR_F15H_PERF_CTL		0xc0010200
 #define MSR_F15H_PERF_CTR		0xc0010201
-#define MSR_F15H_NB_PERF_CTL		0xc0010240
-#define MSR_F15H_NB_PERF_CTR		0xc0010241
 
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile
index ac3b3d002833..34e923a53762 100644
--- a/trunk/arch/x86/kernel/Makefile
+++ b/trunk/arch/x86/kernel/Makefile
@@ -65,7 +65,8 @@ obj-$(CONFIG_X86_TSC)		+= trace_clock.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
-obj-y				+= kprobes/
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_OPTPROBES)		+= kprobes-opt.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/trunk/arch/x86/kernel/apb_timer.c b/trunk/arch/x86/kernel/apb_timer.c
index cc74fd0c90f2..afdc3f756dea 100644
--- a/trunk/arch/x86/kernel/apb_timer.c
+++ b/trunk/arch/x86/kernel/apb_timer.c
@@ -311,6 +311,7 @@ void __init apbt_time_init(void)
 #ifdef CONFIG_SMP
 	int i;
 	struct sfi_timer_table_entry *p_mtmr;
+	unsigned int percpu_timer;
 	struct apbt_dev *adev;
 #endif
 
@@ -345,10 +346,13 @@ void __init apbt_time_init(void)
 		return;
 	}
 	pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
-	if (num_possible_cpus() <= sfi_mtimer_num)
+	if (num_possible_cpus() <= sfi_mtimer_num) {
+		percpu_timer = 1;
 		apbt_num_timers_used = num_possible_cpus();
-	else
+	} else {
+		percpu_timer = 0;
 		apbt_num_timers_used = 1;
+	}
 	pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
 
 	/* here we set up per CPU timer data structure */
diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c
index a5b4dce1b7ac..b994cc84aa7e 100644
--- a/trunk/arch/x86/kernel/apic/apic.c
+++ b/trunk/arch/x86/kernel/apic/apic.c
@@ -1477,7 +1477,8 @@ void __init bsp_end_local_APIC_setup(void)
 	 * Now that local APIC setup is completed for BP, configure the fault
 	 * handling for interrupt remapping.
 	 */
-	irq_remap_enable_fault_handling();
+	if (irq_remapping_enabled)
+		irq_remap_enable_fault_handling();
 
 }
 
@@ -2250,7 +2251,8 @@ static int lapic_suspend(void)
 	local_irq_save(flags);
 	disable_local_APIC();
 
-	irq_remapping_disable();
+	if (irq_remapping_enabled)
+		irq_remapping_disable();
 
 	local_irq_restore(flags);
 	return 0;
@@ -2266,15 +2268,16 @@ static void lapic_resume(void)
 		return;
 
 	local_irq_save(flags);
-
-	/*
-	 * IO-APIC and PIC have their own resume routines.
-	 * We just mask them here to make sure the interrupt
-	 * subsystem is completely quiet while we enable x2apic
-	 * and interrupt-remapping.
-	 */
-	mask_ioapic_entries();
-	legacy_pic->mask_all();
+	if (irq_remapping_enabled) {
+		/*
+		 * IO-APIC and PIC have their own resume routines.
+		 * We just mask them here to make sure the interrupt
+		 * subsystem is completely quiet while we enable x2apic
+		 * and interrupt-remapping.
+		 */
+		mask_ioapic_entries();
+		legacy_pic->mask_all();
+	}
 
 	if (x2apic_mode)
 		enable_x2apic();
@@ -2317,7 +2320,8 @@ static void lapic_resume(void)
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 
-	irq_remapping_reenable(x2apic_mode);
+	if (irq_remapping_enabled)
+		irq_remapping_reenable(x2apic_mode);
 
 	local_irq_restore(flags);
 }
diff --git a/trunk/arch/x86/kernel/apic/io_apic.c b/trunk/arch/x86/kernel/apic/io_apic.c
index 9ed796ccc32c..b739d398bb29 100644
--- a/trunk/arch/x86/kernel/apic/io_apic.c
+++ b/trunk/arch/x86/kernel/apic/io_apic.c
@@ -68,6 +68,22 @@
 #define for_each_irq_pin(entry, head) \
 	for (entry = head; entry; entry = entry->next)
 
+#ifdef CONFIG_IRQ_REMAP
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+	return cfg->irq_2_iommu.iommu != NULL;
+}
+#else
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+	return false;
+}
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+}
+#endif
+
 /*
  *      Is the SiS APIC rmw bug present ?
  *      -1 = don't know, 0 = no, 1 = yes
@@ -284,9 +300,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 	return cfg;
 }
 
-static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
+static int alloc_irq_from(unsigned int from, int node)
 {
-	return irq_alloc_descs_from(from, count, node);
+	return irq_alloc_desc_from(from, node);
 }
 
 static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -310,7 +326,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 		+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
 }
 
-void io_apic_eoi(unsigned int apic, unsigned int vector)
+static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
 	writel(vector, &io_apic->eoi);
@@ -557,10 +573,19 @@ static void unmask_ioapic_irq(struct irq_data *data)
  * Otherwise, we simulate the EOI message manually by changing the trigger
  * mode to edge and then back to level, with RTE being masked during this.
  */
-void native_eoi_ioapic_pin(int apic, int pin, int vector)
+static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
 {
 	if (mpc_ioapic_ver(apic) >= 0x20) {
-		io_apic_eoi(apic, vector);
+		/*
+		 * Intr-remapping uses pin number as the virtual vector
+		 * in the RTE. Actual vector is programmed in
+		 * intr-remapping table entry. Hence for the io-apic
+		 * EOI we use the pin number.
+		 */
+		if (cfg && irq_remapped(cfg))
+			io_apic_eoi(apic, pin);
+		else
+			io_apic_eoi(apic, vector);
 	} else {
 		struct IO_APIC_route_entry entry, entry1;
 
@@ -581,15 +606,14 @@ void native_eoi_ioapic_pin(int apic, int pin, int vector)
 	}
 }
 
-void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
 	for_each_irq_pin(entry, cfg->irq_2_pin)
-		x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
-					       cfg->vector);
+		__eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -626,7 +650,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 		}
 
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
+		__eoi_ioapic_pin(apic, pin, entry.vector, NULL);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 
@@ -1280,18 +1304,25 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
 		fasteoi = false;
 	}
 
-	if (setup_remapped_irq(irq, cfg, chip))
+	if (irq_remapped(cfg)) {
+		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+		irq_remap_modify_chip_defaults(chip);
 		fasteoi = trigger != 0;
+	}
 
 	hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
 	irq_set_chip_and_handler_name(irq, chip, hdl,
 				      fasteoi ? "fasteoi" : "edge");
 }
 
-int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int vector,
-			      struct io_apic_irq_attr *attr)
+static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+			       unsigned int destination, int vector,
+			       struct io_apic_irq_attr *attr)
 {
+	if (irq_remapping_enabled)
+		return setup_ioapic_remapped_entry(irq, entry, destination,
+						   vector, attr);
+
 	memset(entry, 0, sizeof(*entry));
 
 	entry->delivery_mode = apic->irq_delivery_mode;
@@ -1339,8 +1370,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 		    attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
 		    cfg->vector, irq, attr->trigger, attr->polarity, dest);
 
-	if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
-		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
+		pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
 		__clear_irq_vector(irq, cfg);
 
@@ -1448,6 +1479,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
 
+	if (irq_remapping_enabled)
+		return;
+
 	memset(&entry, 0, sizeof(entry));
 
 	/*
@@ -1479,63 +1513,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
 	ioapic_write_entry(ioapic_idx, pin, entry);
 }
 
-void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
-{
-	int i;
-
-	pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
-
-	for (i = 0; i <= nr_entries; i++) {
-		struct IO_APIC_route_entry entry;
-
-		entry = ioapic_read_entry(apic, i);
-
-		pr_debug(" %02x %02X  ", i, entry.dest);
-		pr_cont("%1d    %1d    %1d   %1d   %1d    "
-			"%1d    %1d    %02X\n",
-			entry.mask,
-			entry.trigger,
-			entry.irr,
-			entry.polarity,
-			entry.delivery_status,
-			entry.dest_mode,
-			entry.delivery_mode,
-			entry.vector);
-	}
-}
-
-void intel_ir_io_apic_print_entries(unsigned int apic,
-				    unsigned int nr_entries)
-{
-	int i;
-
-	pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
-
-	for (i = 0; i <= nr_entries; i++) {
-		struct IR_IO_APIC_route_entry *ir_entry;
-		struct IO_APIC_route_entry entry;
-
-		entry = ioapic_read_entry(apic, i);
-
-		ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
-
-		pr_debug(" %02x %04X ", i, ir_entry->index);
-		pr_cont("%1d   %1d    %1d    %1d   %1d   "
-			"%1d    %1d     %X    %02X\n",
-			ir_entry->format,
-			ir_entry->mask,
-			ir_entry->trigger,
-			ir_entry->irr,
-			ir_entry->polarity,
-			ir_entry->delivery_status,
-			ir_entry->index2,
-			ir_entry->zero,
-			ir_entry->vector);
-	}
-}
-
 __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 {
+	int i;
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
@@ -1588,7 +1568,58 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 
 	printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
-	x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
+	if (irq_remapping_enabled) {
+		printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
+			" Pol Stat Indx2 Zero Vect:\n");
+	} else {
+		printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
+			" Stat Dmod Deli Vect:\n");
+	}
+
+	for (i = 0; i <= reg_01.bits.entries; i++) {
+		if (irq_remapping_enabled) {
+			struct IO_APIC_route_entry entry;
+			struct IR_IO_APIC_route_entry *ir_entry;
+
+			entry = ioapic_read_entry(ioapic_idx, i);
+			ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
+			printk(KERN_DEBUG " %02x %04X ",
+				i,
+				ir_entry->index
+			);
+			pr_cont("%1d   %1d    %1d    %1d   %1d   "
+				"%1d    %1d     %X    %02X\n",
+				ir_entry->format,
+				ir_entry->mask,
+				ir_entry->trigger,
+				ir_entry->irr,
+				ir_entry->polarity,
+				ir_entry->delivery_status,
+				ir_entry->index2,
+				ir_entry->zero,
+				ir_entry->vector
+			);
+		} else {
+			struct IO_APIC_route_entry entry;
+
+			entry = ioapic_read_entry(ioapic_idx, i);
+			printk(KERN_DEBUG " %02x %02X  ",
+				i,
+				entry.dest
+			);
+			pr_cont("%1d    %1d    %1d   %1d   %1d    "
+				"%1d    %1d    %02X\n",
+				entry.mask,
+				entry.trigger,
+				entry.irr,
+				entry.polarity,
+				entry.delivery_status,
+				entry.dest_mode,
+				entry.delivery_mode,
+				entry.vector
+			);
+		}
+	}
 }
 
 __apicdebuginit(void) print_IO_APICs(void)
@@ -1890,14 +1921,30 @@ void __init enable_IO_APIC(void)
 	clear_IO_APIC();
 }
 
-void native_disable_io_apic(void)
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
 {
+	/*
+	 * Clear the IO-APIC before rebooting:
+	 */
+	clear_IO_APIC();
+
+	if (!legacy_pic->nr_legacy_irqs)
+		return;
+
 	/*
 	 * If the i8259 is routed through an IOAPIC
 	 * Put that IOAPIC in virtual wire mode
 	 * so legacy interrupts can be delivered.
+	 *
+	 * With interrupt-remapping, for now we will use virtual wire A mode,
+	 * as virtual wire B is a little complex (need to configure both
+	 * IOAPIC RTE as well as interrupt-remapping table entry).
+	 * As this gets called during crash dump, keep this simple for now.
 	 */
-	if (ioapic_i8259.pin != -1) {
+	if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
 		struct IO_APIC_route_entry entry;
 
 		memset(&entry, 0, sizeof(entry));
@@ -1917,25 +1964,12 @@ void native_disable_io_apic(void)
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 
-	if (cpu_has_apic || apic_from_smp_config())
-		disconnect_bsp_APIC(ioapic_i8259.pin != -1);
-
-}
-
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
-{
 	/*
-	 * Clear the IO-APIC before rebooting:
+	 * Use virtual wire A mode when interrupt remapping is enabled.
 	 */
-	clear_IO_APIC();
-
-	if (!legacy_pic->nr_legacy_irqs)
-		return;
-
-	x86_io_apic_ops.disable();
+	if (cpu_has_apic || apic_from_smp_config())
+		disconnect_bsp_APIC(!irq_remapping_enabled &&
+				ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32
@@ -2288,8 +2322,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 
 		apic = entry->apic;
 		pin = entry->pin;
-
-		io_apic_write(apic, 0x11 + pin*2, dest);
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(cfg))
+			io_apic_write(apic, 0x11 + pin*2, dest);
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -2331,10 +2369,9 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	return 0;
 }
 
-
-int native_ioapic_set_affinity(struct irq_data *data,
-			       const struct cpumask *mask,
-			       bool force)
+static int
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		    bool force)
 {
 	unsigned int dest, irq = data->irq;
 	unsigned long flags;
@@ -2511,6 +2548,33 @@ static void ack_apic_level(struct irq_data *data)
 	ioapic_irqd_unmask(data, cfg, masked);
 }
 
+#ifdef CONFIG_IRQ_REMAP
+static void ir_ack_apic_edge(struct irq_data *data)
+{
+	ack_APIC_irq();
+}
+
+static void ir_ack_apic_level(struct irq_data *data)
+{
+	ack_APIC_irq();
+	eoi_ioapic_irq(data->irq, data->chip_data);
+}
+
+static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
+{
+	seq_printf(p, " IR-%s", data->chip->name);
+}
+
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+	chip->irq_print_chip = ir_print_prefix;
+	chip->irq_ack = ir_ack_apic_edge;
+	chip->irq_eoi = ir_ack_apic_level;
+
+	chip->irq_set_affinity = set_remapped_irq_affinity;
+}
+#endif /* CONFIG_IRQ_REMAP */
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name			= "IO-APIC",
 	.irq_startup		= startup_ioapic_irq,
@@ -2518,7 +2582,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_unmask		= unmask_ioapic_irq,
 	.irq_ack		= ack_apic_edge,
 	.irq_eoi		= ack_apic_level,
-	.irq_set_affinity	= native_ioapic_set_affinity,
+	.irq_set_affinity	= ioapic_set_affinity,
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
@@ -2717,7 +2781,8 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
-		panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
+		if (irq_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -2749,7 +2814,8 @@ static inline void __init check_timer(void)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
-		panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
+		if (irq_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
 		local_irq_disable();
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
@@ -2916,58 +2982,37 @@ device_initcall(ioapic_init_ops);
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
+unsigned int create_irq_nr(unsigned int from, int node)
 {
-	struct irq_cfg **cfg;
+	struct irq_cfg *cfg;
 	unsigned long flags;
-	int irq, i;
+	unsigned int ret = 0;
+	int irq;
 
 	if (from < nr_irqs_gsi)
 		from = nr_irqs_gsi;
 
-	cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
-	if (!cfg)
-		return 0;
-
-	irq = alloc_irqs_from(from, count, node);
+	irq = alloc_irq_from(from, node);
 	if (irq < 0)
-		goto out_cfgs;
-
-	for (i = 0; i < count; i++) {
-		cfg[i] = alloc_irq_cfg(irq + i, node);
-		if (!cfg[i])
-			goto out_irqs;
+		return 0;
+	cfg = alloc_irq_cfg(irq, node);
+	if (!cfg) {
+		free_irq_at(irq, NULL);
+		return 0;
 	}
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	for (i = 0; i < count; i++)
-		if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
-			goto out_vecs;
+	if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
+		ret = irq;
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
-	for (i = 0; i < count; i++) {
-		irq_set_chip_data(irq + i, cfg[i]);
-		irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
+	if (ret) {
+		irq_set_chip_data(irq, cfg);
+		irq_clear_status_flags(irq, IRQ_NOREQUEST);
+	} else {
+		free_irq_at(irq, cfg);
 	}
-
-	kfree(cfg);
-	return irq;
-
-out_vecs:
-	for (i--; i >= 0; i--)
-		__clear_irq_vector(irq + i, cfg[i]);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-out_irqs:
-	for (i = 0; i < count; i++)
-		free_irq_at(irq + i, cfg[i]);
-out_cfgs:
-	kfree(cfg);
-	return 0;
-}
-
-unsigned int create_irq_nr(unsigned int from, int node)
-{
-	return __create_irqs(from, 1, node);
+	return ret;
 }
 
 int create_irq(void)
@@ -2992,35 +3037,48 @@ void destroy_irq(unsigned int irq)
 
 	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
 
-	free_remapped_irq(irq);
-
+	if (irq_remapped(cfg))
+		free_remapped_irq(irq);
 	raw_spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq, cfg);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 	free_irq_at(irq, cfg);
 }
 
-void destroy_irqs(unsigned int irq, unsigned int count)
-{
-	unsigned int i;
-
-	for (i = 0; i < count; i++)
-		destroy_irq(irq + i);
-}
-
 /*
  * MSI message composition
  */
-void native_compose_msi_msg(struct pci_dev *pdev,
-			    unsigned int irq, unsigned int dest,
-			    struct msi_msg *msg, u8 hpet_id)
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+			   struct msi_msg *msg, u8 hpet_id)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_cfg *cfg;
+	int err;
+	unsigned dest;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
 
-	msg->address_hi = MSI_ADDR_BASE_HI;
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	if (irq_remapped(cfg)) {
+		compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
+		return err;
+	}
 
 	if (x2apic_enabled())
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
+		msg->address_hi = MSI_ADDR_BASE_HI |
+				  MSI_ADDR_EXT_DEST_ID(dest);
+	else
+		msg->address_hi = MSI_ADDR_BASE_HI;
 
 	msg->address_lo =
 		MSI_ADDR_BASE_LO |
@@ -3039,32 +3097,8 @@ void native_compose_msi_msg(struct pci_dev *pdev,
 			MSI_DATA_DELIVERY_FIXED:
 			MSI_DATA_DELIVERY_LOWPRI) |
 		MSI_DATA_VECTOR(cfg->vector);
-}
-
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
-			   struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-
-	if (disable_apic)
-		return -ENXIO;
 
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
-
-	return 0;
+	return err;
 }
 
 static int
@@ -3102,28 +3136,23 @@ static struct irq_chip msi_chip = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-		  unsigned int irq_base, unsigned int irq_offset)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 {
 	struct irq_chip *chip = &msi_chip;
 	struct msi_msg msg;
-	unsigned int irq = irq_base + irq_offset;
 	int ret;
 
 	ret = msi_compose_msg(dev, irq, &msg, -1);
 	if (ret < 0)
 		return ret;
 
-	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
-
-	/*
-	 * MSI-X message is written per-IRQ, the offset is always 0.
-	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
-	 */
-	if (!irq_offset)
-		write_msi_msg(irq, &msg);
+	irq_set_msi_desc(irq, msidesc);
+	write_msi_msg(irq, &msg);
 
-	setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
+	if (irq_remapped(irq_get_chip_data(irq))) {
+		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+		irq_remap_modify_chip_defaults(chip);
+	}
 
 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
 
@@ -3134,26 +3163,46 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
+	int node, ret, sub_handle, index = 0;
 	unsigned int irq, irq_want;
 	struct msi_desc *msidesc;
-	int node, ret;
 
-	/* Multiple MSI vectors only supported with interrupt remapping */
+	/* x86 doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 
 	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
+	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
-			return -ENOSPC;
-
+			return -1;
 		irq_want = irq + 1;
+		if (!irq_remapping_enabled)
+			goto no_ir;
 
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (!sub_handle) {
+			/*
+			 * allocate a consecutive block of IRTEs
+			 * for 'nvec'
+			 */
+			index = msi_alloc_remapped_irq(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			ret = msi_setup_remapped_irq(dev, irq, index,
+						     sub_handle);
+			if (ret < 0)
+				goto error;
+		}
+no_ir:
+		ret = setup_msi_irq(dev, msidesc, irq);
 		if (ret < 0)
 			goto error;
+		sub_handle++;
 	}
 	return 0;
 
@@ -3249,19 +3298,26 @@ static struct irq_chip hpet_msi_type = {
 	.irq_retrigger = ioapic_retrigger_irq,
 };
 
-int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 {
 	struct irq_chip *chip = &hpet_msi_type;
 	struct msi_msg msg;
 	int ret;
 
+	if (irq_remapping_enabled) {
+		ret = setup_hpet_msi_remapped(irq, id);
+		if (ret)
+			return ret;
+	}
+
 	ret = msi_compose_msg(NULL, irq, &msg, id);
 	if (ret < 0)
 		return ret;
 
 	hpet_msi_write(irq_get_handler_data(irq), &msg);
 	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-	setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
+	if (irq_remapped(irq_get_chip_data(irq)))
+		irq_remap_modify_chip_defaults(chip);
 
 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
 	return 0;
@@ -3627,7 +3683,10 @@ void __init setup_ioapic_dest(void)
 		else
 			mask = apic->target_cpus();
 
-		x86_io_apic_ops.set_affinity(idata, mask, false);
+		if (irq_remapping_enabled)
+			set_remapped_irq_affinity(idata, mask, false);
+		else
+			ioapic_set_affinity(idata, mask, false);
 	}
 
 }
diff --git a/trunk/arch/x86/kernel/apic/ipi.c b/trunk/arch/x86/kernel/apic/ipi.c
index 7434d8556d09..cce91bf26676 100644
--- a/trunk/arch/x86/kernel/apic/ipi.c
+++ b/trunk/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
 	unsigned long mask = cpumask_bits(cpumask)[0];
 	unsigned long flags;
 
-	if (!mask)
+	if (WARN_ONCE(!mask, "empty IPI mask"))
 		return;
 
 	local_irq_save(flags);
diff --git a/trunk/arch/x86/kernel/apic/x2apic_phys.c b/trunk/arch/x86/kernel/apic/x2apic_phys.c
index 562a76d433c8..e03a1e180e81 100644
--- a/trunk/arch/x86/kernel/apic/x2apic_phys.c
+++ b/trunk/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,19 +20,18 @@ static int set_x2apic_phys_mode(char *arg)
 }
 early_param("x2apic_phys", set_x2apic_phys_mode);
 
-static bool x2apic_fadt_phys(void)
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
-		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+	if (x2apic_phys)
+		return x2apic_enabled();
+	else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
+		x2apic_enabled()) {
 		printk(KERN_DEBUG "System requires x2apic physical mode\n");
-		return true;
+		return 1;
 	}
-	return false;
-}
-
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-	return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
+	else
+		return 0;
 }
 
 static void
@@ -83,7 +82,7 @@ static void init_x2apic_ldr(void)
 
 static int x2apic_phys_probe(void)
 {
-	if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
+	if (x2apic_mode && x2apic_phys)
 		return 1;
 
 	return apic == &apic_x2apic_phys;
diff --git a/trunk/arch/x86/kernel/apm_32.c b/trunk/arch/x86/kernel/apm_32.c
index 8d7012b7f402..d65464e43503 100644
--- a/trunk/arch/x86/kernel/apm_32.c
+++ b/trunk/arch/x86/kernel/apm_32.c
@@ -899,7 +899,6 @@ static void apm_cpu_idle(void)
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
 	static unsigned int last_stime; /* = 0 */
-	cputime_t stime;
 
 	int apm_idle_done = 0;
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -907,23 +906,23 @@ static void apm_cpu_idle(void)
 
 	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
-	task_cputime(current, NULL, &stime);
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
+		last_jiffies = jiffies;
+		last_stime = current->stime;
 	} else if (jiffies_since_last_check > idle_period) {
 		unsigned int idle_percentage;
 
-		idle_percentage = stime - last_stime;
+		idle_percentage = current->stime - last_stime;
 		idle_percentage *= 100;
 		idle_percentage /= jiffies_since_last_check;
 		use_apm_idle = (idle_percentage > idle_threshold);
 		if (apm_info.forbid_idle)
 			use_apm_idle = 0;
+		last_jiffies = jiffies;
+		last_stime = current->stime;
 	}
 
-	last_jiffies = jiffies;
-	last_stime = stime;
-
 	bucket = IDLE_LEAKY_MAX;
 
 	while (!need_resched()) {
diff --git a/trunk/arch/x86/kernel/cpu/hypervisor.c b/trunk/arch/x86/kernel/cpu/hypervisor.c
index 1e7e84a02eba..a8f8fa9769d6 100644
--- a/trunk/arch/x86/kernel/cpu/hypervisor.c
+++ b/trunk/arch/x86/kernel/cpu/hypervisor.c
@@ -79,10 +79,3 @@ void __init init_hypervisor_platform(void)
 	if (x86_hyper->init_platform)
 		x86_hyper->init_platform();
 }
-
-bool __init hypervisor_x2apic_available(void)
-{
-	return x86_hyper                   &&
-	       x86_hyper->x2apic_available &&
-	       x86_hyper->x2apic_available();
-}
diff --git a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7c6f7d548c0f..84c1309c4c0c 100644
--- a/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/trunk/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1226,7 +1226,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
 	.notifier_call = cacheinfo_cpu_callback,
 };
 
-static int __init cache_sysfs_init(void)
+static int __cpuinit cache_sysfs_init(void)
 {
 	int i;
 
diff --git a/trunk/arch/x86/kernel/cpu/mshyperv.c b/trunk/arch/x86/kernel/cpu/mshyperv.c
index 0a630dd4b620..646d192b18a2 100644
--- a/trunk/arch/x86/kernel/cpu/mshyperv.c
+++ b/trunk/arch/x86/kernel/cpu/mshyperv.c
@@ -68,7 +68,8 @@ static void __init ms_hyperv_init_platform(void)
 	printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
 	       ms_hyperv.features, ms_hyperv.hints);
 
-	clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
+	if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
+		clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c
index bf0f01aea994..6774c17a5576 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event.c
+++ b/trunk/arch/x86/kernel/cpu/perf_event.c
@@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
-		hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+		hwc->event_base_rdpmc = hwc->idx;
 	}
 }
 
@@ -1310,6 +1310,11 @@ static struct attribute_group x86_pmu_format_group = {
 	.attrs = NULL,
 };
 
+struct perf_pmu_events_attr {
+	struct device_attribute attr;
+	u64 id;
+};
+
 /*
  * Remove all undefined events (x86_pmu.event_map(id) == 0)
  * out of events_attr attributes.
@@ -1343,9 +1348,11 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
 
-#define EVENT_ATTR(_name, _id)						\
-	PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id,	\
-			events_sysfs_show)
+#define EVENT_ATTR(_name, _id)					\
+static struct perf_pmu_events_attr EVENT_VAR(_id) = {		\
+	.attr = __ATTR(_name, 0444, events_sysfs_show, NULL),	\
+	.id   =  PERF_COUNT_HW_##_id,				\
+};
 
 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
 EVENT_ATTR(instructions,		INSTRUCTIONS		);
diff --git a/trunk/arch/x86/kernel/cpu/perf_event.h b/trunk/arch/x86/kernel/cpu/perf_event.h
index 7f5c75c2afdd..115c1ea97746 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event.h
+++ b/trunk/arch/x86/kernel/cpu/perf_event.h
@@ -325,8 +325,6 @@ struct x86_pmu {
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
-	int		(*addr_offset)(int index, bool eventsel);
-	int		(*rdpmc_index)(int index);
 	u64		(*event_map)(int);
 	int		max_events;
 	int		num_counters;
@@ -448,21 +446,28 @@ extern u64 __read_mostly hw_cache_extra_regs
 
 u64 x86_perf_event_update(struct perf_event *event);
 
-static inline unsigned int x86_pmu_config_addr(int index)
+static inline int x86_pmu_addr_offset(int index)
 {
-	return x86_pmu.eventsel + (x86_pmu.addr_offset ?
-				   x86_pmu.addr_offset(index, true) : index);
+	int offset;
+
+	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
+	alternative_io(ASM_NOP2,
+		       "shll $1, %%eax",
+		       X86_FEATURE_PERFCTR_CORE,
+		       "=a" (offset),
+		       "a"  (index));
+
+	return offset;
 }
 
-static inline unsigned int x86_pmu_event_addr(int index)
+static inline unsigned int x86_pmu_config_addr(int index)
 {
-	return x86_pmu.perfctr + (x86_pmu.addr_offset ?
-				  x86_pmu.addr_offset(index, false) : index);
+	return x86_pmu.eventsel + x86_pmu_addr_offset(index);
 }
 
-static inline int x86_pmu_rdpmc_index(int index)
+static inline unsigned int x86_pmu_event_addr(int index)
 {
-	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
+	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
 }
 
 int x86_setup_perfctr(struct perf_event *event);
diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd.c b/trunk/arch/x86/kernel/cpu/perf_event_amd.c
index dfdab42aed27..c93bc4e813a0 100644
--- a/trunk/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/trunk/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,102 +132,21 @@ static u64 amd_pmu_event_map(int hw_event)
 	return amd_perfmon_event_map[hw_event];
 }
 
-static struct event_constraint *amd_nb_event_constraint;
-
-/*
- * Previously calculated offsets
- */
-static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
-
-/*
- * Legacy CPUs:
- *   4 counters starting at 0xc0010000 each offset by 1
- *
- * CPUs with core performance counter extensions:
- *   6 counters starting at 0xc0010200 each offset by 2
- *
- * CPUs with north bridge performance counter extensions:
- *   4 additional counters starting at 0xc0010240 each offset by 2
- *   (indexed right above either one of the above core counters)
- */
-static inline int amd_pmu_addr_offset(int index, bool eventsel)
-{
-	int offset, first, base;
-
-	if (!index)
-		return index;
-
-	if (eventsel)
-		offset = event_offsets[index];
-	else
-		offset = count_offsets[index];
-
-	if (offset)
-		return offset;
-
-	if (amd_nb_event_constraint &&
-	    test_bit(index, amd_nb_event_constraint->idxmsk)) {
-		/*
-		 * calculate the offset of NB counters with respect to
-		 * base eventsel or perfctr
-		 */
-
-		first = find_first_bit(amd_nb_event_constraint->idxmsk,
-				       X86_PMC_IDX_MAX);
-
-		if (eventsel)
-			base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
-		else
-			base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
-
-		offset = base + ((index - first) << 1);
-	} else if (!cpu_has_perfctr_core)
-		offset = index;
-	else
-		offset = index << 1;
-
-	if (eventsel)
-		event_offsets[index] = offset;
-	else
-		count_offsets[index] = offset;
-
-	return offset;
-}
-
-static inline int amd_pmu_rdpmc_index(int index)
+static int amd_pmu_hw_config(struct perf_event *event)
 {
-	int ret, first;
-
-	if (!index)
-		return index;
+	int ret;
 
-	ret = rdpmc_indexes[index];
+	/* pass precise event sampling to ibs: */
+	if (event->attr.precise_ip && get_ibs_caps())
+		return -ENOENT;
 
+	ret = x86_pmu_hw_config(event);
 	if (ret)
 		return ret;
 
-	if (amd_nb_event_constraint &&
-	    test_bit(index, amd_nb_event_constraint->idxmsk)) {
-		/*
-		 * according to the mnual, ECX value of the NB counters is
-		 * the index of the NB counter (0, 1, 2 or 3) plus 6
-		 */
-
-		first = find_first_bit(amd_nb_event_constraint->idxmsk,
-				       X86_PMC_IDX_MAX);
-		ret = index - first + 6;
-	} else
-		ret = index;
-
-	rdpmc_indexes[index] = ret;
-
-	return ret;
-}
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
 
-static int amd_core_hw_config(struct perf_event *event)
-{
 	if (event->attr.exclude_host && event->attr.exclude_guest)
 		/*
 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -237,37 +156,14 @@ static int amd_core_hw_config(struct perf_event *event)
 		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
 				      ARCH_PERFMON_EVENTSEL_OS);
 	else if (event->attr.exclude_host)
-		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
+		event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
 	else if (event->attr.exclude_guest)
-		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
-
-	return 0;
-}
-
-/*
- * NB counters do not support the following event select bits:
- *   Host/Guest only
- *   Counter mask
- *   Invert counter mask
- *   Edge detect
- *   OS/User mode
- */
-static int amd_nb_hw_config(struct perf_event *event)
-{
-	/* for NB, we only allow system wide counting mode */
-	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
-		return -EINVAL;
-
-	if (event->attr.exclude_user || event->attr.exclude_kernel ||
-	    event->attr.exclude_host || event->attr.exclude_guest)
-		return -EINVAL;
+		event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
 
-	event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
-			      ARCH_PERFMON_EVENTSEL_OS);
+	if (event->attr.type != PERF_TYPE_RAW)
+		return 0;
 
-	if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
-				 ARCH_PERFMON_EVENTSEL_INT))
-		return -EINVAL;
+	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
 
 	return 0;
 }
@@ -285,11 +181,6 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 	return (hwc->config & 0xe0) == 0xe0;
 }
 
-static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
-{
-	return amd_nb_event_constraint && amd_is_nb_event(hwc);
-}
-
 static inline int amd_has_nb(struct cpu_hw_events *cpuc)
 {
 	struct amd_nb *nb = cpuc->amd_nb;
@@ -297,36 +188,19 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
 	return nb && nb->nb_id != -1;
 }
 
-static int amd_pmu_hw_config(struct perf_event *event)
-{
-	int ret;
-
-	/* pass precise event sampling to ibs: */
-	if (event->attr.precise_ip && get_ibs_caps())
-		return -ENOENT;
-
-	if (has_branch_stack(event))
-		return -EOPNOTSUPP;
-
-	ret = x86_pmu_hw_config(event);
-	if (ret)
-		return ret;
-
-	if (event->attr.type == PERF_TYPE_RAW)
-		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
-
-	if (amd_is_perfctr_nb_event(&event->hw))
-		return amd_nb_hw_config(event);
-
-	return amd_core_hw_config(event);
-}
-
-static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
-					   struct perf_event *event)
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	struct amd_nb *nb = cpuc->amd_nb;
 	int i;
 
+	/*
+	 * only care about NB events
+	 */
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
+		return;
+
 	/*
 	 * need to scan whole list because event may not have
 	 * been assigned during scheduling
@@ -341,19 +215,6 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
 	}
 }
 
-static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
-{
-	int core_id = cpu_data(smp_processor_id()).cpu_core_id;
-
-	/* deliver interrupts only to this core */
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
-		hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
-		hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
-		hwc->config |= (u64)(core_id) <<
-			AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
-	}
-}
-
  /*
   * AMD64 NorthBridge events need special treatment because
   * counter access needs to be synchronized across all cores
@@ -386,24 +247,24 @@ static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
   *
   * Given that resources are allocated (cmpxchg), they must be
   * eventually freed for others to use. This is accomplished by
-  * calling __amd_put_nb_event_constraints()
+  * calling amd_put_event_constraints().
   *
   * Non NB events are not impacted by this restriction.
   */
 static struct event_constraint *
-__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
-			       struct event_constraint *c)
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct amd_nb *nb = cpuc->amd_nb;
-	struct perf_event *old;
-	int idx, new = -1;
+	struct perf_event *old = NULL;
+	int max = x86_pmu.num_counters;
+	int i, j, k = -1;
 
-	if (!c)
-		c = &unconstrained;
-
-	if (cpuc->is_fake)
-		return c;
+	/*
+	 * if not NB event or no NB, then no constraints
+	 */
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
+		return &unconstrained;
 
 	/*
 	 * detect if already present, if so reuse
@@ -415,36 +276,48 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
 	 * because of successive calls to x86_schedule_events() from
 	 * hw_perf_group_sched_in() without hw_perf_enable()
 	 */
-	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
-		if (new == -1 || hwc->idx == idx)
-			/* assign free slot, prefer hwc->idx */
-			old = cmpxchg(nb->owners + idx, NULL, event);
-		else if (nb->owners[idx] == event)
-			/* event already present */
-			old = event;
-		else
-			continue;
-
-		if (old && old != event)
-			continue;
-
-		/* reassign to this slot */
-		if (new != -1)
-			cmpxchg(nb->owners + new, event, NULL);
-		new = idx;
+	for (i = 0; i < max; i++) {
+		/*
+		 * keep track of first free slot
+		 */
+		if (k == -1 && !nb->owners[i])
+			k = i;
 
 		/* already present, reuse */
-		if (old == event)
-			break;
+		if (nb->owners[i] == event)
+			goto done;
 	}
-
-	if (new == -1)
-		return &emptyconstraint;
-
-	if (amd_is_perfctr_nb_event(hwc))
-		amd_nb_interrupt_hw_config(hwc);
-
-	return &nb->event_constraints[new];
+	/*
+	 * not present, so grab a new slot
+	 * starting either at:
+	 */
+	if (hwc->idx != -1) {
+		/* previous assignment */
+		i = hwc->idx;
+	} else if (k != -1) {
+		/* start from free slot found */
+		i = k;
+	} else {
+		/*
+		 * event not found, no slot found in
+		 * first pass, try again from the
+		 * beginning
+		 */
+		i = 0;
+	}
+	j = i;
+	do {
+		old = cmpxchg(nb->owners+i, NULL, event);
+		if (!old)
+			break;
+		if (++i == max)
+			i = 0;
+	} while (i != j);
+done:
+	if (!old)
+		return &nb->event_constraints[i];
+
+	return &emptyconstraint;
 }
 
 static struct amd_nb *amd_alloc_nb(int cpu)
@@ -491,7 +364,7 @@ static void amd_pmu_cpu_starting(int cpu)
 	struct amd_nb *nb;
 	int i, nb_id;
 
-	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
 
 	if (boot_cpu_data.x86_max_cores < 2)
 		return;
@@ -534,26 +407,6 @@ static void amd_pmu_cpu_dead(int cpu)
 	}
 }
 
-static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
-	/*
-	 * if not NB event or no NB, then no constraints
-	 */
-	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
-		return &unconstrained;
-
-	return __amd_get_nb_event_constraints(cpuc, event,
-					      amd_nb_event_constraint);
-}
-
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
-				      struct perf_event *event)
-{
-	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
-		__amd_put_nb_event_constraints(cpuc, event);
-}
-
 PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -643,9 +496,6 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
-static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
-static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
-
 static struct event_constraint *
 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
@@ -711,8 +561,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
 			return &amd_f15_PMC20;
 		}
 	case AMD_EVENT_NB:
-		return __amd_get_nb_event_constraints(cpuc, event,
-						      amd_nb_event_constraint);
+		/* not yet implemented */
+		return &emptyconstraint;
 	default:
 		return &emptyconstraint;
 	}
@@ -737,8 +587,6 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
-	.addr_offset            = amd_pmu_addr_offset,
-	.rdpmc_index		= amd_pmu_rdpmc_index,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 	.num_counters		= AMD64_NUM_COUNTERS,
@@ -760,7 +608,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 
 static int setup_event_constraints(void)
 {
-	if (boot_cpu_data.x86 == 0x15)
+	if (boot_cpu_data.x86 >= 0x15)
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
 	return 0;
 }
@@ -790,23 +638,6 @@ static int setup_perfctr_core(void)
 	return 0;
 }
 
-static int setup_perfctr_nb(void)
-{
-	if (!cpu_has_perfctr_nb)
-		return -ENODEV;
-
-	x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
-
-	if (cpu_has_perfctr_core)
-		amd_nb_event_constraint = &amd_NBPMC96;
-	else
-		amd_nb_event_constraint = &amd_NBPMC74;
-
-	printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
-
-	return 0;
-}
-
 __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -817,7 +648,6 @@ __init int amd_pmu_init(void)
 
 	setup_event_constraints();
 	setup_perfctr_core();
-	setup_perfctr_nb();
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -848,7 +678,7 @@ void amd_pmu_disable_virt(void)
 	 * SVM is disabled the Guest-only bits still gets set and the counter
 	 * will not count anything.
 	 */
-	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
 
 	/* Reload all events */
 	x86_pmu_disable_all();
diff --git a/trunk/arch/x86/kernel/cpu/vmware.c b/trunk/arch/x86/kernel/cpu/vmware.c
index 03a36321ec54..d22d0c4edcfd 100644
--- a/trunk/arch/x86/kernel/cpu/vmware.c
+++ b/trunk/arch/x86/kernel/cpu/vmware.c
@@ -33,9 +33,6 @@
 
 #define VMWARE_PORT_CMD_GETVERSION	10
 #define VMWARE_PORT_CMD_GETHZ		45
-#define VMWARE_PORT_CMD_GETVCPU_INFO	68
-#define VMWARE_PORT_CMD_LEGACY_X2APIC	3
-#define VMWARE_PORT_CMD_VCPU_RESERVED	31
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
 	__asm__("inl (%%dx)" :						\
@@ -128,20 +125,10 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
 	set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
 }
 
-/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
-static bool __init vmware_legacy_x2apic_available(void)
-{
-	uint32_t eax, ebx, ecx, edx;
-	VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
-	return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
-	       (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
-}
-
 const __refconst struct hypervisor_x86 x86_hyper_vmware = {
 	.name			= "VMware",
 	.detect			= vmware_platform,
 	.set_cpu_features	= vmware_set_cpu_features,
 	.init_platform		= vmware_platform_setup,
-	.x2apic_available	= vmware_legacy_x2apic_available,
 };
 EXPORT_SYMBOL(x86_hyper_vmware);
diff --git a/trunk/arch/x86/kernel/head32.c b/trunk/arch/x86/kernel/head32.c
index 6773c918b8cc..c18f59d10101 100644
--- a/trunk/arch/x86/kernel/head32.c
+++ b/trunk/arch/x86/kernel/head32.c
@@ -18,7 +18,6 @@
 #include <asm/io_apic.h>
 #include <asm/bios_ebda.h>
 #include <asm/tlbflush.h>
-#include <asm/bootparam_utils.h>
 
 static void __init i386_default_early_setup(void)
 {
@@ -31,8 +30,6 @@ static void __init i386_default_early_setup(void)
 
 void __init i386_start_kernel(void)
 {
-	sanitize_boot_params(&boot_params);
-
 	memblock_reserve(__pa_symbol(&_text),
 			 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
 
diff --git a/trunk/arch/x86/kernel/head64.c b/trunk/arch/x86/kernel/head64.c
index 849fc9e63c2f..037df57a99ac 100644
--- a/trunk/arch/x86/kernel/head64.c
+++ b/trunk/arch/x86/kernel/head64.c
@@ -25,7 +25,6 @@
 #include <asm/kdebug.h>
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
-#include <asm/bootparam_utils.h>
 
 static void __init zap_identity_mappings(void)
 {
@@ -47,7 +46,6 @@ static void __init copy_bootdata(char *real_mode_data)
 	char * command_line;
 
 	memcpy(&boot_params, real_mode_data, sizeof boot_params);
-	sanitize_boot_params(&boot_params);
 	if (boot_params.hdr.cmd_line_ptr) {
 		command_line = __va(boot_params.hdr.cmd_line_ptr);
 		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
diff --git a/trunk/arch/x86/kernel/head_32.S b/trunk/arch/x86/kernel/head_32.S
index 3c3f58a0808f..c8932c79e78b 100644
--- a/trunk/arch/x86/kernel/head_32.S
+++ b/trunk/arch/x86/kernel/head_32.S
@@ -307,45 +307,36 @@ default_entry:
 	movl %eax,%cr0
 
 /*
- * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave
- * bits like NT set. This would confuse the debugger if this code is traced. So
- * initialize them properly now before switching to protected mode. That means
- * DF in particular (even though we have cleared it earlier after copying the
- * command line) because GCC expects it.
- */
-	pushl $0
-	popfl
-
-/*
- * New page tables may be in 4Mbyte page mode and may be using the global pages.
+ *	New page tables may be in 4Mbyte page mode and may
+ *	be using the global pages. 
  *
- * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists
- * if and only if CPUID exists and has flags other than the FPU flag set.
+ *	NOTE! If we are on a 486 we may have no cr4 at all!
+ *	Specifically, cr4 exists if and only if CPUID exists
+ *	and has flags other than the FPU flag set.
  */
-	movl $-1,pa(X86_CPUID)		# preset CPUID level
 	movl $X86_EFLAGS_ID,%ecx
 	pushl %ecx
-	popfl				# set EFLAGS=ID
+	popfl
 	pushfl
-	popl %eax			# get EFLAGS
-	testl $X86_EFLAGS_ID,%eax	# did EFLAGS.ID remained set?
-	jz enable_paging		# hw disallowed setting of ID bit
-					# which means no CPUID and no CR4
-
-	xorl %eax,%eax
-	cpuid
-	movl %eax,pa(X86_CPUID)		# save largest std CPUID function
+	popl %eax
+	pushl $0
+	popfl
+	pushfl
+	popl %edx
+	xorl %edx,%eax
+	testl %ecx,%eax
+	jz 6f			# No ID flag = no CPUID = no CR4
 
 	movl $1,%eax
 	cpuid
-	andl $~1,%edx			# Ignore CPUID.FPU
-	jz enable_paging		# No flags or only CPUID.FPU = no CR4
+	andl $~1,%edx		# Ignore CPUID.FPU
+	jz 6f			# No flags or only CPUID.FPU = no CR4
 
 	movl pa(mmu_cr4_features),%eax
 	movl %eax,%cr4
 
 	testb $X86_CR4_PAE, %al		# check if PAE is enabled
-	jz enable_paging
+	jz 6f
 
 	/* Check if extended functions are implemented */
 	movl $0x80000000, %eax
@@ -353,7 +344,7 @@ default_entry:
 	/* Value must be in the range 0x80000001 to 0x8000ffff */
 	subl $0x80000001, %eax
 	cmpl $(0x8000ffff-0x80000001), %eax
-	ja enable_paging
+	ja 6f
 
 	/* Clear bogus XD_DISABLE bits */
 	call verify_cpu
@@ -362,7 +353,7 @@ default_entry:
 	cpuid
 	/* Execute Disable bit supported? */
 	btl $(X86_FEATURE_NX & 31), %edx
-	jnc enable_paging
+	jnc 6f
 
 	/* Setup EFER (Extended Feature Enable Register) */
 	movl $MSR_EFER, %ecx
@@ -372,7 +363,7 @@ default_entry:
 	/* Make changes effective */
 	wrmsr
 
-enable_paging:
+6:
 
 /*
  * Enable paging
@@ -386,6 +377,14 @@ enable_paging:
 	/* Shift the stack pointer to a virtual address */
 	addl $__PAGE_OFFSET, %esp
 
+/*
+ * Initialize eflags.  Some BIOSes leave bits like NT set.  This would
+ * confuse the debugger if this code is traced.
+ * XXX - best to initialize before switching to protected mode.
+ */
+	pushl $0
+	popfl
+
 /*
  * start system 32-bit setup. We need to re-do some of the things done
  * in 16-bit mode for the "real" operations.
@@ -395,11 +394,31 @@ enable_paging:
 	jz 1f				# Did we do this already?
 	call *%eax
 1:
-
+	
+/* check if it is 486 or 386. */
 /*
- * Check if it is 486
+ * XXX - this does a lot of unnecessary setup.  Alignment checks don't
+ * apply at our cpl of 0 and the stack ought to be aligned already, and
+ * we don't need to preserve eflags.
  */
-	cmpl $-1,X86_CPUID
+	movl $-1,X86_CPUID	# -1 for no CPUID initially
+	movb $3,X86		# at least 386
+	pushfl			# push EFLAGS
+	popl %eax		# get EFLAGS
+	movl %eax,%ecx		# save original EFLAGS
+	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
+	pushl %eax		# copy to EFLAGS
+	popfl			# set EFLAGS
+	pushfl			# get new EFLAGS
+	popl %eax		# put it in eax
+	xorl %ecx,%eax		# change in flags
+	pushl %ecx		# restore original EFLAGS
+	popfl
+	testl $0x40000,%eax	# check if AC bit changed
+	je is386
+
+	movb $4,X86		# at least 486
+	testl $0x200000,%eax	# check if ID bit changed
 	je is486
 
 	/* get vendor info */
@@ -425,10 +444,11 @@ enable_paging:
 	movb %cl,X86_MASK
 	movl %edx,X86_CAPABILITY
 
-is486:
-	movb $4,X86
-	movl $0x50022,%ecx	# set AM, WP, NE and MP
-	movl %cr0,%eax
+is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
+	jmp 2f
+
+is386:	movl $2,%ecx		# set MP
+2:	movl %cr0,%eax
 	andl $0x80000011,%eax	# Save PG,PE,ET
 	orl %ecx,%eax
 	movl %eax,%cr0
@@ -453,6 +473,7 @@ is486:
 	xorl %eax,%eax			# Clear LDT
 	lldt %ax
 
+	cld			# gcc2 wants the direction flag cleared at all times
 	pushl $0		# fake return address for unwinder
 	jmp *(initial_code)
 
diff --git a/trunk/arch/x86/kernel/hpet.c b/trunk/arch/x86/kernel/hpet.c
index da85a8e830a1..e28670f9a589 100644
--- a/trunk/arch/x86/kernel/hpet.c
+++ b/trunk/arch/x86/kernel/hpet.c
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
 
 static int hpet_setup_msi_irq(unsigned int irq)
 {
-	if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
+	if (arch_setup_hpet_msi(irq, hpet_blockid)) {
 		destroy_irq(irq);
 		return -EINVAL;
 	}
diff --git a/trunk/arch/x86/kernel/kprobes/common.h b/trunk/arch/x86/kernel/kprobes-common.h
similarity index 90%
rename from trunk/arch/x86/kernel/kprobes/common.h
rename to trunk/arch/x86/kernel/kprobes-common.h
index 2e9d4b5af036..3230b68ef29a 100644
--- a/trunk/arch/x86/kernel/kprobes/common.h
+++ b/trunk/arch/x86/kernel/kprobes-common.h
@@ -99,15 +99,4 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
 	return addr;
 }
 #endif
-
-#ifdef CONFIG_KPROBES_ON_FTRACE
-extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-			   struct kprobe_ctlblk *kcb);
-#else
-static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-				  struct kprobe_ctlblk *kcb)
-{
-	return 0;
-}
-#endif
 #endif
diff --git a/trunk/arch/x86/kernel/kprobes/opt.c b/trunk/arch/x86/kernel/kprobes-opt.c
similarity index 99%
rename from trunk/arch/x86/kernel/kprobes/opt.c
rename to trunk/arch/x86/kernel/kprobes-opt.c
index 76dc6f095724..c5e410eed403 100644
--- a/trunk/arch/x86/kernel/kprobes/opt.c
+++ b/trunk/arch/x86/kernel/kprobes-opt.c
@@ -37,7 +37,7 @@
 #include <asm/insn.h>
 #include <asm/debugreg.h>
 
-#include "common.h"
+#include "kprobes-common.h"
 
 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
 {
diff --git a/trunk/arch/x86/kernel/kprobes/core.c b/trunk/arch/x86/kernel/kprobes.c
similarity index 94%
rename from trunk/arch/x86/kernel/kprobes/core.c
rename to trunk/arch/x86/kernel/kprobes.c
index e124554598ee..57916c0d3cf6 100644
--- a/trunk/arch/x86/kernel/kprobes/core.c
+++ b/trunk/arch/x86/kernel/kprobes.c
@@ -58,7 +58,7 @@
 #include <asm/insn.h>
 #include <asm/debugreg.h>
 
-#include "common.h"
+#include "kprobes-common.h"
 
 void jprobe_return_end(void);
 
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 	 * Groups, and some special opcodes can not boost.
 	 * This is non-const and volatile to keep gcc from statically
 	 * optimizing it out, as variable_test_bit makes gcc think only
-	 * *(unsigned long*) is used.
+	 * *(unsigned long*) is used. 
 	 */
 static volatile u32 twobyte_is_boostable[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
 	struct __arch_relative_insn {
 		u8 op;
 		s32 raddr;
-	} __packed *insn;
+	} __attribute__((packed)) *insn;
 
 	insn = (struct __arch_relative_insn *)from;
 	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 	return 1;
 }
 
+#ifdef KPROBES_CAN_USE_FTRACE
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there were a 5-byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+}
+#endif
+
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -599,8 +616,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	} else if (kprobe_running()) {
 		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
-			if (!skip_singlestep(p, regs, kcb))
-				setup_singlestep(p, regs, kcb, 0);
+#ifdef KPROBES_CAN_USE_FTRACE
+			if (kprobe_ftrace(p)) {
+				skip_singlestep(p, regs, kcb);
+				return 1;
+			}
+#endif
+			setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
 	} /* else: not a kprobe fault; let the kernel handle it */
@@ -1053,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
+#ifdef KPROBES_CAN_USE_FTRACE
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+				     struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+	unsigned long flags;
+
+	/* Disable IRQs to emulate a breakpoint and to avoid preemption */
+	local_irq_save(flags);
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		goto end;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+		regs->ip = ip + sizeof(kprobe_opcode_t);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs))
+			skip_singlestep(p, regs, kcb);
+		/*
+		 * If pre_handler returns !0, it sets regs->ip and
+		 * resets current kprobe.
+		 */
+	}
+end:
+	local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	p->ainsn.boostable = -1;
+	return 0;
+}
+#endif
+
 int __init arch_init_kprobes(void)
 {
 	return arch_init_optprobes();
diff --git a/trunk/arch/x86/kernel/kprobes/Makefile b/trunk/arch/x86/kernel/kprobes/Makefile
deleted file mode 100644
index 0d33169cc1a2..000000000000
--- a/trunk/arch/x86/kernel/kprobes/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for kernel probes
-#
-
-obj-$(CONFIG_KPROBES)		+= core.o
-obj-$(CONFIG_OPTPROBES)		+= opt.o
-obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
diff --git a/trunk/arch/x86/kernel/kprobes/ftrace.c b/trunk/arch/x86/kernel/kprobes/ftrace.c
deleted file mode 100644
index 23ef5c556f06..000000000000
--- a/trunk/arch/x86/kernel/kprobes/ftrace.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Dynamic Ftrace based Kprobes Optimization
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) Hitachi Ltd., 2012
- */
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-#include <linux/hardirq.h>
-#include <linux/preempt.h>
-#include <linux/ftrace.h>
-
-#include "common.h"
-
-static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-			     struct kprobe_ctlblk *kcb)
-{
-	/*
-	 * Emulate singlestep (and also recover regs->ip)
-	 * as if there is a 5byte nop
-	 */
-	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
-	if (unlikely(p->post_handler)) {
-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-		p->post_handler(p, regs, 0);
-	}
-	__this_cpu_write(current_kprobe, NULL);
-	return 1;
-}
-
-int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-			      struct kprobe_ctlblk *kcb)
-{
-	if (kprobe_ftrace(p))
-		return __skip_singlestep(p, regs, kcb);
-	else
-		return 0;
-}
-
-/* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-				     struct ftrace_ops *ops, struct pt_regs *regs)
-{
-	struct kprobe *p;
-	struct kprobe_ctlblk *kcb;
-	unsigned long flags;
-
-	/* Disable irq for emulating a breakpoint and avoiding preempt */
-	local_irq_save(flags);
-
-	p = get_kprobe((kprobe_opcode_t *)ip);
-	if (unlikely(!p) || kprobe_disabled(p))
-		goto end;
-
-	kcb = get_kprobe_ctlblk();
-	if (kprobe_running()) {
-		kprobes_inc_nmissed_count(p);
-	} else {
-		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
-		regs->ip = ip + sizeof(kprobe_opcode_t);
-
-		__this_cpu_write(current_kprobe, p);
-		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-		if (!p->pre_handler || !p->pre_handler(p, regs))
-			__skip_singlestep(p, regs, kcb);
-		/*
-		 * If pre_handler returns !0, it sets regs->ip and
-		 * resets current kprobe.
-		 */
-	}
-end:
-	local_irq_restore(flags);
-}
-
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
-	p->ainsn.insn = NULL;
-	p->ainsn.boostable = -1;
-	return 0;
-}
diff --git a/trunk/arch/x86/kernel/kvm.c b/trunk/arch/x86/kernel/kvm.c
index 2b44ea5f269d..9c2bd8bd4b4c 100644
--- a/trunk/arch/x86/kernel/kvm.c
+++ b/trunk/arch/x86/kernel/kvm.c
@@ -505,7 +505,6 @@ static bool __init kvm_detect(void)
 const struct hypervisor_x86 x86_hyper_kvm __refconst = {
 	.name			= "KVM",
 	.detect			= kvm_detect,
-	.x2apic_available	= kvm_para_available,
 };
 EXPORT_SYMBOL_GPL(x86_hyper_kvm);
 
diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c
index 29a8120e6fe8..b629bbe0d9bd 100644
--- a/trunk/arch/x86/kernel/ptrace.c
+++ b/trunk/arch/x86/kernel/ptrace.c
@@ -22,7 +22,7 @@
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/rcupdate.h>
-#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/context_tracking.h>
 
 #include <asm/uaccess.h>
diff --git a/trunk/arch/x86/kernel/rtc.c b/trunk/arch/x86/kernel/rtc.c
index 2e8f3d3b5641..801602b5d745 100644
--- a/trunk/arch/x86/kernel/rtc.c
+++ b/trunk/arch/x86/kernel/rtc.c
@@ -149,6 +149,7 @@ unsigned long mach_get_cmos_time(void)
 	if (century) {
 		century = bcd2bin(century);
 		year += century * 100;
+		printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
 	} else
 		year += CMOS_YEARS_OFFS;
 
diff --git a/trunk/arch/x86/kernel/sys_x86_64.c b/trunk/arch/x86/kernel/sys_x86_64.c
index dbded5aedb81..97ef74b88e0f 100644
--- a/trunk/arch/x86/kernel/sys_x86_64.c
+++ b/trunk/arch/x86/kernel/sys_x86_64.c
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	if (flags & MAP_FIXED)
 		return addr;
 
-	/* for MAP_32BIT mappings we force the legacy mmap base */
+	/* for MAP_32BIT mappings we force the legacy mmap base */
 	if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
 		goto bottomup;
 
diff --git a/trunk/arch/x86/kernel/tsc.c b/trunk/arch/x86/kernel/tsc.c
index 4b9ea101fe3b..06ccb5073a3f 100644
--- a/trunk/arch/x86/kernel/tsc.c
+++ b/trunk/arch/x86/kernel/tsc.c
@@ -623,8 +623,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	ns_now = __cycles_2_ns(tsc_now);
 
 	if (cpu_khz) {
-		*scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
-				cpu_khz / 2) / cpu_khz;
+		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
 		*offset = ns_now - mult_frac(tsc_now, *scale,
 					     (1UL << CYC2NS_SCALE_FACTOR));
 	}
diff --git a/trunk/arch/x86/kernel/uprobes.c b/trunk/arch/x86/kernel/uprobes.c
index 0ba4cfb4f412..c71025b67462 100644
--- a/trunk/arch/x86/kernel/uprobes.c
+++ b/trunk/arch/x86/kernel/uprobes.c
@@ -680,10 +680,8 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		if (auprobe->insn[i] == 0x66)
 			continue;
 
-		if (auprobe->insn[i] == 0x90) {
-			regs->ip += i + 1;
+		if (auprobe->insn[i] == 0x90)
 			return true;
-		}
 
 		break;
 	}
diff --git a/trunk/arch/x86/kernel/x86_init.c b/trunk/arch/x86/kernel/x86_init.c
index d065d67c2672..7a3d075a814a 100644
--- a/trunk/arch/x86/kernel/x86_init.c
+++ b/trunk/arch/x86/kernel/x86_init.c
@@ -19,7 +19,6 @@
 #include <asm/time.h>
 #include <asm/irq.h>
 #include <asm/io_apic.h>
-#include <asm/hpet.h>
 #include <asm/pat.h>
 #include <asm/tsc.h>
 #include <asm/iommu.h>
@@ -112,22 +111,15 @@ struct x86_platform_ops x86_platform = {
 
 EXPORT_SYMBOL_GPL(x86_platform);
 struct x86_msi_ops x86_msi = {
-	.setup_msi_irqs		= native_setup_msi_irqs,
-	.compose_msi_msg	= native_compose_msi_msg,
-	.teardown_msi_irq	= native_teardown_msi_irq,
-	.teardown_msi_irqs	= default_teardown_msi_irqs,
-	.restore_msi_irqs	= default_restore_msi_irqs,
-	.setup_hpet_msi		= default_setup_hpet_msi,
+	.setup_msi_irqs = native_setup_msi_irqs,
+	.teardown_msi_irq = native_teardown_msi_irq,
+	.teardown_msi_irqs = default_teardown_msi_irqs,
+	.restore_msi_irqs = default_restore_msi_irqs,
 };
 
 struct x86_io_apic_ops x86_io_apic_ops = {
-	.init			= native_io_apic_init_mappings,
-	.read			= native_io_apic_read,
-	.write			= native_io_apic_write,
-	.modify			= native_io_apic_modify,
-	.disable		= native_disable_io_apic,
-	.print_entries		= native_io_apic_print_entries,
-	.set_affinity		= native_ioapic_set_affinity,
-	.setup_entry		= native_setup_ioapic_entry,
-	.eoi_ioapic_pin		= native_eoi_ioapic_pin,
+	.init	= native_io_apic_init_mappings,
+	.read	= native_io_apic_read,
+	.write	= native_io_apic_write,
+	.modify	= native_io_apic_modify,
 };
diff --git a/trunk/arch/x86/mm/fault.c b/trunk/arch/x86/mm/fault.c
index fb674fd3fc22..027088f2f7dd 100644
--- a/trunk/arch/x86/mm/fault.c
+++ b/trunk/arch/x86/mm/fault.c
@@ -748,15 +748,13 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 				return;
 		}
 #endif
-		/* Kernel addresses are always protection faults: */
-		if (address >= TASK_SIZE)
-			error_code |= PF_PROT;
 
-		if (likely(show_unhandled_signals))
+		if (unlikely(show_unhandled_signals))
 			show_signal_msg(regs, error_code, address, tsk);
 
+		/* Kernel addresses are always protection faults: */
 		tsk->thread.cr2		= address;
-		tsk->thread.error_code	= error_code;
+		tsk->thread.error_code	= error_code | (address >= TASK_SIZE);
 		tsk->thread.trap_nr	= X86_TRAP_PF;
 
 		force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
diff --git a/trunk/arch/x86/mm/init_64.c b/trunk/arch/x86/mm/init_64.c
index d6eeead43758..2ead3c8a4c84 100644
--- a/trunk/arch/x86/mm/init_64.c
+++ b/trunk/arch/x86/mm/init_64.c
@@ -605,7 +605,7 @@ kernel_physical_mapping_init(unsigned long start,
 	}
 
 	if (pgd_changed)
-		sync_global_pgds(addr, end - 1);
+		sync_global_pgds(addr, end);
 
 	__flush_tlb_all();
 
@@ -831,9 +831,6 @@ int kern_addr_valid(unsigned long addr)
 	if (pud_none(*pud))
 		return 0;
 
-	if (pud_large(*pud))
-		return pfn_valid(pud_pfn(*pud));
-
 	pmd = pmd_offset(pud, addr);
 	if (pmd_none(*pmd))
 		return 0;
@@ -984,7 +981,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 		}
 
 	}
-	sync_global_pgds((unsigned long)start_page, end - 1);
+	sync_global_pgds((unsigned long)start_page, end);
 	return 0;
 }
 
diff --git a/trunk/arch/x86/mm/memtest.c b/trunk/arch/x86/mm/memtest.c
index 8dabbed409ee..c80b9fb95734 100644
--- a/trunk/arch/x86/mm/memtest.c
+++ b/trunk/arch/x86/mm/memtest.c
@@ -9,7 +9,6 @@
 #include <linux/memblock.h>
 
 static u64 patterns[] __initdata = {
-	/* The first entry has to be 0 to leave memtest with zeroed memory */
 	0,
 	0xffffffffffffffffULL,
 	0x5555555555555555ULL,
@@ -111,8 +110,15 @@ void __init early_memtest(unsigned long start, unsigned long end)
 		return;
 
 	printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
-	for (i = memtest_pattern-1; i < UINT_MAX; --i) {
+	for (i = 0; i < memtest_pattern; i++) {
 		idx = i % ARRAY_SIZE(patterns);
 		do_one_pass(patterns[idx], start, end);
 	}
+
+	if (idx > 0) {
+		printk(KERN_INFO "early_memtest: wipe out "
+		       "test pattern from memory\n");
+		/* additional test with pattern 0 will do this */
+		do_one_pass(0, start, end);
+	}
 }
diff --git a/trunk/arch/x86/mm/tlb.c b/trunk/arch/x86/mm/tlb.c
index 282375f13c7e..13a6b29e2e5d 100644
--- a/trunk/arch/x86/mm/tlb.c
+++ b/trunk/arch/x86/mm/tlb.c
@@ -335,7 +335,7 @@ static const struct file_operations fops_tlbflush = {
 	.llseek = default_llseek,
 };
 
-static int __init create_tlb_flushall_shift(void)
+static int __cpuinit create_tlb_flushall_shift(void)
 {
 	debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
 			    arch_debugfs_dir, NULL, &fops_tlbflush);
diff --git a/trunk/arch/x86/platform/efi/efi-bgrt.c b/trunk/arch/x86/platform/efi/efi-bgrt.c
index 7145ec63c520..d9c1b95af17c 100644
--- a/trunk/arch/x86/platform/efi/efi-bgrt.c
+++ b/trunk/arch/x86/platform/efi/efi-bgrt.c
@@ -11,21 +11,20 @@
  * published by the Free Software Foundation.
  */
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/acpi.h>
 #include <linux/efi.h>
 #include <linux/efi-bgrt.h>
 
 struct acpi_table_bgrt *bgrt_tab;
-void *__initdata bgrt_image;
-size_t __initdata bgrt_image_size;
+void *bgrt_image;
+size_t bgrt_image_size;
 
 struct bmp_header {
 	u16 id;
 	u32 size;
 } __packed;
 
-void __init efi_bgrt_init(void)
+void efi_bgrt_init(void)
 {
 	acpi_status status;
 	void __iomem *image;
diff --git a/trunk/arch/x86/platform/efi/efi.c b/trunk/arch/x86/platform/efi/efi.c
index 928bf837040a..77cf0090c0a3 100644
--- a/trunk/arch/x86/platform/efi/efi.c
+++ b/trunk/arch/x86/platform/efi/efi.c
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(efi_enabled);
 
 static int __init setup_noefi(char *arg)
 {
-	clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
+	clear_bit(EFI_BOOT, &x86_efi_facility);
 	return 0;
 }
 early_param("noefi", setup_noefi);
diff --git a/trunk/arch/x86/platform/sfi/sfi.c b/trunk/arch/x86/platform/sfi/sfi.c
index bcd1a703e3e6..7785b72ecc3a 100644
--- a/trunk/arch/x86/platform/sfi/sfi.c
+++ b/trunk/arch/x86/platform/sfi/sfi.c
@@ -35,7 +35,7 @@
 static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 
 /* All CPUs enumerated by SFI must be present and enabled */
-static void __init mp_sfi_register_lapic(u8 id)
+static void __cpuinit mp_sfi_register_lapic(u8 id)
 {
 	if (MAX_LOCAL_APIC - id <= 0) {
 		pr_warning("Processor #%d invalid (max %d)\n",
diff --git a/trunk/arch/x86/platform/uv/tlb_uv.c b/trunk/arch/x86/platform/uv/tlb_uv.c
index 0f92173a12b6..dbbdca5f508c 100644
--- a/trunk/arch/x86/platform/uv/tlb_uv.c
+++ b/trunk/arch/x86/platform/uv/tlb_uv.c
@@ -1467,7 +1467,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
 	}
 
 	if (input_arg == 0) {
-		elements = ARRAY_SIZE(stat_description);
+		elements = sizeof(stat_description)/sizeof(*stat_description);
 		printk(KERN_DEBUG "# cpu:      cpu number\n");
 		printk(KERN_DEBUG "Sender statistics:\n");
 		for (i = 0; i < elements; i++)
@@ -1508,7 +1508,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr,
 	char *q;
 	int cnt = 0;
 	int val;
-	int e = ARRAY_SIZE(tunables);
+	int e = sizeof(tunables) / sizeof(*tunables);
 
 	p = instr + strspn(instr, WHITESPACE);
 	q = p;
diff --git a/trunk/arch/x86/um/fault.c b/trunk/arch/x86/um/fault.c
index 84ac7f7b0257..8784ab30d91b 100644
--- a/trunk/arch/x86/um/fault.c
+++ b/trunk/arch/x86/um/fault.c
@@ -20,7 +20,7 @@ int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
 	const struct exception_table_entry *fixup;
 
 	fixup = search_exception_tables(address);
-	if (fixup) {
+	if (fixup != 0) {
 		UPT_IP(regs) = fixup->fixup;
 		return 1;
 	}
diff --git a/trunk/arch/x86/vdso/vclock_gettime.c b/trunk/arch/x86/vdso/vclock_gettime.c
index c74436e687bf..205ad328aa52 100644
--- a/trunk/arch/x86/vdso/vclock_gettime.c
+++ b/trunk/arch/x86/vdso/vclock_gettime.c
@@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void)
 
 static notrace cycle_t vread_hpet(void)
 {
-	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
+	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
 }
 
 #ifdef CONFIG_PARAVIRT_CLOCK
diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c
index 39928d16be3b..138e5667409a 100644
--- a/trunk/arch/x86/xen/enlighten.c
+++ b/trunk/arch/x86/xen/enlighten.c
@@ -1517,51 +1517,72 @@ asmlinkage void __init xen_start_kernel(void)
 #endif
 }
 
-void __ref xen_hvm_init_shared_info(void)
+#ifdef CONFIG_XEN_PVHVM
+#define HVM_SHARED_INFO_ADDR 0xFE700000UL
+static struct shared_info *xen_hvm_shared_info;
+static unsigned long xen_hvm_sip_phys;
+static int xen_major, xen_minor;
+
+static void xen_hvm_connect_shared_info(unsigned long pfn)
 {
-	int cpu;
 	struct xen_add_to_physmap xatp;
-	static struct shared_info *shared_info_page = 0;
 
-	if (!shared_info_page)
-		shared_info_page = (struct shared_info *)
-			extend_brk(PAGE_SIZE, PAGE_SIZE);
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
 	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+	xatp.gpfn = pfn;
 	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
 		BUG();
 
-	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+}
+static void __init xen_hvm_set_shared_info(struct shared_info *sip)
+{
+	int cpu;
+
+	HYPERVISOR_shared_info = sip;
 
 	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
 	 * page, we use it in the event channel upcall and in some pvclock
 	 * related functions. We don't need the vcpu_info placement
 	 * optimizations because we don't use any pv_mmu or pv_irq op on
-	 * HVM.
-	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
-	 * online but xen_hvm_init_shared_info is run at resume time too and
-	 * in that case multiple vcpus might be online. */
-	for_each_online_cpu(cpu) {
+	 * HVM. */
+	for_each_online_cpu(cpu)
 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+}
+
+/* Reconnect the shared_info pfn to a (new) mfn */
+void xen_hvm_resume_shared_info(void)
+{
+	xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
+}
+
+/* Xen tools prior to Xen 4 do not provide an E820_Reserved area for guest usage.
+ * On these old tools the shared info page will be placed in E820_Ram.
+ * Xen 4 provides an E820_Reserved area at 0xFC000000, and this code expects
+ * that nothing is mapped up to HVM_SHARED_INFO_ADDR.
+ * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used
+ * here for the shared info page. */
+static void __init xen_hvm_init_shared_info(void)
+{
+	if (xen_major < 4) {
+		xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		xen_hvm_sip_phys = __pa(xen_hvm_shared_info);
+	} else {
+		xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR;
+		set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys);
+		xen_hvm_shared_info =
+		(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 	}
+	xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
+	xen_hvm_set_shared_info(xen_hvm_shared_info);
 }
 
-#ifdef CONFIG_XEN_PVHVM
 static void __init init_hvm_pv_info(void)
 {
-	int major, minor;
-	uint32_t eax, ebx, ecx, edx, pages, msr, base;
+	uint32_t ecx, edx, pages, msr, base;
 	u64 pfn;
 
 	base = xen_cpuid_base();
-	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-
-	major = eax >> 16;
-	minor = eax & 0xffff;
-	printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
-
 	cpuid(base + 2, &pages, &msr, &ecx, &edx);
 
 	pfn = __pa(hypercall_page);
@@ -1612,12 +1633,22 @@ static void __init xen_hvm_guest_init(void)
 
 static bool __init xen_hvm_platform(void)
 {
+	uint32_t eax, ebx, ecx, edx, base;
+
 	if (xen_pv_domain())
 		return false;
 
-	if (!xen_cpuid_base())
+	base = xen_cpuid_base();
+	if (!base)
 		return false;
 
+	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+	xen_major = eax >> 16;
+	xen_minor = eax & 0xffff;
+
+	printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor);
+
 	return true;
 }
 
@@ -1637,7 +1668,6 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
 	.name			= "Xen HVM",
 	.detect			= xen_hvm_platform,
 	.init_platform		= xen_hvm_guest_init,
-	.x2apic_available	= xen_x2apic_para_available,
 };
 EXPORT_SYMBOL(x86_hyper_xen_hvm);
 #endif
diff --git a/trunk/arch/x86/xen/suspend.c b/trunk/arch/x86/xen/suspend.c
index 45329c8c226e..ae8a00c39de4 100644
--- a/trunk/arch/x86/xen/suspend.c
+++ b/trunk/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
 {
 #ifdef CONFIG_XEN_PVHVM
 	int cpu;
-	xen_hvm_init_shared_info();
+	xen_hvm_resume_shared_info();
 	xen_callback_vector();
 	xen_unplug_emulated_devices();
 	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/trunk/arch/x86/xen/xen-asm_32.S b/trunk/arch/x86/xen/xen-asm_32.S
index 33ca6e42a4ca..f9643fc50de5 100644
--- a/trunk/arch/x86/xen/xen-asm_32.S
+++ b/trunk/arch/x86/xen/xen-asm_32.S
@@ -89,11 +89,11 @@ ENTRY(xen_iret)
 	 */
 #ifdef CONFIG_SMP
 	GET_THREAD_INFO(%eax)
-	movl %ss:TI_cpu(%eax), %eax
-	movl %ss:__per_cpu_offset(,%eax,4), %eax
-	mov %ss:xen_vcpu(%eax), %eax
+	movl TI_cpu(%eax), %eax
+	movl __per_cpu_offset(,%eax,4), %eax
+	mov xen_vcpu(%eax), %eax
 #else
-	movl %ss:xen_vcpu, %eax
+	movl xen_vcpu, %eax
 #endif
 
 	/* check IF state we're restoring */
@@ -106,11 +106,11 @@ ENTRY(xen_iret)
 	 * resuming the code, so we don't have to be worried about
 	 * being preempted to another CPU.
 	 */
-	setz %ss:XEN_vcpu_info_mask(%eax)
+	setz XEN_vcpu_info_mask(%eax)
 xen_iret_start_crit:
 
 	/* check for unmasked and pending */
-	cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
+	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
 
 	/*
 	 * If there's something pending, mask events again so we can
@@ -118,7 +118,7 @@ xen_iret_start_crit:
 	 * touch XEN_vcpu_info_mask.
 	 */
 	jne 1f
-	movb $1, %ss:XEN_vcpu_info_mask(%eax)
+	movb $1, XEN_vcpu_info_mask(%eax)
 
 1:	popl %eax
 
diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h
index a95b41744ad0..d2e73d19d366 100644
--- a/trunk/arch/x86/xen/xen-ops.h
+++ b/trunk/arch/x86/xen/xen-ops.h
@@ -40,7 +40,7 @@ void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
-void xen_hvm_init_shared_info(void);
+void xen_hvm_resume_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
diff --git a/trunk/block/blk-exec.c b/trunk/block/blk-exec.c
index c88202f973d9..74638ec234c8 100644
--- a/trunk/block/blk-exec.c
+++ b/trunk/block/blk-exec.c
@@ -5,7 +5,6 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/sched/sysctl.h>
 
 #include "blk.h"
 
diff --git a/trunk/drivers/acpi/apei/cper.c b/trunk/drivers/acpi/apei/cper.c
index 1e5d8a40101e..e6defd86b424 100644
--- a/trunk/drivers/acpi/apei/cper.c
+++ b/trunk/drivers/acpi/apei/cper.c
@@ -29,7 +29,6 @@
 #include <linux/time.h>
 #include <linux/cper.h>
 #include <linux/acpi.h>
-#include <linux/pci.h>
 #include <linux/aer.h>
 
 /*
@@ -250,10 +249,6 @@ static const char *cper_pcie_port_type_strs[] = {
 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 			    const struct acpi_hest_generic_data *gdata)
 {
-#ifdef CONFIG_ACPI_APEI_PCIEAER
-	struct pci_dev *dev;
-#endif
-
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -286,18 +281,10 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
-			pcie->device_id.bus, pcie->device_id.function);
-	if (!dev) {
-		pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
-			pcie->device_id.segment, pcie->device_id.bus,
-			pcie->device_id.slot, pcie->device_id.function);
-		return;
+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
+		cper_print_aer(pfx, gdata->error_severity, aer_regs);
 	}
-	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
-		cper_print_aer(pfx, dev, gdata->error_severity,
-				(struct aer_capability_regs *) pcie->aer_info);
-	pci_dev_put(dev);
 #endif
 }
 
diff --git a/trunk/drivers/ata/ahci.c b/trunk/drivers/ata/ahci.c
index 495aeed26779..497912732566 100644
--- a/trunk/drivers/ata/ahci.c
+++ b/trunk/drivers/ata/ahci.c
@@ -1061,86 +1061,6 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
 {}
 #endif
 
-int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv)
-{
-	int rc;
-	unsigned int maxvec;
-
-	if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) {
-		rc = pci_enable_msi_block_auto(pdev, &maxvec);
-		if (rc > 0) {
-			if ((rc == maxvec) || (rc == 1))
-				return rc;
-			/*
-			 * Assume that advantage of multipe MSIs is negated,
-			 * so fallback to single MSI mode to save resources
-			 */
-			pci_disable_msi(pdev);
-			if (!pci_enable_msi(pdev))
-				return 1;
-		}
-	}
-
-	pci_intx(pdev, 1);
-	return 0;
-}
-
-/**
- *	ahci_host_activate - start AHCI host, request IRQs and register it
- *	@host: target ATA host
- *	@irq: base IRQ number to request
- *	@n_msis: number of MSIs allocated for this host
- *	@irq_handler: irq_handler used when requesting IRQs
- *	@irq_flags: irq_flags used when requesting IRQs
- *
- *	Similar to ata_host_activate, but requests IRQs according to AHCI-1.1
- *	when multiple MSIs were allocated. That is one MSI per port, starting
- *	from @irq.
- *
- *	LOCKING:
- *	Inherited from calling layer (may sleep).
- *
- *	RETURNS:
- *	0 on success, -errno otherwise.
- */
-int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis)
-{
-	int i, rc;
-
-	/* Sharing Last Message among several ports is not supported */
-	if (n_msis < host->n_ports)
-		return -EINVAL;
-
-	rc = ata_host_start(host);
-	if (rc)
-		return rc;
-
-	for (i = 0; i < host->n_ports; i++) {
-		rc = devm_request_threaded_irq(host->dev,
-			irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED,
-			dev_driver_string(host->dev), host->ports[i]);
-		if (rc)
-			goto out_free_irqs;
-	}
-
-	for (i = 0; i < host->n_ports; i++)
-		ata_port_desc(host->ports[i], "irq %d", irq + i);
-
-	rc = ata_host_register(host, &ahci_sht);
-	if (rc)
-		goto out_free_all_irqs;
-
-	return 0;
-
-out_free_all_irqs:
-	i = host->n_ports;
-out_free_irqs:
-	for (i--; i >= 0; i--)
-		devm_free_irq(host->dev, irq + i, host->ports[i]);
-
-	return rc;
-}
-
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	unsigned int board_id = ent->driver_data;
@@ -1149,7 +1069,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct device *dev = &pdev->dev;
 	struct ahci_host_priv *hpriv;
 	struct ata_host *host;
-	int n_ports, n_msis, i, rc;
+	int n_ports, i, rc;
 	int ahci_pci_bar = AHCI_PCI_BAR_STANDARD;
 
 	VPRINTK("ENTER\n");
@@ -1236,11 +1156,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ahci_sb600_enable_64bit(pdev))
 		hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY;
 
-	hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
+	if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev))
+		pci_intx(pdev, 1);
 
-	n_msis = ahci_init_interrupts(pdev, hpriv);
-	if (n_msis > 1)
-		hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
+	hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
 
 	/* save initial config */
 	ahci_pci_save_initial_config(pdev, hpriv);
@@ -1337,10 +1256,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ahci_pci_print_info(host);
 
 	pci_set_master(pdev);
-
-	if (hpriv->flags & AHCI_HFLAG_MULTI_MSI)
-		return ahci_host_activate(host, pdev->irq, n_msis);
-
 	return ata_host_activate(host, pdev->irq, ahci_interrupt, IRQF_SHARED,
 				 &ahci_sht);
 }
diff --git a/trunk/drivers/ata/ahci.h b/trunk/drivers/ata/ahci.h
index b830e6c9fe49..9be471200a07 100644
--- a/trunk/drivers/ata/ahci.h
+++ b/trunk/drivers/ata/ahci.h
@@ -231,7 +231,6 @@ enum {
 	AHCI_HFLAG_DELAY_ENGINE		= (1 << 15), /* do not start engine on
 						        port start (wait until
 						        error-handling stage) */
-	AHCI_HFLAG_MULTI_MSI		= (1 << 16), /* multiple PCI MSIs */
 
 	/* ap->flags bits */
 
@@ -298,8 +297,6 @@ struct ahci_port_priv {
 	unsigned int		ncq_saw_d2h:1;
 	unsigned int		ncq_saw_dmas:1;
 	unsigned int		ncq_saw_sdb:1;
-	u32			intr_status;	/* interrupts to handle */
-	spinlock_t		lock;		/* protects parent ata_port */
 	u32 			intr_mask;	/* interrupts to enable */
 	bool			fbs_supported;	/* set iff FBS is supported */
 	bool			fbs_enabled;	/* set iff FBS is enabled */
@@ -362,10 +359,7 @@ void ahci_set_em_messages(struct ahci_host_priv *hpriv,
 			  struct ata_port_info *pi);
 int ahci_reset_em(struct ata_host *host);
 irqreturn_t ahci_interrupt(int irq, void *dev_instance);
-irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance);
-irqreturn_t ahci_thread_fn(int irq, void *dev_instance);
 void ahci_print_info(struct ata_host *host, const char *scc_s);
-int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis);
 
 static inline void __iomem *__ahci_port_base(struct ata_host *host,
 					     unsigned int port_no)
diff --git a/trunk/drivers/ata/libahci.c b/trunk/drivers/ata/libahci.c
index 34c82167b962..6cd7805e47ca 100644
--- a/trunk/drivers/ata/libahci.c
+++ b/trunk/drivers/ata/libahci.c
@@ -1655,16 +1655,19 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat)
 		ata_port_abort(ap);
 }
 
-static void ahci_handle_port_interrupt(struct ata_port *ap,
-				       void __iomem *port_mmio, u32 status)
+static void ahci_port_intr(struct ata_port *ap)
 {
+	void __iomem *port_mmio = ahci_port_base(ap);
 	struct ata_eh_info *ehi = &ap->link.eh_info;
 	struct ahci_port_priv *pp = ap->private_data;
 	struct ahci_host_priv *hpriv = ap->host->private_data;
 	int resetting = !!(ap->pflags & ATA_PFLAG_RESETTING);
-	u32 qc_active = 0;
+	u32 status, qc_active = 0;
 	int rc;
 
+	status = readl(port_mmio + PORT_IRQ_STAT);
+	writel(status, port_mmio + PORT_IRQ_STAT);
+
 	/* ignore BAD_PMP while resetting */
 	if (unlikely(resetting))
 		status &= ~PORT_IRQ_BAD_PMP;
@@ -1740,107 +1743,6 @@ static void ahci_handle_port_interrupt(struct ata_port *ap,
 	}
 }
 
-void ahci_port_intr(struct ata_port *ap)
-{
-	void __iomem *port_mmio = ahci_port_base(ap);
-	u32 status;
-
-	status = readl(port_mmio + PORT_IRQ_STAT);
-	writel(status, port_mmio + PORT_IRQ_STAT);
-
-	ahci_handle_port_interrupt(ap, port_mmio, status);
-}
-
-irqreturn_t ahci_thread_fn(int irq, void *dev_instance)
-{
-	struct ata_port *ap = dev_instance;
-	struct ahci_port_priv *pp = ap->private_data;
-	void __iomem *port_mmio = ahci_port_base(ap);
-	unsigned long flags;
-	u32 status;
-
-	spin_lock_irqsave(&ap->host->lock, flags);
-	status = pp->intr_status;
-	if (status)
-		pp->intr_status = 0;
-	spin_unlock_irqrestore(&ap->host->lock, flags);
-
-	spin_lock_bh(ap->lock);
-	ahci_handle_port_interrupt(ap, port_mmio, status);
-	spin_unlock_bh(ap->lock);
-
-	return IRQ_HANDLED;
-}
-EXPORT_SYMBOL_GPL(ahci_thread_fn);
-
-void ahci_hw_port_interrupt(struct ata_port *ap)
-{
-	void __iomem *port_mmio = ahci_port_base(ap);
-	struct ahci_port_priv *pp = ap->private_data;
-	u32 status;
-
-	status = readl(port_mmio + PORT_IRQ_STAT);
-	writel(status, port_mmio + PORT_IRQ_STAT);
-
-	pp->intr_status |= status;
-}
-
-irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance)
-{
-	struct ata_port *ap_this = dev_instance;
-	struct ahci_port_priv *pp = ap_this->private_data;
-	struct ata_host *host = ap_this->host;
-	struct ahci_host_priv *hpriv = host->private_data;
-	void __iomem *mmio = hpriv->mmio;
-	unsigned int i;
-	u32 irq_stat, irq_masked;
-
-	VPRINTK("ENTER\n");
-
-	spin_lock(&host->lock);
-
-	irq_stat = readl(mmio + HOST_IRQ_STAT);
-
-	if (!irq_stat) {
-		u32 status = pp->intr_status;
-
-		spin_unlock(&host->lock);
-
-		VPRINTK("EXIT\n");
-
-		return status ? IRQ_WAKE_THREAD : IRQ_NONE;
-	}
-
-	irq_masked = irq_stat & hpriv->port_map;
-
-	for (i = 0; i < host->n_ports; i++) {
-		struct ata_port *ap;
-
-		if (!(irq_masked & (1 << i)))
-			continue;
-
-		ap = host->ports[i];
-		if (ap) {
-			ahci_hw_port_interrupt(ap);
-			VPRINTK("port %u\n", i);
-		} else {
-			VPRINTK("port %u (no irq)\n", i);
-			if (ata_ratelimit())
-				dev_warn(host->dev,
-					 "interrupt on disabled port %u\n", i);
-		}
-	}
-
-	writel(irq_stat, mmio + HOST_IRQ_STAT);
-
-	spin_unlock(&host->lock);
-
-	VPRINTK("EXIT\n");
-
-	return IRQ_WAKE_THREAD;
-}
-EXPORT_SYMBOL_GPL(ahci_hw_interrupt);
-
 irqreturn_t ahci_interrupt(int irq, void *dev_instance)
 {
 	struct ata_host *host = dev_instance;
@@ -2294,14 +2196,6 @@ static int ahci_port_start(struct ata_port *ap)
 	 */
 	pp->intr_mask = DEF_PORT_IRQ;
 
-	/*
-	 * Switch to per-port locking in case each port has its own MSI vector.
-	 */
-	if ((hpriv->flags & AHCI_HFLAG_MULTI_MSI)) {
-		spin_lock_init(&pp->lock);
-		ap->lock = &pp->lock;
-	}
-
 	ap->private_data = pp;
 
 	/* engage engines, captain */
diff --git a/trunk/drivers/block/sunvdc.c b/trunk/drivers/block/sunvdc.c
index 5814deb6963d..564156a8e572 100644
--- a/trunk/drivers/block/sunvdc.c
+++ b/trunk/drivers/block/sunvdc.c
@@ -461,7 +461,7 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
 	int op_len, err;
 	void *req_buf;
 
-	if (!(((u64)1 << (u64)op) & port->operations))
+	if (!(((u64)1 << ((u64)op - 1)) & port->operations))
 		return -EOPNOTSUPP;
 
 	switch (op) {
diff --git a/trunk/drivers/gpu/drm/nouveau/core/core/falcon.c b/trunk/drivers/gpu/drm/nouveau/core/core/falcon.c
index e05c15777588..6b0843c33877 100644
--- a/trunk/drivers/gpu/drm/nouveau/core/core/falcon.c
+++ b/trunk/drivers/gpu/drm/nouveau/core/core/falcon.c
@@ -73,11 +73,8 @@ _nouveau_falcon_init(struct nouveau_object *object)
 	nv_debug(falcon, "data limit: %d\n", falcon->data.limit);
 
 	/* wait for 'uc halted' to be signalled before continuing */
-	if (falcon->secret && falcon->version < 4) {
-		if (!falcon->version)
-			nv_wait(falcon, 0x008, 0x00000010, 0x00000010);
-		else
-			nv_wait(falcon, 0x180, 0x80000000, 0);
+	if (falcon->secret) {
+		nv_wait(falcon, 0x008, 0x00000010, 0x00000010);
 		nv_wo32(falcon, 0x004, 0x00000010);
 	}
 
diff --git a/trunk/drivers/gpu/drm/nouveau/core/core/subdev.c b/trunk/drivers/gpu/drm/nouveau/core/core/subdev.c
index 48f06378d3f9..f74c30aa33a0 100644
--- a/trunk/drivers/gpu/drm/nouveau/core/core/subdev.c
+++ b/trunk/drivers/gpu/drm/nouveau/core/core/subdev.c
@@ -99,7 +99,7 @@ nouveau_subdev_create_(struct nouveau_object *parent,
 	if (ret)
 		return ret;
 
-	__mutex_init(&subdev->mutex, subname, &oclass->lock_class_key);
+	mutex_init(&subdev->mutex);
 	subdev->name = subname;
 
 	if (parent) {
diff --git a/trunk/drivers/gpu/drm/nouveau/core/include/core/object.h b/trunk/drivers/gpu/drm/nouveau/core/include/core/object.h
index 106bb19fdd9a..5982935ee23a 100644
--- a/trunk/drivers/gpu/drm/nouveau/core/include/core/object.h
+++ b/trunk/drivers/gpu/drm/nouveau/core/include/core/object.h
@@ -50,13 +50,10 @@ int  nouveau_object_fini(struct nouveau_object *, bool suspend);
 
 extern struct nouveau_ofuncs nouveau_object_ofuncs;
 
-/* Don't allocate dynamically, because lockdep needs lock_class_keys to be in
- * ".data". */
 struct nouveau_oclass {
 	u32 handle;
-	struct nouveau_ofuncs * const ofuncs;
-	struct nouveau_omthds * const omthds;
-	struct lock_class_key lock_class_key;
+	struct nouveau_ofuncs *ofuncs;
+	struct nouveau_omthds *omthds;
 };
 
 #define nv_oclass(o)    nv_object(o)->oclass
diff --git a/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/base.c b/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
index d62045f454b2..d6d16007ec1a 100644
--- a/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
+++ b/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
@@ -86,8 +86,8 @@ nouveau_fb_preinit(struct nouveau_fb *pfb)
 			return ret;
 	}
 
-	if (!nouveau_mm_initialised(&pfb->tags)) {
-		ret = nouveau_mm_init(&pfb->tags, 0, tags ? ++tags : 0, 1);
+	if (!nouveau_mm_initialised(&pfb->tags) && tags) {
+		ret = nouveau_mm_init(&pfb->tags, 0, ++tags, 1);
 		if (ret)
 			return ret;
 	}
diff --git a/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c b/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
index eac236ed19b2..487cb8c6c204 100644
--- a/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
+++ b/trunk/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
@@ -99,7 +99,7 @@ nv50_fb_vram_init(struct nouveau_fb *pfb)
 	struct nouveau_bios *bios = nouveau_bios(device);
 	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
 	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
-	u32 size, tags = 0;
+	u32 size;
 	int ret;
 
 	pfb->ram.size = nv_rd32(pfb, 0x10020c);
@@ -140,11 +140,10 @@ nv50_fb_vram_init(struct nouveau_fb *pfb)
 			return ret;
 
 		pfb->ram.ranks = (nv_rd32(pfb, 0x100200) & 0x4) ? 2 : 1;
-		tags = nv_rd32(pfb, 0x100320);
 		break;
 	}
 
-	return tags;
+	return nv_rd32(pfb, 0x100320);
 }
 
 static int
diff --git a/trunk/drivers/gpu/drm/nouveau/nouveau_bo.c b/trunk/drivers/gpu/drm/nouveau/nouveau_bo.c
index 1699a9083a2f..69d7b1d0b9d6 100644
--- a/trunk/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/trunk/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -28,7 +28,6 @@
  */
 
 #include <core/engine.h>
-#include <linux/swiotlb.h>
 
 #include <subdev/fb.h>
 #include <subdev/vm.h>
diff --git a/trunk/drivers/gpu/drm/nouveau/nouveau_drm.c b/trunk/drivers/gpu/drm/nouveau/nouveau_drm.c
index 5e7aef23825a..8b090f1eb51d 100644
--- a/trunk/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/trunk/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -245,8 +245,6 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	return 0;
 }
 
-static struct lock_class_key drm_client_lock_class_key;
-
 static int
 nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 {
@@ -258,7 +256,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm);
 	if (ret)
 		return ret;
-	lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key);
 
 	dev->dev_private = drm;
 	drm->dev = dev;
diff --git a/trunk/drivers/gpu/drm/radeon/evergreen_cs.c b/trunk/drivers/gpu/drm/radeon/evergreen_cs.c
index ee4cff534f10..7a445666e71f 100644
--- a/trunk/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/trunk/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2909,14 +2909,14 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				return -EINVAL;
 			}
 			if (tiled) {
-				dst_offset = radeon_get_ib_value(p, idx+1);
+				dst_offset = ib[idx+1];
 				dst_offset <<= 8;
 
 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 				p->idx += count + 7;
 			} else {
-				dst_offset = radeon_get_ib_value(p, idx+1);
-				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
 
 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
@@ -2954,12 +2954,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
 							return -EINVAL;
 						}
-						dst_offset = radeon_get_ib_value(p, idx+1);
+						dst_offset = ib[idx+1];
 						dst_offset <<= 8;
-						dst2_offset = radeon_get_ib_value(p, idx+2);
+						dst2_offset = ib[idx+2];
 						dst2_offset <<= 8;
-						src_offset = radeon_get_ib_value(p, idx+8);
-						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 							dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3014,12 +3014,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
 							return -EINVAL;
 						}
-						dst_offset = radeon_get_ib_value(p, idx+1);
+						dst_offset = ib[idx+1];
 						dst_offset <<= 8;
-						dst2_offset = radeon_get_ib_value(p, idx+2);
+						dst2_offset = ib[idx+2];
 						dst2_offset <<= 8;
-						src_offset = radeon_get_ib_value(p, idx+8);
-						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3046,22 +3046,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 						/* detile bit */
 						if (idx_value & (1 << 31)) {
 							/* tiled src, linear dst */
-							src_offset = radeon_get_ib_value(p, idx+1);
+							src_offset = ib[idx+1];
 							src_offset <<= 8;
 							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
 
-							dst_offset = radeon_get_ib_value(p, idx+7);
-							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
+							dst_offset = ib[idx+7];
+							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
 							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 						} else {
 							/* linear src, tiled dst */
-							src_offset = radeon_get_ib_value(p, idx+7);
-							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
+							src_offset = ib[idx+7];
+							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
 							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
 							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
 
-							dst_offset = radeon_get_ib_value(p, idx+1);
+							dst_offset = ib[idx+1];
 							dst_offset <<= 8;
 							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 						}
@@ -3098,12 +3098,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
 							return -EINVAL;
 						}
-						dst_offset = radeon_get_ib_value(p, idx+1);
+						dst_offset = ib[idx+1];
 						dst_offset <<= 8;
-						dst2_offset = radeon_get_ib_value(p, idx+2);
+						dst2_offset = ib[idx+2];
 						dst2_offset <<= 8;
-						src_offset = radeon_get_ib_value(p, idx+8);
-						src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
+						src_offset = ib[idx+8];
+						src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 							dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3135,22 +3135,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 						/* detile bit */
 						if (idx_value & (1 << 31)) {
 							/* tiled src, linear dst */
-							src_offset = radeon_get_ib_value(p, idx+1);
+							src_offset = ib[idx+1];
 							src_offset <<= 8;
 							ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
 
-							dst_offset = radeon_get_ib_value(p, idx+7);
-							dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
+							dst_offset = ib[idx+7];
+							dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
 							ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 							ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 						} else {
 							/* linear src, tiled dst */
-							src_offset = radeon_get_ib_value(p, idx+7);
-							src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
+							src_offset = ib[idx+7];
+							src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
 							ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
 							ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
 
-							dst_offset = radeon_get_ib_value(p, idx+1);
+							dst_offset = ib[idx+1];
 							dst_offset <<= 8;
 							ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 						}
@@ -3176,10 +3176,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 					switch (misc) {
 					case 0:
 						/* L2L, byte */
-						src_offset = radeon_get_ib_value(p, idx+2);
-						src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
-						dst_offset = radeon_get_ib_value(p, idx+1);
-						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
+						src_offset = ib[idx+2];
+						src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+						dst_offset = ib[idx+1];
+						dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
 						if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
 							dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
 								 src_offset + count, radeon_bo_size(src_reloc->robj));
@@ -3216,12 +3216,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
 							return -EINVAL;
 						}
-						dst_offset = radeon_get_ib_value(p, idx+1);
-						dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
-						dst2_offset = radeon_get_ib_value(p, idx+2);
-						dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
-						src_offset = radeon_get_ib_value(p, idx+3);
-						src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
+						dst_offset = ib[idx+1];
+						dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+						dst2_offset = ib[idx+2];
+						dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+						src_offset = ib[idx+3];
+						src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
 						if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 							dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
 								 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3251,10 +3251,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 					}
 				} else {
 					/* L2L, dw */
-					src_offset = radeon_get_ib_value(p, idx+2);
-					src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
-					dst_offset = radeon_get_ib_value(p, idx+1);
-					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
+					src_offset = ib[idx+2];
+					src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+					dst_offset = ib[idx+1];
+					dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
 					if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 						dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
 							 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3279,8 +3279,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
 				return -EINVAL;
 			}
-			dst_offset = radeon_get_ib_value(p, idx+1);
-			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
 				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
 					 dst_offset, radeon_bo_size(dst_reloc->robj));
diff --git a/trunk/drivers/gpu/drm/radeon/r600_cs.c b/trunk/drivers/gpu/drm/radeon/r600_cs.c
index 9b2512bf1a46..69ec24ab8d63 100644
--- a/trunk/drivers/gpu/drm/radeon/r600_cs.c
+++ b/trunk/drivers/gpu/drm/radeon/r600_cs.c
@@ -2623,14 +2623,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 				return -EINVAL;
 			}
 			if (tiled) {
-				dst_offset = radeon_get_ib_value(p, idx+1);
+				dst_offset = ib[idx+1];
 				dst_offset <<= 8;
 
 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 				p->idx += count + 5;
 			} else {
-				dst_offset = radeon_get_ib_value(p, idx+1);
-				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
 
 				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
@@ -2658,32 +2658,32 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 				/* detile bit */
 				if (idx_value & (1 << 31)) {
 					/* tiled src, linear dst */
-					src_offset = radeon_get_ib_value(p, idx+1);
+					src_offset = ib[idx+1];
 					src_offset <<= 8;
 					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
 
-					dst_offset = radeon_get_ib_value(p, idx+5);
-					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
+					dst_offset = ib[idx+5];
+					dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
 					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
 				} else {
 					/* linear src, tiled dst */
-					src_offset = radeon_get_ib_value(p, idx+5);
-					src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
+					src_offset = ib[idx+5];
+					src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
 					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
 					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
 
-					dst_offset = radeon_get_ib_value(p, idx+1);
+					dst_offset = ib[idx+1];
 					dst_offset <<= 8;
 					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
 				}
 				p->idx += 7;
 			} else {
 				if (p->family >= CHIP_RV770) {
-					src_offset = radeon_get_ib_value(p, idx+2);
-					src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
-					dst_offset = radeon_get_ib_value(p, idx+1);
-					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
+					src_offset = ib[idx+2];
+					src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+					dst_offset = ib[idx+1];
+					dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
 
 					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
@@ -2691,10 +2691,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
 					p->idx += 5;
 				} else {
-					src_offset = radeon_get_ib_value(p, idx+2);
-					src_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
-					dst_offset = radeon_get_ib_value(p, idx+1);
-					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16;
+					src_offset = ib[idx+2];
+					src_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+					dst_offset = ib[idx+1];
+					dst_offset |= ((u64)(ib[idx+3] & 0xff0000)) << 16;
 
 					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
@@ -2724,8 +2724,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 				DRM_ERROR("bad DMA_PACKET_WRITE\n");
 				return -EINVAL;
 			}
-			dst_offset = radeon_get_ib_value(p, idx+1);
-			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
 				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
 					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_ttm.c b/trunk/drivers/gpu/drm/radeon/radeon_ttm.c
index 93f760e27a92..1d8ff2f850ba 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -38,7 +38,6 @@
 #include <drm/radeon_drm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 #include "radeon_reg.h"
 #include "radeon.h"
 
diff --git a/trunk/drivers/input/input.c b/trunk/drivers/input/input.c
index c04469928925..ce01332f7b3a 100644
--- a/trunk/drivers/input/input.c
+++ b/trunk/drivers/input/input.c
@@ -1785,13 +1785,12 @@ static void devm_input_device_release(struct device *dev, void *res)
  * its driver (or binding fails). Once managed input device is allocated,
  * it is ready to be set up and registered in the same fashion as regular
  * input device. There are no special devm_input_device_[un]register()
- * variants, regular ones work with both managed and unmanaged devices,
- * should you need them. In most cases however, managed input device need
- * not be explicitly unregistered or freed.
+ * variants, regular ones work with both managed and unmanaged devices.
  *
  * NOTE: the owner device is set up as parent of input device and users
  * should not override it.
  */
+
 struct input_dev *devm_input_allocate_device(struct device *dev)
 {
 	struct input_dev *input;
@@ -2005,17 +2004,6 @@ static void devm_input_device_unregister(struct device *dev, void *res)
  * Once device has been successfully registered it can be unregistered
  * with input_unregister_device(); input_free_device() should not be
  * called in this case.
- *
- * Note that this function is also used to register managed input devices
- * (ones allocated with devm_input_allocate_device()). Such managed input
- * devices need not be explicitly unregistered or freed, their tear down
- * is controlled by the devres infrastructure. It is also worth noting
- * that tear down of managed input devices is internally a 2-step process:
- * registered managed input device is first unregistered, but stays in
- * memory and can still handle input_event() calls (although events will
- * not be delivered anywhere). The freeing of managed input device will
- * happen later, when devres stack is unwound to the point where device
- * allocation was made.
  */
 int input_register_device(struct input_dev *dev)
 {
diff --git a/trunk/drivers/input/joystick/analog.c b/trunk/drivers/input/joystick/analog.c
index 7cd74e29cbc8..358cd7ee905b 100644
--- a/trunk/drivers/input/joystick/analog.c
+++ b/trunk/drivers/input/joystick/analog.c
@@ -162,7 +162,7 @@ static unsigned int get_time_pit(void)
 #define GET_TIME(x)	do { x = get_cycles(); } while (0)
 #define DELTA(x,y)	((y)-(x))
 #define TIME_NAME	"PCC"
-#elif defined(CONFIG_MN10300) || defined(CONFIG_TILE)
+#elif defined(CONFIG_MN10300)
 #define GET_TIME(x)	do { x = get_cycles(); } while (0)
 #define DELTA(x, y)	((x) - (y))
 #define TIME_NAME	"TSC"
diff --git a/trunk/drivers/input/keyboard/lm8323.c b/trunk/drivers/input/keyboard/lm8323.c
index 0de23f41b2d3..93c812662134 100644
--- a/trunk/drivers/input/keyboard/lm8323.c
+++ b/trunk/drivers/input/keyboard/lm8323.c
@@ -398,7 +398,7 @@ static irqreturn_t lm8323_irq(int irq, void *_lm)
 			lm8323_configure(lm);
 		}
 		for (i = 0; i < LM8323_NUM_PWMS; i++) {
-			if (ints & (INT_PWM1 << i)) {
+			if (ints & (1 << (INT_PWM1 + i))) {
 				dev_vdbg(&lm->client->dev,
 					 "pwm%d engine completed\n", i);
 				pwm_done(&lm->pwm[i]);
diff --git a/trunk/drivers/input/tablet/wacom_sys.c b/trunk/drivers/input/tablet/wacom_sys.c
index aaf23aeae2ea..f92d34f45a1c 100644
--- a/trunk/drivers/input/tablet/wacom_sys.c
+++ b/trunk/drivers/input/tablet/wacom_sys.c
@@ -553,10 +553,10 @@ static int wacom_set_device_mode(struct usb_interface *intf, int report_id, int
 	if (!rep_data)
 		return error;
 
-	do {
-		rep_data[0] = report_id;
-		rep_data[1] = mode;
+	rep_data[0] = report_id;
+	rep_data[1] = mode;
 
+	do {
 		error = wacom_set_report(intf, WAC_HID_FEATURE_REPORT,
 		                         report_id, rep_data, length, 1);
 		if (error >= 0)
diff --git a/trunk/drivers/iommu/amd_iommu.c b/trunk/drivers/iommu/amd_iommu.c
index d33eaaf783ad..c1c74e030a58 100644
--- a/trunk/drivers/iommu/amd_iommu.c
+++ b/trunk/drivers/iommu/amd_iommu.c
@@ -4017,10 +4017,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
 
 			index -= count - 1;
 
-			cfg->remapped	      = 1;
 			irte_info             = &cfg->irq_2_iommu;
 			irte_info->sub_handle = devid;
 			irte_info->irte_index = index;
+			irte_info->iommu      = (void *)cfg;
 
 			goto out;
 		}
@@ -4127,9 +4127,9 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
 	index = attr->ioapic_pin;
 
 	/* Setup IRQ remapping info */
-	cfg->remapped	      = 1;
 	irte_info->sub_handle = devid;
 	irte_info->irte_index = index;
+	irte_info->iommu      = (void *)cfg;
 
 	/* Setup IRTE for IOMMU */
 	irte.val		= 0;
@@ -4288,9 +4288,9 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
 	devid		= get_device_id(&pdev->dev);
 	irte_info	= &cfg->irq_2_iommu;
 
-	cfg->remapped	      = 1;
 	irte_info->sub_handle = devid;
 	irte_info->irte_index = index + offset;
+	irte_info->iommu      = (void *)cfg;
 
 	return 0;
 }
@@ -4314,9 +4314,9 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id)
 	if (index < 0)
 		return index;
 
-	cfg->remapped	      = 1;
 	irte_info->sub_handle = devid;
 	irte_info->irte_index = index;
+	irte_info->iommu      = (void *)cfg;
 
 	return 0;
 }
diff --git a/trunk/drivers/iommu/dmar.c b/trunk/drivers/iommu/dmar.c
index 174bb654453d..86e2f4a62b9a 100644
--- a/trunk/drivers/iommu/dmar.c
+++ b/trunk/drivers/iommu/dmar.c
@@ -41,8 +41,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/iommu_table.h>
 
-#include "irq_remapping.h"
-
 /* No locks are needed as DMA remapping hardware unit
  * list is constructed at boot time and hotplug of
  * these units are not supported by the architecture.
diff --git a/trunk/drivers/iommu/intel-iommu.c b/trunk/drivers/iommu/intel-iommu.c
index 43d5c8b8e7ad..eca28014ef3e 100644
--- a/trunk/drivers/iommu/intel-iommu.c
+++ b/trunk/drivers/iommu/intel-iommu.c
@@ -46,8 +46,6 @@
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 
-#include "irq_remapping.h"
-
 #define ROOT_SIZE		VTD_PAGE_SIZE
 #define CONTEXT_SIZE		VTD_PAGE_SIZE
 
diff --git a/trunk/drivers/iommu/intel_irq_remapping.c b/trunk/drivers/iommu/intel_irq_remapping.c
index f3b8f23b5d8f..af8904de1d44 100644
--- a/trunk/drivers/iommu/intel_irq_remapping.c
+++ b/trunk/drivers/iommu/intel_irq_remapping.c
@@ -68,7 +68,6 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
 	struct ir_table *table = iommu->ir_table;
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
 	u16 index, start_index;
 	unsigned int mask = 0;
 	unsigned long flags;
@@ -116,7 +115,6 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	for (i = index; i < index + count; i++)
 		table->base[i].present = 1;
 
-	cfg->remapped = 1;
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index =  index;
 	irq_iommu->sub_handle = 0;
@@ -157,7 +155,6 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
 	unsigned long flags;
 
 	if (!irq_iommu)
@@ -165,7 +162,6 @@ static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subha
 
 	raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 
-	cfg->remapped = 1;
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index = index;
 	irq_iommu->sub_handle = subhandle;
@@ -429,22 +425,11 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
 
 	/* Enable interrupt-remapping */
 	iommu->gcmd |= DMA_GCMD_IRE;
-	iommu->gcmd &= ~DMA_GCMD_CFI;  /* Block compatibility-format MSIs */
 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 
 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 		      readl, (sts & DMA_GSTS_IRES), sts);
 
-	/*
-	 * With CFI clear in the Global Command register, we should be
-	 * protected from dangerous (i.e. compatibility) interrupts
-	 * regardless of x2apic status.  Check just to be sure.
-	 */
-	if (sts & DMA_GSTS_CFIS)
-		WARN(1, KERN_WARNING
-			"Compatibility-format IRQs enabled despite intr remapping;\n"
-			"you are vulnerable to IRQ injection.\n");
-
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
@@ -541,24 +526,20 @@ static int __init intel_irq_remapping_supported(void)
 static int __init intel_enable_irq_remapping(void)
 {
 	struct dmar_drhd_unit *drhd;
-	bool x2apic_present;
 	int setup = 0;
 	int eim = 0;
 
-	x2apic_present = x2apic_supported();
-
 	if (parse_ioapics_under_ir() != 1) {
 		printk(KERN_INFO "Not enable interrupt remapping\n");
-		goto error;
+		return -1;
 	}
 
-	if (x2apic_present) {
+	if (x2apic_supported()) {
 		eim = !dmar_x2apic_optout();
-		if (!eim)
-			printk(KERN_WARNING
-				"Your BIOS is broken and requested that x2apic be disabled.\n"
-				"This will slightly decrease performance.\n"
-				"Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
+		WARN(!eim, KERN_WARNING
+			   "Your BIOS is broken and requested that x2apic be disabled\n"
+			   "This will leave your machine vulnerable to irq-injection attacks\n"
+			   "Use 'intremap=no_x2apic_optout' to override BIOS request\n");
 	}
 
 	for_each_drhd_unit(drhd) {
@@ -597,7 +578,7 @@ static int __init intel_enable_irq_remapping(void)
 		if (eim && !ecap_eim_support(iommu->ecap)) {
 			printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
 			       " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
-			goto error;
+			return -1;
 		}
 	}
 
@@ -613,7 +594,7 @@ static int __init intel_enable_irq_remapping(void)
 			printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
 			       " invalidation, ecap %Lx, ret %d\n",
 			       drhd->reg_base_addr, iommu->ecap, ret);
-			goto error;
+			return -1;
 		}
 	}
 
@@ -636,14 +617,6 @@ static int __init intel_enable_irq_remapping(void)
 		goto error;
 
 	irq_remapping_enabled = 1;
-
-	/*
-	 * VT-d has a different layout for IO-APIC entries when
-	 * interrupt remapping is enabled. So it needs a special routine
-	 * to print IO-APIC entries for debugging purposes too.
-	 */
-	x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
-
 	pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
 
 	return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
@@ -652,11 +625,6 @@ static int __init intel_enable_irq_remapping(void)
 	/*
 	 * handle error condition gracefully here!
 	 */
-
-	if (x2apic_present)
-		WARN(1, KERN_WARNING
-			"Failed to enable irq remapping.  You are vulnerable to irq-injection attacks.\n");
-
 	return -1;
 }
 
diff --git a/trunk/drivers/iommu/irq_remapping.c b/trunk/drivers/iommu/irq_remapping.c
index d56f8c17c5fe..faf85d6e33fe 100644
--- a/trunk/drivers/iommu/irq_remapping.c
+++ b/trunk/drivers/iommu/irq_remapping.c
@@ -1,18 +1,11 @@
-#include <linux/seq_file.h>
-#include <linux/cpumask.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/cpumask.h>
 #include <linux/errno.h>
 #include <linux/msi.h>
-#include <linux/irq.h>
-#include <linux/pci.h>
 
 #include <asm/hw_irq.h>
 #include <asm/irq_remapping.h>
-#include <asm/processor.h>
-#include <asm/x86_init.h>
-#include <asm/apic.h>
 
 #include "irq_remapping.h"
 
@@ -24,152 +17,6 @@ int no_x2apic_optout;
 
 static struct irq_remap_ops *remap_ops;
 
-static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-				  int index, int sub_handle);
-static int set_remapped_irq_affinity(struct irq_data *data,
-				     const struct cpumask *mask,
-				     bool force);
-
-static bool irq_remapped(struct irq_cfg *cfg)
-{
-	return (cfg->remapped == 1);
-}
-
-static void irq_remapping_disable_io_apic(void)
-{
-	/*
-	 * With interrupt-remapping, for now we will use virtual wire A
-	 * mode, as virtual wire B is little complex (need to configure
-	 * both IOAPIC RTE as well as interrupt-remapping table entry).
-	 * As this gets called during crash dump, keep this simple for
-	 * now.
-	 */
-	if (cpu_has_apic || apic_from_smp_config())
-		disconnect_bsp_APIC(0);
-}
-
-static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
-{
-	int node, ret, sub_handle, index = 0;
-	unsigned int irq;
-	struct msi_desc *msidesc;
-
-	nvec = __roundup_pow_of_two(nvec);
-
-	WARN_ON(!list_is_singular(&dev->msi_list));
-	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-	WARN_ON(msidesc->irq);
-	WARN_ON(msidesc->msi_attrib.multiple);
-
-	node = dev_to_node(&dev->dev);
-	irq = __create_irqs(get_nr_irqs_gsi(), nvec, node);
-	if (irq == 0)
-		return -ENOSPC;
-
-	msidesc->msi_attrib.multiple = ilog2(nvec);
-	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
-		if (!sub_handle) {
-			index = msi_alloc_remapped_irq(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			ret = msi_setup_remapped_irq(dev, irq + sub_handle,
-						     index, sub_handle);
-			if (ret < 0)
-				goto error;
-		}
-		ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
-		if (ret < 0)
-			goto error;
-	}
-	return 0;
-
-error:
-	destroy_irqs(irq, nvec);
-
-	/*
-	 * Restore altered MSI descriptor fields and prevent just destroyed
-	 * IRQs from tearing down again in default_teardown_msi_irqs()
-	 */
-	msidesc->irq = 0;
-	msidesc->msi_attrib.multiple = 0;
-
-	return ret;
-}
-
-static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
-{
-	int node, ret, sub_handle, index = 0;
-	struct msi_desc *msidesc;
-	unsigned int irq;
-
-	node		= dev_to_node(&dev->dev);
-	irq		= get_nr_irqs_gsi();
-	sub_handle	= 0;
-
-	list_for_each_entry(msidesc, &dev->msi_list, list) {
-
-		irq = create_irq_nr(irq, node);
-		if (irq == 0)
-			return -1;
-
-		if (sub_handle == 0)
-			ret = index = msi_alloc_remapped_irq(dev, irq, nvec);
-		else
-			ret = msi_setup_remapped_irq(dev, irq, index, sub_handle);
-
-		if (ret < 0)
-			goto error;
-
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
-		if (ret < 0)
-			goto error;
-
-		sub_handle += 1;
-		irq        += 1;
-	}
-
-	return 0;
-
-error:
-	destroy_irq(irq);
-	return ret;
-}
-
-static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,
-					int nvec, int type)
-{
-	if (type == PCI_CAP_ID_MSI)
-		return do_setup_msi_irqs(dev, nvec);
-	else
-		return do_setup_msix_irqs(dev, nvec);
-}
-
-void eoi_ioapic_pin_remapped(int apic, int pin, int vector)
-{
-	/*
-	 * Intr-remapping uses pin number as the virtual vector
-	 * in the RTE. Actual vector is programmed in
-	 * intr-remapping table entry. Hence for the io-apic
-	 * EOI we use the pin number.
-	 */
-	io_apic_eoi(apic, pin);
-}
-
-static void __init irq_remapping_modify_x86_ops(void)
-{
-	x86_io_apic_ops.disable		= irq_remapping_disable_io_apic;
-	x86_io_apic_ops.set_affinity	= set_remapped_irq_affinity;
-	x86_io_apic_ops.setup_entry	= setup_ioapic_remapped_entry;
-	x86_io_apic_ops.eoi_ioapic_pin	= eoi_ioapic_pin_remapped;
-	x86_msi.setup_msi_irqs		= irq_remapping_setup_msi_irqs;
-	x86_msi.setup_hpet_msi		= setup_hpet_msi_remapped;
-	x86_msi.compose_msi_msg		= compose_remapped_msi_msg;
-}
-
 static __init int setup_nointremap(char *str)
 {
 	disable_irq_remap = 1;
@@ -232,24 +79,15 @@ int __init irq_remapping_prepare(void)
 
 int __init irq_remapping_enable(void)
 {
-	int ret;
-
 	if (!remap_ops || !remap_ops->enable)
 		return -ENODEV;
 
-	ret = remap_ops->enable();
-
-	if (irq_remapping_enabled)
-		irq_remapping_modify_x86_ops();
-
-	return ret;
+	return remap_ops->enable();
 }
 
 void irq_remapping_disable(void)
 {
-	if (!irq_remapping_enabled ||
-	    !remap_ops ||
-	    !remap_ops->disable)
+	if (!remap_ops || !remap_ops->disable)
 		return;
 
 	remap_ops->disable();
@@ -257,9 +95,7 @@ void irq_remapping_disable(void)
 
 int irq_remapping_reenable(int mode)
 {
-	if (!irq_remapping_enabled ||
-	    !remap_ops ||
-	    !remap_ops->reenable)
+	if (!remap_ops || !remap_ops->reenable)
 		return 0;
 
 	return remap_ops->reenable(mode);
@@ -267,9 +103,6 @@ int irq_remapping_reenable(int mode)
 
 int __init irq_remap_enable_fault_handling(void)
 {
-	if (!irq_remapping_enabled)
-		return 0;
-
 	if (!remap_ops || !remap_ops->enable_faulting)
 		return -ENODEV;
 
@@ -300,28 +133,23 @@ int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 
 void free_remapped_irq(int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
-
 	if (!remap_ops || !remap_ops->free_irq)
 		return;
 
-	if (irq_remapped(cfg))
-		remap_ops->free_irq(irq);
+	remap_ops->free_irq(irq);
 }
 
 void compose_remapped_msi_msg(struct pci_dev *pdev,
 			      unsigned int irq, unsigned int dest,
 			      struct msi_msg *msg, u8 hpet_id)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	if (!remap_ops || !remap_ops->compose_msi_msg)
+		return;
 
-	if (!irq_remapped(cfg))
-		native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
-	else if (remap_ops && remap_ops->compose_msi_msg)
-		remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+	remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
 }
 
-static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
 {
 	if (!remap_ops || !remap_ops->msi_alloc_irq)
 		return -ENODEV;
@@ -329,8 +157,8 @@ static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
 	return remap_ops->msi_alloc_irq(pdev, irq, nvec);
 }
 
-static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-				  int index, int sub_handle)
+int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+			   int index, int sub_handle)
 {
 	if (!remap_ops || !remap_ops->msi_setup_irq)
 		return -ENODEV;
@@ -345,42 +173,3 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
 
 	return remap_ops->setup_hpet_msi(irq, id);
 }
-
-void panic_if_irq_remap(const char *msg)
-{
-	if (irq_remapping_enabled)
-		panic(msg);
-}
-
-static void ir_ack_apic_edge(struct irq_data *data)
-{
-	ack_APIC_irq();
-}
-
-static void ir_ack_apic_level(struct irq_data *data)
-{
-	ack_APIC_irq();
-	eoi_ioapic_irq(data->irq, data->chip_data);
-}
-
-static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
-{
-	seq_printf(p, " IR-%s", data->chip->name);
-}
-
-void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
-	chip->irq_print_chip = ir_print_prefix;
-	chip->irq_ack = ir_ack_apic_edge;
-	chip->irq_eoi = ir_ack_apic_level;
-	chip->irq_set_affinity = x86_io_apic_ops.set_affinity;
-}
-
-bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip)
-{
-	if (!irq_remapped(cfg))
-		return false;
-	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-	irq_remap_modify_chip_defaults(chip);
-	return true;
-}
diff --git a/trunk/drivers/iommu/irq_remapping.h b/trunk/drivers/iommu/irq_remapping.h
index ecb637670405..95363acb583f 100644
--- a/trunk/drivers/iommu/irq_remapping.h
+++ b/trunk/drivers/iommu/irq_remapping.h
@@ -34,7 +34,6 @@ struct msi_msg;
 extern int disable_irq_remap;
 extern int disable_sourceid_checking;
 extern int no_x2apic_optout;
-extern int irq_remapping_enabled;
 
 struct irq_remap_ops {
 	/* Check whether Interrupt Remapping is supported */
diff --git a/trunk/drivers/isdn/mISDN/stack.c b/trunk/drivers/isdn/mISDN/stack.c
index deda591f70b9..5f21f629b7ae 100644
--- a/trunk/drivers/isdn/mISDN/stack.c
+++ b/trunk/drivers/isdn/mISDN/stack.c
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/mISDNif.h>
 #include <linux/kthread.h>
-#include <linux/sched.h>
 #include "core.h"
 
 static u_int	*debug;
@@ -203,9 +202,6 @@ static int
 mISDNStackd(void *data)
 {
 	struct mISDNstack *st = data;
-#ifdef MISDN_MSG_STATS
-	cputime_t utime, stime;
-#endif
 	int err = 0;
 
 	sigfillset(&current->blocked);
@@ -307,10 +303,9 @@ mISDNStackd(void *data)
 	       "msg %d sleep %d stopped\n",
 	       dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
 	       st->stopped_cnt);
-	task_cputime(st->thread, &utime, &stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
-	       dev_name(&st->dev->dev), utime, stime);
+	       dev_name(&st->dev->dev), st->thread->utime, st->thread->stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
 	       dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);
diff --git a/trunk/drivers/media/dvb-core/dvb_frontend.c b/trunk/drivers/media/dvb-core/dvb_frontend.c
index 0223ad255cb4..49d95040096a 100644
--- a/trunk/drivers/media/dvb-core/dvb_frontend.c
+++ b/trunk/drivers/media/dvb-core/dvb_frontend.c
@@ -1820,7 +1820,7 @@ static int dvb_frontend_ioctl(struct file *file,
 	struct dvb_frontend *fe = dvbdev->priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
-	int err = -EOPNOTSUPP;
+	int err = -ENOTTY;
 
 	dev_dbg(fe->dvb->device, "%s: (%d)\n", __func__, _IOC_NR(cmd));
 	if (fepriv->exit != DVB_FE_NO_EXIT)
@@ -1938,7 +1938,7 @@ static int dvb_frontend_ioctl_properties(struct file *file,
 		}
 
 	} else
-		err = -EOPNOTSUPP;
+		err = -ENOTTY;
 
 out:
 	kfree(tvp);
@@ -2071,7 +2071,7 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 	struct dvb_frontend *fe = dvbdev->priv;
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
-	int err = -EOPNOTSUPP;
+	int err = -ENOTTY;
 
 	switch (cmd) {
 	case FE_GET_INFO: {
diff --git a/trunk/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/trunk/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 0035c01660b6..56d3f697e0c7 100644
--- a/trunk/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/trunk/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -21,7 +21,7 @@
 
 #include "atl1c.h"
 
-#define ATL1C_DRV_VERSION "1.0.1.1-NAPI"
+#define ATL1C_DRV_VERSION "1.0.1.0-NAPI"
 char atl1c_driver_name[] = "atl1c";
 char atl1c_driver_version[] = ATL1C_DRV_VERSION;
 
@@ -1652,7 +1652,6 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
 	u16 num_alloc = 0;
 	u16 rfd_next_to_use, next_next;
 	struct atl1c_rx_free_desc *rfd_desc;
-	dma_addr_t mapping;
 
 	next_next = rfd_next_to_use = rfd_ring->next_to_use;
 	if (++next_next == rfd_ring->count)
@@ -1679,18 +1678,9 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
 		ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
 		buffer_info->skb = skb;
 		buffer_info->length = adapter->rx_buffer_len;
-		mapping = pci_map_single(pdev, vir_addr,
+		buffer_info->dma = pci_map_single(pdev, vir_addr,
 						buffer_info->length,
 						PCI_DMA_FROMDEVICE);
-		if (unlikely(pci_dma_mapping_error(pdev, mapping))) {
-			dev_kfree_skb(skb);
-			buffer_info->skb = NULL;
-			buffer_info->length = 0;
-			ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_FREE);
-			netif_warn(adapter, rx_err, adapter->netdev, "RX pci_map_single failed");
-			break;
-		}
-		buffer_info->dma = mapping;
 		ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE,
 			ATL1C_PCIMAP_FROMDEVICE);
 		rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
@@ -2025,29 +2015,7 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter,
 	return 0;
 }
 
-static void atl1c_tx_rollback(struct atl1c_adapter *adpt,
-			      struct atl1c_tpd_desc *first_tpd,
-			      enum atl1c_trans_queue type)
-{
-	struct atl1c_tpd_ring *tpd_ring = &adpt->tpd_ring[type];
-	struct atl1c_buffer *buffer_info;
-	struct atl1c_tpd_desc *tpd;
-	u16 first_index, index;
-
-	first_index = first_tpd - (struct atl1c_tpd_desc *)tpd_ring->desc;
-	index = first_index;
-	while (index != tpd_ring->next_to_use) {
-		tpd = ATL1C_TPD_DESC(tpd_ring, index);
-		buffer_info = &tpd_ring->buffer_info[index];
-		atl1c_clean_buffer(adpt->pdev, buffer_info, 0);
-		memset(tpd, 0, sizeof(struct atl1c_tpd_desc));
-		if (++index == tpd_ring->count)
-			index = 0;
-	}
-	tpd_ring->next_to_use = first_index;
-}
-
-static int atl1c_tx_map(struct atl1c_adapter *adapter,
+static void atl1c_tx_map(struct atl1c_adapter *adapter,
 		      struct sk_buff *skb, struct atl1c_tpd_desc *tpd,
 			enum atl1c_trans_queue type)
 {
@@ -2072,10 +2040,7 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
 		buffer_info->length = map_len;
 		buffer_info->dma = pci_map_single(adapter->pdev,
 					skb->data, hdr_len, PCI_DMA_TODEVICE);
-		if (unlikely(pci_dma_mapping_error(adapter->pdev,
-						   buffer_info->dma)))
-			goto err_dma;
-
+		ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
 		ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE,
 			ATL1C_PCIMAP_TODEVICE);
 		mapped_len += map_len;
@@ -2097,10 +2062,6 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
 		buffer_info->dma =
 			pci_map_single(adapter->pdev, skb->data + mapped_len,
 					buffer_info->length, PCI_DMA_TODEVICE);
-		if (unlikely(pci_dma_mapping_error(adapter->pdev,
-						   buffer_info->dma)))
-			goto err_dma;
-
 		ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
 		ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE,
 			ATL1C_PCIMAP_TODEVICE);
@@ -2122,9 +2083,6 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
 						    frag, 0,
 						    buffer_info->length,
 						    DMA_TO_DEVICE);
-		if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))
-			goto err_dma;
-
 		ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
 		ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_PAGE,
 			ATL1C_PCIMAP_TODEVICE);
@@ -2137,13 +2095,6 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
 	/* The last buffer info contain the skb address,
 	   so it will be free after unmap */
 	buffer_info->skb = skb;
-
-	return 0;
-
-err_dma:
-	buffer_info->dma = 0;
-	buffer_info->length = 0;
-	return -1;
 }
 
 static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb,
@@ -2206,18 +2157,10 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 	if (skb_network_offset(skb) != ETH_HLEN)
 		tpd->word1 |= 1 << TPD_ETH_TYPE_SHIFT; /* Ethernet frame */
 
-	if (atl1c_tx_map(adapter, skb, tpd, type) < 0) {
-		netif_info(adapter, tx_done, adapter->netdev,
-			   "tx-skb droppted due to dma error\n");
-		/* roll back tpd/buffer */
-		atl1c_tx_rollback(adapter, tpd, type);
-		spin_unlock_irqrestore(&adapter->tx_lock, flags);
-		dev_kfree_skb(skb);
-	} else {
-		atl1c_tx_queue(adapter, skb, tpd, type);
-		spin_unlock_irqrestore(&adapter->tx_lock, flags);
-	}
+	atl1c_tx_map(adapter, skb, tpd, type);
+	atl1c_tx_queue(adapter, skb, tpd, type);
 
+	spin_unlock_irqrestore(&adapter->tx_lock, flags);
 	return NETDEV_TX_OK;
 }
 
diff --git a/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index a5edac8df67b..f771ddfba646 100644
--- a/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/trunk/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -504,11 +504,13 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 		skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
 					tpa_info->parsing_flags, len_on_bd);
 
-		skb_shinfo(skb)->gso_type =
-			(GET_FLAG(tpa_info->parsing_flags,
-				  PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
-			 PRS_FLAG_OVERETH_IPV6) ?
-			SKB_GSO_TCPV6 : SKB_GSO_TCPV4;
+		/* set for GRO */
+		if (fp->mode == TPA_MODE_GRO)
+			skb_shinfo(skb)->gso_type =
+			    (GET_FLAG(tpa_info->parsing_flags,
+				      PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
+						PRS_FLAG_OVERETH_IPV6) ?
+				SKB_GSO_TCPV6 : SKB_GSO_TCPV4;
 	}
 
 
diff --git a/trunk/drivers/net/ethernet/cadence/macb.c b/trunk/drivers/net/ethernet/cadence/macb.c
index b9d4bb9530e5..a9b0830fb39d 100644
--- a/trunk/drivers/net/ethernet/cadence/macb.c
+++ b/trunk/drivers/net/ethernet/cadence/macb.c
@@ -693,11 +693,6 @@ static int macb_poll(struct napi_struct *napi, int budget)
 		 * get notified when new packets arrive.
 		 */
 		macb_writel(bp, IER, MACB_RX_INT_FLAGS);
-
-		/* Packets received while interrupts were disabled */
-		status = macb_readl(bp, RSR);
-		if (unlikely(status))
-			napi_reschedule(napi);
 	}
 
 	/* TODO: Handle errors */
diff --git a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b3e3294cfe53..20a5af6d87d0 100644
--- a/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/trunk/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1401,7 +1401,6 @@ static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring,
 	/* set gso_size to avoid messing up TCP MSS */
 	skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len),
 						 IXGBE_CB(skb)->append_cnt);
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 }
 
 static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring,
diff --git a/trunk/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/trunk/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 09aa310b6194..6f82812d0fab 100644
--- a/trunk/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/trunk/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -986,13 +986,8 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter,
 	th->seq = htonl(seq_number);
 	length = skb->len;
 
-	if (adapter->flags & QLCNIC_FW_LRO_MSS_CAP) {
+	if (adapter->flags & QLCNIC_FW_LRO_MSS_CAP)
 		skb_shinfo(skb)->gso_size = qlcnic_get_lro_sts_mss(sts_data1);
-		if (skb->protocol == htons(ETH_P_IPV6))
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
-		else
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-	}
 
 	if (vid != 0xffff)
 		__vlan_hwaccel_put_tag(skb, vid);
diff --git a/trunk/drivers/net/ethernet/realtek/r8169.c b/trunk/drivers/net/ethernet/realtek/r8169.c
index 998974f78742..11702324a071 100644
--- a/trunk/drivers/net/ethernet/realtek/r8169.c
+++ b/trunk/drivers/net/ethernet/realtek/r8169.c
@@ -450,6 +450,7 @@ enum rtl8168_registers {
 #define PWM_EN				(1 << 22)
 #define RXDV_GATED_EN			(1 << 19)
 #define EARLY_TALLY_EN			(1 << 16)
+#define FORCE_CLK			(1 << 15) /* force clock request */
 };
 
 enum rtl_register_content {
@@ -513,6 +514,7 @@ enum rtl_register_content {
 	PMEnable	= (1 << 0),	/* Power Management Enable */
 
 	/* Config2 register p. 25 */
+	ClkReqEn	= (1 << 7),	/* Clock Request Enable */
 	MSIEnable	= (1 << 5),	/* 8169 only. Reserved in the 8168. */
 	PCI_Clock_66MHz = 0x01,
 	PCI_Clock_33MHz = 0x00,
@@ -533,6 +535,7 @@ enum rtl_register_content {
 	Spi_en		= (1 << 3),
 	LanWake		= (1 << 1),	/* LanWake enable/disable */
 	PMEStatus	= (1 << 0),	/* PME status can be reset by PCI RST# */
+	ASPM_en		= (1 << 0),	/* ASPM enable */
 
 	/* TBICSR p.28 */
 	TBIReset	= 0x80000000,
@@ -681,6 +684,7 @@ enum features {
 	RTL_FEATURE_WOL		= (1 << 0),
 	RTL_FEATURE_MSI		= (1 << 1),
 	RTL_FEATURE_GMII	= (1 << 2),
+	RTL_FEATURE_FW_LOADED	= (1 << 3),
 };
 
 struct rtl8169_counters {
@@ -2385,8 +2389,10 @@ static void rtl_apply_firmware(struct rtl8169_private *tp)
 	struct rtl_fw *rtl_fw = tp->rtl_fw;
 
 	/* TODO: release firmware once rtl_phy_write_fw signals failures. */
-	if (!IS_ERR_OR_NULL(rtl_fw))
+	if (!IS_ERR_OR_NULL(rtl_fw)) {
 		rtl_phy_write_fw(tp, rtl_fw);
+		tp->features |= RTL_FEATURE_FW_LOADED;
+	}
 }
 
 static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
@@ -2397,6 +2403,31 @@ static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
 		rtl_apply_firmware(tp);
 }
 
+static void r810x_aldps_disable(struct rtl8169_private *tp)
+{
+	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl_writephy(tp, 0x18, 0x0310);
+	msleep(100);
+}
+
+static void r810x_aldps_enable(struct rtl8169_private *tp)
+{
+	if (!(tp->features & RTL_FEATURE_FW_LOADED))
+		return;
+
+	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl_writephy(tp, 0x18, 0x8310);
+}
+
+static void r8168_aldps_enable_1(struct rtl8169_private *tp)
+{
+	if (!(tp->features & RTL_FEATURE_FW_LOADED))
+		return;
+
+	rtl_writephy(tp, 0x1f, 0x0000);
+	rtl_w1w0_phy(tp, 0x15, 0x1000, 0x0000);
+}
+
 static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
 {
 	static const struct phy_reg phy_reg_init[] = {
@@ -3187,6 +3218,8 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
 	rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400);
 	rtl_writephy(tp, 0x1f, 0x0000);
 
+	r8168_aldps_enable_1(tp);
+
 	/* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
 	rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
 }
@@ -3261,6 +3294,8 @@ static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy(tp, 0x05, 0x8b85);
 	rtl_w1w0_phy(tp, 0x06, 0x4000, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0000);
+
+	r8168_aldps_enable_1(tp);
 }
 
 static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp)
@@ -3268,6 +3303,8 @@ static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp)
 	rtl_apply_firmware(tp);
 
 	rtl8168f_hw_phy_config(tp);
+
+	r8168_aldps_enable_1(tp);
 }
 
 static void rtl8411_hw_phy_config(struct rtl8169_private *tp)
@@ -3365,6 +3402,8 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp)
 	rtl_w1w0_phy(tp, 0x19, 0x0000, 0x0001);
 	rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400);
 	rtl_writephy(tp, 0x1f, 0x0000);
+
+	r8168_aldps_enable_1(tp);
 }
 
 static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
@@ -3450,21 +3489,19 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
 	};
 
 	/* Disable ALDPS before ram code */
-	rtl_writephy(tp, 0x1f, 0x0000);
-	rtl_writephy(tp, 0x18, 0x0310);
-	msleep(100);
+	r810x_aldps_disable(tp);
 
 	rtl_apply_firmware(tp);
 
 	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+
+	r810x_aldps_enable(tp);
 }
 
 static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
 {
 	/* Disable ALDPS before setting firmware */
-	rtl_writephy(tp, 0x1f, 0x0000);
-	rtl_writephy(tp, 0x18, 0x0310);
-	msleep(20);
+	r810x_aldps_disable(tp);
 
 	rtl_apply_firmware(tp);
 
@@ -3474,6 +3511,8 @@ static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy(tp, 0x10, 0x401f);
 	rtl_writephy(tp, 0x19, 0x7030);
 	rtl_writephy(tp, 0x1f, 0x0000);
+
+	r810x_aldps_enable(tp);
 }
 
 static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
@@ -3486,9 +3525,7 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
 	};
 
 	/* Disable ALDPS before ram code */
-	rtl_writephy(tp, 0x1f, 0x0000);
-	rtl_writephy(tp, 0x18, 0x0310);
-	msleep(100);
+	r810x_aldps_disable(tp);
 
 	rtl_apply_firmware(tp);
 
@@ -3496,6 +3533,8 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
 
 	rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
+
+	r810x_aldps_enable(tp);
 }
 
 static void rtl_hw_phy_config(struct net_device *dev)
@@ -5012,8 +5051,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 
 	RTL_W8(MaxTxPacketSize, EarlySize);
 
-	rtl_disable_clock_request(pdev);
-
 	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
 	RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
 
@@ -5022,7 +5059,8 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 
 	RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
 	RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-	RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+	RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en);
+	RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
 }
 
 static void rtl_hw_start_8168f(struct rtl8169_private *tp)
@@ -5047,13 +5085,12 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 
 	RTL_W8(MaxTxPacketSize, EarlySize);
 
-	rtl_disable_clock_request(pdev);
-
 	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
 	RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
 	RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
-	RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-	RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+	RTL_W32(MISC, RTL_R32(MISC) | PWM_EN | FORCE_CLK);
+	RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en);
+	RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
 }
 
 static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
@@ -5110,8 +5147,10 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
 	rtl_w1w0_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
 
 	RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
-	RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
+	RTL_W32(MISC, (RTL_R32(MISC) | FORCE_CLK) & ~RXDV_GATED_EN);
 	RTL_W8(MaxTxPacketSize, EarlySize);
+	RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
+	RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -5327,6 +5366,9 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
 
 	RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
 	RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
+	RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
+	RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
+	RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK);
 
 	rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
 }
@@ -5352,6 +5394,9 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
 	RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+	RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
+	RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
+	RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK);
 
 	rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
 
@@ -5373,7 +5418,10 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
 	/* Force LAN exit from ASPM if Rx/Tx are not idle */
 	RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
 
-	RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
+	RTL_W32(MISC,
+		(RTL_R32(MISC) | DISABLE_LAN_EN | FORCE_CLK) & ~EARLY_TALLY_EN);
+	RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
+	RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
 	RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
 	RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
 }
diff --git a/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b75f4b286895..f07c0612abf6 100644
--- a/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -69,7 +69,7 @@
 
 #undef STMMAC_XMIT_DEBUG
 /*#define STMMAC_XMIT_DEBUG*/
-#ifdef STMMAC_XMIT_DEBUG
+#ifdef STMMAC_TX_DEBUG
 #define TX_DBG(fmt, args...)  printk(fmt, ## args)
 #else
 #define TX_DBG(fmt, args...)  do { } while (0)
diff --git a/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 0b9829fe3eea..0376a5e6b2bf 100644
--- a/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/trunk/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -188,6 +188,8 @@ int stmmac_mdio_register(struct net_device *ndev)
 		goto bus_register_fail;
 	}
 
+	priv->mii = new_bus;
+
 	found = 0;
 	for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
 		struct phy_device *phydev = new_bus->phy_map[addr];
@@ -235,14 +237,8 @@ int stmmac_mdio_register(struct net_device *ndev)
 		}
 	}
 
-	if (!found) {
+	if (!found)
 		pr_warning("%s: No PHY found\n", ndev->name);
-		mdiobus_unregister(new_bus);
-		mdiobus_free(new_bus);
-		return -ENODEV;
-	}
-
-	priv->mii = new_bus;
 
 	return 0;
 
diff --git a/trunk/drivers/net/usb/qmi_wwan.c b/trunk/drivers/net/usb/qmi_wwan.c
index 19d903598b0d..c8e05e27f38c 100644
--- a/trunk/drivers/net/usb/qmi_wwan.c
+++ b/trunk/drivers/net/usb/qmi_wwan.c
@@ -411,7 +411,6 @@ static const struct usb_device_id products[] = {
 	},
 
 	/* 3. Combined interface devices matching on interface number */
-	{QMI_FIXED_INTF(0x0408, 0xea42, 4)},	/* Yota / Megafon M100-1 */
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x19d2, 0x0002, 1)},
 	{QMI_FIXED_INTF(0x19d2, 0x0012, 1)},
diff --git a/trunk/drivers/net/wireless/mwl8k.c b/trunk/drivers/net/wireless/mwl8k.c
index a00a03ea4ec9..83564d36e801 100644
--- a/trunk/drivers/net/wireless/mwl8k.c
+++ b/trunk/drivers/net/wireless/mwl8k.c
@@ -318,20 +318,20 @@ struct mwl8k_sta {
 #define MWL8K_STA(_sta) ((struct mwl8k_sta *)&((_sta)->drv_priv))
 
 static const struct ieee80211_channel mwl8k_channels_24[] = {
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2412, .hw_value = 1, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2417, .hw_value = 2, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2422, .hw_value = 3, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2427, .hw_value = 4, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2432, .hw_value = 5, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2437, .hw_value = 6, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2442, .hw_value = 7, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2447, .hw_value = 8, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2452, .hw_value = 9, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2457, .hw_value = 10, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2462, .hw_value = 11, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2467, .hw_value = 12, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2472, .hw_value = 13, },
-	{ .band = IEEE80211_BAND_2GHZ, .center_freq = 2484, .hw_value = 14, },
+	{ .center_freq = 2412, .hw_value = 1, },
+	{ .center_freq = 2417, .hw_value = 2, },
+	{ .center_freq = 2422, .hw_value = 3, },
+	{ .center_freq = 2427, .hw_value = 4, },
+	{ .center_freq = 2432, .hw_value = 5, },
+	{ .center_freq = 2437, .hw_value = 6, },
+	{ .center_freq = 2442, .hw_value = 7, },
+	{ .center_freq = 2447, .hw_value = 8, },
+	{ .center_freq = 2452, .hw_value = 9, },
+	{ .center_freq = 2457, .hw_value = 10, },
+	{ .center_freq = 2462, .hw_value = 11, },
+	{ .center_freq = 2467, .hw_value = 12, },
+	{ .center_freq = 2472, .hw_value = 13, },
+	{ .center_freq = 2484, .hw_value = 14, },
 };
 
 static const struct ieee80211_rate mwl8k_rates_24[] = {
@@ -352,10 +352,10 @@ static const struct ieee80211_rate mwl8k_rates_24[] = {
 };
 
 static const struct ieee80211_channel mwl8k_channels_50[] = {
-	{ .band = IEEE80211_BAND_5GHZ, .center_freq = 5180, .hw_value = 36, },
-	{ .band = IEEE80211_BAND_5GHZ, .center_freq = 5200, .hw_value = 40, },
-	{ .band = IEEE80211_BAND_5GHZ, .center_freq = 5220, .hw_value = 44, },
-	{ .band = IEEE80211_BAND_5GHZ, .center_freq = 5240, .hw_value = 48, },
+	{ .center_freq = 5180, .hw_value = 36, },
+	{ .center_freq = 5200, .hw_value = 40, },
+	{ .center_freq = 5220, .hw_value = 44, },
+	{ .center_freq = 5240, .hw_value = 48, },
 };
 
 static const struct ieee80211_rate mwl8k_rates_50[] = {
diff --git a/trunk/drivers/pci/msi.c b/trunk/drivers/pci/msi.c
index 00cc78c7aa04..5099636a6e5f 100644
--- a/trunk/drivers/pci/msi.c
+++ b/trunk/drivers/pci/msi.c
@@ -845,32 +845,6 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 }
 EXPORT_SYMBOL(pci_enable_msi_block);
 
-int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
-{
-	int ret, pos, nvec;
-	u16 msgctl;
-
-	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
-	if (!pos)
-		return -EINVAL;
-
-	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
-	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
-
-	if (maxvec)
-		*maxvec = ret;
-
-	do {
-		nvec = ret;
-		ret = pci_enable_msi_block(dev, nvec);
-	} while (ret > 0);
-
-	if (ret < 0)
-		return ret;
-	return nvec;
-}
-EXPORT_SYMBOL(pci_enable_msi_block_auto);
-
 void pci_msi_shutdown(struct pci_dev *dev)
 {
 	struct msi_desc *desc;
diff --git a/trunk/drivers/pci/pcie/aer/aerdrv_errprint.c b/trunk/drivers/pci/pcie/aer/aerdrv_errprint.c
index 5ab14251839d..3ea51736f18d 100644
--- a/trunk/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/trunk/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,9 +23,6 @@
 
 #include "aerdrv.h"
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/ras.h>
-
 #define AER_AGENT_RECEIVER		0
 #define AER_AGENT_REQUESTER		1
 #define AER_AGENT_COMPLETER		2
@@ -124,11 +121,12 @@ static const char *aer_agent_string[] = {
 	"Transmitter ID"
 };
 
-static void __aer_print_error(struct pci_dev *dev,
+static void __aer_print_error(const char *prefix,
 			      struct aer_err_info *info)
 {
 	int i, status;
 	const char *errmsg = NULL;
+
 	status = (info->status & ~info->mask);
 
 	for (i = 0; i < 32; i++) {
@@ -143,22 +141,26 @@ static void __aer_print_error(struct pci_dev *dev,
 				aer_uncorrectable_error_string[i] : NULL;
 
 		if (errmsg)
-			dev_err(&dev->dev, "   [%2d] %-22s%s\n", i, errmsg,
+			printk("%s""   [%2d] %-22s%s\n", prefix, i, errmsg,
 				info->first_error == i ? " (First)" : "");
 		else
-			dev_err(&dev->dev, "   [%2d] Unknown Error Bit%s\n",
-				i, info->first_error == i ? " (First)" : "");
+			printk("%s""   [%2d] Unknown Error Bit%s\n", prefix, i,
+				info->first_error == i ? " (First)" : "");
 	}
 }
 
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 {
 	int id = ((dev->bus->number << 8) | dev->devfn);
+	char prefix[44];
+
+	snprintf(prefix, sizeof(prefix), "%s%s %s: ",
+		 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
+		 dev_driver_string(&dev->dev), dev_name(&dev->dev));
 
 	if (info->status == 0) {
-		dev_err(&dev->dev,
-			"PCIe Bus Error: severity=%s, type=Unaccessible, "
-			"id=%04x(Unregistered Agent ID)\n",
+		printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
+			"id=%04x(Unregistered Agent ID)\n", prefix,
 			aer_error_severity_string[info->severity], id);
 	} else {
 		int layer, agent;
@@ -166,24 +168,22 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 		layer = AER_GET_LAYER_ERROR(info->severity, info->status);
 		agent = AER_GET_AGENT(info->severity, info->status);
 
-		dev_err(&dev->dev,
-			"PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
-			aer_error_severity_string[info->severity],
+		printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
+			prefix, aer_error_severity_string[info->severity],
 			aer_error_layer[layer], id, aer_agent_string[agent]);
 
-		dev_err(&dev->dev,
-			"  device [%04x:%04x] error status/mask=%08x/%08x\n",
-			dev->vendor, dev->device,
+		printk("%s""  device [%04x:%04x] error status/mask=%08x/%08x\n",
+			prefix, dev->vendor, dev->device,
 			info->status, info->mask);
 
-		__aer_print_error(dev, info);
+		__aer_print_error(prefix, info);
 
 		if (info->tlp_header_valid) {
 			unsigned char *tlp = (unsigned char *) &info->tlp;
-			dev_err(&dev->dev, "  TLP Header:"
+			printk("%s""  TLP Header:"
 				" %02x%02x%02x%02x %02x%02x%02x%02x"
 				" %02x%02x%02x%02x %02x%02x%02x%02x\n",
-				*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+				prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
 				*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
 				*(tlp + 11), *(tlp + 10), *(tlp + 9),
 				*(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -192,11 +192,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 	}
 
 	if (info->id && info->error_dev_num > 1 && info->id == id)
-		dev_err(&dev->dev,
-			   "  Error of this Agent(%04x) is reported first\n",
-			id);
-	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
-			info->severity);
+		printk("%s""  Error of this Agent(%04x) is reported first\n",
+			prefix, id);
 }
 
 void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -220,7 +217,7 @@ int cper_severity_to_aer(int cper_severity)
 }
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 
-void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
+void cper_print_aer(const char *prefix, int cper_severity,
 		    struct aer_capability_regs *aer)
 {
 	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -242,27 +239,25 @@ void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
 	}
 	layer = AER_GET_LAYER_ERROR(aer_severity, status);
 	agent = AER_GET_AGENT(aer_severity, status);
-	dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
-	       status, mask);
+	printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
+	       prefix, status, mask);
 	cper_print_bits(prefix, status, status_strs, status_strs_size);
-	dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n",
+	printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
 	       aer_error_layer[layer], aer_agent_string[agent]);
 	if (aer_severity != AER_CORRECTABLE)
-		dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n",
-		       aer->uncor_severity);
+		printk("%s""aer_uncor_severity: 0x%08x\n",
+		       prefix, aer->uncor_severity);
 	if (tlp_header_valid) {
 		const unsigned char *tlp;
 		tlp = (const unsigned char *)&aer->header_log;
-		dev_err(&dev->dev, "aer_tlp_header:"
+		printk("%s""aer_tlp_header:"
 			" %02x%02x%02x%02x %02x%02x%02x%02x"
 			" %02x%02x%02x%02x %02x%02x%02x%02x\n",
-			*(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+			prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
 			*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
 			*(tlp + 11), *(tlp + 10), *(tlp + 9),
 			*(tlp + 8), *(tlp + 15), *(tlp + 14),
 			*(tlp + 13), *(tlp + 12));
 	}
-	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
-			aer_severity);
 }
 #endif
diff --git a/trunk/drivers/pci/remove.c b/trunk/drivers/pci/remove.c
index 84954a726a94..7c0fd9252e6f 100644
--- a/trunk/drivers/pci/remove.c
+++ b/trunk/drivers/pci/remove.c
@@ -19,8 +19,6 @@ static void pci_free_resources(struct pci_dev *dev)
 
 static void pci_stop_dev(struct pci_dev *dev)
 {
-	pci_pme_active(dev, false);
-
 	if (dev->is_added) {
 		pci_proc_detach_device(dev);
 		pci_remove_sysfs_dev_files(dev);
diff --git a/trunk/drivers/rtc/Kconfig b/trunk/drivers/rtc/Kconfig
index 5e44eaabf457..923a9da9c829 100644
--- a/trunk/drivers/rtc/Kconfig
+++ b/trunk/drivers/rtc/Kconfig
@@ -20,24 +20,14 @@ if RTC_CLASS
 config RTC_HCTOSYS
 	bool "Set system time from RTC on startup and resume"
 	default y
-	depends on !ALWAYS_USE_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be set using
 	  the value read from a specified RTC device. This is useful to avoid
 	  unnecessary fsck runs at boot time, and to network better.
 
-config RTC_SYSTOHC
-	bool "Set the RTC time based on NTP synchronization"
-	default y
-	depends on !ALWAYS_USE_PERSISTENT_CLOCK
-	help
-	  If you say yes here, the system time (wall clock) will be stored
-	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
-	  minutes if userspace reports synchronized NTP status.
-
 config RTC_HCTOSYS_DEVICE
 	string "RTC used to set the system time"
-	depends on RTC_HCTOSYS = y || RTC_SYSTOHC = y
+	depends on RTC_HCTOSYS = y
 	default "rtc0"
 	help
 	  The RTC device that will be used to (re)initialize the system
diff --git a/trunk/drivers/rtc/Makefile b/trunk/drivers/rtc/Makefile
index ec2988b00a44..4418ef3f9ecc 100644
--- a/trunk/drivers/rtc/Makefile
+++ b/trunk/drivers/rtc/Makefile
@@ -6,7 +6,6 @@ ccflags-$(CONFIG_RTC_DEBUG)	:= -DDEBUG
 
 obj-$(CONFIG_RTC_LIB)		+= rtc-lib.o
 obj-$(CONFIG_RTC_HCTOSYS)	+= hctosys.o
-obj-$(CONFIG_RTC_SYSTOHC)	+= systohc.o
 obj-$(CONFIG_RTC_CLASS)		+= rtc-core.o
 rtc-core-y			:= class.o interface.o
 
diff --git a/trunk/drivers/rtc/class.c b/trunk/drivers/rtc/class.c
index 26388f182594..5143629dedbd 100644
--- a/trunk/drivers/rtc/class.c
+++ b/trunk/drivers/rtc/class.c
@@ -50,10 +50,6 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg)
 	struct rtc_device	*rtc = to_rtc_device(dev);
 	struct rtc_time		tm;
 	struct timespec		delta, delta_delta;
-
-	if (has_persistent_clock())
-		return 0;
-
 	if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
 		return 0;
 
@@ -92,9 +88,6 @@ static int rtc_resume(struct device *dev)
 	struct timespec		new_system, new_rtc;
 	struct timespec		sleep_time;
 
-	if (has_persistent_clock())
-		return 0;
-
 	rtc_hctosys_ret = -ENODEV;
 	if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
 		return 0;
diff --git a/trunk/drivers/rtc/rtc-pl031.c b/trunk/drivers/rtc/rtc-pl031.c
index 81c5077feff3..10c1a3454e48 100644
--- a/trunk/drivers/rtc/rtc-pl031.c
+++ b/trunk/drivers/rtc/rtc-pl031.c
@@ -350,9 +350,7 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
 	/* Enable the clockwatch on ST Variants */
 	if (vendor->clockwatch)
 		data |= RTC_CR_CWEN;
-	else
-		data |= RTC_CR_EN;
-	writel(data, ldata->base + RTC_CR);
+	writel(data | RTC_CR_EN, ldata->base + RTC_CR);
 
 	/*
 	 * On ST PL031 variants, the RTC reset value does not provide correct
diff --git a/trunk/drivers/rtc/systohc.c b/trunk/drivers/rtc/systohc.c
deleted file mode 100644
index bf3e242ccc5c..000000000000
--- a/trunk/drivers/rtc/systohc.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- */
-#include <linux/rtc.h>
-#include <linux/time.h>
-
-/**
- * rtc_set_ntp_time - Save NTP synchronized time to the RTC
- * @now: Current time of day
- *
- * Replacement for the NTP platform function update_persistent_clock
- * that stores time for later retrieval by rtc_hctosys.
- *
- * Returns 0 on successful RTC update, -ENODEV if a RTC update is not
- * possible at all, and various other -errno for specific temporary failure
- * cases.
- *
- * If temporary failure is indicated the caller should try again 'soon'
- */
-int rtc_set_ntp_time(struct timespec now)
-{
-	struct rtc_device *rtc;
-	struct rtc_time tm;
-	int err = -ENODEV;
-
-	if (now.tv_nsec < (NSEC_PER_SEC >> 1))
-		rtc_time_to_tm(now.tv_sec, &tm);
-	else
-		rtc_time_to_tm(now.tv_sec + 1, &tm);
-
-	rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE);
-	if (rtc) {
-		/* rtc_hctosys exclusively uses UTC, so we call set_time here,
-		 * not set_mmss. */
-		if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss))
-			err = rtc_set_time(rtc, &tm);
-		rtc_class_close(rtc);
-	}
-
-	return err;
-}
diff --git a/trunk/drivers/spi/spi.c b/trunk/drivers/spi/spi.c
index 3a6083b386a1..19ee901577da 100644
--- a/trunk/drivers/spi/spi.c
+++ b/trunk/drivers/spi/spi.c
@@ -33,7 +33,7 @@
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/export.h>
-#include <linux/sched/rt.h>
+#include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/ioport.h>
diff --git a/trunk/drivers/staging/csr/bh.c b/trunk/drivers/staging/csr/bh.c
index 7b133597e923..1a1f5c79822a 100644
--- a/trunk/drivers/staging/csr/bh.c
+++ b/trunk/drivers/staging/csr/bh.c
@@ -15,7 +15,7 @@
  */
 #include "csr_wifi_hip_unifi.h"
 #include "unifi_priv.h"
-#include <linux/sched/rt.h>
+
 
 /*
  * ---------------------------------------------------------------------------
diff --git a/trunk/drivers/staging/csr/unifi_sme.c b/trunk/drivers/staging/csr/unifi_sme.c
index 49395da34b7f..7c6c4138fc76 100644
--- a/trunk/drivers/staging/csr/unifi_sme.c
+++ b/trunk/drivers/staging/csr/unifi_sme.c
@@ -15,7 +15,7 @@
 #include "unifi_priv.h"
 #include "csr_wifi_hip_unifi.h"
 #include "csr_wifi_hip_conversions.h"
-#include <linux/sched/rt.h>
+
 
 
 
diff --git a/trunk/drivers/staging/iio/trigger/Kconfig b/trunk/drivers/staging/iio/trigger/Kconfig
index d44d3ad26fa5..7d3207559265 100644
--- a/trunk/drivers/staging/iio/trigger/Kconfig
+++ b/trunk/drivers/staging/iio/trigger/Kconfig
@@ -21,6 +21,7 @@ config IIO_GPIO_TRIGGER
 config IIO_SYSFS_TRIGGER
 	tristate "SYSFS trigger"
 	depends on SYSFS
+	depends on HAVE_IRQ_WORK
 	select IRQ_WORK
 	help
 	  Provides support for using SYSFS entry as IIO triggers.
diff --git a/trunk/drivers/staging/omapdrm/Kconfig b/trunk/drivers/staging/omapdrm/Kconfig
index 09f65dc3d2c8..b724a4131435 100644
--- a/trunk/drivers/staging/omapdrm/Kconfig
+++ b/trunk/drivers/staging/omapdrm/Kconfig
@@ -3,8 +3,8 @@ config DRM_OMAP
 	tristate "OMAP DRM"
 	depends on DRM && !CONFIG_FB_OMAP2
 	depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM
-	depends on OMAP2_DSS
 	select DRM_KMS_HELPER
+	select OMAP2_DSS
 	select FB_SYS_FILLRECT
 	select FB_SYS_COPYAREA
 	select FB_SYS_IMAGEBLIT
diff --git a/trunk/drivers/tty/sysrq.c b/trunk/drivers/tty/sysrq.c
index 40e5b3919e27..b3c4a250ff86 100644
--- a/trunk/drivers/tty/sysrq.c
+++ b/trunk/drivers/tty/sysrq.c
@@ -15,7 +15,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
diff --git a/trunk/drivers/video/omap2/dss/dss_features.c b/trunk/drivers/video/omap2/dss/dss_features.c
index d7d66ef5cb58..18688c12e30d 100644
--- a/trunk/drivers/video/omap2/dss/dss_features.c
+++ b/trunk/drivers/video/omap2/dss/dss_features.c
@@ -538,7 +538,6 @@ static const enum dss_feat_id omap3630_dss_feat_list[] = {
 	FEAT_ALPHA_FIXED_ZORDER,
 	FEAT_FIFO_MERGE,
 	FEAT_OMAP3_DSI_FIFO_BUG,
-	FEAT_DPI_USES_VDDS_DSI,
 };
 
 static const enum dss_feat_id omap4430_es1_0_dss_feat_list[] = {
diff --git a/trunk/drivers/xen/pcpu.c b/trunk/drivers/xen/pcpu.c
index 5a27a4599a4a..067fcfa1723e 100644
--- a/trunk/drivers/xen/pcpu.c
+++ b/trunk/drivers/xen/pcpu.c
@@ -278,7 +278,8 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu)
 	 * Only those at cpu present map has its sys interface.
 	 */
 	if (info->flags & XEN_PCPU_FLAGS_INVALID) {
-		unregister_and_remove_pcpu(pcpu);
+		if (pcpu)
+			unregister_and_remove_pcpu(pcpu);
 		return 0;
 	}
 
diff --git a/trunk/fs/binfmt_elf.c b/trunk/fs/binfmt_elf.c
index 49d0b43458b7..0c42cdbabecf 100644
--- a/trunk/fs/binfmt_elf.c
+++ b/trunk/fs/binfmt_elf.c
@@ -33,7 +33,6 @@
 #include <linux/elf.h>
 #include <linux/utsname.h>
 #include <linux/coredump.h>
-#include <linux/sched.h>
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -1321,11 +1320,8 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_t utime, stime;
-
-		task_cputime(p, &utime, &stime);
-		cputime_to_timeval(utime, &prstatus->pr_utime);
-		cputime_to_timeval(stime, &prstatus->pr_stime);
+		cputime_to_timeval(p->utime, &prstatus->pr_utime);
+		cputime_to_timeval(p->stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/trunk/fs/binfmt_elf_fdpic.c b/trunk/fs/binfmt_elf_fdpic.c
index cb240dd3b402..dc84732e554f 100644
--- a/trunk/fs/binfmt_elf_fdpic.c
+++ b/trunk/fs/binfmt_elf_fdpic.c
@@ -1375,11 +1375,8 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_t utime, stime;
-
-		task_cputime(p, &utime, &stime);
-		cputime_to_timeval(utime, &prstatus->pr_utime);
-		cputime_to_timeval(stime, &prstatus->pr_stime);
+		cputime_to_timeval(p->utime, &prstatus->pr_utime);
+		cputime_to_timeval(p->stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/trunk/fs/proc/array.c b/trunk/fs/proc/array.c
index f7ed9ee46eb9..6a91e6ffbcbd 100644
--- a/trunk/fs/proc/array.c
+++ b/trunk/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime += task_gtime(t);
+				gtime += t->gtime;
 				t = next_thread(t);
 			} while (t != task);
 
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
 		task_cputime_adjusted(task, &utime, &stime);
-		gtime = task_gtime(task);
+		gtime = task->gtime;
 	}
 
 	/* scale priority and nice values from timeslices to -20..20 */
diff --git a/trunk/fs/pstore/ram.c b/trunk/fs/pstore/ram.c
index 288f068740f6..7003e5266f25 100644
--- a/trunk/fs/pstore/ram.c
+++ b/trunk/fs/pstore/ram.c
@@ -167,16 +167,12 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
 {
 	char *hdr;
-	struct timespec timestamp;
+	struct timeval timestamp;
 	size_t len;
 
-	/* Report zeroed timestamp if called before timekeeping has resumed. */
-	if (__getnstimeofday(&timestamp)) {
-		timestamp.tv_sec = 0;
-		timestamp.tv_nsec = 0;
-	}
+	do_gettimeofday(&timestamp);
 	hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
-		(long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
+		(long)timestamp.tv_sec, (long)timestamp.tv_usec);
 	WARN_ON_ONCE(!hdr);
 	len = hdr ? strlen(hdr) : 0;
 	persistent_ram_write(prz, hdr, len);
diff --git a/trunk/fs/select.c b/trunk/fs/select.c
index 8c1c96c27062..2ef72d965036 100644
--- a/trunk/fs/select.c
+++ b/trunk/fs/select.c
@@ -26,7 +26,6 @@
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 #include <linux/hrtimer.h>
-#include <linux/sched/rt.h>
 
 #include <asm/uaccess.h>
 
diff --git a/trunk/include/asm-generic/cputime.h b/trunk/include/asm-generic/cputime.h
index 51969436b8b8..9a62937c56ca 100644
--- a/trunk/include/asm-generic/cputime.h
+++ b/trunk/include/asm-generic/cputime.h
@@ -4,12 +4,66 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-# include <asm-generic/cputime_jiffies.h>
-#endif
+typedef unsigned long __nocast cputime_t;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-# include <asm-generic/cputime_nsecs.h>
-#endif
+#define cputime_one_jiffy		jiffies_to_cputime(1)
+#define cputime_to_jiffies(__ct)	(__force unsigned long)(__ct)
+#define cputime_to_scaled(__ct)		(__ct)
+#define jiffies_to_cputime(__hz)	(__force cputime_t)(__hz)
+
+typedef u64 __nocast cputime64_t;
+
+#define cputime64_to_jiffies64(__ct)	(__force u64)(__ct)
+#define jiffies64_to_cputime64(__jif)	(__force cputime64_t)(__jif)
+
+#define nsecs_to_cputime64(__ct)	\
+	jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
+
+
+/*
+ * Convert cputime to microseconds and back.
+ */
+#define cputime_to_usecs(__ct)		\
+	jiffies_to_usecs(cputime_to_jiffies(__ct))
+#define usecs_to_cputime(__usec)	\
+	jiffies_to_cputime(usecs_to_jiffies(__usec))
+#define usecs_to_cputime64(__usec)	\
+	jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
+
+/*
+ * Convert cputime to seconds and back.
+ */
+#define cputime_to_secs(jif)		(cputime_to_jiffies(jif) / HZ)
+#define secs_to_cputime(sec)		jiffies_to_cputime((sec) * HZ)
+
+/*
+ * Convert cputime to timespec and back.
+ */
+#define timespec_to_cputime(__val)	\
+	jiffies_to_cputime(timespec_to_jiffies(__val))
+#define cputime_to_timespec(__ct,__val)	\
+	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
+
+/*
+ * Convert cputime to timeval and back.
+ */
+#define timeval_to_cputime(__val)	\
+	jiffies_to_cputime(timeval_to_jiffies(__val))
+#define cputime_to_timeval(__ct,__val)	\
+	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
+
+/*
+ * Convert cputime to clock and back.
+ */
+#define cputime_to_clock_t(__ct)	\
+	jiffies_to_clock_t(cputime_to_jiffies(__ct))
+#define clock_t_to_cputime(__x)		\
+	jiffies_to_cputime(clock_t_to_jiffies(__x))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct)	\
+	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
 
 #endif
diff --git a/trunk/include/asm-generic/cputime_jiffies.h b/trunk/include/asm-generic/cputime_jiffies.h
deleted file mode 100644
index 272ecba9f588..000000000000
--- a/trunk/include/asm-generic/cputime_jiffies.h
+++ /dev/null
@@ -1,72 +0,0 @@
-#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
-#define _ASM_GENERIC_CPUTIME_JIFFIES_H
-
-typedef unsigned long __nocast cputime_t;
-
-#define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_to_jiffies(__ct)	(__force unsigned long)(__ct)
-#define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	(__force cputime_t)(__hz)
-
-typedef u64 __nocast cputime64_t;
-
-#define cputime64_to_jiffies64(__ct)	(__force u64)(__ct)
-#define jiffies64_to_cputime64(__jif)	(__force cputime64_t)(__jif)
-
-
-/*
- * Convert nanoseconds to cputime
- */
-#define nsecs_to_cputime64(__nsec)	\
-	jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
-#define nsecs_to_cputime(__nsec)	\
-	jiffies_to_cputime(nsecs_to_jiffies(__nsec))
-
-
-/*
- * Convert cputime to microseconds and back.
- */
-#define cputime_to_usecs(__ct)		\
-	jiffies_to_usecs(cputime_to_jiffies(__ct))
-#define usecs_to_cputime(__usec)	\
-	jiffies_to_cputime(usecs_to_jiffies(__usec))
-#define usecs_to_cputime64(__usec)	\
-	jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
-
-/*
- * Convert cputime to seconds and back.
- */
-#define cputime_to_secs(jif)		(cputime_to_jiffies(jif) / HZ)
-#define secs_to_cputime(sec)		jiffies_to_cputime((sec) * HZ)
-
-/*
- * Convert cputime to timespec and back.
- */
-#define timespec_to_cputime(__val)	\
-	jiffies_to_cputime(timespec_to_jiffies(__val))
-#define cputime_to_timespec(__ct,__val)	\
-	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to timeval and back.
- */
-#define timeval_to_cputime(__val)	\
-	jiffies_to_cputime(timeval_to_jiffies(__val))
-#define cputime_to_timeval(__ct,__val)	\
-	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to clock and back.
- */
-#define cputime_to_clock_t(__ct)	\
-	jiffies_to_clock_t(cputime_to_jiffies(__ct))
-#define clock_t_to_cputime(__x)		\
-	jiffies_to_cputime(clock_t_to_jiffies(__x))
-
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct)	\
-	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
-
-#endif
diff --git a/trunk/include/asm-generic/cputime_nsecs.h b/trunk/include/asm-generic/cputime_nsecs.h
deleted file mode 100644
index b6485cafb7bd..000000000000
--- a/trunk/include/asm-generic/cputime_nsecs.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Definitions for measuring cputime in nsecs resolution.
- *
- * Based on <arch/ia64/include/asm/cputime.h>
- *
- * Copyright (C) 2007 FUJITSU LIMITED
- * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
-#define _ASM_GENERIC_CPUTIME_NSECS_H
-
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cputime_one_jiffy		jiffies_to_cputime(1)
-
-/*
- * Convert cputime <-> jiffies (HZ)
- */
-#define cputime_to_jiffies(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__jif)	\
-	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif)	\
-	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
-
-
-/*
- * Convert cputime <-> nanoseconds
- */
-#define nsecs_to_cputime(__nsecs)	((__force u64)(__nsecs))
-
-
-/*
- * Convert cputime <-> microseconds
- */
-#define cputime_to_usecs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs)	\
-	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs)	\
-	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
-
-/*
- * Convert cputime <-> seconds
- */
-#define cputime_to_secs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs)		\
-	(__force cputime_t)((__secs) * NSEC_PER_SEC)
-
-/*
- * Convert cputime <-> timespec (nsec)
- */
-static inline cputime_t timespec_to_cputime(const struct timespec *val)
-{
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
-	return (__force cputime_t) ret;
-}
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
-{
-	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
-}
-
-/*
- * Convert cputime <-> timeval (msec)
- */
-static inline cputime_t timeval_to_cputime(struct timeval *val)
-{
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
-	return (__force cputime_t) ret;
-}
-static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
-{
-	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
-}
-
-/*
- * Convert cputime <-> clock (USER_HZ)
- */
-#define cputime_to_clock_t(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x)		\
-	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
-
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct)	\
-	cputime_to_clock_t((__force cputime_t)__ct)
-
-#endif
diff --git a/trunk/include/linux/aer.h b/trunk/include/linux/aer.h
index ec10e1b24c1c..544abdb2238c 100644
--- a/trunk/include/linux/aer.h
+++ b/trunk/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 }
 #endif
 
-extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
-			   int cper_severity, struct aer_capability_regs *aer);
+extern void cper_print_aer(const char *prefix, int cper_severity,
+			   struct aer_capability_regs *aer);
 extern int cper_severity_to_aer(int cper_severity);
 extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 			      int severity);
diff --git a/trunk/include/linux/clockchips.h b/trunk/include/linux/clockchips.h
index 66346521cb65..8a7096fcb01e 100644
--- a/trunk/include/linux/clockchips.h
+++ b/trunk/include/linux/clockchips.h
@@ -161,15 +161,6 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
 extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
-extern void tick_broadcast(const struct cpumask *mask);
-#else
-#define tick_broadcast	NULL
-#endif
-extern int tick_receive_broadcast(void);
-#endif
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
diff --git a/trunk/include/linux/context_tracking.h b/trunk/include/linux/context_tracking.h
index b28d161c1091..e24339ccb7f0 100644
--- a/trunk/include/linux/context_tracking.h
+++ b/trunk/include/linux/context_tracking.h
@@ -3,40 +3,12 @@
 
 #ifdef CONFIG_CONTEXT_TRACKING
 #include <linux/sched.h>
-#include <linux/percpu.h>
-
-struct context_tracking {
-	/*
-	 * When active is false, probes are unset in order
-	 * to minimize overhead: TIF flags are cleared
-	 * and calls to user_enter/exit are ignored. This
-	 * may be further optimized using static keys.
-	 */
-	bool active;
-	enum {
-		IN_KERNEL = 0,
-		IN_USER,
-	} state;
-};
-
-DECLARE_PER_CPU(struct context_tracking, context_tracking);
-
-static inline bool context_tracking_in_user(void)
-{
-	return __this_cpu_read(context_tracking.state) == IN_USER;
-}
-
-static inline bool context_tracking_active(void)
-{
-	return __this_cpu_read(context_tracking.active);
-}
 
 extern void user_enter(void);
 extern void user_exit(void);
 extern void context_tracking_task_switch(struct task_struct *prev,
 					 struct task_struct *next);
 #else
-static inline bool context_tracking_in_user(void) { return false; }
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
 static inline void context_tracking_task_switch(struct task_struct *prev,
diff --git a/trunk/include/linux/ftrace.h b/trunk/include/linux/ftrace.h
index e5ca8ef50e9b..92691d85c320 100644
--- a/trunk/include/linux/ftrace.h
+++ b/trunk/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  * SAVE_REGS - The ftrace_ops wants regs saved at each function called
  *            and passed to the callback. If this flag is set, but the
  *            architecture does not support passing regs
- *            (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
+ *            (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
  *            ftrace_ops will fail to register, unless the next flag
  *            is set.
  * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
 #endif
 
 #ifndef FTRACE_REGS_ADDR
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
 #else
 # define FTRACE_REGS_ADDR FTRACE_ADDR
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
  */
 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 /**
  * ftrace_modify_call - convert from one addr to another (no nop)
  * @rec: the mcount call site record
diff --git a/trunk/include/linux/ftrace_event.h b/trunk/include/linux/ftrace_event.h
index 13a54d0bdfa8..a3d489531d83 100644
--- a/trunk/include/linux/ftrace_event.h
+++ b/trunk/include/linux/ftrace_event.h
@@ -49,6 +49,7 @@ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
+	int			padding;
 };
 
 #define FTRACE_MAX_EVENT						\
@@ -83,9 +84,6 @@ struct trace_iterator {
 	long			idx;
 
 	cpumask_var_t		started;
-
-	/* it's true when current open file is snapshot */
-	bool			snapshot;
 };
 
 enum trace_iter_flags {
@@ -274,7 +272,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 extern int trace_add_event_call(struct ftrace_event_call *call);
 extern void trace_remove_event_call(struct ftrace_event_call *call);
 
-#define is_signed_type(type)	(((type)(-1)) < (type)0)
+#define is_signed_type(type)	(((type)(-1)) < 0)
 
 int trace_set_clr_event(const char *system, const char *event, int set);
 
diff --git a/trunk/include/linux/hardirq.h b/trunk/include/linux/hardirq.h
index 29eb805ea4a6..624ef3f45c8e 100644
--- a/trunk/include/linux/hardirq.h
+++ b/trunk/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		account_irq_enter_time(current);	\
+		vtime_account_irq_enter(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
 #define __irq_exit()					\
 	do {						\
 		trace_hardirq_exit();			\
-		account_irq_exit_time(current);		\
+		vtime_account_irq_exit(current);	\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
@@ -180,10 +180,10 @@ extern void irq_exit(void);
 
 #define nmi_enter()						\
 	do {							\
-		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
 		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		lockdep_off();					\
 		rcu_nmi_enter();				\
 		trace_hardirq_enter();				\
 	} while (0)
@@ -192,10 +192,10 @@ extern void irq_exit(void);
 	do {							\
 		trace_hardirq_exit();				\
 		rcu_nmi_exit();					\
+		lockdep_on();					\
 		BUG_ON(!in_nmi());				\
 		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
-		lockdep_on();					\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h
index 5cd0f0949927..6d087c5f57f7 100644
--- a/trunk/include/linux/init_task.h
+++ b/trunk/include/linux/init_task.h
@@ -10,9 +10,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/securebits.h>
-#include <linux/seqlock.h>
 #include <net/net_namespace.h>
-#include <linux/sched/rt.h>
 
 #ifdef CONFIG_SMP
 # define INIT_PUSHABLE_TASKS(tsk)					\
@@ -143,15 +141,6 @@ extern struct task_group root_task_group;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-# define INIT_VTIME(tsk)						\
-	.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock),	\
-	.vtime_snap = 0,				\
-	.vtime_snap_whence = VTIME_SYS,
-#else
-# define INIT_VTIME(tsk)
-#endif
-
 #define INIT_TASK_COMM "swapper"
 
 /*
@@ -221,7 +210,6 @@ extern struct task_group root_task_group;
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
 	INIT_CPUSET_SEQ							\
-	INIT_VTIME(tsk)							\
 }
 
 
diff --git a/trunk/include/linux/irq.h b/trunk/include/linux/irq.h
index bc4e06611958..fdf2c4a238cc 100644
--- a/trunk/include/linux/irq.h
+++ b/trunk/include/linux/irq.h
@@ -509,11 +509,8 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 
 /* Handle dynamic irq creation and destruction */
 extern unsigned int create_irq_nr(unsigned int irq_want, int node);
-extern unsigned int __create_irqs(unsigned int from, unsigned int count,
-				  int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
-extern void destroy_irqs(unsigned int irq, unsigned int count);
 
 /*
  * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
@@ -531,8 +528,6 @@ extern int irq_set_handler_data(unsigned int irq, void *data);
 extern int irq_set_chip_data(unsigned int irq, void *data);
 extern int irq_set_irq_type(unsigned int irq, unsigned int type);
 extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
-extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
-				struct msi_desc *entry);
 extern struct irq_data *irq_get_irq_data(unsigned int irq);
 
 static inline struct irq_chip *irq_get_chip(unsigned int irq)
@@ -595,9 +590,6 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 #define irq_alloc_desc_from(from, node)		\
 	irq_alloc_descs(-1, from, 1, node)
 
-#define irq_alloc_descs_from(from, cnt, node)	\
-	irq_alloc_descs(-1, from, cnt, node)
-
 void irq_free_descs(unsigned int irq, unsigned int cnt);
 int irq_reserve_irqs(unsigned int from, unsigned int cnt);
 
diff --git a/trunk/include/linux/irq_work.h b/trunk/include/linux/irq_work.h
index f5dbce50466e..6a9e8f5399e2 100644
--- a/trunk/include/linux/irq_work.h
+++ b/trunk/include/linux/irq_work.h
@@ -3,20 +3,6 @@
 
 #include <linux/llist.h>
 
-/*
- * An entry can be in one of four states:
- *
- * free	     NULL, 0 -> {claimed}       : free to be used
- * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
- * pending   next, 3 -> {busy}          : queued, pending callback
- * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- */
-
-#define IRQ_WORK_PENDING	1UL
-#define IRQ_WORK_BUSY		2UL
-#define IRQ_WORK_FLAGS		3UL
-#define IRQ_WORK_LAZY		4UL /* Doesn't want IPI, wait for tick */
-
 struct irq_work {
 	unsigned long flags;
 	struct llist_node llnode;
@@ -30,14 +16,8 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 	work->func = func;
 }
 
-void irq_work_queue(struct irq_work *work);
+bool irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
-#ifdef CONFIG_IRQ_WORK
-bool irq_work_needs_cpu(void);
-#else
-static bool irq_work_needs_cpu(void) { return false; }
-#endif
-
 #endif /* _LINUX_IRQ_WORK_H */
diff --git a/trunk/include/linux/kernel_stat.h b/trunk/include/linux/kernel_stat.h
index ed5f6ed6eb77..66b70780e910 100644
--- a/trunk/include/linux/kernel_stat.h
+++ b/trunk/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 static inline void account_process_tick(struct task_struct *tsk, int user)
 {
 	vtime_account_user(tsk);
diff --git a/trunk/include/linux/kprobes.h b/trunk/include/linux/kprobes.h
index 4b6ef4d33cc2..23755ba42abc 100644
--- a/trunk/include/linux/kprobes.h
+++ b/trunk/include/linux/kprobes.h
@@ -49,6 +49,16 @@
 #define KPROBE_REENTER		0x00000004
 #define KPROBE_HIT_SSDONE	0x00000008
 
+/*
+ * If function tracer is enabled and the arch supports full
+ * passing of pt_regs to function tracing, then kprobes can
+ * optimize on top of function tracing.
+ */
+#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
+	&& defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
+# define KPROBES_CAN_USE_FTRACE
+#endif
+
 /* Attach to insert probes on any functions which should be ignored*/
 #define __kprobes	__attribute__((__section__(".kprobes.text")))
 
@@ -306,7 +316,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 #endif
 
 #endif /* CONFIG_OPTPROBES */
-#ifdef CONFIG_KPROBES_ON_FTRACE
+#ifdef KPROBES_CAN_USE_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				  struct ftrace_ops *ops, struct pt_regs *regs);
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
diff --git a/trunk/include/linux/kvm_host.h b/trunk/include/linux/kvm_host.h
index b7996a768eb2..2c497ab0d03d 100644
--- a/trunk/include/linux/kvm_host.h
+++ b/trunk/include/linux/kvm_host.h
@@ -22,7 +22,6 @@
 #include <linux/rcupdate.h>
 #include <linux/ratelimit.h>
 #include <linux/err.h>
-#include <linux/irqflags.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -741,52 +740,15 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 }
 #endif /* CONFIG_IOMMU_API */
 
-static inline void __guest_enter(void)
+static inline void kvm_guest_enter(void)
 {
+	BUG_ON(preemptible());
 	/*
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system(current);
+	vtime_account_system_irqsafe(current);
 	current->flags |= PF_VCPU;
-}
-
-static inline void __guest_exit(void)
-{
-	/*
-	 * This is running in ioctl context so we can avoid
-	 * the call to vtime_account() with its unnecessary idle check.
-	 */
-	vtime_account_system(current);
-	current->flags &= ~PF_VCPU;
-}
-
-#ifdef CONFIG_CONTEXT_TRACKING
-extern void guest_enter(void);
-extern void guest_exit(void);
-
-#else /* !CONFIG_CONTEXT_TRACKING */
-static inline void guest_enter(void)
-{
-	__guest_enter();
-}
-
-static inline void guest_exit(void)
-{
-	__guest_exit();
-}
-#endif /* !CONFIG_CONTEXT_TRACKING */
-
-static inline void kvm_guest_enter(void)
-{
-	unsigned long flags;
-
-	BUG_ON(preemptible());
-
-	local_irq_save(flags);
-	guest_enter();
-	local_irq_restore(flags);
-
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
 	 * is very similar to exiting to userspase from rcu point of view. In
@@ -799,11 +761,12 @@ static inline void kvm_guest_enter(void)
 
 static inline void kvm_guest_exit(void)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	guest_exit();
-	local_irq_restore(flags);
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system_irqsafe(current);
+	current->flags &= ~PF_VCPU;
 }
 
 /*
diff --git a/trunk/include/linux/pci.h b/trunk/include/linux/pci.h
index 6fa4dd2a3b9e..15472d691ee6 100644
--- a/trunk/include/linux/pci.h
+++ b/trunk/include/linux/pci.h
@@ -1101,12 +1101,6 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 	return -1;
 }
 
-static inline int
-pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
-{
-	return -1;
-}
-
 static inline void pci_msi_shutdown(struct pci_dev *dev)
 { }
 static inline void pci_disable_msi(struct pci_dev *dev)
@@ -1138,7 +1132,6 @@ static inline int pci_msi_enabled(void)
 }
 #else
 extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
-extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
 extern void pci_msi_shutdown(struct pci_dev *dev);
 extern void pci_disable_msi(struct pci_dev *dev);
 extern int pci_msix_table_size(struct pci_dev *dev);
diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h
index e47ee462c2f2..6bfb2faa0b19 100644
--- a/trunk/include/linux/perf_event.h
+++ b/trunk/include/linux/perf_event.h
@@ -135,21 +135,16 @@ struct hw_perf_event {
 		struct { /* software */
 			struct hrtimer	hrtimer;
 		};
-		struct { /* tracepoint */
-			struct task_struct	*tp_target;
-			/* for tp_event->class */
-			struct list_head	tp_list;
-		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
+			struct arch_hw_breakpoint	info;
+			struct list_head		bp_list;
 			/*
 			 * Crufty hack to avoid the chicken and egg
 			 * problem hw_breakpoint has with context
 			 * creation and event initalization.
 			 */
 			struct task_struct		*bp_target;
-			struct arch_hw_breakpoint	info;
-			struct list_head		bp_list;
 		};
 #endif
 	};
@@ -822,17 +817,6 @@ do {									\
 } while (0)
 
 
-struct perf_pmu_events_attr {
-	struct device_attribute attr;
-	u64 id;
-};
-
-#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
-static struct perf_pmu_events_attr _var = {				\
-	.attr = __ATTR(_name, 0444, _show, NULL),			\
-	.id   =  _id,							\
-};
-
 #define PMU_FORMAT_ATTR(_name, _format)					\
 static ssize_t								\
 _name##_show(struct device *dev,					\
diff --git a/trunk/include/linux/printk.h b/trunk/include/linux/printk.h
index 86c4b6294713..9afc01e5a0a6 100644
--- a/trunk/include/linux/printk.h
+++ b/trunk/include/linux/printk.h
@@ -98,6 +98,9 @@ int no_printk(const char *fmt, ...)
 extern asmlinkage __printf(1, 2)
 void early_printk(const char *fmt, ...);
 
+extern int printk_needs_cpu(int cpu);
+extern void printk_tick(void);
+
 #ifdef CONFIG_PRINTK
 asmlinkage __printf(5, 0)
 int vprintk_emit(int facility, int level,
diff --git a/trunk/include/linux/profile.h b/trunk/include/linux/profile.h
index 21123902366d..a0fc32279fc0 100644
--- a/trunk/include/linux/profile.h
+++ b/trunk/include/linux/profile.h
@@ -82,6 +82,9 @@ int task_handoff_unregister(struct notifier_block * n);
 int profile_event_register(enum profile_type, struct notifier_block * n);
 int profile_event_unregister(enum profile_type, struct notifier_block * n);
 
+int register_timer_hook(int (*hook)(struct pt_regs *));
+void unregister_timer_hook(int (*hook)(struct pt_regs *));
+
 struct pt_regs;
 
 #else
@@ -132,6 +135,16 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_
 #define profile_handoff_task(a) (0)
 #define profile_munmap(a) do { } while (0)
 
+static inline int register_timer_hook(int (*hook)(struct pt_regs *))
+{
+	return -ENOSYS;
+}
+
+static inline void unregister_timer_hook(int (*hook)(struct pt_regs *))
+{
+	return;
+}
+
 #endif /* CONFIG_PROFILING */
 
 #endif /* _LINUX_PROFILE_H */
diff --git a/trunk/include/linux/rcupdate.h b/trunk/include/linux/rcupdate.h
index b758ce17b309..275aa3f1062d 100644
--- a/trunk/include/linux/rcupdate.h
+++ b/trunk/include/linux/rcupdate.h
@@ -53,10 +53,7 @@ extern int rcutorture_runnable; /* for sysctl */
 extern void rcutorture_record_test_transition(void);
 extern void rcutorture_record_progress(unsigned long vernum);
 extern void do_trace_rcu_torture_read(char *rcutorturename,
-				      struct rcu_head *rhp,
-				      unsigned long secs,
-				      unsigned long c_old,
-				      unsigned long c);
+				      struct rcu_head *rhp);
 #else
 static inline void rcutorture_record_test_transition(void)
 {
@@ -66,13 +63,9 @@ static inline void rcutorture_record_progress(unsigned long vernum)
 }
 #ifdef CONFIG_RCU_TRACE
 extern void do_trace_rcu_torture_read(char *rcutorturename,
-				      struct rcu_head *rhp,
-				      unsigned long secs,
-				      unsigned long c_old,
-				      unsigned long c);
+				      struct rcu_head *rhp);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
 #endif
 #endif
 
@@ -756,7 +749,7 @@ static inline void rcu_preempt_sleep_check(void)
  * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
  * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
  * be preempted, but explicit blocking is illegal.  Finally, in preemptible
- * RCU implementations in real-time (with -rt patchset) kernel builds,
+ * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
  * RCU read-side critical sections may be preempted and they may also
  * block, but only when acquiring spinlocks that are subject to priority
  * inheritance.
diff --git a/trunk/include/linux/ring_buffer.h b/trunk/include/linux/ring_buffer.h
index 1342e69542f3..519777e3fa01 100644
--- a/trunk/include/linux/ring_buffer.h
+++ b/trunk/include/linux/ring_buffer.h
@@ -167,7 +167,6 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
-unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/trunk/include/linux/rtc.h b/trunk/include/linux/rtc.h
index 11d05f9fe8b6..9531845c419f 100644
--- a/trunk/include/linux/rtc.h
+++ b/trunk/include/linux/rtc.h
@@ -138,7 +138,6 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
 extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
-extern int rtc_set_ntp_time(struct timespec now);
 int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
 extern int rtc_read_alarm(struct rtc_device *rtc,
 			struct rtc_wkalrm *alrm);
diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h
index 33cc42130371..d2112477ff5e 100644
--- a/trunk/include/linux/sched.h
+++ b/trunk/include/linux/sched.h
@@ -304,6 +304,19 @@ static inline void lockup_detector_init(void)
 }
 #endif
 
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int  sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+					 void __user *buffer,
+					 size_t *lenp, loff_t *ppos);
+#else
+/* Avoid need for ifdefs elsewhere in the code */
+enum { sysctl_hung_task_timeout_secs = 0 };
+#endif
+
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 
@@ -325,6 +338,23 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 struct nsproxy;
 struct user_namespace;
 
+/*
+ * Default maximum number of active map areas; this limits the number of vmas
+ * per mm struct. Users can override this number via sysctl, but there is a
+ * problem.
+ *
+ * When a program's coredump is generated in ELF format, a section is created
+ * per vma. In ELF, the number of sections is stored as an unsigned short.
+ * This means the number of sections should be smaller than 65535 at coredump.
+ * Because the kernel adds some informative sections to the program image when
+ * generating a coredump, we need some margin. The number of extra sections is
+ * 1-3 now and depends on arch. We use "5" as a safe margin here.
+ */
+#define MAPCOUNT_ELF_CORE_MARGIN	(5)
+#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+
+extern int sysctl_max_map_count;
+
 #include <linux/aio.h>
 
 #ifdef CONFIG_MMU
@@ -1164,7 +1194,6 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
-
 /*
  * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
  * removed when useful for applications beyond shares distribution (e.g.
@@ -1179,7 +1208,6 @@ struct sched_entity {
 struct sched_rt_entity {
 	struct list_head run_list;
 	unsigned long timeout;
-	unsigned long watchdog_stamp;
 	unsigned int time_slice;
 
 	struct sched_rt_entity *back;
@@ -1192,6 +1220,11 @@ struct sched_rt_entity {
 #endif
 };
 
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
 
 struct rcu_node;
 
@@ -1334,15 +1367,6 @@ struct task_struct {
 	cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	struct cputime prev_cputime;
-#endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-	seqlock_t vtime_seqlock;
-	unsigned long long vtime_snap;
-	enum {
-		VTIME_SLEEPING = 0,
-		VTIME_USER,
-		VTIME_SYS,
-	} vtime_snap_whence;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
@@ -1598,6 +1622,37 @@ static inline void set_numabalancing_state(bool enabled)
 }
 #endif
 
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space.  This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO	100
+#define MAX_RT_PRIO		MAX_USER_RT_PRIO
+
+#define MAX_PRIO		(MAX_RT_PRIO + 40)
+#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
+
+static inline int rt_prio(int prio)
+{
+	if (unlikely(prio < MAX_RT_PRIO))
+		return 1;
+	return 0;
+}
+
+static inline int rt_task(struct task_struct *p)
+{
+	return rt_prio(p->prio);
+}
+
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
@@ -1737,37 +1792,6 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void task_cputime(struct task_struct *t,
-			 cputime_t *utime, cputime_t *stime);
-extern void task_cputime_scaled(struct task_struct *t,
-				cputime_t *utimescaled, cputime_t *stimescaled);
-extern cputime_t task_gtime(struct task_struct *t);
-#else
-static inline void task_cputime(struct task_struct *t,
-				cputime_t *utime, cputime_t *stime)
-{
-	if (utime)
-		*utime = t->utime;
-	if (stime)
-		*stime = t->stime;
-}
-
-static inline void task_cputime_scaled(struct task_struct *t,
-				       cputime_t *utimescaled,
-				       cputime_t *stimescaled)
-{
-	if (utimescaled)
-		*utimescaled = t->utimescaled;
-	if (stimescaled)
-		*stimescaled = t->stimescaled;
-}
-
-static inline cputime_t task_gtime(struct task_struct *t)
-{
-	return t->gtime;
-}
-#endif
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
@@ -2009,7 +2033,58 @@ extern void wake_up_idle_cpu(int cpu);
 static inline void wake_up_idle_cpu(int cpu) { }
 #endif
 
+extern unsigned int sysctl_sched_latency;
+extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_child_runs_first;
+
+enum sched_tunable_scaling {
+	SCHED_TUNABLESCALING_NONE,
+	SCHED_TUNABLESCALING_LOG,
+	SCHED_TUNABLESCALING_LINEAR,
+	SCHED_TUNABLESCALING_END,
+};
+extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
+#ifdef CONFIG_SCHED_DEBUG
+extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
+extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
+
+int sched_proc_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length,
+		loff_t *ppos);
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
+
+int sched_rt_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
 #ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+
 extern void sched_autogroup_create_attach(struct task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
 extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2025,6 +2100,30 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
+#ifdef CONFIG_RT_MUTEXES
+extern int rt_mutex_getprio(struct task_struct *p);
+extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return tsk->pi_blocked_on != NULL;
+}
+#else
+static inline int rt_mutex_getprio(struct task_struct *p)
+{
+	return p->normal_prio;
+}
+# define rt_mutex_adjust_pi(p)		do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return false;
+}
+#endif
+
 extern bool yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
@@ -2654,6 +2753,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
+extern void normalize_rt_tasks(void);
+
 #ifdef CONFIG_CGROUP_SCHED
 
 extern struct task_group root_task_group;
diff --git a/trunk/include/linux/sched/rt.h b/trunk/include/linux/sched/rt.h
deleted file mode 100644
index 94e19ea28fc3..000000000000
--- a/trunk/include/linux/sched/rt.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _SCHED_RT_H
-#define _SCHED_RT_H
-
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space.  This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO	100
-#define MAX_RT_PRIO		MAX_USER_RT_PRIO
-
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
-#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
-
-static inline int rt_prio(int prio)
-{
-	if (unlikely(prio < MAX_RT_PRIO))
-		return 1;
-	return 0;
-}
-
-static inline int rt_task(struct task_struct *p)
-{
-	return rt_prio(p->prio);
-}
-
-#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern void rt_mutex_adjust_pi(struct task_struct *p);
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return tsk->pi_blocked_on != NULL;
-}
-#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-	return p->normal_prio;
-}
-# define rt_mutex_adjust_pi(p)		do { } while (0)
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return false;
-}
-#endif
-
-extern void normalize_rt_tasks(void);
-
-
-#endif /* _SCHED_RT_H */
diff --git a/trunk/include/linux/sched/sysctl.h b/trunk/include/linux/sched/sysctl.h
deleted file mode 100644
index d2bb0ae979d0..000000000000
--- a/trunk/include/linux/sched/sysctl.h
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef _SCHED_SYSCTL_H
-#define _SCHED_SYSCTL_H
-
-#ifdef CONFIG_DETECT_HUNG_TASK
-extern unsigned int  sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-					 void __user *buffer,
-					 size_t *lenp, loff_t *ppos);
-#else
-/* Avoid need for ifdefs elsewhere in the code */
-enum { sysctl_hung_task_timeout_secs = 0 };
-#endif
-
-/*
- * Default maximum number of active map areas, this limits the number of vmas
- * per mm struct. Users can overwrite this number by sysctl but there is a
- * problem.
- *
- * When a program's coredump is generated as ELF format, a section is created
- * per a vma. In ELF, the number of sections is represented in unsigned short.
- * This means the number of sections should be smaller than 65535 at coredump.
- * Because the kernel adds some informative sections to a image of program at
- * generating coredump, we need some margin. The number of extra sections is
- * 1-3 now and depends on arch. We use "5" as safe margin, here.
- */
-#define MAPCOUNT_ELF_CORE_MARGIN	(5)
-#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-
-extern int sysctl_max_map_count;
-
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
-
-enum sched_tunable_scaling {
-	SCHED_TUNABLESCALING_NONE,
-	SCHED_TUNABLESCALING_LOG,
-	SCHED_TUNABLESCALING_LINEAR,
-	SCHED_TUNABLESCALING_END,
-};
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
-
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
-extern unsigned int sysctl_numa_balancing_scan_size;
-extern unsigned int sysctl_numa_balancing_settle_count;
-
-#ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_migration_cost;
-extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
-extern unsigned int sysctl_sched_shares_window;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *length,
-		loff_t *ppos);
-#endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return sysctl_timer_migration;
-}
-#else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return 1;
-}
-#endif
-
-/*
- *  control realtime throttling:
- *
- *  /proc/sys/kernel/sched_rt_period_us
- *  /proc/sys/kernel/sched_rt_runtime_us
- */
-extern unsigned int sysctl_sched_rt_period;
-extern int sysctl_sched_rt_runtime;
-
-#ifdef CONFIG_CFS_BANDWIDTH
-extern unsigned int sysctl_sched_cfs_bandwidth_slice;
-#endif
-
-#ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-#endif
-
-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE		(100 * HZ / 1000)
-
-extern int sched_rr_timeslice;
-
-extern int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
-extern int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
-#endif /* _SCHED_SYSCTL_H */
diff --git a/trunk/include/linux/smpboot.h b/trunk/include/linux/smpboot.h
index c65dee059913..e0106d8581d3 100644
--- a/trunk/include/linux/smpboot.h
+++ b/trunk/include/linux/smpboot.h
@@ -14,8 +14,6 @@ struct smpboot_thread_data;
  * @thread_should_run:	Check whether the thread should run or not. Called with
  *			preemption disabled.
  * @thread_fn:		The associated thread function
- * @create:		Optional setup function, called when the thread gets
- *			created (Not called from the thread context)
  * @setup:		Optional setup function, called when the thread gets
  *			operational the first time
  * @cleanup:		Optional cleanup function, called when the thread
@@ -24,7 +22,6 @@ struct smpboot_thread_data;
  *			parked (cpu offline)
  * @unpark:		Optional unpark function, called when the thread is
  *			unparked (cpu online)
- * @selfparking:	Thread is not parked by the park function.
  * @thread_comm:	The base name of the thread
  */
 struct smp_hotplug_thread {
@@ -32,12 +29,10 @@ struct smp_hotplug_thread {
 	struct list_head		list;
 	int				(*thread_should_run)(unsigned int cpu);
 	void				(*thread_fn)(unsigned int cpu);
-	void				(*create)(unsigned int cpu);
 	void				(*setup)(unsigned int cpu);
 	void				(*cleanup)(unsigned int cpu, bool online);
 	void				(*park)(unsigned int cpu);
 	void				(*unpark)(unsigned int cpu);
-	bool				selfparking;
 	const char			*thread_comm;
 };
 
diff --git a/trunk/include/linux/srcu.h b/trunk/include/linux/srcu.h
index 04f4121a23ae..6eb691b08358 100644
--- a/trunk/include/linux/srcu.h
+++ b/trunk/include/linux/srcu.h
@@ -151,14 +151,30 @@ void srcu_barrier(struct srcu_struct *sp);
  * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
  * and while lockdep is disabled.
  *
- * Note that SRCU is based on its own statemachine and it doesn't
- * relies on normal RCU, it can be called from the CPU which
- * is in the idle loop from an RCU point of view or offline.
+ * Note that if the CPU is in the idle loop from an RCU point of view
+ * (i.e., we are in the section between rcu_idle_enter() and
+ * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
+ * the CPU did an srcu_read_lock().  The reason for this is that RCU
+ * ignores CPUs that are in such a section, considering them to be in an
+ * extended quiescent state, so such a CPU is effectively never in an
+ * RCU read-side critical section regardless of what RCU primitives it
+ * invokes.  This state of affairs is required --- we need to keep an
+ * RCU-free window in idle where the CPU may possibly enter low
+ * power mode. This way we can report an extended quiescent state to
+ * other CPUs that started a grace period. Otherwise we would delay any
+ * grace period for as long as we run in the idle task.
+ *
+ * Similarly, we avoid claiming an SRCU read lock held if the current
+ * CPU is offline.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
+	if (rcu_is_cpu_idle())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
 	return lock_is_held(&sp->dep_map);
 }
 
@@ -220,6 +236,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	int retval = __srcu_read_lock(sp);
 
 	rcu_lock_acquire(&(sp)->dep_map);
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "srcu_read_lock() used illegally while idle");
 	return retval;
 }
 
@@ -233,6 +251,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__releases(sp)
 {
+	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+			   "srcu_read_unlock() used illegally while idle");
 	rcu_lock_release(&(sp)->dep_map);
 	__srcu_read_unlock(sp, idx);
 }
diff --git a/trunk/include/linux/tick.h b/trunk/include/linux/tick.h
index 553272e6af55..1a6567b48492 100644
--- a/trunk/include/linux/tick.h
+++ b/trunk/include/linux/tick.h
@@ -8,8 +8,6 @@
 
 #include <linux/clockchips.h>
 #include <linux/irqflags.h>
-#include <linux/percpu.h>
-#include <linux/hrtimer.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -124,26 +122,13 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
-DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
-
-static inline int tick_nohz_tick_stopped(void)
-{
-	return __this_cpu_read(tick_cpu_sched.tick_stopped);
-}
-
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-
-# else /* !CONFIG_NO_HZ */
-static inline int tick_nohz_tick_stopped(void)
-{
-	return 0;
-}
-
+# else
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
diff --git a/trunk/include/linux/time.h b/trunk/include/linux/time.h
index a3ab6a814a9c..4d358e9d10f1 100644
--- a/trunk/include/linux/time.h
+++ b/trunk/include/linux/time.h
@@ -115,20 +115,8 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 	return true;
 }
 
-extern bool persistent_clock_exist;
-
-#ifdef ALWAYS_USE_PERSISTENT_CLOCK
-#define has_persistent_clock()	true
-#else
-static inline bool has_persistent_clock(void)
-{
-	return persistent_clock_exist;
-}
-#endif
-
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
-extern int persistent_clock_is_local;
 extern int update_persistent_clock(struct timespec now);
 void timekeeping_init(void);
 extern int timekeeping_suspended;
@@ -170,7 +158,6 @@ extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
-extern int __getnstimeofday(struct timespec *tv);
 extern void getnstimeofday(struct timespec *tv);
 extern void getrawmonotonic(struct timespec *ts);
 extern void getnstime_raw_and_real(struct timespec *ts_raw,
diff --git a/trunk/include/linux/tsacct_kern.h b/trunk/include/linux/tsacct_kern.h
index 3251965bf4cc..44893e5ec8f7 100644
--- a/trunk/include/linux/tsacct_kern.h
+++ b/trunk/include/linux/tsacct_kern.h
@@ -23,15 +23,12 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
 #ifdef CONFIG_TASK_XACCT
 extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
 extern void acct_update_integrals(struct task_struct *tsk);
-extern void acct_account_cputime(struct task_struct *tsk);
 extern void acct_clear_integrals(struct task_struct *tsk);
 #else
 static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {}
 static inline void acct_update_integrals(struct task_struct *tsk)
 {}
-static inline void acct_account_cputime(struct task_struct *tsk)
-{}
 static inline void acct_clear_integrals(struct task_struct *tsk)
 {}
 #endif /* CONFIG_TASK_XACCT */
diff --git a/trunk/include/linux/uprobes.h b/trunk/include/linux/uprobes.h
index 02b83db8e2c5..4f628a6fc5b4 100644
--- a/trunk/include/linux/uprobes.h
+++ b/trunk/include/linux/uprobes.h
@@ -35,20 +35,13 @@ struct inode;
 # include <asm/uprobes.h>
 #endif
 
-#define UPROBE_HANDLER_REMOVE		1
-#define UPROBE_HANDLER_MASK		1
-
-enum uprobe_filter_ctx {
-	UPROBE_FILTER_REGISTER,
-	UPROBE_FILTER_UNREGISTER,
-	UPROBE_FILTER_MMAP,
-};
-
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
-	bool (*filter)(struct uprobe_consumer *self,
-				enum uprobe_filter_ctx ctx,
-				struct mm_struct *mm);
+	/*
+	 * filter is optional; If a filter exists, handler is run
+	 * if and only if filter returns true.
+	 */
+	bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
 
 	struct uprobe_consumer *next;
 };
@@ -101,7 +94,6 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
-extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_mmap(struct vm_area_struct *vma);
 extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -125,11 +117,6 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
 	return -ENOSYS;
 }
-static inline int
-uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
-{
-	return -ENOSYS;
-}
 static inline void
 uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
diff --git a/trunk/include/linux/vtime.h b/trunk/include/linux/vtime.h
index 71a5782d8c59..ae30ab58431a 100644
--- a/trunk/include/linux/vtime.h
+++ b/trunk/include/linux/vtime.h
@@ -6,46 +6,15 @@ struct task_struct;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account_irq_enter(struct task_struct *tsk);
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-static inline bool vtime_accounting_enabled(void) { return true; }
-#endif
-
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-
+extern void vtime_account(struct task_struct *tsk);
+#else
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
-static inline void vtime_account_user(struct task_struct *tsk) { }
-static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
-static inline bool vtime_accounting_enabled(void) { return false; }
-#endif
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_account_irq_exit(struct task_struct *tsk);
-extern bool vtime_accounting_enabled(void);
-extern void vtime_user_enter(struct task_struct *tsk);
-static inline void vtime_user_exit(struct task_struct *tsk)
-{
-	vtime_account_user(tsk);
-}
-extern void vtime_guest_enter(struct task_struct *tsk);
-extern void vtime_guest_exit(struct task_struct *tsk);
-extern void vtime_init_idle(struct task_struct *tsk);
-#else
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
-{
-	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
-}
-static inline void vtime_user_enter(struct task_struct *tsk) { }
-static inline void vtime_user_exit(struct task_struct *tsk) { }
-static inline void vtime_guest_enter(struct task_struct *tsk) { }
-static inline void vtime_guest_exit(struct task_struct *tsk) { }
-static inline void vtime_init_idle(struct task_struct *tsk) { }
+static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
+static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -54,15 +23,25 @@ extern void irqtime_account_irq(struct task_struct *tsk);
 static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif
 
-static inline void account_irq_enter_time(struct task_struct *tsk)
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	vtime_account_irq_enter(tsk);
+	/*
+	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
+	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
+	 * Softirq can also interrupt idle task directly if it calls
+	 * local_bh_enable(). Such a case probably doesn't exist, but we
+	 * never know. Ksoftirqd is not concerned because idle time is flushed
+	 * on context switch. Softirqs at the end of hardirqs are also not a
+	 * problem because the idle time is flushed on hardirq time already.
+	 */
+	vtime_account(tsk);
 	irqtime_account_irq(tsk);
 }
 
-static inline void account_irq_exit_time(struct task_struct *tsk)
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
-	vtime_account_irq_exit(tsk);
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	vtime_account_system(tsk);
 	irqtime_account_irq(tsk);
 }
 
diff --git a/trunk/include/trace/events/ras.h b/trunk/include/trace/events/ras.h
deleted file mode 100644
index 88b878383797..000000000000
--- a/trunk/include/trace/events/ras.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM ras
-
-#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_AER_H
-
-#include <linux/tracepoint.h>
-#include <linux/edac.h>
-
-
-/*
- * PCIe AER Trace event
- *
- * These events are generated when hardware detects a corrected or
- * uncorrected event on a PCIe device. The event report has
- * the following structure:
- *
- * char * dev_name -	The name of the slot where the device resides
- *			([domain:]bus:device.function).
- * u32 status -		Either the correctable or uncorrectable register
- *			indicating what error or errors have been seen
- * u8 severity -	error severity 0:NONFATAL 1:FATAL 2:CORRECTED
- */
-
-#define aer_correctable_errors		\
-	{BIT(0),	"Receiver Error"},		\
-	{BIT(6),	"Bad TLP"},			\
-	{BIT(7),	"Bad DLLP"},			\
-	{BIT(8),	"RELAY_NUM Rollover"},		\
-	{BIT(12),	"Replay Timer Timeout"},	\
-	{BIT(13),	"Advisory Non-Fatal"}
-
-#define aer_uncorrectable_errors		\
-	{BIT(4),	"Data Link Protocol"},		\
-	{BIT(12),	"Poisoned TLP"},		\
-	{BIT(13),	"Flow Control Protocol"},	\
-	{BIT(14),	"Completion Timeout"},		\
-	{BIT(15),	"Completer Abort"},		\
-	{BIT(16),	"Unexpected Completion"},	\
-	{BIT(17),	"Receiver Overflow"},		\
-	{BIT(18),	"Malformed TLP"},		\
-	{BIT(19),	"ECRC"},			\
-	{BIT(20),	"Unsupported Request"}
-
-TRACE_EVENT(aer_event,
-	TP_PROTO(const char *dev_name,
-		 const u32 status,
-		 const u8 severity),
-
-	TP_ARGS(dev_name, status, severity),
-
-	TP_STRUCT__entry(
-		__string(	dev_name,	dev_name	)
-		__field(	u32,		status		)
-		__field(	u8,		severity	)
-	),
-
-	TP_fast_assign(
-		__assign_str(dev_name, dev_name);
-		__entry->status		= status;
-		__entry->severity	= severity;
-	),
-
-	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
-		__get_str(dev_name),
-		__entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
-			__entry->severity == HW_EVENT_ERR_FATAL ?
-			"Fatal" : "Uncorrected",
-		__entry->severity == HW_EVENT_ERR_CORRECTED ?
-		__print_flags(__entry->status, "|", aer_correctable_errors) :
-		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
-);
-
-#endif /* _TRACE_AER_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/trunk/include/trace/events/rcu.h b/trunk/include/trace/events/rcu.h
index 1918e832da4f..d4f559b1ec34 100644
--- a/trunk/include/trace/events/rcu.h
+++ b/trunk/include/trace/events/rcu.h
@@ -44,10 +44,8 @@ TRACE_EVENT(rcu_utilization,
  * of a new grace period or the end of an old grace period ("cpustart"
  * and "cpuend", respectively), a CPU passing through a quiescent
  * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
- * and "cpuofl", respectively), a CPU being kicked for being too
- * long in dyntick-idle mode ("kick"), a CPU accelerating its new
- * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU
- * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB").
+ * and "cpuofl", respectively), and a CPU being kicked for being too
+ * long in dyntick-idle mode ("kick").
  */
 TRACE_EVENT(rcu_grace_period,
 
@@ -395,7 +393,7 @@ TRACE_EVENT(rcu_kfree_callback,
  */
 TRACE_EVENT(rcu_batch_start,
 
-	TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit),
+	TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
 
 	TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
 
@@ -403,7 +401,7 @@ TRACE_EVENT(rcu_batch_start,
 		__field(char *, rcuname)
 		__field(long, qlen_lazy)
 		__field(long, qlen)
-		__field(long, blimit)
+		__field(int, blimit)
 	),
 
 	TP_fast_assign(
@@ -413,7 +411,7 @@ TRACE_EVENT(rcu_batch_start,
 		__entry->blimit = blimit;
 	),
 
-	TP_printk("%s CBs=%ld/%ld bl=%ld",
+	TP_printk("%s CBs=%ld/%ld bl=%d",
 		  __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
 		  __entry->blimit)
 );
@@ -525,30 +523,22 @@ TRACE_EVENT(rcu_batch_end,
  */
 TRACE_EVENT(rcu_torture_read,
 
-	TP_PROTO(char *rcutorturename, struct rcu_head *rhp,
-		 unsigned long secs, unsigned long c_old, unsigned long c),
+	TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
 
-	TP_ARGS(rcutorturename, rhp, secs, c_old, c),
+	TP_ARGS(rcutorturename, rhp),
 
 	TP_STRUCT__entry(
 		__field(char *, rcutorturename)
 		__field(struct rcu_head *, rhp)
-		__field(unsigned long, secs)
-		__field(unsigned long, c_old)
-		__field(unsigned long, c)
 	),
 
 	TP_fast_assign(
 		__entry->rcutorturename = rcutorturename;
 		__entry->rhp = rhp;
-		__entry->secs = secs;
-		__entry->c_old = c_old;
-		__entry->c = c;
 	),
 
-	TP_printk("%s torture read %p %luus c: %lu %lu",
-		  __entry->rcutorturename, __entry->rhp,
-		  __entry->secs, __entry->c_old, __entry->c)
+	TP_printk("%s torture read %p",
+		  __entry->rcutorturename, __entry->rhp)
 );
 
 /*
@@ -618,8 +608,7 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
 	do { } while (0)
-#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
+#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
 #define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/trunk/include/uapi/linux/auto_fs.h b/trunk/include/uapi/linux/auto_fs.h
index bb991dfe134f..77cdba9df274 100644
--- a/trunk/include/uapi/linux/auto_fs.h
+++ b/trunk/include/uapi/linux/auto_fs.h
@@ -28,16 +28,25 @@
 #define AUTOFS_MIN_PROTO_VERSION	AUTOFS_PROTO_VERSION
 
 /*
- * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
- * back to the kernel via ioctl from userspace. On architectures where 32- and
- * 64-bit userspace binaries can be executed it's important that the size of
- * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
- * do not break the binary ABI interface by changing the structure size.
+ * Architectures where both 32- and 64-bit binaries can be executed
+ * on 64-bit kernels need this.  This keeps the structure format
+ * uniform, and makes sure the wait_queue_token isn't too big to be
+ * passed back down to the kernel.
+ *
+ * This assumes that on these architectures:
+ * mode     32 bit    64 bit
+ * -------------------------
+ * int      32 bit    32 bit
+ * long     32 bit    64 bit
+ *
+ * If so, 32-bit user-space code should be backwards compatible.
  */
-#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */
-typedef unsigned long autofs_wqt_t;
-#else
+
+#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \
+ || defined(__powerpc__) || defined(__s390__)
 typedef unsigned int autofs_wqt_t;
+#else
+typedef unsigned long autofs_wqt_t;
 #endif
 
 /* Packet types */
diff --git a/trunk/include/uapi/linux/perf_event.h b/trunk/include/uapi/linux/perf_event.h
index 9fa9c622a7f4..4f63c05d27c9 100644
--- a/trunk/include/uapi/linux/perf_event.h
+++ b/trunk/include/uapi/linux/perf_event.h
@@ -579,8 +579,7 @@ enum perf_event_type {
 	 *	{ u32			size;
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 	 *
-	 *	{ u64                   nr;
-	 *        { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+	 *	{ u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
 	 *
 	 * 	{ u64			abi; # enum perf_sample_regs_abi
 	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig
index 7000d9657402..be8b7f55312d 100644
--- a/trunk/init/Kconfig
+++ b/trunk/init/Kconfig
@@ -20,8 +20,12 @@ config CONSTRUCTORS
 	bool
 	depends on !UML
 
+config HAVE_IRQ_WORK
+	bool
+
 config IRQ_WORK
 	bool
+	depends on HAVE_IRQ_WORK
 
 config BUILDTIME_EXTABLE_SORT
 	bool
@@ -322,13 +326,10 @@ source "kernel/time/Kconfig"
 
 menu "CPU/Task time and stats accounting"
 
-config VIRT_CPU_ACCOUNTING
-	bool
-
 choice
 	prompt "Cputime accounting"
 	default TICK_CPU_ACCOUNTING if !PPC64
-	default VIRT_CPU_ACCOUNTING_NATIVE if PPC64
+	default VIRT_CPU_ACCOUNTING if PPC64
 
 # Kind of a stub config for the pure tick based cputime accounting
 config TICK_CPU_ACCOUNTING
@@ -341,10 +342,9 @@ config TICK_CPU_ACCOUNTING
 
 	  If unsure, say Y.
 
-config VIRT_CPU_ACCOUNTING_NATIVE
+config VIRT_CPU_ACCOUNTING
 	bool "Deterministic task and CPU time accounting"
 	depends on HAVE_VIRT_CPU_ACCOUNTING
-	select VIRT_CPU_ACCOUNTING
 	help
 	  Select this option to enable more accurate task and CPU time
 	  accounting.  This is done by reading a CPU counter on each
@@ -354,23 +354,6 @@ config VIRT_CPU_ACCOUNTING_NATIVE
 	  this also enables accounting of stolen time on logically-partitioned
 	  systems.
 
-config VIRT_CPU_ACCOUNTING_GEN
-	bool "Full dynticks CPU time accounting"
-	depends on HAVE_CONTEXT_TRACKING && 64BIT
-	select VIRT_CPU_ACCOUNTING
-	select CONTEXT_TRACKING
-	help
-	  Select this option to enable task and CPU time accounting on full
-	  dynticks systems. This accounting is implemented by watching every
-	  kernel-user boundaries using the context tracking subsystem.
-	  The accounting is thus performed at the expense of some significant
-	  overhead.
-
-	  For now this is only useful if you are working on the full
-	  dynticks subsystem development.
-
-	  If unsure, say N.
-
 config IRQ_TIME_ACCOUNTING
 	bool "Fine granularity task level IRQ time accounting"
 	depends on HAVE_IRQ_TIME_ACCOUNTING
@@ -470,7 +453,7 @@ config TREE_RCU
 
 config TREE_PREEMPT_RCU
 	bool "Preemptible tree-based hierarchical RCU"
-	depends on PREEMPT
+	depends on PREEMPT && SMP
 	help
 	  This option selects the RCU implementation that is
 	  designed for very large SMP systems with hundreds or
@@ -478,8 +461,6 @@ config TREE_PREEMPT_RCU
 	  is also required.  It also scales down nicely to
 	  smaller systems.
 
-	  Select this option if you are unsure.
-
 config TINY_RCU
 	bool "UP-only small-memory-footprint RCU"
 	depends on !PREEMPT && !SMP
@@ -505,14 +486,6 @@ config PREEMPT_RCU
 	  This option enables preemptible-RCU code that is common between
 	  the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
-config RCU_STALL_COMMON
-	def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
-	help
-	  This option enables RCU CPU stall code that is common between
-	  the TINY and TREE variants of RCU.  The purpose is to allow
-	  the tiny variants to disable RCU CPU stall warnings, while
-	  making these warnings mandatory for the tree variants.
-
 config CONTEXT_TRACKING
        bool
 
@@ -1290,7 +1263,6 @@ config HOTPLUG
 config PRINTK
 	default y
 	bool "Enable support for printk" if EXPERT
-	select IRQ_WORK
 	help
 	  This option enables normal printk support. Removing it
 	  eliminates most of the message strings from the kernel image
diff --git a/trunk/init/init_task.c b/trunk/init/init_task.c
index ba0a7f362d9e..8b2f3996b035 100644
--- a/trunk/init/init_task.c
+++ b/trunk/init/init_task.c
@@ -2,8 +2,6 @@
 #include <linux/export.h>
 #include <linux/mqueue.h>
 #include <linux/sched.h>
-#include <linux/sched/sysctl.h>
-#include <linux/sched/rt.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/trunk/kernel/acct.c b/trunk/kernel/acct.c
index e8b1627ab9c7..051e071a06e7 100644
--- a/trunk/kernel/acct.c
+++ b/trunk/kernel/acct.c
@@ -566,7 +566,6 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 void acct_collect(long exitcode, int group_dead)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
-	cputime_t utime, stime;
 	unsigned long vsize = 0;
 
 	if (group_dead && current->mm) {
@@ -594,9 +593,8 @@ void acct_collect(long exitcode, int group_dead)
 		pacct->ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 		pacct->ac_flag |= AXSIG;
-	task_cputime(current, &utime, &stime);
-	pacct->ac_utime += utime;
-	pacct->ac_stime += stime;
+	pacct->ac_utime += current->utime;
+	pacct->ac_stime += current->stime;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_majflt += current->maj_flt;
 	spin_unlock_irq(&current->sighand->siglock);
diff --git a/trunk/kernel/context_tracking.c b/trunk/kernel/context_tracking.c
index 65349f07b878..e0e07fd55508 100644
--- a/trunk/kernel/context_tracking.c
+++ b/trunk/kernel/context_tracking.c
@@ -1,41 +1,29 @@
-/*
- * Context tracking: Probe on high level context boundaries such as kernel
- * and userspace. This includes syscalls and exceptions entry/exit.
- *
- * This is used by RCU to remove its dependency on the timer tick while a CPU
- * runs in userspace.
- *
- *  Started by Frederic Weisbecker:
- *
- * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
- *
- * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
- * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
- *
- */
-
 #include <linux/context_tracking.h>
-#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/percpu.h>
 #include <linux/hardirq.h>
-#include <linux/export.h>
 
-DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+struct context_tracking {
+	/*
+	 * When active is false, hooks are not set to
+	 * minimize overhead: TIF flags are cleared
+	 * and calls to user_enter/exit are ignored. This
+	 * may be further optimized using static keys.
+	 */
+	bool active;
+	enum {
+		IN_KERNEL = 0,
+		IN_USER,
+	} state;
+};
+
+static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
 	.active = true,
 #endif
 };
 
-/**
- * user_enter - Inform the context tracking that the CPU is going to
- *              enter userspace mode.
- *
- * This function must be called right before we switch from the kernel
- * to userspace, when it's guaranteed the remaining kernel instructions
- * to execute won't use any RCU read side critical section because this
- * function sets RCU in extended quiescent state.
- */
 void user_enter(void)
 {
 	unsigned long flags;
@@ -51,90 +39,40 @@ void user_enter(void)
 	if (in_interrupt())
 		return;
 
-	/* Kernel threads aren't supposed to go to userspace */
 	WARN_ON_ONCE(!current->mm);
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.active) &&
 	    __this_cpu_read(context_tracking.state) != IN_USER) {
-		/*
-		 * At this stage, only low level arch entry code remains and
-		 * then we'll run in userspace. We can assume there won't be
-		 * any RCU read-side critical section until the next call to
-		 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
-		 * on the tick.
-		 */
-		vtime_user_enter(current);
-		rcu_user_enter();
 		__this_cpu_write(context_tracking.state, IN_USER);
+		rcu_user_enter();
 	}
 	local_irq_restore(flags);
 }
 
-
-/**
- * user_exit - Inform the context tracking that the CPU is
- *             exiting userspace mode and entering the kernel.
- *
- * This function must be called after we entered the kernel from userspace
- * before any use of RCU read side critical section. This potentially include
- * any high level kernel code like syscalls, exceptions, signal handling, etc...
- *
- * This call supports re-entrancy. This way it can be called from any exception
- * handler without needing to know if we came from userspace or not.
- */
 void user_exit(void)
 {
 	unsigned long flags;
 
+	/*
+	 * Some contexts may involve an exception occurring in an irq,
+	 * leading to that nesting:
+	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+	 * helpers are enough to protect RCU uses inside the exception. So
+	 * just return immediately if we detect we are in an IRQ.
+	 */
 	if (in_interrupt())
 		return;
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.state) == IN_USER) {
-		/*
-		 * We are going to run code that may use RCU. Inform
-		 * RCU core about that (ie: we may need the tick again).
-		 */
-		rcu_user_exit();
-		vtime_user_exit(current);
 		__this_cpu_write(context_tracking.state, IN_KERNEL);
+		rcu_user_exit();
 	}
 	local_irq_restore(flags);
 }
 
-void guest_enter(void)
-{
-	if (vtime_accounting_enabled())
-		vtime_guest_enter(current);
-	else
-		__guest_enter();
-}
-EXPORT_SYMBOL_GPL(guest_enter);
-
-void guest_exit(void)
-{
-	if (vtime_accounting_enabled())
-		vtime_guest_exit(current);
-	else
-		__guest_exit();
-}
-EXPORT_SYMBOL_GPL(guest_exit);
-
-
-/**
- * context_tracking_task_switch - context switch the syscall callbacks
- * @prev: the task that is being switched out
- * @next: the task that is being switched in
- *
- * The context tracking uses the syscall slow path to implement its user-kernel
- * boundaries probes on syscalls. This way it doesn't impact the syscall fast
- * path on CPUs that don't do context tracking.
- *
- * But we need to clear the flag on the previous task because it may later
- * migrate to some CPU that doesn't do the context tracking. As such the TIF
- * flag may not be desired there.
- */
 void context_tracking_task_switch(struct task_struct *prev,
 			     struct task_struct *next)
 {
diff --git a/trunk/kernel/cpu.c b/trunk/kernel/cpu.c
index b5e4ab2d427e..3046a503242c 100644
--- a/trunk/kernel/cpu.c
+++ b/trunk/kernel/cpu.c
@@ -224,13 +224,11 @@ void clear_tasks_mm_cpumask(int cpu)
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
-	cputime_t utime, stime;
 
 	write_lock_irq(&tasklist_lock);
 	for_each_process(p) {
-		task_cputime(p, &utime, &stime);
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-		    (utime || stime))
+		    (p->utime || p->stime))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 				"(state = %ld, flags = %x)\n",
 				p->comm, task_pid_nr(p), cpu,
@@ -256,8 +254,6 @@ static int __ref take_cpu_down(void *_param)
 		return err;
 
 	cpu_notify(CPU_DYING | param->mod, param->hcpu);
-	/* Park the stopper thread */
-	kthread_park(current);
 	return 0;
 }
 
diff --git a/trunk/kernel/delayacct.c b/trunk/kernel/delayacct.c
index d473988c1d0b..418b3f7053aa 100644
--- a/trunk/kernel/delayacct.c
+++ b/trunk/kernel/delayacct.c
@@ -106,7 +106,6 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	unsigned long long t2, t3;
 	unsigned long flags;
 	struct timespec ts;
-	cputime_t utime, stime, stimescaled, utimescaled;
 
 	/* Though tsk->delays accessed later, early exit avoids
 	 * unnecessary returning of other data
@@ -115,14 +114,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 		goto done;
 
 	tmp = (s64)d->cpu_run_real_total;
-	task_cputime(tsk, &utime, &stime);
-	cputime_to_timespec(utime + stime, &ts);
+	cputime_to_timespec(tsk->utime + tsk->stime, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
 
 	tmp = (s64)d->cpu_scaled_run_real_total;
-	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
-	cputime_to_timespec(utimescaled + stimescaled, &ts);
+	cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_scaled_run_real_total =
 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
diff --git a/trunk/kernel/events/core.c b/trunk/kernel/events/core.c
index 5c75791d7269..7b6646a8c067 100644
--- a/trunk/kernel/events/core.c
+++ b/trunk/kernel/events/core.c
@@ -6171,14 +6171,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
-
-		if (attr->type == PERF_TYPE_TRACEPOINT)
-			event->hw.tp_target = task;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		/*
 		 * hw_breakpoint is a bit difficult here..
 		 */
-		else if (attr->type == PERF_TYPE_BREAKPOINT)
+		if (attr->type == PERF_TYPE_BREAKPOINT)
 			event->hw.bp_target = task;
 #endif
 	}
diff --git a/trunk/kernel/events/hw_breakpoint.c b/trunk/kernel/events/hw_breakpoint.c
index a64f8aeb5c1f..fe8a916507ed 100644
--- a/trunk/kernel/events/hw_breakpoint.c
+++ b/trunk/kernel/events/hw_breakpoint.c
@@ -676,7 +676,7 @@ int __init init_hw_breakpoint(void)
  err_alloc:
 	for_each_possible_cpu(err_cpu) {
 		for (i = 0; i < TYPE_MAX; i++)
-			kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
+			kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
 		if (err_cpu == cpu)
 			break;
 	}
diff --git a/trunk/kernel/events/uprobes.c b/trunk/kernel/events/uprobes.c
index a567c8c7ef31..dea7acfbb071 100644
--- a/trunk/kernel/events/uprobes.c
+++ b/trunk/kernel/events/uprobes.c
@@ -27,7 +27,6 @@
 #include <linux/pagemap.h>	/* read_mapping_page */
 #include <linux/slab.h>
 #include <linux/sched.h>
-#include <linux/export.h>
 #include <linux/rmap.h>		/* anon_vma_prepare */
 #include <linux/mmu_notifier.h>	/* set_pte_at_notify */
 #include <linux/swap.h>		/* try_to_free_swap */
@@ -42,31 +41,58 @@
 #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE
 
 static struct rb_root uprobes_tree = RB_ROOT;
-/*
- * allows us to skip the uprobe_mmap if there are no uprobe events active
- * at this time.  Probably a fine grained per inode count is better?
- */
-#define no_uprobe_events()	RB_EMPTY_ROOT(&uprobes_tree)
 
 static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
 
 #define UPROBES_HASH_SZ	13
+
+/*
+ * We need separate register/unregister and mmap/munmap lock hashes because
+ * of mmap_sem nesting.
+ *
+ * uprobe_register() needs to install probes on (potentially) all processes
+ * and thus needs to acquire multiple mmap_sems (consecutively, not
+ * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
+ * for the particular process doing the mmap.
+ *
+ * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
+ * because of lock order against i_mmap_mutex. This means there's a hole in
+ * the register vma iteration where a mmap() can happen.
+ *
+ * Thus uprobe_register() can race with uprobe_mmap() and we can try and
+ * install a probe where one is already installed.
+ */
+
+/* serialize (un)register */
+static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
+
+#define uprobes_hash(v)		(&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
+
 /* serialize uprobe->pending_list */
 static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
 #define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
 
 static struct percpu_rw_semaphore dup_mmap_sem;
 
+/*
+ * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
+ * events active at this time.  Probably a fine grained per inode count is
+ * better?
+ */
+static atomic_t uprobe_events = ATOMIC_INIT(0);
+
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
+/* Don't run handlers while first register / last unregister is in progress */
+#define UPROBE_RUN_HANDLER	1
 /* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP	1
+#define UPROBE_SKIP_SSTEP	2
 
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	atomic_t		ref;
-	struct rw_semaphore	register_rwsem;
 	struct rw_semaphore	consumer_rwsem;
+	struct mutex		copy_mutex;	/* TODO: kill me and UPROBE_COPY_INSN */
 	struct list_head	pending_list;
 	struct uprobe_consumer	*consumers;
 	struct inode		*inode;		/* Also hold a ref to inode */
@@ -404,6 +430,9 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 	u = __insert_uprobe(uprobe);
 	spin_unlock(&uprobes_treelock);
 
+	/* For now assume that the instruction need not be single-stepped */
+	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
+
 	return u;
 }
 
@@ -423,10 +452,8 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 
 	uprobe->inode = igrab(inode);
 	uprobe->offset = offset;
-	init_rwsem(&uprobe->register_rwsem);
 	init_rwsem(&uprobe->consumer_rwsem);
-	/* For now assume that the instruction need not be single-stepped */
-	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
+	mutex_init(&uprobe->copy_mutex);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
@@ -436,17 +463,38 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 		kfree(uprobe);
 		uprobe = cur_uprobe;
 		iput(inode);
+	} else {
+		atomic_inc(&uprobe_events);
 	}
 
 	return uprobe;
 }
 
-static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
+static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct uprobe_consumer *uc;
+
+	if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
+		return;
+
+	down_read(&uprobe->consumer_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		if (!uc->filter || uc->filter(uc, current))
+			uc->handler(uc, regs);
+	}
+	up_read(&uprobe->consumer_rwsem);
+}
+
+/* Returns the previous consumer */
+static struct uprobe_consumer *
+consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
 	down_write(&uprobe->consumer_rwsem);
 	uc->next = uprobe->consumers;
 	uprobe->consumers = uc;
 	up_write(&uprobe->consumer_rwsem);
+
+	return uc->next;
 }
 
 /*
@@ -540,8 +588,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
 		return ret;
 
-	/* TODO: move this into _register, until then we abuse this sem. */
-	down_write(&uprobe->consumer_rwsem);
+	mutex_lock(&uprobe->copy_mutex);
 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
 		goto out;
 
@@ -565,30 +612,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	set_bit(UPROBE_COPY_INSN, &uprobe->flags);
 
  out:
-	up_write(&uprobe->consumer_rwsem);
-
-	return ret;
-}
-
-static inline bool consumer_filter(struct uprobe_consumer *uc,
-				   enum uprobe_filter_ctx ctx, struct mm_struct *mm)
-{
-	return !uc->filter || uc->filter(uc, ctx, mm);
-}
-
-static bool filter_chain(struct uprobe *uprobe,
-			 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
-{
-	struct uprobe_consumer *uc;
-	bool ret = false;
-
-	down_read(&uprobe->consumer_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		ret = consumer_filter(uc, ctx, mm);
-		if (ret)
-			break;
-	}
-	up_read(&uprobe->consumer_rwsem);
+	mutex_unlock(&uprobe->copy_mutex);
 
 	return ret;
 }
@@ -600,6 +624,16 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	bool first_uprobe;
 	int ret;
 
+	/*
+	 * If probe is being deleted, unregister thread could be done with
+	 * the vma-rmap-walk through. Adding a probe now can be fatal since
+	 * nobody will be able to cleanup. Also we could be from fork or
+	 * mremap path, where the probe might have already been inserted.
+	 * Hence behave as if probe already existed.
+	 */
+	if (!uprobe->consumers)
+		return 0;
+
 	ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
 	if (ret)
 		return ret;
@@ -624,14 +658,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 static int
 remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
+	/* can happen if uprobe_register() fails */
+	if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
+		return 0;
+
 	set_bit(MMF_RECALC_UPROBES, &mm->flags);
 	return set_orig_insn(&uprobe->arch, mm, vaddr);
 }
 
-static inline bool uprobe_is_active(struct uprobe *uprobe)
-{
-	return !RB_EMPTY_NODE(&uprobe->rb_node);
-}
 /*
  * There could be threads that have already hit the breakpoint. They
  * will recheck the current insn and restart if find_uprobe() fails.
@@ -639,15 +673,12 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
-	if (WARN_ON(!uprobe_is_active(uprobe)))
-		return;
-
 	spin_lock(&uprobes_treelock);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
 	spin_unlock(&uprobes_treelock);
-	RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
 	iput(uprobe->inode);
 	put_uprobe(uprobe);
+	atomic_dec(&uprobe_events);
 }
 
 struct map_info {
@@ -733,10 +764,8 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 	return curr;
 }
 
-static int
-register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
+static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 {
-	bool is_register = !!new;
 	struct map_info *info;
 	int err = 0;
 
@@ -765,16 +794,10 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 		    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
 			goto unlock;
 
-		if (is_register) {
-			/* consult only the "caller", new consumer. */
-			if (consumer_filter(new,
-					UPROBE_FILTER_REGISTER, mm))
-				err = install_breakpoint(uprobe, mm, vma, info->vaddr);
-		} else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
-			if (!filter_chain(uprobe,
-					UPROBE_FILTER_UNREGISTER, mm))
-				err |= remove_breakpoint(uprobe, mm, info->vaddr);
-		}
+		if (is_register)
+			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+		else
+			err |= remove_breakpoint(uprobe, mm, info->vaddr);
 
  unlock:
 		up_write(&mm->mmap_sem);
@@ -787,23 +810,17 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 	return err;
 }
 
-static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
+static int __uprobe_register(struct uprobe *uprobe)
 {
-	consumer_add(uprobe, uc);
-	return register_for_each_vma(uprobe, uc);
+	return register_for_each_vma(uprobe, true);
 }
 
-static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
+static void __uprobe_unregister(struct uprobe *uprobe)
 {
-	int err;
-
-	if (!consumer_del(uprobe, uc))	/* WARN? */
-		return;
+	if (!register_for_each_vma(uprobe, false))
+		delete_uprobe(uprobe);
 
-	err = register_for_each_vma(uprobe, NULL);
 	/* TODO : cant unregister? schedule a worker thread */
-	if (!uprobe->consumers && !err)
-		delete_uprobe(uprobe);
 }
 
 /*
@@ -828,59 +845,31 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	struct uprobe *uprobe;
 	int ret;
 
-	/* Racy, just to catch the obvious mistakes */
+	if (!inode || !uc || uc->next)
+		return -EINVAL;
+
 	if (offset > i_size_read(inode))
 		return -EINVAL;
 
- retry:
+	ret = 0;
+	mutex_lock(uprobes_hash(inode));
 	uprobe = alloc_uprobe(inode, offset);
-	if (!uprobe)
-		return -ENOMEM;
-	/*
-	 * We can race with uprobe_unregister()->delete_uprobe().
-	 * Check uprobe_is_active() and retry if it is false.
-	 */
-	down_write(&uprobe->register_rwsem);
-	ret = -EAGAIN;
-	if (likely(uprobe_is_active(uprobe))) {
-		ret = __uprobe_register(uprobe, uc);
-		if (ret)
-			__uprobe_unregister(uprobe, uc);
-	}
-	up_write(&uprobe->register_rwsem);
-	put_uprobe(uprobe);
-
-	if (unlikely(ret == -EAGAIN))
-		goto retry;
-	return ret;
-}
-EXPORT_SYMBOL_GPL(uprobe_register);
 
-/*
- * uprobe_apply - unregister a already registered probe.
- * @inode: the file in which the probe has to be removed.
- * @offset: offset from the start of the file.
- * @uc: consumer which wants to add more or remove some breakpoints
- * @add: add or remove the breakpoints
- */
-int uprobe_apply(struct inode *inode, loff_t offset,
-			struct uprobe_consumer *uc, bool add)
-{
-	struct uprobe *uprobe;
-	struct uprobe_consumer *con;
-	int ret = -ENOENT;
-
-	uprobe = find_uprobe(inode, offset);
-	if (!uprobe)
-		return ret;
+	if (!uprobe) {
+		ret = -ENOMEM;
+	} else if (!consumer_add(uprobe, uc)) {
+		ret = __uprobe_register(uprobe);
+		if (ret) {
+			uprobe->consumers = NULL;
+			__uprobe_unregister(uprobe);
+		} else {
+			set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
+		}
+	}
 
-	down_write(&uprobe->register_rwsem);
-	for (con = uprobe->consumers; con && con != uc ; con = con->next)
-		;
-	if (con)
-		ret = register_for_each_vma(uprobe, add ? uc : NULL);
-	up_write(&uprobe->register_rwsem);
-	put_uprobe(uprobe);
+	mutex_unlock(uprobes_hash(inode));
+	if (uprobe)
+		put_uprobe(uprobe);
 
 	return ret;
 }
@@ -895,42 +884,25 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 {
 	struct uprobe *uprobe;
 
+	if (!inode || !uc)
+		return;
+
 	uprobe = find_uprobe(inode, offset);
 	if (!uprobe)
 		return;
 
-	down_write(&uprobe->register_rwsem);
-	__uprobe_unregister(uprobe, uc);
-	up_write(&uprobe->register_rwsem);
-	put_uprobe(uprobe);
-}
-EXPORT_SYMBOL_GPL(uprobe_unregister);
-
-static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
-{
-	struct vm_area_struct *vma;
-	int err = 0;
-
-	down_read(&mm->mmap_sem);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		unsigned long vaddr;
-		loff_t offset;
-
-		if (!valid_vma(vma, false) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
-			continue;
-
-		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
-		if (uprobe->offset <  offset ||
-		    uprobe->offset >= offset + vma->vm_end - vma->vm_start)
-			continue;
+	mutex_lock(uprobes_hash(inode));
 
-		vaddr = offset_to_vaddr(vma, uprobe->offset);
-		err |= remove_breakpoint(uprobe, mm, vaddr);
+	if (consumer_del(uprobe, uc)) {
+		if (!uprobe->consumers) {
+			__uprobe_unregister(uprobe);
+			clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
+		}
 	}
-	up_read(&mm->mmap_sem);
 
-	return err;
+	mutex_unlock(uprobes_hash(inode));
+	if (uprobe)
+		put_uprobe(uprobe);
 }
 
 static struct rb_node *
@@ -1007,7 +979,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	struct uprobe *uprobe, *u;
 	struct inode *inode;
 
-	if (no_uprobe_events() || !valid_vma(vma, true))
+	if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
 		return 0;
 
 	inode = vma->vm_file->f_mapping->host;
@@ -1016,14 +988,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
 
 	mutex_lock(uprobes_mmap_hash(inode));
 	build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
-	/*
-	 * We can race with uprobe_unregister(), this uprobe can be already
-	 * removed. But in this case filter_chain() must return false, all
-	 * consumers have gone away.
-	 */
+
 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		if (!fatal_signal_pending(current) &&
-		    filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
+		if (!fatal_signal_pending(current)) {
 			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
 			install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
 		}
@@ -1058,7 +1025,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
  */
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if (no_uprobe_events() || !valid_vma(vma, false))
+	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
 		return;
 
 	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
@@ -1075,14 +1042,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 /* Slot allocation for XOL */
 static int xol_add_vma(struct xol_area *area)
 {
-	struct mm_struct *mm = current->mm;
-	int ret = -EALREADY;
+	struct mm_struct *mm;
+	int ret;
+
+	area->page = alloc_page(GFP_HIGHUSER);
+	if (!area->page)
+		return -ENOMEM;
+
+	ret = -EALREADY;
+	mm = current->mm;
 
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
 
 	ret = -ENOMEM;
+
 	/* Try to map as high as possible, this is only a hint. */
 	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
 	if (area->vaddr & ~PAGE_MASK) {
@@ -1098,53 +1073,54 @@ static int xol_add_vma(struct xol_area *area)
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
 	ret = 0;
- fail:
+
+fail:
 	up_write(&mm->mmap_sem);
+	if (ret)
+		__free_page(area->page);
 
 	return ret;
 }
 
+static struct xol_area *get_xol_area(struct mm_struct *mm)
+{
+	struct xol_area *area;
+
+	area = mm->uprobes_state.xol_area;
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
+
+	return area;
+}
+
 /*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
+ * xol_alloc_area - Allocate process's xol_area.
+ * This area will be used for storing instructions for execution out of
+ * line.
  *
  * Returns the allocated area or NULL.
  */
-static struct xol_area *get_xol_area(void)
+static struct xol_area *xol_alloc_area(void)
 {
-	struct mm_struct *mm = current->mm;
 	struct xol_area *area;
 
-	area = mm->uprobes_state.xol_area;
-	if (area)
-		goto ret;
-
 	area = kzalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
-		goto out;
+		return NULL;
 
 	area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
-	if (!area->bitmap)
-		goto free_area;
 
-	area->page = alloc_page(GFP_HIGHUSER);
-	if (!area->page)
-		goto free_bitmap;
+	if (!area->bitmap)
+		goto fail;
 
 	init_waitqueue_head(&area->wq);
 	if (!xol_add_vma(area))
 		return area;
 
-	__free_page(area->page);
- free_bitmap:
+fail:
 	kfree(area->bitmap);
- free_area:
 	kfree(area);
- out:
-	area = mm->uprobes_state.xol_area;
- ret:
-	smp_read_barrier_depends();     /* pairs with wmb in xol_add_vma() */
-	return area;
+
+	return get_xol_area(current->mm);
 }
 
 /*
@@ -1210,26 +1186,33 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
 }
 
 /*
- * xol_get_insn_slot - allocate a slot for xol.
+ * xol_get_insn_slot - If a slot was not already allocated, then
+ * allocate a slot.
  * Returns the allocated slot address or 0.
  */
-static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
+static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
 {
 	struct xol_area *area;
 	unsigned long offset;
-	unsigned long xol_vaddr;
 	void *vaddr;
 
-	area = get_xol_area();
-	if (!area)
-		return 0;
+	area = get_xol_area(current->mm);
+	if (!area) {
+		area = xol_alloc_area();
+		if (!area)
+			return 0;
+	}
+	current->utask->xol_vaddr = xol_take_insn_slot(area);
 
-	xol_vaddr = xol_take_insn_slot(area);
-	if (unlikely(!xol_vaddr))
+	/*
+	 * Initialize the slot if xol_vaddr points to valid
+	 * instruction slot.
+	 */
+	if (unlikely(!current->utask->xol_vaddr))
 		return 0;
 
-	/* Initialize the slot */
-	offset = xol_vaddr & ~PAGE_MASK;
+	current->utask->vaddr = slot_addr;
+	offset = current->utask->xol_vaddr & ~PAGE_MASK;
 	vaddr = kmap_atomic(area->page);
 	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
 	kunmap_atomic(vaddr);
@@ -1239,7 +1222,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 	 */
 	flush_dcache_page(area->page);
 
-	return xol_vaddr;
+	return current->utask->xol_vaddr;
 }
 
 /*
@@ -1257,7 +1240,8 @@ static void xol_free_insn_slot(struct task_struct *tsk)
 		return;
 
 	slot_addr = tsk->utask->xol_vaddr;
-	if (unlikely(!slot_addr))
+
+	if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
 		return;
 
 	area = tsk->mm->uprobes_state.xol_area;
@@ -1319,48 +1303,33 @@ void uprobe_copy_process(struct task_struct *t)
 }
 
 /*
- * Allocate a uprobe_task object for the task if if necessary.
- * Called when the thread hits a breakpoint.
+ * Allocate a uprobe_task object for the task.
+ * Called when the thread hits a breakpoint for the first time.
  *
  * Returns:
  * - pointer to new uprobe_task on success
  * - NULL otherwise
  */
-static struct uprobe_task *get_utask(void)
+static struct uprobe_task *add_utask(void)
 {
-	if (!current->utask)
-		current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
-	return current->utask;
+	struct uprobe_task *utask;
+
+	utask = kzalloc(sizeof *utask, GFP_KERNEL);
+	if (unlikely(!utask))
+		return NULL;
+
+	current->utask = utask;
+	return utask;
 }
 
 /* Prepare to single-step probed instruction out of line. */
 static int
-pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
+pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
 {
-	struct uprobe_task *utask;
-	unsigned long xol_vaddr;
-	int err;
-
-	utask = get_utask();
-	if (!utask)
-		return -ENOMEM;
-
-	xol_vaddr = xol_get_insn_slot(uprobe);
-	if (!xol_vaddr)
-		return -ENOMEM;
-
-	utask->xol_vaddr = xol_vaddr;
-	utask->vaddr = bp_vaddr;
-
-	err = arch_uprobe_pre_xol(&uprobe->arch, regs);
-	if (unlikely(err)) {
-		xol_free_insn_slot(current);
-		return err;
-	}
+	if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
+		return 0;
 
-	utask->active_uprobe = uprobe;
-	utask->state = UTASK_SSTEP;
-	return 0;
+	return -EFAULT;
 }
 
 /*
@@ -1422,7 +1391,6 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 		 * This is not strictly accurate, we can race with
 		 * uprobe_unregister() and see the already removed
 		 * uprobe if delete_uprobe() was not yet called.
-		 * Or this uprobe can be filtered out.
 		 */
 		if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
 			return;
@@ -1484,33 +1452,13 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	return uprobe;
 }
 
-static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	struct uprobe_consumer *uc;
-	int remove = UPROBE_HANDLER_REMOVE;
-
-	down_read(&uprobe->register_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		int rc = uc->handler(uc, regs);
-
-		WARN(rc & ~UPROBE_HANDLER_MASK,
-			"bad rc=0x%x from %pf()\n", rc, uc->handler);
-		remove &= rc;
-	}
-
-	if (remove && uprobe->consumers) {
-		WARN_ON(!uprobe_is_active(uprobe));
-		unapply_uprobe(uprobe, current->mm);
-	}
-	up_read(&uprobe->register_rwsem);
-}
-
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
  */
 static void handle_swbp(struct pt_regs *regs)
 {
+	struct uprobe_task *utask;
 	struct uprobe *uprobe;
 	unsigned long bp_vaddr;
 	int uninitialized_var(is_swbp);
@@ -1535,10 +1483,6 @@ static void handle_swbp(struct pt_regs *regs)
 		}
 		return;
 	}
-
-	/* change it in advance for ->handler() and restart */
-	instruction_pointer_set(regs, bp_vaddr);
-
 	/*
 	 * TODO: move copy_insn/etc into _register and remove this hack.
 	 * After we hit the bp, _unregister + _register can install the
@@ -1546,16 +1490,32 @@ static void handle_swbp(struct pt_regs *regs)
 	 */
 	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
-		goto out;
+		goto restart;
+
+	utask = current->utask;
+	if (!utask) {
+		utask = add_utask();
+		/* Cannot allocate; re-execute the instruction. */
+		if (!utask)
+			goto restart;
+	}
 
 	handler_chain(uprobe, regs);
 	if (can_skip_sstep(uprobe, regs))
 		goto out;
 
-	if (!pre_ssout(uprobe, regs, bp_vaddr))
+	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
+		utask->active_uprobe = uprobe;
+		utask->state = UTASK_SSTEP;
 		return;
+	}
 
-	/* can_skip_sstep() succeeded, or restart if can't singlestep */
+restart:
+	/*
+	 * cannot singlestep; cannot skip instruction;
+	 * re-execute the instruction.
+	 */
+	instruction_pointer_set(regs, bp_vaddr);
 out:
 	put_uprobe(uprobe);
 }
@@ -1649,8 +1609,10 @@ static int __init init_uprobes(void)
 {
 	int i;
 
-	for (i = 0; i < UPROBES_HASH_SZ; i++)
+	for (i = 0; i < UPROBES_HASH_SZ; i++) {
+		mutex_init(&uprobes_mutex[i]);
 		mutex_init(&uprobes_mmap_mutex[i]);
+	}
 
 	if (percpu_init_rwsem(&dup_mmap_sem))
 		return -ENOMEM;
diff --git a/trunk/kernel/exit.c b/trunk/kernel/exit.c
index 7dd20408707c..b4df21937216 100644
--- a/trunk/kernel/exit.c
+++ b/trunk/kernel/exit.c
@@ -85,7 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	struct tty_struct *uninitialized_var(tty);
-	cputime_t utime, stime;
 
 	sighand = rcu_dereference_check(tsk->sighand,
 					lockdep_tasklist_lock_is_held());
@@ -124,10 +123,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		task_cputime(tsk, &utime, &stime);
-		sig->utime += utime;
-		sig->stime += stime;
-		sig->gtime += task_gtime(tsk);
+		sig->utime += tsk->utime;
+		sig->stime += tsk->stime;
+		sig->gtime += tsk->gtime;
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -1094,7 +1092,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		sig = p->signal;
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
-		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
+		psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
diff --git a/trunk/kernel/fork.c b/trunk/kernel/fork.c
index 4133876d8cd2..c535f33bbb9c 100644
--- a/trunk/kernel/fork.c
+++ b/trunk/kernel/fork.c
@@ -1233,12 +1233,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-	seqlock_init(&p->vtime_seqlock);
-	p->vtime_snap = 0;
-	p->vtime_snap_whence = VTIME_SLEEPING;
-#endif
-
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
diff --git a/trunk/kernel/futex.c b/trunk/kernel/futex.c
index 9618b6e9fb36..19eb089ca003 100644
--- a/trunk/kernel/futex.c
+++ b/trunk/kernel/futex.c
@@ -60,7 +60,6 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/ptrace.h>
-#include <linux/sched/rt.h>
 
 #include <asm/futex.h>
 
diff --git a/trunk/kernel/hrtimer.c b/trunk/kernel/hrtimer.c
index cc47812d3feb..6db7a5ed52b5 100644
--- a/trunk/kernel/hrtimer.c
+++ b/trunk/kernel/hrtimer.c
@@ -44,8 +44,6 @@
 #include <linux/err.h>
 #include <linux/debugobjects.h>
 #include <linux/sched.h>
-#include <linux/sched/sysctl.h>
-#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include <asm/uaccess.h>
@@ -642,9 +640,21 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
  * and expiry check is done in the hrtimer_interrupt or in the softirq.
  */
 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
-					    struct hrtimer_clock_base *base)
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
 {
-	return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
+	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
+		if (wakeup) {
+			raw_spin_unlock(&base->cpu_base->lock);
+			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+			raw_spin_lock(&base->cpu_base->lock);
+		} else
+			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+
+		return 1;
+	}
+
+	return 0;
 }
 
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
@@ -725,7 +735,8 @@ static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void
 hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
-					    struct hrtimer_clock_base *base)
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
 {
 	return 0;
 }
@@ -984,21 +995,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	 *
 	 * XXX send_remote_softirq() ?
 	 */
-	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
-		&& hrtimer_enqueue_reprogram(timer, new_base)) {
-		if (wakeup) {
-			/*
-			 * We need to drop cpu_base->lock to avoid a
-			 * lock ordering issue vs. rq->lock.
-			 */
-			raw_spin_unlock(&new_base->cpu_base->lock);
-			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-			local_irq_restore(flags);
-			return ret;
-		} else {
-			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-		}
-	}
+	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
+		hrtimer_enqueue_reprogram(timer, new_base, wakeup);
 
 	unlock_hrtimer_base(timer, &flags);
 
diff --git a/trunk/kernel/irq/chip.c b/trunk/kernel/irq/chip.c
index cbd97ce0b000..3aca9f29d30e 100644
--- a/trunk/kernel/irq/chip.c
+++ b/trunk/kernel/irq/chip.c
@@ -90,40 +90,26 @@ int irq_set_handler_data(unsigned int irq, void *data)
 EXPORT_SYMBOL(irq_set_handler_data);
 
 /**
- *	irq_set_msi_desc_off - set MSI descriptor data for an irq at offset
- *	@irq_base:	Interrupt number base
- *	@irq_offset:	Interrupt number offset
- *	@entry:		Pointer to MSI descriptor data
+ *	irq_set_msi_desc - set MSI descriptor data for an irq
+ *	@irq:	Interrupt number
+ *	@entry:	Pointer to MSI descriptor data
  *
- *	Set the MSI descriptor entry for an irq at offset
+ *	Set the MSI descriptor entry for an irq
  */
-int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
-			 struct msi_desc *entry)
+int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
 {
 	unsigned long flags;
-	struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
 
 	if (!desc)
 		return -EINVAL;
 	desc->irq_data.msi_desc = entry;
-	if (entry && !irq_offset)
-		entry->irq = irq_base;
+	if (entry)
+		entry->irq = irq;
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
 
-/**
- *	irq_set_msi_desc - set MSI descriptor data for an irq
- *	@irq:	Interrupt number
- *	@entry:	Pointer to MSI descriptor data
- *
- *	Set the MSI descriptor entry for an irq
- */
-int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
-{
-	return irq_set_msi_desc_off(irq, 0, entry);
-}
-
 /**
  *	irq_set_chip_data - set irq chip data for an irq
  *	@irq:	Interrupt number
diff --git a/trunk/kernel/irq/manage.c b/trunk/kernel/irq/manage.c
index fa17855ca65a..e49a288fa479 100644
--- a/trunk/kernel/irq/manage.c
+++ b/trunk/kernel/irq/manage.c
@@ -16,7 +16,6 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include <linux/task_work.h>
 
 #include "internals.h"
@@ -1525,7 +1524,6 @@ void enable_percpu_irq(unsigned int irq, unsigned int type)
 out:
 	irq_put_desc_unlock(desc, flags);
 }
-EXPORT_SYMBOL_GPL(enable_percpu_irq);
 
 void disable_percpu_irq(unsigned int irq)
 {
@@ -1539,7 +1537,6 @@ void disable_percpu_irq(unsigned int irq)
 	irq_percpu_disable(desc, cpu);
 	irq_put_desc_unlock(desc, flags);
 }
-EXPORT_SYMBOL_GPL(disable_percpu_irq);
 
 /*
  * Internal function to unregister a percpu irqaction.
diff --git a/trunk/kernel/irq/spurious.c b/trunk/kernel/irq/spurious.c
index 7b5f012bde9d..611cd6003c45 100644
--- a/trunk/kernel/irq/spurious.c
+++ b/trunk/kernel/irq/spurious.c
@@ -80,11 +80,13 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
 
 	/*
 	 * All handlers must agree on IRQF_SHARED, so we test just the
-	 * first.
+	 * first. Check for action->next as well.
 	 */
 	action = desc->action;
 	if (!action || !(action->flags & IRQF_SHARED) ||
-	    (action->flags & __IRQF_TIMER))
+	    (action->flags & __IRQF_TIMER) ||
+	    (action->handler(irq, action->dev_id) == IRQ_HANDLED) ||
+	    !action->next)
 		goto out;
 
 	/* Already running on another processor */
@@ -102,7 +104,6 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
 	do {
 		if (handle_irq_event(desc) == IRQ_HANDLED)
 			ret = IRQ_HANDLED;
-		/* Make sure that there is still a valid action */
 		action = desc->action;
 	} while ((desc->istate & IRQS_PENDING) && action);
 	desc->istate &= ~IRQS_POLL_INPROGRESS;
diff --git a/trunk/kernel/irq_work.c b/trunk/kernel/irq_work.c
index 55fcce6065cf..1588e3b2871b 100644
--- a/trunk/kernel/irq_work.c
+++ b/trunk/kernel/irq_work.c
@@ -12,36 +12,37 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
-#include <linux/sched.h>
-#include <linux/tick.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
 #include <asm/processor.h>
 
+/*
+ * An entry can be in one of four states:
+ *
+ * free	     NULL, 0 -> {claimed}       : free to be used
+ * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+ * pending   next, 3 -> {busy}          : queued, pending callback
+ * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ */
+
+#define IRQ_WORK_PENDING	1UL
+#define IRQ_WORK_BUSY		2UL
+#define IRQ_WORK_FLAGS		3UL
 
 static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-static DEFINE_PER_CPU(int, irq_work_raised);
 
 /*
  * Claim the entry so that no one else will poke at it.
  */
 static bool irq_work_claim(struct irq_work *work)
 {
-	unsigned long flags, oflags, nflags;
+	unsigned long flags, nflags;
 
-	/*
-	 * Start with our best wish as a premise but only trust any
-	 * flag value after cmpxchg() result.
-	 */
-	flags = work->flags & ~IRQ_WORK_PENDING;
 	for (;;) {
+		flags = work->flags;
+		if (flags & IRQ_WORK_PENDING)
+			return false;
 		nflags = flags | IRQ_WORK_FLAGS;
-		oflags = cmpxchg(&work->flags, flags, nflags);
-		if (oflags == flags)
+		if (cmpxchg(&work->flags, flags, nflags) == flags)
 			break;
-		if (oflags & IRQ_WORK_PENDING)
-			return false;
-		flags = oflags;
 		cpu_relax();
 	}
 
@@ -56,69 +57,57 @@ void __weak arch_irq_work_raise(void)
 }
 
 /*
- * Enqueue the irq_work @entry unless it's already pending
- * somewhere.
- *
- * Can be re-enqueued while the callback is still in progress.
+ * Queue the entry and raise the IPI if needed.
  */
-void irq_work_queue(struct irq_work *work)
+static void __irq_work_queue(struct irq_work *work)
 {
-	/* Only queue if not already pending */
-	if (!irq_work_claim(work))
-		return;
+	bool empty;
 
-	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
-	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-
-	/*
-	 * If the work is not "lazy" or the tick is stopped, raise the irq
-	 * work interrupt (if supported by the arch), otherwise, just wait
-	 * for the next tick.
-	 */
-	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
-		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
-			arch_irq_work_raise();
-	}
+	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+	/* The list was empty, raise self-interrupt to start processing. */
+	if (empty)
+		arch_irq_work_raise();
 
 	preempt_enable();
 }
-EXPORT_SYMBOL_GPL(irq_work_queue);
 
-bool irq_work_needs_cpu(void)
+/*
+ * Enqueue the irq_work @entry; returns true on success, or false when the
+ * @entry was already enqueued by someone else.
+ *
+ * Can be re-enqueued while the callback is still in progress.
+ */
+bool irq_work_queue(struct irq_work *work)
 {
-	struct llist_head *this_list;
-
-	this_list = &__get_cpu_var(irq_work_list);
-	if (llist_empty(this_list))
+	if (!irq_work_claim(work)) {
+		/*
+		 * Already enqueued, can't do!
+		 */
 		return false;
+	}
 
-	/* All work should have been flushed before going offline */
-	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
-
+	__irq_work_queue(work);
 	return true;
 }
+EXPORT_SYMBOL_GPL(irq_work_queue);
 
-static void __irq_work_run(void)
+/*
+ * Run the irq_work entries on this cpu. Must be run from hardirq
+ * context with local IRQs disabled.
+ */
+void irq_work_run(void)
 {
-	unsigned long flags;
 	struct irq_work *work;
 	struct llist_head *this_list;
 	struct llist_node *llnode;
 
-
-	/*
-	 * Reset the "raised" state right before we check the list because
-	 * an NMI may enqueue after we find the list empty from the runner.
-	 */
-	__this_cpu_write(irq_work_raised, 0);
-	barrier();
-
 	this_list = &__get_cpu_var(irq_work_list);
 	if (llist_empty(this_list))
 		return;
 
+	BUG_ON(!in_irq());
 	BUG_ON(!irqs_disabled());
 
 	llnode = llist_del_all(this_list);
@@ -130,31 +119,16 @@ static void __irq_work_run(void)
 		/*
 		 * Clear the PENDING bit, after this point the @work
 		 * can be re-used.
-		 * Make it immediately visible so that other CPUs trying
-		 * to claim that work don't rely on us to handle their data
-		 * while we are in the middle of the func.
 		 */
-		flags = work->flags & ~IRQ_WORK_PENDING;
-		xchg(&work->flags, flags);
-
+		work->flags = IRQ_WORK_BUSY;
 		work->func(work);
 		/*
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+		(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
 	}
 }
-
-/*
- * Run the irq_work entries on this cpu. Requires to be ran from hardirq
- * context with local IRQs disabled.
- */
-void irq_work_run(void)
-{
-	BUG_ON(!in_irq());
-	__irq_work_run();
-}
 EXPORT_SYMBOL_GPL(irq_work_run);
 
 /*
@@ -169,35 +143,3 @@ void irq_work_sync(struct irq_work *work)
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int irq_work_cpu_notify(struct notifier_block *self,
-			       unsigned long action, void *hcpu)
-{
-	long cpu = (long)hcpu;
-
-	switch (action) {
-	case CPU_DYING:
-		/* Called from stop_machine */
-		if (WARN_ON_ONCE(cpu != smp_processor_id()))
-			break;
-		__irq_work_run();
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block cpu_notify;
-
-static __init int irq_work_init_cpu_notifier(void)
-{
-	cpu_notify.notifier_call = irq_work_cpu_notify;
-	cpu_notify.priority = 0;
-	register_cpu_notifier(&cpu_notify);
-	return 0;
-}
-device_initcall(irq_work_init_cpu_notifier);
-
-#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/trunk/kernel/kprobes.c b/trunk/kernel/kprobes.c
index f423c3ef4a82..098f396aa409 100644
--- a/trunk/kernel/kprobes.c
+++ b/trunk/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 }
 #endif /* CONFIG_OPTPROBES */
 
-#ifdef CONFIG_KPROBES_ON_FTRACE
+#ifdef KPROBES_CAN_USE_FTRACE
 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
 	.func = kprobe_ftrace_handler,
 	.flags = FTRACE_OPS_FL_SAVE_REGS,
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
 			   (unsigned long)p->addr, 1, 0);
 	WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
 }
-#else	/* !CONFIG_KPROBES_ON_FTRACE */
+#else	/* !KPROBES_CAN_USE_FTRACE */
 #define prepare_kprobe(p)	arch_prepare_kprobe(p)
 #define arm_kprobe_ftrace(p)	do {} while (0)
 #define disarm_kprobe_ftrace(p)	do {} while (0)
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 	 */
 	ftrace_addr = ftrace_location((unsigned long)p->addr);
 	if (ftrace_addr) {
-#ifdef CONFIG_KPROBES_ON_FTRACE
+#ifdef KPROBES_CAN_USE_FTRACE
 		/* Given address is not on the instruction boundary */
 		if ((unsigned long)p->addr != ftrace_addr)
 			return -EILSEQ;
 		p->flags |= KPROBE_FLAG_FTRACE;
-#else	/* !CONFIG_KPROBES_ON_FTRACE */
+#else	/* !KPROBES_CAN_USE_FTRACE */
 		return -EINVAL;
 #endif
 	}
diff --git a/trunk/kernel/mutex.c b/trunk/kernel/mutex.c
index 52f23011b6e0..a307cc9c9526 100644
--- a/trunk/kernel/mutex.c
+++ b/trunk/kernel/mutex.c
@@ -19,7 +19,6 @@
  */
 #include <linux/mutex.h>
 #include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
diff --git a/trunk/kernel/pid.c b/trunk/kernel/pid.c
index f2c6a6825098..de9af600006f 100644
--- a/trunk/kernel/pid.c
+++ b/trunk/kernel/pid.c
@@ -331,7 +331,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 	return pid;
 
 out_unlock:
-	spin_unlock_irq(&pidmap_lock);
+	spin_unlock(&pidmap_lock);
 out_free:
 	while (++i <= ns->level)
 		free_pidmap(pid->numbers + i);
diff --git a/trunk/kernel/posix-cpu-timers.c b/trunk/kernel/posix-cpu-timers.c
index 8fd709c9bb58..a278cad1d5d6 100644
--- a/trunk/kernel/posix-cpu-timers.c
+++ b/trunk/kernel/posix-cpu-timers.c
@@ -155,19 +155,11 @@ static void bump_cpu_timer(struct k_itimer *timer,
 
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
-	cputime_t utime, stime;
-
-	task_cputime(p, &utime, &stime);
-
-	return utime + stime;
+	return p->utime + p->stime;
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
-	cputime_t utime;
-
-	task_cputime(p, &utime, NULL);
-
-	return utime;
+	return p->utime;
 }
 
 static int
@@ -479,23 +471,18 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
-	cputime_t utime, stime;
-
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 						sizeof(unsigned long long));
-	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->cpu_timers,
-		       utime, stime, tsk->se.sum_exec_runtime);
+		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
 
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, stime;
 
-	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->signal->cpu_timers,
-		       utime + sig->utime, stime + sig->stime,
+		       tsk->utime + sig->utime, tsk->stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
@@ -1239,14 +1226,11 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
-	cputime_t utime, stime;
-
-	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
 		struct task_cputime task_sample = {
-			.utime = utime,
-			.stime = stime,
+			.utime = tsk->utime,
+			.stime = tsk->stime,
 			.sum_exec_runtime = tsk->se.sum_exec_runtime
 		};
 
@@ -1417,10 +1401,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		while (!signal_pending(current)) {
 			if (timer.it.cpu.expires.sched == 0) {
 				/*
-				 * Our timer fired and was reset, below
-				 * deletion can not fail.
+				 * Our timer fired and was reset.
 				 */
-				posix_cpu_timer_del(&timer);
 				spin_unlock_irq(&timer.it_lock);
 				return 0;
 			}
@@ -1438,26 +1420,9 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		 * We were interrupted by a signal.
 		 */
 		sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
-		error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
-		if (!error) {
-			/*
-			 * Timer is now unarmed, deletion can not fail.
-			 */
-			posix_cpu_timer_del(&timer);
-		}
+		posix_cpu_timer_set(&timer, 0, &zero_it, it);
 		spin_unlock_irq(&timer.it_lock);
 
-		while (error == TIMER_RETRY) {
-			/*
-			 * We need to handle case when timer was or is in the
-			 * middle of firing. In other cases we already freed
-			 * resources.
-			 */
-			spin_lock_irq(&timer.it_lock);
-			error = posix_cpu_timer_del(&timer);
-			spin_unlock_irq(&timer.it_lock);
-		}
-
 		if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
 			/*
 			 * It actually did fire already.
diff --git a/trunk/kernel/posix-timers.c b/trunk/kernel/posix-timers.c
index 10349d5f2ec3..69185ae6b701 100644
--- a/trunk/kernel/posix-timers.c
+++ b/trunk/kernel/posix-timers.c
@@ -997,7 +997,7 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
 
 	err = kc->clock_adj(which_clock, &ktx);
 
-	if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
+	if (!err && copy_to_user(utx, &ktx, sizeof(ktx)))
 		return -EFAULT;
 
 	return err;
diff --git a/trunk/kernel/printk.c b/trunk/kernel/printk.c
index f24633afa46a..267ce780abe8 100644
--- a/trunk/kernel/printk.c
+++ b/trunk/kernel/printk.c
@@ -42,7 +42,6 @@
 #include <linux/notifier.h>
 #include <linux/rculist.h>
 #include <linux/poll.h>
-#include <linux/irq_work.h>
 
 #include <asm/uaccess.h>
 
@@ -1960,32 +1959,30 @@ int is_console_locked(void)
 static DEFINE_PER_CPU(int, printk_pending);
 static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
-static void wake_up_klogd_work_func(struct irq_work *irq_work)
+void printk_tick(void)
 {
-	int pending = __this_cpu_xchg(printk_pending, 0);
-
-	if (pending & PRINTK_PENDING_SCHED) {
-		char *buf = __get_cpu_var(printk_sched_buf);
-		printk(KERN_WARNING "[sched_delayed] %s", buf);
+	if (__this_cpu_read(printk_pending)) {
+		int pending = __this_cpu_xchg(printk_pending, 0);
+		if (pending & PRINTK_PENDING_SCHED) {
+			char *buf = __get_cpu_var(printk_sched_buf);
+			printk(KERN_WARNING "[sched_delayed] %s", buf);
+		}
+		if (pending & PRINTK_PENDING_WAKEUP)
+			wake_up_interruptible(&log_wait);
 	}
-
-	if (pending & PRINTK_PENDING_WAKEUP)
-		wake_up_interruptible(&log_wait);
 }
 
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
-	.func = wake_up_klogd_work_func,
-	.flags = IRQ_WORK_LAZY,
-};
+int printk_needs_cpu(int cpu)
+{
+	if (cpu_is_offline(cpu))	/* offline: handle pending work now */
+		printk_tick();
+	return __this_cpu_read(printk_pending);	/* executing CPU's flags */
+}
 
 void wake_up_klogd(void)
 {
-	preempt_disable();
-	if (waitqueue_active(&log_wait)) {
+	if (waitqueue_active(&log_wait))
 		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
-		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
-	}
-	preempt_enable();
 }
 
 static void console_cont_flush(char *text, size_t size)
@@ -2465,7 +2462,6 @@ int printk_sched(const char *fmt, ...)
 	va_end(args);
 
 	__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
-	irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
 	local_irq_restore(flags);
 
 	return r;
diff --git a/trunk/kernel/profile.c b/trunk/kernel/profile.c
index dc3384ee874e..1f391819c42f 100644
--- a/trunk/kernel/profile.c
+++ b/trunk/kernel/profile.c
@@ -37,6 +37,9 @@ struct profile_hit {
 #define NR_PROFILE_HIT		(PAGE_SIZE/sizeof(struct profile_hit))
 #define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
 
+/* Oprofile timer tick hook */
+static int (*timer_hook)(struct pt_regs *) __read_mostly;
+
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
 
@@ -205,6 +208,25 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
 }
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
+int register_timer_hook(int (*hook)(struct pt_regs *))
+{
+	if (timer_hook)		/* only one hook at a time */
+		return -EBUSY;
+	timer_hook = hook;	/* invoked from profile_tick() */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_timer_hook);
+
+void unregister_timer_hook(int (*hook)(struct pt_regs *))
+{
+	WARN_ON(hook != timer_hook);	/* warns on mismatch; still clears */
+	timer_hook = NULL;
+	/* make sure all CPUs see the NULL hook */
+	synchronize_sched();  /* Allow ongoing interrupts to complete. */
+}
+EXPORT_SYMBOL_GPL(unregister_timer_hook);
+
+
 #ifdef CONFIG_SMP
 /*
  * Each cpu has a pair of open-addressed hashtables for pending
@@ -414,6 +436,8 @@ void profile_tick(int type)
 {
 	struct pt_regs *regs = get_irq_regs();
 
+	if (type == CPU_PROFILING && timer_hook)
+		timer_hook(regs);
 	if (!user_mode(regs) && prof_cpu_mask != NULL &&
 	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
 		profile_hit(type, (void *)profile_pc(regs));
diff --git a/trunk/kernel/ptrace.c b/trunk/kernel/ptrace.c
index acbd28424d81..6cbeaae4406d 100644
--- a/trunk/kernel/ptrace.c
+++ b/trunk/kernel/ptrace.c
@@ -712,12 +712,6 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
 					     kiov->iov_len, kiov->iov_base);
 }
 
-/*
- * This is declared in linux/regset.h and defined in machine-dependent
- * code.  We put the export here, near the primary machine-neutral use,
- * to ensure no machine forgets it.
- */
-EXPORT_SYMBOL_GPL(task_user_regset_view);
 #endif
 
 int ptrace_request(struct task_struct *child, long request,
diff --git a/trunk/kernel/rcu.h b/trunk/kernel/rcu.h
index 7f8e7590e3e5..20dfba576c2b 100644
--- a/trunk/kernel/rcu.h
+++ b/trunk/kernel/rcu.h
@@ -111,11 +111,4 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
 
 extern int rcu_expedited;
 
-#ifdef CONFIG_RCU_STALL_COMMON
-
-extern int rcu_cpu_stall_suppress;
-int rcu_jiffies_till_stall_check(void);
-
-#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
-
 #endif /* __LINUX_RCU_H */
diff --git a/trunk/kernel/rcupdate.c b/trunk/kernel/rcupdate.c
index 48ab70384a4c..a2cf76177b44 100644
--- a/trunk/kernel/rcupdate.c
+++ b/trunk/kernel/rcupdate.c
@@ -404,65 +404,11 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
-void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old, unsigned long c)
+void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
 {
-	trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
+	trace_rcu_torture_read(rcutorturename, rhp);
 }
 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
 #endif
-
-#ifdef CONFIG_RCU_STALL_COMMON
-
-#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA	       (5 * HZ)
-#else
-#define RCU_STALL_DELAY_DELTA	       0
-#endif
-
-int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
-int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
-
-module_param(rcu_cpu_stall_suppress, int, 0644);
-module_param(rcu_cpu_stall_timeout, int, 0644);
-
-int rcu_jiffies_till_stall_check(void)
-{
-	int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
-
-	/*
-	 * Limit check must be consistent with the Kconfig limits
-	 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
-	 */
-	if (till_stall_check < 3) {
-		ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
-		till_stall_check = 3;
-	} else if (till_stall_check > 300) {
-		ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
-		till_stall_check = 300;
-	}
-	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
-}
-
-static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
-{
-	rcu_cpu_stall_suppress = 1;
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block rcu_panic_block = {
-	.notifier_call = rcu_panic,
-};
-
-static int __init check_cpu_stall_init(void)
-{
-	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
-	return 0;
-}
-early_initcall(check_cpu_stall_init);
-
-#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
diff --git a/trunk/kernel/rcutiny.c b/trunk/kernel/rcutiny.c
index a0714a51b6d7..e7dce58f9c2a 100644
--- a/trunk/kernel/rcutiny.c
+++ b/trunk/kernel/rcutiny.c
@@ -51,10 +51,10 @@ static void __call_rcu(struct rcu_head *head,
 		       void (*func)(struct rcu_head *rcu),
 		       struct rcu_ctrlblk *rcp);
 
-static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-
 #include "rcutiny_plugin.h"
 
+static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+
 /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
 static void rcu_idle_enter_common(long long newval)
 {
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
  * interrupts don't count, we must be running at the first interrupt
  * level.
  */
-static int rcu_is_cpu_rrupt_from_idle(void)
+int rcu_is_cpu_rrupt_from_idle(void)
 {
 	return rcu_dynticks_nesting <= 1;
 }
@@ -205,7 +205,6 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
-	reset_cpu_stall_ticks(rcp);
 	if (rcp->rcucblist != NULL &&
 	    rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
@@ -252,7 +251,6 @@ void rcu_bh_qs(int cpu)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
-	check_cpu_stalls();
 	if (user || rcu_is_cpu_rrupt_from_idle())
 		rcu_sched_qs(cpu);
 	else if (!in_softirq())
diff --git a/trunk/kernel/rcutiny_plugin.h b/trunk/kernel/rcutiny_plugin.h
index 8a233002faeb..f85016a2309b 100644
--- a/trunk/kernel/rcutiny_plugin.h
+++ b/trunk/kernel/rcutiny_plugin.h
@@ -33,9 +33,6 @@ struct rcu_ctrlblk {
 	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
 	struct rcu_head **curtail;	/* ->next pointer of last CB. */
 	RCU_TRACE(long qlen);		/* Number of pending CBs. */
-	RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
-	RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
-	RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
 	RCU_TRACE(char *name);		/* Name of RCU type. */
 };
 
@@ -57,51 +54,6 @@ int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-#ifdef CONFIG_RCU_TRACE
-
-static void check_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-	unsigned long j;
-	unsigned long js;
-
-	if (rcu_cpu_stall_suppress)
-		return;
-	rcp->ticks_this_gp++;
-	j = jiffies;
-	js = rcp->jiffies_stall;
-	if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
-		pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
-		       rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
-		       jiffies - rcp->gp_start, rcp->qlen);
-		dump_stack();
-	}
-	if (*rcp->curtail && ULONG_CMP_GE(j, js))
-		rcp->jiffies_stall = jiffies +
-			3 * rcu_jiffies_till_stall_check() + 3;
-	else if (ULONG_CMP_GE(j, js))
-		rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
-}
-
-static void check_cpu_stall_preempt(void);
-
-#endif /* #ifdef CONFIG_RCU_TRACE */
-
-static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
-{
-#ifdef CONFIG_RCU_TRACE
-	rcp->ticks_this_gp = 0;
-	rcp->gp_start = jiffies;
-	rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
-#endif /* #ifdef CONFIG_RCU_TRACE */
-}
-
-static void check_cpu_stalls(void)
-{
-	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
-	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
-	RCU_TRACE(check_cpu_stall_preempt());
-}
-
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -496,7 +448,6 @@ static void rcu_preempt_start_gp(void)
 		/* Official start of GP. */
 		rcu_preempt_ctrlblk.gpnum++;
 		RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
-		reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
 
 		/* Any blocked RCU readers block new GP. */
 		if (rcu_preempt_blocked_readers_any())
@@ -1103,11 +1054,4 @@ MODULE_AUTHOR("Paul E. McKenney");
 MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
 MODULE_LICENSE("GPL");
 
-static void check_cpu_stall_preempt(void)
-{
-#ifdef CONFIG_TINY_PREEMPT_RCU
-	check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
-#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
-}
-
 #endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/trunk/kernel/rcutorture.c b/trunk/kernel/rcutorture.c
index e1f3a8c96724..31dea01c85fd 100644
--- a/trunk/kernel/rcutorture.c
+++ b/trunk/kernel/rcutorture.c
@@ -46,7 +46,6 @@
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
-#include <linux/trace_clock.h>
 #include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
@@ -208,20 +207,6 @@ MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
 #define rcu_can_boost() 0
 #endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
 
-#ifdef CONFIG_RCU_TRACE
-static u64 notrace rcu_trace_clock_local(void)
-{
-	u64 ts = trace_clock_local();
-	unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
-	return ts;
-}
-#else /* #ifdef CONFIG_RCU_TRACE */
-static u64 notrace rcu_trace_clock_local(void)
-{
-	return 0ULL;
-}
-#endif /* #else #ifdef CONFIG_RCU_TRACE */
-
 static unsigned long shutdown_time;	/* jiffies to system shutdown. */
 static unsigned long boost_starttime;	/* jiffies of next boost test start. */
 DEFINE_MUTEX(boost_mutex);		/* protect setting boost_starttime */
@@ -860,7 +845,7 @@ static int rcu_torture_boost(void *arg)
 		/* Wait for the next test interval. */
 		oldstarttime = boost_starttime;
 		while (ULONG_CMP_LT(jiffies, oldstarttime)) {
-			schedule_timeout_interruptible(oldstarttime - jiffies);
+			schedule_timeout_uninterruptible(1);
 			rcu_stutter_wait("rcu_torture_boost");
 			if (kthread_should_stop() ||
 			    fullstop != FULLSTOP_DONTSTOP)
@@ -1043,6 +1028,7 @@ void rcutorture_trace_dump(void)
 		return;
 	if (atomic_xchg(&beenhere, 1) != 0)
 		return;
+	do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
 	ftrace_dump(DUMP_ALL);
 }
 
@@ -1056,16 +1042,13 @@ static void rcu_torture_timer(unsigned long unused)
 {
 	int idx;
 	int completed;
-	int completed_end;
 	static DEFINE_RCU_RANDOM(rand);
 	static DEFINE_SPINLOCK(rand_lock);
 	struct rcu_torture *p;
 	int pipe_count;
-	unsigned long long ts;
 
 	idx = cur_ops->readlock();
 	completed = cur_ops->completed();
-	ts = rcu_trace_clock_local();
 	p = rcu_dereference_check(rcu_torture_current,
 				  rcu_read_lock_bh_held() ||
 				  rcu_read_lock_sched_held() ||
@@ -1075,6 +1058,7 @@ static void rcu_torture_timer(unsigned long unused)
 		cur_ops->readunlock(idx);
 		return;
 	}
+	do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 	if (p->rtort_mbtest == 0)
 		atomic_inc(&n_rcu_torture_mberror);
 	spin_lock(&rand_lock);
@@ -1087,14 +1071,10 @@ static void rcu_torture_timer(unsigned long unused)
 		/* Should not happen, but... */
 		pipe_count = RCU_TORTURE_PIPE_LEN;
 	}
-	completed_end = cur_ops->completed();
-	if (pipe_count > 1) {
-		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
-					  completed, completed_end);
+	if (pipe_count > 1)
 		rcutorture_trace_dump();
-	}
 	__this_cpu_inc(rcu_torture_count[pipe_count]);
-	completed = completed_end - completed;
+	completed = cur_ops->completed() - completed;
 	if (completed > RCU_TORTURE_PIPE_LEN) {
 		/* Should not happen, but... */
 		completed = RCU_TORTURE_PIPE_LEN;
@@ -1114,13 +1094,11 @@ static int
 rcu_torture_reader(void *arg)
 {
 	int completed;
-	int completed_end;
 	int idx;
 	DEFINE_RCU_RANDOM(rand);
 	struct rcu_torture *p;
 	int pipe_count;
 	struct timer_list t;
-	unsigned long long ts;
 
 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
 	set_user_nice(current, 19);
@@ -1134,7 +1112,6 @@ rcu_torture_reader(void *arg)
 		}
 		idx = cur_ops->readlock();
 		completed = cur_ops->completed();
-		ts = rcu_trace_clock_local();
 		p = rcu_dereference_check(rcu_torture_current,
 					  rcu_read_lock_bh_held() ||
 					  rcu_read_lock_sched_held() ||
@@ -1145,6 +1122,7 @@ rcu_torture_reader(void *arg)
 			schedule_timeout_interruptible(HZ);
 			continue;
 		}
+		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 		if (p->rtort_mbtest == 0)
 			atomic_inc(&n_rcu_torture_mberror);
 		cur_ops->read_delay(&rand);
@@ -1154,14 +1132,10 @@ rcu_torture_reader(void *arg)
 			/* Should not happen, but... */
 			pipe_count = RCU_TORTURE_PIPE_LEN;
 		}
-		completed_end = cur_ops->completed();
-		if (pipe_count > 1) {
-			do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
-						  ts, completed, completed_end);
+		if (pipe_count > 1)
 			rcutorture_trace_dump();
-		}
 		__this_cpu_inc(rcu_torture_count[pipe_count]);
-		completed = completed_end - completed;
+		completed = cur_ops->completed() - completed;
 		if (completed > RCU_TORTURE_PIPE_LEN) {
 			/* Should not happen, but... */
 			completed = RCU_TORTURE_PIPE_LEN;
@@ -1327,35 +1301,19 @@ static void rcu_torture_shuffle_tasks(void)
 				set_cpus_allowed_ptr(reader_tasks[i],
 						     shuffle_tmp_mask);
 	}
+
 	if (fakewriter_tasks) {
 		for (i = 0; i < nfakewriters; i++)
 			if (fakewriter_tasks[i])
 				set_cpus_allowed_ptr(fakewriter_tasks[i],
 						     shuffle_tmp_mask);
 	}
+
 	if (writer_task)
 		set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
+
 	if (stats_task)
 		set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
-	if (stutter_task)
-		set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
-	if (fqs_task)
-		set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
-	if (shutdown_task)
-		set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
-#ifdef CONFIG_HOTPLUG_CPU
-	if (onoff_task)
-		set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-	if (stall_task)
-		set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
-	if (barrier_cbs_tasks)
-		for (i = 0; i < n_barrier_cbs; i++)
-			if (barrier_cbs_tasks[i])
-				set_cpus_allowed_ptr(barrier_cbs_tasks[i],
-						     shuffle_tmp_mask);
-	if (barrier_task)
-		set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
 
 	if (rcu_idle_cpu == -1)
 		rcu_idle_cpu = num_online_cpus() - 1;
@@ -1791,7 +1749,7 @@ static int rcu_torture_barrier_init(void)
 	barrier_cbs_wq =
 		kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
 			GFP_KERNEL);
-	if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
+	if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0)
 		return -ENOMEM;
 	for (i = 0; i < n_barrier_cbs; i++) {
 		init_waitqueue_head(&barrier_cbs_wq[i]);
diff --git a/trunk/kernel/rcutree.c b/trunk/kernel/rcutree.c
index 5b8ad827fd86..e441b77b614e 100644
--- a/trunk/kernel/rcutree.c
+++ b/trunk/kernel/rcutree.c
@@ -105,7 +105,7 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
  * The rcu_scheduler_active variable transitions from zero to one just
  * before the first task is spawned.  So when this variable is zero, RCU
  * can assume that there is but one task, allowing RCU to (for example)
- * optimize synchronize_sched() to a simple barrier().  When this variable
+ * optimize synchronize_sched() to a simple barrier().  When this variable
  * is one, RCU must actually do all the hard work required to detect real
  * grace periods.  This variable is also used to suppress boot-time false
  * positives from lockdep-RCU error checking.
@@ -217,6 +217,12 @@ module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
+module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);
+
 static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 
@@ -299,27 +305,17 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 }
 
 /*
- * Does the current CPU require a not-yet-started grace period?
- * The caller must have disabled interrupts to prevent races with
- * normal callback registry.
+ * Does the current CPU require an as-yet-unscheduled grace period?
  */
 static int
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	int i;
+	struct rcu_head **ntp;
 
-	if (rcu_gp_in_progress(rsp))
-		return 0;  /* No, a grace period is already in progress. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL])
-		return 0;  /* No, this is a no-CBs (or offline) CPU. */
-	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
-		return 1;  /* Yes, this CPU has newly registered callbacks. */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
-		    ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
-				 rdp->nxtcompleted[i]))
-			return 1;  /* Yes, CBs for future grace period. */
-	return 0; /* No grace period needed. */
+	ntp = rdp->nxttail[RCU_DONE_TAIL +
+			   (ACCESS_ONCE(rsp->completed) != rdp->completed)];
+	return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
+	       !rcu_gp_in_progress(rsp);
 }
 
 /*
@@ -340,7 +336,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 				bool user)
 {
-	trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
+	trace_rcu_dyntick("Start", oldval, 0);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle = idle_task(smp_processor_id());
 
@@ -731,7 +727,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
  * interrupt from idle, return true.  The caller must have at least
  * disabled preemption.
  */
-static int rcu_is_cpu_rrupt_from_idle(void)
+int rcu_is_cpu_rrupt_from_idle(void)
 {
 	return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
@@ -797,10 +793,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	return 0;
 }
 
+static int jiffies_till_stall_check(void)
+{
+	int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+	/*
+	 * Clamp to [3, 300], writing the clamped value back.  Limits must
+	 * match the Kconfig range for CONFIG_RCU_CPU_STALL_TIMEOUT.
+	 */
+	if (till_stall_check < 3) {
+		ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+		till_stall_check = 3;
+	} else if (till_stall_check > 300) {
+		ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+		till_stall_check = 300;
+	}
+	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;	/* jiffies */
+}
+
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
 	rsp->gp_start = jiffies;
-	rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
 }
 
 /*
@@ -843,7 +857,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
-	rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+	rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	/*
@@ -921,7 +935,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
 		rsp->jiffies_stall = jiffies +
-				     3 * rcu_jiffies_till_stall_check() + 3;
+				     3 * jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	set_need_resched();  /* kick ourselves to get things going. */
@@ -952,6 +966,12 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 }
 
+static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
+{
+	rcu_cpu_stall_suppress = 1;	/* silence stall warnings from now on */
+	return NOTIFY_DONE;
+}
+
 /**
  * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
  *
@@ -969,6 +989,15 @@ void rcu_cpu_stall_reset(void)
 		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
+static struct notifier_block rcu_panic_block = {
+	.notifier_call = rcu_panic,	/* sets rcu_cpu_stall_suppress */
+};
+
+static void __init check_cpu_stall_init(void)	/* run from rcu_init() */
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
+}
+
 /*
  * Update CPU-local rcu_data state to record the newly noticed grace period.
  * This is used both when we started the grace period and when we notice
@@ -1041,145 +1070,6 @@ static void init_callback_list(struct rcu_data *rdp)
 	init_nocb_callback_list(rdp);
 }
 
-/*
- * Determine the value that ->completed will have at the end of the
- * next subsequent grace period.  This is used to tag callbacks so that
- * a CPU can invoke callbacks in a timely fashion even if that CPU has
- * been dyntick-idle for an extended period with callbacks under the
- * influence of RCU_FAST_NO_HZ.
- *
- * The caller must hold rnp->lock with interrupts disabled.
- */
-static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
-				       struct rcu_node *rnp)
-{
-	/*
-	 * If RCU is idle, we just wait for the next grace period.
-	 * But we can only be sure that RCU is idle if we are looking
-	 * at the root rcu_node structure -- otherwise, a new grace
-	 * period might have started, but just not yet gotten around
-	 * to initializing the current non-root rcu_node structure.
-	 */
-	if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
-		return rnp->completed + 1;
-
-	/*
-	 * Otherwise, wait for a possible partial grace period and
-	 * then the subsequent full grace period.
-	 */
-	return rnp->completed + 2;
-}
-
-/*
- * If there is room, assign a ->completed number to any callbacks on
- * this CPU that have not already been assigned.  Also accelerate any
- * callbacks that were previously assigned a ->completed number that has
- * since proven to be too conservative, which can happen if callbacks get
- * assigned a ->completed number while RCU is idle, but with reference to
- * a non-root rcu_node structure.  This function is idempotent, so it does
- * not hurt to call it repeatedly.
- *
- * The caller must hold rnp->lock with interrupts disabled.
- */
-static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
-			       struct rcu_data *rdp)
-{
-	unsigned long c;
-	int i;
-
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
-		return;
-
-	/*
-	 * Starting from the sublist containing the callbacks most
-	 * recently assigned a ->completed number and working down, find the
-	 * first sublist that is not assignable to an upcoming grace period.
-	 * Such a sublist has something in it (first two tests) and has
-	 * a ->completed number assigned that will complete sooner than
-	 * the ->completed number for newly arrived callbacks (last test).
-	 *
-	 * The key point is that any later sublist can be assigned the
-	 * same ->completed number as the newly arrived callbacks, which
-	 * means that the callbacks in any of these later sublist can be
-	 * grouped into a single sublist, whether or not they have already
-	 * been assigned a ->completed number.
-	 */
-	c = rcu_cbs_completed(rsp, rnp);
-	for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
-		if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
-		    !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
-			break;
-
-	/*
-	 * If there are no sublist for unassigned callbacks, leave.
-	 * At the same time, advance "i" one sublist, so that "i" will
-	 * index into the sublist where all the remaining callbacks should
-	 * be grouped into.
-	 */
-	if (++i >= RCU_NEXT_TAIL)
-		return;
-
-	/*
-	 * Assign all subsequent callbacks' ->completed number to the next
-	 * full grace period and group them all in the sublist initially
-	 * indexed by "i".
-	 */
-	for (; i <= RCU_NEXT_TAIL; i++) {
-		rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
-		rdp->nxtcompleted[i] = c;
-	}
-
-	/* Trace depending on how much we were able to accelerate. */
-	if (!*rdp->nxttail[RCU_WAIT_TAIL])
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
-	else
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
-}
-
-/*
- * Move any callbacks whose grace period has completed to the
- * RCU_DONE_TAIL sublist, then compact the remaining sublists and
- * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
- * sublist.  This function is idempotent, so it does not hurt to
- * invoke it repeatedly.  As long as it is not invoked -too- often...
- *
- * The caller must hold rnp->lock with interrupts disabled.
- */
-static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
-			    struct rcu_data *rdp)
-{
-	int i, j;
-
-	/* If the CPU has no callbacks, nothing to do. */
-	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
-		return;
-
-	/*
-	 * Find all callbacks whose ->completed numbers indicate that they
-	 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
-	 */
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
-		if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
-			break;
-		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
-	}
-	/* Clean up any sublist tail pointers that were misordered above. */
-	for (j = RCU_WAIT_TAIL; j < i; j++)
-		rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
-
-	/* Copy down callbacks to fill in empty sublists. */
-	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
-		if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
-			break;
-		rdp->nxttail[j] = rdp->nxttail[i];
-		rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
-	}
-
-	/* Classify any remaining callbacks. */
-	rcu_accelerate_cbs(rsp, rnp, rdp);
-}
-
 /*
  * Advance this CPU's callbacks, but only if the current grace period
  * has ended.  This may be called only from the CPU to whom the rdp
@@ -1190,15 +1080,12 @@ static void
 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
 	/* Did another grace period end? */
-	if (rdp->completed == rnp->completed) {
+	if (rdp->completed != rnp->completed) {
 
-		/* No, so just accelerate recent callbacks. */
-		rcu_accelerate_cbs(rsp, rnp, rdp);
-
-	} else {
-
-		/* Advance callbacks. */
-		rcu_advance_cbs(rsp, rnp, rdp);
+		/* Advance callbacks.  No harm if list empty. */
+		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
+		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
+		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
 
 		/* Remember that we saw this grace-period completion. */
 		rdp->completed = rnp->completed;
@@ -1505,10 +1392,17 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	/*
 	 * Because there is no grace period in progress right now,
 	 * any callbacks we have up to this point will be satisfied
-	 * by the next grace period.  So this is a good place to
-	 * assign a grace period number to recently posted callbacks.
+	 * by the next grace period.  So promote all callbacks to be
+	 * handled after the end of the next grace period.  If the
+	 * CPU is not yet aware of the end of the previous grace period,
+	 * we need to allow for the callback advancement that will
+	 * occur when it does become aware.  Deadlock prevents us from
+	 * making it aware at this point: We cannot acquire a leaf
+	 * rcu_node ->lock while holding the root rcu_node ->lock.
 	 */
-	rcu_accelerate_cbs(rsp, rnp, rdp);
+	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+	if (rdp->completed == rsp->completed)
+		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
 
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
 	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
@@ -1633,7 +1527,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 * This GP can't end until cpu checks in, so all of our
 		 * callbacks can be processed during the next GP.
 		 */
-		rcu_accelerate_cbs(rsp, rnp, rdp);
+		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
 
 		rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
 	}
@@ -1885,7 +1779,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	long bl, count, count_lazy;
 	int i;
 
-	/* If no callbacks are ready, just return. */
+	/* If no callbacks are ready, just return. */
 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
 		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
 		trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
@@ -2114,19 +2008,19 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 
 	WARN_ON_ONCE(rdp->beenonline == 0);
 
-	/* Handle the end of a grace period that some other CPU ended.  */
+	/*
+	 * Advance callbacks in response to end of earlier grace
+	 * period that some other CPU ended.
+	 */
 	rcu_process_gp_end(rsp, rdp);
 
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
 
 	/* Does this CPU require a not-yet-started grace period? */
-	local_irq_save(flags);
 	if (cpu_needs_another_gp(rsp, rdp)) {
-		raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
+		raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
 		rcu_start_gp(rsp, flags);  /* releases above lock */
-	} else {
-		local_irq_restore(flags);
 	}
 
 	/* If there are callbacks ready, invoke them. */
@@ -2825,6 +2719,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+#ifdef CONFIG_RCU_USER_QS
+	WARN_ON_ONCE(rdp->dynticks->in_user);
+#endif
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
 	rcu_boot_init_nocb_percpu_data(rdp);
@@ -3041,10 +2938,6 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
 	BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
 
-	/* Silence gcc 4.8 warning about array index out of range. */
-	if (rcu_num_lvls > RCU_NUM_LVLS)
-		panic("rcu_init_one: rcu_num_lvls overflow");
-
 	/* Initialize the level-tracking arrays. */
 
 	for (i = 0; i < rcu_num_lvls; i++)
@@ -3181,6 +3074,7 @@ void __init rcu_init(void)
 	cpu_notifier(rcu_cpu_notify, 0);
 	for_each_online_cpu(cpu)
 		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
+	check_cpu_stall_init();
 }
 
 #include "rcutree_plugin.h"
diff --git a/trunk/kernel/rcutree.h b/trunk/kernel/rcutree.h
index c896b5045d9d..4b69291b093d 100644
--- a/trunk/kernel/rcutree.h
+++ b/trunk/kernel/rcutree.h
@@ -102,6 +102,10 @@ struct rcu_dynticks {
 				    /* idle-period nonlazy_posted snapshot. */
 	int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+#ifdef CONFIG_RCU_USER_QS
+	bool ignore_user_qs;	    /* Treat userspace as extended QS or not */
+	bool in_user;		    /* Is the CPU in userland from RCU POV? */
+#endif
 };
 
 /* RCU's kthread states for tracing. */
@@ -278,8 +282,6 @@ struct rcu_data {
 	 */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail[RCU_NEXT_SIZE];
-	unsigned long	nxtcompleted[RCU_NEXT_SIZE];
-					/* grace periods for sublists. */
 	long		qlen_lazy;	/* # of lazy queued callbacks */
 	long		qlen;		/* # of queued callbacks, incl lazy */
 	long		qlen_last_fqs_check;
@@ -341,6 +343,11 @@ struct rcu_data {
 
 #define RCU_JIFFIES_TILL_FORCE_QS	 3	/* for rsp->jiffies_force_qs */
 
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA	       (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA	       0
+#endif
 #define RCU_STALL_RAT_DELAY		2	/* Allow other CPUs time */
 						/*  to take at least one */
 						/*  scheduling clock irq */
diff --git a/trunk/kernel/rtmutex-debug.c b/trunk/kernel/rtmutex-debug.c
index 13b243a323fa..16502d3a71c8 100644
--- a/trunk/kernel/rtmutex-debug.c
+++ b/trunk/kernel/rtmutex-debug.c
@@ -17,7 +17,6 @@
  * See rt.c in preempt-rt for proper credits and further information
  */
 #include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/trunk/kernel/rtmutex-tester.c b/trunk/kernel/rtmutex-tester.c
index 7890b10084a7..98ec49475460 100644
--- a/trunk/kernel/rtmutex-tester.c
+++ b/trunk/kernel/rtmutex-tester.c
@@ -10,7 +10,6 @@
 #include <linux/kthread.h>
 #include <linux/export.h>
 #include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
diff --git a/trunk/kernel/rtmutex.c b/trunk/kernel/rtmutex.c
index 1e09308bf2a1..a242e691c993 100644
--- a/trunk/kernel/rtmutex.c
+++ b/trunk/kernel/rtmutex.c
@@ -13,7 +13,6 @@
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include "rtmutex_common.h"
diff --git a/trunk/kernel/sched/core.c b/trunk/kernel/sched/core.c
index 4a88f1d51563..26058d0bebba 100644
--- a/trunk/kernel/sched/core.c
+++ b/trunk/kernel/sched/core.c
@@ -4371,7 +4371,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
 	unsigned long flags;
-	int yielded = 0;
+	bool yielded = 0;
 
 	local_irq_save(flags);
 	rq = this_rq();
@@ -4667,7 +4667,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
-	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
@@ -7509,25 +7508,6 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
-{
-	int ret;
-	static DEFINE_MUTEX(mutex);
-
-	mutex_lock(&mutex);
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-	/* make sure that internally we keep jiffies */
-	/* also, writing zero resets timeslice to default */
-	if (!ret && write) {
-		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
-			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
-	}
-	mutex_unlock(&mutex);
-	return ret;
-}
-
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
diff --git a/trunk/kernel/sched/cpupri.c b/trunk/kernel/sched/cpupri.c
index 1095e878a46f..23aa789c53ee 100644
--- a/trunk/kernel/sched/cpupri.c
+++ b/trunk/kernel/sched/cpupri.c
@@ -28,8 +28,6 @@
  */
 
 #include <linux/gfp.h>
-#include <linux/sched.h>
-#include <linux/sched/rt.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
diff --git a/trunk/kernel/sched/cputime.c b/trunk/kernel/sched/cputime.c
index 9857329ed280..293b202fcf79 100644
--- a/trunk/kernel/sched/cputime.c
+++ b/trunk/kernel/sched/cputime.c
@@ -3,7 +3,6 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
-#include <linux/context_tracking.h>
 #include "sched.h"
 
 
@@ -164,7 +163,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for user time used */
-	acct_account_cputime(p);
+	acct_update_integrals(p);
 }
 
 /*
@@ -214,7 +213,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for system time used */
-	acct_account_cputime(p);
+	acct_update_integrals(p);
 }
 
 /*
@@ -296,7 +295,6 @@ static __always_inline bool steal_account_process_tick(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
-	cputime_t utime, stime;
 	struct task_struct *t;
 
 	times->utime = sig->utime;
@@ -310,15 +308,16 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		task_cputime(tsk, &utime, &stime);
-		times->utime += utime;
-		times->stime += stime;
+		times->utime += t->utime;
+		times->stime += t->stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
 	rcu_read_unlock();
 }
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
@@ -383,12 +382,11 @@ static void irqtime_account_idle_ticks(int ticks)
 		irqtime_account_process_tick(current, 0, rq);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static inline void irqtime_account_idle_ticks(int ticks) {}
-static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+static void irqtime_account_idle_ticks(int ticks) {}
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Account a single tick of cpu time.
  * @p: the process that the cpu time gets accounted to
@@ -399,9 +397,6 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
-	if (vtime_accounting_enabled())
-		return;
-
 	if (sched_clock_irqtime) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
@@ -443,7 +438,8 @@ void account_idle_ticks(unsigned long ticks)
 
 	account_idle_time(jiffies_to_cputime(ticks));
 }
-#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+#endif
 
 /*
  * Use precise platform statistics if available:
@@ -465,20 +461,25 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	*st = cputime.stime;
 }
 
+void vtime_account_system_irqsafe(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	vtime_account_system(tsk);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
+
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
-	if (!vtime_accounting_enabled())
-		return;
-
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
 		vtime_account_system(prev);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	vtime_account_user(prev);
-#endif
 	arch_vtime_task_switch(prev);
 }
 #endif
@@ -492,40 +493,27 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account_irq_enter(struct task_struct *tsk)
+void vtime_account(struct task_struct *tsk)
 {
-	if (!vtime_accounting_enabled())
-		return;
-
-	if (!in_interrupt()) {
-		/*
-		 * If we interrupted user, context_tracking_in_user()
-		 * is 1 because the context tracking don't hook
-		 * on irq entry/exit. This way we know if
-		 * we need to flush user time on kernel entry.
-		 */
-		if (context_tracking_in_user()) {
-			vtime_account_user(tsk);
-			return;
-		}
-
-		if (is_idle_task(tsk)) {
-			vtime_account_idle(tsk);
-			return;
-		}
-	}
-	vtime_account_system(tsk);
+	if (in_interrupt() || !is_idle_task(tsk))
+		vtime_account_system(tsk);
+	else
+		vtime_account_idle(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+EXPORT_SYMBOL_GPL(vtime_account);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+#else
+
+#ifndef nsecs_to_cputime
+# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
+#endif
 
-static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
 {
 	u64 temp = (__force u64) rtime;
 
-	temp *= (__force u64) stime;
+	temp *= (__force u64) utime;
 
 	if (sizeof(cputime_t) == 4)
 		temp = div_u64(temp, (__force u32) total);
@@ -543,10 +531,10 @@ static void cputime_adjust(struct task_cputime *curr,
 			   struct cputime *prev,
 			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, stime, total;
+	cputime_t rtime, utime, total;
 
-	stime = curr->stime;
-	total = stime + curr->utime;
+	utime = curr->utime;
+	total = utime + curr->stime;
 
 	/*
 	 * Tick based cputime accounting depend on random scheduling
@@ -561,17 +549,17 @@ static void cputime_adjust(struct task_cputime *curr,
 	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
 	if (total)
-		stime = scale_stime(stime, rtime, total);
+		utime = scale_utime(utime, rtime, total);
 	else
-		stime = rtime;
+		utime = rtime;
 
 	/*
 	 * If the tick based count grows faster than the scheduler one,
 	 * the result of the scaling may go backward.
 	 * Let's enforce monotonicity.
 	 */
-	prev->stime = max(prev->stime, stime);
-	prev->utime = max(prev->utime, rtime - prev->stime);
+	prev->utime = max(prev->utime, utime);
+	prev->stime = max(prev->stime, rtime - prev->utime);
 
 	*ut = prev->utime;
 	*st = prev->stime;
@@ -580,10 +568,11 @@ static void cputime_adjust(struct task_cputime *curr,
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
+		.utime = p->utime,
+		.stime = p->stime,
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};
 
-	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
@@ -597,221 +586,4 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
-#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static unsigned long long vtime_delta(struct task_struct *tsk)
-{
-	unsigned long long clock;
-
-	clock = sched_clock();
-	if (clock < tsk->vtime_snap)
-		return 0;
-
-	return clock - tsk->vtime_snap;
-}
-
-static cputime_t get_vtime_delta(struct task_struct *tsk)
-{
-	unsigned long long delta = vtime_delta(tsk);
-
-	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
-	tsk->vtime_snap += delta;
-
-	/* CHECKME: always safe to convert nsecs to cputime? */
-	return nsecs_to_cputime(delta);
-}
-
-static void __vtime_account_system(struct task_struct *tsk)
-{
-	cputime_t delta_cpu = get_vtime_delta(tsk);
-
-	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
-}
-
-void vtime_account_system(struct task_struct *tsk)
-{
-	if (!vtime_accounting_enabled())
-		return;
-
-	write_seqlock(&tsk->vtime_seqlock);
-	__vtime_account_system(tsk);
-	write_sequnlock(&tsk->vtime_seqlock);
-}
-
-void vtime_account_irq_exit(struct task_struct *tsk)
-{
-	if (!vtime_accounting_enabled())
-		return;
-
-	write_seqlock(&tsk->vtime_seqlock);
-	if (context_tracking_in_user())
-		tsk->vtime_snap_whence = VTIME_USER;
-	__vtime_account_system(tsk);
-	write_sequnlock(&tsk->vtime_seqlock);
-}
-
-void vtime_account_user(struct task_struct *tsk)
-{
-	cputime_t delta_cpu;
-
-	if (!vtime_accounting_enabled())
-		return;
-
-	delta_cpu = get_vtime_delta(tsk);
-
-	write_seqlock(&tsk->vtime_seqlock);
-	tsk->vtime_snap_whence = VTIME_SYS;
-	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
-	write_sequnlock(&tsk->vtime_seqlock);
-}
-
-void vtime_user_enter(struct task_struct *tsk)
-{
-	if (!vtime_accounting_enabled())
-		return;
-
-	write_seqlock(&tsk->vtime_seqlock);
-	tsk->vtime_snap_whence = VTIME_USER;
-	__vtime_account_system(tsk);
-	write_sequnlock(&tsk->vtime_seqlock);
-}
-
-void vtime_guest_enter(struct task_struct *tsk)
-{
-	write_seqlock(&tsk->vtime_seqlock);
-	__vtime_account_system(tsk);
-	current->flags |= PF_VCPU;
-	write_sequnlock(&tsk->vtime_seqlock);
-}
-
-void vtime_guest_exit(struct task_struct *tsk)
-{
-	write_seqlock(&tsk->vtime_seqlock);
-	__vtime_account_system(tsk);
-	current->flags &= ~PF_VCPU;
-	write_sequnlock(&tsk->vtime_seqlock);
-}
-
-void vtime_account_idle(struct task_struct *tsk)
-{
-	cputime_t delta_cpu = get_vtime_delta(tsk);
-
-	account_idle_time(delta_cpu);
-}
-
-bool vtime_accounting_enabled(void)
-{
-	return context_tracking_active();
-}
-
-void arch_vtime_task_switch(struct task_struct *prev)
-{
-	write_seqlock(&prev->vtime_seqlock);
-	prev->vtime_snap_whence = VTIME_SLEEPING;
-	write_sequnlock(&prev->vtime_seqlock);
-
-	write_seqlock(&current->vtime_seqlock);
-	current->vtime_snap_whence = VTIME_SYS;
-	current->vtime_snap = sched_clock();
-	write_sequnlock(&current->vtime_seqlock);
-}
-
-void vtime_init_idle(struct task_struct *t)
-{
-	unsigned long flags;
-
-	write_seqlock_irqsave(&t->vtime_seqlock, flags);
-	t->vtime_snap_whence = VTIME_SYS;
-	t->vtime_snap = sched_clock();
-	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
-}
-
-cputime_t task_gtime(struct task_struct *t)
-{
-	unsigned int seq;
-	cputime_t gtime;
-
-	do {
-		seq = read_seqbegin(&t->vtime_seqlock);
-
-		gtime = t->gtime;
-		if (t->flags & PF_VCPU)
-			gtime += vtime_delta(t);
-
-	} while (read_seqretry(&t->vtime_seqlock, seq));
-
-	return gtime;
-}
-
-/*
- * Fetch cputime raw values from fields of task_struct and
- * add up the pending nohz execution time since the last
- * cputime snapshot.
- */
-static void
-fetch_task_cputime(struct task_struct *t,
-		   cputime_t *u_dst, cputime_t *s_dst,
-		   cputime_t *u_src, cputime_t *s_src,
-		   cputime_t *udelta, cputime_t *sdelta)
-{
-	unsigned int seq;
-	unsigned long long delta;
-
-	do {
-		*udelta = 0;
-		*sdelta = 0;
-
-		seq = read_seqbegin(&t->vtime_seqlock);
-
-		if (u_dst)
-			*u_dst = *u_src;
-		if (s_dst)
-			*s_dst = *s_src;
-
-		/* Task is sleeping, nothing to add */
-		if (t->vtime_snap_whence == VTIME_SLEEPING ||
-		    is_idle_task(t))
-			continue;
-
-		delta = vtime_delta(t);
-
-		/*
-		 * Task runs either in user or kernel space, add pending nohz time to
-		 * the right place.
-		 */
-		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
-			*udelta = delta;
-		} else {
-			if (t->vtime_snap_whence == VTIME_SYS)
-				*sdelta = delta;
-		}
-	} while (read_seqretry(&t->vtime_seqlock, seq));
-}
-
-
-void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
-{
-	cputime_t udelta, sdelta;
-
-	fetch_task_cputime(t, utime, stime, &t->utime,
-			   &t->stime, &udelta, &sdelta);
-	if (utime)
-		*utime += udelta;
-	if (stime)
-		*stime += sdelta;
-}
-
-void task_cputime_scaled(struct task_struct *t,
-			 cputime_t *utimescaled, cputime_t *stimescaled)
-{
-	cputime_t udelta, sdelta;
-
-	fetch_task_cputime(t, utimescaled, stimescaled,
-			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
-	if (utimescaled)
-		*utimescaled += cputime_to_scaled(udelta);
-	if (stimescaled)
-		*stimescaled += cputime_to_scaled(sdelta);
-}
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+#endif
diff --git a/trunk/kernel/sched/fair.c b/trunk/kernel/sched/fair.c
index 7a33e5986fc5..81fa53643409 100644
--- a/trunk/kernel/sched/fair.c
+++ b/trunk/kernel/sched/fair.c
@@ -1680,7 +1680,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	}
 
 	/* ensure we never gain time by being placed backwards. */
-	se->vruntime = max_vruntime(se->vruntime, vruntime);
+	vruntime = max_vruntime(se->vruntime, vruntime);
+
+	se->vruntime = vruntime;
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -3252,18 +3254,25 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int target)
 {
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
 	struct sched_group *sg;
-	int i = task_cpu(p);
+	int i;
 
-	if (idle_cpu(target))
-		return target;
+	/*
+	 * If the task is going to be woken-up on this cpu and if it is
+	 * already idle, then it is the right target.
+	 */
+	if (target == cpu && idle_cpu(cpu))
+		return cpu;
 
 	/*
-	 * If the prevous cpu is cache affine and idle, don't be stupid.
+	 * If the task is going to be woken-up on the cpu where it previously
+	 * ran and if it is currently idle, then it is the right target.
 	 */
-	if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
-		return i;
+	if (target == prev_cpu && idle_cpu(prev_cpu))
+		return prev_cpu;
 
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
@@ -3277,7 +3286,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 				goto next;
 
 			for_each_cpu(i, sched_group_cpus(sg)) {
-				if (i == target || !idle_cpu(i))
+				if (!idle_cpu(i))
 					goto next;
 			}
 
@@ -6092,7 +6101,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
 	 * idle runqueue:
 	 */
 	if (rq->cfs.load.weight)
-		rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
+		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
 
 	return rr_interval;
 }
diff --git a/trunk/kernel/sched/rt.c b/trunk/kernel/sched/rt.c
index 127a2c4cf4ab..4f02b2847357 100644
--- a/trunk/kernel/sched/rt.c
+++ b/trunk/kernel/sched/rt.c
@@ -7,8 +7,6 @@
 
 #include <linux/slab.h>
 
-int sched_rr_timeslice = RR_TIMESLICE;
-
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 struct rt_bandwidth def_rt_bandwidth;
@@ -927,8 +925,8 @@ static void update_curr_rt(struct rq *rq)
 		return;
 
 	delta_exec = rq->clock_task - curr->se.exec_start;
-	if (unlikely((s64)delta_exec <= 0))
-		return;
+	if (unlikely((s64)delta_exec < 0))
+		delta_exec = 0;
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
@@ -1429,7 +1427,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
+	    (p->nr_cpus_allowed > 1))
 		return 1;
 	return 0;
 }
@@ -1890,11 +1889,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (!p->on_rq || rq->rt.rt_nr_running)
-		return;
-
-	if (pull_rt_task(rq))
-		resched_task(rq->curr);
+	if (p->on_rq && !rq->rt.rt_nr_running)
+		pull_rt_task(rq);
 }
 
 void init_sched_rt_class(void)
@@ -1989,11 +1985,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 	if (soft != RLIM_INFINITY) {
 		unsigned long next;
 
-		if (p->rt.watchdog_stamp != jiffies) {
-			p->rt.timeout++;
-			p->rt.watchdog_stamp = jiffies;
-		}
-
+		p->rt.timeout++;
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
 			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
@@ -2018,7 +2010,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	if (--p->rt.time_slice)
 		return;
 
-	p->rt.time_slice = sched_rr_timeslice;
+	p->rt.time_slice = RR_TIMESLICE;
 
 	/*
 	 * Requeue to the end of queue if we (and all of our ancestors) are the
@@ -2049,7 +2041,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 	 * Time slice is 0 for SCHED_FIFO tasks
 	 */
 	if (task->policy == SCHED_RR)
-		return sched_rr_timeslice;
+		return RR_TIMESLICE;
 	else
 		return 0;
 }
diff --git a/trunk/kernel/sched/sched.h b/trunk/kernel/sched/sched.h
index cc03cfdf469f..fc886441436a 100644
--- a/trunk/kernel/sched/sched.h
+++ b/trunk/kernel/sched/sched.h
@@ -1,7 +1,5 @@
 
 #include <linux/sched.h>
-#include <linux/sched/sysctl.h>
-#include <linux/sched/rt.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
diff --git a/trunk/kernel/signal.c b/trunk/kernel/signal.c
index 7f82adbad480..3d09cf6cde75 100644
--- a/trunk/kernel/signal.c
+++ b/trunk/kernel/signal.c
@@ -1632,7 +1632,6 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	unsigned long flags;
 	struct sighand_struct *psig;
 	bool autoreap = false;
-	cputime_t utime, stime;
 
 	BUG_ON(sig == -1);
 
@@ -1670,9 +1669,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 				       task_uid(tsk));
 	rcu_read_unlock();
 
-	task_cputime(tsk, &utime, &stime);
-	info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
-	info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
+	info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
+	info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
@@ -1736,7 +1734,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	unsigned long flags;
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
-	cputime_t utime, stime;
 
 	if (for_ptracer) {
 		parent = tsk->parent;
@@ -1755,9 +1752,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
 	rcu_read_unlock();
 
-	task_cputime(tsk, &utime, &stime);
-	info.si_utime = cputime_to_clock_t(utime);
-	info.si_stime = cputime_to_clock_t(stime);
+	info.si_utime = cputime_to_clock_t(tsk->utime);
+	info.si_stime = cputime_to_clock_t(tsk->stime);
 
  	info.si_code = why;
  	switch (why) {
diff --git a/trunk/kernel/smpboot.c b/trunk/kernel/smpboot.c
index d4abac261779..d6c5fc054242 100644
--- a/trunk/kernel/smpboot.c
+++ b/trunk/kernel/smpboot.c
@@ -183,10 +183,9 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 		kfree(td);
 		return PTR_ERR(tsk);
 	}
+
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
-	if (ht->create)
-		ht->create(cpu);
 	return 0;
 }
 
@@ -226,7 +225,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 {
 	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
 
-	if (tsk && !ht->selfparking)
+	if (tsk)
 		kthread_park(tsk);
 }
 
diff --git a/trunk/kernel/softirq.c b/trunk/kernel/softirq.c
index f5cc25f147a6..ed567babe789 100644
--- a/trunk/kernel/softirq.c
+++ b/trunk/kernel/softirq.c
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	account_irq_enter_time(current);
+	vtime_account_irq_enter(current);
 
 	__local_bh_disable((unsigned long)__builtin_return_address(0),
 				SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ asmlinkage void __do_softirq(void)
 
 	lockdep_softirq_exit();
 
-	account_irq_exit_time(current);
+	vtime_account_irq_exit(current);
 	__local_bh_enable(SOFTIRQ_OFFSET);
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
  */
 void irq_exit(void)
 {
-	account_irq_exit_time(current);
+	vtime_account_irq_exit(current);
 	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
diff --git a/trunk/kernel/srcu.c b/trunk/kernel/srcu.c
index 01d5ccb8bfe3..2b859828cdc3 100644
--- a/trunk/kernel/srcu.c
+++ b/trunk/kernel/srcu.c
@@ -282,8 +282,12 @@ static int srcu_readers_active(struct srcu_struct *sp)
  */
 void cleanup_srcu_struct(struct srcu_struct *sp)
 {
-	if (WARN_ON(srcu_readers_active(sp)))
-		return; /* Leakage unless caller handles error. */
+	int sum;
+
+	sum = srcu_readers_active(sp);
+	WARN_ON(sum);  /* Leakage unless caller handles error. */
+	if (sum != 0)
+		return;
 	free_percpu(sp->per_cpu_ref);
 	sp->per_cpu_ref = NULL;
 }
@@ -298,8 +302,9 @@ int __srcu_read_lock(struct srcu_struct *sp)
 {
 	int idx;
 
-	idx = ACCESS_ONCE(sp->completed) & 0x1;
 	preempt_disable();
+	idx = rcu_dereference_index_check(sp->completed,
+					  rcu_read_lock_sched_held()) & 0x1;
 	ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
@@ -316,8 +321,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
+	preempt_disable();
 	smp_mb(); /* C */  /* Avoid leaking the critical section. */
-	this_cpu_dec(sp->per_cpu_ref->c[idx]);
+	ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1;
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
@@ -416,7 +423,6 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
 			   !lock_is_held(&rcu_sched_lock_map),
 			   "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
 
-	might_sleep();
 	init_completion(&rcu.completion);
 
 	head->next = NULL;
@@ -449,12 +455,10 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
  * synchronize_srcu - wait for prior SRCU read-side critical-section completion
  * @sp: srcu_struct with which to synchronize.
  *
- * Wait for the count to drain to zero of both indexes. To avoid the
- * possible starvation of synchronize_srcu(), it waits for the count of
- * the index=((->completed & 1) ^ 1) to drain to zero at first,
- * and then flip the completed and wait for the count of the other index.
- *
- * Can block; must be called from process context.
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates.  Can block; must be called from
+ * process context.
  *
  * Note that it is illegal to call synchronize_srcu() from the corresponding
  * SRCU read-side critical section; doing so will result in deadlock.
@@ -476,11 +480,12 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
  * Wait for an SRCU grace period to elapse, but be more aggressive about
  * spinning rather than blocking when waiting.
  *
- * Note that it is also illegal to call synchronize_srcu_expedited()
- * from the corresponding SRCU read-side critical section;
- * doing so will result in deadlock.  However, it is perfectly legal
- * to call synchronize_srcu_expedited() on one srcu_struct from some
- * other srcu_struct's read-side critical section, as long as
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier.  It is also illegal to call
+ * synchronize_srcu_expedited() from the corresponding SRCU read-side
+ * critical section; doing so will result in deadlock.  However, it is
+ * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct
+ * from some other srcu_struct's read-side critical section, as long as
  * the resulting graph of srcu_structs is acyclic.
  */
 void synchronize_srcu_expedited(struct srcu_struct *sp)
diff --git a/trunk/kernel/stop_machine.c b/trunk/kernel/stop_machine.c
index 95d178c62d5a..2f194e965715 100644
--- a/trunk/kernel/stop_machine.c
+++ b/trunk/kernel/stop_machine.c
@@ -18,7 +18,7 @@
 #include <linux/stop_machine.h>
 #include <linux/interrupt.h>
 #include <linux/kallsyms.h>
-#include <linux/smpboot.h>
+
 #include <linux/atomic.h>
 
 /*
@@ -37,10 +37,10 @@ struct cpu_stopper {
 	spinlock_t		lock;
 	bool			enabled;	/* is this stopper enabled? */
 	struct list_head	works;		/* list of pending works */
+	struct task_struct	*thread;	/* stopper thread */
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
-static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
 static bool stop_machine_initialized = false;
 
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
@@ -62,18 +62,16 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
 }
 
 /* queue @work to @stopper.  if offline, @work is completed immediately */
-static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
+static void cpu_stop_queue_work(struct cpu_stopper *stopper,
+				struct cpu_stop_work *work)
 {
-	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-	struct task_struct *p = per_cpu(cpu_stopper_task, cpu);
-
 	unsigned long flags;
 
 	spin_lock_irqsave(&stopper->lock, flags);
 
 	if (stopper->enabled) {
 		list_add_tail(&work->list, &stopper->works);
-		wake_up_process(p);
+		wake_up_process(stopper->thread);
 	} else
 		cpu_stop_signal_done(work->done, false);
 
@@ -110,7 +108,7 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
 	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
 
 	cpu_stop_init_done(&done, 1);
-	cpu_stop_queue_work(cpu, &work);
+	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
 	wait_for_completion(&done.completion);
 	return done.executed ? done.ret : -ENOENT;
 }
@@ -132,7 +130,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 			struct cpu_stop_work *work_buf)
 {
 	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
-	cpu_stop_queue_work(cpu, work_buf);
+	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
 }
 
 /* static data for stop_cpus */
@@ -161,7 +159,8 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	 */
 	preempt_disable();
 	for_each_cpu(cpu, cpumask)
-		cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
+		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
+				    &per_cpu(stop_cpus_work, cpu));
 	preempt_enable();
 }
 
@@ -245,25 +244,20 @@ int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 	return ret;
 }
 
-static int cpu_stop_should_run(unsigned int cpu)
-{
-	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-	unsigned long flags;
-	int run;
-
-	spin_lock_irqsave(&stopper->lock, flags);
-	run = !list_empty(&stopper->works);
-	spin_unlock_irqrestore(&stopper->lock, flags);
-	return run;
-}
-
-static void cpu_stopper_thread(unsigned int cpu)
+static int cpu_stopper_thread(void *data)
 {
-	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	struct cpu_stopper *stopper = data;
 	struct cpu_stop_work *work;
 	int ret;
 
 repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
 	work = NULL;
 	spin_lock_irq(&stopper->lock);
 	if (!list_empty(&stopper->works)) {
@@ -279,6 +273,8 @@ static void cpu_stopper_thread(unsigned int cpu)
 		struct cpu_stop_done *done = work->done;
 		char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
 
+		__set_current_state(TASK_RUNNING);
+
 		/* cpu stop callbacks are not allowed to sleep */
 		preempt_disable();
 
@@ -294,55 +290,88 @@ static void cpu_stopper_thread(unsigned int cpu)
 					  ksym_buf), arg);
 
 		cpu_stop_signal_done(done, true);
-		goto repeat;
-	}
-}
-
-extern void sched_set_stop_task(int cpu, struct task_struct *stop);
+	} else
+		schedule();
 
-static void cpu_stop_create(unsigned int cpu)
-{
-	sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
+	goto repeat;
 }
 
-static void cpu_stop_park(unsigned int cpu)
-{
-	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-	struct cpu_stop_work *work;
-	unsigned long flags;
-
-	/* drain remaining works */
-	spin_lock_irqsave(&stopper->lock, flags);
-	list_for_each_entry(work, &stopper->works, list)
-		cpu_stop_signal_done(work->done, false);
-	stopper->enabled = false;
-	spin_unlock_irqrestore(&stopper->lock, flags);
-}
+extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
-static void cpu_stop_unpark(unsigned int cpu)
+/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
+static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
+					   unsigned long action, void *hcpu)
 {
+	unsigned int cpu = (unsigned long)hcpu;
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	struct task_struct *p;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		BUG_ON(stopper->thread || stopper->enabled ||
+		       !list_empty(&stopper->works));
+		p = kthread_create_on_node(cpu_stopper_thread,
+					   stopper,
+					   cpu_to_node(cpu),
+					   "migration/%d", cpu);
+		if (IS_ERR(p))
+			return notifier_from_errno(PTR_ERR(p));
+		get_task_struct(p);
+		kthread_bind(p, cpu);
+		sched_set_stop_task(cpu, p);
+		stopper->thread = p;
+		break;
+
+	case CPU_ONLINE:
+		/* strictly unnecessary, as first user will wake it */
+		wake_up_process(stopper->thread);
+		/* mark enabled */
+		spin_lock_irq(&stopper->lock);
+		stopper->enabled = true;
+		spin_unlock_irq(&stopper->lock);
+		break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_POST_DEAD:
+	{
+		struct cpu_stop_work *work;
+
+		sched_set_stop_task(cpu, NULL);
+		/* kill the stopper */
+		kthread_stop(stopper->thread);
+		/* drain remaining works */
+		spin_lock_irq(&stopper->lock);
+		list_for_each_entry(work, &stopper->works, list)
+			cpu_stop_signal_done(work->done, false);
+		stopper->enabled = false;
+		spin_unlock_irq(&stopper->lock);
+		/* release the stopper */
+		put_task_struct(stopper->thread);
+		stopper->thread = NULL;
+		break;
+	}
+#endif
+	}
 
-	spin_lock_irq(&stopper->lock);
-	stopper->enabled = true;
-	spin_unlock_irq(&stopper->lock);
+	return NOTIFY_OK;
 }
 
-static struct smp_hotplug_thread cpu_stop_threads = {
-	.store			= &cpu_stopper_task,
-	.thread_should_run	= cpu_stop_should_run,
-	.thread_fn		= cpu_stopper_thread,
-	.thread_comm		= "migration/%u",
-	.create			= cpu_stop_create,
-	.setup			= cpu_stop_unpark,
-	.park			= cpu_stop_park,
-	.unpark			= cpu_stop_unpark,
-	.selfparking		= true,
+/*
+ * Give it a higher priority so that cpu stopper is available to other
+ * cpu notifiers.  It currently shares the same priority as sched
+ * migration_notifier.
+ */
+static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
+	.notifier_call	= cpu_stop_cpu_callback,
+	.priority	= 10,
 };
 
 static int __init cpu_stop_init(void)
 {
+	void *bcpu = (void *)(long)smp_processor_id();
 	unsigned int cpu;
+	int err;
 
 	for_each_possible_cpu(cpu) {
 		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
@@ -351,8 +380,15 @@ static int __init cpu_stop_init(void)
 		INIT_LIST_HEAD(&stopper->works);
 	}
 
-	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
+	/* start one for the boot cpu */
+	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
+				    bcpu);
+	BUG_ON(err != NOTIFY_OK);
+	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
+	register_cpu_notifier(&cpu_stop_cpu_notifier);
+
 	stop_machine_initialized = true;
+
 	return 0;
 }
 early_initcall(cpu_stop_init);
diff --git a/trunk/kernel/sysctl.c b/trunk/kernel/sysctl.c
index 4fc9be955c71..c88878db491e 100644
--- a/trunk/kernel/sysctl.c
+++ b/trunk/kernel/sysctl.c
@@ -61,7 +61,6 @@
 #include <linux/kmod.h>
 #include <linux/capability.h>
 #include <linux/binfmts.h>
-#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -404,13 +403,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rt_handler,
 	},
-	{
-		.procname	= "sched_rr_timeslice_ms",
-		.data		= &sched_rr_timeslice,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sched_rr_handler,
-	},
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",
diff --git a/trunk/kernel/time.c b/trunk/kernel/time.c
index c2a27dd93142..d226c6a3fd28 100644
--- a/trunk/kernel/time.c
+++ b/trunk/kernel/time.c
@@ -114,12 +114,6 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
 	return 0;
 }
 
-/*
- * Indicates if there is an offset between the system clock and the hardware
- * clock/persistent clock/rtc.
- */
-int persistent_clock_is_local;
-
 /*
  * Adjust the time obtained from the CMOS to be UTC time instead of
  * local time.
@@ -141,8 +135,6 @@ static inline void warp_clock(void)
 	struct timespec adjust;
 
 	adjust = current_kernel_time();
-	if (sys_tz.tz_minuteswest != 0)
-		persistent_clock_is_local = 1;
 	adjust.tv_sec += sys_tz.tz_minuteswest * 60;
 	do_settimeofday(&adjust);
 }
diff --git a/trunk/kernel/time/Kconfig b/trunk/kernel/time/Kconfig
index 24510d84efd7..8601f0db1261 100644
--- a/trunk/kernel/time/Kconfig
+++ b/trunk/kernel/time/Kconfig
@@ -12,11 +12,6 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
 	bool
 
-# Platforms has a persistent clock
-config ALWAYS_USE_PERSISTENT_CLOCK
-	bool
-	default n
-
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
 	bool
@@ -43,10 +38,6 @@ config GENERIC_CLOCKEVENTS_BUILD
 	default y
 	depends on GENERIC_CLOCKEVENTS
 
-# Architecture can handle broadcast in a driver-agnostic way
-config ARCH_HAS_TICK_BROADCAST
-	bool
-
 # Clockevents broadcasting infrastructure
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
diff --git a/trunk/kernel/time/ntp.c b/trunk/kernel/time/ntp.c
index b10a42bb0165..24174b4d669b 100644
--- a/trunk/kernel/time/ntp.c
+++ b/trunk/kernel/time/ntp.c
@@ -15,7 +15,6 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/rtc.h>
 
 #include "tick-internal.h"
 
@@ -484,7 +483,8 @@ int second_overflow(unsigned long secs)
 	return leap;
 }
 
-#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
+
 static void sync_cmos_clock(struct work_struct *work);
 
 static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -510,26 +510,14 @@ static void sync_cmos_clock(struct work_struct *work)
 	}
 
 	getnstimeofday(&now);
-	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) {
-		struct timespec adjust = now;
-
-		fail = -ENODEV;
-		if (persistent_clock_is_local)
-			adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
-#ifdef CONFIG_GENERIC_CMOS_UPDATE
-		fail = update_persistent_clock(adjust);
-#endif
-#ifdef CONFIG_RTC_SYSTOHC
-		if (fail == -ENODEV)
-			fail = rtc_set_ntp_time(adjust);
-#endif
-	}
+	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
+		fail = update_persistent_clock(now);
 
 	next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
 	if (next.tv_nsec <= 0)
 		next.tv_nsec += NSEC_PER_SEC;
 
-	if (!fail || fail == -ENODEV)
+	if (!fail)
 		next.tv_sec = 659;
 	else
 		next.tv_sec = 0;
diff --git a/trunk/kernel/time/tick-broadcast.c b/trunk/kernel/time/tick-broadcast.c
index 2fb8cb88df8d..f113755695e2 100644
--- a/trunk/kernel/time/tick-broadcast.c
+++ b/trunk/kernel/time/tick-broadcast.c
@@ -18,7 +18,6 @@
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
-#include <linux/smp.h>
 
 #include "tick-internal.h"
 
@@ -87,22 +86,6 @@ int tick_is_broadcast_device(struct clock_event_device *dev)
 	return (dev && tick_broadcast_device.evtdev == dev);
 }
 
-static void err_broadcast(const struct cpumask *mask)
-{
-	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
-}
-
-static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
-{
-	if (!dev->broadcast)
-		dev->broadcast = tick_broadcast;
-	if (!dev->broadcast) {
-		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
-			     dev->name);
-		dev->broadcast = err_broadcast;
-	}
-}
-
 /*
  * Check, if the device is disfunctional and a place holder, which
  * needs to be handled by the broadcast device.
@@ -122,7 +105,6 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	 */
 	if (!tick_device_is_functional(dev)) {
 		dev->event_handler = tick_handle_periodic;
-		tick_device_setup_broadcast_func(dev);
 		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
@@ -134,33 +116,15 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 		 */
 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
 			int cpu = smp_processor_id();
+
 			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
 			tick_broadcast_clear_oneshot(cpu);
-		} else {
-			tick_device_setup_broadcast_func(dev);
 		}
 	}
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 	return ret;
 }
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-int tick_receive_broadcast(void)
-{
-	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
-	struct clock_event_device *evt = td->evtdev;
-
-	if (!evt)
-		return -ENODEV;
-
-	if (!evt->event_handler)
-		return -EINVAL;
-
-	evt->event_handler(evt);
-	return 0;
-}
-#endif
-
 /*
  * Broadcast the event to the cpus, which are set in the mask (mangled).
  */
diff --git a/trunk/kernel/time/tick-sched.c b/trunk/kernel/time/tick-sched.c
index 314b9ee07edf..d58e552d9fd1 100644
--- a/trunk/kernel/time/tick-sched.c
+++ b/trunk/kernel/time/tick-sched.c
@@ -20,7 +20,6 @@
 #include <linux/profile.h>
 #include <linux/sched.h>
 #include <linux/module.h>
-#include <linux/irq_work.h>
 
 #include <asm/irq_regs.h>
 
@@ -29,7 +28,7 @@
 /*
  * Per cpu nohz control structure
  */
-DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 /*
  * The time, when the last jiffy update happened. Protected by jiffies_lock.
@@ -332,8 +331,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&jiffies_lock, seq));
 
-	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
-	    arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
+	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
+	    arch_needs_cpu(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
@@ -632,11 +631,8 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	unsigned long ticks;
-
-	if (vtime_accounting_enabled())
-		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick
diff --git a/trunk/kernel/time/timekeeping.c b/trunk/kernel/time/timekeeping.c
index 1e35515a875e..cbc6acb0db3f 100644
--- a/trunk/kernel/time/timekeeping.c
+++ b/trunk/kernel/time/timekeeping.c
@@ -29,9 +29,6 @@ static struct timekeeper timekeeper;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-/* Flag for if there is a persistent clock on this platform */
-bool __read_mostly persistent_clock_exist = false;
-
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
 	while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
@@ -267,18 +264,19 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 }
 
 /**
- * __getnstimeofday - Returns the time of day in a timespec.
+ * getnstimeofday - Returns the time of day in a timespec
  * @ts:		pointer to the timespec to be set
  *
- * Updates the time of day in the timespec.
- * Returns 0 on success, or -ve when suspended (timespec will be undefined).
+ * Returns the time of day in a timespec.
  */
-int __getnstimeofday(struct timespec *ts)
+void getnstimeofday(struct timespec *ts)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long seq;
 	s64 nsecs = 0;
 
+	WARN_ON(timekeeping_suspended);
+
 	do {
 		seq = read_seqbegin(&tk->lock);
 
@@ -289,26 +287,6 @@ int __getnstimeofday(struct timespec *ts)
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
-
-	/*
-	 * Do not bail out early, in case there were callers still using
-	 * the value, even in the face of the WARN_ON.
-	 */
-	if (unlikely(timekeeping_suspended))
-		return -EAGAIN;
-	return 0;
-}
-EXPORT_SYMBOL(__getnstimeofday);
-
-/**
- * getnstimeofday - Returns the time of day in a timespec.
- * @ts:		pointer to the timespec to be set
- *
- * Returns the time of day in a timespec (WARN if suspended).
- */
-void getnstimeofday(struct timespec *ts)
-{
-	WARN_ON(__getnstimeofday(ts));
 }
 EXPORT_SYMBOL(getnstimeofday);
 
@@ -662,14 +640,12 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
-
 	if (!timespec_valid_strict(&now)) {
 		pr_warn("WARNING: Persistent clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		now.tv_sec = 0;
 		now.tv_nsec = 0;
-	} else if (now.tv_sec || now.tv_nsec)
-		persistent_clock_exist = true;
+	}
 
 	read_boot_clock(&boot);
 	if (!timespec_valid_strict(&boot)) {
@@ -742,12 +718,11 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
+	struct timespec ts;
 
-	/*
-	 * Make sure we don't set the clock twice, as timekeeping_resume()
-	 * already did it
-	 */
-	if (has_persistent_clock())
+	/* Make sure we don't set the clock twice */
+	read_persistent_clock(&ts);
+	if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
 		return;
 
 	write_seqlock_irqsave(&tk->lock, flags);
diff --git a/trunk/kernel/timeconst.pl b/trunk/kernel/timeconst.pl
index 3f42652a6a37..eb51d76e058a 100644
--- a/trunk/kernel/timeconst.pl
+++ b/trunk/kernel/timeconst.pl
@@ -369,8 +369,10 @@ (@)
 		die "Usage: $0 HZ\n";
 	}
 
-	$cv = $canned_values{$hz};
-	@val = defined($cv) ? @$cv : compute_values($hz);
+	@val = @{$canned_values{$hz}};
+	if (!defined(@val)) {
+		@val = compute_values($hz);
+	}
 	output($hz, @val);
 }
 exit 0;
diff --git a/trunk/kernel/timer.c b/trunk/kernel/timer.c
index dbf7a78a1ef1..367d00858482 100644
--- a/trunk/kernel/timer.c
+++ b/trunk/kernel/timer.c
@@ -39,7 +39,6 @@
 #include <linux/kallsyms.h>
 #include <linux/irq_work.h>
 #include <linux/sched.h>
-#include <linux/sched/sysctl.h>
 #include <linux/slab.h>
 
 #include <asm/uaccess.h>
@@ -1352,6 +1351,7 @@ void update_process_times(int user_tick)
 	account_process_tick(p, user_tick);
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
+	printk_tick();
 #ifdef CONFIG_IRQ_WORK
 	if (in_irq())
 		irq_work_run();
diff --git a/trunk/kernel/trace/Kconfig b/trunk/kernel/trace/Kconfig
index 36567564e221..5d89335a485f 100644
--- a/trunk/kernel/trace/Kconfig
+++ b/trunk/kernel/trace/Kconfig
@@ -39,9 +39,6 @@ config HAVE_DYNAMIC_FTRACE
 	help
 	  See Documentation/trace/ftrace-design.txt
 
-config HAVE_DYNAMIC_FTRACE_WITH_REGS
-	bool
-
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
@@ -253,16 +250,6 @@ config FTRACE_SYSCALLS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
-config TRACER_SNAPSHOT
-	bool "Create a snapshot trace buffer"
-	select TRACER_MAX_TRACE
-	help
-	  Allow tracing users to take snapshot of the current buffer using the
-	  ftrace interface, e.g.:
-
-	      echo 1 > /sys/kernel/debug/tracing/snapshot
-	      cat snapshot
-
 config TRACE_BRANCH_PROFILING
 	bool
 	select GENERIC_TRACER
@@ -447,11 +434,6 @@ config DYNAMIC_FTRACE
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
-config DYNAMIC_FTRACE_WITH_REGS
-	def_bool y
-	depends on DYNAMIC_FTRACE
-	depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
-
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
diff --git a/trunk/kernel/trace/blktrace.c b/trunk/kernel/trace/blktrace.c
index 71259e2b6b61..c0bd0308741c 100644
--- a/trunk/kernel/trace/blktrace.c
+++ b/trunk/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
 		return;
 
 	local_irq_save(flags);
-	buf = this_cpu_ptr(bt->msg_data);
+	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
 	va_start(args, fmt);
 	n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
 	va_end(args);
diff --git a/trunk/kernel/trace/ftrace.c b/trunk/kernel/trace/ftrace.c
index ce8c3d68292f..41473b4ad7a4 100644
--- a/trunk/kernel/trace/ftrace.c
+++ b/trunk/kernel/trace/ftrace.c
@@ -111,26 +111,6 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 #endif
 
-/*
- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism.  The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
-#define do_for_each_ftrace_op(op, list)			\
-	op = rcu_dereference_raw(list);			\
-	do
-
-/*
- * Optimized for just a single item in the list (as that is the normal case).
- */
-#define while_for_each_ftrace_op(op)				\
-	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
-	       unlikely((op) != &ftrace_list_end))
-
 /**
  * ftrace_nr_registered_ops - return number of ops registered
  *
@@ -152,21 +132,29 @@ int ftrace_nr_registered_ops(void)
 	return cnt;
 }
 
+/*
+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
 static void
 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 			struct ftrace_ops *op, struct pt_regs *regs)
 {
-	int bit;
-
-	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
-	if (bit < 0)
+	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
 		return;
 
-	do_for_each_ftrace_op(op, ftrace_global_list) {
+	trace_recursion_set(TRACE_GLOBAL_BIT);
+	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
+	while (op != &ftrace_list_end) {
 		op->func(ip, parent_ip, op, regs);
-	} while_for_each_ftrace_op(op);
-
-	trace_clear_recursion(bit);
+		op = rcu_dereference_raw(op->next); /*see above*/
+	};
+	trace_recursion_clear(TRACE_GLOBAL_BIT);
 }
 
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
@@ -233,24 +221,10 @@ static void update_global_ops(void)
 	 * registered callers.
 	 */
 	if (ftrace_global_list == &ftrace_list_end ||
-	    ftrace_global_list->next == &ftrace_list_end) {
+	    ftrace_global_list->next == &ftrace_list_end)
 		func = ftrace_global_list->func;
-		/*
-		 * As we are calling the function directly.
-		 * If it does not have recursion protection,
-		 * the function_trace_op needs to be updated
-		 * accordingly.
-		 */
-		if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
-			global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
-		else
-			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
-	} else {
+	else
 		func = ftrace_global_list_func;
-		/* The list has its own recursion protection. */
-		global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
-	}
-
 
 	/* If we filter on pids, update to use the pid function */
 	if (!list_empty(&ftrace_pids)) {
@@ -363,7 +337,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
 		return -EINVAL;
 
-#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
 	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -4116,11 +4090,14 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 	 */
 	preempt_disable_notrace();
 	trace_recursion_set(TRACE_CONTROL_BIT);
-	do_for_each_ftrace_op(op, ftrace_control_list) {
+	op = rcu_dereference_raw(ftrace_control_list);
+	while (op != &ftrace_list_end) {
 		if (!ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-	} while_for_each_ftrace_op(op);
+
+		op = rcu_dereference_raw(op->next);
+	};
 	trace_recursion_clear(TRACE_CONTROL_BIT);
 	preempt_enable_notrace();
 }
@@ -4135,26 +4112,27 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *ignored, struct pt_regs *regs)
 {
 	struct ftrace_ops *op;
-	int bit;
 
 	if (function_trace_stop)
 		return;
 
-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
-	if (bit < 0)
+	if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
 		return;
 
+	trace_recursion_set(TRACE_INTERNAL_BIT);
 	/*
 	 * Some of the ops may be dynamically allocated,
 	 * they must be freed after a synchronize_sched().
 	 */
 	preempt_disable_notrace();
-	do_for_each_ftrace_op(op, ftrace_ops_list) {
+	op = rcu_dereference_raw(ftrace_ops_list);
+	while (op != &ftrace_list_end) {
 		if (ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-	} while_for_each_ftrace_op(op);
+		op = rcu_dereference_raw(op->next);
+	};
 	preempt_enable_notrace();
-	trace_clear_recursion(bit);
+	trace_recursion_clear(TRACE_INTERNAL_BIT);
 }
 
 /*
@@ -4165,8 +4143,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
  * Archs are to support both the regs and ftrace_ops at the same time.
  * If they support ftrace_ops, it is assumed they support regs.
  * If call backs want to use regs, they must either check for regs
- * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
- * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
+ * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
+ * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
  * An architecture can pass partial regs with ftrace_ops and still
  * set the ARCH_SUPPORT_FTARCE_OPS.
  */
diff --git a/trunk/kernel/trace/ring_buffer.c b/trunk/kernel/trace/ring_buffer.c
index 7244acde77b0..ce8514feedcd 100644
--- a/trunk/kernel/trace/ring_buffer.c
+++ b/trunk/kernel/trace/ring_buffer.c
@@ -3,10 +3,8 @@
  *
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
-#include <linux/ftrace_event.h>
 #include <linux/ring_buffer.h>
 #include <linux/trace_clock.h>
-#include <linux/trace_seq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
@@ -23,6 +21,7 @@
 #include <linux/fs.h>
 
 #include <asm/local.h>
+#include "trace.h"
 
 static void update_pages_handler(struct work_struct *work);
 
@@ -2433,76 +2432,41 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #ifdef CONFIG_TRACING
 
-/*
- * The lock and unlock are done within a preempt disable section.
- * The current_context per_cpu variable can only be modified
- * by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. To pass this
- * information from the lock to the unlock without having to
- * access the 'in_interrupt()' functions again (which do show
- * a bit of overhead in something as critical as function tracing,
- * we use a bitmask trick.
- *
- *  bit 0 =  NMI context
- *  bit 1 =  IRQ context
- *  bit 2 =  SoftIRQ context
- *  bit 3 =  normal context.
- *
- * This works because this is the order of contexts that can
- * preempt other contexts. A SoftIRQ never preempts an IRQ
- * context.
- *
- * When the context is determined, the corresponding bit is
- * checked and set (if it was set, then a recursion of that context
- * happened).
- *
- * On unlock, we need to clear this bit. To do so, just subtract
- * 1 from the current_context and AND it to itself.
- *
- * (binary)
- *  101 - 1 = 100
- *  101 & 100 = 100 (clearing bit zero)
- *
- *  1010 - 1 = 1001
- *  1010 & 1001 = 1000 (clearing bit 1)
- *
- * The least significant bit can be cleared this way, and it
- * just so happens that it is the same bit corresponding to
- * the current context.
- */
-static DEFINE_PER_CPU(unsigned int, current_context);
+#define TRACE_RECURSIVE_DEPTH 16
 
-static __always_inline int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
 {
-	unsigned int val = this_cpu_read(current_context);
-	int bit;
+	/* Disable all tracing before we do anything else */
+	tracing_off_permanent();
 
-	if (in_interrupt()) {
-		if (in_nmi())
-			bit = 0;
-		else if (in_irq())
-			bit = 1;
-		else
-			bit = 2;
-	} else
-		bit = 3;
+	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
+		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+		    trace_recursion_buffer(),
+		    hardirq_count() >> HARDIRQ_SHIFT,
+		    softirq_count() >> SOFTIRQ_SHIFT,
+		    in_nmi());
 
-	if (unlikely(val & (1 << bit)))
-		return 1;
+	WARN_ON_ONCE(1);
+}
 
-	val |= (1 << bit);
-	this_cpu_write(current_context, val);
+static inline int trace_recursive_lock(void)
+{
+	trace_recursion_inc();
 
-	return 0;
+	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
+		return 0;
+
+	trace_recursive_fail();
+
+	return -1;
 }
 
-static __always_inline void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
 {
-	unsigned int val = this_cpu_read(current_context);
+	WARN_ON_ONCE(!trace_recursion_buffer());
 
-	val--;
-	val &= this_cpu_read(current_context);
-	this_cpu_write(current_context, val);
+	trace_recursion_dec();
 }
 
 #else
@@ -3102,24 +3066,6 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
 
-/**
- * ring_buffer_read_events_cpu - get the number of events successfully read
- * @buffer: The ring buffer
- * @cpu: The per CPU buffer to get the number of events read
- */
-unsigned long
-ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
-{
-	struct ring_buffer_per_cpu *cpu_buffer;
-
-	if (!cpumask_test_cpu(cpu, buffer->cpumask))
-		return 0;
-
-	cpu_buffer = buffer->buffers[cpu];
-	return cpu_buffer->read;
-}
-EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
-
 /**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
@@ -3479,7 +3425,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	/* check for end of page padding */
 	if ((iter->head >= rb_page_size(iter->head_page)) &&
 	    (iter->head_page != cpu_buffer->commit_page))
-		rb_inc_iter(iter);
+		rb_advance_iter(iter);
 }
 
 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c
index c2e2c2310374..3c13e46d7d24 100644
--- a/trunk/kernel/trace/trace.c
+++ b/trunk/kernel/trace/trace.c
@@ -39,7 +39,6 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
-#include <linux/sched/rt.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -250,7 +249,7 @@ static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 static struct tracer		*trace_types __read_mostly;
 
 /* current_trace points to the tracer that is currently active */
-static struct tracer		*current_trace __read_mostly = &nop_trace;
+static struct tracer		*current_trace __read_mostly;
 
 /*
  * trace_types_lock is used to protect the trace_types list.
@@ -710,13 +709,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-
-	if (!current_trace->allocated_snapshot) {
-		/* Only the nop tracer should hit this when disabling */
-		WARN_ON_ONCE(current_trace != &nop_trace);
+	if (!current_trace->use_max_tr) {
+		WARN_ON_ONCE(1);
 		return;
 	}
-
 	arch_spin_lock(&ftrace_max_lock);
 
 	tr->buffer = max_tr.buffer;
@@ -743,8 +739,10 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
+	if (!current_trace->use_max_tr) {
+		WARN_ON_ONCE(1);
 		return;
+	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
@@ -864,13 +862,10 @@ int register_tracer(struct tracer *type)
 
 		current_trace = type;
 
-		if (type->use_max_tr) {
-			/* If we expanded the buffers, make sure the max is expanded too */
-			if (ring_buffer_expanded)
-				ring_buffer_resize(max_tr.buffer, trace_buf_size,
-						   RING_BUFFER_ALL_CPUS);
-			type->allocated_snapshot = true;
-		}
+		/* If we expanded the buffers, make sure the max is expanded too */
+		if (ring_buffer_expanded && type->use_max_tr)
+			ring_buffer_resize(max_tr.buffer, trace_buf_size,
+						RING_BUFFER_ALL_CPUS);
 
 		/* the test is responsible for initializing and enabling */
 		pr_info("Testing tracer %s: ", type->name);
@@ -886,14 +881,10 @@ int register_tracer(struct tracer *type)
 		/* Only reset on passing, to avoid touching corrupted buffers */
 		tracing_reset_online_cpus(tr);
 
-		if (type->use_max_tr) {
-			type->allocated_snapshot = false;
-
-			/* Shrink the max buffer again */
-			if (ring_buffer_expanded)
-				ring_buffer_resize(max_tr.buffer, 1,
-						   RING_BUFFER_ALL_CPUS);
-		}
+		/* Shrink the max buffer again */
+		if (ring_buffer_expanded && type->use_max_tr)
+			ring_buffer_resize(max_tr.buffer, 1,
+						RING_BUFFER_ALL_CPUS);
 
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -931,9 +922,6 @@ void tracing_reset(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer *buffer = tr->buffer;
 
-	if (!buffer)
-		return;
-
 	ring_buffer_record_disable(buffer);
 
 	/* Make sure all commits have finished */
@@ -948,9 +936,6 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 	struct ring_buffer *buffer = tr->buffer;
 	int cpu;
 
-	if (!buffer)
-		return;
-
 	ring_buffer_record_disable(buffer);
 
 	/* Make sure all commits have finished */
@@ -1182,6 +1167,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
+	entry->padding			= 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1349,7 +1335,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	 */
 	preempt_disable_notrace();
 
-	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
+	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
 	/*
 	 * We don't need any atomic variables, just a barrier.
 	 * If an interrupt comes in, we don't care, because it would
@@ -1403,7 +1389,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
  out:
 	/* Again, don't let gcc optimize things here */
 	barrier();
-	__this_cpu_dec(ftrace_stack_reserve);
+	__get_cpu_var(ftrace_stack_reserve)--;
 	preempt_enable_notrace();
 
 }
@@ -1531,6 +1517,7 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 static char *get_trace_buf(void)
 {
 	struct trace_buffer_struct *percpu_buffer;
+	struct trace_buffer_struct *buffer;
 
 	/*
 	 * If we have allocated per cpu buffers, then we do not
@@ -1548,7 +1535,9 @@ static char *get_trace_buf(void)
 	if (!percpu_buffer)
 		return NULL;
 
-	return this_cpu_ptr(&percpu_buffer->buffer[0]);
+	buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
+
+	return buffer->buffer;
 }
 
 static int alloc_percpu_trace_buffer(void)
@@ -1953,27 +1942,21 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
 	struct trace_iterator *iter = m->private;
+	static struct tracer *old_tracer;
 	int cpu_file = iter->cpu_file;
 	void *p = NULL;
 	loff_t l = 0;
 	int cpu;
 
-	/*
-	 * copy the tracer to avoid using a global lock all around.
-	 * iter->trace is a copy of current_trace, the pointer to the
-	 * name may be used instead of a strcmp(), as iter->trace->name
-	 * will point to the same string as current_trace->name.
-	 */
+	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(current_trace && iter->trace->name != current_trace->name))
+	if (unlikely(old_tracer != current_trace && current_trace)) {
+		old_tracer = current_trace;
 		*iter->trace = *current_trace;
+	}
 	mutex_unlock(&trace_types_lock);
 
-	if (iter->snapshot && iter->trace->use_max_tr)
-		return ERR_PTR(-EBUSY);
-
-	if (!iter->snapshot)
-		atomic_inc(&trace_record_cmdline_disabled);
+	atomic_inc(&trace_record_cmdline_disabled);
 
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
@@ -2012,11 +1995,7 @@ static void s_stop(struct seq_file *m, void *p)
 {
 	struct trace_iterator *iter = m->private;
 
-	if (iter->snapshot && iter->trace->use_max_tr)
-		return;
-
-	if (!iter->snapshot)
-		atomic_dec(&trace_record_cmdline_disabled);
+	atomic_dec(&trace_record_cmdline_disabled);
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
@@ -2101,7 +2080,8 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	unsigned long total;
 	const char *name = "preemption";
 
-	name = type->name;
+	if (type)
+		name = type->name;
 
 	get_total_entries(tr, &total, &entries);
 
@@ -2450,7 +2430,7 @@ static const struct seq_operations tracer_seq_ops = {
 };
 
 static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file, bool snapshot)
+__tracing_open(struct inode *inode, struct file *file)
 {
 	long cpu_file = (long) inode->i_private;
 	struct trace_iterator *iter;
@@ -2477,16 +2457,16 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
 	if (!iter->trace)
 		goto fail;
 
-	*iter->trace = *current_trace;
+	if (current_trace)
+		*iter->trace = *current_trace;
 
 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
 		goto fail;
 
-	if (current_trace->print_max || snapshot)
+	if (current_trace && current_trace->print_max)
 		iter->tr = &max_tr;
 	else
 		iter->tr = &global_trace;
-	iter->snapshot = snapshot;
 	iter->pos = -1;
 	mutex_init(&iter->mutex);
 	iter->cpu_file = cpu_file;
@@ -2503,9 +2483,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
 	if (trace_clocks[trace_clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
-	/* stop the trace while dumping if we are not opening "snapshot" */
-	if (!iter->snapshot)
-		tracing_stop();
+	/* stop the trace while dumping */
+	tracing_stop();
 
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
@@ -2568,9 +2547,8 @@ static int tracing_release(struct inode *inode, struct file *file)
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
-	if (!iter->snapshot)
-		/* reenable tracing if it was previously enabled */
-		tracing_start();
+	/* reenable tracing if it was previously enabled */
+	tracing_start();
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -2598,7 +2576,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file, false);
+		iter = __tracing_open(inode, file);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3036,7 +3014,10 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
 	int r;
 
 	mutex_lock(&trace_types_lock);
-	r = sprintf(buf, "%s\n", current_trace->name);
+	if (current_trace)
+		r = sprintf(buf, "%s\n", current_trace->name);
+	else
+		r = sprintf(buf, "\n");
 	mutex_unlock(&trace_types_lock);
 
 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -3202,7 +3183,6 @@ static int tracing_set_tracer(const char *buf)
 	static struct trace_option_dentry *topts;
 	struct trace_array *tr = &global_trace;
 	struct tracer *t;
-	bool had_max_tr;
 	int ret = 0;
 
 	mutex_lock(&trace_types_lock);
@@ -3227,21 +3207,9 @@ static int tracing_set_tracer(const char *buf)
 		goto out;
 
 	trace_branch_disable();
-	if (current_trace->reset)
+	if (current_trace && current_trace->reset)
 		current_trace->reset(tr);
-
-	had_max_tr = current_trace->allocated_snapshot;
-	current_trace = &nop_trace;
-
-	if (had_max_tr && !t->use_max_tr) {
-		/*
-		 * We need to make sure that the update_max_tr sees that
-		 * current_trace changed to nop_trace to keep it from
-		 * swapping the buffers after we resize it.
-		 * The update_max_tr is called from interrupts disabled
-		 * so a synchronized_sched() is sufficient.
-		 */
-		synchronize_sched();
+	if (current_trace && current_trace->use_max_tr) {
 		/*
 		 * We don't free the ring buffer. instead, resize it because
 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
@@ -3249,19 +3217,18 @@ static int tracing_set_tracer(const char *buf)
 		 */
 		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
 		set_buffer_entries(&max_tr, 1);
-		tracing_reset_online_cpus(&max_tr);
-		current_trace->allocated_snapshot = false;
 	}
 	destroy_trace_option_files(topts);
 
+	current_trace = &nop_trace;
+
 	topts = create_trace_option_files(t);
-	if (t->use_max_tr && !had_max_tr) {
+	if (t->use_max_tr) {
 		/* we need to make per cpu buffer sizes equivalent */
 		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
 						   RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
-		t->allocated_snapshot = true;
 	}
 
 	if (t->init) {
@@ -3369,7 +3336,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 		ret = -ENOMEM;
 		goto fail;
 	}
-	*iter->trace = *current_trace;
+	if (current_trace)
+		*iter->trace = *current_trace;
 
 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
 		ret = -ENOMEM;
@@ -3509,6 +3477,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
+	static struct tracer *old_tracer;
 	ssize_t sret;
 
 	/* return any leftover data */
@@ -3520,8 +3489,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 
 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(iter->trace->name != current_trace->name))
+	if (unlikely(old_tracer != current_trace && current_trace)) {
+		old_tracer = current_trace;
 		*iter->trace = *current_trace;
+	}
 	mutex_unlock(&trace_types_lock);
 
 	/*
@@ -3677,6 +3648,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 		.ops		= &tracing_pipe_buf_ops,
 		.spd_release	= tracing_spd_release_pipe,
 	};
+	static struct tracer *old_tracer;
 	ssize_t ret;
 	size_t rem;
 	unsigned int i;
@@ -3686,8 +3658,10 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 
 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(iter->trace->name != current_trace->name))
+	if (unlikely(old_tracer != current_trace && current_trace)) {
+		old_tracer = current_trace;
 		*iter->trace = *current_trace;
+	}
 	mutex_unlock(&trace_types_lock);
 
 	mutex_lock(&iter->mutex);
@@ -4063,7 +4037,8 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	 * Reset the buffer so that it doesn't have incomparable timestamps.
 	 */
 	tracing_reset_online_cpus(&global_trace);
-	tracing_reset_online_cpus(&max_tr);
+	if (max_tr.buffer)
+		tracing_reset_online_cpus(&max_tr);
 
 	mutex_unlock(&trace_types_lock);
 
@@ -4079,87 +4054,6 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return single_open(file, tracing_clock_show, NULL);
 }
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-static int tracing_snapshot_open(struct inode *inode, struct file *file)
-{
-	struct trace_iterator *iter;
-	int ret = 0;
-
-	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file, true);
-		if (IS_ERR(iter))
-			ret = PTR_ERR(iter);
-	}
-	return ret;
-}
-
-static ssize_t
-tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
-		       loff_t *ppos)
-{
-	unsigned long val;
-	int ret;
-
-	ret = tracing_update_buffers();
-	if (ret < 0)
-		return ret;
-
-	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
-	if (ret)
-		return ret;
-
-	mutex_lock(&trace_types_lock);
-
-	if (current_trace->use_max_tr) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	switch (val) {
-	case 0:
-		if (current_trace->allocated_snapshot) {
-			/* free spare buffer */
-			ring_buffer_resize(max_tr.buffer, 1,
-					   RING_BUFFER_ALL_CPUS);
-			set_buffer_entries(&max_tr, 1);
-			tracing_reset_online_cpus(&max_tr);
-			current_trace->allocated_snapshot = false;
-		}
-		break;
-	case 1:
-		if (!current_trace->allocated_snapshot) {
-			/* allocate spare buffer */
-			ret = resize_buffer_duplicate_size(&max_tr,
-					&global_trace, RING_BUFFER_ALL_CPUS);
-			if (ret < 0)
-				break;
-			current_trace->allocated_snapshot = true;
-		}
-
-		local_irq_disable();
-		/* Now, we're going to swap */
-		update_max_tr(&global_trace, current, smp_processor_id());
-		local_irq_enable();
-		break;
-	default:
-		if (current_trace->allocated_snapshot)
-			tracing_reset_online_cpus(&max_tr);
-		else
-			ret = -EINVAL;
-		break;
-	}
-
-	if (ret >= 0) {
-		*ppos += cnt;
-		ret = cnt;
-	}
-out:
-	mutex_unlock(&trace_types_lock);
-	return ret;
-}
-#endif /* CONFIG_TRACER_SNAPSHOT */
-
-
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -4216,16 +4110,6 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-static const struct file_operations snapshot_fops = {
-	.open		= tracing_snapshot_open,
-	.read		= seq_read,
-	.write		= tracing_snapshot_write,
-	.llseek		= tracing_seek,
-	.release	= tracing_release,
-};
-#endif /* CONFIG_TRACER_SNAPSHOT */
-
 struct ftrace_buffer_info {
 	struct trace_array	*tr;
 	void			*spare;
@@ -4530,9 +4414,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
 
-	cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
-	trace_seq_printf(s, "read events: %ld\n", cnt);
-
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -4609,7 +4490,7 @@ struct dentry *tracing_init_dentry(void)
 
 static struct dentry *d_percpu;
 
-static struct dentry *tracing_dentry_percpu(void)
+struct dentry *tracing_dentry_percpu(void)
 {
 	static int once;
 	struct dentry *d_tracer;
@@ -5025,11 +4906,6 @@ static __init int tracer_init_debugfs(void)
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 
-#ifdef CONFIG_TRACER_SNAPSHOT
-	trace_create_file("snapshot", 0644, d_tracer,
-			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
-#endif
-
 	create_trace_options_dir();
 
 	for_each_tracing_cpu(cpu)
@@ -5138,7 +5014,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 	if (disable_tracing)
 		ftrace_kill();
 
-	/* Simulate the iterator */
 	trace_init_global_iter(&iter);
 
 	for_each_tracing_cpu(cpu) {
@@ -5150,6 +5025,10 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 	/* don't look at user memory in panic mode */
 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
 
+	/* Simulate the iterator */
+	iter.tr = &global_trace;
+	iter.trace = current_trace;
+
 	switch (oops_dump_mode) {
 	case DUMP_ALL:
 		iter.cpu_file = TRACE_PIPE_ALL_CPU;
@@ -5294,7 +5173,7 @@ __init static int tracer_alloc_buffers(void)
 	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
-
+	current_trace = &nop_trace;
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 
diff --git a/trunk/kernel/trace/trace.h b/trunk/kernel/trace/trace.h
index 57d7e5397d56..c75d7988902c 100644
--- a/trunk/kernel/trace/trace.h
+++ b/trunk/kernel/trace/trace.h
@@ -287,62 +287,20 @@ struct tracer {
 	struct tracer_flags	*flags;
 	bool			print_max;
 	bool			use_max_tr;
-	bool			allocated_snapshot;
 };
 
 
 /* Only current can touch trace_recursion */
+#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
+#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
 
-/*
- * For function tracing recursion:
- *  The order of these bits are important.
- *
- *  When function tracing occurs, the following steps are made:
- *   If arch does not support a ftrace feature:
- *    call internal function (uses INTERNAL bits) which calls...
- *   If callback is registered to the "global" list, the list
- *    function is called and recursion checks the GLOBAL bits.
- *    then this function calls...
- *   The function callback, which can use the FTRACE bits to
- *    check for recursion.
- *
- * Now if the arch does not suppport a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
- */
-enum {
-	TRACE_BUFFER_BIT,
-	TRACE_BUFFER_NMI_BIT,
-	TRACE_BUFFER_IRQ_BIT,
-	TRACE_BUFFER_SIRQ_BIT,
-
-	/* Start of function recursion bits */
-	TRACE_FTRACE_BIT,
-	TRACE_FTRACE_NMI_BIT,
-	TRACE_FTRACE_IRQ_BIT,
-	TRACE_FTRACE_SIRQ_BIT,
-
-	/* GLOBAL_BITs must be greater than FTRACE_BITs */
-	TRACE_GLOBAL_BIT,
-	TRACE_GLOBAL_NMI_BIT,
-	TRACE_GLOBAL_IRQ_BIT,
-	TRACE_GLOBAL_SIRQ_BIT,
-
-	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
-	TRACE_INTERNAL_BIT,
-	TRACE_INTERNAL_NMI_BIT,
-	TRACE_INTERNAL_IRQ_BIT,
-	TRACE_INTERNAL_SIRQ_BIT,
-
-	TRACE_CONTROL_BIT,
+/* Ring buffer has the 10 LSB bits to count */
+#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
+
+/* for function tracing recursion */
+#define TRACE_INTERNAL_BIT		(1<<11)
+#define TRACE_GLOBAL_BIT		(1<<12)
+#define TRACE_CONTROL_BIT		(1<<13)
 
 /*
  * Abuse of the trace_recursion.
@@ -351,77 +309,11 @@ enum {
  * was called in irq context but we have irq tracing off. Since this
  * can only be modified by current, we can reuse trace_recursion.
  */
-	TRACE_IRQ_BIT,
-};
-
-#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
-#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
-#define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
-
-#define TRACE_CONTEXT_BITS	4
-
-#define TRACE_FTRACE_START	TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
-
-#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
-#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
-
-#define TRACE_LIST_START	TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
-
-#define TRACE_CONTEXT_MASK	TRACE_LIST_MAX
-
-static __always_inline int trace_get_context_bit(void)
-{
-	int bit;
+#define TRACE_IRQ_BIT			(1<<13)
 
-	if (in_interrupt()) {
-		if (in_nmi())
-			bit = 0;
-
-		else if (in_irq())
-			bit = 1;
-		else
-			bit = 2;
-	} else
-		bit = 3;
-
-	return bit;
-}
-
-static __always_inline int trace_test_and_set_recursion(int start, int max)
-{
-	unsigned int val = current->trace_recursion;
-	int bit;
-
-	/* A previous recursion check was made */
-	if ((val & TRACE_CONTEXT_MASK) > max)
-		return 0;
-
-	bit = trace_get_context_bit() + start;
-	if (unlikely(val & (1 << bit)))
-		return -1;
-
-	val |= 1 << bit;
-	current->trace_recursion = val;
-	barrier();
-
-	return bit;
-}
-
-static __always_inline void trace_clear_recursion(int bit)
-{
-	unsigned int val = current->trace_recursion;
-
-	if (!bit)
-		return;
-
-	bit = 1 << bit;
-	val &= ~bit;
-
-	barrier();
-	current->trace_recursion = val;
-}
+#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
+#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
+#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
 
 #define TRACE_PIPE_ALL_CPU	-1
 
diff --git a/trunk/kernel/trace/trace_clock.c b/trunk/kernel/trace/trace_clock.c
index aa8f5f48dae6..394783531cbb 100644
--- a/trunk/kernel/trace/trace_clock.c
+++ b/trunk/kernel/trace/trace_clock.c
@@ -21,6 +21,8 @@
 #include <linux/ktime.h>
 #include <linux/trace_clock.h>
 
+#include "trace.h"
+
 /*
  * trace_clock_local(): the simplest and least coherent tracing clock.
  *
@@ -42,7 +44,6 @@ u64 notrace trace_clock_local(void)
 
 	return clock;
 }
-EXPORT_SYMBOL_GPL(trace_clock_local);
 
 /*
  * trace_clock(): 'between' trace clock. Not completely serialized,
@@ -85,7 +86,7 @@ u64 notrace trace_clock_global(void)
 	local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
-	now = sched_clock_cpu(this_cpu);
+	now = cpu_clock(this_cpu);
 	/*
 	 * If in an NMI context then dont risk lockups and return the
 	 * cpu_clock() time:
diff --git a/trunk/kernel/trace/trace_events.c b/trunk/kernel/trace/trace_events.c
index 57e9b284250c..880073d0b946 100644
--- a/trunk/kernel/trace/trace_events.c
+++ b/trunk/kernel/trace/trace_events.c
@@ -116,6 +116,7 @@ static int trace_define_common_fields(void)
 	__common_field(unsigned char, flags);
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
+	__common_field(int, padding);
 
 	return ret;
 }
diff --git a/trunk/kernel/trace/trace_functions.c b/trunk/kernel/trace/trace_functions.c
index 601152523326..8e3ad8082ab7 100644
--- a/trunk/kernel/trace/trace_functions.c
+++ b/trunk/kernel/trace/trace_functions.c
@@ -47,6 +47,34 @@ static void function_trace_start(struct trace_array *tr)
 	tracing_reset_online_cpus(tr);
 }
 
+static void
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
+				 struct ftrace_ops *op, struct pt_regs *pt_regs)
+{
+	struct trace_array *tr = func_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	long disabled;
+	int cpu;
+	int pc;
+
+	if (unlikely(!ftrace_function_enabled))
+		return;
+
+	pc = preempt_count();
+	preempt_disable_notrace();
+	local_save_flags(flags);
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (likely(disabled == 1))
+		trace_function(tr, ip, parent_ip, flags, pc);
+
+	atomic_dec(&data->disabled);
+	preempt_enable_notrace();
+}
+
 /* Our option */
 enum {
 	TRACE_FUNC_OPT_STACK	= 0x1,
@@ -57,34 +85,34 @@ static struct tracer_flags func_flags;
 static void
 function_trace_call(unsigned long ip, unsigned long parent_ip,
 		    struct ftrace_ops *op, struct pt_regs *pt_regs)
+
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	int bit;
+	long disabled;
 	int cpu;
 	int pc;
 
 	if (unlikely(!ftrace_function_enabled))
 		return;
 
-	pc = preempt_count();
-	preempt_disable_notrace();
-
-	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
-	if (bit < 0)
-		goto out;
-
-	cpu = smp_processor_id();
+	/*
+	 * Need to use raw, since this must be called before the
+	 * recursive protection is performed.
+	 */
+	local_irq_save(flags);
+	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
-	if (!atomic_read(&data->disabled)) {
-		local_save_flags(flags);
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (likely(disabled == 1)) {
+		pc = preempt_count();
 		trace_function(tr, ip, parent_ip, flags, pc);
 	}
-	trace_clear_recursion(bit);
 
- out:
-	preempt_enable_notrace();
+	atomic_dec(&data->disabled);
+	local_irq_restore(flags);
 }
 
 static void
@@ -157,6 +185,11 @@ static void tracing_start_function_trace(void)
 {
 	ftrace_function_enabled = 0;
 
+	if (trace_flags & TRACE_ITER_PREEMPTONLY)
+		trace_ops.func = function_trace_call_preempt_only;
+	else
+		trace_ops.func = function_trace_call;
+
 	if (func_flags.val & TRACE_FUNC_OPT_STACK)
 		register_ftrace_function(&trace_stack_ops);
 	else
diff --git a/trunk/kernel/trace/trace_functions_graph.c b/trunk/kernel/trace/trace_functions_graph.c
index 39ada66389cc..4edb4b74eb7e 100644
--- a/trunk/kernel/trace/trace_functions_graph.c
+++ b/trunk/kernel/trace/trace_functions_graph.c
@@ -47,8 +47,6 @@ struct fgraph_data {
 #define TRACE_GRAPH_PRINT_ABS_TIME	0x20
 #define TRACE_GRAPH_PRINT_IRQS		0x40
 
-static unsigned int max_depth;
-
 static struct tracer_opt trace_opts[] = {
 	/* Display overruns? (for self-debug purpose) */
 	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
@@ -191,16 +189,10 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 
 	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
 	trace.rettime = trace_clock_local();
+	ftrace_graph_return(&trace);
 	barrier();
 	current->curr_ret_stack--;
 
-	/*
-	 * The trace should run after decrementing the ret counter
-	 * in case an interrupt were to come in. We don't want to
-	 * lose the interrupt if max_depth is set.
-	 */
-	ftrace_graph_return(&trace);
-
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
 		WARN_ON(1);
@@ -258,9 +250,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 		return 0;
 
 	/* trace it when it is-nested-in or is a function enabled. */
-	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
-	     ftrace_graph_ignore_irqs()) ||
-	    (max_depth && trace->depth >= max_depth))
+	if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
+	      ftrace_graph_ignore_irqs())
 		return 0;
 
 	local_irq_save(flags);
@@ -1466,59 +1457,6 @@ static struct tracer graph_trace __read_mostly = {
 #endif
 };
 
-
-static ssize_t
-graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
-		  loff_t *ppos)
-{
-	unsigned long val;
-	int ret;
-
-	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
-	if (ret)
-		return ret;
-
-	max_depth = val;
-
-	*ppos += cnt;
-
-	return cnt;
-}
-
-static ssize_t
-graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
-		 loff_t *ppos)
-{
-	char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
-	int n;
-
-	n = sprintf(buf, "%d\n", max_depth);
-
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
-}
-
-static const struct file_operations graph_depth_fops = {
-	.open		= tracing_open_generic,
-	.write		= graph_depth_write,
-	.read		= graph_depth_read,
-	.llseek		= generic_file_llseek,
-};
-
-static __init int init_graph_debugfs(void)
-{
-	struct dentry *d_tracer;
-
-	d_tracer = tracing_init_dentry();
-	if (!d_tracer)
-		return 0;
-
-	trace_create_file("max_graph_depth", 0644, d_tracer,
-			  NULL, &graph_depth_fops);
-
-	return 0;
-}
-fs_initcall(init_graph_debugfs);
-
 static __init int init_graph_trace(void)
 {
 	max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
diff --git a/trunk/kernel/trace/trace_probe.h b/trunk/kernel/trace/trace_probe.h
index 5c7e09d10d74..933708677814 100644
--- a/trunk/kernel/trace/trace_probe.h
+++ b/trunk/kernel/trace/trace_probe.h
@@ -66,6 +66,7 @@
 #define TP_FLAG_TRACE		1
 #define TP_FLAG_PROFILE		2
 #define TP_FLAG_REGISTERED	4
+#define TP_FLAG_UPROBE		8
 
 
 /* data_rloc: data relative location, compatible with u32 */
diff --git a/trunk/kernel/trace/trace_sched_wakeup.c b/trunk/kernel/trace/trace_sched_wakeup.c
index 75aa97fbe1a1..9fe45fcefca0 100644
--- a/trunk/kernel/trace/trace_sched_wakeup.c
+++ b/trunk/kernel/trace/trace_sched_wakeup.c
@@ -15,8 +15,8 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <linux/sched/rt.h>
 #include <trace/events/sched.h>
+
 #include "trace.h"
 
 static struct trace_array	*wakeup_trace;
diff --git a/trunk/kernel/trace/trace_selftest.c b/trunk/kernel/trace/trace_selftest.c
index 51c819c12c29..47623169a815 100644
--- a/trunk/kernel/trace/trace_selftest.c
+++ b/trunk/kernel/trace/trace_selftest.c
@@ -415,8 +415,7 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
 	 * The ftrace infrastructure should provide the recursion
 	 * protection. If not, this will crash the kernel!
 	 */
-	if (trace_selftest_recursion_cnt++ > 10)
-		return;
+	trace_selftest_recursion_cnt++;
 	DYN_FTRACE_TEST_NAME();
 }
 
@@ -453,6 +452,7 @@ trace_selftest_function_recursion(void)
 	char *func_name;
 	int len;
 	int ret;
+	int cnt;
 
 	/* The previous test PASSED */
 	pr_cont("PASSED\n");
@@ -510,10 +510,19 @@ trace_selftest_function_recursion(void)
 
 	unregister_ftrace_function(&test_recsafe_probe);
 
+	/*
+	 * If arch supports all ftrace features, and no other task
+	 * was on the list, we should be fine.
+	 */
+	if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
+		cnt = 2; /* Should have recursed */
+	else
+		cnt = 1;
+
 	ret = -1;
-	if (trace_selftest_recursion_cnt != 2) {
-		pr_cont("*callback not called expected 2 times (%d)* ",
-			trace_selftest_recursion_cnt);
+	if (trace_selftest_recursion_cnt != cnt) {
+		pr_cont("*callback not called expected %d times (%d)* ",
+			cnt, trace_selftest_recursion_cnt);
 		goto out;
 	}
 
@@ -559,7 +568,7 @@ trace_selftest_function_regs(void)
 	int ret;
 	int supported = 0;
 
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 	supported = 1;
 #endif
 
diff --git a/trunk/kernel/trace/trace_syscalls.c b/trunk/kernel/trace/trace_syscalls.c
index 5329e13e74a1..7609dd6714c2 100644
--- a/trunk/kernel/trace/trace_syscalls.c
+++ b/trunk/kernel/trace/trace_syscalls.c
@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 	return syscalls_metadata[nr];
 }
 
-static enum print_line_t
+enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags,
 		    struct trace_event *event)
 {
@@ -130,7 +130,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
 	return TRACE_TYPE_HANDLED;
 }
 
-static enum print_line_t
+enum print_line_t
 print_syscall_exit(struct trace_iterator *iter, int flags,
 		   struct trace_event *event)
 {
@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
 	return ret;
 }
 
-static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
+void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 {
 	struct syscall_trace_enter *entry;
 	struct syscall_metadata *sys_data;
@@ -305,7 +305,7 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
+void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 {
 	struct syscall_trace_exit *entry;
 	struct syscall_metadata *sys_data;
@@ -337,7 +337,7 @@ static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-static int reg_event_syscall_enter(struct ftrace_event_call *call)
+int reg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
@@ -356,7 +356,7 @@ static int reg_event_syscall_enter(struct ftrace_event_call *call)
 	return ret;
 }
 
-static void unreg_event_syscall_enter(struct ftrace_event_call *call)
+void unreg_event_syscall_enter(struct ftrace_event_call *call)
 {
 	int num;
 
@@ -371,7 +371,7 @@ static void unreg_event_syscall_enter(struct ftrace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
-static int reg_event_syscall_exit(struct ftrace_event_call *call)
+int reg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
@@ -390,7 +390,7 @@ static int reg_event_syscall_exit(struct ftrace_event_call *call)
 	return ret;
 }
 
-static void unreg_event_syscall_exit(struct ftrace_event_call *call)
+void unreg_event_syscall_exit(struct ftrace_event_call *call)
 {
 	int num;
 
@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
 	return (unsigned long)sys_call_table[nr];
 }
 
-static int __init init_ftrace_syscalls(void)
+int __init init_ftrace_syscalls(void)
 {
 	struct syscall_metadata *meta;
 	unsigned long addr;
diff --git a/trunk/kernel/trace/trace_uprobe.c b/trunk/kernel/trace/trace_uprobe.c
index 8dad2a92dee9..c86e6d4f67fb 100644
--- a/trunk/kernel/trace/trace_uprobe.c
+++ b/trunk/kernel/trace/trace_uprobe.c
@@ -28,21 +28,20 @@
 
 #define UPROBE_EVENT_SYSTEM	"uprobes"
 
-struct trace_uprobe_filter {
-	rwlock_t		rwlock;
-	int			nr_systemwide;
-	struct list_head	perf_events;
-};
-
 /*
  * uprobe event core functions
  */
+struct trace_uprobe;
+struct uprobe_trace_consumer {
+	struct uprobe_consumer		cons;
+	struct trace_uprobe		*tu;
+};
+
 struct trace_uprobe {
 	struct list_head		list;
 	struct ftrace_event_class	class;
 	struct ftrace_event_call	call;
-	struct trace_uprobe_filter	filter;
-	struct uprobe_consumer		consumer;
+	struct uprobe_trace_consumer	*consumer;
 	struct inode			*inode;
 	char				*filename;
 	unsigned long			offset;
@@ -65,18 +64,6 @@ static LIST_HEAD(uprobe_list);
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
 
-static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
-{
-	rwlock_init(&filter->rwlock);
-	filter->nr_systemwide = 0;
-	INIT_LIST_HEAD(&filter->perf_events);
-}
-
-static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
-{
-	return !filter->nr_systemwide && list_empty(&filter->perf_events);
-}
-
 /*
  * Allocate new trace_uprobe and initialize it (including uprobes).
  */
@@ -105,8 +92,6 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
 		goto error;
 
 	INIT_LIST_HEAD(&tu->list);
-	tu->consumer.handler = uprobe_dispatcher;
-	init_trace_uprobe_filter(&tu->filter);
 	return tu;
 
 error:
@@ -268,18 +253,12 @@ static int create_trace_uprobe(int argc, char **argv)
 	if (ret)
 		goto fail_address_parse;
 
-	inode = igrab(path.dentry->d_inode);
-	path_put(&path);
-
-	if (!inode || !S_ISREG(inode->i_mode)) {
-		ret = -EINVAL;
-		goto fail_address_parse;
-	}
-
 	ret = kstrtoul(arg, 0, &offset);
 	if (ret)
 		goto fail_address_parse;
 
+	inode = igrab(path.dentry->d_inode);
+
 	argc -= 2;
 	argv += 2;
 
@@ -377,7 +356,7 @@ static int create_trace_uprobe(int argc, char **argv)
 	if (inode)
 		iput(inode);
 
-	pr_info("Failed to parse address or file.\n");
+	pr_info("Failed to parse address.\n");
 
 	return ret;
 }
@@ -486,7 +465,7 @@ static const struct file_operations uprobe_profile_ops = {
 };
 
 /* uprobe handler */
-static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
@@ -496,6 +475,8 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	unsigned long irq_flags;
 	struct ftrace_event_call *call = &tu->call;
 
+	tu->nhit++;
+
 	local_save_flags(irq_flags);
 	pc = preempt_count();
 
@@ -504,18 +485,16 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
 						  size, irq_flags, pc);
 	if (!event)
-		return 0;
+		return;
 
 	entry = ring_buffer_event_data(event);
-	entry->ip = instruction_pointer(task_pt_regs(current));
+	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
 	data = (u8 *)&entry[1];
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
 		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
-
-	return 0;
 }
 
 /* Event entry printers */
@@ -554,43 +533,42 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
 	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
-{
-	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
-}
-
-typedef bool (*filter_func_t)(struct uprobe_consumer *self,
-				enum uprobe_filter_ctx ctx,
-				struct mm_struct *mm);
-
-static int
-probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+static int probe_event_enable(struct trace_uprobe *tu, int flag)
 {
+	struct uprobe_trace_consumer *utc;
 	int ret = 0;
 
-	if (is_trace_uprobe_enabled(tu))
+	if (!tu->inode || tu->consumer)
 		return -EINTR;
 
-	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
+	utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
+	if (!utc)
+		return -EINTR;
+
+	utc->cons.handler = uprobe_dispatcher;
+	utc->cons.filter = NULL;
+	ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
+	if (ret) {
+		kfree(utc);
+		return ret;
+	}
 
 	tu->flags |= flag;
-	tu->consumer.filter = filter;
-	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
-	if (ret)
-		tu->flags &= ~flag;
+	utc->tu = tu;
+	tu->consumer = utc;
 
-	return ret;
+	return 0;
 }
 
 static void probe_event_disable(struct trace_uprobe *tu, int flag)
 {
-	if (!is_trace_uprobe_enabled(tu))
+	if (!tu->inode || !tu->consumer)
 		return;
 
-	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
-
-	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
+	uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
 	tu->flags &= ~flag;
+	kfree(tu->consumer);
+	tu->consumer = NULL;
 }
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -664,96 +642,8 @@ static int set_print_fmt(struct trace_uprobe *tu)
 }
 
 #ifdef CONFIG_PERF_EVENTS
-static bool
-__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
-{
-	struct perf_event *event;
-
-	if (filter->nr_systemwide)
-		return true;
-
-	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
-		if (event->hw.tp_target->mm == mm)
-			return true;
-	}
-
-	return false;
-}
-
-static inline bool
-uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
-{
-	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
-}
-
-static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
-{
-	bool done;
-
-	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
-		/*
-		 * event->parent != NULL means copy_process(), we can avoid
-		 * uprobe_apply(). current->mm must be probed and we can rely
-		 * on dup_mmap() which preserves the already installed bp's.
-		 *
-		 * attr.enable_on_exec means that exec/mmap will install the
-		 * breakpoints we need.
-		 */
-		done = tu->filter.nr_systemwide ||
-			event->parent || event->attr.enable_on_exec ||
-			uprobe_filter_event(tu, event);
-		list_add(&event->hw.tp_list, &tu->filter.perf_events);
-	} else {
-		done = tu->filter.nr_systemwide;
-		tu->filter.nr_systemwide++;
-	}
-	write_unlock(&tu->filter.rwlock);
-
-	if (!done)
-		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
-
-	return 0;
-}
-
-static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
-{
-	bool done;
-
-	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
-		list_del(&event->hw.tp_list);
-		done = tu->filter.nr_systemwide ||
-			(event->hw.tp_target->flags & PF_EXITING) ||
-			uprobe_filter_event(tu, event);
-	} else {
-		tu->filter.nr_systemwide--;
-		done = tu->filter.nr_systemwide;
-	}
-	write_unlock(&tu->filter.rwlock);
-
-	if (!done)
-		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
-
-	return 0;
-}
-
-static bool uprobe_perf_filter(struct uprobe_consumer *uc,
-				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
-{
-	struct trace_uprobe *tu;
-	int ret;
-
-	tu = container_of(uc, struct trace_uprobe, consumer);
-	read_lock(&tu->filter.rwlock);
-	ret = __uprobe_perf_filter(&tu->filter, mm);
-	read_unlock(&tu->filter.rwlock);
-
-	return ret;
-}
-
 /* uprobe profile handler */
-static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tu->call;
 	struct uprobe_trace_entry_head *entry;
@@ -762,14 +652,11 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	int size, __size, i;
 	int rctx;
 
-	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
-		return UPROBE_HANDLER_REMOVE;
-
 	__size = sizeof(*entry) + tu->size;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return 0;
+		return;
 
 	preempt_disable();
 
@@ -777,7 +664,7 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	if (!entry)
 		goto out;
 
-	entry->ip = instruction_pointer(task_pt_regs(current));
+	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
 	data = (u8 *)&entry[1];
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
@@ -787,7 +674,6 @@ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
  out:
 	preempt_enable();
-	return 0;
 }
 #endif	/* CONFIG_PERF_EVENTS */
 
@@ -798,7 +684,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 	switch (type) {
 	case TRACE_REG_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
+		return probe_event_enable(tu, TP_FLAG_TRACE);
 
 	case TRACE_REG_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_TRACE);
@@ -806,18 +692,11 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 #ifdef CONFIG_PERF_EVENTS
 	case TRACE_REG_PERF_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
+		return probe_event_enable(tu, TP_FLAG_PROFILE);
 
 	case TRACE_REG_PERF_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_PROFILE);
 		return 0;
-
-	case TRACE_REG_PERF_OPEN:
-		return uprobe_perf_open(tu, data);
-
-	case TRACE_REG_PERF_CLOSE:
-		return uprobe_perf_close(tu, data);
-
 #endif
 	default:
 		return 0;
@@ -827,20 +706,22 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 {
+	struct uprobe_trace_consumer *utc;
 	struct trace_uprobe *tu;
-	int ret = 0;
 
-	tu = container_of(con, struct trace_uprobe, consumer);
-	tu->nhit++;
+	utc = container_of(con, struct uprobe_trace_consumer, cons);
+	tu = utc->tu;
+	if (!tu || tu->consumer != utc)
+		return 0;
 
 	if (tu->flags & TP_FLAG_TRACE)
-		ret |= uprobe_trace_func(tu, regs);
+		uprobe_trace_func(tu, regs);
 
 #ifdef CONFIG_PERF_EVENTS
 	if (tu->flags & TP_FLAG_PROFILE)
-		ret |= uprobe_perf_func(tu, regs);
+		uprobe_perf_func(tu, regs);
 #endif
-	return ret;
+	return 0;
 }
 
 static struct trace_event_functions uprobe_funcs = {
diff --git a/trunk/kernel/tsacct.c b/trunk/kernel/tsacct.c
index a1dd9a1b1327..625df0b44690 100644
--- a/trunk/kernel/tsacct.c
+++ b/trunk/kernel/tsacct.c
@@ -32,7 +32,6 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 {
 	const struct cred *tcred;
 	struct timespec uptime, ts;
-	cputime_t utime, stime, utimescaled, stimescaled;
 	u64 ac_etime;
 
 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@@ -66,15 +65,10 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 	stats->ac_ppid	 = pid_alive(tsk) ?
 		task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
 	rcu_read_unlock();
-
-	task_cputime(tsk, &utime, &stime);
-	stats->ac_utime = cputime_to_usecs(utime);
-	stats->ac_stime = cputime_to_usecs(stime);
-
-	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
-	stats->ac_utimescaled = cputime_to_usecs(utimescaled);
-	stats->ac_stimescaled = cputime_to_usecs(stimescaled);
-
+	stats->ac_utime = cputime_to_usecs(tsk->utime);
+	stats->ac_stime = cputime_to_usecs(tsk->stime);
+	stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
+	stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
 
@@ -121,8 +115,11 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 #undef KB
 #undef MB
 
-static void __acct_update_integrals(struct task_struct *tsk,
-				    cputime_t utime, cputime_t stime)
+/**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
 {
 	if (likely(tsk->mm)) {
 		cputime_t time, dtime;
@@ -131,7 +128,7 @@ static void __acct_update_integrals(struct task_struct *tsk,
 		u64 delta;
 
 		local_irq_save(flags);
-		time = stime + utime;
+		time = tsk->stime + tsk->utime;
 		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
@@ -147,27 +144,6 @@ static void __acct_update_integrals(struct task_struct *tsk,
 	}
 }
 
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
-{
-	cputime_t utime, stime;
-
-	task_cputime(tsk, &utime, &stime);
-	__acct_update_integrals(tsk, utime, stime);
-}
-
-/**
- * acct_account_cputime - update mm integral after cputime update
- * @tsk: task_struct for accounting
- */
-void acct_account_cputime(struct task_struct *tsk)
-{
-	__acct_update_integrals(tsk, tsk->utime, tsk->stime);
-}
-
 /**
  * acct_clear_integrals - clear the mm integral fields in task_struct
  * @tsk: task_struct whose accounting fields are cleared
diff --git a/trunk/kernel/watchdog.c b/trunk/kernel/watchdog.c
index 27689422aa92..75a2ab3d0b02 100644
--- a/trunk/kernel/watchdog.c
+++ b/trunk/kernel/watchdog.c
@@ -23,7 +23,6 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
-#include <linux/sched/rt.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug
index a1714c897e3f..67604e599384 100644
--- a/trunk/lib/Kconfig.debug
+++ b/trunk/lib/Kconfig.debug
@@ -605,6 +605,61 @@ config PROVE_LOCKING
 
 	 For more details, see Documentation/lockdep-design.txt.
 
+config PROVE_RCU
+	bool "RCU debugging: prove RCU correctness"
+	depends on PROVE_LOCKING
+	default n
+	help
+	 This feature enables lockdep extensions that check for correct
+	 use of RCU APIs.  This is currently under development.  Say Y
+	 if you want to debug RCU usage or help work on the PROVE_RCU
+	 feature.
+
+	 Say N if you are unsure.
+
+config PROVE_RCU_REPEATEDLY
+	bool "RCU debugging: don't disable PROVE_RCU on first splat"
+	depends on PROVE_RCU
+	default n
+	help
+	 By itself, PROVE_RCU will disable checking upon issuing the
+	 first warning (or "splat").  This feature prevents such
+	 disabling, allowing multiple RCU-lockdep warnings to be printed
+	 on a single reboot.
+
+	 Say Y to allow multiple RCU-lockdep warnings per boot.
+
+	 Say N if you are unsure.
+
+config PROVE_RCU_DELAY
+	bool "RCU debugging: preemptible RCU race provocation"
+	depends on DEBUG_KERNEL && PREEMPT_RCU
+	default n
+	help
+	 There is a class of races that involve an unlikely preemption
+	 of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
+	 been set to INT_MIN.  This feature inserts a delay at that
+	 point to increase the probability of these races.
+
+	 Say Y to increase probability of preemption of __rcu_read_unlock().
+
+	 Say N if you are unsure.
+
+config SPARSE_RCU_POINTER
+	bool "RCU debugging: sparse-based checks for pointer usage"
+	default n
+	help
+	 This feature enables the __rcu sparse annotation for
+	 RCU-protected pointers.  This annotation will cause sparse
+	 to flag any non-RCU use of annotated pointers.  This can be
+	 helpful when debugging RCU usage.  Please note that this feature
+	 is not intended to enforce code cleanliness; it is instead merely
+	 a debugging aid.
+
+	 Say Y to make sparse flag questionable use of RCU-protected pointers.
+
+	 Say N if you are unsure.
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -882,63 +937,6 @@ config BOOT_PRINTK_DELAY
 	  BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect
 	  what it believes to be lockup conditions.
 
-menu "RCU Debugging"
-
-config PROVE_RCU
-	bool "RCU debugging: prove RCU correctness"
-	depends on PROVE_LOCKING
-	default n
-	help
-	 This feature enables lockdep extensions that check for correct
-	 use of RCU APIs.  This is currently under development.  Say Y
-	 if you want to debug RCU usage or help work on the PROVE_RCU
-	 feature.
-
-	 Say N if you are unsure.
-
-config PROVE_RCU_REPEATEDLY
-	bool "RCU debugging: don't disable PROVE_RCU on first splat"
-	depends on PROVE_RCU
-	default n
-	help
-	 By itself, PROVE_RCU will disable checking upon issuing the
-	 first warning (or "splat").  This feature prevents such
-	 disabling, allowing multiple RCU-lockdep warnings to be printed
-	 on a single reboot.
-
-	 Say Y to allow multiple RCU-lockdep warnings per boot.
-
-	 Say N if you are unsure.
-
-config PROVE_RCU_DELAY
-	bool "RCU debugging: preemptible RCU race provocation"
-	depends on DEBUG_KERNEL && PREEMPT_RCU
-	default n
-	help
-	 There is a class of races that involve an unlikely preemption
-	 of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
-	 been set to INT_MIN.  This feature inserts a delay at that
-	 point to increase the probability of these races.
-
-	 Say Y to increase probability of preemption of __rcu_read_unlock().
-
-	 Say N if you are unsure.
-
-config SPARSE_RCU_POINTER
-	bool "RCU debugging: sparse-based checks for pointer usage"
-	default n
-	help
-	 This feature enables the __rcu sparse annotation for
-	 RCU-protected pointers.  This annotation will cause sparse
-	 to flag any non-RCU used of annotated pointers.  This can be
-	 helpful when debugging RCU usage.  Please note that this feature
-	 is not intended to enforce code cleanliness; it is instead merely
-	 a debugging aid.
-
-	 Say Y to make sparse flag questionable use of RCU-protected pointers
-
-	 Say N if you are unsure.
-
 config RCU_TORTURE_TEST
 	tristate "torture tests for RCU"
 	depends on DEBUG_KERNEL
@@ -972,7 +970,7 @@ config RCU_TORTURE_TEST_RUNNABLE
 
 config RCU_CPU_STALL_TIMEOUT
 	int "RCU CPU stall timeout in seconds"
-	depends on RCU_STALL_COMMON
+	depends on TREE_RCU || TREE_PREEMPT_RCU
 	range 3 300
 	default 21
 	help
@@ -1010,7 +1008,6 @@ config RCU_CPU_STALL_INFO
 config RCU_TRACE
 	bool "Enable tracing for RCU"
 	depends on DEBUG_KERNEL
-	select TRACE_CLOCK
 	help
 	  This option provides tracing in RCU which presents stats
 	  in debugfs for debugging RCU implementation.
@@ -1018,8 +1015,6 @@ config RCU_TRACE
 	  Say Y here if you want to enable RCU tracing
 	  Say N if you are unsure.
 
-endmenu # "RCU Debugging"
-
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
diff --git a/trunk/mm/memcontrol.c b/trunk/mm/memcontrol.c
index fbb60b103e64..09255ec8159c 100644
--- a/trunk/mm/memcontrol.c
+++ b/trunk/mm/memcontrol.c
@@ -3030,9 +3030,7 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
 	if (memcg) {
 		s->memcg_params->memcg = memcg;
 		s->memcg_params->root_cache = root_cache;
-	} else
-		s->memcg_params->is_root_cache = true;
-
+	}
 	return 0;
 }
 
diff --git a/trunk/mm/mlock.c b/trunk/mm/mlock.c
index c9bd528b01d2..f0b9ce572fc7 100644
--- a/trunk/mm/mlock.c
+++ b/trunk/mm/mlock.c
@@ -517,11 +517,11 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 static int do_mlockall(int flags)
 {
 	struct vm_area_struct * vma, * prev = NULL;
+	unsigned int def_flags = 0;
 
 	if (flags & MCL_FUTURE)
-		current->mm->def_flags |= VM_LOCKED;
-	else
-		current->mm->def_flags &= ~VM_LOCKED;
+		def_flags = VM_LOCKED;
+	current->mm->def_flags = def_flags;
 	if (flags == MCL_FUTURE)
 		goto out;
 
diff --git a/trunk/mm/mmap.c b/trunk/mm/mmap.c
index 09da0b264982..d1e4124f3d0e 100644
--- a/trunk/mm/mmap.c
+++ b/trunk/mm/mmap.c
@@ -32,7 +32,6 @@
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
 #include <linux/rbtree_augmented.h>
-#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/trunk/mm/mremap.c b/trunk/mm/mremap.c
index f9766f460299..e1031e1f6a61 100644
--- a/trunk/mm/mremap.c
+++ b/trunk/mm/mremap.c
@@ -19,7 +19,6 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/mmu_notifier.h>
-#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/trunk/mm/nommu.c b/trunk/mm/nommu.c
index b20db4e22263..79c3cac87afa 100644
--- a/trunk/mm/nommu.c
+++ b/trunk/mm/nommu.c
@@ -29,7 +29,6 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
-#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
diff --git a/trunk/mm/page-writeback.c b/trunk/mm/page-writeback.c
index 66a0024becd9..0713bfbf0954 100644
--- a/trunk/mm/page-writeback.c
+++ b/trunk/mm/page-writeback.c
@@ -35,7 +35,6 @@
 #include <linux/buffer_head.h> /* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
 #include <linux/timer.h>
-#include <linux/sched/rt.h>
 #include <trace/events/writeback.h>
 
 /*
diff --git a/trunk/mm/page_alloc.c b/trunk/mm/page_alloc.c
index d1107adf174a..df2022ff0c8a 100644
--- a/trunk/mm/page_alloc.c
+++ b/trunk/mm/page_alloc.c
@@ -58,7 +58,6 @@
 #include <linux/prefetch.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
-#include <linux/sched/rt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -774,10 +773,6 @@ void __init init_cma_reserved_pageblock(struct page *page)
 	set_pageblock_migratetype(page, MIGRATE_CMA);
 	__free_pages(page, pageblock_order);
 	totalram_pages += pageblock_nr_pages;
-#ifdef CONFIG_HIGHMEM
-	if (PageHighMem(page))
-		totalhigh_pages += pageblock_nr_pages;
-#endif
 }
 #endif
 
@@ -4421,11 +4416,10 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
  * round what is now in bits to nearest long in bits, then return it in
  * bytes.
  */
-static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
+static unsigned long __init usemap_size(unsigned long zonesize)
 {
 	unsigned long usemapsize;
 
-	zonesize += zone_start_pfn & (pageblock_nr_pages-1);
 	usemapsize = roundup(zonesize, pageblock_nr_pages);
 	usemapsize = usemapsize >> pageblock_order;
 	usemapsize *= NR_PAGEBLOCK_BITS;
@@ -4435,19 +4429,17 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l
 }
 
 static void __init setup_usemap(struct pglist_data *pgdat,
-				struct zone *zone,
-				unsigned long zone_start_pfn,
-				unsigned long zonesize)
+				struct zone *zone, unsigned long zonesize)
 {
-	unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
+	unsigned long usemapsize = usemap_size(zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
 		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
 								   usemapsize);
 }
 #else
-static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
-				unsigned long zone_start_pfn, unsigned long zonesize) {}
+static inline void setup_usemap(struct pglist_data *pgdat,
+				struct zone *zone, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4598,7 +4590,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			continue;
 
 		set_pageblock_order();
-		setup_usemap(pgdat, zone, zone_start_pfn, size);
+		setup_usemap(pgdat, zone, size);
 		ret = init_currently_empty_zone(zone, zone_start_pfn,
 						size, MEMMAP_EARLY);
 		BUG_ON(ret);
diff --git a/trunk/net/batman-adv/distributed-arp-table.c b/trunk/net/batman-adv/distributed-arp-table.c
index 553921511e4e..183f97a86bb2 100644
--- a/trunk/net/batman-adv/distributed-arp-table.c
+++ b/trunk/net/batman-adv/distributed-arp-table.c
@@ -440,7 +440,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
 	/* this is an hash collision with the temporary selected node. Choose
 	 * the one with the lowest address
 	 */
-	if ((tmp_max == max) && max_orig_node &&
+	if ((tmp_max == max) &&
 	    (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
 		goto out;
 
diff --git a/trunk/net/bridge/br_stp_bpdu.c b/trunk/net/bridge/br_stp_bpdu.c
index 8660ea3be705..7f884e3fb955 100644
--- a/trunk/net/bridge/br_stp_bpdu.c
+++ b/trunk/net/bridge/br_stp_bpdu.c
@@ -16,7 +16,6 @@
 #include <linux/etherdevice.h>
 #include <linux/llc.h>
 #include <linux/slab.h>
-#include <linux/pkt_sched.h>
 #include <net/net_namespace.h>
 #include <net/llc.h>
 #include <net/llc_pdu.h>
@@ -41,7 +40,6 @@ static void br_send_bpdu(struct net_bridge_port *p,
 
 	skb->dev = p->dev;
 	skb->protocol = htons(ETH_P_802_2);
-	skb->priority = TC_PRIO_CONTROL;
 
 	skb_reserve(skb, LLC_RESERVE);
 	memcpy(__skb_put(skb, length), data, length);
diff --git a/trunk/net/core/datagram.c b/trunk/net/core/datagram.c
index 368f9c3f9dc6..0337e2b76862 100644
--- a/trunk/net/core/datagram.c
+++ b/trunk/net/core/datagram.c
@@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 		skb_queue_walk(queue, skb) {
 			*peeked = skb->peeked;
 			if (flags & MSG_PEEK) {
-				if (*off >= skb->len && skb->len) {
+				if (*off >= skb->len) {
 					*off -= skb->len;
 					continue;
 				}
diff --git a/trunk/net/ipv4/arp.c b/trunk/net/ipv4/arp.c
index ded146b217f1..9547a273b9e9 100644
--- a/trunk/net/ipv4/arp.c
+++ b/trunk/net/ipv4/arp.c
@@ -928,25 +928,24 @@ static void parp_redo(struct sk_buff *skb)
 static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 		   struct packet_type *pt, struct net_device *orig_dev)
 {
-	const struct arphdr *arp;
-
-	if (dev->flags & IFF_NOARP ||
-	    skb->pkt_type == PACKET_OTHERHOST ||
-	    skb->pkt_type == PACKET_LOOPBACK)
-		goto freeskb;
-
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (!skb)
-		goto out_of_mem;
+	struct arphdr *arp;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
 	if (!pskb_may_pull(skb, arp_hdr_len(dev)))
 		goto freeskb;
 
 	arp = arp_hdr(skb);
-	if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4)
+	if (arp->ar_hln != dev->addr_len ||
+	    dev->flags & IFF_NOARP ||
+	    skb->pkt_type == PACKET_OTHERHOST ||
+	    skb->pkt_type == PACKET_LOOPBACK ||
+	    arp->ar_pln != 4)
 		goto freeskb;
 
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		goto out_of_mem;
+
 	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
 	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
diff --git a/trunk/net/ipv6/netfilter/ip6t_NPT.c b/trunk/net/ipv6/netfilter/ip6t_NPT.c
index 83acc1405a18..7302b0b7b642 100644
--- a/trunk/net/ipv6/netfilter/ip6t_NPT.c
+++ b/trunk/net/ipv6/netfilter/ip6t_NPT.c
@@ -9,7 +9,6 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
-#include <net/ipv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_ipv6/ip6t_NPT.h>
@@ -19,20 +18,11 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 {
 	struct ip6t_npt_tginfo *npt = par->targinfo;
 	__wsum src_sum = 0, dst_sum = 0;
-	struct in6_addr pfx;
 	unsigned int i;
 
 	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
 		return -EINVAL;
 
-	/* Ensure that LSB of prefix is zero */
-	ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len);
-	if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6))
-		return -EINVAL;
-	ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len);
-	if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
-		return -EINVAL;
-
 	for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
 		src_sum = csum_add(src_sum,
 				(__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
@@ -40,7 +30,7 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 				(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
 	}
 
-	npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
+	npt->adjustment = (__force __sum16) csum_sub(src_sum, dst_sum);
 	return 0;
 }
 
@@ -61,7 +51,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
 
 		idx = i / 32;
 		addr->s6_addr32[idx] &= mask;
-		addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx];
+		addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
 	}
 
 	if (pfx_len <= 48)
@@ -76,8 +66,8 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
 			return false;
 	}
 
-	sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]),
-				  csum_unfold(npt->adjustment)));
+	sum = (__force __sum16) csum_add((__force __wsum)addr->s6_addr16[idx],
+			 npt->adjustment);
 	if (sum == CSUM_MANGLED_0)
 		sum = 0;
 	*(__force __sum16 *)&addr->s6_addr16[idx] = sum;
diff --git a/trunk/net/mac80211/cfg.c b/trunk/net/mac80211/cfg.c
index 0479c64aa83c..516fbc96feff 100644
--- a/trunk/net/mac80211/cfg.c
+++ b/trunk/net/mac80211/cfg.c
@@ -2004,8 +2004,7 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	memcpy(sdata->vif.bss_conf.mcast_rate, rate,
-	       sizeof(int) * IEEE80211_NUM_BANDS);
+	memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(rate));
 
 	return 0;
 }
diff --git a/trunk/net/mac80211/mlme.c b/trunk/net/mac80211/mlme.c
index 5107248af7fb..a3552929a21d 100644
--- a/trunk/net/mac80211/mlme.c
+++ b/trunk/net/mac80211/mlme.c
@@ -3400,7 +3400,6 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 
 	ret = 0;
 
-out:
 	while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
 					IEEE80211_CHAN_DISABLED)) {
 		if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
@@ -3409,13 +3408,14 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 			goto out;
 		}
 
-		ret |= chandef_downgrade(chandef);
+		ret = chandef_downgrade(chandef);
 	}
 
 	if (chandef->width != vht_chandef.width)
 		sdata_info(sdata,
-			   "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n");
+			   "local regulatory prevented using AP HT/VHT configuration, downgraded\n");
 
+out:
 	WARN_ON_ONCE(!cfg80211_chandef_valid(chandef));
 	return ret;
 }
@@ -3529,11 +3529,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	 */
 	ret = ieee80211_vif_use_channel(sdata, &chandef,
 					IEEE80211_CHANCTX_SHARED);
-	while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
+	while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT)
 		ifmgd->flags |= chandef_downgrade(&chandef);
-		ret = ieee80211_vif_use_channel(sdata, &chandef,
-						IEEE80211_CHANCTX_SHARED);
-	}
 	return ret;
 }
 
diff --git a/trunk/net/netfilter/ipvs/ip_vs_proto_sctp.c b/trunk/net/netfilter/ipvs/ip_vs_proto_sctp.c
index ae8ec6f27688..746048b13ef3 100644
--- a/trunk/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/trunk/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -61,27 +61,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 	return 1;
 }
 
-static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
-			  unsigned int sctphoff)
-{
-	__u32 crc32;
-	struct sk_buff *iter;
-
-	crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff);
-	skb_walk_frags(skb, iter)
-		crc32 = sctp_update_cksum((u8 *) iter->data,
-					  skb_headlen(iter), crc32);
-	sctph->checksum = sctp_end_cksum(crc32);
-
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-}
-
 static int
 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
 	sctp_sctphdr_t *sctph;
 	unsigned int sctphoff = iph->len;
+	struct sk_buff *iter;
+	__be32 crc32;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -105,7 +92,13 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 	sctph = (void *) skb_network_header(skb) + sctphoff;
 	sctph->source = cp->vport;
 
-	sctp_nat_csum(skb, sctph, sctphoff);
+	/* Calculate the checksum */
+	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
+	skb_walk_frags(skb, iter)
+		crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
+				          crc32);
+	crc32 = sctp_end_cksum(crc32);
+	sctph->checksum = crc32;
 
 	return 1;
 }
@@ -116,6 +109,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
 	sctp_sctphdr_t *sctph;
 	unsigned int sctphoff = iph->len;
+	struct sk_buff *iter;
+	__be32 crc32;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -139,7 +134,13 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 	sctph = (void *) skb_network_header(skb) + sctphoff;
 	sctph->dest = cp->dport;
 
-	sctp_nat_csum(skb, sctph, sctphoff);
+	/* Calculate the checksum */
+	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
+	skb_walk_frags(skb, iter)
+		crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
+					  crc32);
+	crc32 = sctp_end_cksum(crc32);
+	sctph->checksum = crc32;
 
 	return 1;
 }
diff --git a/trunk/net/netfilter/ipvs/ip_vs_sync.c b/trunk/net/netfilter/ipvs/ip_vs_sync.c
index 44fd10c539ac..effa10c9e4e3 100644
--- a/trunk/net/netfilter/ipvs/ip_vs_sync.c
+++ b/trunk/net/netfilter/ipvs/ip_vs_sync.c
@@ -1795,8 +1795,6 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 					     GFP_KERNEL);
 			if (!tinfo->buf)
 				goto outtinfo;
-		} else {
-			tinfo->buf = NULL;
 		}
 		tinfo->id = id;
 
diff --git a/trunk/net/sched/sch_htb.c b/trunk/net/sched/sch_htb.c
index 79e8ed4ac7ce..51561eafcb72 100644
--- a/trunk/net/sched/sch_htb.c
+++ b/trunk/net/sched/sch_htb.c
@@ -1135,9 +1135,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	memset(&opt, 0, sizeof(opt));
 
 	opt.rate.rate = cl->rate.rate_bps >> 3;
-	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
+	opt.buffer = cl->buffer;
 	opt.ceil.rate = cl->ceil.rate_bps >> 3;
-	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
+	opt.cbuffer = cl->cbuffer;
 	opt.quantum = cl->quantum;
 	opt.prio = cl->prio;
 	opt.level = cl->level;
diff --git a/trunk/net/sctp/Kconfig b/trunk/net/sctp/Kconfig
index cf4852814e0c..7521d944c0fb 100644
--- a/trunk/net/sctp/Kconfig
+++ b/trunk/net/sctp/Kconfig
@@ -3,8 +3,8 @@
 #
 
 menuconfig IP_SCTP
-	tristate "The SCTP Protocol"
-	depends on INET
+	tristate "The SCTP Protocol (EXPERIMENTAL)"
+	depends on INET && EXPERIMENTAL
 	depends on IPV6 || IPV6=n
 	select CRYPTO
 	select CRYPTO_HMAC
diff --git a/trunk/net/sctp/ipv6.c b/trunk/net/sctp/ipv6.c
index 391a245d5203..f3f0f4dc31dd 100644
--- a/trunk/net/sctp/ipv6.c
+++ b/trunk/net/sctp/ipv6.c
@@ -326,10 +326,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	 */
 	rcu_read_lock();
 	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-		if (!laddr->valid)
+		if (!laddr->valid && laddr->state != SCTP_ADDR_SRC)
 			continue;
-		if ((laddr->state == SCTP_ADDR_SRC) &&
-		    (laddr->a.sa.sa_family == AF_INET6) &&
+		if ((laddr->a.sa.sa_family == AF_INET6) &&
 		    (scope <= sctp_scope(&laddr->a))) {
 			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
 			if (!baddr || (matchlen < bmatchlen)) {
diff --git a/trunk/samples/Kconfig b/trunk/samples/Kconfig
index 6181c2cc9ca0..7b6792a18c05 100644
--- a/trunk/samples/Kconfig
+++ b/trunk/samples/Kconfig
@@ -5,6 +5,12 @@ menuconfig SAMPLES
 
 if SAMPLES
 
+config SAMPLE_TRACEPOINTS
+	tristate "Build tracepoints examples -- loadable modules only"
+	depends on TRACEPOINTS && m
+	help
+	  This builds the tracepoint example modules.
+
 config SAMPLE_TRACE_EVENTS
 	tristate "Build trace_events examples -- loadable modules only"
 	depends on EVENT_TRACING && m
diff --git a/trunk/samples/Makefile b/trunk/samples/Makefile
index 1a60c62e2045..5ef08bba96ce 100644
--- a/trunk/samples/Makefile
+++ b/trunk/samples/Makefile
@@ -1,4 +1,4 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ \
+obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ tracepoints/ trace_events/ \
 			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
diff --git a/trunk/samples/tracepoints/Makefile b/trunk/samples/tracepoints/Makefile
new file mode 100644
index 000000000000..36479ad9ae14
--- /dev/null
+++ b/trunk/samples/tracepoints/Makefile
@@ -0,0 +1,6 @@
+# builds the tracepoint example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
+obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/trunk/samples/tracepoints/tp-samples-trace.h b/trunk/samples/tracepoints/tp-samples-trace.h
new file mode 100644
index 000000000000..4d46be965961
--- /dev/null
+++ b/trunk/samples/tracepoints/tp-samples-trace.h
@@ -0,0 +1,11 @@
+#ifndef _TP_SAMPLES_TRACE_H
+#define _TP_SAMPLES_TRACE_H
+
+#include <linux/proc_fs.h>	/* for struct inode and struct file */
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(subsys_event,
+	TP_PROTO(struct inode *inode, struct file *file),
+	TP_ARGS(inode, file));
+DECLARE_TRACE_NOARGS(subsys_eventb);
+#endif
diff --git a/trunk/samples/tracepoints/tracepoint-probe-sample.c b/trunk/samples/tracepoints/tracepoint-probe-sample.c
new file mode 100644
index 000000000000..744c0b9652a7
--- /dev/null
+++ b/trunk/samples/tracepoints/tracepoint-probe-sample.c
@@ -0,0 +1,78 @@
+/*
+ * tracepoint-probe-sample.c
+ *
+ * sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/dcache.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Probe for the subsys_event tracepoint: logs the name of the file the
+ * event was raised for.
+ *
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Locking must therefore be done in the probe to use the dentry.
+ * path_get() pins both the vfsmount and the dentry of the path, so no
+ * separate dget()/dput() pair is needed around the access.
+ */
+static void probe_subsys_event(void *ignore,
+			       struct inode *inode, struct file *file)
+{
+	path_get(&file->f_path);
+	printk(KERN_INFO "Event is encountered with filename %s\n",
+		file->f_path.dentry->d_name.name);
+	path_put(&file->f_path);
+}
+
+/* Probe for the argument-less subsys_eventb tracepoint. */
+static void probe_subsys_eventb(void *ignore)
+{
+	printk(KERN_INFO "Event B is encountered\n");
+}
+
+/*
+ * Attach both probes at module load.  A registration failure is reported
+ * with WARN_ON() and returned to the module loader; if the second
+ * registration fails, the first probe is detached again so that a failed
+ * load leaves nothing registered.
+ */
+static int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event, NULL);
+	if (WARN_ON(ret))
+		return ret;
+
+	ret = register_trace_subsys_eventb(probe_subsys_eventb, NULL);
+	if (WARN_ON(ret)) {
+		unregister_trace_subsys_event(probe_subsys_event, NULL);
+		/* Let in-flight probe calls finish before failing the load. */
+		tracepoint_synchronize_unregister();
+		return ret;
+	}
+
+	return 0;
+}
+
+module_init(tp_sample_trace_init);
+
+/*
+ * Detach the probes in reverse order of registration, then wait for any
+ * probe call still in flight before the module is unloaded.
+ */
+static void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_eventb(probe_subsys_eventb, NULL);
+	unregister_trace_subsys_event(probe_subsys_event, NULL);
+	tracepoint_synchronize_unregister();
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/trunk/samples/tracepoints/tracepoint-probe-sample2.c b/trunk/samples/tracepoints/tracepoint-probe-sample2.c
new file mode 100644
index 000000000000..9fcf990e5d4b
--- /dev/null
+++ b/trunk/samples/tracepoints/tracepoint-probe-sample2.c
@@ -0,0 +1,56 @@
+/*
+ * tracepoint-probe-sample2.c
+ *
+ * 2nd sample tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include "tp-samples-trace.h"
+
+/*
+ * Probe for the subsys_event tracepoint: logs the inode number the event
+ * was raised for.
+ *
+ * Here the caller only guarantees locking for struct file and struct inode.
+ * Only the inode is read, so the probe takes no reference of its own.
+ */
+static void probe_subsys_event(void *ignore,
+			       struct inode *inode, struct file *file)
+{
+	printk(KERN_INFO "Event is encountered with inode number %lu\n",
+		inode->i_ino);
+}
+
+/*
+ * Attach the probe at module load.  A registration failure is reported
+ * with WARN_ON() and returned to the module loader instead of being
+ * ignored.
+ */
+static int __init tp_sample_trace_init(void)
+{
+	int ret;
+
+	ret = register_trace_subsys_event(probe_subsys_event, NULL);
+	WARN_ON(ret);
+
+	return ret;
+}
+
+module_init(tp_sample_trace_init);
+
+/*
+ * Detach the probe, then wait for any probe call still in flight before
+ * the module is unloaded.
+ */
+static void __exit tp_sample_trace_exit(void)
+{
+	unregister_trace_subsys_event(probe_subsys_event, NULL);
+	tracepoint_synchronize_unregister();
+}
+
+module_exit(tp_sample_trace_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/trunk/samples/tracepoints/tracepoint-sample.c b/trunk/samples/tracepoints/tracepoint-sample.c
new file mode 100644
index 000000000000..f4d89e008c32
--- /dev/null
+++ b/trunk/samples/tracepoints/tracepoint-sample.c
@@ -0,0 +1,68 @@
+/* tracepoint-sample.c
+ *
+ * Executes a tracepoint when /proc/tracepoint-sample is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include "tp-samples-trace.h"
+
+DEFINE_TRACE(subsys_event);
+DEFINE_TRACE(subsys_eventb);
+
+/* /proc/tracepoint-sample entry; only used within this file. */
+static struct proc_dir_entry *pentry_sample;
+
+/*
+ * open() handler for /proc/tracepoint-sample.  Fires subsys_event once and
+ * subsys_eventb ten times, then always fails the open with -EPERM
+ * (presumably because the file exists only to trigger the tracepoints).
+ */
+static int my_open(struct inode *inode, struct file *file)
+{
+	int i;
+
+	trace_subsys_event(inode, file);
+	for (i = 0; i < 10; i++)
+		trace_subsys_eventb();
+	return -EPERM;
+}
+
+static const struct file_operations mark_ops = {
+	.open = my_open,
+	.llseek = noop_llseek,
+};
+
+/*
+ * Create /proc/tracepoint-sample.  Returns 0 on success or -ENOMEM when
+ * the proc entry cannot be created.
+ */
+static int __init sample_init(void)
+{
+	printk(KERN_ALERT "sample init\n");
+	pentry_sample = proc_create("tracepoint-sample", 0444, NULL,
+		&mark_ops);
+	if (!pentry_sample)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Remove /proc/tracepoint-sample on module unload. */
+static void __exit sample_exit(void)
+{
+	printk(KERN_ALERT "sample exit\n");
+	remove_proc_entry("tracepoint-sample", NULL);
+}
+
+module_init(sample_init);
+module_exit(sample_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Tracepoint sample");
diff --git a/trunk/tools/Makefile b/trunk/tools/Makefile
index 798fa0ef048e..1f9a529fe544 100644
--- a/trunk/tools/Makefile
+++ b/trunk/tools/Makefile
@@ -15,7 +15,7 @@ help:
 	@echo '  x86_energy_perf_policy - Intel energy policy tool'
 	@echo ''
 	@echo 'You can do:'
-	@echo ' $$ make -C tools/ <tool>_install'
+	@echo ' $$ make -C tools/<tool>_install'
 	@echo ''
 	@echo '  from the kernel command line to build and install one of'
 	@echo '  the tools above'
diff --git a/trunk/tools/lib/traceevent/event-parse.c b/trunk/tools/lib/traceevent/event-parse.c
index 82b0606dcb8a..5a824e355d04 100644
--- a/trunk/tools/lib/traceevent/event-parse.c
+++ b/trunk/tools/lib/traceevent/event-parse.c
@@ -13,7 +13,8 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
@@ -1223,34 +1224,6 @@ static int field_is_long(struct format_field *field)
 	return 0;
 }
 
-static unsigned int type_size(const char *name)
-{
-	/* This covers all FIELD_IS_STRING types. */
-	static struct {
-		const char *type;
-		unsigned int size;
-	} table[] = {
-		{ "u8",   1 },
-		{ "u16",  2 },
-		{ "u32",  4 },
-		{ "u64",  8 },
-		{ "s8",   1 },
-		{ "s16",  2 },
-		{ "s32",  4 },
-		{ "s64",  8 },
-		{ "char", 1 },
-		{ },
-	};
-	int i;
-
-	for (i = 0; table[i].type; i++) {
-		if (!strcmp(table[i].type, name))
-			return table[i].size;
-	}
-
-	return 0;
-}
-
 static int event_read_fields(struct event_format *event, struct format_field **fields)
 {
 	struct format_field *field = NULL;
@@ -1260,8 +1233,6 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 	int count = 0;
 
 	do {
-		unsigned int size_dynamic = 0;
-
 		type = read_token(&token);
 		if (type == EVENT_NEWLINE) {
 			free_token(token);
@@ -1420,7 +1391,6 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 				field->type = new_type;
 				strcat(field->type, " ");
 				strcat(field->type, field->name);
-				size_dynamic = type_size(field->name);
 				free_token(field->name);
 				strcat(field->type, brackets);
 				field->name = token;
@@ -1493,8 +1463,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 			if (read_expect_type(EVENT_ITEM, &token))
 				goto fail;
 
-			if (strtoul(token, NULL, 0))
-				field->flags |= FIELD_IS_SIGNED;
+			/* add signed type */
 
 			free_token(token);
 			if (read_expected(EVENT_OP, ";") < 0)
@@ -1509,14 +1478,10 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		if (field->flags & FIELD_IS_ARRAY) {
 			if (field->arraylen)
 				field->elementsize = field->size / field->arraylen;
-			else if (field->flags & FIELD_IS_DYNAMIC)
-				field->elementsize = size_dynamic;
 			else if (field->flags & FIELD_IS_STRING)
 				field->elementsize = 1;
-			else if (field->flags & FIELD_IS_LONG)
-				field->elementsize = event->pevent ?
-						     event->pevent->long_size :
-						     sizeof(long);
+			else
+				field->elementsize = event->pevent->long_size;
 		} else
 			field->elementsize = field->size;
 
@@ -1820,8 +1785,6 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		   strcmp(token, "/") == 0 ||
 		   strcmp(token, "<") == 0 ||
 		   strcmp(token, ">") == 0 ||
-		   strcmp(token, "<=") == 0 ||
-		   strcmp(token, ">=") == 0 ||
 		   strcmp(token, "==") == 0 ||
 		   strcmp(token, "!=") == 0) {
 
@@ -2518,7 +2481,7 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
 
 	free_token(token);
 	arg = alloc_arg();
-	if (!arg) {
+	if (!field) {
 		do_warning("%s: not enough memory!", __func__);
 		*tok = NULL;
 		return EVENT_ERROR;
diff --git a/trunk/tools/lib/traceevent/event-parse.h b/trunk/tools/lib/traceevent/event-parse.h
index 7be7e89533e4..24a4bbabc5d5 100644
--- a/trunk/tools/lib/traceevent/event-parse.h
+++ b/trunk/tools/lib/traceevent/event-parse.h
@@ -13,7 +13,8 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
diff --git a/trunk/tools/lib/traceevent/event-utils.h b/trunk/tools/lib/traceevent/event-utils.h
index e76c9acb92cd..bc075006966e 100644
--- a/trunk/tools/lib/traceevent/event-utils.h
+++ b/trunk/tools/lib/traceevent/event-utils.h
@@ -13,7 +13,8 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
diff --git a/trunk/tools/lib/traceevent/parse-filter.c b/trunk/tools/lib/traceevent/parse-filter.c
index 2500e75583fc..5ea4326ad11f 100644
--- a/trunk/tools/lib/traceevent/parse-filter.c
+++ b/trunk/tools/lib/traceevent/parse-filter.c
@@ -13,7 +13,8 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
diff --git a/trunk/tools/lib/traceevent/parse-utils.c b/trunk/tools/lib/traceevent/parse-utils.c
index bba701cf10e6..f023a133abb6 100644
--- a/trunk/tools/lib/traceevent/parse-utils.c
+++ b/trunk/tools/lib/traceevent/parse-utils.c
@@ -1,22 +1,3 @@
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/trunk/tools/lib/traceevent/trace-seq.c b/trunk/tools/lib/traceevent/trace-seq.c
index a57db805136a..b1ccc923e8a5 100644
--- a/trunk/tools/lib/traceevent/trace-seq.c
+++ b/trunk/tools/lib/traceevent/trace-seq.c
@@ -13,7 +13,8 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
diff --git a/trunk/tools/perf/Documentation/Makefile b/trunk/tools/perf/Documentation/Makefile
index eb30044a922a..ef6d22e879eb 100644
--- a/trunk/tools/perf/Documentation/Makefile
+++ b/trunk/tools/perf/Documentation/Makefile
@@ -222,14 +222,10 @@ install-pdf: pdf
 #install-html: html
 #	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
 
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(MAKECMDGOALS),tags)
 $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
 	$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
 
 -include $(OUTPUT)PERF-VERSION-FILE
-endif
-endif
 
 #
 # Determine "include::" file references in asciidoc files.
diff --git a/trunk/tools/perf/Documentation/perf-annotate.txt b/trunk/tools/perf/Documentation/perf-annotate.txt
index 5ad07ef417f0..c8ffd9fd5c6a 100644
--- a/trunk/tools/perf/Documentation/perf-annotate.txt
+++ b/trunk/tools/perf/Documentation/perf-annotate.txt
@@ -61,13 +61,11 @@ OPTIONS
 
 --stdio:: Use the stdio interface.
 
---tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
+--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
 	present, as when piping to other commands, the stdio interface is
 	used. This interfaces starts by centering on the line with more
 	samples, TAB/UNTAB cycles through the lines with more samples.
 
---gtk:: Use the GTK interface.
-
 -C::
 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
 	be provided as a comma-separated list with no space: 0,1. Ranges of
@@ -90,9 +88,6 @@ OPTIONS
 --objdump=<path>::
         Path to objdump binary.
 
---skip-missing::
-	Skip symbols that cannot be annotated.
-
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/trunk/tools/perf/Documentation/perf-buildid-cache.txt b/trunk/tools/perf/Documentation/perf-buildid-cache.txt
index e9a8349a7172..c1057701a7dc 100644
--- a/trunk/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/trunk/tools/perf/Documentation/perf-buildid-cache.txt
@@ -24,13 +24,6 @@ OPTIONS
 -r::
 --remove=::
         Remove specified file from the cache.
--M::
---missing=:: 
-	List missing build ids in the cache for the specified file.
--u::
---update::
-	Update specified file of the cache. It can be used to update kallsyms
-	kernel dso to vmlinux in order to support annotation.
 -v::
 --verbose::
 	Be more verbose.
diff --git a/trunk/tools/perf/Documentation/perf-diff.txt b/trunk/tools/perf/Documentation/perf-diff.txt
index 5b3123d5721f..194f37d635df 100644
--- a/trunk/tools/perf/Documentation/perf-diff.txt
+++ b/trunk/tools/perf/Documentation/perf-diff.txt
@@ -22,6 +22,10 @@ specified perf.data files.
 
 OPTIONS
 -------
+-M::
+--displacement::
+        Show position displacement relative to baseline.
+
 -D::
 --dump-raw-trace::
         Dump raw trace in ASCII.
diff --git a/trunk/tools/perf/Documentation/perf-evlist.txt b/trunk/tools/perf/Documentation/perf-evlist.txt
index 1ceb3700ffbb..15217345c2fa 100644
--- a/trunk/tools/perf/Documentation/perf-evlist.txt
+++ b/trunk/tools/perf/Documentation/perf-evlist.txt
@@ -28,10 +28,6 @@ OPTIONS
 --verbose=::
 	Show all fields.
 
--g::
---group::
-	Show event group information.
-
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-list[1],
diff --git a/trunk/tools/perf/Documentation/perf-report.txt b/trunk/tools/perf/Documentation/perf-report.txt
index 02284a0067f0..f4d91bebd59d 100644
--- a/trunk/tools/perf/Documentation/perf-report.txt
+++ b/trunk/tools/perf/Documentation/perf-report.txt
@@ -57,44 +57,11 @@ OPTIONS
 
 -s::
 --sort=::
-	Sort histogram entries by given key(s) - multiple keys can be specified
-	in CSV format.  Following sort keys are available:
-	pid, comm, dso, symbol, parent, cpu, srcline.
-
-	Each key has following meaning:
-
-	- comm: command (name) of the task which can be read via /proc/<pid>/comm
-	- pid: command and tid of the task
-	- dso: name of library or module executed at the time of sample
-	- symbol: name of function executed at the time of sample
-	- parent: name of function matched to the parent regex filter. Unmatched
-	entries are displayed as "[other]".
-	- cpu: cpu number the task ran at the time of sample
-	- srcline: filename and line number executed at the time of sample.  The
-	DWARF debuggin info must be provided.
-
-	By default, comm, dso and symbol keys are used.
-	(i.e. --sort comm,dso,symbol)
-
-	If --branch-stack option is used, following sort keys are also
-	available:
-	dso_from, dso_to, symbol_from, symbol_to, mispredict.
-
-	- dso_from: name of library or module branched from
-	- dso_to: name of library or module branched to
-	- symbol_from: name of function branched from
-	- symbol_to: name of function branched to
-	- mispredict: "N" for predicted branch, "Y" for mispredicted branch
-
-	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
-	and symbol_to, see '--branch-stack'.
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline.
 
 -p::
 --parent=<regex>::
-        A regex filter to identify parent. The parent is a caller of this
-	function and searched through the callchain, thus it requires callchain
-	information recorded. The pattern is in the exteneded regex format and
-	defaults to "\^sys_|^do_page_fault", see '--sort parent'.
+        regex filter to identify parent, see: '--sort parent'
 
 -x::
 --exclude-other::
@@ -107,6 +74,7 @@ OPTIONS
 
 -t::
 --field-separator=::
+
 	Use a special separator character and don't pad with spaces, replacing
 	all occurrences of this separator in symbol names (and other output)
 	with a '.' character, that thus it's the only non valid separator.
@@ -203,9 +171,6 @@ OPTIONS
 --objdump=<path>::
         Path to objdump binary.
 
---group::
-	Show event group information together.
-
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/trunk/tools/perf/Documentation/perf-script-python.txt b/trunk/tools/perf/Documentation/perf-script-python.txt
index 9f1f054b8432..a4027f221a53 100644
--- a/trunk/tools/perf/Documentation/perf-script-python.txt
+++ b/trunk/tools/perf/Documentation/perf-script-python.txt
@@ -336,6 +336,7 @@ scripts listed by the 'perf script -l' command e.g.:
 ----
 root@tropicana:~# perf script -l
 List of available trace scripts:
+  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
   rw-by-pid                            system-wide r/w activity
@@ -401,6 +402,7 @@ should show a new entry for your script:
 ----
 root@tropicana:~# perf script -l
 List of available trace scripts:
+  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
   rw-by-pid                            system-wide r/w activity
diff --git a/trunk/tools/perf/Documentation/perf-stat.txt b/trunk/tools/perf/Documentation/perf-stat.txt
index faf4f4feebcc..cf0c3107e06e 100644
--- a/trunk/tools/perf/Documentation/perf-stat.txt
+++ b/trunk/tools/perf/Documentation/perf-stat.txt
@@ -114,17 +114,6 @@ with it.  --append may be used here.  Examples:
 
 perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
 
--I msecs::
---interval-print msecs::
-	Print count deltas every N milliseconds (minimum: 100ms)
-	example: perf stat -I 1000 -e cycles -a sleep 5
-
---aggr-socket::
-Aggregate counts per processor socket for system-wide mode measurements.  This
-is a useful mode to detect imbalance between sockets.  To enable this mode,
-use --aggr-socket in addition to -a. (system-wide).  The output includes the
-socket number and the number of online processors on that socket. This is
-useful to gauge the amount of aggregation.
 
 EXAMPLES
 --------
diff --git a/trunk/tools/perf/Documentation/perf-test.txt b/trunk/tools/perf/Documentation/perf-test.txt
index d1d3e5121f89..b24ac40fcd58 100644
--- a/trunk/tools/perf/Documentation/perf-test.txt
+++ b/trunk/tools/perf/Documentation/perf-test.txt
@@ -23,10 +23,6 @@ from 'perf test list'.
 
 OPTIONS
 -------
--s::
---skip::
-	Tests to skip (comma separater numeric list).
-
 -v::
 --verbose::
 	Be more verbose.
diff --git a/trunk/tools/perf/Documentation/perf-top.txt b/trunk/tools/perf/Documentation/perf-top.txt
index a414bc95fd52..5b80d84d6b4a 100644
--- a/trunk/tools/perf/Documentation/perf-top.txt
+++ b/trunk/tools/perf/Documentation/perf-top.txt
@@ -60,7 +60,7 @@ Default is to monitor all CPUS.
 
 -i::
 --inherit::
-	Child tasks do not inherit counters.
+	Child tasks inherit counters; this only makes sense with the -p option.
 
 -k <path>::
 --vmlinux=<path>::
diff --git a/trunk/tools/perf/Makefile b/trunk/tools/perf/Makefile
index a2108ca1cc17..8ab05e543ef4 100644
--- a/trunk/tools/perf/Makefile
+++ b/trunk/tools/perf/Makefile
@@ -47,11 +47,10 @@ include config/utilities.mak
 # backtrace post unwind.
 #
 # Define NO_BACKTRACE if you do not want stack backtrace debug feature
-#
-# Define NO_LIBNUMA if you do not want numa perf benchmark
 
 $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
 	@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+-include $(OUTPUT)PERF-VERSION-FILE
 
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 
@@ -149,25 +148,13 @@ RM = rm -f
 MKDIR = mkdir
 FIND = find
 INSTALL = install
-FLEX = flex
-BISON= bison
 
 # sparse is architecture-neutral, which means that we need to tell it
 # explicitly what architecture to check for. Fix this up for yours..
 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
 
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(MAKECMDGOALS),tags)
 -include config/feature-tests.mak
 
-ifeq ($(call get-executable,$(FLEX)),)
-	dummy := $(error Error: $(FLEX) is missing on this system, please install it)
-endif
-
-ifeq ($(call get-executable,$(BISON)),)
-	dummy := $(error Error: $(BISON) is missing on this system, please install it)
-endif
-
 ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
 	CFLAGS := $(CFLAGS) -fstack-protector-all
 endif
@@ -219,8 +206,6 @@ ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
 	EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
 	BASIC_CFLAGS += -I.
 endif
-endif # MAKECMDGOALS != tags
-endif # MAKECMDGOALS != clean
 
 # Guard against environment variables
 BUILTIN_OBJS =
@@ -245,19 +230,11 @@ endif
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 TE_LIB := -L$(TE_PATH) -ltraceevent
 
-export LIBTRACEEVENT
-
-# python extension build directories
-PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
-PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
-PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
-export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
-
-python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
-
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
 
+export LIBTRACEEVENT
+
 $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
 	$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
@@ -292,17 +269,20 @@ endif
 
 export PERL_PATH
 
+FLEX = flex
+BISON= bison
+
 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
 	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
 
 $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
-	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
+	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c
 
 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
 	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
 
 $(OUTPUT)util/pmu-bison.c: util/pmu.y
-	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c
 
 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
 $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
@@ -398,11 +378,8 @@ LIB_H += util/rblist.h
 LIB_H += util/intlist.h
 LIB_H += util/perf_regs.h
 LIB_H += util/unwind.h
-LIB_H += util/vdso.h
 LIB_H += ui/helpline.h
-LIB_H += ui/progress.h
-LIB_H += ui/util.h
-LIB_H += ui/ui.h
+LIB_H += util/vdso.h
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -476,7 +453,6 @@ LIB_OBJS += $(OUTPUT)util/stat.o
 LIB_OBJS += $(OUTPUT)ui/setup.o
 LIB_OBJS += $(OUTPUT)ui/helpline.o
 LIB_OBJS += $(OUTPUT)ui/progress.o
-LIB_OBJS += $(OUTPUT)ui/util.o
 LIB_OBJS += $(OUTPUT)ui/hist.o
 LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
 
@@ -495,8 +471,7 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o
 LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
 LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
 LIB_OBJS += $(OUTPUT)tests/pmu.o
-LIB_OBJS += $(OUTPUT)tests/hists_link.o
-LIB_OBJS += $(OUTPUT)tests/python-use.o
+LIB_OBJS += $(OUTPUT)tests/util.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
@@ -535,13 +510,14 @@ PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
 #
 # Platform specific tweaks
 #
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(MAKECMDGOALS),tags)
 
 # We choose to avoid "if .. else if .. else .. endif endif"
 # because maintaining the nesting to match is a pain.  If
 # we had "elif" things would have been much nicer...
 
+-include config.mak.autogen
+-include config.mak
+
 ifdef NO_LIBELF
 	NO_DWARF := 1
 	NO_DEMANGLE := 1
@@ -581,11 +557,6 @@ else
 endif # SOURCE_LIBELF
 endif # NO_LIBELF
 
-# There's only x86 (both 32 and 64) support for CFI unwind so far
-ifneq ($(ARCH),x86)
-	NO_LIBUNWIND := 1
-endif
-
 ifndef NO_LIBUNWIND
 # for linking with debug library, run like:
 # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
@@ -675,6 +646,7 @@ ifndef NO_NEWT
 		LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
 		LIB_OBJS += $(OUTPUT)ui/browsers/map.o
 		LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
+		LIB_OBJS += $(OUTPUT)ui/util.o
 		LIB_OBJS += $(OUTPUT)ui/tui/setup.o
 		LIB_OBJS += $(OUTPUT)ui/tui/util.o
 		LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
@@ -683,6 +655,9 @@ ifndef NO_NEWT
 		LIB_H += ui/browsers/map.h
 		LIB_H += ui/keysyms.h
 		LIB_H += ui/libslang.h
+		LIB_H += ui/progress.h
+		LIB_H += ui/util.h
+		LIB_H += ui/ui.h
 	endif
 endif
 
@@ -698,12 +673,14 @@ ifndef NO_GTK2
 		BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
 		EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
 		LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
-		LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/util.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
-		LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
+		# Make sure that it'd be included only once.
+		ifeq ($(findstring -DNEWT_SUPPORT,$(BASIC_CFLAGS)),)
+			LIB_OBJS += $(OUTPUT)ui/util.o
+		endif
 	endif
 endif
 
@@ -730,7 +707,7 @@ disable-python = $(eval $(disable-python_code))
 define disable-python_code
   BASIC_CFLAGS += -DNO_LIBPYTHON
   $(if $(1),$(warning No $(1) was found))
-  $(warning Python support will not be built)
+  $(warning Python support won't be built)
 endef
 
 override PYTHON := \
@@ -738,10 +715,19 @@ override PYTHON := \
 
 ifndef PYTHON
   $(call disable-python,python interpreter)
+  python-clean :=
 else
 
   PYTHON_WORD := $(call shell-wordify,$(PYTHON))
 
+  # python extension build directories
+  PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
+  PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+  PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+  export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+  python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
+
   ifdef NO_LIBPYTHON
     $(call disable-python)
   else
@@ -853,24 +839,10 @@ ifndef NO_BACKTRACE
        endif
 endif
 
-ifndef NO_LIBNUMA
-	FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma
-	ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
-		msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
-	else
-		BASIC_CFLAGS += -DLIBNUMA_SUPPORT
-		BUILTIN_OBJS += $(OUTPUT)bench/numa.o
-		EXTLIBS += -lnuma
-	endif
-endif
-
 ifdef ASCIIDOC8
 	export ASCIIDOC8
 endif
 
-endif # MAKECMDGOALS != tags
-endif # MAKECMDGOALS != clean
-
 # Shell quote (do not use $(call) to accommodate ancient setups);
 
 ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
@@ -912,7 +884,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf
 	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
 
 $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
+	$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
 		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
 		$(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
 
@@ -976,13 +948,7 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
 
 $(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
-		'-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
-		$<
-
-$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
-		-DPYTHONPATH='"$(OUTPUT)python"' \
-		-DPYTHON='"$(PYTHON_WORD)"' \
+		'-DBINDIR="$(bindir_SQ)"' \
 		$<
 
 $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
@@ -1133,7 +1099,7 @@ perfexec_instdir = $(prefix)/$(perfexecdir)
 endif
 perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
 
-install-bin: all
+install: all try-install-man
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
@@ -1154,8 +1120,6 @@ install-bin: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 	$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
-install: install-bin try-install-man
-
 install-python_ext:
 	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
 
diff --git a/trunk/tools/perf/arch/common.c b/trunk/tools/perf/arch/common.c
index aacef07ebf31..3e975cb6232e 100644
--- a/trunk/tools/perf/arch/common.c
+++ b/trunk/tools/perf/arch/common.c
@@ -155,7 +155,6 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
 		if (lookup_path(buf))
 			goto out;
 		free(buf);
-		buf = NULL;
 	}
 
 	if (!strcmp(arch, "arm"))
diff --git a/trunk/tools/perf/bench/bench.h b/trunk/tools/perf/bench/bench.h
index a5223e6a7b43..8f89998eeaf4 100644
--- a/trunk/tools/perf/bench/bench.h
+++ b/trunk/tools/perf/bench/bench.h
@@ -1,7 +1,6 @@
 #ifndef BENCH_H
 #define BENCH_H
 
-extern int bench_numa(int argc, const char **argv, const char *prefix);
 extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
 extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
 extern int bench_mem_memcpy(int argc, const char **argv,
diff --git a/trunk/tools/perf/bench/numa.c b/trunk/tools/perf/bench/numa.c
deleted file mode 100644
index 30d1c3225b46..000000000000
--- a/trunk/tools/perf/bench/numa.c
+++ /dev/null
@@ -1,1731 +0,0 @@
-/*
- * numa.c
- *
- * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
- */
-
-#include "../perf.h"
-#include "../builtin.h"
-#include "../util/util.h"
-#include "../util/parse-options.h"
-
-#include "bench.h"
-
-#include <errno.h>
-#include <sched.h>
-#include <stdio.h>
-#include <assert.h>
-#include <malloc.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-#include <sys/mman.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <sys/prctl.h>
-#include <sys/types.h>
-
-#include <numa.h>
-#include <numaif.h>
-
-/*
- * Regular printout to the terminal, supressed if -q is specified:
- */
-#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
-
-/*
- * Debug printf:
- */
-#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
-
-struct thread_data {
-	int			curr_cpu;
-	cpu_set_t		bind_cpumask;
-	int			bind_node;
-	u8			*process_data;
-	int			process_nr;
-	int			thread_nr;
-	int			task_nr;
-	unsigned int		loops_done;
-	u64			val;
-	u64			runtime_ns;
-	pthread_mutex_t		*process_lock;
-};
-
-/* Parameters set by options: */
-
-struct params {
-	/* Startup synchronization: */
-	bool			serialize_startup;
-
-	/* Task hierarchy: */
-	int			nr_proc;
-	int			nr_threads;
-
-	/* Working set sizes: */
-	const char		*mb_global_str;
-	const char		*mb_proc_str;
-	const char		*mb_proc_locked_str;
-	const char		*mb_thread_str;
-
-	double			mb_global;
-	double			mb_proc;
-	double			mb_proc_locked;
-	double			mb_thread;
-
-	/* Access patterns to the working set: */
-	bool			data_reads;
-	bool			data_writes;
-	bool			data_backwards;
-	bool			data_zero_memset;
-	bool			data_rand_walk;
-	u32			nr_loops;
-	u32			nr_secs;
-	u32			sleep_usecs;
-
-	/* Working set initialization: */
-	bool			init_zero;
-	bool			init_random;
-	bool			init_cpu0;
-
-	/* Misc options: */
-	int			show_details;
-	int			run_all;
-	int			thp;
-
-	long			bytes_global;
-	long			bytes_process;
-	long			bytes_process_locked;
-	long			bytes_thread;
-
-	int			nr_tasks;
-	bool			show_quiet;
-
-	bool			show_convergence;
-	bool			measure_convergence;
-
-	int			perturb_secs;
-	int			nr_cpus;
-	int			nr_nodes;
-
-	/* Affinity options -C and -N: */
-	char			*cpu_list_str;
-	char			*node_list_str;
-};
-
-
-/* Global, read-writable area, accessible to all processes and threads: */
-
-struct global_info {
-	u8			*data;
-
-	pthread_mutex_t		startup_mutex;
-	int			nr_tasks_started;
-
-	pthread_mutex_t		startup_done_mutex;
-
-	pthread_mutex_t		start_work_mutex;
-	int			nr_tasks_working;
-
-	pthread_mutex_t		stop_work_mutex;
-	u64			bytes_done;
-
-	struct thread_data	*threads;
-
-	/* Convergence latency measurement: */
-	bool			all_converged;
-	bool			stop_work;
-
-	int			print_once;
-
-	struct params		p;
-};
-
-static struct global_info	*g = NULL;
-
-static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
-static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
-
-struct params p0;
-
-static const struct option options[] = {
-	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
-	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),
-
-	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global  memory (MBs)"),
-	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
-	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
-	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),
-
-	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run"),
-	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run"),
-	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),
-
-	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via writes (can be mixed with -W)"),
-	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
-	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
-	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
-	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),
-
-
-	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
-	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
-	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
-	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),
-
-	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
-	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
-	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
-	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
-	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
-	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"bzero the initial allocations"),
-	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
-
-	/* Special option string parsing callbacks: */
-        OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
-			"bind the first N tasks to these specific cpus (the rest is unbound)",
-			parse_cpus_opt),
-        OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
-			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
-			parse_nodes_opt),
-	OPT_END()
-};
-
-static const char * const bench_numa_usage[] = {
-	"perf bench numa <options>",
-	NULL
-};
-
-static const char * const numa_usage[] = {
-	"perf bench numa mem [<options>]",
-	NULL
-};
-
-static cpu_set_t bind_to_cpu(int target_cpu)
-{
-	cpu_set_t orig_mask, mask;
-	int ret;
-
-	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-	BUG_ON(ret);
-
-	CPU_ZERO(&mask);
-
-	if (target_cpu == -1) {
-		int cpu;
-
-		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-			CPU_SET(cpu, &mask);
-	} else {
-		BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
-		CPU_SET(target_cpu, &mask);
-	}
-
-	ret = sched_setaffinity(0, sizeof(mask), &mask);
-	BUG_ON(ret);
-
-	return orig_mask;
-}
-
-static cpu_set_t bind_to_node(int target_node)
-{
-	int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
-	cpu_set_t orig_mask, mask;
-	int cpu;
-	int ret;
-
-	BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
-	BUG_ON(!cpus_per_node);
-
-	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-	BUG_ON(ret);
-
-	CPU_ZERO(&mask);
-
-	if (target_node == -1) {
-		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-			CPU_SET(cpu, &mask);
-	} else {
-		int cpu_start = (target_node + 0) * cpus_per_node;
-		int cpu_stop  = (target_node + 1) * cpus_per_node;
-
-		BUG_ON(cpu_stop > g->p.nr_cpus);
-
-		for (cpu = cpu_start; cpu < cpu_stop; cpu++)
-			CPU_SET(cpu, &mask);
-	}
-
-	ret = sched_setaffinity(0, sizeof(mask), &mask);
-	BUG_ON(ret);
-
-	return orig_mask;
-}
-
-static void bind_to_cpumask(cpu_set_t mask)
-{
-	int ret;
-
-	ret = sched_setaffinity(0, sizeof(mask), &mask);
-	BUG_ON(ret);
-}
-
-static void mempol_restore(void)
-{
-	int ret;
-
-	ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
-
-	BUG_ON(ret);
-}
-
-static void bind_to_memnode(int node)
-{
-	unsigned long nodemask;
-	int ret;
-
-	if (node == -1)
-		return;
-
-	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
-	nodemask = 1L << node;
-
-	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
-	dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
-
-	BUG_ON(ret);
-}
-
-#define HPSIZE (2*1024*1024)
-
-#define set_taskname(fmt...)				\
-do {							\
-	char name[20];					\
-							\
-	snprintf(name, 20, fmt);			\
-	prctl(PR_SET_NAME, name);			\
-} while (0)
-
-static u8 *alloc_data(ssize_t bytes0, int map_flags,
-		      int init_zero, int init_cpu0, int thp, int init_random)
-{
-	cpu_set_t orig_mask;
-	ssize_t bytes;
-	u8 *buf;
-	int ret;
-
-	if (!bytes0)
-		return NULL;
-
-	/* Allocate and initialize all memory on CPU#0: */
-	if (init_cpu0) {
-		orig_mask = bind_to_node(0);
-		bind_to_memnode(0);
-	}
-
-	bytes = bytes0 + HPSIZE;
-
-	buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
-	BUG_ON(buf == (void *)-1);
-
-	if (map_flags == MAP_PRIVATE) {
-		if (thp > 0) {
-			ret = madvise(buf, bytes, MADV_HUGEPAGE);
-			if (ret && !g->print_once) {
-				g->print_once = 1;
-				printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
-			}
-		}
-		if (thp < 0) {
-			ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
-			if (ret && !g->print_once) {
-				g->print_once = 1;
-				printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
-			}
-		}
-	}
-
-	if (init_zero) {
-		bzero(buf, bytes);
-	} else {
-		/* Initialize random contents, different in each word: */
-		if (init_random) {
-			u64 *wbuf = (void *)buf;
-			long off = rand();
-			long i;
-
-			for (i = 0; i < bytes/8; i++)
-				wbuf[i] = i + off;
-		}
-	}
-
-	/* Align to 2MB boundary: */
-	buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
-
-	/* Restore affinity: */
-	if (init_cpu0) {
-		bind_to_cpumask(orig_mask);
-		mempol_restore();
-	}
-
-	return buf;
-}
-
-static void free_data(void *data, ssize_t bytes)
-{
-	int ret;
-
-	if (!data)
-		return;
-
-	ret = munmap(data, bytes);
-	BUG_ON(ret);
-}
-
-/*
- * Create a shared memory buffer that can be shared between processes, zeroed:
- */
-static void * zalloc_shared_data(ssize_t bytes)
-{
-	return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0,  g->p.thp, g->p.init_random);
-}
-
-/*
- * Create a shared memory buffer that can be shared between processes:
- */
-static void * setup_shared_data(ssize_t bytes)
-{
-	return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
-}
-
-/*
- * Allocate process-local memory - this will either be shared between
- * threads of this process, or only be accessed by this thread:
- */
-static void * setup_private_data(ssize_t bytes)
-{
-	return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
-}
-
-/*
- * Return a process-shared (global) mutex:
- */
-static void init_global_mutex(pthread_mutex_t *mutex)
-{
-	pthread_mutexattr_t attr;
-
-	pthread_mutexattr_init(&attr);
-	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
-	pthread_mutex_init(mutex, &attr);
-}
-
-static int parse_cpu_list(const char *arg)
-{
-	p0.cpu_list_str = strdup(arg);
-
-	dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
-
-	return 0;
-}
-
-static void parse_setup_cpu_list(void)
-{
-	struct thread_data *td;
-	char *str0, *str;
-	int t;
-
-	if (!g->p.cpu_list_str)
-		return;
-
-	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
-
-	str0 = str = strdup(g->p.cpu_list_str);
-	t = 0;
-
-	BUG_ON(!str);
-
-	tprintf("# binding tasks to CPUs:\n");
-	tprintf("#  ");
-
-	while (true) {
-		int bind_cpu, bind_cpu_0, bind_cpu_1;
-		char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
-		int bind_len;
-		int step;
-		int mul;
-
-		tok = strsep(&str, ",");
-		if (!tok)
-			break;
-
-		tok_end = strstr(tok, "-");
-
-		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
-		if (!tok_end) {
-			/* Single CPU specified: */
-			bind_cpu_0 = bind_cpu_1 = atol(tok);
-		} else {
-			/* CPU range specified (for example: "5-11"): */
-			bind_cpu_0 = atol(tok);
-			bind_cpu_1 = atol(tok_end + 1);
-		}
-
-		step = 1;
-		tok_step = strstr(tok, "#");
-		if (tok_step) {
-			step = atol(tok_step + 1);
-			BUG_ON(step <= 0 || step >= g->p.nr_cpus);
-		}
-
-		/*
-		 * Mask length.
-		 * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
-		 * where the _4 means the next 4 CPUs are allowed.
-		 */
-		bind_len = 1;
-		tok_len = strstr(tok, "_");
-		if (tok_len) {
-			bind_len = atol(tok_len + 1);
-			BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
-		}
-
-		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
-		mul = 1;
-		tok_mul = strstr(tok, "x");
-		if (tok_mul) {
-			mul = atol(tok_mul + 1);
-			BUG_ON(mul <= 0);
-		}
-
-		dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
-
-		BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus);
-		BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus);
-		BUG_ON(bind_cpu_0 > bind_cpu_1);
-
-		for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
-			int i;
-
-			for (i = 0; i < mul; i++) {
-				int cpu;
-
-				if (t >= g->p.nr_tasks) {
-					printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
-					goto out;
-				}
-				td = g->threads + t;
-
-				if (t)
-					tprintf(",");
-				if (bind_len > 1) {
-					tprintf("%2d/%d", bind_cpu, bind_len);
-				} else {
-					tprintf("%2d", bind_cpu);
-				}
-
-				CPU_ZERO(&td->bind_cpumask);
-				for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
-					BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
-					CPU_SET(cpu, &td->bind_cpumask);
-				}
-				t++;
-			}
-		}
-	}
-out:
-
-	tprintf("\n");
-
-	if (t < g->p.nr_tasks)
-		printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
-
-	free(str0);
-}
-
-static int parse_cpus_opt(const struct option *opt __maybe_unused,
-			  const char *arg, int unset __maybe_unused)
-{
-	if (!arg)
-		return -1;
-
-	return parse_cpu_list(arg);
-}
-
-static int parse_node_list(const char *arg)
-{
-	p0.node_list_str = strdup(arg);
-
-	dprintf("got NODE list: {%s}\n", p0.node_list_str);
-
-	return 0;
-}
-
-static void parse_setup_node_list(void)
-{
-	struct thread_data *td;
-	char *str0, *str;
-	int t;
-
-	if (!g->p.node_list_str)
-		return;
-
-	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
-
-	str0 = str = strdup(g->p.node_list_str);
-	t = 0;
-
-	BUG_ON(!str);
-
-	tprintf("# binding tasks to NODEs:\n");
-	tprintf("# ");
-
-	while (true) {
-		int bind_node, bind_node_0, bind_node_1;
-		char *tok, *tok_end, *tok_step, *tok_mul;
-		int step;
-		int mul;
-
-		tok = strsep(&str, ",");
-		if (!tok)
-			break;
-
-		tok_end = strstr(tok, "-");
-
-		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
-		if (!tok_end) {
-			/* Single NODE specified: */
-			bind_node_0 = bind_node_1 = atol(tok);
-		} else {
-			/* NODE range specified (for example: "5-11"): */
-			bind_node_0 = atol(tok);
-			bind_node_1 = atol(tok_end + 1);
-		}
-
-		step = 1;
-		tok_step = strstr(tok, "#");
-		if (tok_step) {
-			step = atol(tok_step + 1);
-			BUG_ON(step <= 0 || step >= g->p.nr_nodes);
-		}
-
-		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
-		mul = 1;
-		tok_mul = strstr(tok, "x");
-		if (tok_mul) {
-			mul = atol(tok_mul + 1);
-			BUG_ON(mul <= 0);
-		}
-
-		dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
-
-		BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes);
-		BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes);
-		BUG_ON(bind_node_0 > bind_node_1);
-
-		for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
-			int i;
-
-			for (i = 0; i < mul; i++) {
-				if (t >= g->p.nr_tasks) {
-					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
-					goto out;
-				}
-				td = g->threads + t;
-
-				if (!t)
-					tprintf(" %2d", bind_node);
-				else
-					tprintf(",%2d", bind_node);
-
-				td->bind_node = bind_node;
-				t++;
-			}
-		}
-	}
-out:
-
-	tprintf("\n");
-
-	if (t < g->p.nr_tasks)
-		printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
-
-	free(str0);
-}
-
-static int parse_nodes_opt(const struct option *opt __maybe_unused,
-			  const char *arg, int unset __maybe_unused)
-{
-	if (!arg)
-		return -1;
-
-	return parse_node_list(arg);
-
-	return 0;
-}
-
-#define BIT(x) (1ul << x)
-
-static inline uint32_t lfsr_32(uint32_t lfsr)
-{
-	const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
-	return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
-}
-
-/*
- * Make sure there's real data dependency to RAM (when read
- * accesses are enabled), so the compiler, the CPU and the
- * kernel (KSM, zero page, etc.) cannot optimize away RAM
- * accesses:
- */
-static inline u64 access_data(u64 *data __attribute__((unused)), u64 val)
-{
-	if (g->p.data_reads)
-		val += *data;
-	if (g->p.data_writes)
-		*data = val + 1;
-	return val;
-}
-
-/*
- * The worker process does two types of work, a forwards going
- * loop and a backwards going loop.
- *
- * We do this so that on multiprocessor systems we do not create
- * a 'train' of processing, with highly synchronized processes,
- * skewing the whole benchmark.
- */
-static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
-{
-	long words = bytes/sizeof(u64);
-	u64 *data = (void *)__data;
-	long chunk_0, chunk_1;
-	u64 *d0, *d, *d1;
-	long off;
-	long i;
-
-	BUG_ON(!data && words);
-	BUG_ON(data && !words);
-
-	if (!data)
-		return val;
-
-	/* Very simple memset() work variant: */
-	if (g->p.data_zero_memset && !g->p.data_rand_walk) {
-		bzero(data, bytes);
-		return val;
-	}
-
-	/* Spread out by PID/TID nr and by loop nr: */
-	chunk_0 = words/nr_max;
-	chunk_1 = words/g->p.nr_loops;
-	off = nr*chunk_0 + loop*chunk_1;
-
-	while (off >= words)
-		off -= words;
-
-	if (g->p.data_rand_walk) {
-		u32 lfsr = nr + loop + val;
-		int j;
-
-		for (i = 0; i < words/1024; i++) {
-			long start, end;
-
-			lfsr = lfsr_32(lfsr);
-
-			start = lfsr % words;
-			end = min(start + 1024, words-1);
-
-			if (g->p.data_zero_memset) {
-				bzero(data + start, (end-start) * sizeof(u64));
-			} else {
-				for (j = start; j < end; j++)
-					val = access_data(data + j, val);
-			}
-		}
-	} else if (!g->p.data_backwards || (nr + loop) & 1) {
-
-		d0 = data + off;
-		d  = data + off + 1;
-		d1 = data + words;
-
-		/* Process data forwards: */
-		for (;;) {
-			if (unlikely(d >= d1))
-				d = data;
-			if (unlikely(d == d0))
-				break;
-
-			val = access_data(d, val);
-
-			d++;
-		}
-	} else {
-		/* Process data backwards: */
-
-		d0 = data + off;
-		d  = data + off - 1;
-		d1 = data + words;
-
-		/* Process data forwards: */
-		for (;;) {
-			if (unlikely(d < data))
-				d = data + words-1;
-			if (unlikely(d == d0))
-				break;
-
-			val = access_data(d, val);
-
-			d--;
-		}
-	}
-
-	return val;
-}
-
-static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
-{
-	unsigned int cpu;
-
-	cpu = sched_getcpu();
-
-	g->threads[task_nr].curr_cpu = cpu;
-	prctl(0, bytes_worked);
-}
-
-#define MAX_NR_NODES	64
-
-/*
- * Count the number of nodes a process's threads
- * are spread out on.
- *
- * A count of 1 means that the process is compressed
- * to a single node. A count of g->p.nr_nodes means it's
- * spread out on the whole system.
- */
-static int count_process_nodes(int process_nr)
-{
-	char node_present[MAX_NR_NODES] = { 0, };
-	int nodes;
-	int n, t;
-
-	for (t = 0; t < g->p.nr_threads; t++) {
-		struct thread_data *td;
-		int task_nr;
-		int node;
-
-		task_nr = process_nr*g->p.nr_threads + t;
-		td = g->threads + task_nr;
-
-		node = numa_node_of_cpu(td->curr_cpu);
-		node_present[node] = 1;
-	}
-
-	nodes = 0;
-
-	for (n = 0; n < MAX_NR_NODES; n++)
-		nodes += node_present[n];
-
-	return nodes;
-}
-
-/*
- * Count the number of distinct process-threads a node contains.
- *
- * A count of 1 means that the node contains only a single
- * process. If all nodes on the system contain at most one
- * process then we are well-converged.
- */
-static int count_node_processes(int node)
-{
-	int processes = 0;
-	int t, p;
-
-	for (p = 0; p < g->p.nr_proc; p++) {
-		for (t = 0; t < g->p.nr_threads; t++) {
-			struct thread_data *td;
-			int task_nr;
-			int n;
-
-			task_nr = p*g->p.nr_threads + t;
-			td = g->threads + task_nr;
-
-			n = numa_node_of_cpu(td->curr_cpu);
-			if (n == node) {
-				processes++;
-				break;
-			}
-		}
-	}
-
-	return processes;
-}
-
-static void calc_convergence_compression(int *strong)
-{
-	unsigned int nodes_min, nodes_max;
-	int p;
-
-	nodes_min = -1;
-	nodes_max =  0;
-
-	for (p = 0; p < g->p.nr_proc; p++) {
-		unsigned int nodes = count_process_nodes(p);
-
-		nodes_min = min(nodes, nodes_min);
-		nodes_max = max(nodes, nodes_max);
-	}
-
-	/* Strong convergence: all threads compress on a single node: */
-	if (nodes_min == 1 && nodes_max == 1) {
-		*strong = 1;
-	} else {
-		*strong = 0;
-		tprintf(" {%d-%d}", nodes_min, nodes_max);
-	}
-}
-
-static void calc_convergence(double runtime_ns_max, double *convergence)
-{
-	unsigned int loops_done_min, loops_done_max;
-	int process_groups;
-	int nodes[MAX_NR_NODES];
-	int distance;
-	int nr_min;
-	int nr_max;
-	int strong;
-	int sum;
-	int nr;
-	int node;
-	int cpu;
-	int t;
-
-	if (!g->p.show_convergence && !g->p.measure_convergence)
-		return;
-
-	for (node = 0; node < g->p.nr_nodes; node++)
-		nodes[node] = 0;
-
-	loops_done_min = -1;
-	loops_done_max = 0;
-
-	for (t = 0; t < g->p.nr_tasks; t++) {
-		struct thread_data *td = g->threads + t;
-		unsigned int loops_done;
-
-		cpu = td->curr_cpu;
-
-		/* Not all threads have written it yet: */
-		if (cpu < 0)
-			continue;
-
-		node = numa_node_of_cpu(cpu);
-
-		nodes[node]++;
-
-		loops_done = td->loops_done;
-		loops_done_min = min(loops_done, loops_done_min);
-		loops_done_max = max(loops_done, loops_done_max);
-	}
-
-	nr_max = 0;
-	nr_min = g->p.nr_tasks;
-	sum = 0;
-
-	for (node = 0; node < g->p.nr_nodes; node++) {
-		nr = nodes[node];
-		nr_min = min(nr, nr_min);
-		nr_max = max(nr, nr_max);
-		sum += nr;
-	}
-	BUG_ON(nr_min > nr_max);
-
-	BUG_ON(sum > g->p.nr_tasks);
-
-	if (0 && (sum < g->p.nr_tasks))
-		return;
-
-	/*
-	 * Count the number of distinct process groups present
-	 * on nodes - when we are converged this will decrease
-	 * to g->p.nr_proc:
-	 */
-	process_groups = 0;
-
-	for (node = 0; node < g->p.nr_nodes; node++) {
-		int processes = count_node_processes(node);
-
-		nr = nodes[node];
-		tprintf(" %2d/%-2d", nr, processes);
-
-		process_groups += processes;
-	}
-
-	distance = nr_max - nr_min;
-
-	tprintf(" [%2d/%-2d]", distance, process_groups);
-
-	tprintf(" l:%3d-%-3d (%3d)",
-		loops_done_min, loops_done_max, loops_done_max-loops_done_min);
-
-	if (loops_done_min && loops_done_max) {
-		double skew = 1.0 - (double)loops_done_min/loops_done_max;
-
-		tprintf(" [%4.1f%%]", skew * 100.0);
-	}
-
-	calc_convergence_compression(&strong);
-
-	if (strong && process_groups == g->p.nr_proc) {
-		if (!*convergence) {
-			*convergence = runtime_ns_max;
-			tprintf(" (%6.1fs converged)\n", *convergence/1e9);
-			if (g->p.measure_convergence) {
-				g->all_converged = true;
-				g->stop_work = true;
-			}
-		}
-	} else {
-		if (*convergence) {
-			tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
-			*convergence = 0;
-		}
-		tprintf("\n");
-	}
-}
-
-static void show_summary(double runtime_ns_max, int l, double *convergence)
-{
-	tprintf("\r #  %5.1f%%  [%.1f mins]",
-		(double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
-
-	calc_convergence(runtime_ns_max, convergence);
-
-	if (g->p.show_details >= 0)
-		fflush(stdout);
-}
-
-static void *worker_thread(void *__tdata)
-{
-	struct thread_data *td = __tdata;
-	struct timeval start0, start, stop, diff;
-	int process_nr = td->process_nr;
-	int thread_nr = td->thread_nr;
-	unsigned long last_perturbance;
-	int task_nr = td->task_nr;
-	int details = g->p.show_details;
-	int first_task, last_task;
-	double convergence = 0;
-	u64 val = td->val;
-	double runtime_ns_max;
-	u8 *global_data;
-	u8 *process_data;
-	u8 *thread_data;
-	u64 bytes_done;
-	long work_done;
-	u32 l;
-
-	bind_to_cpumask(td->bind_cpumask);
-	bind_to_memnode(td->bind_node);
-
-	set_taskname("thread %d/%d", process_nr, thread_nr);
-
-	global_data = g->data;
-	process_data = td->process_data;
-	thread_data = setup_private_data(g->p.bytes_thread);
-
-	bytes_done = 0;
-
-	last_task = 0;
-	if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
-		last_task = 1;
-
-	first_task = 0;
-	if (process_nr == 0 && thread_nr == 0)
-		first_task = 1;
-
-	if (details >= 2) {
-		printf("#  thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
-			process_nr, thread_nr, global_data, process_data, thread_data);
-	}
-
-	if (g->p.serialize_startup) {
-		pthread_mutex_lock(&g->startup_mutex);
-		g->nr_tasks_started++;
-		pthread_mutex_unlock(&g->startup_mutex);
-
-		/* Here we will wait for the main process to start us all at once: */
-		pthread_mutex_lock(&g->start_work_mutex);
-		g->nr_tasks_working++;
-
-		/* Last one wake the main process: */
-		if (g->nr_tasks_working == g->p.nr_tasks)
-			pthread_mutex_unlock(&g->startup_done_mutex);
-
-		pthread_mutex_unlock(&g->start_work_mutex);
-	}
-
-	gettimeofday(&start0, NULL);
-
-	start = stop = start0;
-	last_perturbance = start.tv_sec;
-
-	for (l = 0; l < g->p.nr_loops; l++) {
-		start = stop;
-
-		if (g->stop_work)
-			break;
-
-		val += do_work(global_data,  g->p.bytes_global,  process_nr, g->p.nr_proc,	l, val);
-		val += do_work(process_data, g->p.bytes_process, thread_nr,  g->p.nr_threads,	l, val);
-		val += do_work(thread_data,  g->p.bytes_thread,  0,          1,		l, val);
-
-		if (g->p.sleep_usecs) {
-			pthread_mutex_lock(td->process_lock);
-			usleep(g->p.sleep_usecs);
-			pthread_mutex_unlock(td->process_lock);
-		}
-		/*
-		 * Amount of work to be done under a process-global lock:
-		 */
-		if (g->p.bytes_process_locked) {
-			pthread_mutex_lock(td->process_lock);
-			val += do_work(process_data, g->p.bytes_process_locked, thread_nr,  g->p.nr_threads,	l, val);
-			pthread_mutex_unlock(td->process_lock);
-		}
-
-		work_done = g->p.bytes_global + g->p.bytes_process +
-			    g->p.bytes_process_locked + g->p.bytes_thread;
-
-		update_curr_cpu(task_nr, work_done);
-		bytes_done += work_done;
-
-		if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
-			continue;
-
-		td->loops_done = l;
-
-		gettimeofday(&stop, NULL);
-
-		/* Check whether our max runtime timed out: */
-		if (g->p.nr_secs) {
-			timersub(&stop, &start0, &diff);
-			if (diff.tv_sec >= g->p.nr_secs) {
-				g->stop_work = true;
-				break;
-			}
-		}
-
-		/* Update the summary at most once per second: */
-		if (start.tv_sec == stop.tv_sec)
-			continue;
-
-		/*
-		 * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
-		 * by migrating to CPU#0:
-		 */
-		if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
-			cpu_set_t orig_mask;
-			int target_cpu;
-			int this_cpu;
-
-			last_perturbance = stop.tv_sec;
-
-			/*
-			 * Depending on where we are running, move into
-			 * the other half of the system, to create some
-			 * real disturbance:
-			 */
-			this_cpu = g->threads[task_nr].curr_cpu;
-			if (this_cpu < g->p.nr_cpus/2)
-				target_cpu = g->p.nr_cpus-1;
-			else
-				target_cpu = 0;
-
-			orig_mask = bind_to_cpu(target_cpu);
-
-			/* Here we are running on the target CPU already */
-			if (details >= 1)
-				printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
-
-			bind_to_cpumask(orig_mask);
-		}
-
-		if (details >= 3) {
-			timersub(&stop, &start, &diff);
-			runtime_ns_max = diff.tv_sec * 1000000000;
-			runtime_ns_max += diff.tv_usec * 1000;
-
-			if (details >= 0) {
-				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n",
-					process_nr, thread_nr, runtime_ns_max / bytes_done, val);
-			}
-			fflush(stdout);
-		}
-		if (!last_task)
-			continue;
-
-		timersub(&stop, &start0, &diff);
-		runtime_ns_max = diff.tv_sec * 1000000000ULL;
-		runtime_ns_max += diff.tv_usec * 1000ULL;
-
-		show_summary(runtime_ns_max, l, &convergence);
-	}
-
-	gettimeofday(&stop, NULL);
-	timersub(&stop, &start0, &diff);
-	td->runtime_ns = diff.tv_sec * 1000000000ULL;
-	td->runtime_ns += diff.tv_usec * 1000ULL;
-
-	free_data(thread_data, g->p.bytes_thread);
-
-	pthread_mutex_lock(&g->stop_work_mutex);
-	g->bytes_done += bytes_done;
-	pthread_mutex_unlock(&g->stop_work_mutex);
-
-	return NULL;
-}
-
-/*
- * A worker process starts a couple of threads:
- */
-static void worker_process(int process_nr)
-{
-	pthread_mutex_t process_lock;
-	struct thread_data *td;
-	pthread_t *pthreads;
-	u8 *process_data;
-	int task_nr;
-	int ret;
-	int t;
-
-	pthread_mutex_init(&process_lock, NULL);
-	set_taskname("process %d", process_nr);
-
-	/*
-	 * Pick up the memory policy and the CPU binding of our first thread,
-	 * so that we initialize memory accordingly:
-	 */
-	task_nr = process_nr*g->p.nr_threads;
-	td = g->threads + task_nr;
-
-	bind_to_memnode(td->bind_node);
-	bind_to_cpumask(td->bind_cpumask);
-
-	pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
-	process_data = setup_private_data(g->p.bytes_process);
-
-	if (g->p.show_details >= 3) {
-		printf(" # process %2d global mem: %p, process mem: %p\n",
-			process_nr, g->data, process_data);
-	}
-
-	for (t = 0; t < g->p.nr_threads; t++) {
-		task_nr = process_nr*g->p.nr_threads + t;
-		td = g->threads + task_nr;
-
-		td->process_data = process_data;
-		td->process_nr   = process_nr;
-		td->thread_nr    = t;
-		td->task_nr	 = task_nr;
-		td->val          = rand();
-		td->curr_cpu	 = -1;
-		td->process_lock = &process_lock;
-
-		ret = pthread_create(pthreads + t, NULL, worker_thread, td);
-		BUG_ON(ret);
-	}
-
-	for (t = 0; t < g->p.nr_threads; t++) {
-                ret = pthread_join(pthreads[t], NULL);
-		BUG_ON(ret);
-	}
-
-	free_data(process_data, g->p.bytes_process);
-	free(pthreads);
-}
-
-static void print_summary(void)
-{
-	if (g->p.show_details < 0)
-		return;
-
-	printf("\n ###\n");
-	printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
-		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
-	printf(" #      %5dx %5ldMB global  shared mem operations\n",
-			g->p.nr_loops, g->p.bytes_global/1024/1024);
-	printf(" #      %5dx %5ldMB process shared mem operations\n",
-			g->p.nr_loops, g->p.bytes_process/1024/1024);
-	printf(" #      %5dx %5ldMB thread  local  mem operations\n",
-			g->p.nr_loops, g->p.bytes_thread/1024/1024);
-
-	printf(" ###\n");
-
-	printf("\n ###\n"); fflush(stdout);
-}
-
-static void init_thread_data(void)
-{
-	ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
-	int t;
-
-	g->threads = zalloc_shared_data(size);
-
-	for (t = 0; t < g->p.nr_tasks; t++) {
-		struct thread_data *td = g->threads + t;
-		int cpu;
-
-		/* Allow all nodes by default: */
-		td->bind_node = -1;
-
-		/* Allow all CPUs by default: */
-		CPU_ZERO(&td->bind_cpumask);
-		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-			CPU_SET(cpu, &td->bind_cpumask);
-	}
-}
-
-static void deinit_thread_data(void)
-{
-	ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
-
-	free_data(g->threads, size);
-}
-
-static int init(void)
-{
-	g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
-
-	/* Copy over options: */
-	g->p = p0;
-
-	g->p.nr_cpus = numa_num_configured_cpus();
-
-	g->p.nr_nodes = numa_max_node() + 1;
-
-	/* char array in count_process_nodes(): */
-	BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
-
-	if (g->p.show_quiet && !g->p.show_details)
-		g->p.show_details = -1;
-
-	/* Some memory should be specified: */
-	if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
-		return -1;
-
-	if (g->p.mb_global_str) {
-		g->p.mb_global = atof(g->p.mb_global_str);
-		BUG_ON(g->p.mb_global < 0);
-	}
-
-	if (g->p.mb_proc_str) {
-		g->p.mb_proc = atof(g->p.mb_proc_str);
-		BUG_ON(g->p.mb_proc < 0);
-	}
-
-	if (g->p.mb_proc_locked_str) {
-		g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
-		BUG_ON(g->p.mb_proc_locked < 0);
-		BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
-	}
-
-	if (g->p.mb_thread_str) {
-		g->p.mb_thread = atof(g->p.mb_thread_str);
-		BUG_ON(g->p.mb_thread < 0);
-	}
-
-	BUG_ON(g->p.nr_threads <= 0);
-	BUG_ON(g->p.nr_proc <= 0);
-
-	g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
-
-	g->p.bytes_global		= g->p.mb_global	*1024L*1024L;
-	g->p.bytes_process		= g->p.mb_proc		*1024L*1024L;
-	g->p.bytes_process_locked	= g->p.mb_proc_locked	*1024L*1024L;
-	g->p.bytes_thread		= g->p.mb_thread	*1024L*1024L;
-
-	g->data = setup_shared_data(g->p.bytes_global);
-
-	/* Startup serialization: */
-	init_global_mutex(&g->start_work_mutex);
-	init_global_mutex(&g->startup_mutex);
-	init_global_mutex(&g->startup_done_mutex);
-	init_global_mutex(&g->stop_work_mutex);
-
-	init_thread_data();
-
-	tprintf("#\n");
-	parse_setup_cpu_list();
-	parse_setup_node_list();
-	tprintf("#\n");
-
-	print_summary();
-
-	return 0;
-}
-
-static void deinit(void)
-{
-	free_data(g->data, g->p.bytes_global);
-	g->data = NULL;
-
-	deinit_thread_data();
-
-	free_data(g, sizeof(*g));
-	g = NULL;
-}
-
-/*
- * Print a short or long result, depending on the verbosity setting:
- */
-static void print_res(const char *name, double val,
-		      const char *txt_unit, const char *txt_short, const char *txt_long)
-{
-	if (!name)
-		name = "main,";
-
-	if (g->p.show_quiet)
-		printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
-	else
-		printf(" %14.3f %s\n", val, txt_long);
-}
-
-static int __bench_numa(const char *name)
-{
-	struct timeval start, stop, diff;
-	u64 runtime_ns_min, runtime_ns_sum;
-	pid_t *pids, pid, wpid;
-	double delta_runtime;
-	double runtime_avg;
-	double runtime_sec_max;
-	double runtime_sec_min;
-	int wait_stat;
-	double bytes;
-	int i, t;
-
-	if (init())
-		return -1;
-
-	pids = zalloc(g->p.nr_proc * sizeof(*pids));
-	pid = -1;
-
-	/* All threads try to acquire it, this way we can wait for them to start up: */
-	pthread_mutex_lock(&g->start_work_mutex);
-
-	if (g->p.serialize_startup) {
-		tprintf(" #\n");
-		tprintf(" # Startup synchronization: ..."); fflush(stdout);
-	}
-
-	gettimeofday(&start, NULL);
-
-	for (i = 0; i < g->p.nr_proc; i++) {
-		pid = fork();
-		dprintf(" # process %2d: PID %d\n", i, pid);
-
-		BUG_ON(pid < 0);
-		if (!pid) {
-			/* Child process: */
-			worker_process(i);
-
-			exit(0);
-		}
-		pids[i] = pid;
-
-	}
-	/* Wait for all the threads to start up: */
-	while (g->nr_tasks_started != g->p.nr_tasks)
-		usleep(1000);
-
-	BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
-
-	if (g->p.serialize_startup) {
-		double startup_sec;
-
-		pthread_mutex_lock(&g->startup_done_mutex);
-
-		/* This will start all threads: */
-		pthread_mutex_unlock(&g->start_work_mutex);
-
-		/* This mutex is locked - the last started thread will wake us: */
-		pthread_mutex_lock(&g->startup_done_mutex);
-
-		gettimeofday(&stop, NULL);
-
-		timersub(&stop, &start, &diff);
-
-		startup_sec = diff.tv_sec * 1000000000.0;
-		startup_sec += diff.tv_usec * 1000.0;
-		startup_sec /= 1e9;
-
-		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
-		tprintf(" #\n");
-
-		start = stop;
-		pthread_mutex_unlock(&g->startup_done_mutex);
-	} else {
-		gettimeofday(&start, NULL);
-	}
-
-	/* Parent process: */
-
-
-	for (i = 0; i < g->p.nr_proc; i++) {
-		wpid = waitpid(pids[i], &wait_stat, 0);
-		BUG_ON(wpid < 0);
-		BUG_ON(!WIFEXITED(wait_stat));
-
-	}
-
-	runtime_ns_sum = 0;
-	runtime_ns_min = -1LL;
-
-	for (t = 0; t < g->p.nr_tasks; t++) {
-		u64 thread_runtime_ns = g->threads[t].runtime_ns;
-
-		runtime_ns_sum += thread_runtime_ns;
-		runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
-	}
-
-	gettimeofday(&stop, NULL);
-	timersub(&stop, &start, &diff);
-
-	BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
-
-	tprintf("\n ###\n");
-	tprintf("\n");
-
-	runtime_sec_max = diff.tv_sec * 1000000000.0;
-	runtime_sec_max += diff.tv_usec * 1000.0;
-	runtime_sec_max /= 1e9;
-
-	runtime_sec_min = runtime_ns_min/1e9;
-
-	bytes = g->bytes_done;
-	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
-
-	if (g->p.measure_convergence) {
-		print_res(name, runtime_sec_max,
-			"secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
-	}
-
-	print_res(name, runtime_sec_max,
-		"secs,", "runtime-max/thread",	"secs slowest (max) thread-runtime");
-
-	print_res(name, runtime_sec_min,
-		"secs,", "runtime-min/thread",	"secs fastest (min) thread-runtime");
-
-	print_res(name, runtime_avg,
-		"secs,", "runtime-avg/thread",	"secs average thread-runtime");
-
-	delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
-	print_res(name, delta_runtime / runtime_sec_max * 100.0,
-		"%,", "spread-runtime/thread",	"% difference between max/avg runtime");
-
-	print_res(name, bytes / g->p.nr_tasks / 1e9,
-		"GB,", "data/thread",		"GB data processed, per thread");
-
-	print_res(name, bytes / 1e9,
-		"GB,", "data-total",		"GB data processed, total");
-
-	print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
-		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
-
-	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
-		"GB/sec,", "thread-speed",	"GB/sec/thread speed");
-
-	print_res(name, bytes / runtime_sec_max / 1e9,
-		"GB/sec,", "total-speed",	"GB/sec total speed");
-
-	free(pids);
-
-	deinit();
-
-	return 0;
-}
-
-#define MAX_ARGS 50
-
-static int command_size(const char **argv)
-{
-	int size = 0;
-
-	while (*argv) {
-		size++;
-		argv++;
-	}
-
-	BUG_ON(size >= MAX_ARGS);
-
-	return size;
-}
-
-static void init_params(struct params *p, const char *name, int argc, const char **argv)
-{
-	int i;
-
-	printf("\n # Running %s \"perf bench numa", name);
-
-	for (i = 0; i < argc; i++)
-		printf(" %s", argv[i]);
-
-	printf("\"\n");
-
-	memset(p, 0, sizeof(*p));
-
-	/* Initialize nonzero defaults: */
-
-	p->serialize_startup		= 1;
-	p->data_reads			= true;
-	p->data_writes			= true;
-	p->data_backwards		= true;
-	p->data_rand_walk		= true;
-	p->nr_loops			= -1;
-	p->init_random			= true;
-}
-
-static int run_bench_numa(const char *name, const char **argv)
-{
-	int argc = command_size(argv);
-
-	init_params(&p0, name, argc, argv);
-	argc = parse_options(argc, argv, options, bench_numa_usage, 0);
-	if (argc)
-		goto err;
-
-	if (__bench_numa(name))
-		goto err;
-
-	return 0;
-
-err:
-	usage_with_options(numa_usage, options);
-	return -1;
-}
-
-#define OPT_BW_RAM		"-s",  "20", "-zZq",    "--thp", " 1", "--no-data_rand_walk"
-#define OPT_BW_RAM_NOTHP	OPT_BW_RAM,		"--thp", "-1"
-
-#define OPT_CONV		"-s", "100", "-zZ0qcm", "--thp", " 1"
-#define OPT_CONV_NOTHP		OPT_CONV,		"--thp", "-1"
-
-#define OPT_BW			"-s",  "20", "-zZ0q",   "--thp", " 1"
-#define OPT_BW_NOTHP		OPT_BW,			"--thp", "-1"
-
-/*
- * The built-in test-suite executed by "perf bench numa -a".
- *
- * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
- */
-static const char *tests[][MAX_ARGS] = {
-   /* Basic single-stream NUMA bandwidth measurements: */
-   { "RAM-bw-local,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
-			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM },
-   { "RAM-bw-local-NOTHP,",
-			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
-			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM_NOTHP },
-   { "RAM-bw-remote,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
-			  "-C" ,   "0", "-M",   "1", OPT_BW_RAM },
-
-   /* 2-stream NUMA bandwidth measurements: */
-   { "RAM-bw-local-2x,",  "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
-			   "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
-   { "RAM-bw-remote-2x,", "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
-		 	   "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
-
-   /* Cross-stream NUMA bandwidth measurement: */
-   { "RAM-bw-cross,",     "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
-		 	   "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
-
-   /* Convergence latency measurements: */
-   { " 1x3-convergence,", "mem",  "-p",  "1", "-t",  "3", "-P",  "512", OPT_CONV },
-   { " 1x4-convergence,", "mem",  "-p",  "1", "-t",  "4", "-P",  "512", OPT_CONV },
-   { " 1x6-convergence,", "mem",  "-p",  "1", "-t",  "6", "-P", "1020", OPT_CONV },
-   { " 2x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
-   { " 3x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
-   { " 4x4-convergence,", "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV },
-   { " 4x4-convergence-NOTHP,",
-			  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
-   { " 4x6-convergence,", "mem",  "-p",  "4", "-t",  "6", "-P", "1020", OPT_CONV },
-   { " 4x8-convergence,", "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_CONV },
-   { " 8x4-convergence,", "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV },
-   { " 8x4-convergence-NOTHP,",
-			  "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
-   { " 3x1-convergence,", "mem",  "-p",  "3", "-t",  "1", "-P",  "512", OPT_CONV },
-   { " 4x1-convergence,", "mem",  "-p",  "4", "-t",  "1", "-P",  "512", OPT_CONV },
-   { " 8x1-convergence,", "mem",  "-p",  "8", "-t",  "1", "-P",  "512", OPT_CONV },
-   { "16x1-convergence,", "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_CONV },
-   { "32x1-convergence,", "mem",  "-p", "32", "-t",  "1", "-P",  "128", OPT_CONV },
-
-   /* Various NUMA process/thread layout bandwidth measurements: */
-   { " 2x1-bw-process,",  "mem",  "-p",  "2", "-t",  "1", "-P", "1024", OPT_BW },
-   { " 3x1-bw-process,",  "mem",  "-p",  "3", "-t",  "1", "-P", "1024", OPT_BW },
-   { " 4x1-bw-process,",  "mem",  "-p",  "4", "-t",  "1", "-P", "1024", OPT_BW },
-   { " 8x1-bw-process,",  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW },
-   { " 8x1-bw-process-NOTHP,",
-			  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW_NOTHP },
-   { "16x1-bw-process,",  "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_BW },
-
-   { " 4x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "4", "-T",  "256", OPT_BW },
-   { " 8x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "8", "-T",  "256", OPT_BW },
-   { "16x1-bw-thread,",   "mem",  "-p",  "1", "-t", "16", "-T",  "128", OPT_BW },
-   { "32x1-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-T",   "64", OPT_BW },
-
-   { " 2x3-bw-thread,",	  "mem",  "-p",  "2", "-t",  "3", "-P",  "512", OPT_BW },
-   { " 4x4-bw-thread,",	  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_BW },
-   { " 4x6-bw-thread,",	  "mem",  "-p",  "4", "-t",  "6", "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread,",	  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW },
-   { " 4x8-bw-thread-NOTHP,",
-			  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW_NOTHP },
-   { " 3x3-bw-thread,",	  "mem",  "-p",  "3", "-t",  "3", "-P",  "512", OPT_BW },
-   { " 5x5-bw-thread,",	  "mem",  "-p",  "5", "-t",  "5", "-P",  "512", OPT_BW },
-
-   { "2x16-bw-thread,",   "mem",  "-p",  "2", "-t", "16", "-P",  "512", OPT_BW },
-   { "1x32-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-P", "2048", OPT_BW },
-
-   { "numa02-bw,",	  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW },
-   { "numa02-bw-NOTHP,",  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW_NOTHP },
-   { "numa01-bw-thread,", "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW },
-   { "numa01-bw-thread-NOTHP,",
-			  "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW_NOTHP },
-};
-
-static int bench_all(void)
-{
-	int nr = ARRAY_SIZE(tests);
-	int ret;
-	int i;
-
-	ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
-	BUG_ON(ret < 0);
-
-	for (i = 0; i < nr; i++) {
-		if (run_bench_numa(tests[i][0], tests[i] + 1))
-			return -1;
-	}
-
-	printf("\n");
-
-	return 0;
-}
-
-int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused)
-{
-	init_params(&p0, "main,", argc, argv);
-	argc = parse_options(argc, argv, options, bench_numa_usage, 0);
-	if (argc)
-		goto err;
-
-	if (p0.run_all)
-		return bench_all();
-
-	if (__bench_numa(NULL))
-		goto err;
-
-	return 0;
-
-err:
-	usage_with_options(numa_usage, options);
-	return -1;
-}
diff --git a/trunk/tools/perf/builtin-annotate.c b/trunk/tools/perf/builtin-annotate.c
index 2e6961ea3184..dc870cf31b79 100644
--- a/trunk/tools/perf/builtin-annotate.c
+++ b/trunk/tools/perf/builtin-annotate.c
@@ -34,10 +34,9 @@
 
 struct perf_annotate {
 	struct perf_tool tool;
-	bool	   force, use_tui, use_stdio, use_gtk;
+	bool	   force, use_tui, use_stdio;
 	bool	   full_paths;
 	bool	   print_line;
-	bool	   skip_missing;
 	const char *sym_hist_filter;
 	const char *cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -139,22 +138,9 @@ static void hists__find_annotations(struct hists *self, int evidx,
 			continue;
 		}
 
-		if (use_browser == 2) {
-			int ret;
-
-			ret = hist_entry__gtk_annotate(he, evidx, NULL);
-			if (!ret || !ann->skip_missing)
-				return;
-
-			/* skip missing symbols */
-			nd = rb_next(nd);
-		} else if (use_browser == 1) {
+		if (use_browser > 0) {
 			key = hist_entry__tui_annotate(he, evidx, NULL);
 			switch (key) {
-			case -1:
-				if (!ann->skip_missing)
-					return;
-				/* fall through */
 			case K_RIGHT:
 				next = rb_next(nd);
 				break;
@@ -238,10 +224,6 @@ static int __cmd_annotate(struct perf_annotate *ann)
 		ui__error("The %s file has no samples!\n", session->filename);
 		goto out_delete;
 	}
-
-	if (use_browser == 2)
-		perf_gtk__show_annotations();
-
 out_delete:
 	/*
 	 * Speed up the exit process, for large files this can
@@ -288,7 +270,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
-	OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
 	OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
 	OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
@@ -299,8 +280,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "print matching source lines (may be slow)"),
 	OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
 		    "Don't shorten the displayed pathnames"),
-	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
-		    "Skip symbols that cannot be annotated"),
 	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		   "Look for files with symbols relative to this directory"),
@@ -321,8 +300,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		use_browser = 0;
 	else if (annotate.use_tui)
 		use_browser = 1;
-	else if (annotate.use_gtk)
-		use_browser = 2;
 
 	setup_browser(true);
 
@@ -332,8 +309,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (symbol__init() < 0)
 		return -1;
 
-	if (setup_sorting() < 0)
-		usage_with_options(annotate_usage, options);
+	setup_sorting(annotate_usage, options);
 
 	if (argc) {
 		/*
diff --git a/trunk/tools/perf/builtin-bench.c b/trunk/tools/perf/builtin-bench.c
index 77298bf892b8..cae9a5fd2ecf 100644
--- a/trunk/tools/perf/builtin-bench.c
+++ b/trunk/tools/perf/builtin-bench.c
@@ -35,18 +35,6 @@ struct bench_suite {
 /* sentinel: easy for help */
 #define suite_all { "all", "Test all benchmark suites", NULL }
 
-#ifdef LIBNUMA_SUPPORT
-static struct bench_suite numa_suites[] = {
-	{ "mem",
-	  "Benchmark for NUMA workloads",
-	  bench_numa },
-	suite_all,
-	{ NULL,
-	  NULL,
-	  NULL                  }
-};
-#endif
-
 static struct bench_suite sched_suites[] = {
 	{ "messaging",
 	  "Benchmark for scheduler and IPC mechanisms",
@@ -80,11 +68,6 @@ struct bench_subsys {
 };
 
 static struct bench_subsys subsystems[] = {
-#ifdef LIBNUMA_SUPPORT
-	{ "numa",
-	  "NUMA scheduling and MM behavior",
-	  numa_suites },
-#endif
 	{ "sched",
 	  "scheduler and IPC mechanism",
 	  sched_suites },
@@ -176,7 +159,6 @@ static void all_suite(struct bench_subsys *subsys)	  /* FROM HERE */
 		printf("# Running %s/%s benchmark...\n",
 		       subsys->name,
 		       suites[i].name);
-		fflush(stdout);
 
 		argv[1] = suites[i].name;
 		suites[i].fn(1, argv, NULL);
@@ -243,7 +225,6 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
 				printf("# Running %s/%s benchmark...\n",
 				       subsystems[i].name,
 				       subsystems[i].suites[j].name);
-			fflush(stdout);
 			status = subsystems[i].suites[j].fn(argc - 1,
 							    argv + 1, prefix);
 			goto end;
diff --git a/trunk/tools/perf/builtin-buildid-cache.c b/trunk/tools/perf/builtin-buildid-cache.c
index c96c8fa38243..fae8b250b2ca 100644
--- a/trunk/tools/perf/builtin-buildid-cache.c
+++ b/trunk/tools/perf/builtin-buildid-cache.c
@@ -14,7 +14,6 @@
 #include "util/parse-options.h"
 #include "util/strlist.h"
 #include "util/build-id.h"
-#include "util/session.h"
 #include "util/symbol.h"
 
 static int build_id_cache__add_file(const char *filename, const char *debugdir)
@@ -59,89 +58,19 @@ static int build_id_cache__remove_file(const char *filename,
 	return err;
 }
 
-static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
-{
-	char filename[PATH_MAX];
-	u8 build_id[BUILD_ID_SIZE];
-
-	if (dso__build_id_filename(dso, filename, sizeof(filename)) &&
-	    filename__read_build_id(filename, build_id,
-				    sizeof(build_id)) != sizeof(build_id)) {
-		if (errno == ENOENT)
-			return false;
-
-		pr_warning("Problems with %s file, consider removing it from the cache\n", 
-			   filename);
-	} else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
-		pr_warning("Problems with %s file, consider removing it from the cache\n", 
-			   filename);
-	}
-
-	return true;
-}
-
-static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp)
-{
-	struct perf_session *session = perf_session__new(filename, O_RDONLY,
-							 force, false, NULL);
-	if (session == NULL)
-		return -1;
-
-	perf_session__fprintf_dsos_buildid(session, fp, dso__missing_buildid_cache, 0);
-	perf_session__delete(session);
-
-	return 0;
-}
-
-static int build_id_cache__update_file(const char *filename,
-				       const char *debugdir)
-{
-	u8 build_id[BUILD_ID_SIZE];
-	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
-	int err;
-
-	if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
-		pr_debug("Couldn't read a build-id in %s\n", filename);
-		return -1;
-	}
-
-	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
-	err = build_id_cache__remove_s(sbuild_id, debugdir);
-	if (!err) {
-		err = build_id_cache__add_s(sbuild_id, debugdir, filename,
-					    false, false);
-	}
-	if (verbose)
-		pr_info("Updating %s %s: %s\n", sbuild_id, filename,
-			err ? "FAIL" : "Ok");
-
-	return err;
-}
-
 int cmd_buildid_cache(int argc, const char **argv,
 		      const char *prefix __maybe_unused)
 {
 	struct strlist *list;
 	struct str_node *pos;
-	int ret = 0;
-	bool force = false;
 	char debugdir[PATH_MAX];
 	char const *add_name_list_str = NULL,
-		   *remove_name_list_str = NULL,
-		   *missing_filename = NULL,
-		   *update_name_list_str = NULL;
-
+		   *remove_name_list_str = NULL;
 	const struct option buildid_cache_options[] = {
 	OPT_STRING('a', "add", &add_name_list_str,
 		   "file list", "file(s) to add"),
 	OPT_STRING('r', "remove", &remove_name_list_str, "file list",
 		    "file(s) to remove"),
-	OPT_STRING('M', "missing", &missing_filename, "file",
-		   "to find missing build ids in the cache"),
-	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
-	OPT_STRING('u', "update", &update_name_list_str, "file list",
-		    "file(s) to update"),
 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
 	OPT_END()
 	};
@@ -196,26 +125,5 @@ int cmd_buildid_cache(int argc, const char **argv,
 		}
 	}
 
-	if (missing_filename)
-		ret = build_id_cache__fprintf_missing(missing_filename, force, stdout);
-
-	if (update_name_list_str) {
-		list = strlist__new(true, update_name_list_str);
-		if (list) {
-			strlist__for_each(pos, list)
-				if (build_id_cache__update_file(pos->s, debugdir)) {
-					if (errno == ENOENT) {
-						pr_debug("%s wasn't in the cache\n",
-							 pos->s);
-						continue;
-					}
-					pr_warning("Couldn't update %s: %s\n",
-						   pos->s, strerror(errno));
-				}
-
-			strlist__delete(list);
-		}
-	}
-
-	return ret;
+	return 0;
 }
diff --git a/trunk/tools/perf/builtin-buildid-list.c b/trunk/tools/perf/builtin-buildid-list.c
index e74366a13218..a82d99fec83e 100644
--- a/trunk/tools/perf/builtin-buildid-list.c
+++ b/trunk/tools/perf/builtin-buildid-list.c
@@ -44,26 +44,23 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
 	return fprintf(fp, "%s\n", sbuild_id);
 }
 
-static bool dso__skip_buildid(struct dso *dso, int with_hits)
-{
-	return with_hits && !dso->hit;
-}
-
 static int perf_session__list_build_ids(bool force, bool with_hits)
 {
 	struct perf_session *session;
 
 	symbol__elf_init();
-	/*
-	 * See if this is an ELF file first:
-	 */
-	if (filename__fprintf_build_id(input_name, stdout))
-		goto out;
 
 	session = perf_session__new(input_name, O_RDONLY, force, false,
 				    &build_id__mark_dso_hit_ops);
 	if (session == NULL)
 		return -1;
+
+	/*
+	 * See if this is an ELF file first:
+	 */
+	if (filename__fprintf_build_id(session->filename, stdout))
+		goto out;
+
 	/*
 	 * in pipe-mode, the only way to get the buildids is to parse
 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
@@ -71,9 +68,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 	if (with_hits || session->fd_pipe)
 		perf_session__process_events(session, &build_id__mark_dso_hit_ops);
 
-	perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
-	perf_session__delete(session);
+	perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
 out:
+	perf_session__delete(session);
 	return 0;
 }
 
diff --git a/trunk/tools/perf/builtin-diff.c b/trunk/tools/perf/builtin-diff.c
index d207a97a2db1..93b852f8a5d5 100644
--- a/trunk/tools/perf/builtin-diff.c
+++ b/trunk/tools/perf/builtin-diff.c
@@ -23,6 +23,7 @@ static char const *input_old = "perf.data.old",
 		  *input_new = "perf.data";
 static char	  diff__default_sort_order[] = "dso,symbol";
 static bool  force;
+static bool show_displacement;
 static bool show_period;
 static bool show_formula;
 static bool show_baseline_only;
@@ -145,47 +146,58 @@ static int setup_compute(const struct option *opt, const char *str,
 	return -EINVAL;
 }
 
-double perf_diff__period_percent(struct hist_entry *he, u64 period)
+static double get_period_percent(struct hist_entry *he, u64 period)
 {
 	u64 total = he->hists->stats.total_period;
 	return (period * 100.0) / total;
 }
 
-double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair)
+double perf_diff__compute_delta(struct hist_entry *he)
 {
-	double new_percent = perf_diff__period_percent(he, he->stat.period);
-	double old_percent = perf_diff__period_percent(pair, pair->stat.period);
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	double new_percent = get_period_percent(he, he->stat.period);
+	double old_percent = pair ? get_period_percent(pair, pair->stat.period) : 0.0;
 
 	he->diff.period_ratio_delta = new_percent - old_percent;
 	he->diff.computed = true;
 	return he->diff.period_ratio_delta;
 }
 
-double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair)
+double perf_diff__compute_ratio(struct hist_entry *he)
 {
+	struct hist_entry *pair = hist_entry__next_pair(he);
 	double new_period = he->stat.period;
-	double old_period = pair->stat.period;
+	double old_period = pair ? pair->stat.period : 0;
 
 	he->diff.computed = true;
-	he->diff.period_ratio = new_period / old_period;
+	he->diff.period_ratio = pair ? (new_period / old_period) : 0;
 	return he->diff.period_ratio;
 }
 
-s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
+s64 perf_diff__compute_wdiff(struct hist_entry *he)
 {
+	struct hist_entry *pair = hist_entry__next_pair(he);
 	u64 new_period = he->stat.period;
-	u64 old_period = pair->stat.period;
+	u64 old_period = pair ? pair->stat.period : 0;
 
 	he->diff.computed = true;
-	he->diff.wdiff = new_period * compute_wdiff_w2 -
-			 old_period * compute_wdiff_w1;
+
+	if (!pair)
+		he->diff.wdiff = 0;
+	else
+		he->diff.wdiff = new_period * compute_wdiff_w2 -
+				 old_period * compute_wdiff_w1;
 
 	return he->diff.wdiff;
 }
 
-static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
-			 char *buf, size_t size)
+static int formula_delta(struct hist_entry *he, char *buf, size_t size)
 {
+	struct hist_entry *pair = hist_entry__next_pair(he);
+
+	if (!pair)
+		return -1;
+
 	return scnprintf(buf, size,
 			 "(%" PRIu64 " * 100 / %" PRIu64 ") - "
 			 "(%" PRIu64 " * 100 / %" PRIu64 ")",
@@ -193,36 +205,41 @@ static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
 			  pair->stat.period, pair->hists->stats.total_period);
 }
 
-static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
-			 char *buf, size_t size)
+static int formula_ratio(struct hist_entry *he, char *buf, size_t size)
 {
+	struct hist_entry *pair = hist_entry__next_pair(he);
 	double new_period = he->stat.period;
-	double old_period = pair->stat.period;
+	double old_period = pair ? pair->stat.period : 0;
+
+	if (!pair)
+		return -1;
 
 	return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
 }
 
-static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
-			 char *buf, size_t size)
+static int formula_wdiff(struct hist_entry *he, char *buf, size_t size)
 {
+	struct hist_entry *pair = hist_entry__next_pair(he);
 	u64 new_period = he->stat.period;
-	u64 old_period = pair->stat.period;
+	u64 old_period = pair ? pair->stat.period : 0;
+
+	if (!pair)
+		return -1;
 
 	return scnprintf(buf, size,
 		  "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
 		  new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
 }
 
-int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
-		       char *buf, size_t size)
+int perf_diff__formula(char *buf, size_t size, struct hist_entry *he)
 {
 	switch (compute) {
 	case COMPUTE_DELTA:
-		return formula_delta(he, pair, buf, size);
+		return formula_delta(he, buf, size);
 	case COMPUTE_RATIO:
-		return formula_ratio(he, pair, buf, size);
+		return formula_ratio(he, buf, size);
 	case COMPUTE_WEIGHTED_DIFF:
-		return formula_wdiff(he, pair, buf, size);
+		return formula_wdiff(he, buf, size);
 	default:
 		BUG_ON(1);
 	}
@@ -275,6 +292,48 @@ static struct perf_tool tool = {
 	.ordering_requires_timestamps = true,
 };
 
+static void insert_hist_entry_by_name(struct rb_root *root,
+				      struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+		if (hist_entry__cmp(he, iter) < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+}
+
+static void hists__name_resort(struct hists *self, bool sort)
+{
+	unsigned long position = 1;
+	struct rb_root tmp = RB_ROOT;
+	struct rb_node *next = rb_first(&self->entries);
+
+	while (next != NULL) {
+		struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
+
+		next = rb_next(&n->rb_node);
+		n->position = position++;
+
+		if (sort) {
+			rb_erase(&n->rb_node, &self->entries);
+			insert_hist_entry_by_name(&tmp, n);
+		}
+	}
+
+	if (sort)
+		self->entries = tmp;
+}
+
 static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
 				      struct perf_evlist *evlist)
 {
@@ -287,34 +346,34 @@ static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
 	return NULL;
 }
 
-static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
+static void perf_evlist__resort_hists(struct perf_evlist *evlist, bool name)
 {
 	struct perf_evsel *evsel;
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		struct hists *hists = &evsel->hists;
 
-		hists__collapse_resort(hists);
+		hists__output_resort(hists);
+
+		/*
+		 * When name is false, hists__name_resort() only
+		 * assigns positions and does not re-sort by name.
+		 */
+		if (name || ((!name) && show_displacement))
+			hists__name_resort(hists, name);
 	}
 }
 
 static void hists__baseline_only(struct hists *hists)
 {
-	struct rb_root *root;
-	struct rb_node *next;
-
-	if (sort__need_collapse)
-		root = &hists->entries_collapsed;
-	else
-		root = hists->entries_in;
+	struct rb_node *next = rb_first(&hists->entries);
 
-	next = rb_first(root);
 	while (next != NULL) {
-		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
+		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
 
-		next = rb_next(&he->rb_node_in);
+		next = rb_next(&he->rb_node);
 		if (!hist_entry__next_pair(he)) {
-			rb_erase(&he->rb_node_in, root);
+			rb_erase(&he->rb_node, &hists->entries);
 			hist_entry__free(he);
 		}
 	}
@@ -326,21 +385,18 @@ static void hists__precompute(struct hists *hists)
 
 	while (next != NULL) {
 		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
-		struct hist_entry *pair = hist_entry__next_pair(he);
 
 		next = rb_next(&he->rb_node);
-		if (!pair)
-			continue;
 
 		switch (compute) {
 		case COMPUTE_DELTA:
-			perf_diff__compute_delta(he, pair);
+			perf_diff__compute_delta(he);
 			break;
 		case COMPUTE_RATIO:
-			perf_diff__compute_ratio(he, pair);
+			perf_diff__compute_ratio(he);
 			break;
 		case COMPUTE_WEIGHTED_DIFF:
-			perf_diff__compute_wdiff(he, pair);
+			perf_diff__compute_wdiff(he);
 			break;
 		default:
 			BUG_ON(1);
@@ -414,30 +470,19 @@ static void insert_hist_entry_by_compute(struct rb_root *root,
 
 static void hists__compute_resort(struct hists *hists)
 {
-	struct rb_root *root;
-	struct rb_node *next;
-
-	if (sort__need_collapse)
-		root = &hists->entries_collapsed;
-	else
-		root = hists->entries_in;
-
-	hists->entries = RB_ROOT;
-	next = rb_first(root);
-
-	hists->nr_entries = 0;
-	hists->stats.total_period = 0;
-	hists__reset_col_len(hists);
+	struct rb_root tmp = RB_ROOT;
+	struct rb_node *next = rb_first(&hists->entries);
 
 	while (next != NULL) {
-		struct hist_entry *he;
+		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
 
-		he = rb_entry(next, struct hist_entry, rb_node_in);
-		next = rb_next(&he->rb_node_in);
+		next = rb_next(&he->rb_node);
 
-		insert_hist_entry_by_compute(&hists->entries, he, compute);
-		hists__inc_nr_entries(hists, he);
+		rb_erase(&he->rb_node, &hists->entries);
+		insert_hist_entry_by_compute(&tmp, he, compute);
 	}
+
+	hists->entries = tmp;
 }
 
 static void hists__process(struct hists *old, struct hists *new)
@@ -452,8 +497,6 @@ static void hists__process(struct hists *old, struct hists *new)
 	if (sort_compute) {
 		hists__precompute(new);
 		hists__compute_resort(new);
-	} else {
-		hists__output_resort(new);
 	}
 
 	hists__fprintf(new, true, 0, 0, stdout);
@@ -485,8 +528,8 @@ static int __cmd_diff(void)
 	evlist_old = older->evlist;
 	evlist_new = newer->evlist;
 
-	perf_evlist__collapse_resort(evlist_old);
-	perf_evlist__collapse_resort(evlist_new);
+	perf_evlist__resort_hists(evlist_old, true);
+	perf_evlist__resort_hists(evlist_new, false);
 
 	list_for_each_entry(evsel, &evlist_new->entries, node) {
 		struct perf_evsel *evsel_old;
@@ -519,6 +562,8 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('M', "displacement", &show_displacement,
+		    "Show position displacement relative to baseline"),
 	OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
 		    "Show only items with match in baseline"),
 	OPT_CALLBACK('c', "compute", &compute,
@@ -552,32 +597,40 @@ static const struct option options[] = {
 
 static void ui_init(void)
 {
+	perf_hpp__init();
+
+	/* No overhead column. */
+	perf_hpp__column_enable(PERF_HPP__OVERHEAD, false);
+
 	/*
-	 * Display baseline/delta/ratio
+	 * Display baseline/delta/ratio/displacement/
 	 * formula/periods columns.
 	 */
-	perf_hpp__column_enable(PERF_HPP__BASELINE);
+	perf_hpp__column_enable(PERF_HPP__BASELINE, true);
 
 	switch (compute) {
 	case COMPUTE_DELTA:
-		perf_hpp__column_enable(PERF_HPP__DELTA);
+		perf_hpp__column_enable(PERF_HPP__DELTA, true);
 		break;
 	case COMPUTE_RATIO:
-		perf_hpp__column_enable(PERF_HPP__RATIO);
+		perf_hpp__column_enable(PERF_HPP__RATIO, true);
 		break;
 	case COMPUTE_WEIGHTED_DIFF:
-		perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF);
+		perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF, true);
 		break;
 	default:
 		BUG_ON(1);
 	};
 
+	if (show_displacement)
+		perf_hpp__column_enable(PERF_HPP__DISPL, true);
+
 	if (show_formula)
-		perf_hpp__column_enable(PERF_HPP__FORMULA);
+		perf_hpp__column_enable(PERF_HPP__FORMULA, true);
 
 	if (show_period) {
-		perf_hpp__column_enable(PERF_HPP__PERIOD);
-		perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE);
+		perf_hpp__column_enable(PERF_HPP__PERIOD, true);
+		perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE, true);
 	}
 }
 
@@ -605,9 +658,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	ui_init();
 
-	if (setup_sorting() < 0)
-		usage_with_options(diff_usage, options);
-
+	setup_sorting(diff_usage, options);
 	setup_pager();
 
 	sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
diff --git a/trunk/tools/perf/builtin-evlist.c b/trunk/tools/perf/builtin-evlist.c
index 05bd9dfe875c..c20f1dcfb7e2 100644
--- a/trunk/tools/perf/builtin-evlist.c
+++ b/trunk/tools/perf/builtin-evlist.c
@@ -15,6 +15,39 @@
 #include "util/parse-options.h"
 #include "util/session.h"
 
+struct perf_attr_details {
+	bool freq;
+	bool verbose;
+};
+
+static int comma_printf(bool *first, const char *fmt, ...)
+{
+	va_list args;
+	int ret = 0;
+
+	if (!*first) {
+		ret += printf(",");
+	} else {
+		ret += printf(":");
+		*first = false;
+	}
+
+	va_start(args, fmt);
+	ret += vprintf(fmt, args);
+	va_end(args);
+	return ret;
+}
+
+static int __if_print(bool *first, const char *field, u64 value)
+{
+	if (value == 0)
+		return 0;
+
+	return comma_printf(first, " %s: %" PRIu64, field, value);
+}
+
+#define if_print(field) __if_print(&first, #field, pos->attr.field)
+
 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
 {
 	struct perf_session *session;
@@ -24,8 +57,52 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
 	if (session == NULL)
 		return -ENOMEM;
 
-	list_for_each_entry(pos, &session->evlist->entries, node)
-		perf_evsel__fprintf(pos, details, stdout);
+	list_for_each_entry(pos, &session->evlist->entries, node) {
+		bool first = true;
+
+		printf("%s", perf_evsel__name(pos));
+
+		if (details->verbose || details->freq) {
+			comma_printf(&first, " sample_freq=%" PRIu64,
+				     (u64)pos->attr.sample_freq);
+		}
+
+		if (details->verbose) {
+			if_print(type);
+			if_print(config);
+			if_print(config1);
+			if_print(config2);
+			if_print(size);
+			if_print(sample_type);
+			if_print(read_format);
+			if_print(disabled);
+			if_print(inherit);
+			if_print(pinned);
+			if_print(exclusive);
+			if_print(exclude_user);
+			if_print(exclude_kernel);
+			if_print(exclude_hv);
+			if_print(exclude_idle);
+			if_print(mmap);
+			if_print(comm);
+			if_print(freq);
+			if_print(inherit_stat);
+			if_print(enable_on_exec);
+			if_print(task);
+			if_print(watermark);
+			if_print(precise_ip);
+			if_print(mmap_data);
+			if_print(sample_id_all);
+			if_print(exclude_host);
+			if_print(exclude_guest);
+			if_print(__reserved_1);
+			if_print(wakeup_events);
+			if_print(bp_type);
+			if_print(branch_sample_type);
+		}
+
+		putchar('\n');
+	}
 
 	perf_session__delete(session);
 	return 0;
@@ -39,8 +116,6 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
 	OPT_BOOLEAN('v', "verbose", &details.verbose,
 		    "Show all event attr details"),
-	OPT_BOOLEAN('g', "group", &details.event_group,
-		    "Show event group information"),
 	OPT_END()
 	};
 	const char * const evlist_usage[] = {
@@ -52,10 +127,5 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (argc)
 		usage_with_options(evlist_usage, options);
 
-	if (details.event_group && (details.verbose || details.freq)) {
-		pr_err("--group option is not compatible with other options\n");
-		usage_with_options(evlist_usage, options);
-	}
-
 	return __cmd_evlist(input_name, &details);
 }
diff --git a/trunk/tools/perf/builtin-kmem.c b/trunk/tools/perf/builtin-kmem.c
index 46878daca5cc..0b4b796167be 100644
--- a/trunk/tools/perf/builtin-kmem.c
+++ b/trunk/tools/perf/builtin-kmem.c
@@ -17,7 +17,6 @@
 #include "util/debug.h"
 
 #include <linux/rbtree.h>
-#include <linux/string.h>
 
 struct alloc_stat;
 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
@@ -341,7 +340,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
 			   int n_lines, int is_caller)
 {
 	struct rb_node *next;
-	struct machine *machine = &session->machines.host;
+	struct machine *machine;
 
 	printf("%.102s\n", graph_dotted_line);
 	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
@@ -350,6 +349,11 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
 
 	next = rb_first(root);
 
+	machine = perf_session__find_host_machine(session);
+	if (!machine) {
+		pr_err("__print_result: couldn't find kernel information\n");
+		return;
+	}
 	while (next && n_lines--) {
 		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
 						   node);
@@ -610,7 +614,8 @@ static struct sort_dimension *avail_sorts[] = {
 	&pingpong_sort_dimension,
 };
 
-#define NUM_AVAIL_SORTS	((int)ARRAY_SIZE(avail_sorts))
+#define NUM_AVAIL_SORTS	\
+	(int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
 
 static int sort_dimension__add(const char *tok, struct list_head *list)
 {
@@ -619,11 +624,12 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 
 	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
 		if (!strcmp(avail_sorts[i]->name, tok)) {
-			sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
+			sort = malloc(sizeof(*sort));
 			if (!sort) {
-				pr_err("%s: memdup failed\n", __func__);
+				pr_err("%s: malloc failed\n", __func__);
 				return -1;
 			}
+			memcpy(sort, avail_sorts[i], sizeof(*sort));
 			list_add_tail(&sort->list, list);
 			return 0;
 		}
diff --git a/trunk/tools/perf/builtin-kvm.c b/trunk/tools/perf/builtin-kvm.c
index 37a769d7f9fe..ca3f80ebc100 100644
--- a/trunk/tools/perf/builtin-kvm.c
+++ b/trunk/tools/perf/builtin-kvm.c
@@ -973,7 +973,8 @@ __cmd_buildid_list(const char *file_name, int argc, const char **argv)
 
 int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	const char *file_name = NULL;
+	const char *file_name;
+
 	const struct option kvm_options[] = {
 		OPT_STRING('i', "input", &file_name, "file",
 			   "Input file name"),
diff --git a/trunk/tools/perf/builtin-record.c b/trunk/tools/perf/builtin-record.c
index 774c90713a53..f3151d3c70ce 100644
--- a/trunk/tools/perf/builtin-record.c
+++ b/trunk/tools/perf/builtin-record.c
@@ -224,28 +224,130 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
 
 static int perf_record__open(struct perf_record *rec)
 {
-	char msg[512];
 	struct perf_evsel *pos;
 	struct perf_evlist *evlist = rec->evlist;
 	struct perf_session *session = rec->session;
 	struct perf_record_opts *opts = &rec->opts;
 	int rc = 0;
 
-	perf_evlist__config(evlist, opts);
+	/*
+	 * Set the evsel leader links before we configure attributes,
+	 * since some might depend on this info.
+	 */
+	if (opts->group)
+		perf_evlist__set_leader(evlist);
+
+	perf_evlist__config_attrs(evlist, opts);
 
 	list_for_each_entry(pos, &evlist->entries, node) {
+		struct perf_event_attr *attr = &pos->attr;
+		/*
+		 * Check if parse_single_tracepoint_event has already asked for
+		 * PERF_SAMPLE_TIME.
+		 *
+		 * XXX this is kludgy but short term fix for problems introduced by
+		 * eac23d1c that broke 'perf script' by having different sample_types
+		 * when using multiple tracepoint events when we use a perf binary
+		 * that tries to use sample_id_all on an older kernel.
+		 *
+		 * We need to move counter creation to perf_session, support
+		 * different sample_types, etc.
+		 */
+		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
+
+fallback_missing_features:
+		if (opts->exclude_guest_missing)
+			attr->exclude_guest = attr->exclude_host = 0;
+retry_sample_id:
+		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 try_again:
 		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
-			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
+			int err = errno;
+
+			if (err == EPERM || err == EACCES) {
+				ui__error_paranoid();
+				rc = -err;
+				goto out;
+			} else if (err ==  ENODEV && opts->target.cpu_list) {
+				pr_err("No such device - did you specify"
+				       " an out-of-range profile CPU?\n");
+				rc = -err;
+				goto out;
+			} else if (err == EINVAL) {
+				if (!opts->exclude_guest_missing &&
+				    (attr->exclude_guest || attr->exclude_host)) {
+					pr_debug("Old kernel, cannot exclude "
+						 "guest or host samples.\n");
+					opts->exclude_guest_missing = true;
+					goto fallback_missing_features;
+				} else if (!opts->sample_id_all_missing) {
+					/*
+					 * Old kernel, no attr->sample_id_type_all field
+					 */
+					opts->sample_id_all_missing = true;
+					if (!opts->sample_time && !opts->raw_samples && !time_needed)
+						attr->sample_type &= ~PERF_SAMPLE_TIME;
+
+					goto retry_sample_id;
+				}
+			}
+
+			/*
+			 * If it's cycles then fall back to hrtimer
+			 * based cpu-clock-tick sw counter, which
+			 * is always available even if no PMU support.
+			 *
+			 * PPC returns ENXIO until 2.6.37 (behavior changed
+			 * with commit b0a873e).
+			 */
+			if ((err == ENOENT || err == ENXIO)
+					&& attr->type == PERF_TYPE_HARDWARE
+					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+
 				if (verbose)
-					ui__warning("%s\n", msg);
+					ui__warning("The cycles event is not supported, "
+						    "trying to fall back to cpu-clock-ticks\n");
+				attr->type = PERF_TYPE_SOFTWARE;
+				attr->config = PERF_COUNT_SW_CPU_CLOCK;
+				if (pos->name) {
+					free(pos->name);
+					pos->name = NULL;
+				}
 				goto try_again;
 			}
 
-			rc = -errno;
-			perf_evsel__open_strerror(pos, &opts->target,
-						  errno, msg, sizeof(msg));
-			ui__error("%s\n", msg);
+			if (err == ENOENT) {
+				ui__error("The %s event is not supported.\n",
+					  perf_evsel__name(pos));
+				rc = -err;
+				goto out;
+			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
+				ui__error("\'precise\' request may not be supported. "
+					  "Try removing 'p' modifier\n");
+				rc = -err;
+				goto out;
+			}
+
+			printf("\n");
+			error("sys_perf_event_open() syscall returned with %d "
+			      "(%s) for event %s. /bin/dmesg may provide "
+			      "additional information.\n",
+			      err, strerror(err), perf_evsel__name(pos));
+
+#if defined(__i386__) || defined(__x86_64__)
+			if (attr->type == PERF_TYPE_HARDWARE &&
+			    err == EOPNOTSUPP) {
+				pr_err("No hardware sampling interrupt available."
+				       " No APIC? If so then you can boot the kernel"
+				       " with the \"lapic\" boot parameter to"
+				       " force-enable it.\n");
+				rc = -err;
+				goto out;
+			}
+#endif
+
+			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+			rc = -err;
 			goto out;
 		}
 	}
@@ -328,6 +430,10 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 {
 	int err;
 	struct perf_tool *tool = data;
+
+	if (machine__is_host(machine))
+		return;
+
 	/*
 	 *As for guest kernel when processing subcommand record&report,
 	 *we arrange module mmap prior to guest kernel mmap and trigger
@@ -486,9 +592,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 		goto out_delete_session;
 	}
 
-	if (!evsel_list->nr_groups)
-		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
-
 	/*
 	 * perf_session__delete(session) will be called at perf_record__exit()
 	 */
@@ -515,7 +618,12 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 
 	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
 
-	machine = &session->machines.host;
+	machine = perf_session__find_host_machine(session);
+	if (!machine) {
+		pr_err("Couldn't find native kernel information.\n");
+		err = -1;
+		goto out_delete_session;
+	}
 
 	if (opts->pipe_output) {
 		err = perf_event__synthesize_attrs(tool, session,
@@ -568,10 +676,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 		       "Check /proc/modules permission or run as root.\n");
 
-	if (perf_guest) {
-		machines__process_guests(&session->machines,
-					 perf_event__synthesize_guest_os, tool);
-	}
+	if (perf_guest)
+		perf_session__process_machines(session, tool,
+					       perf_event__synthesize_guest_os);
 
 	if (!opts->target.system_wide)
 		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
@@ -768,10 +875,11 @@ static int get_stack_size(char *str, unsigned long *_size)
 }
 #endif /* LIBUNWIND_SUPPORT */
 
-int record_parse_callchain_opt(const struct option *opt,
-			       const char *arg, int unset)
+static int
+parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
+		    int unset)
 {
-	struct perf_record_opts *opts = opt->value;
+	struct perf_record *rec = (struct perf_record *)opt->value;
 	char *tok, *name, *saveptr = NULL;
 	char *buf;
 	int ret = -1;
@@ -797,7 +905,7 @@ int record_parse_callchain_opt(const struct option *opt,
 		/* Framepointer style */
 		if (!strncmp(name, "fp", sizeof("fp"))) {
 			if (!strtok_r(NULL, ",", &saveptr)) {
-				opts->call_graph = CALLCHAIN_FP;
+				rec->opts.call_graph = CALLCHAIN_FP;
 				ret = 0;
 			} else
 				pr_err("callchain: No more arguments "
@@ -810,20 +918,20 @@ int record_parse_callchain_opt(const struct option *opt,
 			const unsigned long default_stack_dump_size = 8192;
 
 			ret = 0;
-			opts->call_graph = CALLCHAIN_DWARF;
-			opts->stack_dump_size = default_stack_dump_size;
+			rec->opts.call_graph = CALLCHAIN_DWARF;
+			rec->opts.stack_dump_size = default_stack_dump_size;
 
 			tok = strtok_r(NULL, ",", &saveptr);
 			if (tok) {
 				unsigned long size = 0;
 
 				ret = get_stack_size(tok, &size);
-				opts->stack_dump_size = size;
+				rec->opts.stack_dump_size = size;
 			}
 
 			if (!ret)
 				pr_debug("callchain: stack dump size %d\n",
-					 opts->stack_dump_size);
+					 rec->opts.stack_dump_size);
 #endif /* LIBUNWIND_SUPPORT */
 		} else {
 			pr_err("callchain: Unknown -g option "
@@ -836,7 +944,7 @@ int record_parse_callchain_opt(const struct option *opt,
 	free(buf);
 
 	if (!ret)
-		pr_debug("callchain: type %d\n", opts->call_graph);
+		pr_debug("callchain: type %d\n", rec->opts.call_graph);
 
 	return ret;
 }
@@ -874,9 +982,9 @@ static struct perf_record record = {
 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
 
 #ifdef LIBUNWIND_SUPPORT
-const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
+static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
 #else
-const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
+static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
 #endif
 
 /*
@@ -920,9 +1028,9 @@ const struct option record_options[] = {
 		     "number of mmap data pages"),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
-			     "mode[,dump_size]", record_callchain_help,
-			     &record_parse_callchain_opt, "fp"),
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
+			     callchain_help, &parse_callchain_opt,
+			     "fp"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
diff --git a/trunk/tools/perf/builtin-report.c b/trunk/tools/perf/builtin-report.c
index 96b5a7fee4bb..fc251005dd3d 100644
--- a/trunk/tools/perf/builtin-report.c
+++ b/trunk/tools/perf/builtin-report.c
@@ -8,7 +8,6 @@
 #include "builtin.h"
 
 #include "util/util.h"
-#include "util/cache.h"
 
 #include "util/annotate.h"
 #include "util/color.h"
@@ -55,16 +54,6 @@ struct perf_report {
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
-static int perf_report_config(const char *var, const char *value, void *cb)
-{
-	if (!strcmp(var, "report.group")) {
-		symbol_conf.event_group = perf_config_bool(var, value);
-		return 0;
-	}
-
-	return perf_default_config(var, value, cb);
-}
-
 static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 					struct addr_location *al,
 					struct perf_sample *sample,
@@ -310,21 +299,6 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
 	char unit;
 	unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE];
 	u64 nr_events = self->stats.total_period;
-	struct perf_evsel *evsel = hists_to_evsel(self);
-	char buf[512];
-	size_t size = sizeof(buf);
-
-	if (symbol_conf.event_group && evsel->nr_members > 1) {
-		struct perf_evsel *pos;
-
-		perf_evsel__group_desc(evsel, buf, size);
-		evname = buf;
-
-		for_each_group_member(pos, evsel) {
-			nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
-			nr_events += pos->hists.stats.total_period;
-		}
-	}
 
 	nr_samples = convert_unit(nr_samples, &unit);
 	ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
@@ -345,10 +319,6 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 		struct hists *hists = &pos->hists;
 		const char *evname = perf_evsel__name(pos);
 
-		if (symbol_conf.event_group &&
-		    !perf_evsel__is_group_leader(pos))
-			continue;
-
 		hists__fprintf_nr_sample_events(hists, evname, stdout);
 		hists__fprintf(hists, true, 0, 0, stdout);
 		fprintf(stdout, "\n\n");
@@ -402,7 +372,7 @@ static int __cmd_report(struct perf_report *rep)
 	if (ret)
 		goto out_delete;
 
-	kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION];
+	kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION];
 	kernel_kmap = map__kmap(kernel_map);
 	if (kernel_map == NULL ||
 	    (kernel_map->dso->hit &&
@@ -446,16 +416,8 @@ static int __cmd_report(struct perf_report *rep)
 			hists->symbol_filter_str = rep->symbol_filter_str;
 
 		hists__collapse_resort(hists);
+		hists__output_resort(hists);
 		nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
-
-		/* Non-group events are considered as leader */
-		if (symbol_conf.event_group &&
-		    !perf_evsel__is_group_leader(pos)) {
-			struct hists *leader_hists = &pos->leader->hists;
-
-			hists__match(leader_hists, hists);
-			hists__link(leader_hists, hists);
-		}
 	}
 
 	if (nr_samples == 0) {
@@ -463,22 +425,11 @@ static int __cmd_report(struct perf_report *rep)
 		goto out_delete;
 	}
 
-	list_for_each_entry(pos, &session->evlist->entries, node)
-		hists__output_resort(&pos->hists);
-
 	if (use_browser > 0) {
 		if (use_browser == 1) {
-			ret = perf_evlist__tui_browse_hists(session->evlist,
-							help,
-							NULL,
-							&session->header.env);
-			/*
-			 * Usually "ret" is the last pressed key, and we only
-			 * care if the key notifies us to switch data file.
-			 */
-			if (ret != K_SWITCH_INPUT_DATA)
-				ret = 0;
-
+			perf_evlist__tui_browse_hists(session->evlist, help,
+						      NULL,
+						      &session->header.env);
 		} else if (use_browser == 2) {
 			perf_evlist__gtk_browse_hists(session->evlist, help,
 						      NULL);
@@ -644,8 +595,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN(0, "stdio", &report.use_stdio,
 		    "Use the stdio interface"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
-		   " dso_to, dso_from, symbol_to, symbol_from, mispredict"),
+		   "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
+		   " dso_from, symbol_to, symbol_from, mispredict"),
 	OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
 		    "Show sample percentage for different cpu modes"),
 	OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -687,8 +638,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
-	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
-		    "Show event group information together"),
 	OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
 		    "use branch records for histogram filling", parse_branch_mode),
 	OPT_STRING(0, "objdump", &objdump_path, "path",
@@ -696,8 +645,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_END()
 	};
 
-	perf_config(perf_report_config, NULL);
-
 	argc = parse_options(argc, argv, options, report_usage, 0);
 
 	if (report.use_stdio)
@@ -716,16 +663,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		else
 			input_name = "perf.data";
 	}
-
-	if (strcmp(input_name, "-") != 0)
-		setup_browser(true);
-	else {
-		use_browser = 0;
-		perf_hpp__column_enable(PERF_HPP__OVERHEAD);
-		perf_hpp__init();
-	}
-
-repeat:
 	session = perf_session__new(input_name, O_RDONLY,
 				    report.force, false, &report.tool);
 	if (session == NULL)
@@ -751,8 +688,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	}
 
-	if (setup_sorting() < 0)
-		usage_with_options(report_usage, options);
+	if (strcmp(input_name, "-") != 0)
+		setup_browser(true);
+	else {
+		use_browser = 0;
+		perf_hpp__init();
+	}
+
+	setup_sorting(report_usage, options);
 
 	/*
 	 * Only in the newt browser we are doing integrated annotation,
@@ -820,12 +763,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	}
 
 	ret = __cmd_report(&report);
-	if (ret == K_SWITCH_INPUT_DATA) {
-		perf_session__delete(session);
-		goto repeat;
-	} else
-		ret = 0;
-
 error:
 	perf_session__delete(session);
 	return ret;
diff --git a/trunk/tools/perf/builtin-sched.c b/trunk/tools/perf/builtin-sched.c
index 138229439a93..cc28b85dabd5 100644
--- a/trunk/tools/perf/builtin-sched.c
+++ b/trunk/tools/perf/builtin-sched.c
@@ -1475,9 +1475,9 @@ static int perf_sched__read_events(struct perf_sched *sched, bool destroy,
 			goto out_delete;
 		}
 
-		sched->nr_events      = session->stats.nr_events[0];
-		sched->nr_lost_events = session->stats.total_lost;
-		sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST];
+		sched->nr_events      = session->hists.stats.nr_events[0];
+		sched->nr_lost_events = session->hists.stats.total_lost;
+		sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
 	}
 
 	if (destroy)
diff --git a/trunk/tools/perf/builtin-script.c b/trunk/tools/perf/builtin-script.c
index 92d4658f56fb..b363e7b292b2 100644
--- a/trunk/tools/perf/builtin-script.c
+++ b/trunk/tools/perf/builtin-script.c
@@ -692,7 +692,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			    const char *arg, int unset __maybe_unused)
 {
 	char *tok;
-	int i, imax = ARRAY_SIZE(all_output_options);
+	int i, imax = sizeof(all_output_options) / sizeof(struct output_option);
 	int j;
 	int rc = 0;
 	char *str = strdup(arg);
@@ -909,6 +909,18 @@ static const char *ends_with(const char *str, const char *suffix)
 	return NULL;
 }
 
+static char *ltrim(char *str)
+{
+	int len = strlen(str);
+
+	while (len && isspace(*str)) {
+		len--;
+		str++;
+	}
+
+	return str;
+}
+
 static int read_script_info(struct script_desc *desc, const char *filename)
 {
 	char line[BUFSIZ], *p;
@@ -1475,8 +1487,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 			return -1;
 	}
 
-	if (!script_name && !generate_script_lang)
-		perf_session__fprintf_info(session, stdout, show_full_info);
+	perf_session__fprintf_info(session, stdout, show_full_info);
 
 	if (!no_callchain)
 		symbol_conf.use_callchain = true;
diff --git a/trunk/tools/perf/builtin-stat.c b/trunk/tools/perf/builtin-stat.c
index 99848761f573..c247faca7127 100644
--- a/trunk/tools/perf/builtin-stat.c
+++ b/trunk/tools/perf/builtin-stat.c
@@ -65,11 +65,6 @@
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
 
-static void print_stat(int argc, const char **argv);
-static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
-static void print_counter(struct perf_evsel *counter, char *prefix);
-static void print_aggr_socket(char *prefix);
-
 static struct perf_evlist	*evsel_list;
 
 static struct perf_target	target = {
@@ -80,7 +75,6 @@ static int			run_count			=  1;
 static bool			no_inherit			= false;
 static bool			scale				=  true;
 static bool			no_aggr				= false;
-static bool			aggr_socket			= false;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
@@ -93,9 +87,6 @@ static FILE			*output				= NULL;
 static const char		*pre_cmd			= NULL;
 static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
-static unsigned int		interval			= 0;
-static struct timespec		ref_time;
-static struct cpu_map		*sock_map;
 
 static volatile int done = 0;
 
@@ -103,28 +94,6 @@ struct perf_stat {
 	struct stats	  res_stats[3];
 };
 
-static inline void diff_timespec(struct timespec *r, struct timespec *a,
-				 struct timespec *b)
-{
-	r->tv_sec = a->tv_sec - b->tv_sec;
-	if (a->tv_nsec < b->tv_nsec) {
-		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
-		r->tv_sec--;
-	} else {
-		r->tv_nsec = a->tv_nsec - b->tv_nsec ;
-	}
-}
-
-static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
-{
-	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
-}
-
-static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
-{
-	return perf_evsel__cpus(evsel)->nr;
-}
-
 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
 	evsel->priv = zalloc(sizeof(struct perf_stat));
@@ -137,27 +106,14 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 	evsel->priv = NULL;
 }
 
-static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
 {
-	void *addr;
-	size_t sz;
-
-	sz = sizeof(*evsel->counts) +
-	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
-
-	addr = zalloc(sz);
-	if (!addr)
-		return -ENOMEM;
-
-	evsel->prev_raw_counts =  addr;
-
-	return 0;
+	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
 }
 
-static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
 {
-	free(evsel->prev_raw_counts);
-	evsel->prev_raw_counts = NULL;
+	return perf_evsel__cpus(evsel)->nr;
 }
 
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
@@ -176,6 +132,8 @@ static struct stats walltime_nsecs_stats;
 static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
 	struct perf_event_attr *attr = &evsel->attr;
+	bool exclude_guest_missing = false;
+	int ret;
 
 	if (scale)
 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -183,16 +141,38 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 
 	attr->inherit = !no_inherit;
 
-	if (perf_target__has_cpu(&target))
-		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+retry:
+	if (exclude_guest_missing)
+		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+
+	if (perf_target__has_cpu(&target)) {
+		ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+		if (ret)
+			goto check_ret;
+		return 0;
+	}
 
 	if (!perf_target__has_task(&target) &&
-	    perf_evsel__is_group_leader(evsel)) {
+	    !perf_evsel__is_group_member(evsel)) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
 
-	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
+	ret = perf_evsel__open_per_thread(evsel, evsel_list->threads);
+	if (!ret)
+		return 0;
+	/* fall through */
+check_ret:
+	if (ret && errno == EINVAL) {
+		if (!exclude_guest_missing &&
+		    (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
+			pr_debug("Old kernel, cannot exclude "
+				 "guest or host samples.\n");
+			exclude_guest_missing = true;
+			goto retry;
+		}
+	}
+	return ret;
 }
 
 /*
@@ -289,79 +269,15 @@ static int read_counter(struct perf_evsel *counter)
 	return 0;
 }
 
-static void print_interval(void)
-{
-	static int num_print_interval;
-	struct perf_evsel *counter;
-	struct perf_stat *ps;
-	struct timespec ts, rs;
-	char prefix[64];
-
-	if (no_aggr) {
-		list_for_each_entry(counter, &evsel_list->entries, node) {
-			ps = counter->priv;
-			memset(ps->res_stats, 0, sizeof(ps->res_stats));
-			read_counter(counter);
-		}
-	} else {
-		list_for_each_entry(counter, &evsel_list->entries, node) {
-			ps = counter->priv;
-			memset(ps->res_stats, 0, sizeof(ps->res_stats));
-			read_counter_aggr(counter);
-		}
-	}
-	clock_gettime(CLOCK_MONOTONIC, &ts);
-	diff_timespec(&rs, &ts, &ref_time);
-	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
-
-	if (num_print_interval == 0 && !csv_output) {
-		if (aggr_socket)
-			fprintf(output, "#           time socket cpus             counts events\n");
-		else if (no_aggr)
-			fprintf(output, "#           time CPU                 counts events\n");
-		else
-			fprintf(output, "#           time             counts events\n");
-	}
-
-	if (++num_print_interval == 25)
-		num_print_interval = 0;
-
-	if (aggr_socket)
-		print_aggr_socket(prefix);
-	else if (no_aggr) {
-		list_for_each_entry(counter, &evsel_list->entries, node)
-			print_counter(counter, prefix);
-	} else {
-		list_for_each_entry(counter, &evsel_list->entries, node)
-			print_counter_aggr(counter, prefix);
-	}
-}
-
 static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 {
-	char msg[512];
 	unsigned long long t0, t1;
 	struct perf_evsel *counter;
-	struct timespec ts;
 	int status = 0;
 	int child_ready_pipe[2], go_pipe[2];
 	const bool forks = (argc > 0);
 	char buf;
 
-	if (interval) {
-		ts.tv_sec  = interval / 1000;
-		ts.tv_nsec = (interval % 1000) * 1000000;
-	} else {
-		ts.tv_sec  = 1;
-		ts.tv_nsec = 0;
-	}
-
-	if (aggr_socket
-	    && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
-		perror("cannot build socket map");
-		return -1;
-	}
-
 	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
 		perror("failed to create pipes");
 		return -1;
@@ -432,13 +348,20 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 				continue;
 			}
 
-			perf_evsel__open_strerror(counter, &target,
-						  errno, msg, sizeof(msg));
-			ui__error("%s\n", msg);
-
+			if (errno == EPERM || errno == EACCES) {
+				error("You may not have permission to collect %sstats.\n"
+				      "\t Consider tweaking"
+				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
+				      target.system_wide ? "system-wide " : "");
+			} else {
+				error("open_counter returned with %d (%s). "
+				      "/bin/dmesg may provide additional information.\n",
+				       errno, strerror(errno));
+			}
 			if (child_pid != -1)
 				kill(child_pid, SIGTERM);
 
+			pr_err("Not all events could be opened.\n");
 			return -1;
 		}
 		counter->supported = true;
@@ -454,25 +377,14 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 	 * Enable counters and exec the command:
 	 */
 	t0 = rdclock();
-	clock_gettime(CLOCK_MONOTONIC, &ref_time);
 
 	if (forks) {
 		close(go_pipe[1]);
-		if (interval) {
-			while (!waitpid(child_pid, &status, WNOHANG)) {
-				nanosleep(&ts, NULL);
-				print_interval();
-			}
-		}
 		wait(&status);
 		if (WIFSIGNALED(status))
 			psignal(WTERMSIG(status), argv[0]);
 	} else {
-		while (!done) {
-			nanosleep(&ts, NULL);
-			if (interval)
-				print_interval();
-		}
+		while(!done) sleep(1);
 	}
 
 	t1 = rdclock();
@@ -542,21 +454,13 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 }
 
-static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
+static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
 	double msecs = avg / 1e6;
 	char cpustr[16] = { '\0', };
 	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
 
-	if (aggr_socket)
-		sprintf(cpustr, "S%*d%s%*d%s",
-			csv_output ? 0 : -5,
-			cpu,
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-	else if (no_aggr)
+	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
 			csv_output ? 0 : -4,
 			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -566,7 +470,7 @@ static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 
-	if (csv_output || interval)
+	if (csv_output)
 		return;
 
 	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -755,7 +659,7 @@ static void print_ll_cache_misses(int cpu,
 	fprintf(output, " of all LL-cache hits   ");
 }
 
-static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
+static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
 	double total, ratio = 0.0;
 	char cpustr[16] = { '\0', };
@@ -768,15 +672,7 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	else
 		fmt = "%s%18.0f%s%-25s";
 
-	if (aggr_socket)
-		sprintf(cpustr, "S%*d%s%*d%s",
-			csv_output ? 0 : -5,
-			cpu,
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-	else if (no_aggr)
+	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
 			csv_output ? 0 : -4,
 			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -788,11 +684,12 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 
-	if (csv_output || interval)
+	if (csv_output)
 		return;
 
 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
 		total = avg_stats(&runtime_cycles_stats[cpu]);
+
 		if (total)
 			ratio = avg / total;
 
@@ -882,83 +779,16 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	}
 }
 
-static void print_aggr_socket(char *prefix)
-{
-	struct perf_evsel *counter;
-	u64 ena, run, val;
-	int cpu, s, s2, sock, nr;
-
-	if (!sock_map)
-		return;
-
-	for (s = 0; s < sock_map->nr; s++) {
-		sock = cpu_map__socket(sock_map, s);
-		list_for_each_entry(counter, &evsel_list->entries, node) {
-			val = ena = run = 0;
-			nr = 0;
-			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
-				if (s2 != sock)
-					continue;
-				val += counter->counts->cpu[cpu].val;
-				ena += counter->counts->cpu[cpu].ena;
-				run += counter->counts->cpu[cpu].run;
-				nr++;
-			}
-			if (prefix)
-				fprintf(output, "%s", prefix);
-
-			if (run == 0 || ena == 0) {
-				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
-					csv_output ? 0 : -5,
-					s,
-					csv_sep,
-					csv_output ? 0 : 4,
-					nr,
-					csv_sep,
-					csv_output ? 0 : 18,
-					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-					csv_sep,
-					csv_output ? 0 : -24,
-					perf_evsel__name(counter));
-				if (counter->cgrp)
-					fprintf(output, "%s%s",
-						csv_sep, counter->cgrp->name);
-
-				fputc('\n', output);
-				continue;
-			}
-
-			if (nsec_counter(counter))
-				nsec_printout(sock, nr, counter, val);
-			else
-				abs_printout(sock, nr, counter, val);
-
-			if (!csv_output) {
-				print_noise(counter, 1.0);
-
-				if (run != ena)
-					fprintf(output, "  (%.2f%%)",
-						100.0 * run / ena);
-			}
-			fputc('\n', output);
-		}
-	}
-}
-
 /*
  * Print out the results of a single counter:
  * aggregated counts in system-wide mode
  */
-static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
+static void print_counter_aggr(struct perf_evsel *counter)
 {
 	struct perf_stat *ps = counter->priv;
 	double avg = avg_stats(&ps->res_stats[0]);
 	int scaled = counter->counts->scaled;
 
-	if (prefix)
-		fprintf(output, "%s", prefix);
-
 	if (scaled == -1) {
 		fprintf(output, "%*s%s%*s",
 			csv_output ? 0 : 18,
@@ -975,9 +805,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 	}
 
 	if (nsec_counter(counter))
-		nsec_printout(-1, 0, counter, avg);
+		nsec_printout(-1, counter, avg);
 	else
-		abs_printout(-1, 0, counter, avg);
+		abs_printout(-1, counter, avg);
 
 	print_noise(counter, avg);
 
@@ -1001,7 +831,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
  * Print out the results of a single counter:
  * does not use aggregated count in system-wide
  */
-static void print_counter(struct perf_evsel *counter, char *prefix)
+static void print_counter(struct perf_evsel *counter)
 {
 	u64 ena, run, val;
 	int cpu;
@@ -1010,10 +840,6 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 		val = counter->counts->cpu[cpu].val;
 		ena = counter->counts->cpu[cpu].ena;
 		run = counter->counts->cpu[cpu].run;
-
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
 		if (run == 0 || ena == 0) {
 			fprintf(output, "CPU%*d%s%*s%s%*s",
 				csv_output ? 0 : -4,
@@ -1033,9 +859,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 		}
 
 		if (nsec_counter(counter))
-			nsec_printout(cpu, 0, counter, val);
+			nsec_printout(cpu, counter, val);
 		else
-			abs_printout(cpu, 0, counter, val);
+			abs_printout(cpu, counter, val);
 
 		if (!csv_output) {
 			print_noise(counter, 1.0);
@@ -1073,14 +899,12 @@ static void print_stat(int argc, const char **argv)
 		fprintf(output, ":\n\n");
 	}
 
-	if (aggr_socket)
-		print_aggr_socket(NULL);
-	else if (no_aggr) {
+	if (no_aggr) {
 		list_for_each_entry(counter, &evsel_list->entries, node)
-			print_counter(counter, NULL);
+			print_counter(counter);
 	} else {
 		list_for_each_entry(counter, &evsel_list->entries, node)
-			print_counter_aggr(counter, NULL);
+			print_counter_aggr(counter);
 	}
 
 	if (!csv_output) {
@@ -1101,7 +925,7 @@ static volatile int signr = -1;
 
 static void skip_signal(int signo)
 {
-	if ((child_pid == -1) || interval)
+	if(child_pid == -1)
 		done = 1;
 
 	signr = signo;
@@ -1321,9 +1145,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 			"command to run prior to the measured command"),
 	OPT_STRING(0, "post", &post_cmd, "command",
 			"command to run after to the measured command"),
-	OPT_UINTEGER('I', "interval-print", &interval,
-		    "print counts at regular interval in ms (>= 100)"),
-	OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
@@ -1410,14 +1231,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		usage_with_options(stat_usage, options);
 	}
 
-	if (aggr_socket) {
-		if (!perf_target__has_cpu(&target)) {
-			fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
-			usage_with_options(stat_usage, options);
-		}
-		no_aggr = true;
-	}
-
 	if (add_default_attributes())
 		goto out;
 
@@ -1432,23 +1245,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		usage_with_options(stat_usage, options);
 		return -1;
 	}
-	if (interval && interval < 100) {
-		pr_err("print interval must be >= 100ms\n");
-		usage_with_options(stat_usage, options);
-		return -1;
-	}
 
 	list_for_each_entry(pos, &evsel_list->entries, node) {
 		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
 		    perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
 			goto out_free_fd;
 	}
-	if (interval) {
-		list_for_each_entry(pos, &evsel_list->entries, node) {
-			if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
-				goto out_free_fd;
-		}
-	}
 
 	/*
 	 * We dont want to block the signals - that would cause
@@ -1458,7 +1260,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	 */
 	atexit(sig_atexit);
 	signal(SIGINT,  skip_signal);
-	signal(SIGCHLD, skip_signal);
 	signal(SIGALRM, skip_signal);
 	signal(SIGABRT, skip_signal);
 
@@ -1471,14 +1272,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		status = run_perf_stat(argc, argv);
 	}
 
-	if (status != -1 && !interval)
+	if (status != -1)
 		print_stat(argc, argv);
 out_free_fd:
-	list_for_each_entry(pos, &evsel_list->entries, node) {
+	list_for_each_entry(pos, &evsel_list->entries, node)
 		perf_evsel__free_stat_priv(pos);
-		perf_evsel__free_counts(pos);
-		perf_evsel__free_prev_raw_counts(pos);
-	}
 	perf_evlist__delete_maps(evsel_list);
 out:
 	perf_evlist__delete(evsel_list);
diff --git a/trunk/tools/perf/builtin-top.c b/trunk/tools/perf/builtin-top.c
index 72f6eb7b4173..c9ff3950cd4b 100644
--- a/trunk/tools/perf/builtin-top.c
+++ b/trunk/tools/perf/builtin-top.c
@@ -68,7 +68,27 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-static volatile int done;
+void get_term_dimensions(struct winsize *ws)
+{
+	char *s = getenv("LINES");
+
+	if (s != NULL) {
+		ws->ws_row = atoi(s);
+		s = getenv("COLUMNS");
+		if (s != NULL) {
+			ws->ws_col = atoi(s);
+			if (ws->ws_row && ws->ws_col)
+				return;
+		}
+	}
+#ifdef TIOCGWINSZ
+	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+	    ws->ws_row && ws->ws_col)
+		return;
+#endif
+	ws->ws_row = 25;
+	ws->ws_col = 80;
+}
 
 static void perf_top__update_print_entries(struct perf_top *top)
 {
@@ -433,10 +453,8 @@ static int perf_top__key_mapped(struct perf_top *top, int c)
 	return 0;
 }
 
-static bool perf_top__handle_keypress(struct perf_top *top, int c)
+static void perf_top__handle_keypress(struct perf_top *top, int c)
 {
-	bool ret = true;
-
 	if (!perf_top__key_mapped(top, c)) {
 		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
 		struct termios tc, save;
@@ -457,7 +475,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 
 		tcsetattr(0, TCSAFLUSH, &save);
 		if (!perf_top__key_mapped(top, c))
-			return ret;
+			return;
 	}
 
 	switch (c) {
@@ -519,8 +537,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 			printf("exiting.\n");
 			if (top->dump_symtab)
 				perf_session__fprintf_dsos(top->session, stderr);
-			ret = false;
-			break;
+			exit(0);
 		case 's':
 			perf_top__prompt_symbol(top, "Enter details symbol");
 			break;
@@ -543,8 +560,6 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 		default:
 			break;
 	}
-
-	return ret;
 }
 
 static void perf_top__sort_new_samples(void *arg)
@@ -581,12 +596,13 @@ static void *display_thread_tui(void *arg)
 	 * via --uid.
 	 */
 	list_for_each_entry(pos, &top->evlist->entries, node)
-		pos->hists.uid_filter_str = top->record_opts.target.uid_str;
+		pos->hists.uid_filter_str = top->target.uid_str;
 
 	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
 				      &top->session->header.env);
 
-	done = 1;
+	exit_browser(0);
+	exit(0);
 	return NULL;
 }
 
@@ -610,7 +626,7 @@ static void *display_thread(void *arg)
 	/* trash return*/
 	getc(stdin);
 
-	while (!done) {
+	while (1) {
 		perf_top__print_sym_table(top);
 		/*
 		 * Either timeout expired or we got an EINTR due to SIGWINCH,
@@ -624,14 +640,15 @@ static void *display_thread(void *arg)
 				continue;
 			/* Fall trhu */
 		default:
-			c = getc(stdin);
-			tcsetattr(0, TCSAFLUSH, &save);
-
-			if (perf_top__handle_keypress(top, c))
-				goto repeat;
-			done = 1;
+			goto process_hotkey;
 		}
 	}
+process_hotkey:
+	c = getc(stdin);
+	tcsetattr(0, TCSAFLUSH, &save);
+
+	perf_top__handle_keypress(top, c);
+	goto repeat;
 
 	return NULL;
 }
@@ -699,7 +716,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		static struct intlist *seen;
 
 		if (!seen)
-			seen = intlist__new(NULL);
+			seen = intlist__new();
 
 		if (!intlist__has_entry(seen, event->ip.pid)) {
 			pr_err("Can't find guest [%d]'s kernel information\n",
@@ -710,8 +727,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	}
 
 	if (!machine) {
-		pr_err("%u unprocessable samples recorded.\r",
-		       top->session->stats.nr_unprocessable_samples++);
+		pr_err("%u unprocessable samples recorded.",
+		       top->session->hists.stats.nr_unprocessable_samples++);
 		return;
 	}
 
@@ -830,13 +847,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 			++top->us_samples;
 			if (top->hide_user_symbols)
 				continue;
-			machine = &session->machines.host;
+			machine = perf_session__find_host_machine(session);
 			break;
 		case PERF_RECORD_MISC_KERNEL:
 			++top->kernel_samples;
 			if (top->hide_kernel_symbols)
 				continue;
-			machine = &session->machines.host;
+			machine = perf_session__find_host_machine(session);
 			break;
 		case PERF_RECORD_MISC_GUEST_KERNEL:
 			++top->guest_kernel_samples;
@@ -861,7 +878,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 			hists__inc_nr_events(&evsel->hists, event->header.type);
 			machine__process_event(machine, event);
 		} else
-			++session->stats.nr_unknown_events;
+			++session->hists.stats.nr_unknown_events;
 	}
 }
 
@@ -873,42 +890,123 @@ static void perf_top__mmap_read(struct perf_top *top)
 		perf_top__mmap_read_idx(top, i);
 }
 
-static int perf_top__start_counters(struct perf_top *top)
+static void perf_top__start_counters(struct perf_top *top)
 {
-	char msg[512];
 	struct perf_evsel *counter;
 	struct perf_evlist *evlist = top->evlist;
-	struct perf_record_opts *opts = &top->record_opts;
 
-	perf_evlist__config(evlist, opts);
+	if (top->group)
+		perf_evlist__set_leader(evlist);
 
 	list_for_each_entry(counter, &evlist->entries, node) {
+		struct perf_event_attr *attr = &counter->attr;
+
+		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+
+		if (top->freq) {
+			attr->sample_type |= PERF_SAMPLE_PERIOD;
+			attr->freq	  = 1;
+			attr->sample_freq = top->freq;
+		}
+
+		if (evlist->nr_entries > 1) {
+			attr->sample_type |= PERF_SAMPLE_ID;
+			attr->read_format |= PERF_FORMAT_ID;
+		}
+
+		if (perf_target__has_cpu(&top->target))
+			attr->sample_type |= PERF_SAMPLE_CPU;
+
+		if (symbol_conf.use_callchain)
+			attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+		attr->mmap = 1;
+		attr->comm = 1;
+		attr->inherit = top->inherit;
+fallback_missing_features:
+		if (top->exclude_guest_missing)
+			attr->exclude_guest = attr->exclude_host = 0;
+retry_sample_id:
+		attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
 try_again:
 		if (perf_evsel__open(counter, top->evlist->cpus,
 				     top->evlist->threads) < 0) {
-			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+			int err = errno;
+
+			if (err == EPERM || err == EACCES) {
+				ui__error_paranoid();
+				goto out_err;
+			} else if (err == EINVAL) {
+				if (!top->exclude_guest_missing &&
+				    (attr->exclude_guest || attr->exclude_host)) {
+					pr_debug("Old kernel, cannot exclude "
+						 "guest or host samples.\n");
+					top->exclude_guest_missing = true;
+					goto fallback_missing_features;
+				} else if (!top->sample_id_all_missing) {
+					/*
+					 * Old kernel, no attr->sample_id_type_all field
+					 */
+					top->sample_id_all_missing = true;
+					goto retry_sample_id;
+				}
+			}
+			/*
+			 * If it's cycles then fall back to hrtimer
+			 * based cpu-clock-tick sw counter, which
+			 * is always available even if no PMU support:
+			 */
+			if ((err == ENOENT || err == ENXIO) &&
+			    (attr->type == PERF_TYPE_HARDWARE) &&
+			    (attr->config == PERF_COUNT_HW_CPU_CYCLES)) {
+
 				if (verbose)
-					ui__warning("%s\n", msg);
+					ui__warning("Cycles event not supported,\n"
+						    "trying to fall back to cpu-clock-ticks\n");
+
+				attr->type = PERF_TYPE_SOFTWARE;
+				attr->config = PERF_COUNT_SW_CPU_CLOCK;
+				if (counter->name) {
+					free(counter->name);
+					counter->name = NULL;
+				}
 				goto try_again;
 			}
 
-			perf_evsel__open_strerror(counter, &opts->target,
-						  errno, msg, sizeof(msg));
-			ui__error("%s\n", msg);
+			if (err == ENOENT) {
+				ui__error("The %s event is not supported.\n",
+					  perf_evsel__name(counter));
+				goto out_err;
+			} else if (err == EMFILE) {
+				ui__error("Too many events are opened.\n"
+					    "Try again after reducing the number of events\n");
+				goto out_err;
+			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
+				ui__error("\'precise\' request may not be supported. "
+					  "Try removing 'p' modifier\n");
+				goto out_err;
+			}
+
+			ui__error("The sys_perf_event_open() syscall "
+				    "returned with %d (%s).  /bin/dmesg "
+				    "may provide additional information.\n"
+				    "No CONFIG_PERF_EVENTS=y kernel support "
+				    "configured?\n", err, strerror(err));
 			goto out_err;
 		}
 	}
 
-	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+	if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
 		ui__error("Failed to mmap with %d (%s)\n",
 			    errno, strerror(errno));
 		goto out_err;
 	}
 
-	return 0;
+	return;
 
 out_err:
-	return -1;
+	exit_browser(0);
+	exit(0);
 }
 
 static int perf_top__setup_sample_type(struct perf_top *top)
@@ -918,7 +1016,7 @@ static int perf_top__setup_sample_type(struct perf_top *top)
 			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
 			return -EINVAL;
 		}
-	} else if (callchain_param.mode != CHAIN_NONE) {
+	} else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
 		if (callchain_register_param(&callchain_param) < 0) {
 			ui__error("Can't register callchain params.\n");
 			return -EINVAL;
@@ -930,7 +1028,6 @@ static int perf_top__setup_sample_type(struct perf_top *top)
 
 static int __cmd_top(struct perf_top *top)
 {
-	struct perf_record_opts *opts = &top->record_opts;
 	pthread_t thread;
 	int ret;
 	/*
@@ -945,42 +1042,26 @@ static int __cmd_top(struct perf_top *top)
 	if (ret)
 		goto out_delete;
 
-	if (perf_target__has_task(&opts->target))
+	if (perf_target__has_task(&top->target))
 		perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
 						  perf_event__process,
-						  &top->session->machines.host);
+						  &top->session->host_machine);
 	else
 		perf_event__synthesize_threads(&top->tool, perf_event__process,
-					       &top->session->machines.host);
-
-	ret = perf_top__start_counters(top);
-	if (ret)
-		goto out_delete;
-
+					       &top->session->host_machine);
+	perf_top__start_counters(top);
 	top->session->evlist = top->evlist;
 	perf_session__set_id_hdr_size(top->session);
 
-	/*
-	 * When perf is starting the traced process, all the events (apart from
-	 * group members) have enable_on_exec=1 set, so don't spoil it by
-	 * prematurely enabling them.
-	 *
-	 * XXX 'top' still doesn't start workloads like record, trace, but should,
-	 * so leave the check here.
-	 */
-        if (!perf_target__none(&opts->target))
-                perf_evlist__enable(top->evlist);
-
 	/* Wait for a minimal set of events before starting the snapshot */
 	poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
 
 	perf_top__mmap_read(top);
 
-	ret = -1;
 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
 							    display_thread), top)) {
 		ui__error("Could not create display thread.\n");
-		goto out_delete;
+		exit(-1);
 	}
 
 	if (top->realtime_prio) {
@@ -989,11 +1070,11 @@ static int __cmd_top(struct perf_top *top)
 		param.sched_priority = top->realtime_prio;
 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 			ui__error("Could not set realtime priority.\n");
-			goto out_delete;
+			exit(-1);
 		}
 	}
 
-	while (!done) {
+	while (1) {
 		u64 hits = top->samples;
 
 		perf_top__mmap_read(top);
@@ -1002,67 +1083,126 @@ static int __cmd_top(struct perf_top *top)
 			ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
 	}
 
-	ret = 0;
 out_delete:
 	perf_session__delete(top->session);
 	top->session = NULL;
 
-	return ret;
+	return 0;
 }
 
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
+	struct perf_top *top = (struct perf_top *)opt->value;
+	char *tok, *tok2;
+	char *endptr;
+
 	/*
 	 * --no-call-graph
 	 */
-	if (unset)
+	if (unset) {
+		top->dont_use_callchains = true;
 		return 0;
+	}
 
 	symbol_conf.use_callchain = true;
 
-	return record_parse_callchain_opt(opt, arg, unset);
+	if (!arg)
+		return 0;
+
+	tok = strtok((char *)arg, ",");
+	if (!tok)
+		return -1;
+
+	/* get the output mode */
+	if (!strncmp(tok, "graph", strlen(arg)))
+		callchain_param.mode = CHAIN_GRAPH_ABS;
+
+	else if (!strncmp(tok, "flat", strlen(arg)))
+		callchain_param.mode = CHAIN_FLAT;
+
+	else if (!strncmp(tok, "fractal", strlen(arg)))
+		callchain_param.mode = CHAIN_GRAPH_REL;
+
+	else if (!strncmp(tok, "none", strlen(arg))) {
+		callchain_param.mode = CHAIN_NONE;
+		symbol_conf.use_callchain = false;
+
+		return 0;
+	} else
+		return -1;
+
+	/* get the min percentage */
+	tok = strtok(NULL, ",");
+	if (!tok)
+		goto setup;
+
+	callchain_param.min_percent = strtod(tok, &endptr);
+	if (tok == endptr)
+		return -1;
+
+	/* get the print limit */
+	tok2 = strtok(NULL, ",");
+	if (!tok2)
+		goto setup;
+
+	if (tok2[0] != 'c') {
+		callchain_param.print_limit = strtod(tok2, &endptr);
+		tok2 = strtok(NULL, ",");
+		if (!tok2)
+			goto setup;
+	}
+
+	/* get the call chain order */
+	if (!strcmp(tok2, "caller"))
+		callchain_param.order = ORDER_CALLER;
+	else if (!strcmp(tok2, "callee"))
+		callchain_param.order = ORDER_CALLEE;
+	else
+		return -1;
+setup:
+	if (callchain_register_param(&callchain_param) < 0) {
+		fprintf(stderr, "Can't register callchain params\n");
+		return -1;
+	}
+	return 0;
 }
 
 int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 {
+	struct perf_evsel *pos;
 	int status;
 	char errbuf[BUFSIZ];
 	struct perf_top top = {
 		.count_filter	     = 5,
 		.delay_secs	     = 2,
-		.record_opts = {
-			.mmap_pages	= UINT_MAX,
-			.user_freq	= UINT_MAX,
-			.user_interval	= ULLONG_MAX,
-			.freq		= 4000, /* 4 KHz */
-			.target		     = {
-				.uses_mmap   = true,
-			},
-		},
+		.freq		     = 4000, /* 4 KHz */
+		.mmap_pages	     = 128,
 		.sym_pcnt_filter     = 5,
+		.target		     = {
+			.uses_mmap   = true,
+		},
 	};
-	struct perf_record_opts *opts = &top.record_opts;
-	struct perf_target *target = &opts->target;
+	char callchain_default_opt[] = "fractal,0.5,callee";
 	const struct option options[] = {
 	OPT_CALLBACK('e', "event", &top.evlist, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events_option),
-	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
-	OPT_STRING('p', "pid", &target->pid, "pid",
+	OPT_INTEGER('c', "count", &top.default_interval,
+		    "event period to sample"),
+	OPT_STRING('p', "pid", &top.target.pid, "pid",
 		    "profile events on existing process id"),
-	OPT_STRING('t', "tid", &target->tid, "tid",
+	OPT_STRING('t', "tid", &top.target.tid, "tid",
 		    "profile events on existing thread id"),
-	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
+	OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide,
 			    "system-wide collection from all CPUs"),
-	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
+	OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
 	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
 		    "hide kernel symbols"),
-	OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
-		     "number of mmap data pages"),
+	OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
 	OPT_INTEGER('r', "realtime", &top.realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
 	OPT_INTEGER('d', "delay", &top.delay_secs,
@@ -1071,14 +1211,16 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 			    "dump the symbol table used for profiling"),
 	OPT_INTEGER('f', "count-filter", &top.count_filter,
 		    "only display functions with more events than this"),
-	OPT_BOOLEAN('g', "group", &opts->group,
+	OPT_BOOLEAN('g', "group", &top.group,
 			    "put the counters into a counter group"),
-	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
-		    "child tasks do not inherit counters"),
+	OPT_BOOLEAN('i', "inherit", &top.inherit,
+		    "child tasks inherit counters"),
 	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
 		    "symbol to annotate"),
-	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
-	OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"),
+	OPT_BOOLEAN('z', "zero", &top.zero,
+		    "zero history across updates"),
+	OPT_INTEGER('F', "freq", &top.freq,
+		    "profile at this frequency"),
 	OPT_INTEGER('E', "entries", &top.print_entries,
 		    "display this many functions"),
 	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
@@ -1091,9 +1233,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "sort by key(s): pid, comm, dso, symbol, parent"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 		    "Show a column with the number of samples"),
-	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
-			     "mode[,dump_size]", record_callchain_help,
-			     &parse_callchain_opt, "fp"),
+	OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
+		     "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
+		     "Default: fractal,0.5,callee", &parse_callchain_opt,
+		     callchain_default_opt),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
 	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
@@ -1108,7 +1251,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Display raw encoding of assembly instructions (default)"),
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
-	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
+	OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {
@@ -1129,8 +1272,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (sort_order == default_sort_order)
 		sort_order = "dso,symbol";
 
-	if (setup_sorting() < 0)
-		usage_with_options(top_usage, options);
+	setup_sorting(top_usage, options);
 
 	if (top.use_stdio)
 		use_browser = 0;
@@ -1139,33 +1281,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	setup_browser(false);
 
-	status = perf_target__validate(target);
+	status = perf_target__validate(&top.target);
 	if (status) {
-		perf_target__strerror(target, status, errbuf, BUFSIZ);
+		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
 		ui__warning("%s", errbuf);
 	}
 
-	status = perf_target__parse_uid(target);
+	status = perf_target__parse_uid(&top.target);
 	if (status) {
 		int saved_errno = errno;
 
-		perf_target__strerror(target, status, errbuf, BUFSIZ);
+		perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
 		ui__error("%s", errbuf);
 
 		status = -saved_errno;
 		goto out_delete_evlist;
 	}
 
-	if (perf_target__none(target))
-		target->system_wide = true;
+	if (perf_target__none(&top.target))
+		top.target.system_wide = true;
 
-	if (perf_evlist__create_maps(top.evlist, target) < 0)
+	if (perf_evlist__create_maps(top.evlist, &top.target) < 0)
 		usage_with_options(top_usage, options);
 
 	if (!top.evlist->nr_entries &&
 	    perf_evlist__add_default(top.evlist) < 0) {
 		ui__error("Not enough memory for event selector list\n");
-		goto out_delete_maps;
+		return -ENOMEM;
 	}
 
 	symbol_conf.nr_events = top.evlist->nr_entries;
@@ -1173,22 +1315,24 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (top.delay_secs < 1)
 		top.delay_secs = 1;
 
-	if (opts->user_interval != ULLONG_MAX)
-		opts->default_interval = opts->user_interval;
-	if (opts->user_freq != UINT_MAX)
-		opts->freq = opts->user_freq;
-
 	/*
 	 * User specified count overrides default frequency.
 	 */
-	if (opts->default_interval)
-		opts->freq = 0;
-	else if (opts->freq) {
-		opts->default_interval = opts->freq;
+	if (top.default_interval)
+		top.freq = 0;
+	else if (top.freq) {
+		top.default_interval = top.freq;
 	} else {
 		ui__error("frequency and count are zero, aborting\n");
-		status = -EINVAL;
-		goto out_delete_maps;
+		exit(EXIT_FAILURE);
+	}
+
+	list_for_each_entry(pos, &top.evlist->entries, node) {
+		/*
+		 * Fill in the ones not specifically initialized via -c:
+		 */
+		if (!pos->attr.sample_period)
+			pos->attr.sample_period = top.default_interval;
 	}
 
 	top.sym_evsel = perf_evlist__first(top.evlist);
@@ -1221,8 +1365,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	status = __cmd_top(&top);
 
-out_delete_maps:
-	perf_evlist__delete_maps(top.evlist);
 out_delete_evlist:
 	perf_evlist__delete(top.evlist);
 
diff --git a/trunk/tools/perf/builtin-trace.c b/trunk/tools/perf/builtin-trace.c
index d222d7fc7e96..7932ffa29889 100644
--- a/trunk/tools/perf/builtin-trace.c
+++ b/trunk/tools/perf/builtin-trace.c
@@ -455,7 +455,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
-	perf_evlist__config(evlist, &trace->opts);
+	perf_evlist__config_attrs(evlist, &trace->opts);
 
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
diff --git a/trunk/tools/perf/config/feature-tests.mak b/trunk/tools/perf/config/feature-tests.mak
index b4eabb44e381..f5ac77485a4f 100644
--- a/trunk/tools/perf/config/feature-tests.mak
+++ b/trunk/tools/perf/config/feature-tests.mak
@@ -225,14 +225,3 @@ int main(void)
 	return on_exit(NULL, NULL);
 }
 endef
-
-define SOURCE_LIBNUMA
-#include <numa.h>
-#include <numaif.h>
-
-int main(void)
-{
-	numa_available();
-	return 0;
-}
-endef
\ No newline at end of file
diff --git a/trunk/tools/perf/config/utilities.mak b/trunk/tools/perf/config/utilities.mak
index 8ef3bd30a549..e5413125e6bb 100644
--- a/trunk/tools/perf/config/utilities.mak
+++ b/trunk/tools/perf/config/utilities.mak
@@ -13,7 +13,7 @@ newline := $(newline)
 # what should replace a newline when escaping
 # newlines; the default is a bizarre string.
 #
-nl-escape = $(if $(1),$(1),m822df3020w6a44id34bt574ctac44eb9f4n)
+nl-escape = $(or $(1),m822df3020w6a44id34bt574ctac44eb9f4n)
 
 # escape-nl
 #
@@ -173,9 +173,9 @@ _ge-abspath = $(if $(is-executable),$(1))
 # Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default)
 #
 define get-executable-or-default
-$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1)))
+$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
 endef
-_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2)))
+_ge_attempt = $(or $(get-executable),$(_gea_warn),$(call _gea_err,$(2)))
 _gea_warn = $(warning The path '$(1)' is not executable.)
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
 
diff --git a/trunk/tools/perf/perf.c b/trunk/tools/perf/perf.c
index 095b88207cd3..0f661fbce6a8 100644
--- a/trunk/tools/perf/perf.c
+++ b/trunk/tools/perf/perf.c
@@ -328,23 +328,14 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 	if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
 		return 0;
 
-	status = 1;
 	/* Check for ENOSPC and EIO errors.. */
-	if (fflush(stdout)) {
-		fprintf(stderr, "write failure on standard output: %s", strerror(errno));
-		goto out;
-	}
-	if (ferror(stdout)) {
-		fprintf(stderr, "unknown write failure on standard output");
-		goto out;
-	}
-	if (fclose(stdout)) {
-		fprintf(stderr, "close failed on standard output: %s", strerror(errno));
-		goto out;
-	}
-	status = 0;
-out:
-	return status;
+	if (fflush(stdout))
+		die("write failure on standard output: %s", strerror(errno));
+	if (ferror(stdout))
+		die("unknown write failure on standard output");
+	if (fclose(stdout))
+		die("close failed on standard output: %s", strerror(errno));
+	return 0;
 }
 
 static void handle_internal_command(int argc, const char **argv)
@@ -476,8 +467,7 @@ int main(int argc, const char **argv)
 		cmd += 5;
 		argv[0] = cmd;
 		handle_internal_command(argc, argv);
-		fprintf(stderr, "cannot handle %s internally", cmd);
-		goto out;
+		die("cannot handle %s internally", cmd);
 	}
 
 	/* Look for flags.. */
@@ -495,7 +485,7 @@ int main(int argc, const char **argv)
 		printf("\n usage: %s\n\n", perf_usage_string);
 		list_common_cmds_help();
 		printf("\n %s\n\n", perf_more_info_string);
-		goto out;
+		exit(1);
 	}
 	cmd = argv[0];
 
@@ -527,7 +517,7 @@ int main(int argc, const char **argv)
 			fprintf(stderr, "Expansion of alias '%s' failed; "
 				"'%s' is not a perf-command\n",
 				cmd, argv[0]);
-			goto out;
+			exit(1);
 		}
 		if (!done_help) {
 			cmd = argv[0] = help_unknown_cmd(cmd);
@@ -538,6 +528,6 @@ int main(int argc, const char **argv)
 
 	fprintf(stderr, "Failed to run command '%s': %s\n",
 		cmd, strerror(errno));
-out:
+
 	return 1;
 }
diff --git a/trunk/tools/perf/perf.h b/trunk/tools/perf/perf.h
index c2206c87fc9f..2c340e7da458 100644
--- a/trunk/tools/perf/perf.h
+++ b/trunk/tools/perf/perf.h
@@ -1,6 +1,10 @@
 #ifndef _PERF_PERF_H
 #define _PERF_PERF_H
 
+struct winsize;
+
+void get_term_dimensions(struct winsize *ws);
+
 #include <asm/unistd.h>
 
 #if defined(__i386__)
@@ -103,6 +107,32 @@
 #include "util/types.h"
 #include <stdbool.h>
 
+struct perf_mmap {
+	void			*base;
+	int			mask;
+	unsigned int		prev;
+};
+
+static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->base;
+	int head = pc->data_head;
+	rmb();
+	return head;
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md,
+					 unsigned long tail)
+{
+	struct perf_event_mmap_page *pc = md->base;
+
+	/*
+	 * ensure all reads are done before we write the tail out.
+	 */
+	/* mb(); */
+	pc->data_tail = tail;
+}
+
 /*
  * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
  * counters in the current task.
@@ -207,6 +237,8 @@ struct perf_record_opts {
 	bool	     raw_samples;
 	bool	     sample_address;
 	bool	     sample_time;
+	bool	     sample_id_all_missing;
+	bool	     exclude_guest_missing;
 	bool	     period;
 	unsigned int freq;
 	unsigned int mmap_pages;
diff --git a/trunk/tools/perf/scripts/perl/bin/workqueue-stats-record b/trunk/tools/perf/scripts/perl/bin/workqueue-stats-record
new file mode 100644
index 000000000000..8edda9078d5d
--- /dev/null
+++ b/trunk/tools/perf/scripts/perl/bin/workqueue-stats-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
diff --git a/trunk/tools/perf/scripts/perl/bin/workqueue-stats-report b/trunk/tools/perf/scripts/perl/bin/workqueue-stats-report
new file mode 100644
index 000000000000..6d91411d248c
--- /dev/null
+++ b/trunk/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: workqueue stats (ins/exe/create/destroy)
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
diff --git a/trunk/tools/perf/scripts/perl/rwtop.pl b/trunk/tools/perf/scripts/perl/rwtop.pl
index 8b20787021c1..4bb3ecd33472 100644
--- a/trunk/tools/perf/scripts/perl/rwtop.pl
+++ b/trunk/tools/perf/scripts/perl/rwtop.pl
@@ -17,7 +17,6 @@
 use lib "./Perf-Trace-Util/lib";
 use Perf::Trace::Core;
 use Perf::Trace::Util;
-use POSIX qw/SIGALRM SA_RESTART/;
 
 my $default_interval = 3;
 my $nlines = 20;
@@ -91,10 +90,7 @@ sub syscalls::sys_enter_write
 
 sub trace_begin
 {
-    my $sa = POSIX::SigAction->new(\&set_print_pending);
-    $sa->flags(SA_RESTART);
-    $sa->safe(1);
-    POSIX::sigaction(SIGALRM, $sa) or die "Can't set SIGALRM handler: $!\n";
+    $SIG{ALRM} = \&set_print_pending;
     alarm 1;
 }
 
diff --git a/trunk/tools/perf/scripts/perl/workqueue-stats.pl b/trunk/tools/perf/scripts/perl/workqueue-stats.pl
new file mode 100644
index 000000000000..a8eaff5119e0
--- /dev/null
+++ b/trunk/tools/perf/scripts/perl/workqueue-stats.pl
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Displays workqueue stats
+#
+# Usage:
+#
+#   perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e
+#     workqueue:workqueue_destruction -e workqueue:workqueue_execution
+#     -e workqueue:workqueue_insertion
+#
+#   perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my @cpus;
+
+sub workqueue::workqueue_destruction
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{destroyed}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_creation
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid, $cpu) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{created}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_execution
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid, $func) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{executed}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub workqueue::workqueue_insertion
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$thread_comm, $thread_pid, $func) = @_;
+
+    $cpus[$common_cpu]{$thread_pid}{inserted}++;
+    $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
+}
+
+sub trace_end
+{
+    print "workqueue work stats:\n\n";
+    my $cpu = 0;
+    printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name");
+    printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----");
+    foreach my $pidhash (@cpus) {
+	while ((my $pid, my $wqhash) = each %$pidhash) {
+	    my $ins = $$wqhash{'inserted'} || 0;
+	    my $exe = $$wqhash{'executed'} || 0;
+	    my $comm = $$wqhash{'comm'} || "";
+	    if ($ins || $exe) {
+		printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm);
+	    }
+	}
+	$cpu++;
+    }
+
+    $cpu = 0;
+    print "\nworkqueue lifecycle stats:\n\n";
+    printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name");
+    printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----");
+    foreach my $pidhash (@cpus) {
+	while ((my $pid, my $wqhash) = each %$pidhash) {
+	    my $created = $$wqhash{'created'} || 0;
+	    my $destroyed = $$wqhash{'destroyed'} || 0;
+	    my $comm = $$wqhash{'comm'} || "";
+	    if ($created || $destroyed) {
+		printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed,
+		       $comm);
+	    }
+	}
+	$cpu++;
+    }
+
+    print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+    if ((scalar keys %unhandled) == 0) {
+	return;
+    }
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $event_name (keys %unhandled) {
+	printf("%-40s  %10d\n", $event_name, $unhandled{$event_name});
+    }
+}
+
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/trunk/tools/perf/tests/attr.c b/trunk/tools/perf/tests/attr.c
index bdcceb886f77..25638a986257 100644
--- a/trunk/tools/perf/tests/attr.c
+++ b/trunk/tools/perf/tests/attr.c
@@ -19,11 +19,6 @@
  * permissions. All the event text files are stored there.
  */
 
-/*
- * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
- * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
- */
-#define __SANE_USERSPACE_TYPES__
 #include <stdlib.h>
 #include <stdio.h>
 #include <inttypes.h>
@@ -38,6 +33,8 @@
 
 extern int verbose;
 
+bool test_attr__enabled;
+
 static char *dir;
 
 void test_attr__init(void)
@@ -149,7 +146,7 @@ static int run_dir(const char *d, const char *perf)
 {
 	char cmd[3*PATH_MAX];
 
-	snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %s",
+	snprintf(cmd, 3*PATH_MAX, "python %s/attr.py -d %s/attr/ -p %s %s",
 		 d, d, perf, verbose ? "-v" : "");
 
 	return system(cmd);
diff --git a/trunk/tools/perf/tests/attr.py b/trunk/tools/perf/tests/attr.py
index 2f629ca485bc..e702b82dcb86 100644
--- a/trunk/tools/perf/tests/attr.py
+++ b/trunk/tools/perf/tests/attr.py
@@ -68,7 +68,7 @@ def add(self, data):
             self[key] = val
 
     def __init__(self, name, data, base):
-        log.debug("    Event %s" % name);
+        log.info("    Event %s" % name);
         self.name  = name;
         self.group = ''
         self.add(base)
@@ -97,14 +97,6 @@ def equal(self, other):
                 return False
         return True
 
-    def diff(self, other):
-        for t in Event.terms:
-            if not self.has_key(t) or not other.has_key(t):
-                continue
-            if not self.compare_data(self[t], other[t]):
-		log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
-                
-
 # Test file description needs to have following sections:
 # [config]
 #   - just single instance in file
@@ -121,7 +113,7 @@ def __init__(self, path, options):
         parser = ConfigParser.SafeConfigParser()
         parser.read(path)
 
-        log.debug("running '%s'" % path)
+        log.warning("running '%s'" % path)
 
         self.path     = path
         self.test_dir = options.test_dir
@@ -136,7 +128,7 @@ def __init__(self, path, options):
 
         self.expect   = {}
         self.result   = {}
-        log.debug("  loading expected events");
+        log.info("  loading expected events");
         self.load_events(path, self.expect)
 
     def is_event(self, name):
@@ -172,7 +164,7 @@ def run_cmd(self, tempdir):
               self.perf, self.command, tempdir, self.args)
         ret = os.WEXITSTATUS(os.system(cmd))
 
-        log.warning("  running '%s' ret %d " % (cmd, ret))
+        log.info("  running '%s' ret %d " % (cmd, ret))
 
         if ret != int(self.ret):
             raise Unsup(self)
@@ -180,7 +172,7 @@ def run_cmd(self, tempdir):
     def compare(self, expect, result):
         match = {}
 
-        log.debug("  compare");
+        log.info("  compare");
 
         # For each expected event find all matching
         # events in result. Fail if there's not any.
@@ -195,11 +187,10 @@ def compare(self, expect, result):
                 else:
                     log.debug("    ->FAIL");
 
-            log.debug("    match: [%s] matches %s" % (exp_name, str(exp_list)))
+            log.info("    match: [%s] matches %s" % (exp_name, str(exp_list)))
 
             # we did not any matching event - fail
             if (not exp_list):
-		exp_event.diff(res_event)
                 raise Fail(self, 'match failure');
 
             match[exp_name] = exp_list
@@ -217,10 +208,10 @@ def compare(self, expect, result):
                 if res_group not in match[group]:
                     raise Fail(self, 'group failure')
 
-                log.debug("    group: [%s] matches group leader %s" %
+                log.info("    group: [%s] matches group leader %s" %
                          (exp_name, str(match[group])))
 
-        log.debug("  matched")
+        log.info("  matched")
 
     def resolve_groups(self, events):
         for name, event in events.items():
@@ -242,7 +233,7 @@ def run(self):
             self.run_cmd(tempdir);
 
             # load events expectation for the test
-            log.debug("  loading result events");
+            log.info("  loading result events");
             for f in glob.glob(tempdir + '/event*'):
                 self.load_events(f, self.result);
 
diff --git a/trunk/tools/perf/tests/attr/base-record b/trunk/tools/perf/tests/attr/base-record
index 5bc3880f7be5..f1485d8e6a0b 100644
--- a/trunk/tools/perf/tests/attr/base-record
+++ b/trunk/tools/perf/tests/attr/base-record
@@ -7,7 +7,7 @@ size=96
 config=0
 sample_period=4000
 sample_type=263
-read_format=0
+read_format=7
 disabled=1
 inherit=1
 pinned=0
diff --git a/trunk/tools/perf/tests/attr/test-record-group b/trunk/tools/perf/tests/attr/test-record-group
index 57739cacdb2a..a6599e9a19d3 100644
--- a/trunk/tools/perf/tests/attr/test-record-group
+++ b/trunk/tools/perf/tests/attr/test-record-group
@@ -6,14 +6,12 @@ args    = --group -e cycles,instructions kill >/dev/null 2>&1
 fd=1
 group_fd=-1
 sample_type=327
-read_format=4
 
 [event-2:base-record]
 fd=2
 group_fd=1
 config=1
 sample_type=327
-read_format=4
 mmap=0
 comm=0
 enable_on_exec=0
diff --git a/trunk/tools/perf/tests/attr/test-record-group1 b/trunk/tools/perf/tests/attr/test-record-group1
index c5548d054aff..5a8359da38af 100644
--- a/trunk/tools/perf/tests/attr/test-record-group1
+++ b/trunk/tools/perf/tests/attr/test-record-group1
@@ -1,12 +1,11 @@
 [config]
 command = record
-args    = -e '{cycles,instructions}' kill >/dev/null 2>&1
+args    = -e '{cycles,instructions}' kill >/tmp/krava 2>&1
 
 [event-1:base-record]
 fd=1
 group_fd=-1
 sample_type=327
-read_format=4
 
 [event-2:base-record]
 fd=2
@@ -14,7 +13,6 @@ group_fd=1
 type=0
 config=1
 sample_type=327
-read_format=4
 mmap=0
 comm=0
 enable_on_exec=0
diff --git a/trunk/tools/perf/tests/builtin-test.c b/trunk/tools/perf/tests/builtin-test.c
index acb98e0e39f2..186f67535494 100644
--- a/trunk/tools/perf/tests/builtin-test.c
+++ b/trunk/tools/perf/tests/builtin-test.c
@@ -4,7 +4,6 @@
  * Builtin regression testing command: ever growing number of sanity tests
  */
 #include "builtin.h"
-#include "intlist.h"
 #include "tests.h"
 #include "debug.h"
 #include "color.h"
@@ -69,14 +68,6 @@ static struct test {
 		.desc = "struct perf_event_attr setup",
 		.func = test__attr,
 	},
-	{
-		.desc = "Test matching and linking mutliple hists",
-		.func = test__hists_link,
-	},
-	{
-		.desc = "Try 'use perf' in python, checking link problems",
-		.func = test__python_use,
-	},
 	{
 		.func = NULL,
 	},
@@ -106,7 +97,7 @@ static bool perf_test__matches(int curr, int argc, const char *argv[])
 	return false;
 }
 
-static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
+static int __cmd_test(int argc, const char *argv[])
 {
 	int i = 0;
 	int width = 0;
@@ -127,28 +118,13 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 			continue;
 
 		pr_info("%2d: %-*s:", i, width, tests[curr].desc);
-
-		if (intlist__find(skiplist, i)) {
-			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
-			continue;
-		}
-
 		pr_debug("\n--- start ---\n");
 		err = tests[curr].func();
 		pr_debug("---- end ----\n%s:", tests[curr].desc);
-
-		switch (err) {
-		case TEST_OK:
-			pr_info(" Ok\n");
-			break;
-		case TEST_SKIP:
-			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
-			break;
-		case TEST_FAIL:
-		default:
+		if (err)
 			color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
-			break;
-		}
+		else
+			pr_info(" Ok\n");
 	}
 
 	return 0;
@@ -176,14 +152,11 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
 	"perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
 	NULL,
 	};
-	const char *skip = NULL;
 	const struct option test_options[] = {
-	OPT_STRING('s', "skip", &skip, "tests", "tests to skip"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
 	OPT_END()
 	};
-	struct intlist *skiplist = NULL;
 
 	argc = parse_options(argc, argv, test_options, test_usage, 0);
 	if (argc >= 1 && !strcmp(argv[0], "list"))
@@ -196,8 +169,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (symbol__init() < 0)
 		return -1;
 
-	if (skip != NULL)
-		skiplist = intlist__new(skip);
-
-	return __cmd_test(argc, argv, skiplist);
+	return __cmd_test(argc, argv);
 }
diff --git a/trunk/tools/perf/tests/evsel-roundtrip-name.c b/trunk/tools/perf/tests/evsel-roundtrip-name.c
index 0fd99a9adb91..e61fc828a158 100644
--- a/trunk/tools/perf/tests/evsel-roundtrip-name.c
+++ b/trunk/tools/perf/tests/evsel-roundtrip-name.c
@@ -22,7 +22,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
 			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
 				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
 									name, sizeof(name));
-				err = parse_events(evlist, name);
+				err = parse_events(evlist, name, 0);
 				if (err)
 					ret = err;
 			}
@@ -70,7 +70,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
                 return -ENOMEM;
 
 	for (i = 0; i < nr_names; ++i) {
-		err = parse_events(evlist, names[i]);
+		err = parse_events(evlist, names[i], 0);
 		if (err) {
 			pr_debug("failed to parse event '%s', err %d\n",
 				 names[i], err);
diff --git a/trunk/tools/perf/tests/hists_link.c b/trunk/tools/perf/tests/hists_link.c
deleted file mode 100644
index 1be64a6c5daf..000000000000
--- a/trunk/tools/perf/tests/hists_link.c
+++ /dev/null
@@ -1,500 +0,0 @@
-#include "perf.h"
-#include "tests.h"
-#include "debug.h"
-#include "symbol.h"
-#include "sort.h"
-#include "evsel.h"
-#include "evlist.h"
-#include "machine.h"
-#include "thread.h"
-#include "parse-events.h"
-
-static struct {
-	u32 pid;
-	const char *comm;
-} fake_threads[] = {
-	{ 100, "perf" },
-	{ 200, "perf" },
-	{ 300, "bash" },
-};
-
-static struct {
-	u32 pid;
-	u64 start;
-	const char *filename;
-} fake_mmap_info[] = {
-	{ 100, 0x40000, "perf" },
-	{ 100, 0x50000, "libc" },
-	{ 100, 0xf0000, "[kernel]" },
-	{ 200, 0x40000, "perf" },
-	{ 200, 0x50000, "libc" },
-	{ 200, 0xf0000, "[kernel]" },
-	{ 300, 0x40000, "bash" },
-	{ 300, 0x50000, "libc" },
-	{ 300, 0xf0000, "[kernel]" },
-};
-
-struct fake_sym {
-	u64 start;
-	u64 length;
-	const char *name;
-};
-
-static struct fake_sym perf_syms[] = {
-	{ 700, 100, "main" },
-	{ 800, 100, "run_command" },
-	{ 900, 100, "cmd_record" },
-};
-
-static struct fake_sym bash_syms[] = {
-	{ 700, 100, "main" },
-	{ 800, 100, "xmalloc" },
-	{ 900, 100, "xfree" },
-};
-
-static struct fake_sym libc_syms[] = {
-	{ 700, 100, "malloc" },
-	{ 800, 100, "free" },
-	{ 900, 100, "realloc" },
-};
-
-static struct fake_sym kernel_syms[] = {
-	{ 700, 100, "schedule" },
-	{ 800, 100, "page_fault" },
-	{ 900, 100, "sys_perf_event_open" },
-};
-
-static struct {
-	const char *dso_name;
-	struct fake_sym *syms;
-	size_t nr_syms;
-} fake_symbols[] = {
-	{ "perf", perf_syms, ARRAY_SIZE(perf_syms) },
-	{ "bash", bash_syms, ARRAY_SIZE(bash_syms) },
-	{ "libc", libc_syms, ARRAY_SIZE(libc_syms) },
-	{ "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
-};
-
-static struct machine *setup_fake_machine(struct machines *machines)
-{
-	struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
-	size_t i;
-
-	if (machine == NULL) {
-		pr_debug("Not enough memory for machine setup\n");
-		return NULL;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
-		struct thread *thread;
-
-		thread = machine__findnew_thread(machine, fake_threads[i].pid);
-		if (thread == NULL)
-			goto out;
-
-		thread__set_comm(thread, fake_threads[i].comm);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
-		union perf_event fake_mmap_event = {
-			.mmap = {
-				.header = { .misc = PERF_RECORD_MISC_USER, },
-				.pid = fake_mmap_info[i].pid,
-				.start = fake_mmap_info[i].start,
-				.len = 0x1000ULL,
-				.pgoff = 0ULL,
-			},
-		};
-
-		strcpy(fake_mmap_event.mmap.filename,
-		       fake_mmap_info[i].filename);
-
-		machine__process_mmap_event(machine, &fake_mmap_event);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
-		size_t k;
-		struct dso *dso;
-
-		dso = __dsos__findnew(&machine->user_dsos,
-				      fake_symbols[i].dso_name);
-		if (dso == NULL)
-			goto out;
-
-		/* emulate dso__load() */
-		dso__set_loaded(dso, MAP__FUNCTION);
-
-		for (k = 0; k < fake_symbols[i].nr_syms; k++) {
-			struct symbol *sym;
-			struct fake_sym *fsym = &fake_symbols[i].syms[k];
-
-			sym = symbol__new(fsym->start, fsym->length,
-					  STB_GLOBAL, fsym->name);
-			if (sym == NULL)
-				goto out;
-
-			symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
-		}
-	}
-
-	return machine;
-
-out:
-	pr_debug("Not enough memory for machine setup\n");
-	machine__delete_threads(machine);
-	machine__delete(machine);
-	return NULL;
-}
-
-struct sample {
-	u32 pid;
-	u64 ip;
-	struct thread *thread;
-	struct map *map;
-	struct symbol *sym;
-};
-
-static struct sample fake_common_samples[] = {
-	/* perf [kernel] schedule() */
-	{ .pid = 100, .ip = 0xf0000 + 700, },
-	/* perf [perf]   main() */
-	{ .pid = 200, .ip = 0x40000 + 700, },
-	/* perf [perf]   cmd_record() */
-	{ .pid = 200, .ip = 0x40000 + 900, },
-	/* bash [bash]   xmalloc() */
-	{ .pid = 300, .ip = 0x40000 + 800, },
-	/* bash [libc]   malloc() */
-	{ .pid = 300, .ip = 0x50000 + 700, },
-};
-
-static struct sample fake_samples[][5] = {
-	{
-		/* perf [perf]   run_command() */
-		{ .pid = 100, .ip = 0x40000 + 800, },
-		/* perf [libc]   malloc() */
-		{ .pid = 100, .ip = 0x50000 + 700, },
-		/* perf [kernel] page_fault() */
-		{ .pid = 100, .ip = 0xf0000 + 800, },
-		/* perf [kernel] sys_perf_event_open() */
-		{ .pid = 200, .ip = 0xf0000 + 900, },
-		/* bash [libc]   free() */
-		{ .pid = 300, .ip = 0x50000 + 800, },
-	},
-	{
-		/* perf [libc]   free() */
-		{ .pid = 200, .ip = 0x50000 + 800, },
-		/* bash [libc]   malloc() */
-		{ .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
-		/* bash [bash]   xfee() */
-		{ .pid = 300, .ip = 0x40000 + 900, },
-		/* bash [libc]   realloc() */
-		{ .pid = 300, .ip = 0x50000 + 900, },
-		/* bash [kernel] page_fault() */
-		{ .pid = 300, .ip = 0xf0000 + 800, },
-	},
-};
-
-static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
-{
-	struct perf_evsel *evsel;
-	struct addr_location al;
-	struct hist_entry *he;
-	struct perf_sample sample = { .cpu = 0, };
-	size_t i = 0, k;
-
-	/*
-	 * each evsel will have 10 samples - 5 common and 5 distinct.
-	 * However the second evsel also has a collapsed entry for
-	 * "bash [libc] malloc" so total 9 entries will be in the tree.
-	 */
-	list_for_each_entry(evsel, &evlist->entries, node) {
-		for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
-			const union perf_event event = {
-				.ip = {
-					.header = {
-						.misc = PERF_RECORD_MISC_USER,
-					},
-					.pid = fake_common_samples[k].pid,
-					.ip  = fake_common_samples[k].ip,
-				},
-			};
-
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample, 0) < 0)
-				goto out;
-
-			he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
-			if (he == NULL)
-				goto out;
-
-			fake_common_samples[k].thread = al.thread;
-			fake_common_samples[k].map = al.map;
-			fake_common_samples[k].sym = al.sym;
-		}
-
-		for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
-			const union perf_event event = {
-				.ip = {
-					.header = {
-						.misc = PERF_RECORD_MISC_USER,
-					},
-					.pid = fake_samples[i][k].pid,
-					.ip  = fake_samples[i][k].ip,
-				},
-			};
-
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample, 0) < 0)
-				goto out;
-
-			he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
-			if (he == NULL)
-				goto out;
-
-			fake_samples[i][k].thread = al.thread;
-			fake_samples[i][k].map = al.map;
-			fake_samples[i][k].sym = al.sym;
-		}
-		i++;
-	}
-
-	return 0;
-
-out:
-	pr_debug("Not enough memory for adding a hist entry\n");
-	return -1;
-}
-
-static int find_sample(struct sample *samples, size_t nr_samples,
-		       struct thread *t, struct map *m, struct symbol *s)
-{
-	while (nr_samples--) {
-		if (samples->thread == t && samples->map == m &&
-		    samples->sym == s)
-			return 1;
-		samples++;
-	}
-	return 0;
-}
-
-static int __validate_match(struct hists *hists)
-{
-	size_t count = 0;
-	struct rb_root *root;
-	struct rb_node *node;
-
-	/*
-	 * Only entries from fake_common_samples should have a pair.
-	 */
-	if (sort__need_collapse)
-		root = &hists->entries_collapsed;
-	else
-		root = hists->entries_in;
-
-	node = rb_first(root);
-	while (node) {
-		struct hist_entry *he;
-
-		he = rb_entry(node, struct hist_entry, rb_node_in);
-
-		if (hist_entry__has_pairs(he)) {
-			if (find_sample(fake_common_samples,
-					ARRAY_SIZE(fake_common_samples),
-					he->thread, he->ms.map, he->ms.sym)) {
-				count++;
-			} else {
-				pr_debug("Can't find the matched entry\n");
-				return -1;
-			}
-		}
-
-		node = rb_next(node);
-	}
-
-	if (count != ARRAY_SIZE(fake_common_samples)) {
-		pr_debug("Invalid count for matched entries: %zd of %zd\n",
-			 count, ARRAY_SIZE(fake_common_samples));
-		return -1;
-	}
-
-	return 0;
-}
-
-static int validate_match(struct hists *leader, struct hists *other)
-{
-	return __validate_match(leader) || __validate_match(other);
-}
-
-static int __validate_link(struct hists *hists, int idx)
-{
-	size_t count = 0;
-	size_t count_pair = 0;
-	size_t count_dummy = 0;
-	struct rb_root *root;
-	struct rb_node *node;
-
-	/*
-	 * Leader hists (idx = 0) will have dummy entries from other,
-	 * and some entries will have no pair.  However every entry
-	 * in other hists should have (dummy) pair.
-	 */
-	if (sort__need_collapse)
-		root = &hists->entries_collapsed;
-	else
-		root = hists->entries_in;
-
-	node = rb_first(root);
-	while (node) {
-		struct hist_entry *he;
-
-		he = rb_entry(node, struct hist_entry, rb_node_in);
-
-		if (hist_entry__has_pairs(he)) {
-			if (!find_sample(fake_common_samples,
-					 ARRAY_SIZE(fake_common_samples),
-					 he->thread, he->ms.map, he->ms.sym) &&
-			    !find_sample(fake_samples[idx],
-					 ARRAY_SIZE(fake_samples[idx]),
-					 he->thread, he->ms.map, he->ms.sym)) {
-				count_dummy++;
-			}
-			count_pair++;
-		} else if (idx) {
-			pr_debug("A entry from the other hists should have pair\n");
-			return -1;
-		}
-
-		count++;
-		node = rb_next(node);
-	}
-
-	/*
-	 * Note that we have a entry collapsed in the other (idx = 1) hists.
-	 */
-	if (idx == 0) {
-		if (count_dummy != ARRAY_SIZE(fake_samples[1]) - 1) {
-			pr_debug("Invalid count of dummy entries: %zd of %zd\n",
-				 count_dummy, ARRAY_SIZE(fake_samples[1]) - 1);
-			return -1;
-		}
-		if (count != count_pair + ARRAY_SIZE(fake_samples[0])) {
-			pr_debug("Invalid count of total leader entries: %zd of %zd\n",
-				 count, count_pair + ARRAY_SIZE(fake_samples[0]));
-			return -1;
-		}
-	} else {
-		if (count != count_pair) {
-			pr_debug("Invalid count of total other entries: %zd of %zd\n",
-				 count, count_pair);
-			return -1;
-		}
-		if (count_dummy > 0) {
-			pr_debug("Other hists should not have dummy entries: %zd\n",
-				 count_dummy);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-static int validate_link(struct hists *leader, struct hists *other)
-{
-	return __validate_link(leader, 0) || __validate_link(other, 1);
-}
-
-static void print_hists(struct hists *hists)
-{
-	int i = 0;
-	struct rb_root *root;
-	struct rb_node *node;
-
-	if (sort__need_collapse)
-		root = &hists->entries_collapsed;
-	else
-		root = hists->entries_in;
-
-	pr_info("----- %s --------\n", __func__);
-	node = rb_first(root);
-	while (node) {
-		struct hist_entry *he;
-
-		he = rb_entry(node, struct hist_entry, rb_node_in);
-
-		pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
-			i, he->thread->comm, he->ms.map->dso->short_name,
-			he->ms.sym->name, he->stat.period);
-
-		i++;
-		node = rb_next(node);
-	}
-}
-
-int test__hists_link(void)
-{
-	int err = -1;
-	struct machines machines;
-	struct machine *machine = NULL;
-	struct perf_evsel *evsel, *first;
-        struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
-
-	if (evlist == NULL)
-                return -ENOMEM;
-
-	err = parse_events(evlist, "cpu-clock");
-	if (err)
-		goto out;
-	err = parse_events(evlist, "task-clock");
-	if (err)
-		goto out;
-
-	/* default sort order (comm,dso,sym) will be used */
-	if (setup_sorting() < 0)
-		goto out;
-
-	machines__init(&machines);
-
-	/* setup threads/dso/map/symbols also */
-	machine = setup_fake_machine(&machines);
-	if (!machine)
-		goto out;
-
-	if (verbose > 1)
-		machine__fprintf(machine, stderr);
-
-	/* process sample events */
-	err = add_hist_entries(evlist, machine);
-	if (err < 0)
-		goto out;
-
-	list_for_each_entry(evsel, &evlist->entries, node) {
-		hists__collapse_resort(&evsel->hists);
-
-		if (verbose > 2)
-			print_hists(&evsel->hists);
-	}
-
-	first = perf_evlist__first(evlist);
-	evsel = perf_evlist__last(evlist);
-
-	/* match common entries */
-	hists__match(&first->hists, &evsel->hists);
-	err = validate_match(&first->hists, &evsel->hists);
-	if (err)
-		goto out;
-
-	/* link common and/or dummy entries */
-	hists__link(&first->hists, &evsel->hists);
-	err = validate_link(&first->hists, &evsel->hists);
-	if (err)
-		goto out;
-
-	err = 0;
-
-out:
-	/* tear down everything */
-	perf_evlist__delete(evlist);
-	machines__exit(&machines);
-
-	return err;
-}
diff --git a/trunk/tools/perf/tests/mmap-basic.c b/trunk/tools/perf/tests/mmap-basic.c
index cdd50755af51..e1746811e14b 100644
--- a/trunk/tools/perf/tests/mmap-basic.c
+++ b/trunk/tools/perf/tests/mmap-basic.c
@@ -22,16 +22,36 @@ int test__basic_mmap(void)
 	struct thread_map *threads;
 	struct cpu_map *cpus;
 	struct perf_evlist *evlist;
+	struct perf_event_attr attr = {
+		.type		= PERF_TYPE_TRACEPOINT,
+		.read_format	= PERF_FORMAT_ID,
+		.sample_type	= PERF_SAMPLE_ID,
+		.watermark	= 0,
+	};
 	cpu_set_t cpu_set;
 	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
 					"getpgid", };
 	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
 				      (void*)getpgid };
 #define nsyscalls ARRAY_SIZE(syscall_names)
+	int ids[nsyscalls];
 	unsigned int nr_events[nsyscalls],
 		     expected_nr_events[nsyscalls], i, j;
 	struct perf_evsel *evsels[nsyscalls], *evsel;
 
+	for (i = 0; i < nsyscalls; ++i) {
+		char name[64];
+
+		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+		ids[i] = trace_event__id(name);
+		if (ids[i] < 0) {
+			pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
+			return -1;
+		}
+		nr_events[i] = 0;
+		expected_nr_events[i] = random() % 257;
+	}
+
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
 		pr_debug("thread_map__new\n");
@@ -59,19 +79,18 @@ int test__basic_mmap(void)
 		goto out_free_cpus;
 	}
 
-	for (i = 0; i < nsyscalls; ++i) {
-		char name[64];
+	/* anonymous union fields, can't be initialized above */
+	attr.wakeup_events = 1;
+	attr.sample_period = 1;
 
-		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
-		evsels[i] = perf_evsel__newtp("syscalls", name, i);
+	for (i = 0; i < nsyscalls; ++i) {
+		attr.config = ids[i];
+		evsels[i] = perf_evsel__new(&attr, i);
 		if (evsels[i] == NULL) {
 			pr_debug("perf_evsel__new\n");
 			goto out_free_evlist;
 		}
 
-		evsels[i]->attr.wakeup_events = 1;
-		perf_evsel__set_sample_id(evsels[i]);
-
 		perf_evlist__add(evlist, evsels[i]);
 
 		if (perf_evsel__open(evsels[i], cpus, threads) < 0) {
@@ -80,9 +99,6 @@ int test__basic_mmap(void)
 				 strerror(errno));
 			goto out_close_fd;
 		}
-
-		nr_events[i] = 0;
-		expected_nr_events[i] = 1 + rand() % 127;
 	}
 
 	if (perf_evlist__mmap(evlist, 128, true) < 0) {
@@ -112,7 +128,6 @@ int test__basic_mmap(void)
 			goto out_munmap;
 		}
 
-		err = -1;
 		evsel = perf_evlist__id2evsel(evlist, sample.id);
 		if (evsel == NULL) {
 			pr_debug("event with id %" PRIu64
@@ -122,17 +137,16 @@ int test__basic_mmap(void)
 		nr_events[evsel->idx]++;
 	}
 
-	err = 0;
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
 			pr_debug("expected %d %s events, got %d\n",
 				 expected_nr_events[evsel->idx],
 				 perf_evsel__name(evsel), nr_events[evsel->idx]);
-			err = -1;
 			goto out_munmap;
 		}
 	}
 
+	err = 0;
 out_munmap:
 	perf_evlist__munmap(evlist);
 out_close_fd:
diff --git a/trunk/tools/perf/tests/open-syscall-all-cpus.c b/trunk/tools/perf/tests/open-syscall-all-cpus.c
index b0657a9ccda6..31072aba0d54 100644
--- a/trunk/tools/perf/tests/open-syscall-all-cpus.c
+++ b/trunk/tools/perf/tests/open-syscall-all-cpus.c
@@ -7,12 +7,20 @@
 int test__open_syscall_event_on_all_cpus(void)
 {
 	int err = -1, fd, cpu;
+	struct thread_map *threads;
 	struct cpu_map *cpus;
 	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
 	cpu_set_t cpu_set;
-	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
+	int id = trace_event__id("sys_enter_open");
 
+	if (id < 0) {
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		return -1;
+	}
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
 		pr_debug("thread_map__new\n");
 		return -1;
@@ -24,11 +32,15 @@ int test__open_syscall_event_on_all_cpus(void)
 		goto out_thread_map_delete;
 	}
 
+
 	CPU_ZERO(&cpu_set);
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
 	if (evsel == NULL) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		pr_debug("perf_evsel__new\n");
 		goto out_thread_map_delete;
 	}
 
@@ -98,7 +110,6 @@ int test__open_syscall_event_on_all_cpus(void)
 		}
 	}
 
-	perf_evsel__free_counts(evsel);
 out_close_fd:
 	perf_evsel__close_fd(evsel, 1, threads->nr);
 out_evsel_delete:
diff --git a/trunk/tools/perf/tests/open-syscall.c b/trunk/tools/perf/tests/open-syscall.c
index befc0671f95d..98be8b518b4f 100644
--- a/trunk/tools/perf/tests/open-syscall.c
+++ b/trunk/tools/perf/tests/open-syscall.c
@@ -6,18 +6,29 @@
 int test__open_syscall_event(void)
 {
 	int err = -1, fd;
+	struct thread_map *threads;
 	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
-	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
+	int id = trace_event__id("sys_enter_open");
 
+	if (id < 0) {
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		return -1;
+	}
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
 		pr_debug("thread_map__new\n");
 		return -1;
 	}
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
 	if (evsel == NULL) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		pr_debug("perf_evsel__new\n");
 		goto out_thread_map_delete;
 	}
 
diff --git a/trunk/tools/perf/tests/parse-events.c b/trunk/tools/perf/tests/parse-events.c
index c5636f36fe31..32ee478905eb 100644
--- a/trunk/tools/perf/tests/parse-events.c
+++ b/trunk/tools/perf/tests/parse-events.c
@@ -3,7 +3,6 @@
 #include "evsel.h"
 #include "evlist.h"
 #include "sysfs.h"
-#include "debugfs.h"
 #include "tests.h"
 #include <linux/hw_breakpoint.h>
 
@@ -23,7 +22,6 @@ static int test__checkevent_tracepoint(struct perf_evlist *evlist)
 	struct perf_evsel *evsel = perf_evlist__first(evlist);
 
 	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong sample_type",
 		PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
@@ -36,7 +34,6 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist)
 	struct perf_evsel *evsel;
 
 	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
-	TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		TEST_ASSERT_VAL("wrong type",
@@ -466,10 +463,10 @@ static int test__checkevent_pmu_events(struct perf_evlist *evlist)
 
 static int test__checkterms_simple(struct list_head *terms)
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
 	/* config=10 */
-	term = list_entry(terms->next, struct parse_events_term, list);
+	term = list_entry(terms->next, struct parse_events__term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
 	TEST_ASSERT_VAL("wrong type val",
@@ -478,7 +475,7 @@ static int test__checkterms_simple(struct list_head *terms)
 	TEST_ASSERT_VAL("wrong config", !term->config);
 
 	/* config1 */
-	term = list_entry(term->list.next, struct parse_events_term, list);
+	term = list_entry(term->list.next, struct parse_events__term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
 	TEST_ASSERT_VAL("wrong type val",
@@ -487,7 +484,7 @@ static int test__checkterms_simple(struct list_head *terms)
 	TEST_ASSERT_VAL("wrong config", !term->config);
 
 	/* config2=3 */
-	term = list_entry(term->list.next, struct parse_events_term, list);
+	term = list_entry(term->list.next, struct parse_events__term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
 	TEST_ASSERT_VAL("wrong type val",
@@ -496,7 +493,7 @@ static int test__checkterms_simple(struct list_head *terms)
 	TEST_ASSERT_VAL("wrong config", !term->config);
 
 	/* umask=1*/
-	term = list_entry(term->list.next, struct parse_events_term, list);
+	term = list_entry(term->list.next, struct parse_events__term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
 	TEST_ASSERT_VAL("wrong type val",
@@ -512,7 +509,6 @@ static int test__group1(struct perf_evlist *evlist)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
 	/* instructions:k */
 	evsel = leader = perf_evlist__first(evlist);
@@ -525,9 +521,7 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* cycles:upp */
 	evsel = perf_evsel__next(evsel);
@@ -542,7 +536,6 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	return 0;
 }
@@ -552,7 +545,6 @@ static int test__group2(struct perf_evlist *evlist)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
 	/* faults + :ku modifier */
 	evsel = leader = perf_evlist__first(evlist);
@@ -565,9 +557,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* cache-references + :u modifier */
 	evsel = perf_evsel__next(evsel);
@@ -577,11 +567,10 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* cycles:k */
 	evsel = perf_evsel__next(evsel);
@@ -594,7 +583,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	return 0;
 }
@@ -604,7 +593,6 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
 	/* group1 syscalls:sys_enter_open:H */
 	evsel = leader = perf_evlist__first(evlist);
@@ -618,11 +606,9 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 	TEST_ASSERT_VAL("wrong group name",
 		!strcmp(leader->group_name, "group1"));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* group1 cycles:kppp */
 	evsel = perf_evsel__next(evsel);
@@ -638,7 +624,6 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* group2 cycles + G modifier */
 	evsel = leader = perf_evsel__next(evsel);
@@ -651,11 +636,9 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 	TEST_ASSERT_VAL("wrong group name",
 		!strcmp(leader->group_name, "group2"));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* group2 1:3 + G modifier */
 	evsel = perf_evsel__next(evsel);
@@ -668,7 +651,6 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* instructions:u */
 	evsel = perf_evsel__next(evsel);
@@ -681,7 +663,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	return 0;
 }
@@ -691,7 +673,6 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
 	/* cycles:u + p */
 	evsel = leader = perf_evlist__first(evlist);
@@ -706,9 +687,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* instructions:kp + p */
 	evsel = perf_evsel__next(evsel);
@@ -723,7 +702,6 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	return 0;
 }
@@ -733,7 +711,6 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
 	/* cycles + G */
 	evsel = leader = perf_evlist__first(evlist);
@@ -747,9 +724,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* instructions + G */
 	evsel = perf_evsel__next(evsel);
@@ -763,7 +738,6 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* cycles:G */
 	evsel = leader = perf_evsel__next(evsel);
@@ -777,9 +751,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	/* instructions:G */
 	evsel = perf_evsel__next(evsel);
@@ -793,7 +765,6 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* cycles */
 	evsel = perf_evsel__next(evsel);
@@ -806,235 +777,18 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-
-	return 0;
-}
-
-static int test__group_gh1(struct perf_evlist *evlist)
-{
-	struct perf_evsel *evsel, *leader;
-
-	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
-
-	/* cycles + :H group modifier */
-	evsel = leader = perf_evlist__first(evlist);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
-
-	/* cache-misses:G + :H group modifier */
-	evsel = perf_evsel__next(evsel);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
-
-	return 0;
-}
-
-static int test__group_gh2(struct perf_evlist *evlist)
-{
-	struct perf_evsel *evsel, *leader;
-
-	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
-
-	/* cycles + :G group modifier */
-	evsel = leader = perf_evlist__first(evlist);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
-
-	/* cache-misses:H + :G group modifier */
-	evsel = perf_evsel__next(evsel);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
-
-	return 0;
-}
-
-static int test__group_gh3(struct perf_evlist *evlist)
-{
-	struct perf_evsel *evsel, *leader;
-
-	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
-
-	/* cycles:G + :u group modifier */
-	evsel = leader = perf_evlist__first(evlist);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
-
-	/* cache-misses:H + :u group modifier */
-	evsel = perf_evsel__next(evsel);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
-
-	return 0;
-}
-
-static int test__group_gh4(struct perf_evlist *evlist)
-{
-	struct perf_evsel *evsel, *leader;
-
-	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
-	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
-
-	/* cycles:G + :uG group modifier */
-	evsel = leader = perf_evlist__first(evlist);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
-	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
-
-	/* cache-misses:H + :uG group modifier */
-	evsel = perf_evsel__next(evsel);
-	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-	TEST_ASSERT_VAL("wrong config",
-			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
-	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
-	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
-	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
-	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
-	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
 
 	return 0;
 }
 
-static int count_tracepoints(void)
-{
-	char events_path[PATH_MAX];
-	struct dirent *events_ent;
-	DIR *events_dir;
-	int cnt = 0;
-
-	scnprintf(events_path, PATH_MAX, "%s/tracing/events",
-		  debugfs_find_mountpoint());
-
-	events_dir = opendir(events_path);
-
-	TEST_ASSERT_VAL("Can't open events dir", events_dir);
-
-	while ((events_ent = readdir(events_dir))) {
-		char sys_path[PATH_MAX];
-		struct dirent *sys_ent;
-		DIR *sys_dir;
-
-		if (!strcmp(events_ent->d_name, ".")
-		    || !strcmp(events_ent->d_name, "..")
-		    || !strcmp(events_ent->d_name, "enable")
-		    || !strcmp(events_ent->d_name, "header_event")
-		    || !strcmp(events_ent->d_name, "header_page"))
-			continue;
-
-		scnprintf(sys_path, PATH_MAX, "%s/%s",
-			  events_path, events_ent->d_name);
-
-		sys_dir = opendir(sys_path);
-		TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
-
-		while ((sys_ent = readdir(sys_dir))) {
-			if (!strcmp(sys_ent->d_name, ".")
-			    || !strcmp(sys_ent->d_name, "..")
-			    || !strcmp(sys_ent->d_name, "enable")
-			    || !strcmp(sys_ent->d_name, "filter"))
-				continue;
-
-			cnt++;
-		}
-
-		closedir(sys_dir);
-	}
-
-	closedir(events_dir);
-	return cnt;
-}
-
-static int test__all_tracepoints(struct perf_evlist *evlist)
-{
-	TEST_ASSERT_VAL("wrong events count",
-			count_tracepoints() == evlist->nr_entries);
-
-	return test__checkevent_tracepoint_multi(evlist);
-}
-
-struct evlist_test {
+struct test__event_st {
 	const char *name;
 	__u32 type;
 	int (*check)(struct perf_evlist *evlist);
 };
 
-static struct evlist_test test__events[] = {
+static struct test__event_st test__events[] = {
 	[0] = {
 		.name  = "syscalls:sys_enter_open",
 		.check = test__checkevent_tracepoint,
@@ -1167,29 +921,9 @@ static struct evlist_test test__events[] = {
 		.name  = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles",
 		.check = test__group5,
 	},
-	[33] = {
-		.name  = "*:*",
-		.check = test__all_tracepoints,
-	},
-	[34] = {
-		.name  = "{cycles,cache-misses:G}:H",
-		.check = test__group_gh1,
-	},
-	[35] = {
-		.name  = "{cycles,cache-misses:H}:G",
-		.check = test__group_gh2,
-	},
-	[36] = {
-		.name  = "{cycles:G,cache-misses:H}:u",
-		.check = test__group_gh3,
-	},
-	[37] = {
-		.name  = "{cycles:G,cache-misses:H}:uG",
-		.check = test__group_gh4,
-	},
 };
 
-static struct evlist_test test__events_pmu[] = {
+static struct test__event_st test__events_pmu[] = {
 	[0] = {
 		.name  = "cpu/config=10,config1,config2=3,period=1000/u",
 		.check = test__checkevent_pmu,
@@ -1200,20 +934,20 @@ static struct evlist_test test__events_pmu[] = {
 	},
 };
 
-struct terms_test {
+struct test__term {
 	const char *str;
 	__u32 type;
 	int (*check)(struct list_head *terms);
 };
 
-static struct terms_test test__terms[] = {
+static struct test__term test__terms[] = {
 	[0] = {
 		.str   = "config=10,config1,config2=3,umask=1",
 		.check = test__checkterms_simple,
 	},
 };
 
-static int test_event(struct evlist_test *e)
+static int test_event(struct test__event_st *e)
 {
 	struct perf_evlist *evlist;
 	int ret;
@@ -1222,7 +956,7 @@ static int test_event(struct evlist_test *e)
 	if (evlist == NULL)
 		return -ENOMEM;
 
-	ret = parse_events(evlist, e->name);
+	ret = parse_events(evlist, e->name, 0);
 	if (ret) {
 		pr_debug("failed to parse event '%s', err %d\n",
 			 e->name, ret);
@@ -1235,13 +969,13 @@ static int test_event(struct evlist_test *e)
 	return ret;
 }
 
-static int test_events(struct evlist_test *events, unsigned cnt)
+static int test_events(struct test__event_st *events, unsigned cnt)
 {
 	int ret1, ret2 = 0;
 	unsigned i;
 
 	for (i = 0; i < cnt; i++) {
-		struct evlist_test *e = &events[i];
+		struct test__event_st *e = &events[i];
 
 		pr_debug("running test %d '%s'\n", i, e->name);
 		ret1 = test_event(e);
@@ -1252,7 +986,7 @@ static int test_events(struct evlist_test *events, unsigned cnt)
 	return ret2;
 }
 
-static int test_term(struct terms_test *t)
+static int test_term(struct test__term *t)
 {
 	struct list_head *terms;
 	int ret;
@@ -1276,13 +1010,13 @@ static int test_term(struct terms_test *t)
 	return ret;
 }
 
-static int test_terms(struct terms_test *terms, unsigned cnt)
+static int test_terms(struct test__term *terms, unsigned cnt)
 {
 	int ret = 0;
 	unsigned i;
 
 	for (i = 0; i < cnt; i++) {
-		struct terms_test *t = &terms[i];
+		struct test__term *t = &terms[i];
 
 		pr_debug("running test %d '%s'\n", i, t->str);
 		ret = test_term(t);
@@ -1333,7 +1067,7 @@ static int test_pmu_events(void)
 
 	while (!ret && (ent = readdir(dir))) {
 #define MAX_NAME 100
-		struct evlist_test e;
+		struct test__event_st e;
 		char name[MAX_NAME];
 
 		if (!strcmp(ent->d_name, ".") ||
diff --git a/trunk/tools/perf/tests/perf-record.c b/trunk/tools/perf/tests/perf-record.c
index 1e8e5128d0da..70e0d4421df8 100644
--- a/trunk/tools/perf/tests/perf-record.c
+++ b/trunk/tools/perf/tests/perf-record.c
@@ -96,22 +96,22 @@ int test__PERF_RECORD(void)
 	err = perf_evlist__prepare_workload(evlist, &opts, argv);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	/*
 	 * Config the evsels, setting attr->comm on the first one, etc.
 	 */
 	evsel = perf_evlist__first(evlist);
-	perf_evsel__set_sample_bit(evsel, CPU);
-	perf_evsel__set_sample_bit(evsel, TID);
-	perf_evsel__set_sample_bit(evsel, TIME);
-	perf_evlist__config(evlist, &opts);
+	evsel->attr.sample_type |= PERF_SAMPLE_CPU;
+	evsel->attr.sample_type |= PERF_SAMPLE_TID;
+	evsel->attr.sample_type |= PERF_SAMPLE_TIME;
+	perf_evlist__config_attrs(evlist, &opts);
 
 	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
 	if (err < 0) {
 		pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	cpu = err;
@@ -121,7 +121,7 @@ int test__PERF_RECORD(void)
 	 */
 	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
 		pr_debug("sched_setaffinity: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	/*
@@ -131,7 +131,7 @@ int test__PERF_RECORD(void)
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
 		pr_debug("perf_evlist__open: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	/*
@@ -142,7 +142,7 @@ int test__PERF_RECORD(void)
 	err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
 	if (err < 0) {
 		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
-		goto out_delete_maps;
+		goto out_delete_evlist;
 	}
 
 	/*
@@ -305,8 +305,6 @@ int test__PERF_RECORD(void)
 	}
 out_err:
 	perf_evlist__munmap(evlist);
-out_delete_maps:
-	perf_evlist__delete_maps(evlist);
 out_delete_evlist:
 	perf_evlist__delete(evlist);
 out:
diff --git a/trunk/tools/perf/tests/pmu.c b/trunk/tools/perf/tests/pmu.c
index 12b322fa3475..a5f379863b8f 100644
--- a/trunk/tools/perf/tests/pmu.c
+++ b/trunk/tools/perf/tests/pmu.c
@@ -19,8 +19,10 @@ static struct test_format {
 	{ "krava23", "config2:28-29,38\n", },
 };
 
+#define TEST_FORMATS_CNT (sizeof(test_formats) / sizeof(struct test_format))
+
 /* Simulated users input. */
-static struct parse_events_term test_terms[] = {
+static struct parse_events__term test_terms[] = {
 	{
 		.config    = (char *) "krava01",
 		.val.num   = 15,
@@ -76,6 +78,7 @@ static struct parse_events_term test_terms[] = {
 		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 };
+#define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term))
 
 /*
  * Prepare format directory data, exported by kernel
@@ -90,7 +93,7 @@ static char *test_format_dir_get(void)
 	if (!mkdtemp(dir))
 		return NULL;
 
-	for (i = 0; i < ARRAY_SIZE(test_formats); i++) {
+	for (i = 0; i < TEST_FORMATS_CNT; i++) {
 		static char name[PATH_MAX];
 		struct test_format *format = &test_formats[i];
 		FILE *file;
@@ -127,12 +130,14 @@ static struct list_head *test_terms_list(void)
 	static LIST_HEAD(terms);
 	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(test_terms); i++)
+	for (i = 0; i < TERMS_CNT; i++)
 		list_add_tail(&test_terms[i].list, &terms);
 
 	return &terms;
 }
 
+#undef TERMS_CNT
+
 int test__pmu(void)
 {
 	char *format = test_format_dir_get();
diff --git a/trunk/tools/perf/tests/python-use.c b/trunk/tools/perf/tests/python-use.c
deleted file mode 100644
index 7760277c6def..000000000000
--- a/trunk/tools/perf/tests/python-use.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Just test if we can load the python binding.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "tests.h"
-
-extern int verbose;
-
-int test__python_use(void)
-{
-	char *cmd;
-	int ret;
-
-	if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
-		     PYTHONPATH, PYTHON, verbose ? "" : "2> /dev/null") < 0)
-		return -1;
-
-	ret = system(cmd) ? -1 : 0;
-	free(cmd);
-	return ret;
-}
diff --git a/trunk/tools/perf/tests/tests.h b/trunk/tools/perf/tests/tests.h
index 5de0be1ff4b6..fc121edab016 100644
--- a/trunk/tools/perf/tests/tests.h
+++ b/trunk/tools/perf/tests/tests.h
@@ -1,12 +1,6 @@
 #ifndef TESTS_H
 #define TESTS_H
 
-enum {
-	TEST_OK   =  0,
-	TEST_FAIL = -1,
-	TEST_SKIP = -2,
-};
-
 /* Tests */
 int test__vmlinux_matches_kallsyms(void);
 int test__open_syscall_event(void);
@@ -21,7 +15,8 @@ int test__pmu(void);
 int test__attr(void);
 int test__dso_data(void);
 int test__parse_events(void);
-int test__hists_link(void);
-int test__python_use(void);
+
+/* Util */
+int trace_event__id(const char *evname);
 
 #endif /* TESTS_H */
diff --git a/trunk/tools/perf/tests/util.c b/trunk/tools/perf/tests/util.c
new file mode 100644
index 000000000000..748f2e8f6961
--- /dev/null
+++ b/trunk/tools/perf/tests/util.c
@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "tests.h"
+#include "debugfs.h"
+
+/*
+ * Return the numeric id stored for an event below the "syscalls" directory.
+ *
+ * Reads "<tracing_events_path>/syscalls/<evname>/id" and returns the
+ * result of atoi() on its contents, or -1 if the path cannot be
+ * built, or the file cannot be opened or read.
+ */
+int trace_event__id(const char *evname)
+{
+	char *filename;
+	int err = -1, fd;
+
+	if (asprintf(&filename,
+		     "%s/syscalls/%s/id",
+		     tracing_events_path, evname) < 0)
+		return -1;
+
+	fd = open(filename, O_RDONLY);
+	if (fd >= 0) {
+		/*
+		 * Zero-filled and read one byte short, so the buffer is
+		 * always NUL-terminated when handed to atoi().
+		 */
+		char id[16] = { 0 };
+
+		if (read(fd, id, sizeof(id) - 1) > 0)
+			err = atoi(id);
+		close(fd);
+	}
+
+	free(filename);
+	return err;
+}
diff --git a/trunk/tools/perf/tests/vmlinux-kallsyms.c b/trunk/tools/perf/tests/vmlinux-kallsyms.c
index 7b4c4d26d1ba..0d1cdbee2f59 100644
--- a/trunk/tools/perf/tests/vmlinux-kallsyms.c
+++ b/trunk/tools/perf/tests/vmlinux-kallsyms.c
@@ -44,7 +44,7 @@ int test__vmlinux_matches_kallsyms(void)
 	 */
 	if (machine__create_kernel_maps(&kallsyms) < 0) {
 		pr_debug("machine__create_kernel_maps ");
-		goto out;
+		return -1;
 	}
 
 	/*
@@ -101,8 +101,7 @@ int test__vmlinux_matches_kallsyms(void)
 	 */
 	if (machine__load_vmlinux_path(&vmlinux, type,
 				       vmlinux_matches_kallsyms_filter) <= 0) {
-		pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
-		err = TEST_SKIP;
+		pr_debug("machine__load_vmlinux_path ");
 		goto out;
 	}
 
@@ -227,7 +226,5 @@ int test__vmlinux_matches_kallsyms(void)
 			map__fprintf(pos, stderr);
 	}
 out:
-	machine__exit(&kallsyms);
-	machine__exit(&vmlinux);
 	return err;
 }
diff --git a/trunk/tools/perf/ui/browser.c b/trunk/tools/perf/ui/browser.c
index 809ea4632a34..4aeb7d5df939 100644
--- a/trunk/tools/perf/ui/browser.c
+++ b/trunk/tools/perf/ui/browser.c
@@ -273,8 +273,6 @@ void ui_browser__hide(struct ui_browser *browser __maybe_unused)
 {
 	pthread_mutex_lock(&ui__lock);
 	ui_helpline__pop();
-	free(browser->helpline);
-	browser->helpline = NULL;
 	pthread_mutex_unlock(&ui__lock);
 }
 
@@ -473,7 +471,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *browser)
 	return row;
 }
 
-static struct ui_browser_colorset {
+static struct ui_browser__colorset {
 	const char *name, *fg, *bg;
 	int colorset;
 } ui_browser__colorsets[] = {
@@ -708,7 +706,7 @@ void ui_browser__init(void)
 	perf_config(ui_browser__color_config, NULL);
 
 	while (ui_browser__colorsets[i].name) {
-		struct ui_browser_colorset *c = &ui_browser__colorsets[i++];
+		struct ui_browser__colorset *c = &ui_browser__colorsets[i++];
 		sltt_set_color(c->colorset, c->name, c->fg, c->bg);
 	}
 
diff --git a/trunk/tools/perf/ui/browsers/annotate.c b/trunk/tools/perf/ui/browsers/annotate.c
index 7dca1555c610..5dab3ca96980 100644
--- a/trunk/tools/perf/ui/browsers/annotate.c
+++ b/trunk/tools/perf/ui/browsers/annotate.c
@@ -182,16 +182,6 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 		ab->selection = dl;
 }
 
-static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym)
-{
-	if (!dl || !dl->ins || !ins__is_jump(dl->ins)
-	    || !disasm_line__has_offset(dl)
-	    || dl->ops.target.offset >= symbol__size(sym))
-		return false;
-
-	return true;
-}
-
 static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 {
 	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -205,7 +195,8 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 	if (strstr(sym->name, "@plt"))
 		return;
 
-	if (!disasm_line__is_valid_jump(cursor, sym))
+	if (!cursor || !cursor->ins || !ins__is_jump(cursor->ins) ||
+	    !disasm_line__has_offset(cursor))
 		return;
 
 	target = ab->offsets[cursor->ops.target.offset];
@@ -797,9 +788,17 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser
 		struct disasm_line *dl = browser->offsets[offset], *dlt;
 		struct browser_disasm_line *bdlt;
 
-		if (!disasm_line__is_valid_jump(dl, sym))
+		if (!dl || !dl->ins || !ins__is_jump(dl->ins) ||
+		    !disasm_line__has_offset(dl))
 			continue;
 
+		if (dl->ops.target.offset >= size) {
+			ui__error("jump to after symbol!\n"
+				  "size: %zx, jump target: %" PRIx64,
+				  size, dl->ops.target.offset);
+			continue;
+		}
+
 		dlt = browser->offsets[dl->ops.target.offset];
 		/*
  		 * FIXME: Oops, no jump target? Buggy disassembler? Or do we
@@ -922,11 +921,11 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
 
 #define ANNOTATE_CFG(n) \
 	{ .name = #n, .value = &annotate_browser__opts.n, }
-
+	
 /*
  * Keep the entries sorted, they are bsearch'ed
  */
-static struct annotate_config {
+static struct annotate__config {
 	const char *name;
 	bool *value;
 } annotate__configs[] = {
@@ -940,7 +939,7 @@ static struct annotate_config {
 
 static int annotate_config__cmp(const void *name, const void *cfgp)
 {
-	const struct annotate_config *cfg = cfgp;
+	const struct annotate__config *cfg = cfgp;
 
 	return strcmp(name, cfg->name);
 }
@@ -948,7 +947,7 @@ static int annotate_config__cmp(const void *name, const void *cfgp)
 static int annotate__config(const char *var, const char *value,
 			    void *data __maybe_unused)
 {
-	struct annotate_config *cfg;
+	struct annotate__config *cfg;
 	const char *name;
 
 	if (prefixcmp(var, "annotate.") != 0)
@@ -956,7 +955,7 @@ static int annotate__config(const char *var, const char *value,
 
 	name = var + 9;
 	cfg = bsearch(name, annotate__configs, ARRAY_SIZE(annotate__configs),
-		      sizeof(struct annotate_config), annotate_config__cmp);
+		      sizeof(struct annotate__config), annotate_config__cmp);
 
 	if (cfg == NULL)
 		return -1;
diff --git a/trunk/tools/perf/ui/browsers/hists.c b/trunk/tools/perf/ui/browsers/hists.c
index aa22704047d6..ccc4bd161420 100644
--- a/trunk/tools/perf/ui/browsers/hists.c
+++ b/trunk/tools/perf/ui/browsers/hists.c
@@ -567,128 +567,26 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
 	return row - first_row;
 }
 
-struct hpp_arg {
-	struct ui_browser *b;
-	char folded_sign;
-	bool current_entry;
-};
-
-static int __hpp__color_callchain(struct hpp_arg *arg)
-{
-	if (!symbol_conf.use_callchain)
-		return 0;
-
-	slsmg_printf("%c ", arg->folded_sign);
-	return 2;
-}
-
-static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he,
-			    u64 (*get_field)(struct hist_entry *),
-			    int (*callchain_cb)(struct hpp_arg *))
-{
-	int ret = 0;
-	double percent = 0.0;
-	struct hists *hists = he->hists;
-	struct hpp_arg *arg = hpp->ptr;
-
-	if (hists->stats.total_period)
-		percent = 100.0 * get_field(he) / hists->stats.total_period;
-
-	ui_browser__set_percent_color(arg->b, percent, arg->current_entry);
-
-	if (callchain_cb)
-		ret += callchain_cb(arg);
-
-	ret += scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
-	slsmg_printf("%s", hpp->buf);
-
-	if (symbol_conf.event_group) {
-		int prev_idx, idx_delta;
-		struct perf_evsel *evsel = hists_to_evsel(hists);
-		struct hist_entry *pair;
-		int nr_members = evsel->nr_members;
-
-		if (nr_members <= 1)
-			goto out;
-
-		prev_idx = perf_evsel__group_idx(evsel);
-
-		list_for_each_entry(pair, &he->pairs.head, pairs.node) {
-			u64 period = get_field(pair);
-			u64 total = pair->hists->stats.total_period;
-
-			if (!total)
-				continue;
-
-			evsel = hists_to_evsel(pair->hists);
-			idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
-
-			while (idx_delta--) {
-				/*
-				 * zero-fill group members in the middle which
-				 * have no sample
-				 */
-				ui_browser__set_percent_color(arg->b, 0.0,
-							arg->current_entry);
-				ret += scnprintf(hpp->buf, hpp->size,
-						 " %6.2f%%", 0.0);
-				slsmg_printf("%s", hpp->buf);
-			}
-
-			percent = 100.0 * period / total;
-			ui_browser__set_percent_color(arg->b, percent,
-						      arg->current_entry);
-			ret += scnprintf(hpp->buf, hpp->size,
-					 " %6.2f%%", percent);
-			slsmg_printf("%s", hpp->buf);
-
-			prev_idx = perf_evsel__group_idx(evsel);
-		}
-
-		idx_delta = nr_members - prev_idx - 1;
-
-		while (idx_delta--) {
-			/*
-			 * zero-fill group members at last which have no sample
-			 */
-			ui_browser__set_percent_color(arg->b, 0.0,
-						      arg->current_entry);
-			ret += scnprintf(hpp->buf, hpp->size,
-					 " %6.2f%%", 0.0);
-			slsmg_printf("%s", hpp->buf);
-		}
-	}
-out:
-	if (!arg->current_entry || !arg->b->navkeypressed)
-		ui_browser__set_color(arg->b, HE_COLORSET_NORMAL);
-
-	return ret;
-}
-
-#define __HPP_COLOR_PERCENT_FN(_type, _field, _cb)			\
-static u64 __hpp_get_##_field(struct hist_entry *he)			\
-{									\
-	return he->stat._field;						\
-}									\
-									\
-static int hist_browser__hpp_color_##_type(struct perf_hpp *hpp,	\
-					   struct hist_entry *he)	\
+#define HPP__COLOR_FN(_name, _field)					\
+static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp,	\
+					     struct hist_entry *he)	\
 {									\
-	return __hpp__color_fmt(hpp, he, __hpp_get_##_field, _cb);	\
+	u64 total = he->hists->stats.total_period; /* may be 0 */	\
+	double percent = total ? 100.0 * he->stat._field / total : 0.0;	\
+	*(double *)hpp->ptr = percent; /* caller reads it back via hpp->ptr */ \
+	return scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent);	\
 }
 
-__HPP_COLOR_PERCENT_FN(overhead, period, __hpp__color_callchain)
-__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, NULL)
-__HPP_COLOR_PERCENT_FN(overhead_us, period_us, NULL)
-__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys, NULL)
-__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL)
+HPP__COLOR_FN(overhead, period)
+HPP__COLOR_FN(overhead_sys, period_sys)
+HPP__COLOR_FN(overhead_us, period_us)
+HPP__COLOR_FN(overhead_guest_sys, period_guest_sys)
+HPP__COLOR_FN(overhead_guest_us, period_guest_us)
 
-#undef __HPP_COLOR_PERCENT_FN
+#undef HPP__COLOR_FN
 
 void hist_browser__init_hpp(void)
 {
-	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
-
 	perf_hpp__init();
 
 	perf_hpp__format[PERF_HPP__OVERHEAD].color =
@@ -708,13 +606,13 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 				    unsigned short row)
 {
 	char s[256];
-	int printed = 0;
+	double percent;
+	int i, printed = 0;
 	int width = browser->b.width;
 	char folded_sign = ' ';
 	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
 	off_t row_offset = entry->row_offset;
 	bool first = true;
-	struct perf_hpp_fmt *fmt;
 
 	if (current_entry) {
 		browser->he_selection = entry;
@@ -727,30 +625,41 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 	}
 
 	if (row_offset == 0) {
-		struct hpp_arg arg = {
-			.b 		= &browser->b,
-			.folded_sign	= folded_sign,
-			.current_entry	= current_entry,
-		};
 		struct perf_hpp hpp = {
 			.buf		= s,
 			.size		= sizeof(s),
-			.ptr		= &arg,
 		};
 
 		ui_browser__gotorc(&browser->b, row, 0);
 
-		perf_hpp__for_each_format(fmt) {
+		for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+			if (!perf_hpp__format[i].cond)
+				continue;
+
 			if (!first) {
 				slsmg_printf("  ");
 				width -= 2;
 			}
 			first = false;
 
-			if (fmt->color) {
-				width -= fmt->color(&hpp, entry);
+			if (perf_hpp__format[i].color) {
+				hpp.ptr = &percent;
+				/* It will set percent for us. See HPP__COLOR_FN above. */
+				width -= perf_hpp__format[i].color(&hpp, entry);
+
+				ui_browser__set_percent_color(&browser->b, percent, current_entry);
+
+				if (i == PERF_HPP__OVERHEAD && symbol_conf.use_callchain) {
+					slsmg_printf("%c ", folded_sign);
+					width -= 2;
+				}
+
+				slsmg_printf("%s", s);
+
+				if (!current_entry || !browser->b.navkeypressed)
+					ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
 			} else {
-				width -= fmt->entry(&hpp, entry);
+				width -= perf_hpp__format[i].entry(&hpp, entry);
 				slsmg_printf("%s", s);
 			}
 		}
@@ -1189,21 +1098,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
 	const struct thread *thread = hists->thread_filter;
 	unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
 	u64 nr_events = hists->stats.total_period;
-	struct perf_evsel *evsel = hists_to_evsel(hists);
-	char buf[512];
-	size_t buflen = sizeof(buf);
-
-	if (symbol_conf.event_group && evsel->nr_members > 1) {
-		struct perf_evsel *pos;
-
-		perf_evsel__group_desc(evsel, buf, buflen);
-		ev_name = buf;
-
-		for_each_group_member(pos, evsel) {
-			nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
-			nr_events += pos->hists.stats.total_period;
-		}
-	}
 
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
@@ -1241,96 +1135,6 @@ static inline bool is_report_browser(void *timer)
 	return timer == NULL;
 }
 
-/*
- * Only runtime switching of perf data file will make "input_name" point
- * to a malloced buffer. So add "is_input_name_malloced" flag to decide
- * whether we need to call free() for current "input_name" during the switch.
- */
-static bool is_input_name_malloced = false;
-
-static int switch_data_file(void)
-{
-	char *pwd, *options[32], *abs_path[32], *tmp;
-	DIR *pwd_dir;
-	int nr_options = 0, choice = -1, ret = -1;
-	struct dirent *dent;
-
-	pwd = getenv("PWD");
-	if (!pwd)
-		return ret;
-
-	pwd_dir = opendir(pwd);
-	if (!pwd_dir)
-		return ret;
-
-	memset(options, 0, sizeof(options));
-	memset(options, 0, sizeof(abs_path));
-
-	while ((dent = readdir(pwd_dir))) {
-		char path[PATH_MAX];
-		u64 magic;
-		char *name = dent->d_name;
-		FILE *file;
-
-		if (!(dent->d_type == DT_REG))
-			continue;
-
-		snprintf(path, sizeof(path), "%s/%s", pwd, name);
-
-		file = fopen(path, "r");
-		if (!file)
-			continue;
-
-		if (fread(&magic, 1, 8, file) < 8)
-			goto close_file_and_continue;
-
-		if (is_perf_magic(magic)) {
-			options[nr_options] = strdup(name);
-			if (!options[nr_options])
-				goto close_file_and_continue;
-
-			abs_path[nr_options] = strdup(path);
-			if (!abs_path[nr_options]) {
-				free(options[nr_options]);
-				ui__warning("Can't search all data files due to memory shortage.\n");
-				fclose(file);
-				break;
-			}
-
-			nr_options++;
-		}
-
-close_file_and_continue:
-		fclose(file);
-		if (nr_options >= 32) {
-			ui__warning("Too many perf data files in PWD!\n"
-				    "Only the first 32 files will be listed.\n");
-			break;
-		}
-	}
-	closedir(pwd_dir);
-
-	if (nr_options) {
-		choice = ui__popup_menu(nr_options, options);
-		if (choice < nr_options && choice >= 0) {
-			tmp = strdup(abs_path[choice]);
-			if (tmp) {
-				if (is_input_name_malloced)
-					free((void *)input_name);
-				input_name = tmp;
-				is_input_name_malloced = true;
-				ret = 0;
-			} else
-				ui__warning("Data switch failed due to memory shortage!\n");
-		}
-	}
-
-	free_popup_options(options, nr_options);
-	free_popup_options(abs_path, nr_options);
-	return ret;
-}
-
-
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    const char *helpline, const char *ev_name,
 				    bool left_exits,
@@ -1365,8 +1169,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		int choice = 0,
 		    annotate = -2, zoom_dso = -2, zoom_thread = -2,
 		    annotate_f = -2, annotate_t = -2, browse_map = -2;
-		int scripts_comm = -2, scripts_symbol = -2,
-		    scripts_all = -2, switch_data = -2;
+		int scripts_comm = -2, scripts_symbol = -2, scripts_all = -2;
 
 		nr_options = 0;
 
@@ -1423,10 +1226,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			if (is_report_browser(hbt))
 				goto do_scripts;
 			continue;
-		case 's':
-			if (is_report_browser(hbt))
-				goto do_data_switch;
-			continue;
 		case K_F1:
 		case 'h':
 		case '?':
@@ -1446,7 +1245,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 					"d             Zoom into current DSO\n"
 					"t             Zoom into current Thread\n"
 					"r             Run available scripts('perf report' only)\n"
-					"s             Switch to another data file in PWD ('perf report' only)\n"
 					"P             Print histograms to perf.hist.N\n"
 					"V             Verbose (DSO names in callchains, etc)\n"
 					"/             Filter symbol by name");
@@ -1554,9 +1352,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		if (asprintf(&options[nr_options], "Run scripts for all samples") > 0)
 			scripts_all = nr_options++;
 
-		if (is_report_browser(hbt) && asprintf(&options[nr_options],
-				"Switch to another data file in PWD") > 0)
-			switch_data = nr_options++;
 add_exit_option:
 		options[nr_options++] = (char *)"Exit";
 retry_popup_menu:
@@ -1667,16 +1462,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 			script_browse(script_opt);
 		}
-		/* Switch to another data file */
-		else if (choice == switch_data) {
-do_data_switch:
-			if (!switch_data_file()) {
-				key = K_SWITCH_INPUT_DATA;
-				break;
-			} else
-				ui__warning("Won't switch the data files due to\n"
-					"no valid data file get selected!\n");
-		}
 	}
 out_free_stack:
 	pstack__delete(fstack);
@@ -1709,16 +1494,6 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
 	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
 						       HE_COLORSET_NORMAL);
 
-	if (symbol_conf.event_group && evsel->nr_members > 1) {
-		struct perf_evsel *pos;
-
-		ev_name = perf_evsel__group_name(evsel);
-
-		for_each_group_member(pos, evsel) {
-			nr_events += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
-		}
-	}
-
 	nr_events = convert_unit(nr_events, &unit);
 	printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
 			   unit, unit == ' ' ? "" : " ", ev_name);
@@ -1803,7 +1578,6 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 						"Do you really want to exit?"))
 					continue;
 				/* Fall thru */
-			case K_SWITCH_INPUT_DATA:
 			case 'q':
 			case CTRL('c'):
 				goto out;
@@ -1830,19 +1604,8 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 	return key;
 }
 
-static bool filter_group_entries(struct ui_browser *self __maybe_unused,
-				 void *entry)
-{
-	struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
-
-	if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel))
-		return true;
-
-	return false;
-}
-
 static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
-					   int nr_entries, const char *help,
+					   const char *help,
 					   struct hist_browser_timer *hbt,
 					   struct perf_session_env *env)
 {
@@ -1853,8 +1616,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 			.refresh    = ui_browser__list_head_refresh,
 			.seek	    = ui_browser__list_head_seek,
 			.write	    = perf_evsel_menu__write,
-			.filter	    = filter_group_entries,
-			.nr_entries = nr_entries,
+			.nr_entries = evlist->nr_entries,
 			.priv	    = evlist,
 		},
 		.env = env,
@@ -1870,37 +1632,20 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 			menu.b.width = line_len;
 	}
 
-	return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
+	return perf_evsel_menu__run(&menu, evlist->nr_entries, help, hbt);
 }
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
 				  struct perf_session_env *env)
 {
-	int nr_entries = evlist->nr_entries;
-
-single_entry:
-	if (nr_entries == 1) {
+	if (evlist->nr_entries == 1) {
 		struct perf_evsel *first = list_entry(evlist->entries.next,
 						      struct perf_evsel, node);
 		const char *ev_name = perf_evsel__name(first);
-
-		return perf_evsel__hists_browse(first, nr_entries, help,
+		return perf_evsel__hists_browse(first, evlist->nr_entries, help,
 						ev_name, false, hbt, env);
 	}
 
-	if (symbol_conf.event_group) {
-		struct perf_evsel *pos;
-
-		nr_entries = 0;
-		list_for_each_entry(pos, &evlist->entries, node)
-			if (perf_evsel__is_group_leader(pos))
-				nr_entries++;
-
-		if (nr_entries == 1)
-			goto single_entry;
-	}
-
-	return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
-					       hbt, env);
+	return __perf_evlist__tui_browse_hists(evlist, help, hbt, env);
 }
diff --git a/trunk/tools/perf/ui/gtk/annotate.c b/trunk/tools/perf/ui/gtk/annotate.c
deleted file mode 100644
index 7d8dc581a545..000000000000
--- a/trunk/tools/perf/ui/gtk/annotate.c
+++ /dev/null
@@ -1,229 +0,0 @@
-#include "gtk.h"
-#include "util/debug.h"
-#include "util/annotate.h"
-#include "ui/helpline.h"
-
-
-enum {
-	ANN_COL__PERCENT,
-	ANN_COL__OFFSET,
-	ANN_COL__LINE,
-
-	MAX_ANN_COLS
-};
-
-static const char *const col_names[] = {
-	"Overhead",
-	"Offset",
-	"Line"
-};
-
-static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
-				 struct disasm_line *dl, int evidx)
-{
-	struct sym_hist *symhist;
-	double percent = 0.0;
-	const char *markup;
-	int ret = 0;
-
-	strcpy(buf, "");
-
-	if (dl->offset == (s64) -1)
-		return 0;
-
-	symhist = annotation__histogram(symbol__annotation(sym), evidx);
-	if (!symhist->addr[dl->offset])
-		return 0;
-
-	percent = 100.0 * symhist->addr[dl->offset] / symhist->sum;
-
-	markup = perf_gtk__get_percent_color(percent);
-	if (markup)
-		ret += scnprintf(buf, size, "%s", markup);
-	ret += scnprintf(buf + ret, size - ret, "%6.2f%%", percent);
-	if (markup)
-		ret += scnprintf(buf + ret, size - ret, "</span>");
-
-	return ret;
-}
-
-static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym,
-				struct map *map, struct disasm_line *dl)
-{
-	u64 start = map__rip_2objdump(map, sym->start);
-
-	strcpy(buf, "");
-
-	if (dl->offset == (s64) -1)
-		return 0;
-
-	return scnprintf(buf, size, "%"PRIx64, start + dl->offset);
-}
-
-static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl)
-{
-	int ret = 0;
-	char *line = g_markup_escape_text(dl->line, -1);
-	const char *markup = "<span fgcolor='gray'>";
-
-	strcpy(buf, "");
-
-	if (!line)
-		return 0;
-
-	if (dl->offset != (s64) -1)
-		markup = NULL;
-
-	if (markup)
-		ret += scnprintf(buf, size, "%s", markup);
-	ret += scnprintf(buf + ret, size - ret, "%s", line);
-	if (markup)
-		ret += scnprintf(buf + ret, size - ret, "</span>");
-
-	g_free(line);
-	return ret;
-}
-
-static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
-				struct map *map, int evidx,
-				struct hist_browser_timer *hbt __maybe_unused)
-{
-	struct disasm_line *pos, *n;
-	struct annotation *notes;
-	GType col_types[MAX_ANN_COLS];
-	GtkCellRenderer *renderer;
-	GtkListStore *store;
-	GtkWidget *view;
-	int i;
-	char s[512];
-
-	notes = symbol__annotation(sym);
-
-	for (i = 0; i < MAX_ANN_COLS; i++) {
-		col_types[i] = G_TYPE_STRING;
-	}
-	store = gtk_list_store_newv(MAX_ANN_COLS, col_types);
-
-	view = gtk_tree_view_new();
-	renderer = gtk_cell_renderer_text_new();
-
-	for (i = 0; i < MAX_ANN_COLS; i++) {
-		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
-					-1, col_names[i], renderer, "markup",
-					i, NULL);
-	}
-
-	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
-	g_object_unref(GTK_TREE_MODEL(store));
-
-	list_for_each_entry(pos, &notes->src->source, node) {
-		GtkTreeIter iter;
-
-		gtk_list_store_append(store, &iter);
-
-		if (perf_gtk__get_percent(s, sizeof(s), sym, pos, evidx))
-			gtk_list_store_set(store, &iter, ANN_COL__PERCENT, s, -1);
-		if (perf_gtk__get_offset(s, sizeof(s), sym, map, pos))
-			gtk_list_store_set(store, &iter, ANN_COL__OFFSET, s, -1);
-		if (perf_gtk__get_line(s, sizeof(s), pos))
-			gtk_list_store_set(store, &iter, ANN_COL__LINE, s, -1);
-	}
-
-	gtk_container_add(GTK_CONTAINER(window), view);
-
-	list_for_each_entry_safe(pos, n, &notes->src->source, node) {
-		list_del(&pos->node);
-		disasm_line__free(pos);
-	}
-
-	return 0;
-}
-
-int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx,
-			 struct hist_browser_timer *hbt)
-{
-	GtkWidget *window;
-	GtkWidget *notebook;
-	GtkWidget *scrolled_window;
-	GtkWidget *tab_label;
-
-	if (map->dso->annotate_warned)
-		return -1;
-
-	if (symbol__annotate(sym, map, 0) < 0) {
-		ui__error("%s", ui_helpline__current);
-		return -1;
-	}
-
-	if (perf_gtk__is_active_context(pgctx)) {
-		window = pgctx->main_window;
-		notebook = pgctx->notebook;
-	} else {
-		GtkWidget *vbox;
-		GtkWidget *infobar;
-		GtkWidget *statbar;
-
-		signal(SIGSEGV, perf_gtk__signal);
-		signal(SIGFPE,  perf_gtk__signal);
-		signal(SIGINT,  perf_gtk__signal);
-		signal(SIGQUIT, perf_gtk__signal);
-		signal(SIGTERM, perf_gtk__signal);
-
-		window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
-		gtk_window_set_title(GTK_WINDOW(window), "perf annotate");
-
-		g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
-
-		pgctx = perf_gtk__activate_context(window);
-		if (!pgctx)
-			return -1;
-
-		vbox = gtk_vbox_new(FALSE, 0);
-		notebook = gtk_notebook_new();
-		pgctx->notebook = notebook;
-
-		gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
-
-		infobar = perf_gtk__setup_info_bar();
-		if (infobar) {
-			gtk_box_pack_start(GTK_BOX(vbox), infobar,
-					   FALSE, FALSE, 0);
-		}
-
-		statbar = perf_gtk__setup_statusbar();
-		gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
-
-		gtk_container_add(GTK_CONTAINER(window), vbox);
-	}
-
-	scrolled_window = gtk_scrolled_window_new(NULL, NULL);
-	tab_label = gtk_label_new(sym->name);
-
-	gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
-				       GTK_POLICY_AUTOMATIC,
-				       GTK_POLICY_AUTOMATIC);
-
-	gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window,
-				 tab_label);
-
-	perf_gtk__annotate_symbol(scrolled_window, sym, map, evidx, hbt);
-	return 0;
-}
-
-void perf_gtk__show_annotations(void)
-{
-	GtkWidget *window;
-
-	if (!perf_gtk__is_active_context(pgctx))
-		return;
-
-	window = pgctx->main_window;
-	gtk_widget_show_all(window);
-
-	perf_gtk__resize_window(window);
-	gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
-
-	gtk_main();
-
-	perf_gtk__deactivate_context(&pgctx);
-}
diff --git a/trunk/tools/perf/ui/gtk/browser.c b/trunk/tools/perf/ui/gtk/browser.c
index c95012cdb438..253b6219a39e 100644
--- a/trunk/tools/perf/ui/gtk/browser.c
+++ b/trunk/tools/perf/ui/gtk/browser.c
@@ -8,13 +8,15 @@
 
 #include <signal.h>
 
-void perf_gtk__signal(int sig)
+#define MAX_COLUMNS			32
+
+static void perf_gtk__signal(int sig)
 {
 	perf_gtk__exit(false);
 	psignal(sig, "perf");
 }
 
-void perf_gtk__resize_window(GtkWidget *window)
+static void perf_gtk__resize_window(GtkWidget *window)
 {
 	GdkRectangle rect;
 	GdkScreen *screen;
@@ -34,7 +36,7 @@ void perf_gtk__resize_window(GtkWidget *window)
 	gtk_window_resize(GTK_WINDOW(window), width, height);
 }
 
-const char *perf_gtk__get_percent_color(double percent)
+static const char *perf_gtk__get_percent_color(double percent)
 {
 	if (percent >= MIN_RED)
 		return "<span fgcolor='red'>";
@@ -43,8 +45,155 @@ const char *perf_gtk__get_percent_color(double percent)
 	return NULL;
 }
 
+#define HPP__COLOR_FN(_name, _field)						\
+static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp,			\
+					 struct hist_entry *he)			\
+{										\
+	struct hists *hists = he->hists;					\
+	double percent = 100.0 * he->stat._field / hists->stats.total_period;	\
+	const char *markup;							\
+	int ret = 0;								\
+										\
+	markup = perf_gtk__get_percent_color(percent);				\
+	if (markup)								\
+		ret += scnprintf(hpp->buf, hpp->size, "%s", markup);		\
+	ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%6.2f%%", percent); 	\
+	if (markup)								\
+		ret += scnprintf(hpp->buf + ret, hpp->size - ret, "</span>"); 	\
+										\
+	return ret;								\
+}
+
+HPP__COLOR_FN(overhead, period)
+HPP__COLOR_FN(overhead_sys, period_sys)
+HPP__COLOR_FN(overhead_us, period_us)
+HPP__COLOR_FN(overhead_guest_sys, period_guest_sys)
+HPP__COLOR_FN(overhead_guest_us, period_guest_us)
+
+#undef HPP__COLOR_FN
+
+void perf_gtk__init_hpp(void)
+{
+	perf_hpp__init();
+
+	perf_hpp__format[PERF_HPP__OVERHEAD].color =
+				perf_gtk__hpp_color_overhead;
+	perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
+				perf_gtk__hpp_color_overhead_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
+				perf_gtk__hpp_color_overhead_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
+				perf_gtk__hpp_color_overhead_guest_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
+				perf_gtk__hpp_color_overhead_guest_us;
+}
+
+static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
+{
+	GType col_types[MAX_COLUMNS];
+	GtkCellRenderer *renderer;
+	struct sort_entry *se;
+	GtkListStore *store;
+	struct rb_node *nd;
+	GtkWidget *view;
+	int i, col_idx;
+	int nr_cols;
+	char s[512];
+
+	struct perf_hpp hpp = {
+		.buf		= s,
+		.size		= sizeof(s),
+	};
+
+	nr_cols = 0;
+
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
+
+		col_types[nr_cols++] = G_TYPE_STRING;
+	}
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		if (se->elide)
+			continue;
+
+		col_types[nr_cols++] = G_TYPE_STRING;
+	}
+
+	store = gtk_list_store_newv(nr_cols, col_types);
+
+	view = gtk_tree_view_new();
+
+	renderer = gtk_cell_renderer_text_new();
+
+	col_idx = 0;
+
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
+
+		perf_hpp__format[i].header(&hpp);
+
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, s,
+							    renderer, "markup",
+							    col_idx++, NULL);
+	}
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		if (se->elide)
+			continue;
+
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, se->se_header,
+							    renderer, "text",
+							    col_idx++, NULL);
+	}
+
+	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+
+	g_object_unref(GTK_TREE_MODEL(store));
+
+	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		GtkTreeIter iter;
+
+		if (h->filtered)
+			continue;
+
+		gtk_list_store_append(store, &iter);
+
+		col_idx = 0;
+
+		for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+			if (!perf_hpp__format[i].cond)
+				continue;
+
+			if (perf_hpp__format[i].color)
+				perf_hpp__format[i].color(&hpp, h);
+			else
+				perf_hpp__format[i].entry(&hpp, h);
+
+			gtk_list_store_set(store, &iter, col_idx++, s, -1);
+		}
+
+		list_for_each_entry(se, &hist_entry__sort_list, list) {
+			if (se->elide)
+				continue;
+
+			se->se_snprintf(h, s, ARRAY_SIZE(s),
+					hists__col_len(hists, se->se_width_idx));
+
+			gtk_list_store_set(store, &iter, col_idx++, s, -1);
+		}
+	}
+
+	gtk_container_add(GTK_CONTAINER(window), view);
+}
+
 #ifdef HAVE_GTK_INFO_BAR
-GtkWidget *perf_gtk__setup_info_bar(void)
+static GtkWidget *perf_gtk__setup_info_bar(void)
 {
 	GtkWidget *info_bar;
 	GtkWidget *label;
@@ -71,7 +220,7 @@ GtkWidget *perf_gtk__setup_info_bar(void)
 }
 #endif
 
-GtkWidget *perf_gtk__setup_statusbar(void)
+static GtkWidget *perf_gtk__setup_statusbar(void)
 {
 	GtkWidget *stbar;
 	unsigned ctxid;
@@ -85,3 +234,79 @@ GtkWidget *perf_gtk__setup_statusbar(void)
 
 	return stbar;
 }
+
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
+				  const char *help,
+				  struct hist_browser_timer *hbt __maybe_unused)
+{
+	struct perf_evsel *pos;
+	GtkWidget *vbox;
+	GtkWidget *notebook;
+	GtkWidget *info_bar;
+	GtkWidget *statbar;
+	GtkWidget *window;
+
+	signal(SIGSEGV, perf_gtk__signal);
+	signal(SIGFPE,  perf_gtk__signal);
+	signal(SIGINT,  perf_gtk__signal);
+	signal(SIGQUIT, perf_gtk__signal);
+	signal(SIGTERM, perf_gtk__signal);
+
+	window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+
+	gtk_window_set_title(GTK_WINDOW(window), "perf report");
+
+	g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
+
+	pgctx = perf_gtk__activate_context(window);
+	if (!pgctx)
+		return -1;
+
+	vbox = gtk_vbox_new(FALSE, 0);
+
+	notebook = gtk_notebook_new();
+
+	list_for_each_entry(pos, &evlist->entries, node) {
+		struct hists *hists = &pos->hists;
+		const char *evname = perf_evsel__name(pos);
+		GtkWidget *scrolled_window;
+		GtkWidget *tab_label;
+
+		scrolled_window = gtk_scrolled_window_new(NULL, NULL);
+
+		gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
+							GTK_POLICY_AUTOMATIC,
+							GTK_POLICY_AUTOMATIC);
+
+		perf_gtk__show_hists(scrolled_window, hists);
+
+		tab_label = gtk_label_new(evname);
+
+		gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
+	}
+
+	gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+	info_bar = perf_gtk__setup_info_bar();
+	if (info_bar)
+		gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
+
+	statbar = perf_gtk__setup_statusbar();
+	gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+	gtk_container_add(GTK_CONTAINER(window), vbox);
+
+	gtk_widget_show_all(window);
+
+	perf_gtk__resize_window(window);
+
+	gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
+
+	ui_helpline__push(help);
+
+	gtk_main();
+
+	perf_gtk__deactivate_context(&pgctx);
+
+	return 0;
+}
diff --git a/trunk/tools/perf/ui/gtk/gtk.h b/trunk/tools/perf/ui/gtk/gtk.h
index 3d96785ef155..856320e2cc05 100644
--- a/trunk/tools/perf/ui/gtk/gtk.h
+++ b/trunk/tools/perf/ui/gtk/gtk.h
@@ -10,7 +10,6 @@
 
 struct perf_gtk_context {
 	GtkWidget *main_window;
-	GtkWidget *notebook;
 
 #ifdef HAVE_GTK_INFO_BAR
 	GtkWidget *info_bar;
@@ -34,14 +33,7 @@ void perf_gtk__init_helpline(void);
 void perf_gtk__init_progress(void);
 void perf_gtk__init_hpp(void);
 
-void perf_gtk__signal(int sig);
-void perf_gtk__resize_window(GtkWidget *window);
-const char *perf_gtk__get_percent_color(double percent);
-GtkWidget *perf_gtk__setup_statusbar(void);
-
-#ifdef HAVE_GTK_INFO_BAR
-GtkWidget *perf_gtk__setup_info_bar(void);
-#else
+#ifndef HAVE_GTK_INFO_BAR
 static inline GtkWidget *perf_gtk__setup_info_bar(void)
 {
 	return NULL;
diff --git a/trunk/tools/perf/ui/gtk/helpline.c b/trunk/tools/perf/ui/gtk/helpline.c
index 3388cbd12186..5db4432ff12a 100644
--- a/trunk/tools/perf/ui/gtk/helpline.c
+++ b/trunk/tools/perf/ui/gtk/helpline.c
@@ -24,7 +24,17 @@ static void gtk_helpline_push(const char *msg)
 			   pgctx->statbar_ctx_id, msg);
 }
 
-static int gtk_helpline_show(const char *fmt, va_list ap)
+static struct ui_helpline gtk_helpline_fns = {
+	.pop	= gtk_helpline_pop,
+	.push	= gtk_helpline_push,
+};
+
+void perf_gtk__init_helpline(void)
+{
+	helpline_fns = &gtk_helpline_fns;
+}
+
+int perf_gtk__show_helpline(const char *fmt, va_list ap)
 {
 	int ret;
 	char *ptr;
@@ -44,14 +54,3 @@ static int gtk_helpline_show(const char *fmt, va_list ap)
 
 	return ret;
 }
-
-static struct ui_helpline gtk_helpline_fns = {
-	.pop	= gtk_helpline_pop,
-	.push	= gtk_helpline_push,
-	.show	= gtk_helpline_show,
-};
-
-void perf_gtk__init_helpline(void)
-{
-	helpline_fns = &gtk_helpline_fns;
-}
diff --git a/trunk/tools/perf/ui/gtk/hists.c b/trunk/tools/perf/ui/gtk/hists.c
deleted file mode 100644
index 1e764a8ad259..000000000000
--- a/trunk/tools/perf/ui/gtk/hists.c
+++ /dev/null
@@ -1,312 +0,0 @@
-#include "../evlist.h"
-#include "../cache.h"
-#include "../evsel.h"
-#include "../sort.h"
-#include "../hist.h"
-#include "../helpline.h"
-#include "gtk.h"
-
-#define MAX_COLUMNS			32
-
-static int __percent_color_snprintf(char *buf, size_t size, double percent)
-{
-	int ret = 0;
-	const char *markup;
-
-	markup = perf_gtk__get_percent_color(percent);
-	if (markup)
-		ret += scnprintf(buf, size, markup);
-
-	ret += scnprintf(buf + ret, size - ret, " %6.2f%%", percent);
-
-	if (markup)
-		ret += scnprintf(buf + ret, size - ret, "</span>");
-
-	return ret;
-}
-
-
-static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he,
-			    u64 (*get_field)(struct hist_entry *))
-{
-	int ret;
-	double percent = 0.0;
-	struct hists *hists = he->hists;
-
-	if (hists->stats.total_period)
-		percent = 100.0 * get_field(he) / hists->stats.total_period;
-
-	ret = __percent_color_snprintf(hpp->buf, hpp->size, percent);
-
-	if (symbol_conf.event_group) {
-		int prev_idx, idx_delta;
-		struct perf_evsel *evsel = hists_to_evsel(hists);
-		struct hist_entry *pair;
-		int nr_members = evsel->nr_members;
-
-		if (nr_members <= 1)
-			return ret;
-
-		prev_idx = perf_evsel__group_idx(evsel);
-
-		list_for_each_entry(pair, &he->pairs.head, pairs.node) {
-			u64 period = get_field(pair);
-			u64 total = pair->hists->stats.total_period;
-
-			evsel = hists_to_evsel(pair->hists);
-			idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
-
-			while (idx_delta--) {
-				/*
-				 * zero-fill group members in the middle which
-				 * have no sample
-				 */
-				ret += __percent_color_snprintf(hpp->buf + ret,
-								hpp->size - ret,
-								0.0);
-			}
-
-			percent = 100.0 * period / total;
-			ret += __percent_color_snprintf(hpp->buf + ret,
-							hpp->size - ret,
-							percent);
-
-			prev_idx = perf_evsel__group_idx(evsel);
-		}
-
-		idx_delta = nr_members - prev_idx - 1;
-
-		while (idx_delta--) {
-			/*
-			 * zero-fill group members at last which have no sample
-			 */
-			ret += __percent_color_snprintf(hpp->buf + ret,
-							hpp->size - ret,
-							0.0);
-		}
-	}
-	return ret;
-}
-
-#define __HPP_COLOR_PERCENT_FN(_type, _field)					\
-static u64 he_get_##_field(struct hist_entry *he)				\
-{										\
-	return he->stat._field;							\
-}										\
-										\
-static int perf_gtk__hpp_color_##_type(struct perf_hpp *hpp,			\
-				       struct hist_entry *he)			\
-{										\
-	return __hpp__color_fmt(hpp, he, he_get_##_field);			\
-}
-
-__HPP_COLOR_PERCENT_FN(overhead, period)
-__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
-__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
-__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
-__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
-
-#undef __HPP_COLOR_PERCENT_FN
-
-
-void perf_gtk__init_hpp(void)
-{
-	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
-
-	perf_hpp__init();
-
-	perf_hpp__format[PERF_HPP__OVERHEAD].color =
-				perf_gtk__hpp_color_overhead;
-	perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
-				perf_gtk__hpp_color_overhead_sys;
-	perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
-				perf_gtk__hpp_color_overhead_us;
-	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
-				perf_gtk__hpp_color_overhead_guest_sys;
-	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
-				perf_gtk__hpp_color_overhead_guest_us;
-}
-
-static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
-{
-	struct perf_hpp_fmt *fmt;
-	GType col_types[MAX_COLUMNS];
-	GtkCellRenderer *renderer;
-	struct sort_entry *se;
-	GtkListStore *store;
-	struct rb_node *nd;
-	GtkWidget *view;
-	int col_idx;
-	int nr_cols;
-	char s[512];
-
-	struct perf_hpp hpp = {
-		.buf		= s,
-		.size		= sizeof(s),
-		.ptr		= hists_to_evsel(hists),
-	};
-
-	nr_cols = 0;
-
-	perf_hpp__for_each_format(fmt)
-		col_types[nr_cols++] = G_TYPE_STRING;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		if (se->elide)
-			continue;
-
-		col_types[nr_cols++] = G_TYPE_STRING;
-	}
-
-	store = gtk_list_store_newv(nr_cols, col_types);
-
-	view = gtk_tree_view_new();
-
-	renderer = gtk_cell_renderer_text_new();
-
-	col_idx = 0;
-
-	perf_hpp__for_each_format(fmt) {
-		fmt->header(&hpp);
-
-		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
-							    -1, ltrim(s),
-							    renderer, "markup",
-							    col_idx++, NULL);
-	}
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		if (se->elide)
-			continue;
-
-		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
-							    -1, se->se_header,
-							    renderer, "text",
-							    col_idx++, NULL);
-	}
-
-	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
-
-	g_object_unref(GTK_TREE_MODEL(store));
-
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		GtkTreeIter iter;
-
-		if (h->filtered)
-			continue;
-
-		gtk_list_store_append(store, &iter);
-
-		col_idx = 0;
-
-		perf_hpp__for_each_format(fmt) {
-			if (fmt->color)
-				fmt->color(&hpp, h);
-			else
-				fmt->entry(&hpp, h);
-
-			gtk_list_store_set(store, &iter, col_idx++, s, -1);
-		}
-
-		list_for_each_entry(se, &hist_entry__sort_list, list) {
-			if (se->elide)
-				continue;
-
-			se->se_snprintf(h, s, ARRAY_SIZE(s),
-					hists__col_len(hists, se->se_width_idx));
-
-			gtk_list_store_set(store, &iter, col_idx++, s, -1);
-		}
-	}
-
-	gtk_container_add(GTK_CONTAINER(window), view);
-}
-
-int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
-				  const char *help,
-				  struct hist_browser_timer *hbt __maybe_unused)
-{
-	struct perf_evsel *pos;
-	GtkWidget *vbox;
-	GtkWidget *notebook;
-	GtkWidget *info_bar;
-	GtkWidget *statbar;
-	GtkWidget *window;
-
-	signal(SIGSEGV, perf_gtk__signal);
-	signal(SIGFPE,  perf_gtk__signal);
-	signal(SIGINT,  perf_gtk__signal);
-	signal(SIGQUIT, perf_gtk__signal);
-	signal(SIGTERM, perf_gtk__signal);
-
-	window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
-
-	gtk_window_set_title(GTK_WINDOW(window), "perf report");
-
-	g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
-
-	pgctx = perf_gtk__activate_context(window);
-	if (!pgctx)
-		return -1;
-
-	vbox = gtk_vbox_new(FALSE, 0);
-
-	notebook = gtk_notebook_new();
-
-	gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
-
-	info_bar = perf_gtk__setup_info_bar();
-	if (info_bar)
-		gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
-
-	statbar = perf_gtk__setup_statusbar();
-	gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
-
-	gtk_container_add(GTK_CONTAINER(window), vbox);
-
-	list_for_each_entry(pos, &evlist->entries, node) {
-		struct hists *hists = &pos->hists;
-		const char *evname = perf_evsel__name(pos);
-		GtkWidget *scrolled_window;
-		GtkWidget *tab_label;
-		char buf[512];
-		size_t size = sizeof(buf);
-
-		if (symbol_conf.event_group) {
-			if (!perf_evsel__is_group_leader(pos))
-				continue;
-
-			if (pos->nr_members > 1) {
-				perf_evsel__group_desc(pos, buf, size);
-				evname = buf;
-			}
-		}
-
-		scrolled_window = gtk_scrolled_window_new(NULL, NULL);
-
-		gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
-							GTK_POLICY_AUTOMATIC,
-							GTK_POLICY_AUTOMATIC);
-
-		perf_gtk__show_hists(scrolled_window, hists);
-
-		tab_label = gtk_label_new(evname);
-
-		gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
-	}
-
-	gtk_widget_show_all(window);
-
-	perf_gtk__resize_window(window);
-
-	gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
-
-	ui_helpline__push(help);
-
-	gtk_main();
-
-	perf_gtk__deactivate_context(&pgctx);
-
-	return 0;
-}
diff --git a/trunk/tools/perf/ui/helpline.c b/trunk/tools/perf/ui/helpline.c
index 700fb3cfa1c7..a49bcf3c190b 100644
--- a/trunk/tools/perf/ui/helpline.c
+++ b/trunk/tools/perf/ui/helpline.c
@@ -16,16 +16,9 @@ static void nop_helpline__push(const char *msg __maybe_unused)
 {
 }
 
-static int nop_helpline__show(const char *fmt __maybe_unused,
-			       va_list ap __maybe_unused)
-{
-	return 0;
-}
-
 static struct ui_helpline default_helpline_fns = {
 	.pop	= nop_helpline__pop,
 	.push	= nop_helpline__push,
-	.show	= nop_helpline__show,
 };
 
 struct ui_helpline *helpline_fns = &default_helpline_fns;
@@ -66,8 +59,3 @@ void ui_helpline__puts(const char *msg)
 	ui_helpline__pop();
 	ui_helpline__push(msg);
 }
-
-int ui_helpline__vshow(const char *fmt, va_list ap)
-{
-	return helpline_fns->show(fmt, ap);
-}
diff --git a/trunk/tools/perf/ui/helpline.h b/trunk/tools/perf/ui/helpline.h
index 46181f4fc07e..baa28a4d16b9 100644
--- a/trunk/tools/perf/ui/helpline.h
+++ b/trunk/tools/perf/ui/helpline.h
@@ -9,7 +9,6 @@
 struct ui_helpline {
 	void (*pop)(void);
 	void (*push)(const char *msg);
-	int  (*show)(const char *fmt, va_list ap);
 };
 
 extern struct ui_helpline *helpline_fns;
@@ -21,9 +20,28 @@ void ui_helpline__push(const char *msg);
 void ui_helpline__vpush(const char *fmt, va_list ap);
 void ui_helpline__fpush(const char *fmt, ...);
 void ui_helpline__puts(const char *msg);
-int  ui_helpline__vshow(const char *fmt, va_list ap);
 
 extern char ui_helpline__current[512];
+
+#ifdef NEWT_SUPPORT
 extern char ui_helpline__last_msg[];
+int ui_helpline__show_help(const char *format, va_list ap);
+#else
+static inline int ui_helpline__show_help(const char *format __maybe_unused,
+					 va_list ap __maybe_unused)
+{
+	return 0;
+}
+#endif /* NEWT_SUPPORT */
+
+#ifdef GTK2_SUPPORT
+int perf_gtk__show_helpline(const char *format, va_list ap);
+#else
+static inline int perf_gtk__show_helpline(const char *format __maybe_unused,
+					  va_list ap __maybe_unused)
+{
+	return 0;
+}
+#endif /* GTK2_SUPPORT */
 
 #endif /* _PERF_UI_HELPLINE_H_ */
diff --git a/trunk/tools/perf/ui/hist.c b/trunk/tools/perf/ui/hist.c
index d671e63aa351..aa84130024d5 100644
--- a/trunk/tools/perf/ui/hist.c
+++ b/trunk/tools/perf/ui/hist.c
@@ -3,163 +3,151 @@
 #include "../util/hist.h"
 #include "../util/util.h"
 #include "../util/sort.h"
-#include "../util/evsel.h"
+
 
 /* hist period print (hpp) functions */
+static int hpp__header_overhead(struct perf_hpp *hpp)
+{
+	return scnprintf(hpp->buf, hpp->size, "Overhead");
+}
 
-typedef int (*hpp_snprint_fn)(char *buf, size_t size, const char *fmt, ...);
+static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused)
+{
+	return 8;
+}
 
-static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
-		      u64 (*get_field)(struct hist_entry *),
-		      const char *fmt, hpp_snprint_fn print_fn,
-		      bool fmt_percent)
+static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he)
 {
-	int ret;
 	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period / hists->stats.total_period;
+
+	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
+}
 
-	if (fmt_percent) {
-		double percent = 0.0;
+static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period / hists->stats.total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
 
-		if (hists->stats.total_period)
-			percent = 100.0 * get_field(he) /
-				  hists->stats.total_period;
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
 
-		ret = print_fn(hpp->buf, hpp->size, fmt, percent);
-	} else
-		ret = print_fn(hpp->buf, hpp->size, fmt, get_field(he));
+static int hpp__header_overhead_sys(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
 
-	if (symbol_conf.event_group) {
-		int prev_idx, idx_delta;
-		struct perf_evsel *evsel = hists_to_evsel(hists);
-		struct hist_entry *pair;
-		int nr_members = evsel->nr_members;
+	return scnprintf(hpp->buf, hpp->size, fmt, "sys");
+}
 
-		if (nr_members <= 1)
-			return ret;
+static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused)
+{
+	return 7;
+}
 
-		prev_idx = perf_evsel__group_idx(evsel);
+static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_sys / hists->stats.total_period;
 
-		list_for_each_entry(pair, &he->pairs.head, pairs.node) {
-			u64 period = get_field(pair);
-			u64 total = pair->hists->stats.total_period;
+	return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
+}
 
-			if (!total)
-				continue;
+static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_sys / hists->stats.total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
 
-			evsel = hists_to_evsel(pair->hists);
-			idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
 
-			while (idx_delta--) {
-				/*
-				 * zero-fill group members in the middle which
-				 * have no sample
-				 */
-				ret += print_fn(hpp->buf + ret, hpp->size - ret,
-						fmt, 0);
-			}
+static int hpp__header_overhead_us(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
 
-			if (fmt_percent)
-				ret += print_fn(hpp->buf + ret, hpp->size - ret,
-						fmt, 100.0 * period / total);
-			else
-				ret += print_fn(hpp->buf + ret, hpp->size - ret,
-						fmt, period);
+	return scnprintf(hpp->buf, hpp->size, fmt, "user");
+}
 
-			prev_idx = perf_evsel__group_idx(evsel);
-		}
+static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused)
+{
+	return 7;
+}
 
-		idx_delta = nr_members - prev_idx - 1;
+static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_us / hists->stats.total_period;
 
-		while (idx_delta--) {
-			/*
-			 * zero-fill group members at last which have no sample
-			 */
-			ret += print_fn(hpp->buf + ret, hpp->size - ret,
-					fmt, 0);
-		}
-	}
-	return ret;
+	return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
 }
 
-#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) 		\
-static int hpp__header_##_type(struct perf_hpp *hpp)			\
-{									\
-	int len = _min_width;						\
-									\
-	if (symbol_conf.event_group) {					\
-		struct perf_evsel *evsel = hpp->ptr;			\
-									\
-		len = max(len, evsel->nr_members * _unit_width);	\
-	}								\
-	return scnprintf(hpp->buf, hpp->size, "%*s", len, _str);	\
-}
-
-#define __HPP_WIDTH_FN(_type, _min_width, _unit_width) 			\
-static int hpp__width_##_type(struct perf_hpp *hpp __maybe_unused)	\
-{									\
-	int len = _min_width;						\
-									\
-	if (symbol_conf.event_group) {					\
-		struct perf_evsel *evsel = hpp->ptr;			\
-									\
-		len = max(len, evsel->nr_members * _unit_width);	\
-	}								\
-	return len;							\
-}
-
-#define __HPP_COLOR_PERCENT_FN(_type, _field)					\
-static u64 he_get_##_field(struct hist_entry *he)				\
-{										\
-	return he->stat._field;							\
-}										\
-										\
-static int hpp__color_##_type(struct perf_hpp *hpp, struct hist_entry *he) 	\
-{										\
-	return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%",			\
-			  (hpp_snprint_fn)percent_color_snprintf, true);	\
-}
-
-#define __HPP_ENTRY_PERCENT_FN(_type, _field)					\
-static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) 	\
-{										\
-	const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%";		\
-	return __hpp__fmt(hpp, he, he_get_##_field, fmt,			\
-			  scnprintf, true);					\
-}
-
-#define __HPP_ENTRY_RAW_FN(_type, _field)					\
-static u64 he_get_raw_##_field(struct hist_entry *he)				\
-{										\
-	return he->stat._field;							\
-}										\
-										\
-static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) 	\
-{										\
-	const char *fmt = symbol_conf.field_sep ? " %"PRIu64 : " %11"PRIu64;	\
-	return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, scnprintf, false);	\
-}
-
-#define HPP_PERCENT_FNS(_type, _str, _field, _min_width, _unit_width)	\
-__HPP_HEADER_FN(_type, _str, _min_width, _unit_width)			\
-__HPP_WIDTH_FN(_type, _min_width, _unit_width)				\
-__HPP_COLOR_PERCENT_FN(_type, _field)					\
-__HPP_ENTRY_PERCENT_FN(_type, _field)
-
-#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width)	\
-__HPP_HEADER_FN(_type, _str, _min_width, _unit_width)			\
-__HPP_WIDTH_FN(_type, _min_width, _unit_width)				\
-__HPP_ENTRY_RAW_FN(_type, _field)
-
-
-HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
-HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
-HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
-HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
-HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
-
-HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
-HPP_RAW_FNS(period, "Period", period, 12, 12)
+static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_us / hists->stats.total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
+
+static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp)
+{
+	return scnprintf(hpp->buf, hpp->size, "guest sys");
+}
+
+static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __maybe_unused)
+{
+	return 9;
+}
+
+static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp,
+					 struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period;
+
+	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent);
+}
+
+static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp,
+					 struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% ";
 
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
+
+static int hpp__header_overhead_guest_us(struct perf_hpp *hpp)
+{
+	return scnprintf(hpp->buf, hpp->size, "guest usr");
+}
+
+static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __maybe_unused)
+{
+	return 9;
+}
+
+static int hpp__color_overhead_guest_us(struct perf_hpp *hpp,
+					struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period;
+
+	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent);
+}
+
+static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp,
+					struct hist_entry *he)
+{
+	struct hists *hists = he->hists;
+	double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% ";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
 
 static int hpp__header_baseline(struct perf_hpp *hpp)
 {
@@ -191,7 +179,7 @@ static int hpp__color_baseline(struct perf_hpp *hpp, struct hist_entry *he)
 {
 	double percent = baseline_percent(he);
 
-	if (hist_entry__has_pairs(he) || symbol_conf.field_sep)
+	if (hist_entry__has_pairs(he))
 		return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
 	else
 		return scnprintf(hpp->buf, hpp->size, "        ");
@@ -208,6 +196,44 @@ static int hpp__entry_baseline(struct perf_hpp *hpp, struct hist_entry *he)
 		return scnprintf(hpp->buf, hpp->size, "            ");
 }
 
+static int hpp__header_samples(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%11s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Samples");
+}
+
+static int hpp__width_samples(struct perf_hpp *hpp __maybe_unused)
+{
+	return 11;
+}
+
+static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%11" PRIu64;
+
+	return scnprintf(hpp->buf, hpp->size, fmt, he->stat.nr_events);
+}
+
+static int hpp__header_period(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Period");
+}
+
+static int hpp__width_period(struct perf_hpp *hpp __maybe_unused)
+{
+	return 12;
+}
+
+static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64;
+
+	return scnprintf(hpp->buf, hpp->size, fmt, he->stat.period);
+}
+
 static int hpp__header_period_baseline(struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
@@ -228,7 +254,6 @@ static int hpp__entry_period_baseline(struct perf_hpp *hpp, struct hist_entry *h
 
 	return scnprintf(hpp->buf, hpp->size, fmt, period);
 }
-
 static int hpp__header_delta(struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
@@ -243,18 +268,14 @@ static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused)
 
 static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he)
 {
-	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s";
 	char buf[32] = " ";
-	double diff = 0.0;
+	double diff;
 
-	if (pair) {
-		if (he->diff.computed)
-			diff = he->diff.period_ratio_delta;
-		else
-			diff = perf_diff__compute_delta(he, pair);
-	} else
-		diff = perf_diff__period_percent(he, he->stat.period);
+	if (he->diff.computed)
+		diff = he->diff.period_ratio_delta;
+	else
+		diff = perf_diff__compute_delta(he);
 
 	if (fabs(diff) >= 0.01)
 		scnprintf(buf, sizeof(buf), "%+4.2F%%", diff);
@@ -276,17 +297,14 @@ static int hpp__width_ratio(struct perf_hpp *hpp __maybe_unused)
 
 static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he)
 {
-	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
 	char buf[32] = " ";
-	double ratio = 0.0;
+	double ratio;
 
-	if (pair) {
-		if (he->diff.computed)
-			ratio = he->diff.period_ratio;
-		else
-			ratio = perf_diff__compute_ratio(he, pair);
-	}
+	if (he->diff.computed)
+		ratio = he->diff.period_ratio;
+	else
+		ratio = perf_diff__compute_ratio(he);
 
 	if (ratio > 0.0)
 		scnprintf(buf, sizeof(buf), "%+14.6F", ratio);
@@ -308,17 +326,14 @@ static int hpp__width_wdiff(struct perf_hpp *hpp __maybe_unused)
 
 static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
 {
-	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
 	char buf[32] = " ";
-	s64 wdiff = 0;
+	s64 wdiff;
 
-	if (pair) {
-		if (he->diff.computed)
-			wdiff = he->diff.wdiff;
-		else
-			wdiff = perf_diff__compute_wdiff(he, pair);
-	}
+	if (he->diff.computed)
+		wdiff = he->diff.wdiff;
+	else
+		wdiff = perf_diff__compute_wdiff(he);
 
 	if (wdiff != 0)
 		scnprintf(buf, sizeof(buf), "%14ld", wdiff);
@@ -326,6 +341,30 @@ static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
 	return scnprintf(hpp->buf, hpp->size, fmt, buf);
 }
 
+static int hpp__header_displ(struct perf_hpp *hpp)
+{
+	return scnprintf(hpp->buf, hpp->size, "Displ.");
+}
+
+static int hpp__width_displ(struct perf_hpp *hpp __maybe_unused)
+{
+	return 6;
+}
+
+static int hpp__entry_displ(struct perf_hpp *hpp,
+			    struct hist_entry *he)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+	long displacement = pair ? pair->position - he->position : 0;
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s";
+	char buf[32] = " ";
+
+	if (displacement)
+		scnprintf(buf, sizeof(buf), "%+4ld", displacement);
+
+	return scnprintf(hpp->buf, hpp->size, fmt, buf);
+}
+
 static int hpp__header_formula(struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%70s";
@@ -340,91 +379,67 @@ static int hpp__width_formula(struct perf_hpp *hpp __maybe_unused)
 
 static int hpp__entry_formula(struct perf_hpp *hpp, struct hist_entry *he)
 {
-	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s";
 	char buf[96] = " ";
 
-	if (pair)
-		perf_diff__formula(he, pair, buf, sizeof(buf));
-
+	perf_diff__formula(buf, sizeof(buf), he);
 	return scnprintf(hpp->buf, hpp->size, fmt, buf);
 }
 
-#define HPP__COLOR_PRINT_FNS(_name)			\
-	{						\
-		.header	= hpp__header_ ## _name,	\
-		.width	= hpp__width_ ## _name,		\
-		.color	= hpp__color_ ## _name,		\
-		.entry	= hpp__entry_ ## _name		\
-	}
+#define HPP__COLOR_PRINT_FNS(_name)		\
+	.header	= hpp__header_ ## _name,		\
+	.width	= hpp__width_ ## _name,		\
+	.color	= hpp__color_ ## _name,		\
+	.entry	= hpp__entry_ ## _name
 
-#define HPP__PRINT_FNS(_name)				\
-	{						\
-		.header	= hpp__header_ ## _name,	\
-		.width	= hpp__width_ ## _name,		\
-		.entry	= hpp__entry_ ## _name		\
-	}
+#define HPP__PRINT_FNS(_name)			\
+	.header	= hpp__header_ ## _name,		\
+	.width	= hpp__width_ ## _name,		\
+	.entry	= hpp__entry_ ## _name
 
 struct perf_hpp_fmt perf_hpp__format[] = {
-	HPP__COLOR_PRINT_FNS(baseline),
-	HPP__COLOR_PRINT_FNS(overhead),
-	HPP__COLOR_PRINT_FNS(overhead_sys),
-	HPP__COLOR_PRINT_FNS(overhead_us),
-	HPP__COLOR_PRINT_FNS(overhead_guest_sys),
-	HPP__COLOR_PRINT_FNS(overhead_guest_us),
-	HPP__PRINT_FNS(samples),
-	HPP__PRINT_FNS(period),
-	HPP__PRINT_FNS(period_baseline),
-	HPP__PRINT_FNS(delta),
-	HPP__PRINT_FNS(ratio),
-	HPP__PRINT_FNS(wdiff),
-	HPP__PRINT_FNS(formula)
+	{ .cond = false, HPP__COLOR_PRINT_FNS(baseline) },
+	{ .cond = true,  HPP__COLOR_PRINT_FNS(overhead) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_sys) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_us) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_sys) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) },
+	{ .cond = false, HPP__PRINT_FNS(samples) },
+	{ .cond = false, HPP__PRINT_FNS(period) },
+	{ .cond = false, HPP__PRINT_FNS(period_baseline) },
+	{ .cond = false, HPP__PRINT_FNS(delta) },
+	{ .cond = false, HPP__PRINT_FNS(ratio) },
+	{ .cond = false, HPP__PRINT_FNS(wdiff) },
+	{ .cond = false, HPP__PRINT_FNS(displ) },
+	{ .cond = false, HPP__PRINT_FNS(formula) }
 };
 
-LIST_HEAD(perf_hpp__list);
-
-
 #undef HPP__COLOR_PRINT_FNS
 #undef HPP__PRINT_FNS
 
-#undef HPP_PERCENT_FNS
-#undef HPP_RAW_FNS
-
-#undef __HPP_HEADER_FN
-#undef __HPP_WIDTH_FN
-#undef __HPP_COLOR_PERCENT_FN
-#undef __HPP_ENTRY_PERCENT_FN
-#undef __HPP_ENTRY_RAW_FN
-
-
 void perf_hpp__init(void)
 {
 	if (symbol_conf.show_cpu_utilization) {
-		perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS);
-		perf_hpp__column_enable(PERF_HPP__OVERHEAD_US);
+		perf_hpp__format[PERF_HPP__OVERHEAD_SYS].cond = true;
+		perf_hpp__format[PERF_HPP__OVERHEAD_US].cond = true;
 
 		if (perf_guest) {
-			perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_SYS);
-			perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_US);
+			perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].cond = true;
+			perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].cond = true;
 		}
 	}
 
 	if (symbol_conf.show_nr_samples)
-		perf_hpp__column_enable(PERF_HPP__SAMPLES);
+		perf_hpp__format[PERF_HPP__SAMPLES].cond = true;
 
 	if (symbol_conf.show_total_period)
-		perf_hpp__column_enable(PERF_HPP__PERIOD);
-}
-
-void perf_hpp__column_register(struct perf_hpp_fmt *format)
-{
-	list_add_tail(&format->list, &perf_hpp__list);
+		perf_hpp__format[PERF_HPP__PERIOD].cond = true;
 }
 
-void perf_hpp__column_enable(unsigned col)
+void perf_hpp__column_enable(unsigned col, bool enable)
 {
 	BUG_ON(col >= PERF_HPP__MAX_INDEX);
-	perf_hpp__column_register(&perf_hpp__format[col]);
+	perf_hpp__format[col].cond = enable;
 }
 
 static inline void advance_hpp(struct perf_hpp *hpp, int inc)
@@ -437,29 +452,27 @@ int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
 				bool color)
 {
 	const char *sep = symbol_conf.field_sep;
-	struct perf_hpp_fmt *fmt;
 	char *start = hpp->buf;
-	int ret;
+	int i, ret;
 	bool first = true;
 
 	if (symbol_conf.exclude_other && !he->parent)
 		return 0;
 
-	perf_hpp__for_each_format(fmt) {
-		/*
-		 * If there's no field_sep, we still need
-		 * to display initial '  '.
-		 */
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
+
 		if (!sep || !first) {
 			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
 			advance_hpp(hpp, ret);
-		} else
 			first = false;
+		}
 
-		if (color && fmt->color)
-			ret = fmt->color(hpp, he);
+		if (color && perf_hpp__format[i].color)
+			ret = perf_hpp__format[i].color(hpp, he);
 		else
-			ret = fmt->entry(hpp, he);
+			ret = perf_hpp__format[i].entry(hpp, he);
 
 		advance_hpp(hpp, ret);
 	}
@@ -491,18 +504,16 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size,
  */
 unsigned int hists__sort_list_width(struct hists *hists)
 {
-	struct perf_hpp_fmt *fmt;
 	struct sort_entry *se;
-	int i = 0, ret = 0;
-	struct perf_hpp dummy_hpp = {
-		.ptr	= hists_to_evsel(hists),
-	};
+	int i, ret = 0;
 
-	perf_hpp__for_each_format(fmt) {
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
 		if (i)
 			ret += 2;
 
-		ret += fmt->width(&dummy_hpp);
+		ret += perf_hpp__format[i].width(NULL);
 	}
 
 	list_for_each_entry(se, &hist_entry__sort_list, list)
diff --git a/trunk/tools/perf/ui/keysyms.h b/trunk/tools/perf/ui/keysyms.h
index 65092d576b4e..809eca5707fa 100644
--- a/trunk/tools/perf/ui/keysyms.h
+++ b/trunk/tools/perf/ui/keysyms.h
@@ -23,6 +23,5 @@
 #define K_TIMER	 -1
 #define K_ERROR	 -2
 #define K_RESIZE -3
-#define K_SWITCH_INPUT_DATA -4
 
 #endif /* _PERF_KEYSYMS_H_ */
diff --git a/trunk/tools/perf/ui/setup.c b/trunk/tools/perf/ui/setup.c
index ae6a789cb0f6..ebb4cc107876 100644
--- a/trunk/tools/perf/ui/setup.c
+++ b/trunk/tools/perf/ui/setup.c
@@ -8,7 +8,7 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
 
 void setup_browser(bool fallback_to_pager)
 {
-	if (use_browser < 2 && (!isatty(1) || dump_trace))
+	if (!isatty(1) || dump_trace)
 		use_browser = 0;
 
 	/* default to TUI */
@@ -30,7 +30,6 @@ void setup_browser(bool fallback_to_pager)
 		if (fallback_to_pager)
 			setup_pager();
 
-		perf_hpp__column_enable(PERF_HPP__OVERHEAD);
 		perf_hpp__init();
 		break;
 	}
diff --git a/trunk/tools/perf/ui/stdio/hist.c b/trunk/tools/perf/ui/stdio/hist.c
index ff1f60cf442e..f0ee204f99bb 100644
--- a/trunk/tools/perf/ui/stdio/hist.c
+++ b/trunk/tools/perf/ui/stdio/hist.c
@@ -3,7 +3,6 @@
 #include "../../util/util.h"
 #include "../../util/hist.h"
 #include "../../util/sort.h"
-#include "../../util/evsel.h"
 
 
 static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
@@ -336,19 +335,17 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, FILE *fp)
 {
-	struct perf_hpp_fmt *fmt;
 	struct sort_entry *se;
 	struct rb_node *nd;
 	size_t ret = 0;
 	unsigned int width;
 	const char *sep = symbol_conf.field_sep;
 	const char *col_width = symbol_conf.col_width_list_str;
-	int nr_rows = 0;
+	int idx, nr_rows = 0;
 	char bf[96];
 	struct perf_hpp dummy_hpp = {
 		.buf	= bf,
 		.size	= sizeof(bf),
-		.ptr	= hists_to_evsel(hists),
 	};
 	bool first = true;
 
@@ -358,14 +355,16 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		goto print_entries;
 
 	fprintf(fp, "# ");
+	for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
+		if (!perf_hpp__format[idx].cond)
+			continue;
 
-	perf_hpp__for_each_format(fmt) {
 		if (!first)
 			fprintf(fp, "%s", sep ?: "  ");
 		else
 			first = false;
 
-		fmt->header(&dummy_hpp);
+		perf_hpp__format[idx].header(&dummy_hpp);
 		fprintf(fp, "%s", bf);
 	}
 
@@ -401,16 +400,18 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 	first = true;
 
 	fprintf(fp, "# ");
-
-	perf_hpp__for_each_format(fmt) {
+	for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
 		unsigned int i;
 
+		if (!perf_hpp__format[idx].cond)
+			continue;
+
 		if (!first)
 			fprintf(fp, "%s", sep ?: "  ");
 		else
 			first = false;
 
-		width = fmt->width(&dummy_hpp);
+		width = perf_hpp__format[idx].width(&dummy_hpp);
 		for (i = 0; i < width; i++)
 			fprintf(fp, ".");
 	}
@@ -461,7 +462,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 	return ret;
 }
 
-size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
+size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
 {
 	int i;
 	size_t ret = 0;
@@ -469,7 +470,7 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
 	for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
 		const char *name;
 
-		if (stats->nr_events[i] == 0)
+		if (hists->stats.nr_events[i] == 0)
 			continue;
 
 		name = perf_event__name(i);
@@ -477,7 +478,7 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
 			continue;
 
 		ret += fprintf(fp, "%16s events: %10d\n", name,
-			       stats->nr_events[i]);
+			       hists->stats.nr_events[i]);
 	}
 
 	return ret;
diff --git a/trunk/tools/perf/ui/tui/helpline.c b/trunk/tools/perf/ui/tui/helpline.c
index 1c8b9afd5d6e..2884d2f41e33 100644
--- a/trunk/tools/perf/ui/tui/helpline.c
+++ b/trunk/tools/perf/ui/tui/helpline.c
@@ -8,8 +8,6 @@
 #include "../ui.h"
 #include "../libslang.h"
 
-char ui_helpline__last_msg[1024];
-
 static void tui_helpline__pop(void)
 {
 }
@@ -25,7 +23,20 @@ static void tui_helpline__push(const char *msg)
 	strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0';
 }
 
-static int tui_helpline__show(const char *format, va_list ap)
+struct ui_helpline tui_helpline_fns = {
+	.pop	= tui_helpline__pop,
+	.push	= tui_helpline__push,
+};
+
+void ui_helpline__init(void)
+{
+	helpline_fns = &tui_helpline_fns;
+	ui_helpline__puts(" ");
+}
+
+char ui_helpline__last_msg[1024];
+
+int ui_helpline__show_help(const char *format, va_list ap)
 {
 	int ret;
 	static int backlog;
@@ -44,15 +55,3 @@ static int tui_helpline__show(const char *format, va_list ap)
 
 	return ret;
 }
-
-struct ui_helpline tui_helpline_fns = {
-	.pop	= tui_helpline__pop,
-	.push	= tui_helpline__push,
-	.show	= tui_helpline__show,
-};
-
-void ui_helpline__init(void)
-{
-	helpline_fns = &tui_helpline_fns;
-	ui_helpline__puts(" ");
-}
diff --git a/trunk/tools/perf/ui/util.c b/trunk/tools/perf/ui/util.c
index e3e0a963d03a..4f989774c8c6 100644
--- a/trunk/tools/perf/ui/util.c
+++ b/trunk/tools/perf/ui/util.c
@@ -52,6 +52,7 @@ int ui__warning(const char *format, ...)
 	return ret;
 }
 
+
 /**
  * perf_error__register - Register error logging functions
  * @eops: The pointer to error logging function struct
diff --git a/trunk/tools/perf/util/PERF-VERSION-GEN b/trunk/tools/perf/util/PERF-VERSION-GEN
index 055fef34b6f6..6aa34e5afdcf 100755
--- a/trunk/tools/perf/util/PERF-VERSION-GEN
+++ b/trunk/tools/perf/util/PERF-VERSION-GEN
@@ -26,13 +26,13 @@ VN=$(expr "$VN" : v*'\(.*\)')
 
 if test -r $GVF
 then
-	VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <$GVF)
+	VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
 else
 	VC=unset
 fi
 test "$VN" = "$VC" || {
 	echo >&2 "PERF_VERSION = $VN"
-	echo "#define PERF_VERSION \"$VN\"" >$GVF
+	echo "PERF_VERSION = $VN" >$GVF
 }
 
 
diff --git a/trunk/tools/perf/util/annotate.c b/trunk/tools/perf/util/annotate.c
index d33fe937e6f1..07aaeea60000 100644
--- a/trunk/tools/perf/util/annotate.c
+++ b/trunk/tools/perf/util/annotate.c
@@ -809,7 +809,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
 		pr_err("Can't annotate %s:\n\n"
 		       "No vmlinux file%s\nwas found in the path.\n\n"
 		       "Please use:\n\n"
-		       "  perf buildid-cache -vu vmlinux\n\n"
+		       "  perf buildid-cache -av vmlinux\n\n"
 		       "or:\n\n"
 		       "  --vmlinux vmlinux\n",
 		       sym->name, build_id_msg ?: "");
diff --git a/trunk/tools/perf/util/annotate.h b/trunk/tools/perf/util/annotate.h
index c422440fe611..8eec94358a4a 100644
--- a/trunk/tools/perf/util/annotate.h
+++ b/trunk/tools/perf/util/annotate.h
@@ -6,7 +6,6 @@
 #include "types.h"
 #include "symbol.h"
 #include "hist.h"
-#include "sort.h"
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <pthread.h>
@@ -155,29 +154,6 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
 }
 #endif
 
-#ifdef GTK2_SUPPORT
-int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx,
-			 struct hist_browser_timer *hbt);
-
-static inline int hist_entry__gtk_annotate(struct hist_entry *he, int evidx,
-					   struct hist_browser_timer *hbt)
-{
-	return symbol__gtk_annotate(he->ms.sym, he->ms.map, evidx, hbt);
-}
-
-void perf_gtk__show_annotations(void);
-#else
-static inline int hist_entry__gtk_annotate(struct hist_entry *he __maybe_unused,
-					   int evidx __maybe_unused,
-					   struct hist_browser_timer *hbt
-					   __maybe_unused)
-{
-	return 0;
-}
-
-static inline void perf_gtk__show_annotations(void) {}
-#endif
-
 extern const char	*disassembler_style;
 
 #endif	/* __PERF_ANNOTATE_H */
diff --git a/trunk/tools/perf/util/callchain.c b/trunk/tools/perf/util/callchain.c
index 42b6a632fe7b..d3b3f5d82137 100644
--- a/trunk/tools/perf/util/callchain.c
+++ b/trunk/tools/perf/util/callchain.c
@@ -444,7 +444,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	struct callchain_cursor_node *node = *cursor->last;
 
 	if (!node) {
-		node = calloc(1, sizeof(*node));
+		node = calloc(sizeof(*node), 1);
 		if (!node)
 			return -ENOMEM;
 
diff --git a/trunk/tools/perf/util/callchain.h b/trunk/tools/perf/util/callchain.h
index 3ee9f67d5af0..eb340571e7d6 100644
--- a/trunk/tools/perf/util/callchain.h
+++ b/trunk/tools/perf/util/callchain.h
@@ -143,9 +143,4 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
 	cursor->curr = cursor->curr->next;
 	cursor->pos++;
 }
-
-struct option;
-
-int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
-extern const char record_callchain_help[];
 #endif	/* __PERF_CALLCHAIN_H */
diff --git a/trunk/tools/perf/util/cpumap.c b/trunk/tools/perf/util/cpumap.c
index f817046e22b1..2b32ffa9ebdb 100644
--- a/trunk/tools/perf/util/cpumap.c
+++ b/trunk/tools/perf/util/cpumap.c
@@ -1,5 +1,4 @@
 #include "util.h"
-#include "sysfs.h"
 #include "../perf.h"
 #include "cpumap.h"
 #include <assert.h>
@@ -202,56 +201,3 @@ void cpu_map__delete(struct cpu_map *map)
 {
 	free(map);
 }
-
-int cpu_map__get_socket(struct cpu_map *map, int idx)
-{
-	FILE *fp;
-	const char *mnt;
-	char path[PATH_MAX];
-	int cpu, ret;
-
-	if (idx > map->nr)
-		return -1;
-
-	cpu = map->map[idx];
-
-	mnt = sysfs_find_mountpoint();
-	if (!mnt)
-		return -1;
-
-	sprintf(path,
-		"%s/devices/system/cpu/cpu%d/topology/physical_package_id",
-		mnt, cpu);
-
-	fp = fopen(path, "r");
-	if (!fp)
-		return -1;
-	ret = fscanf(fp, "%d", &cpu);
-	fclose(fp);
-	return ret == 1 ? cpu : -1;
-}
-
-int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
-{
-	struct cpu_map *sock;
-	int nr = cpus->nr;
-	int cpu, s1, s2;
-
-	sock = calloc(1, sizeof(*sock) + nr * sizeof(int));
-	if (!sock)
-		return -1;
-
-	for (cpu = 0; cpu < nr; cpu++) {
-		s1 = cpu_map__get_socket(cpus, cpu);
-		for (s2 = 0; s2 < sock->nr; s2++) {
-			if (s1 == sock->map[s2])
-				break;
-		}
-		if (s2 == sock->nr) {
-			sock->map[sock->nr] = s1;
-			sock->nr++;
-		}
-	}
-	*sockp = sock;
-	return 0;
-}
diff --git a/trunk/tools/perf/util/cpumap.h b/trunk/tools/perf/util/cpumap.h
index 161b00756a12..2f68a3b8c285 100644
--- a/trunk/tools/perf/util/cpumap.h
+++ b/trunk/tools/perf/util/cpumap.h
@@ -14,15 +14,6 @@ struct cpu_map *cpu_map__dummy_new(void);
 void cpu_map__delete(struct cpu_map *map);
 struct cpu_map *cpu_map__read(FILE *file);
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
-int cpu_map__get_socket(struct cpu_map *map, int idx);
-int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
-
-static inline int cpu_map__socket(struct cpu_map *sock, int s)
-{
-	if (!sock || s > sock->nr || s < 0)
-		return 0;
-	return sock->map[s];
-}
 
 static inline int cpu_map__nr(const struct cpu_map *map)
 {
diff --git a/trunk/tools/perf/util/debug.c b/trunk/tools/perf/util/debug.c
index 399e74c34c1a..03f830b48148 100644
--- a/trunk/tools/perf/util/debug.c
+++ b/trunk/tools/perf/util/debug.c
@@ -23,8 +23,10 @@ int eprintf(int level, const char *fmt, ...)
 
 	if (verbose >= level) {
 		va_start(args, fmt);
-		if (use_browser >= 1)
-			ui_helpline__vshow(fmt, args);
+		if (use_browser == 1)
+			ret = ui_helpline__show_help(fmt, args);
+		else if (use_browser == 2)
+			ret = perf_gtk__show_helpline(fmt, args);
 		else
 			ret = vfprintf(stderr, fmt, args);
 		va_end(args);
@@ -47,6 +49,28 @@ int dump_printf(const char *fmt, ...)
 	return ret;
 }
 
+#if !defined(NEWT_SUPPORT) && !defined(GTK2_SUPPORT)
+int ui__warning(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+	return 0;
+}
+#endif
+
+int ui__error_paranoid(void)
+{
+	return ui__error("Permission error - are you root?\n"
+		    "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
+		    " -1 - Not paranoid at all\n"
+		    "  0 - Disallow raw tracepoint access for unpriv\n"
+		    "  1 - Disallow cpu events for unpriv\n"
+		    "  2 - Disallow kernel profiling for unpriv\n");
+}
+
 void trace_event(union perf_event *event)
 {
 	unsigned char *raw_event = (void *)event;
diff --git a/trunk/tools/perf/util/debug.h b/trunk/tools/perf/util/debug.h
index efbd98805ad0..83e8d234af6b 100644
--- a/trunk/tools/perf/util/debug.h
+++ b/trunk/tools/perf/util/debug.h
@@ -5,8 +5,6 @@
 #include <stdbool.h>
 #include "event.h"
 #include "../ui/helpline.h"
-#include "../ui/progress.h"
-#include "../ui/util.h"
 
 extern int verbose;
 extern bool quiet, dump_trace;
@@ -14,7 +12,39 @@ extern bool quiet, dump_trace;
 int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
 void trace_event(union perf_event *event);
 
+struct ui_progress;
+struct perf_error_ops;
+
+#if defined(NEWT_SUPPORT) || defined(GTK2_SUPPORT)
+
+#include "../ui/progress.h"
 int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
+#include "../ui/util.h"
+
+#else
+
+static inline void ui_progress__update(u64 curr __maybe_unused,
+				       u64 total __maybe_unused,
+				       const char *title __maybe_unused) {}
+static inline void ui_progress__finish(void) {}
+
+#define ui__error(format, arg...) ui__warning(format, ##arg)
+
+static inline int
+perf_error__register(struct perf_error_ops *eops __maybe_unused)
+{
+	return 0;
+}
+
+static inline int
+perf_error__unregister(struct perf_error_ops *eops __maybe_unused)
+{
+	return 0;
+}
+
+#endif /* NEWT_SUPPORT || GTK2_SUPPORT */
+
 int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+int ui__error_paranoid(void);
 
 #endif	/* __PERF_DEBUG_H */
diff --git a/trunk/tools/perf/util/dso.c b/trunk/tools/perf/util/dso.c
index 6f7d5a9d6b05..d6d9a465acdb 100644
--- a/trunk/tools/perf/util/dso.c
+++ b/trunk/tools/perf/util/dso.c
@@ -539,13 +539,13 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name)
 }
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
-			       bool (skip)(struct dso *dso, int parm), int parm)
+			       bool with_hits)
 {
 	struct dso *pos;
 	size_t ret = 0;
 
 	list_for_each_entry(pos, head, node) {
-		if (skip && skip(pos, parm))
+		if (with_hits && !pos->hit)
 			continue;
 		ret += dso__fprintf_buildid(pos, fp);
 		ret += fprintf(fp, " %s\n", pos->long_name);
@@ -583,7 +583,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
 	if (dso->short_name != dso->long_name)
 		ret += fprintf(fp, "%s, ", dso->long_name);
 	ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
-		       dso__loaded(dso, type) ? "" : "NOT ");
+		       dso->loaded ? "" : "NOT ");
 	ret += dso__fprintf_buildid(dso, fp);
 	ret += fprintf(fp, ")\n");
 	for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
diff --git a/trunk/tools/perf/util/dso.h b/trunk/tools/perf/util/dso.h
index 450199ab51b5..e03276940b99 100644
--- a/trunk/tools/perf/util/dso.h
+++ b/trunk/tools/perf/util/dso.h
@@ -138,7 +138,7 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
-			       bool (skip)(struct dso *dso, int parm), int parm);
+			       bool with_hits);
 size_t __dsos__fprintf(struct list_head *head, FILE *fp);
 
 size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
diff --git a/trunk/tools/perf/util/event.c b/trunk/tools/perf/util/event.c
index 5cd13d768cec..3cf2c3e0605f 100644
--- a/trunk/tools/perf/util/event.c
+++ b/trunk/tools/perf/util/event.c
@@ -476,10 +476,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 		}
 	}
 
-	if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) {
-		free(event);
+	if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0)
 		return -ENOENT;
-	}
 
 	map = machine->vmlinux_maps[MAP__FUNCTION];
 	size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
diff --git a/trunk/tools/perf/util/evlist.c b/trunk/tools/perf/util/evlist.c
index bc4ad7977438..705293489e3c 100644
--- a/trunk/tools/perf/util/evlist.c
+++ b/trunk/tools/perf/util/evlist.c
@@ -49,16 +49,10 @@ struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
 	return evlist;
 }
 
-void perf_evlist__config(struct perf_evlist *evlist,
-			struct perf_record_opts *opts)
+void perf_evlist__config_attrs(struct perf_evlist *evlist,
+			       struct perf_record_opts *opts)
 {
 	struct perf_evsel *evsel;
-	/*
-	 * Set the evsel leader links before we configure attributes,
-	 * since some might depend on this info.
-	 */
-	if (opts->group)
-		perf_evlist__set_leader(evlist);
 
 	if (evlist->cpus->map[0] < 0)
 		opts->no_inherit = true;
@@ -67,7 +61,7 @@ void perf_evlist__config(struct perf_evlist *evlist,
 		perf_evsel__config(evsel, opts);
 
 		if (evlist->nr_entries > 1)
-			perf_evsel__set_sample_id(evsel);
+			evsel->attr.sample_type |= PERF_SAMPLE_ID;
 	}
 }
 
@@ -117,21 +111,18 @@ void __perf_evlist__set_leader(struct list_head *list)
 	struct perf_evsel *evsel, *leader;
 
 	leader = list_entry(list->next, struct perf_evsel, node);
-	evsel = list_entry(list->prev, struct perf_evsel, node);
-
-	leader->nr_members = evsel->idx - leader->idx + 1;
+	leader->leader = NULL;
 
 	list_for_each_entry(evsel, list, node) {
-		evsel->leader = leader;
+		if (evsel != leader)
+			evsel->leader = leader;
 	}
 }
 
 void perf_evlist__set_leader(struct perf_evlist *evlist)
 {
-	if (evlist->nr_entries) {
-		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
+	if (evlist->nr_entries)
 		__perf_evlist__set_leader(&evlist->entries);
-	}
 }
 
 int perf_evlist__add_default(struct perf_evlist *evlist)
@@ -231,7 +222,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 
 	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
-			if (!perf_evsel__is_group_leader(pos))
+			if (perf_evsel__is_group_member(pos))
 				continue;
 			for (thread = 0; thread < evlist->threads->nr; thread++)
 				ioctl(FD(pos, cpu, thread),
@@ -247,7 +238,7 @@ void perf_evlist__enable(struct perf_evlist *evlist)
 
 	for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
-			if (!perf_evsel__is_group_leader(pos))
+			if (perf_evsel__is_group_member(pos))
 				continue;
 			for (thread = 0; thread < evlist->threads->nr; thread++)
 				ioctl(FD(pos, cpu, thread),
@@ -375,7 +366,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 		if ((old & md->mask) + size != ((old + size) & md->mask)) {
 			unsigned int offset = old;
 			unsigned int len = min(sizeof(*event), size), cpy;
-			void *dst = &md->event_copy;
+			void *dst = &evlist->event_copy;
 
 			do {
 				cpy = min(md->mask + 1 - (offset & md->mask), len);
@@ -385,7 +376,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 				len -= cpy;
 			} while (len);
 
-			event = &md->event_copy;
+			event = &evlist->event_copy;
 		}
 
 		old += size;
diff --git a/trunk/tools/perf/util/evlist.h b/trunk/tools/perf/util/evlist.h
index 2dd07bd60b4f..56003f779e60 100644
--- a/trunk/tools/perf/util/evlist.h
+++ b/trunk/tools/perf/util/evlist.h
@@ -17,18 +17,10 @@ struct perf_record_opts;
 #define PERF_EVLIST__HLIST_BITS 8
 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
 
-struct perf_mmap {
-	void		 *base;
-	int		 mask;
-	unsigned int	 prev;
-	union perf_event event_copy;
-};
-
 struct perf_evlist {
 	struct list_head entries;
 	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
 	int		 nr_entries;
-	int		 nr_groups;
 	int		 nr_fds;
 	int		 nr_mmaps;
 	int		 mmap_len;
@@ -37,6 +29,7 @@ struct perf_evlist {
 		pid_t	pid;
 	} workload;
 	bool		 overwrite;
+	union perf_event event_copy;
 	struct perf_mmap *mmap;
 	struct pollfd	 *pollfd;
 	struct thread_map *threads;
@@ -83,8 +76,8 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
 
 int perf_evlist__open(struct perf_evlist *evlist);
 
-void perf_evlist__config(struct perf_evlist *evlist,
-			 struct perf_record_opts *opts);
+void perf_evlist__config_attrs(struct perf_evlist *evlist,
+			       struct perf_record_opts *opts);
 
 int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 				  struct perf_record_opts *opts,
@@ -142,25 +135,4 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
 }
 
 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
-
-static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
-{
-	struct perf_event_mmap_page *pc = mm->base;
-	int head = pc->data_head;
-	rmb();
-	return head;
-}
-
-static inline void perf_mmap__write_tail(struct perf_mmap *md,
-					 unsigned long tail)
-{
-	struct perf_event_mmap_page *pc = md->base;
-
-	/*
-	 * ensure all reads are done before we write the tail out.
-	 */
-	/* mb(); */
-	pc->data_tail = tail;
-}
-
 #endif /* __PERF_EVLIST_H */
diff --git a/trunk/tools/perf/util/evsel.c b/trunk/tools/perf/util/evsel.c
index 9c82f98f26de..1b16dd1edc8e 100644
--- a/trunk/tools/perf/util/evsel.c
+++ b/trunk/tools/perf/util/evsel.c
@@ -22,11 +22,6 @@
 #include <linux/perf_event.h>
 #include "perf_regs.h"
 
-static struct {
-	bool sample_id_all;
-	bool exclude_guest;
-} perf_missing_features;
-
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 
 static int __perf_evsel__sample_size(u64 sample_type)
@@ -55,36 +50,11 @@ void hists__init(struct hists *hists)
 	pthread_mutex_init(&hists->lock, NULL);
 }
 
-void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
-				  enum perf_event_sample_format bit)
-{
-	if (!(evsel->attr.sample_type & bit)) {
-		evsel->attr.sample_type |= bit;
-		evsel->sample_size += sizeof(u64);
-	}
-}
-
-void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
-				    enum perf_event_sample_format bit)
-{
-	if (evsel->attr.sample_type & bit) {
-		evsel->attr.sample_type &= ~bit;
-		evsel->sample_size -= sizeof(u64);
-	}
-}
-
-void perf_evsel__set_sample_id(struct perf_evsel *evsel)
-{
-	perf_evsel__set_sample_bit(evsel, ID);
-	evsel->attr.read_format |= PERF_FORMAT_ID;
-}
-
 void perf_evsel__init(struct perf_evsel *evsel,
 		      struct perf_event_attr *attr, int idx)
 {
 	evsel->idx	   = idx;
 	evsel->attr	   = *attr;
-	evsel->leader	   = evsel;
 	INIT_LIST_HEAD(&evsel->node);
 	hists__init(&evsel->hists);
 	evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
@@ -434,31 +404,6 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
 	return evsel->name ?: "unknown";
 }
 
-const char *perf_evsel__group_name(struct perf_evsel *evsel)
-{
-	return evsel->group_name ?: "anon group";
-}
-
-int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
-{
-	int ret;
-	struct perf_evsel *pos;
-	const char *group_name = perf_evsel__group_name(evsel);
-
-	ret = scnprintf(buf, size, "%s", group_name);
-
-	ret += scnprintf(buf + ret, size - ret, " { %s",
-			 perf_evsel__name(evsel));
-
-	for_each_group_member(pos, evsel)
-		ret += scnprintf(buf + ret, size - ret, ", %s",
-				 perf_evsel__name(pos));
-
-	ret += scnprintf(buf + ret, size - ret, " }");
-
-	return ret;
-}
-
 /*
  * The enable_on_exec/disabled value strategy:
  *
@@ -493,11 +438,13 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	struct perf_event_attr *attr = &evsel->attr;
 	int track = !evsel->idx; /* only the first counter needs these */
 
-	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
+	attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 	attr->inherit	    = !opts->no_inherit;
+	attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
+			      PERF_FORMAT_TOTAL_TIME_RUNNING |
+			      PERF_FORMAT_ID;
 
-	perf_evsel__set_sample_bit(evsel, IP);
-	perf_evsel__set_sample_bit(evsel, TID);
+	attr->sample_type  |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 
 	/*
 	 * We default some events to a 1 default interval. But keep
@@ -506,7 +453,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
 				     opts->user_interval != ULLONG_MAX)) {
 		if (opts->freq) {
-			perf_evsel__set_sample_bit(evsel, PERIOD);
+			attr->sample_type	|= PERF_SAMPLE_PERIOD;
 			attr->freq		= 1;
 			attr->sample_freq	= opts->freq;
 		} else {
@@ -521,16 +468,16 @@ void perf_evsel__config(struct perf_evsel *evsel,
 		attr->inherit_stat = 1;
 
 	if (opts->sample_address) {
-		perf_evsel__set_sample_bit(evsel, ADDR);
+		attr->sample_type	|= PERF_SAMPLE_ADDR;
 		attr->mmap_data = track;
 	}
 
 	if (opts->call_graph) {
-		perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
 		if (opts->call_graph == CALLCHAIN_DWARF) {
-			perf_evsel__set_sample_bit(evsel, REGS_USER);
-			perf_evsel__set_sample_bit(evsel, STACK_USER);
+			attr->sample_type |= PERF_SAMPLE_REGS_USER |
+					     PERF_SAMPLE_STACK_USER;
 			attr->sample_regs_user = PERF_REGS_MASK;
 			attr->sample_stack_user = opts->stack_dump_size;
 			attr->exclude_callchain_user = 1;
@@ -538,20 +485,20 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	}
 
 	if (perf_target__has_cpu(&opts->target))
-		perf_evsel__set_sample_bit(evsel, CPU);
+		attr->sample_type	|= PERF_SAMPLE_CPU;
 
 	if (opts->period)
-		perf_evsel__set_sample_bit(evsel, PERIOD);
+		attr->sample_type	|= PERF_SAMPLE_PERIOD;
 
-	if (!perf_missing_features.sample_id_all &&
+	if (!opts->sample_id_all_missing &&
 	    (opts->sample_time || !opts->no_inherit ||
 	     perf_target__has_cpu(&opts->target)))
-		perf_evsel__set_sample_bit(evsel, TIME);
+		attr->sample_type	|= PERF_SAMPLE_TIME;
 
 	if (opts->raw_samples) {
-		perf_evsel__set_sample_bit(evsel, TIME);
-		perf_evsel__set_sample_bit(evsel, RAW);
-		perf_evsel__set_sample_bit(evsel, CPU);
+		attr->sample_type	|= PERF_SAMPLE_TIME;
+		attr->sample_type	|= PERF_SAMPLE_RAW;
+		attr->sample_type	|= PERF_SAMPLE_CPU;
 	}
 
 	if (opts->no_delay) {
@@ -559,7 +506,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
 		attr->wakeup_events = 1;
 	}
 	if (opts->branch_stack) {
-		perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+		attr->sample_type	|= PERF_SAMPLE_BRANCH_STACK;
 		attr->branch_sample_type = opts->branch_stack;
 	}
 
@@ -572,14 +519,14 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	 * Disabling only independent events or group leaders,
 	 * keeping group members enabled.
 	 */
-	if (perf_evsel__is_group_leader(evsel))
+	if (!perf_evsel__is_group_member(evsel))
 		attr->disabled = 1;
 
 	/*
 	 * Setting enable_on_exec for independent events and
 	 * group leaders for traced executed by perf.
 	 */
-	if (perf_target__none(&opts->target) && perf_evsel__is_group_leader(evsel))
+	if (perf_target__none(&opts->target) && !perf_evsel__is_group_member(evsel))
 		attr->enable_on_exec = 1;
 }
 
@@ -665,11 +612,6 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 		}
 }
 
-void perf_evsel__free_counts(struct perf_evsel *evsel)
-{
-	free(evsel->counts);
-}
-
 void perf_evsel__exit(struct perf_evsel *evsel)
 {
 	assert(list_empty(&evsel->node));
@@ -689,28 +631,6 @@ void perf_evsel__delete(struct perf_evsel *evsel)
 	free(evsel);
 }
 
-static inline void compute_deltas(struct perf_evsel *evsel,
-				  int cpu,
-				  struct perf_counts_values *count)
-{
-	struct perf_counts_values tmp;
-
-	if (!evsel->prev_raw_counts)
-		return;
-
-	if (cpu == -1) {
-		tmp = evsel->prev_raw_counts->aggr;
-		evsel->prev_raw_counts->aggr = *count;
-	} else {
-		tmp = evsel->prev_raw_counts->cpu[cpu];
-		evsel->prev_raw_counts->cpu[cpu] = *count;
-	}
-
-	count->val = count->val - tmp.val;
-	count->ena = count->ena - tmp.ena;
-	count->run = count->run - tmp.run;
-}
-
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 			      int cpu, int thread, bool scale)
 {
@@ -726,8 +646,6 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
 		return -errno;
 
-	compute_deltas(evsel, cpu, &count);
-
 	if (scale) {
 		if (count.run == 0)
 			count.val = 0;
@@ -766,8 +684,6 @@ int __perf_evsel__read(struct perf_evsel *evsel,
 		}
 	}
 
-	compute_deltas(evsel, -1, aggr);
-
 	evsel->counts->scaled = 0;
 	if (scale) {
 		if (aggr->run == 0) {
@@ -791,7 +707,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
 	struct perf_evsel *leader = evsel->leader;
 	int fd;
 
-	if (perf_evsel__is_group_leader(evsel))
+	if (!perf_evsel__is_group_member(evsel))
 		return -1;
 
 	/*
@@ -822,13 +738,6 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 		pid = evsel->cgrp->fd;
 	}
 
-fallback_missing_features:
-	if (perf_missing_features.exclude_guest)
-		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
-retry_sample_id:
-	if (perf_missing_features.sample_id_all)
-		evsel->attr.sample_id_all = 0;
-
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
 
 		for (thread = 0; thread < threads->nr; thread++) {
@@ -845,26 +754,13 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 								     group_fd, flags);
 			if (FD(evsel, cpu, thread) < 0) {
 				err = -errno;
-				goto try_fallback;
+				goto out_close;
 			}
 		}
 	}
 
 	return 0;
 
-try_fallback:
-	if (err != -EINVAL || cpu > 0 || thread > 0)
-		goto out_close;
-
-	if (!perf_missing_features.exclude_guest &&
-	    (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
-		perf_missing_features.exclude_guest = true;
-		goto fallback_missing_features;
-	} else if (!perf_missing_features.sample_id_all) {
-		perf_missing_features.sample_id_all = true;
-		goto retry_sample_id;
-	}
-
 out_close:
 	do {
 		while (--thread >= 0) {
@@ -1309,225 +1205,3 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
 
 	return 0;
 }
-
-static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
-{
-	va_list args;
-	int ret = 0;
-
-	if (!*first) {
-		ret += fprintf(fp, ",");
-	} else {
-		ret += fprintf(fp, ":");
-		*first = false;
-	}
-
-	va_start(args, fmt);
-	ret += vfprintf(fp, fmt, args);
-	va_end(args);
-	return ret;
-}
-
-static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value)
-{
-	if (value == 0)
-		return 0;
-
-	return comma_fprintf(fp, first, " %s: %" PRIu64, field, value);
-}
-
-#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field)
-
-struct bit_names {
-	int bit;
-	const char *name;
-};
-
-static int bits__fprintf(FILE *fp, const char *field, u64 value,
-			 struct bit_names *bits, bool *first)
-{
-	int i = 0, printed = comma_fprintf(fp, first, " %s: ", field);
-	bool first_bit = true;
-
-	do {
-		if (value & bits[i].bit) {
-			printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name);
-			first_bit = false;
-		}
-	} while (bits[++i].name != NULL);
-
-	return printed;
-}
-
-static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
-{
-#define bit_name(n) { PERF_SAMPLE_##n, #n }
-	struct bit_names bits[] = {
-		bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
-		bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
-		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
-		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
-		{ .name = NULL, }
-	};
-#undef bit_name
-	return bits__fprintf(fp, "sample_type", value, bits, first);
-}
-
-static int read_format__fprintf(FILE *fp, bool *first, u64 value)
-{
-#define bit_name(n) { PERF_FORMAT_##n, #n }
-	struct bit_names bits[] = {
-		bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
-		bit_name(ID), bit_name(GROUP),
-		{ .name = NULL, }
-	};
-#undef bit_name
-	return bits__fprintf(fp, "read_format", value, bits, first);
-}
-
-int perf_evsel__fprintf(struct perf_evsel *evsel,
-			struct perf_attr_details *details, FILE *fp)
-{
-	bool first = true;
-	int printed = 0;
-
-	if (details->event_group) {
-		struct perf_evsel *pos;
-
-		if (!perf_evsel__is_group_leader(evsel))
-			return 0;
-
-		if (evsel->nr_members > 1)
-			printed += fprintf(fp, "%s{", evsel->group_name ?: "");
-
-		printed += fprintf(fp, "%s", perf_evsel__name(evsel));
-		for_each_group_member(pos, evsel)
-			printed += fprintf(fp, ",%s", perf_evsel__name(pos));
-
-		if (evsel->nr_members > 1)
-			printed += fprintf(fp, "}");
-		goto out;
-	}
-
-	printed += fprintf(fp, "%s", perf_evsel__name(evsel));
-
-	if (details->verbose || details->freq) {
-		printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64,
-					 (u64)evsel->attr.sample_freq);
-	}
-
-	if (details->verbose) {
-		if_print(type);
-		if_print(config);
-		if_print(config1);
-		if_print(config2);
-		if_print(size);
-		printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type);
-		if (evsel->attr.read_format)
-			printed += read_format__fprintf(fp, &first, evsel->attr.read_format);
-		if_print(disabled);
-		if_print(inherit);
-		if_print(pinned);
-		if_print(exclusive);
-		if_print(exclude_user);
-		if_print(exclude_kernel);
-		if_print(exclude_hv);
-		if_print(exclude_idle);
-		if_print(mmap);
-		if_print(comm);
-		if_print(freq);
-		if_print(inherit_stat);
-		if_print(enable_on_exec);
-		if_print(task);
-		if_print(watermark);
-		if_print(precise_ip);
-		if_print(mmap_data);
-		if_print(sample_id_all);
-		if_print(exclude_host);
-		if_print(exclude_guest);
-		if_print(__reserved_1);
-		if_print(wakeup_events);
-		if_print(bp_type);
-		if_print(branch_sample_type);
-	}
-out:
-	fputc('\n', fp);
-	return ++printed;
-}
-
-bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
-			  char *msg, size_t msgsize)
-{
-	if ((err == ENOENT || err == ENXIO) &&
-	    evsel->attr.type   == PERF_TYPE_HARDWARE &&
-	    evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
-		/*
-		 * If it's cycles then fall back to hrtimer based
-		 * cpu-clock-tick sw counter, which is always available even if
-		 * no PMU support.
-		 *
-		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
-		 * b0a873e).
-		 */
-		scnprintf(msg, msgsize, "%s",
-"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
-
-		evsel->attr.type   = PERF_TYPE_SOFTWARE;
-		evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK;
-
-		free(evsel->name);
-		evsel->name = NULL;
-		return true;
-	}
-
-	return false;
-}
-
-int perf_evsel__open_strerror(struct perf_evsel *evsel,
-			      struct perf_target *target,
-			      int err, char *msg, size_t size)
-{
-	switch (err) {
-	case EPERM:
-	case EACCES:
-		return scnprintf(msg, size, "%s",
-		 "You may not have permission to collect %sstats.\n"
-		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
-		 " -1 - Not paranoid at all\n"
-		 "  0 - Disallow raw tracepoint access for unpriv\n"
-		 "  1 - Disallow cpu events for unpriv\n"
-		 "  2 - Disallow kernel profiling for unpriv",
-				 target->system_wide ? "system-wide " : "");
-	case ENOENT:
-		return scnprintf(msg, size, "The %s event is not supported.",
-				 perf_evsel__name(evsel));
-	case EMFILE:
-		return scnprintf(msg, size, "%s",
-			 "Too many events are opened.\n"
-			 "Try again after reducing the number of events.");
-	case ENODEV:
-		if (target->cpu_list)
-			return scnprintf(msg, size, "%s",
-	 "No such device - did you specify an out-of-range profile CPU?\n");
-		break;
-	case EOPNOTSUPP:
-		if (evsel->attr.precise_ip)
-			return scnprintf(msg, size, "%s",
-	"\'precise\' request may not be supported. Try removing 'p' modifier.");
-#if defined(__i386__) || defined(__x86_64__)
-		if (evsel->attr.type == PERF_TYPE_HARDWARE)
-			return scnprintf(msg, size, "%s",
-	"No hardware sampling interrupt available.\n"
-	"No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.");
-#endif
-		break;
-	default:
-		break;
-	}
-
-	return scnprintf(msg, size,
-	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).  \n"
-	"/bin/dmesg may provide additional information.\n"
-	"No CONFIG_PERF_EVENTS=y kernel support configured?\n",
-			 err, strerror(err), perf_evsel__name(evsel));
-}
diff --git a/trunk/tools/perf/util/evsel.h b/trunk/tools/perf/util/evsel.h
index 52021c3087df..3d2b8017438c 100644
--- a/trunk/tools/perf/util/evsel.h
+++ b/trunk/tools/perf/util/evsel.h
@@ -53,7 +53,6 @@ struct perf_evsel {
 	struct xyarray		*sample_id;
 	u64			*id;
 	struct perf_counts	*counts;
-	struct perf_counts	*prev_raw_counts;
 	int			idx;
 	u32			ids;
 	struct hists		hists;
@@ -74,13 +73,10 @@ struct perf_evsel {
 	bool 			needs_swap;
 	/* parse modifier helper */
 	int			exclude_GH;
-	int			nr_members;
 	struct perf_evsel	*leader;
 	char			*group_name;
 };
 
-#define hists_to_evsel(h) container_of(h, struct perf_evsel, hists)
-
 struct cpu_map;
 struct thread_map;
 struct perf_evlist;
@@ -114,30 +110,14 @@ extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
 int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
 					    char *bf, size_t size);
 const char *perf_evsel__name(struct perf_evsel *evsel);
-const char *perf_evsel__group_name(struct perf_evsel *evsel);
-int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
 void perf_evsel__free_fd(struct perf_evsel *evsel);
 void perf_evsel__free_id(struct perf_evsel *evsel);
-void perf_evsel__free_counts(struct perf_evsel *evsel);
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
-void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
-				  enum perf_event_sample_format bit);
-void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
-				    enum perf_event_sample_format bit);
-
-#define perf_evsel__set_sample_bit(evsel, bit) \
-	__perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
-
-#define perf_evsel__reset_sample_bit(evsel, bit) \
-	__perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
-
-void perf_evsel__set_sample_id(struct perf_evsel *evsel);
-
 int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 			   const char *filter);
 
@@ -246,34 +226,8 @@ static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)
 	return list_entry(evsel->node.next, struct perf_evsel, node);
 }
 
-static inline bool perf_evsel__is_group_leader(const struct perf_evsel *evsel)
-{
-	return evsel->leader == evsel;
-}
-
-struct perf_attr_details {
-	bool freq;
-	bool verbose;
-	bool event_group;
-};
-
-int perf_evsel__fprintf(struct perf_evsel *evsel,
-			struct perf_attr_details *details, FILE *fp);
-
-bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
-			  char *msg, size_t msgsize);
-int perf_evsel__open_strerror(struct perf_evsel *evsel,
-			      struct perf_target *target,
-			      int err, char *msg, size_t size);
-
-static inline int perf_evsel__group_idx(struct perf_evsel *evsel)
+static inline bool perf_evsel__is_group_member(const struct perf_evsel *evsel)
 {
-	return evsel->idx - evsel->leader->idx;
+	return evsel->leader != NULL;
 }
-
-#define for_each_group_member(_evsel, _leader) 					\
-for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); 	\
-     (_evsel) && (_evsel)->leader == (_leader);					\
-     (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
-
 #endif /* __PERF_EVSEL_H */
diff --git a/trunk/tools/perf/util/header.c b/trunk/tools/perf/util/header.c
index f4bfd79ef6a7..b7da4634a047 100644
--- a/trunk/tools/perf/util/header.c
+++ b/trunk/tools/perf/util/header.c
@@ -148,7 +148,7 @@ static char *do_read_string(int fd, struct perf_header *ph)
 	u32 len;
 	char *buf;
 
-	sz = readn(fd, &len, sizeof(len));
+	sz = read(fd, &len, sizeof(len));
 	if (sz < (ssize_t)sizeof(len))
 		return NULL;
 
@@ -159,7 +159,7 @@ static char *do_read_string(int fd, struct perf_header *ph)
 	if (!buf)
 		return NULL;
 
-	ret = readn(fd, buf, len);
+	ret = read(fd, buf, len);
 	if (ret == (ssize_t)len) {
 		/*
 		 * strings are padded by zeroes
@@ -287,12 +287,12 @@ static int dsos__write_buildid_table(struct perf_header *header, int fd)
 	struct perf_session *session = container_of(header,
 			struct perf_session, header);
 	struct rb_node *nd;
-	int err = machine__write_buildid_table(&session->machines.host, fd);
+	int err = machine__write_buildid_table(&session->host_machine, fd);
 
 	if (err)
 		return err;
 
-	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
 		err = machine__write_buildid_table(pos, fd);
 		if (err)
@@ -313,8 +313,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
 	if (is_kallsyms) {
 		if (symbol_conf.kptr_restrict) {
 			pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
-			err = 0;
-			goto out_free;
+			return 0;
 		}
 		realname = (char *) name;
 	} else
@@ -449,9 +448,9 @@ static int perf_session__cache_build_ids(struct perf_session *session)
 	if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
 		return -1;
 
-	ret = machine__cache_build_ids(&session->machines.host, debugdir);
+	ret = machine__cache_build_ids(&session->host_machine, debugdir);
 
-	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
 		ret |= machine__cache_build_ids(pos, debugdir);
 	}
@@ -468,9 +467,9 @@ static bool machine__read_build_ids(struct machine *machine, bool with_hits)
 static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
 {
 	struct rb_node *nd;
-	bool ret = machine__read_build_ids(&session->machines.host, with_hits);
+	bool ret = machine__read_build_ids(&session->host_machine, with_hits);
 
-	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
 		ret |= machine__read_build_ids(pos, with_hits);
 	}
@@ -955,7 +954,6 @@ static int write_topo_node(int fd, int node)
 	}
 
 	fclose(fp);
-	fp = NULL;
 
 	ret = do_write(fd, &mem_total, sizeof(u64));
 	if (ret)
@@ -982,8 +980,7 @@ static int write_topo_node(int fd, int node)
 	ret = do_write_string(fd, buf);
 done:
 	free(buf);
-	if (fp)
-		fclose(fp);
+	fclose(fp);
 	return ret;
 }
 
@@ -1054,25 +1051,16 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
 	struct perf_pmu *pmu = NULL;
 	off_t offset = lseek(fd, 0, SEEK_CUR);
 	__u32 pmu_num = 0;
-	int ret;
 
 	/* write real pmu_num later */
-	ret = do_write(fd, &pmu_num, sizeof(pmu_num));
-	if (ret < 0)
-		return ret;
+	do_write(fd, &pmu_num, sizeof(pmu_num));
 
 	while ((pmu = perf_pmu__scan(pmu))) {
 		if (!pmu->name)
 			continue;
 		pmu_num++;
-
-		ret = do_write(fd, &pmu->type, sizeof(pmu->type));
-		if (ret < 0)
-			return ret;
-
-		ret = do_write_string(fd, pmu->name);
-		if (ret < 0)
-			return ret;
+		do_write(fd, &pmu->type, sizeof(pmu->type));
+		do_write_string(fd, pmu->name);
 	}
 
 	if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) {
@@ -1084,52 +1072,6 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
 	return 0;
 }
 
-/*
- * File format:
- *
- * struct group_descs {
- *	u32	nr_groups;
- *	struct group_desc {
- *		char	name[];
- *		u32	leader_idx;
- *		u32	nr_members;
- *	}[nr_groups];
- * };
- */
-static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
-			    struct perf_evlist *evlist)
-{
-	u32 nr_groups = evlist->nr_groups;
-	struct perf_evsel *evsel;
-	int ret;
-
-	ret = do_write(fd, &nr_groups, sizeof(nr_groups));
-	if (ret < 0)
-		return ret;
-
-	list_for_each_entry(evsel, &evlist->entries, node) {
-		if (perf_evsel__is_group_leader(evsel) &&
-		    evsel->nr_members > 1) {
-			const char *name = evsel->group_name ?: "{anon_group}";
-			u32 leader_idx = evsel->idx;
-			u32 nr_members = evsel->nr_members;
-
-			ret = do_write_string(fd, name);
-			if (ret < 0)
-				return ret;
-
-			ret = do_write(fd, &leader_idx, sizeof(leader_idx));
-			if (ret < 0)
-				return ret;
-
-			ret = do_write(fd, &nr_members, sizeof(nr_members));
-			if (ret < 0)
-				return ret;
-		}
-	}
-	return 0;
-}
-
 /*
  * default get_cpuid(): nothing gets recorded
  * actual implementation must be in arch/$(ARCH)/util/header.c
@@ -1267,14 +1209,14 @@ read_event_desc(struct perf_header *ph, int fd)
 	size_t msz;
 
 	/* number of events */
-	ret = readn(fd, &nre, sizeof(nre));
+	ret = read(fd, &nre, sizeof(nre));
 	if (ret != (ssize_t)sizeof(nre))
 		goto error;
 
 	if (ph->needs_swap)
 		nre = bswap_32(nre);
 
-	ret = readn(fd, &sz, sizeof(sz));
+	ret = read(fd, &sz, sizeof(sz));
 	if (ret != (ssize_t)sizeof(sz))
 		goto error;
 
@@ -1302,7 +1244,7 @@ read_event_desc(struct perf_header *ph, int fd)
 		 * must read entire on-file attr struct to
 		 * sync up with layout.
 		 */
-		ret = readn(fd, buf, sz);
+		ret = read(fd, buf, sz);
 		if (ret != (ssize_t)sz)
 			goto error;
 
@@ -1311,7 +1253,7 @@ read_event_desc(struct perf_header *ph, int fd)
 
 		memcpy(&evsel->attr, buf, msz);
 
-		ret = readn(fd, &nr, sizeof(nr));
+		ret = read(fd, &nr, sizeof(nr));
 		if (ret != (ssize_t)sizeof(nr))
 			goto error;
 
@@ -1332,7 +1274,7 @@ read_event_desc(struct perf_header *ph, int fd)
 		evsel->id = id;
 
 		for (j = 0 ; j < nr; j++) {
-			ret = readn(fd, id, sizeof(*id));
+			ret = read(fd, id, sizeof(*id));
 			if (ret != (ssize_t)sizeof(*id))
 				goto error;
 			if (ph->needs_swap)
@@ -1493,31 +1435,6 @@ static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
 	fprintf(fp, "# pmu mappings: unable to read\n");
 }
 
-static void print_group_desc(struct perf_header *ph, int fd __maybe_unused,
-			     FILE *fp)
-{
-	struct perf_session *session;
-	struct perf_evsel *evsel;
-	u32 nr = 0;
-
-	session = container_of(ph, struct perf_session, header);
-
-	list_for_each_entry(evsel, &session->evlist->entries, node) {
-		if (perf_evsel__is_group_leader(evsel) &&
-		    evsel->nr_members > 1) {
-			fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
-				perf_evsel__name(evsel));
-
-			nr = evsel->nr_members - 1;
-		} else if (nr) {
-			fprintf(fp, ",%s", perf_evsel__name(evsel));
-
-			if (--nr == 0)
-				fprintf(fp, "}\n");
-		}
-	}
-}
-
 static int __event_process_build_id(struct build_id_event *bev,
 				    char *filename,
 				    struct perf_session *session)
@@ -1589,14 +1506,14 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
 	while (offset < limit) {
 		ssize_t len;
 
-		if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+		if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
 			return -1;
 
 		if (header->needs_swap)
 			perf_event_header__bswap(&old_bev.header);
 
 		len = old_bev.header.size - sizeof(old_bev);
-		if (readn(input, filename, len) != len)
+		if (read(input, filename, len) != len)
 			return -1;
 
 		bev.header = old_bev.header;
@@ -1631,14 +1548,14 @@ static int perf_header__read_build_ids(struct perf_header *header,
 	while (offset < limit) {
 		ssize_t len;
 
-		if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
+		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
 			goto out;
 
 		if (header->needs_swap)
 			perf_event_header__bswap(&bev.header);
 
 		len = bev.header.size - sizeof(bev);
-		if (readn(input, filename, len) != len)
+		if (read(input, filename, len) != len)
 			goto out;
 		/*
 		 * The a1645ce1 changeset:
@@ -1724,7 +1641,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
 	size_t ret;
 	u32 nr;
 
-	ret = readn(fd, &nr, sizeof(nr));
+	ret = read(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
 		return -1;
 
@@ -1733,7 +1650,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
 
 	ph->env.nr_cpus_online = nr;
 
-	ret = readn(fd, &nr, sizeof(nr));
+	ret = read(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
 		return -1;
 
@@ -1767,7 +1684,7 @@ static int process_total_mem(struct perf_file_section *section __maybe_unused,
 	uint64_t mem;
 	size_t ret;
 
-	ret = readn(fd, &mem, sizeof(mem));
+	ret = read(fd, &mem, sizeof(mem));
 	if (ret != sizeof(mem))
 		return -1;
 
@@ -1839,7 +1756,7 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused,
 	u32 nr, i;
 	struct strbuf sb;
 
-	ret = readn(fd, &nr, sizeof(nr));
+	ret = read(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
 		return -1;
 
@@ -1875,7 +1792,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused
 	char *str;
 	struct strbuf sb;
 
-	ret = readn(fd, &nr, sizeof(nr));
+	ret = read(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
 		return -1;
 
@@ -1896,7 +1813,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused
 	}
 	ph->env.sibling_cores = strbuf_detach(&sb, NULL);
 
-	ret = readn(fd, &nr, sizeof(nr));
+	ret = read(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
 		return -1;
 
@@ -1933,7 +1850,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
 	struct strbuf sb;
 
 	/* nr nodes */
-	ret = readn(fd, &nr, sizeof(nr));
+	ret = read(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
 		goto error;
 
@@ -1945,15 +1862,15 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
 
 	for (i = 0; i < nr; i++) {
 		/* node number */
-		ret = readn(fd, &node, sizeof(node));
+		ret = read(fd, &node, sizeof(node));
 		if (ret != sizeof(node))
 			goto error;
 
-		ret = readn(fd, &mem_total, sizeof(u64));
+		ret = read(fd, &mem_total, sizeof(u64));
 		if (ret != sizeof(u64))
 			goto error;
 
-		ret = readn(fd, &mem_free, sizeof(u64));
+		ret = read(fd, &mem_free, sizeof(u64));
 		if (ret != sizeof(u64))
 			goto error;
 
@@ -1992,7 +1909,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
 	u32 type;
 	struct strbuf sb;
 
-	ret = readn(fd, &pmu_num, sizeof(pmu_num));
+	ret = read(fd, &pmu_num, sizeof(pmu_num));
 	if (ret != sizeof(pmu_num))
 		return -1;
 
@@ -2008,7 +1925,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
 	strbuf_init(&sb, 128);
 
 	while (pmu_num) {
-		if (readn(fd, &type, sizeof(type)) != sizeof(type))
+		if (read(fd, &type, sizeof(type)) != sizeof(type))
 			goto error;
 		if (ph->needs_swap)
 			type = bswap_32(type);
@@ -2032,98 +1949,6 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
 	return -1;
 }
 
-static int process_group_desc(struct perf_file_section *section __maybe_unused,
-			      struct perf_header *ph, int fd,
-			      void *data __maybe_unused)
-{
-	size_t ret = -1;
-	u32 i, nr, nr_groups;
-	struct perf_session *session;
-	struct perf_evsel *evsel, *leader = NULL;
-	struct group_desc {
-		char *name;
-		u32 leader_idx;
-		u32 nr_members;
-	} *desc;
-
-	if (readn(fd, &nr_groups, sizeof(nr_groups)) != sizeof(nr_groups))
-		return -1;
-
-	if (ph->needs_swap)
-		nr_groups = bswap_32(nr_groups);
-
-	ph->env.nr_groups = nr_groups;
-	if (!nr_groups) {
-		pr_debug("group desc not available\n");
-		return 0;
-	}
-
-	desc = calloc(nr_groups, sizeof(*desc));
-	if (!desc)
-		return -1;
-
-	for (i = 0; i < nr_groups; i++) {
-		desc[i].name = do_read_string(fd, ph);
-		if (!desc[i].name)
-			goto out_free;
-
-		if (readn(fd, &desc[i].leader_idx, sizeof(u32)) != sizeof(u32))
-			goto out_free;
-
-		if (readn(fd, &desc[i].nr_members, sizeof(u32)) != sizeof(u32))
-			goto out_free;
-
-		if (ph->needs_swap) {
-			desc[i].leader_idx = bswap_32(desc[i].leader_idx);
-			desc[i].nr_members = bswap_32(desc[i].nr_members);
-		}
-	}
-
-	/*
-	 * Rebuild group relationship based on the group_desc
-	 */
-	session = container_of(ph, struct perf_session, header);
-	session->evlist->nr_groups = nr_groups;
-
-	i = nr = 0;
-	list_for_each_entry(evsel, &session->evlist->entries, node) {
-		if (evsel->idx == (int) desc[i].leader_idx) {
-			evsel->leader = evsel;
-			/* {anon_group} is a dummy name */
-			if (strcmp(desc[i].name, "{anon_group}"))
-				evsel->group_name = desc[i].name;
-			evsel->nr_members = desc[i].nr_members;
-
-			if (i >= nr_groups || nr > 0) {
-				pr_debug("invalid group desc\n");
-				goto out_free;
-			}
-
-			leader = evsel;
-			nr = evsel->nr_members - 1;
-			i++;
-		} else if (nr) {
-			/* This is a group member */
-			evsel->leader = leader;
-
-			nr--;
-		}
-	}
-
-	if (i != nr_groups || nr != 0) {
-		pr_debug("invalid group desc\n");
-		goto out_free;
-	}
-
-	ret = 0;
-out_free:
-	while ((int) --i >= 0)
-		free(desc[i].name);
-	free(desc);
-
-	return ret;
-}
-
 struct feature_ops {
 	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
 	void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -2163,7 +1988,6 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPF(HEADER_NUMA_TOPOLOGY,	numa_topology),
 	FEAT_OPA(HEADER_BRANCH_STACK,	branch_stack),
 	FEAT_OPP(HEADER_PMU_MAPPINGS,	pmu_mappings),
-	FEAT_OPP(HEADER_GROUP_DESC,	group_desc),
 };
 
 struct header_print_data {
@@ -2253,7 +2077,7 @@ static int perf_header__adds_write(struct perf_header *header,
 	if (!nr_sections)
 		return 0;
 
-	feat_sec = p = calloc(nr_sections, sizeof(*feat_sec));
+	feat_sec = p = calloc(sizeof(*feat_sec), nr_sections);
 	if (feat_sec == NULL)
 		return -ENOMEM;
 
@@ -2425,7 +2249,7 @@ int perf_header__process_sections(struct perf_header *header, int fd,
 	if (!nr_sections)
 		return 0;
 
-	feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec));
+	feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections);
 	if (!feat_sec)
 		return -1;
 
@@ -3088,22 +2912,16 @@ int perf_event__process_tracing_data(union perf_event *event,
 				 session->repipe);
 	padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
 
-	if (readn(session->fd, buf, padding) < 0) {
-		pr_err("%s: reading input file", __func__);
-		return -1;
-	}
+	if (read(session->fd, buf, padding) < 0)
+		die("reading input file");
 	if (session->repipe) {
 		int retw = write(STDOUT_FILENO, buf, padding);
-		if (retw <= 0 || retw != padding) {
-			pr_err("%s: repiping tracing data padding", __func__);
-			return -1;
-		}
+		if (retw <= 0 || retw != padding)
+			die("repiping tracing data padding");
 	}
 
-	if (size_read + padding != size) {
-		pr_err("%s: tracing data size mismatch", __func__);
-		return -1;
-	}
+	if (size_read + padding != size)
+		die("tracing data size mismatch");
 
 	perf_evlist__prepare_tracepoint_events(session->evlist,
 					       session->pevent);
diff --git a/trunk/tools/perf/util/header.h b/trunk/tools/perf/util/header.h
index c9fc55cada6d..20f0344accb1 100644
--- a/trunk/tools/perf/util/header.h
+++ b/trunk/tools/perf/util/header.h
@@ -29,7 +29,6 @@ enum {
 	HEADER_NUMA_TOPOLOGY,
 	HEADER_BRANCH_STACK,
 	HEADER_PMU_MAPPINGS,
-	HEADER_GROUP_DESC,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
@@ -80,7 +79,6 @@ struct perf_session_env {
 	char			*numa_nodes;
 	int			nr_pmu_mappings;
 	char			*pmu_mappings;
-	int			nr_groups;
 };
 
 struct perf_header {
diff --git a/trunk/tools/perf/util/hist.c b/trunk/tools/perf/util/hist.c
index f855941bebea..cb17e2a8c6ed 100644
--- a/trunk/tools/perf/util/hist.c
+++ b/trunk/tools/perf/util/hist.c
@@ -4,7 +4,6 @@
 #include "hist.h"
 #include "session.h"
 #include "sort.h"
-#include "evsel.h"
 #include <math.h>
 
 static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -83,9 +82,6 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 		hists__new_col_len(hists, HISTC_DSO, len);
 	}
 
-	if (h->parent)
-		hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
-
 	if (h->branch_info) {
 		int symlen;
 		/*
@@ -246,14 +242,6 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
 
 		if (he->ms.map)
 			he->ms.map->referenced = true;
-
-		if (he->branch_info) {
-			if (he->branch_info->from.map)
-				he->branch_info->from.map->referenced = true;
-			if (he->branch_info->to.map)
-				he->branch_info->to.map->referenced = true;
-		}
-
 		if (symbol_conf.use_callchain)
 			callchain_init(he->callchain);
 
@@ -263,7 +251,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
 	return he;
 }
 
-void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
+static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
 {
 	if (!h->filtered) {
 		hists__calc_col_len(hists, h);
@@ -297,13 +285,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 		parent = *p;
 		he = rb_entry(parent, struct hist_entry, rb_node_in);
 
-		/*
-		 * Make sure that it receives arguments in a same order as
-		 * hist_entry__collapse() so that we can use an appropriate
-		 * function when searching an entry regardless which sort
-		 * keys were used.
-		 */
-		cmp = hist_entry__cmp(he, entry);
+		cmp = hist_entry__cmp(entry, he);
 
 		if (!cmp) {
 			he_stat__add_period(&he->stat, period);
@@ -541,62 +523,6 @@ void hists__collapse_resort_threaded(struct hists *hists)
  * reverse the map, sort on period.
  */
 
-static int period_cmp(u64 period_a, u64 period_b)
-{
-	if (period_a > period_b)
-		return 1;
-	if (period_a < period_b)
-		return -1;
-	return 0;
-}
-
-static int hist_entry__sort_on_period(struct hist_entry *a,
-				      struct hist_entry *b)
-{
-	int ret;
-	int i, nr_members;
-	struct perf_evsel *evsel;
-	struct hist_entry *pair;
-	u64 *periods_a, *periods_b;
-
-	ret = period_cmp(a->stat.period, b->stat.period);
-	if (ret || !symbol_conf.event_group)
-		return ret;
-
-	evsel = hists_to_evsel(a->hists);
-	nr_members = evsel->nr_members;
-	if (nr_members <= 1)
-		return ret;
-
-	periods_a = zalloc(sizeof(periods_a) * nr_members);
-	periods_b = zalloc(sizeof(periods_b) * nr_members);
-
-	if (!periods_a || !periods_b)
-		goto out;
-
-	list_for_each_entry(pair, &a->pairs.head, pairs.node) {
-		evsel = hists_to_evsel(pair->hists);
-		periods_a[perf_evsel__group_idx(evsel)] = pair->stat.period;
-	}
-
-	list_for_each_entry(pair, &b->pairs.head, pairs.node) {
-		evsel = hists_to_evsel(pair->hists);
-		periods_b[perf_evsel__group_idx(evsel)] = pair->stat.period;
-	}
-
-	for (i = 1; i < nr_members; i++) {
-		ret = period_cmp(periods_a[i], periods_b[i]);
-		if (ret)
-			break;
-	}
-
-out:
-	free(periods_a);
-	free(periods_b);
-
-	return ret;
-}
-
 static void __hists__insert_output_entry(struct rb_root *entries,
 					 struct hist_entry *he,
 					 u64 min_callchain_hits)
@@ -613,7 +539,7 @@ static void __hists__insert_output_entry(struct rb_root *entries,
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node);
 
-		if (hist_entry__sort_on_period(he, iter) > 0)
+		if (he->stat.period > iter->stat.period)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -785,38 +711,25 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize)
 	return symbol__annotate(he->ms.sym, he->ms.map, privsize);
 }
 
-void events_stats__inc(struct events_stats *stats, u32 type)
-{
-	++stats->nr_events[0];
-	++stats->nr_events[type];
-}
-
 void hists__inc_nr_events(struct hists *hists, u32 type)
 {
-	events_stats__inc(&hists->stats, type);
+	++hists->stats.nr_events[0];
+	++hists->stats.nr_events[type];
 }
 
 static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 						 struct hist_entry *pair)
 {
-	struct rb_root *root;
-	struct rb_node **p;
+	struct rb_node **p = &hists->entries.rb_node;
 	struct rb_node *parent = NULL;
 	struct hist_entry *he;
 	int cmp;
 
-	if (sort__need_collapse)
-		root = &hists->entries_collapsed;
-	else
-		root = hists->entries_in;
-
-	p = &root->rb_node;
-
 	while (*p != NULL) {
 		parent = *p;
-		he = rb_entry(parent, struct hist_entry, rb_node_in);
+		he = rb_entry(parent, struct hist_entry, rb_node);
 
-		cmp = hist_entry__collapse(he, pair);
+		cmp = hist_entry__cmp(pair, he);
 
 		if (!cmp)
 			goto out;
@@ -831,8 +744,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 	if (he) {
 		memset(&he->stat, 0, sizeof(he->stat));
 		he->hists = hists;
-		rb_link_node(&he->rb_node_in, parent, p);
-		rb_insert_color(&he->rb_node_in, root);
+		rb_link_node(&he->rb_node, parent, p);
+		rb_insert_color(&he->rb_node, &hists->entries);
 		hists__inc_nr_entries(hists, he);
 	}
 out:
@@ -842,16 +755,11 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 static struct hist_entry *hists__find_entry(struct hists *hists,
 					    struct hist_entry *he)
 {
-	struct rb_node *n;
-
-	if (sort__need_collapse)
-		n = hists->entries_collapsed.rb_node;
-	else
-		n = hists->entries_in->rb_node;
+	struct rb_node *n = hists->entries.rb_node;
 
 	while (n) {
-		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
-		int64_t cmp = hist_entry__collapse(iter, he);
+		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
+		int64_t cmp = hist_entry__cmp(he, iter);
 
 		if (cmp < 0)
 			n = n->rb_left;
@@ -869,21 +777,15 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
  */
 void hists__match(struct hists *leader, struct hists *other)
 {
-	struct rb_root *root;
 	struct rb_node *nd;
 	struct hist_entry *pos, *pair;
 
-	if (sort__need_collapse)
-		root = &leader->entries_collapsed;
-	else
-		root = leader->entries_in;
-
-	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
-		pos  = rb_entry(nd, struct hist_entry, rb_node_in);
+	for (nd = rb_first(&leader->entries); nd; nd = rb_next(nd)) {
+		pos  = rb_entry(nd, struct hist_entry, rb_node);
 		pair = hists__find_entry(other, pos);
 
 		if (pair)
-			hist_entry__add_pair(pair, pos);
+			hist__entry_add_pair(pos, pair);
 	}
 }
 
@@ -894,23 +796,17 @@ void hists__match(struct hists *leader, struct hists *other)
  */
 int hists__link(struct hists *leader, struct hists *other)
 {
-	struct rb_root *root;
 	struct rb_node *nd;
 	struct hist_entry *pos, *pair;
 
-	if (sort__need_collapse)
-		root = &other->entries_collapsed;
-	else
-		root = other->entries_in;
-
-	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
-		pos = rb_entry(nd, struct hist_entry, rb_node_in);
+	for (nd = rb_first(&other->entries); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node);
 
 		if (!hist_entry__has_pairs(pos)) {
 			pair = hists__add_dummy_entry(leader, pos);
 			if (pair == NULL)
 				return -1;
-			hist_entry__add_pair(pos, pair);
+			hist__entry_add_pair(pair, pos);
 		}
 	}
 
diff --git a/trunk/tools/perf/util/hist.h b/trunk/tools/perf/util/hist.h
index 38624686ee9a..8b091a51e4a2 100644
--- a/trunk/tools/perf/util/hist.h
+++ b/trunk/tools/perf/util/hist.h
@@ -96,10 +96,8 @@ void hists__decay_entries_threaded(struct hists *hists, bool zap_user,
 				   bool zap_kernel);
 void hists__output_recalc_col_len(struct hists *hists, int max_rows);
 
-void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h);
 void hists__inc_nr_events(struct hists *self, u32 type);
-void events_stats__inc(struct events_stats *stats, u32 type);
-size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
+size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
 
 size_t hists__fprintf(struct hists *self, bool show_header, int max_rows,
 		      int max_cols, FILE *fp);
@@ -128,19 +126,13 @@ struct perf_hpp {
 };
 
 struct perf_hpp_fmt {
+	bool cond;
 	int (*header)(struct perf_hpp *hpp);
 	int (*width)(struct perf_hpp *hpp);
 	int (*color)(struct perf_hpp *hpp, struct hist_entry *he);
 	int (*entry)(struct perf_hpp *hpp, struct hist_entry *he);
-
-	struct list_head list;
 };
 
-extern struct list_head perf_hpp__list;
-
-#define perf_hpp__for_each_format(format) \
-	list_for_each_entry(format, &perf_hpp__list, list)
-
 extern struct perf_hpp_fmt perf_hpp__format[];
 
 enum {
@@ -156,14 +148,14 @@ enum {
 	PERF_HPP__DELTA,
 	PERF_HPP__RATIO,
 	PERF_HPP__WEIGHTED_DIFF,
+	PERF_HPP__DISPL,
 	PERF_HPP__FORMULA,
 
 	PERF_HPP__MAX_INDEX
 };
 
 void perf_hpp__init(void);
-void perf_hpp__column_register(struct perf_hpp_fmt *format);
-void perf_hpp__column_enable(unsigned col);
+void perf_hpp__column_enable(unsigned col, bool enable);
 int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
 				bool color);
 
@@ -227,10 +219,8 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
 
 unsigned int hists__sort_list_width(struct hists *self);
 
-double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair);
-double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair);
-s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair);
-int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
-		       char *buf, size_t size);
-double perf_diff__period_percent(struct hist_entry *he, u64 period);
+double perf_diff__compute_delta(struct hist_entry *he);
+double perf_diff__compute_ratio(struct hist_entry *he);
+s64 perf_diff__compute_wdiff(struct hist_entry *he);
+int perf_diff__formula(char *buf, size_t size, struct hist_entry *he);
 #endif	/* __PERF_HIST_H */
diff --git a/trunk/tools/perf/util/include/linux/bitops.h b/trunk/tools/perf/util/include/linux/bitops.h
index 45cf10a562bd..a55d8cf083c9 100644
--- a/trunk/tools/perf/util/include/linux/bitops.h
+++ b/trunk/tools/perf/util/include/linux/bitops.h
@@ -14,7 +14,6 @@
 #define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #define BITS_TO_U64(nr)         DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
 #define BITS_TO_U32(nr)         DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
-#define BITS_TO_BYTES(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE)
 
 #define for_each_set_bit(bit, addr, size) \
 	for ((bit) = find_first_bit((addr), (size));		\
diff --git a/trunk/tools/perf/util/intlist.c b/trunk/tools/perf/util/intlist.c
index 11a8d86f7fea..9d0740024ba8 100644
--- a/trunk/tools/perf/util/intlist.c
+++ b/trunk/tools/perf/util/intlist.c
@@ -59,40 +59,16 @@ void intlist__remove(struct intlist *ilist, struct int_node *node)
 
 struct int_node *intlist__find(struct intlist *ilist, int i)
 {
-	struct int_node *node;
-	struct rb_node *rb_node;
-
-	if (ilist == NULL)
-		return NULL;
+	struct int_node *node = NULL;
+	struct rb_node *rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
 
-	node = NULL;
-	rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
 	if (rb_node)
 		node = container_of(rb_node, struct int_node, rb_node);
 
 	return node;
 }
 
-static int intlist__parse_list(struct intlist *ilist, const char *s)
-{
-	char *sep;
-	int err;
-
-	do {
-		long value = strtol(s, &sep, 10);
-		err = -EINVAL;
-		if (*sep != ',' && *sep != '\0')
-			break;
-		err = intlist__add(ilist, value);
-		if (err)
-			break;
-		s = sep + 1;
-	} while (*sep != '\0');
-
-	return err;
-}
-
-struct intlist *intlist__new(const char *slist)
+struct intlist *intlist__new(void)
 {
 	struct intlist *ilist = malloc(sizeof(*ilist));
 
@@ -101,15 +77,9 @@ struct intlist *intlist__new(const char *slist)
 		ilist->rblist.node_cmp    = intlist__node_cmp;
 		ilist->rblist.node_new    = intlist__node_new;
 		ilist->rblist.node_delete = intlist__node_delete;
-
-		if (slist && intlist__parse_list(ilist, slist))
-			goto out_delete;
 	}
 
 	return ilist;
-out_delete:
-	intlist__delete(ilist);
-	return NULL;
 }
 
 void intlist__delete(struct intlist *ilist)
diff --git a/trunk/tools/perf/util/intlist.h b/trunk/tools/perf/util/intlist.h
index 62351dad848f..6d63ab90db50 100644
--- a/trunk/tools/perf/util/intlist.h
+++ b/trunk/tools/perf/util/intlist.h
@@ -15,7 +15,7 @@ struct intlist {
 	struct rblist rblist;
 };
 
-struct intlist *intlist__new(const char *slist);
+struct intlist *intlist__new(void);
 void intlist__delete(struct intlist *ilist);
 
 void intlist__remove(struct intlist *ilist, struct int_node *in);
diff --git a/trunk/tools/perf/util/machine.c b/trunk/tools/perf/util/machine.c
index efdb38e65a92..1f09d0581e6b 100644
--- a/trunk/tools/perf/util/machine.c
+++ b/trunk/tools/perf/util/machine.c
@@ -1,15 +1,10 @@
-#include "callchain.h"
 #include "debug.h"
 #include "event.h"
-#include "evsel.h"
-#include "hist.h"
 #include "machine.h"
 #include "map.h"
-#include "sort.h"
 #include "strlist.h"
 #include "thread.h"
 #include <stdbool.h>
-#include "unwind.h"
 
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
@@ -53,29 +48,6 @@ static void dsos__delete(struct list_head *dsos)
 	}
 }
 
-void machine__delete_dead_threads(struct machine *machine)
-{
-	struct thread *n, *t;
-
-	list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
-		list_del(&t->node);
-		thread__delete(t);
-	}
-}
-
-void machine__delete_threads(struct machine *machine)
-{
-	struct rb_node *nd = rb_first(&machine->threads);
-
-	while (nd) {
-		struct thread *t = rb_entry(nd, struct thread, rb_node);
-
-		rb_erase(&t->rb_node, &machine->threads);
-		nd = rb_next(nd);
-		thread__delete(t);
-	}
-}
-
 void machine__exit(struct machine *machine)
 {
 	map_groups__exit(&machine->kmaps);
@@ -91,22 +63,10 @@ void machine__delete(struct machine *machine)
 	free(machine);
 }
 
-void machines__init(struct machines *machines)
-{
-	machine__init(&machines->host, "", HOST_KERNEL_ID);
-	machines->guests = RB_ROOT;
-}
-
-void machines__exit(struct machines *machines)
-{
-	machine__exit(&machines->host);
-	/* XXX exit guest */
-}
-
-struct machine *machines__add(struct machines *machines, pid_t pid,
+struct machine *machines__add(struct rb_root *machines, pid_t pid,
 			      const char *root_dir)
 {
-	struct rb_node **p = &machines->guests.rb_node;
+	struct rb_node **p = &machines->rb_node;
 	struct rb_node *parent = NULL;
 	struct machine *pos, *machine = malloc(sizeof(*machine));
 
@@ -128,21 +88,18 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
 	}
 
 	rb_link_node(&machine->rb_node, parent, p);
-	rb_insert_color(&machine->rb_node, &machines->guests);
+	rb_insert_color(&machine->rb_node, machines);
 
 	return machine;
 }
 
-struct machine *machines__find(struct machines *machines, pid_t pid)
+struct machine *machines__find(struct rb_root *machines, pid_t pid)
 {
-	struct rb_node **p = &machines->guests.rb_node;
+	struct rb_node **p = &machines->rb_node;
 	struct rb_node *parent = NULL;
 	struct machine *machine;
 	struct machine *default_machine = NULL;
 
-	if (pid == HOST_KERNEL_ID)
-		return &machines->host;
-
 	while (*p != NULL) {
 		parent = *p;
 		machine = rb_entry(parent, struct machine, rb_node);
@@ -159,7 +116,7 @@ struct machine *machines__find(struct machines *machines, pid_t pid)
 	return default_machine;
 }
 
-struct machine *machines__findnew(struct machines *machines, pid_t pid)
+struct machine *machines__findnew(struct rb_root *machines, pid_t pid)
 {
 	char path[PATH_MAX];
 	const char *root_dir = "";
@@ -193,12 +150,12 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid)
 	return machine;
 }
 
-void machines__process_guests(struct machines *machines,
-			      machine__process_t process, void *data)
+void machines__process(struct rb_root *machines,
+		       machine__process_t process, void *data)
 {
 	struct rb_node *nd;
 
-	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
 		process(pos, data);
 	}
@@ -218,14 +175,12 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size)
 	return bf;
 }
 
-void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
+void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size)
 {
 	struct rb_node *node;
 	struct machine *machine;
 
-	machines->host.id_hdr_size = id_hdr_size;
-
-	for (node = rb_first(&machines->guests); node; node = rb_next(node)) {
+	for (node = rb_first(machines); node; node = rb_next(node)) {
 		machine = rb_entry(node, struct machine, rb_node);
 		machine->id_hdr_size = id_hdr_size;
 	}
@@ -309,537 +264,6 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-struct map *machine__new_module(struct machine *machine, u64 start,
-				const char *filename)
-{
-	struct map *map;
-	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
-
-	if (dso == NULL)
-		return NULL;
-
-	map = map__new2(start, dso, MAP__FUNCTION);
-	if (map == NULL)
-		return NULL;
-
-	if (machine__is_host(machine))
-		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
-	else
-		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
-	map_groups__insert(&machine->kmaps, map);
-	return map;
-}
-
-size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
-{
-	struct rb_node *nd;
-	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) +
-		     __dsos__fprintf(&machines->host.user_dsos, fp);
-
-	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
-		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += __dsos__fprintf(&pos->kernel_dsos, fp);
-		ret += __dsos__fprintf(&pos->user_dsos, fp);
-	}
-
-	return ret;
-}
-
-size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
-				     bool (skip)(struct dso *dso, int parm), int parm)
-{
-	return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) +
-	       __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm);
-}
-
-size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
-				     bool (skip)(struct dso *dso, int parm), int parm)
-{
-	struct rb_node *nd;
-	size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
-
-	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
-		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
-	}
-	return ret;
-}
-
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
-{
-	int i;
-	size_t printed = 0;
-	struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso;
-
-	if (kdso->has_build_id) {
-		char filename[PATH_MAX];
-		if (dso__build_id_filename(kdso, filename, sizeof(filename)))
-			printed += fprintf(fp, "[0] %s\n", filename);
-	}
-
-	for (i = 0; i < vmlinux_path__nr_entries; ++i)
-		printed += fprintf(fp, "[%d] %s\n",
-				   i + kdso->has_build_id, vmlinux_path[i]);
-
-	return printed;
-}
-
-size_t machine__fprintf(struct machine *machine, FILE *fp)
-{
-	size_t ret = 0;
-	struct rb_node *nd;
-
-	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-		struct thread *pos = rb_entry(nd, struct thread, rb_node);
-
-		ret += thread__fprintf(pos, fp);
-	}
-
-	return ret;
-}
-
-static struct dso *machine__get_kernel(struct machine *machine)
-{
-	const char *vmlinux_name = NULL;
-	struct dso *kernel;
-
-	if (machine__is_host(machine)) {
-		vmlinux_name = symbol_conf.vmlinux_name;
-		if (!vmlinux_name)
-			vmlinux_name = "[kernel.kallsyms]";
-
-		kernel = dso__kernel_findnew(machine, vmlinux_name,
-					     "[kernel]",
-					     DSO_TYPE_KERNEL);
-	} else {
-		char bf[PATH_MAX];
-
-		if (machine__is_default_guest(machine))
-			vmlinux_name = symbol_conf.default_guest_vmlinux_name;
-		if (!vmlinux_name)
-			vmlinux_name = machine__mmap_name(machine, bf,
-							  sizeof(bf));
-
-		kernel = dso__kernel_findnew(machine, vmlinux_name,
-					     "[guest.kernel]",
-					     DSO_TYPE_GUEST_KERNEL);
-	}
-
-	if (kernel != NULL && (!kernel->has_build_id))
-		dso__read_running_kernel_build_id(kernel, machine);
-
-	return kernel;
-}
-
-struct process_args {
-	u64 start;
-};
-
-static int symbol__in_kernel(void *arg, const char *name,
-			     char type __maybe_unused, u64 start)
-{
-	struct process_args *args = arg;
-
-	if (strchr(name, '['))
-		return 0;
-
-	args->start = start;
-	return 1;
-}
-
-/* Figure out the start address of kernel map from /proc/kallsyms */
-static u64 machine__get_kernel_start_addr(struct machine *machine)
-{
-	const char *filename;
-	char path[PATH_MAX];
-	struct process_args args;
-
-	if (machine__is_host(machine)) {
-		filename = "/proc/kallsyms";
-	} else {
-		if (machine__is_default_guest(machine))
-			filename = (char *)symbol_conf.default_guest_kallsyms;
-		else {
-			sprintf(path, "%s/proc/kallsyms", machine->root_dir);
-			filename = path;
-		}
-	}
-
-	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
-		return 0;
-
-	if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0)
-		return 0;
-
-	return args.start;
-}
-
-int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
-{
-	enum map_type type;
-	u64 start = machine__get_kernel_start_addr(machine);
-
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-
-		machine->vmlinux_maps[type] = map__new2(start, kernel, type);
-		if (machine->vmlinux_maps[type] == NULL)
-			return -1;
-
-		machine->vmlinux_maps[type]->map_ip =
-			machine->vmlinux_maps[type]->unmap_ip =
-				identity__map_ip;
-		kmap = map__kmap(machine->vmlinux_maps[type]);
-		kmap->kmaps = &machine->kmaps;
-		map_groups__insert(&machine->kmaps,
-				   machine->vmlinux_maps[type]);
-	}
-
-	return 0;
-}
-
-void machine__destroy_kernel_maps(struct machine *machine)
-{
-	enum map_type type;
-
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-
-		if (machine->vmlinux_maps[type] == NULL)
-			continue;
-
-		kmap = map__kmap(machine->vmlinux_maps[type]);
-		map_groups__remove(&machine->kmaps,
-				   machine->vmlinux_maps[type]);
-		if (kmap->ref_reloc_sym) {
-			/*
-			 * ref_reloc_sym is shared among all maps, so free just
-			 * on one of them.
-			 */
-			if (type == MAP__FUNCTION) {
-				free((char *)kmap->ref_reloc_sym->name);
-				kmap->ref_reloc_sym->name = NULL;
-				free(kmap->ref_reloc_sym);
-			}
-			kmap->ref_reloc_sym = NULL;
-		}
-
-		map__delete(machine->vmlinux_maps[type]);
-		machine->vmlinux_maps[type] = NULL;
-	}
-}
-
-int machines__create_guest_kernel_maps(struct machines *machines)
-{
-	int ret = 0;
-	struct dirent **namelist = NULL;
-	int i, items = 0;
-	char path[PATH_MAX];
-	pid_t pid;
-	char *endp;
-
-	if (symbol_conf.default_guest_vmlinux_name ||
-	    symbol_conf.default_guest_modules ||
-	    symbol_conf.default_guest_kallsyms) {
-		machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
-	}
-
-	if (symbol_conf.guestmount) {
-		items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
-		if (items <= 0)
-			return -ENOENT;
-		for (i = 0; i < items; i++) {
-			if (!isdigit(namelist[i]->d_name[0])) {
-				/* Filter out . and .. */
-				continue;
-			}
-			pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
-			if ((*endp != '\0') ||
-			    (endp == namelist[i]->d_name) ||
-			    (errno == ERANGE)) {
-				pr_debug("invalid directory (%s). Skipping.\n",
-					 namelist[i]->d_name);
-				continue;
-			}
-			sprintf(path, "%s/%s/proc/kallsyms",
-				symbol_conf.guestmount,
-				namelist[i]->d_name);
-			ret = access(path, R_OK);
-			if (ret) {
-				pr_debug("Can't access file %s\n", path);
-				goto failure;
-			}
-			machines__create_kernel_maps(machines, pid);
-		}
-failure:
-		free(namelist);
-	}
-
-	return ret;
-}
-
-void machines__destroy_kernel_maps(struct machines *machines)
-{
-	struct rb_node *next = rb_first(&machines->guests);
-
-	machine__destroy_kernel_maps(&machines->host);
-
-	while (next) {
-		struct machine *pos = rb_entry(next, struct machine, rb_node);
-
-		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, &machines->guests);
-		machine__delete(pos);
-	}
-}
-
-int machines__create_kernel_maps(struct machines *machines, pid_t pid)
-{
-	struct machine *machine = machines__findnew(machines, pid);
-
-	if (machine == NULL)
-		return -1;
-
-	return machine__create_kernel_maps(machine);
-}
-
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type, symbol_filter_t filter)
-{
-	struct map *map = machine->vmlinux_maps[type];
-	int ret = dso__load_kallsyms(map->dso, filename, map, filter);
-
-	if (ret > 0) {
-		dso__set_loaded(map->dso, type);
-		/*
-		 * Since /proc/kallsyms will have multiple sessions for the
-		 * kernel, with modules between them, fixup the end of all
-		 * sections.
-		 */
-		__map_groups__fixup_end(&machine->kmaps, type);
-	}
-
-	return ret;
-}
-
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
-			       symbol_filter_t filter)
-{
-	struct map *map = machine->vmlinux_maps[type];
-	int ret = dso__load_vmlinux_path(map->dso, map, filter);
-
-	if (ret > 0) {
-		dso__set_loaded(map->dso, type);
-		map__reloc_vmlinux(map);
-	}
-
-	return ret;
-}
-
-static void map_groups__fixup_end(struct map_groups *mg)
-{
-	int i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		__map_groups__fixup_end(mg, i);
-}
-
-static char *get_kernel_version(const char *root_dir)
-{
-	char version[PATH_MAX];
-	FILE *file;
-	char *name, *tmp;
-	const char *prefix = "Linux version ";
-
-	sprintf(version, "%s/proc/version", root_dir);
-	file = fopen(version, "r");
-	if (!file)
-		return NULL;
-
-	version[0] = '\0';
-	tmp = fgets(version, sizeof(version), file);
-	fclose(file);
-
-	name = strstr(version, prefix);
-	if (!name)
-		return NULL;
-	name += strlen(prefix);
-	tmp = strchr(name, ' ');
-	if (tmp)
-		*tmp = '\0';
-
-	return strdup(name);
-}
-
-static int map_groups__set_modules_path_dir(struct map_groups *mg,
-				const char *dir_name)
-{
-	struct dirent *dent;
-	DIR *dir = opendir(dir_name);
-	int ret = 0;
-
-	if (!dir) {
-		pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
-		return -1;
-	}
-
-	while ((dent = readdir(dir)) != NULL) {
-		char path[PATH_MAX];
-		struct stat st;
-
-		/*sshfs might return bad dent->d_type, so we have to stat*/
-		snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
-		if (stat(path, &st))
-			continue;
-
-		if (S_ISDIR(st.st_mode)) {
-			if (!strcmp(dent->d_name, ".") ||
-			    !strcmp(dent->d_name, ".."))
-				continue;
-
-			ret = map_groups__set_modules_path_dir(mg, path);
-			if (ret < 0)
-				goto out;
-		} else {
-			char *dot = strrchr(dent->d_name, '.'),
-			     dso_name[PATH_MAX];
-			struct map *map;
-			char *long_name;
-
-			if (dot == NULL || strcmp(dot, ".ko"))
-				continue;
-			snprintf(dso_name, sizeof(dso_name), "[%.*s]",
-				 (int)(dot - dent->d_name), dent->d_name);
-
-			strxfrchar(dso_name, '-', '_');
-			map = map_groups__find_by_name(mg, MAP__FUNCTION,
-						       dso_name);
-			if (map == NULL)
-				continue;
-
-			long_name = strdup(path);
-			if (long_name == NULL) {
-				ret = -1;
-				goto out;
-			}
-			dso__set_long_name(map->dso, long_name);
-			map->dso->lname_alloc = 1;
-			dso__kernel_module_get_build_id(map->dso, "");
-		}
-	}
-
-out:
-	closedir(dir);
-	return ret;
-}
-
-static int machine__set_modules_path(struct machine *machine)
-{
-	char *version;
-	char modules_path[PATH_MAX];
-
-	version = get_kernel_version(machine->root_dir);
-	if (!version)
-		return -1;
-
-	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel",
-		 machine->root_dir, version);
-	free(version);
-
-	return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
-}
-
-static int machine__create_modules(struct machine *machine)
-{
-	char *line = NULL;
-	size_t n;
-	FILE *file;
-	struct map *map;
-	const char *modules;
-	char path[PATH_MAX];
-
-	if (machine__is_default_guest(machine))
-		modules = symbol_conf.default_guest_modules;
-	else {
-		sprintf(path, "%s/proc/modules", machine->root_dir);
-		modules = path;
-	}
-
-	if (symbol__restricted_filename(path, "/proc/modules"))
-		return -1;
-
-	file = fopen(modules, "r");
-	if (file == NULL)
-		return -1;
-
-	while (!feof(file)) {
-		char name[PATH_MAX];
-		u64 start;
-		char *sep;
-		int line_len;
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0)
-			break;
-
-		if (!line)
-			goto out_failure;
-
-		line[--line_len] = '\0'; /* \n */
-
-		sep = strrchr(line, 'x');
-		if (sep == NULL)
-			continue;
-
-		hex2u64(sep + 1, &start);
-
-		sep = strchr(line, ' ');
-		if (sep == NULL)
-			continue;
-
-		*sep = '\0';
-
-		snprintf(name, sizeof(name), "[%s]", line);
-		map = machine__new_module(machine, start, name);
-		if (map == NULL)
-			goto out_delete_line;
-		dso__kernel_module_get_build_id(map->dso, machine->root_dir);
-	}
-
-	free(line);
-	fclose(file);
-
-	return machine__set_modules_path(machine);
-
-out_delete_line:
-	free(line);
-out_failure:
-	return -1;
-}
-
-int machine__create_kernel_maps(struct machine *machine)
-{
-	struct dso *kernel = machine__get_kernel(machine);
-
-	if (kernel == NULL ||
-	    __machine__create_kernel_maps(machine, kernel) < 0)
-		return -1;
-
-	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
-		if (machine__is_host(machine))
-			pr_debug("Problems creating module maps, "
-				 "continuing anyway...\n");
-		else
-			pr_debug("Problems creating module maps for guest %d, "
-				 "continuing anyway...\n", machine->pid);
-	}
-
-	/*
-	 * Now that we have all the maps created, just set the ->end of them:
-	 */
-	map_groups__fixup_end(&machine->kmaps);
-	return 0;
-}
-
 static void machine__set_kernel_mmap_len(struct machine *machine,
 					 union perf_event *event)
 {
@@ -1038,189 +462,3 @@ int machine__process_event(struct machine *machine, union perf_event *event)
 
 	return ret;
 }
-
-void machine__remove_thread(struct machine *machine, struct thread *th)
-{
-	machine->last_match = NULL;
-	rb_erase(&th->rb_node, &machine->threads);
-	/*
-	 * We may have references to this thread, for instance in some hist_entry
-	 * instances, so just move them to a separate list.
-	 */
-	list_add_tail(&th->node, &machine->dead_threads);
-}
-
-static bool symbol__match_parent_regex(struct symbol *sym)
-{
-	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
-		return 1;
-
-	return 0;
-}
-
-static const u8 cpumodes[] = {
-	PERF_RECORD_MISC_USER,
-	PERF_RECORD_MISC_KERNEL,
-	PERF_RECORD_MISC_GUEST_USER,
-	PERF_RECORD_MISC_GUEST_KERNEL
-};
-#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
-
-static void ip__resolve_ams(struct machine *machine, struct thread *thread,
-			    struct addr_map_symbol *ams,
-			    u64 ip)
-{
-	struct addr_location al;
-	size_t i;
-	u8 m;
-
-	memset(&al, 0, sizeof(al));
-
-	for (i = 0; i < NCPUMODES; i++) {
-		m = cpumodes[i];
-		/*
-		 * We cannot use the header.misc hint to determine whether a
-		 * branch stack address is user, kernel, guest, hypervisor.
-		 * Branches may straddle the kernel/user/hypervisor boundaries.
-		 * Thus, we have to try consecutively until we find a match
-		 * or else, the symbol is unknown
-		 */
-		thread__find_addr_location(thread, machine, m, MAP__FUNCTION,
-				ip, &al, NULL);
-		if (al.sym)
-			goto found;
-	}
-found:
-	ams->addr = ip;
-	ams->al_addr = al.addr;
-	ams->sym = al.sym;
-	ams->map = al.map;
-}
-
-struct branch_info *machine__resolve_bstack(struct machine *machine,
-					    struct thread *thr,
-					    struct branch_stack *bs)
-{
-	struct branch_info *bi;
-	unsigned int i;
-
-	bi = calloc(bs->nr, sizeof(struct branch_info));
-	if (!bi)
-		return NULL;
-
-	for (i = 0; i < bs->nr; i++) {
-		ip__resolve_ams(machine, thr, &bi[i].to, bs->entries[i].to);
-		ip__resolve_ams(machine, thr, &bi[i].from, bs->entries[i].from);
-		bi[i].flags = bs->entries[i].flags;
-	}
-	return bi;
-}
-
-static int machine__resolve_callchain_sample(struct machine *machine,
-					     struct thread *thread,
-					     struct ip_callchain *chain,
-					     struct symbol **parent)
-
-{
-	u8 cpumode = PERF_RECORD_MISC_USER;
-	unsigned int i;
-	int err;
-
-	callchain_cursor_reset(&callchain_cursor);
-
-	if (chain->nr > PERF_MAX_STACK_DEPTH) {
-		pr_warning("corrupted callchain. skipping...\n");
-		return 0;
-	}
-
-	for (i = 0; i < chain->nr; i++) {
-		u64 ip;
-		struct addr_location al;
-
-		if (callchain_param.order == ORDER_CALLEE)
-			ip = chain->ips[i];
-		else
-			ip = chain->ips[chain->nr - i - 1];
-
-		if (ip >= PERF_CONTEXT_MAX) {
-			switch (ip) {
-			case PERF_CONTEXT_HV:
-				cpumode = PERF_RECORD_MISC_HYPERVISOR;
-				break;
-			case PERF_CONTEXT_KERNEL:
-				cpumode = PERF_RECORD_MISC_KERNEL;
-				break;
-			case PERF_CONTEXT_USER:
-				cpumode = PERF_RECORD_MISC_USER;
-				break;
-			default:
-				pr_debug("invalid callchain context: "
-					 "%"PRId64"\n", (s64) ip);
-				/*
-				 * It seems the callchain is corrupted.
-				 * Discard all.
-				 */
-				callchain_cursor_reset(&callchain_cursor);
-				return 0;
-			}
-			continue;
-		}
-
-		al.filtered = false;
-		thread__find_addr_location(thread, machine, cpumode,
-					   MAP__FUNCTION, ip, &al, NULL);
-		if (al.sym != NULL) {
-			if (sort__has_parent && !*parent &&
-			    symbol__match_parent_regex(al.sym))
-				*parent = al.sym;
-			if (!symbol_conf.use_callchain)
-				break;
-		}
-
-		err = callchain_cursor_append(&callchain_cursor,
-					      ip, al.map, al.sym);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int unwind_entry(struct unwind_entry *entry, void *arg)
-{
-	struct callchain_cursor *cursor = arg;
-	return callchain_cursor_append(cursor, entry->ip,
-				       entry->map, entry->sym);
-}
-
-int machine__resolve_callchain(struct machine *machine,
-			       struct perf_evsel *evsel,
-			       struct thread *thread,
-			       struct perf_sample *sample,
-			       struct symbol **parent)
-
-{
-	int ret;
-
-	callchain_cursor_reset(&callchain_cursor);
-
-	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent);
-	if (ret)
-		return ret;
-
-	/* Can we do dwarf post unwind? */
-	if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
-	      (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
-		return 0;
-
-	/* Bail out if nothing was captured. */
-	if ((!sample->user_regs.regs) ||
-	    (!sample->user_stack.size))
-		return 0;
-
-	return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
-				   thread, evsel->attr.sample_regs_user,
-				   sample);
-
-}
diff --git a/trunk/tools/perf/util/machine.h b/trunk/tools/perf/util/machine.h
index 5ac5892f2326..b7cde7467d55 100644
--- a/trunk/tools/perf/util/machine.h
+++ b/trunk/tools/perf/util/machine.h
@@ -47,32 +47,23 @@ int machine__process_event(struct machine *machine, union perf_event *event);
 
 typedef void (*machine__process_t)(struct machine *machine, void *data);
 
-struct machines {
-	struct machine host;
-	struct rb_root guests;
-};
-
-void machines__init(struct machines *machines);
-void machines__exit(struct machines *machines);
+void machines__process(struct rb_root *machines,
+		       machine__process_t process, void *data);
 
-void machines__process_guests(struct machines *machines,
-			      machine__process_t process, void *data);
-
-struct machine *machines__add(struct machines *machines, pid_t pid,
+struct machine *machines__add(struct rb_root *machines, pid_t pid,
 			      const char *root_dir);
-struct machine *machines__find_host(struct machines *machines);
-struct machine *machines__find(struct machines *machines, pid_t pid);
-struct machine *machines__findnew(struct machines *machines, pid_t pid);
+struct machine *machines__find_host(struct rb_root *machines);
+struct machine *machines__find(struct rb_root *machines, pid_t pid);
+struct machine *machines__findnew(struct rb_root *machines, pid_t pid);
 
-void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
+void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size);
 char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
 
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
 void machine__exit(struct machine *machine);
-void machine__delete_dead_threads(struct machine *machine);
-void machine__delete_threads(struct machine *machine);
 void machine__delete(struct machine *machine);
 
+
 struct branch_info *machine__resolve_bstack(struct machine *machine,
 					    struct thread *thread,
 					    struct branch_stack *bs);
@@ -138,19 +129,19 @@ int machine__load_kallsyms(struct machine *machine, const char *filename,
 int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
 			       symbol_filter_t filter);
 
-size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
-				     bool (skip)(struct dso *dso, int parm), int parm);
-size_t machines__fprintf_dsos(struct machines *machines, FILE *fp);
-size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
-				     bool (skip)(struct dso *dso, int parm), int parm);
+size_t machine__fprintf_dsos_buildid(struct machine *machine,
+				     FILE *fp, bool with_hits);
+size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp);
+size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
+				      FILE *fp, bool with_hits);
 
 void machine__destroy_kernel_maps(struct machine *machine);
 int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
 int machine__create_kernel_maps(struct machine *machine);
 
-int machines__create_kernel_maps(struct machines *machines, pid_t pid);
-int machines__create_guest_kernel_maps(struct machines *machines);
-void machines__destroy_kernel_maps(struct machines *machines);
+int machines__create_kernel_maps(struct rb_root *machines, pid_t pid);
+int machines__create_guest_kernel_maps(struct rb_root *machines);
+void machines__destroy_guest_kernel_maps(struct rb_root *machines);
 
 size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
 
diff --git a/trunk/tools/perf/util/map.c b/trunk/tools/perf/util/map.c
index 6fcb9de62340..0328d45c4f2a 100644
--- a/trunk/tools/perf/util/map.c
+++ b/trunk/tools/perf/util/map.c
@@ -11,7 +11,6 @@
 #include "strlist.h"
 #include "vdso.h"
 #include "build-id.h"
-#include <linux/string.h>
 
 const char *map_type__name[MAP__NR_TYPES] = {
 	[MAP__FUNCTION] = "Functions",
@@ -20,8 +19,7 @@ const char *map_type__name[MAP__NR_TYPES] = {
 
 static inline int is_anon_memory(const char *filename)
 {
-	return !strcmp(filename, "//anon") ||
-	       !strcmp(filename, "/anon_hugepage (deleted)");
+	return strcmp(filename, "//anon") == 0;
 }
 
 static inline int is_no_dso_memory(const char *filename)
@@ -30,29 +28,29 @@ static inline int is_no_dso_memory(const char *filename)
 	       !strcmp(filename, "[heap]");
 }
 
-void map__init(struct map *map, enum map_type type,
+void map__init(struct map *self, enum map_type type,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso)
 {
-	map->type     = type;
-	map->start    = start;
-	map->end      = end;
-	map->pgoff    = pgoff;
-	map->dso      = dso;
-	map->map_ip   = map__map_ip;
-	map->unmap_ip = map__unmap_ip;
-	RB_CLEAR_NODE(&map->rb_node);
-	map->groups   = NULL;
-	map->referenced = false;
-	map->erange_warned = false;
+	self->type     = type;
+	self->start    = start;
+	self->end      = end;
+	self->pgoff    = pgoff;
+	self->dso      = dso;
+	self->map_ip   = map__map_ip;
+	self->unmap_ip = map__unmap_ip;
+	RB_CLEAR_NODE(&self->rb_node);
+	self->groups   = NULL;
+	self->referenced = false;
+	self->erange_warned = false;
 }
 
 struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
 		     u64 pgoff, u32 pid, char *filename,
 		     enum map_type type)
 {
-	struct map *map = malloc(sizeof(*map));
+	struct map *self = malloc(sizeof(*self));
 
-	if (map != NULL) {
+	if (self != NULL) {
 		char newfilename[PATH_MAX];
 		struct dso *dso;
 		int anon, no_dso, vdso;
@@ -75,10 +73,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
 		if (dso == NULL)
 			goto out_delete;
 
-		map__init(map, type, start, start + len, pgoff, dso);
+		map__init(self, type, start, start + len, pgoff, dso);
 
 		if (anon || no_dso) {
-			map->map_ip = map->unmap_ip = identity__map_ip;
+			self->map_ip = self->unmap_ip = identity__map_ip;
 
 			/*
 			 * Set memory without DSO as loaded. All map__find_*
@@ -86,12 +84,12 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
 			 * unnecessary map__load warning.
 			 */
 			if (no_dso)
-				dso__set_loaded(dso, map->type);
+				dso__set_loaded(dso, self->type);
 		}
 	}
-	return map;
+	return self;
 out_delete:
-	free(map);
+	free(self);
 	return NULL;
 }
 
@@ -114,48 +112,48 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 	return map;
 }
 
-void map__delete(struct map *map)
+void map__delete(struct map *self)
 {
-	free(map);
+	free(self);
 }
 
-void map__fixup_start(struct map *map)
+void map__fixup_start(struct map *self)
 {
-	struct rb_root *symbols = &map->dso->symbols[map->type];
+	struct rb_root *symbols = &self->dso->symbols[self->type];
 	struct rb_node *nd = rb_first(symbols);
 	if (nd != NULL) {
 		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
-		map->start = sym->start;
+		self->start = sym->start;
 	}
 }
 
-void map__fixup_end(struct map *map)
+void map__fixup_end(struct map *self)
 {
-	struct rb_root *symbols = &map->dso->symbols[map->type];
+	struct rb_root *symbols = &self->dso->symbols[self->type];
 	struct rb_node *nd = rb_last(symbols);
 	if (nd != NULL) {
 		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
-		map->end = sym->end;
+		self->end = sym->end;
 	}
 }
 
 #define DSO__DELETED "(deleted)"
 
-int map__load(struct map *map, symbol_filter_t filter)
+int map__load(struct map *self, symbol_filter_t filter)
 {
-	const char *name = map->dso->long_name;
+	const char *name = self->dso->long_name;
 	int nr;
 
-	if (dso__loaded(map->dso, map->type))
+	if (dso__loaded(self->dso, self->type))
 		return 0;
 
-	nr = dso__load(map->dso, map, filter);
+	nr = dso__load(self->dso, self, filter);
 	if (nr < 0) {
-		if (map->dso->has_build_id) {
+		if (self->dso->has_build_id) {
 			char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
-			build_id__sprintf(map->dso->build_id,
-					  sizeof(map->dso->build_id),
+			build_id__sprintf(self->dso->build_id,
+					  sizeof(self->dso->build_id),
 					  sbuild_id);
 			pr_warning("%s with build id %s not found",
 				   name, sbuild_id);
@@ -185,36 +183,43 @@ int map__load(struct map *map, symbol_filter_t filter)
 	 * Only applies to the kernel, as its symtabs aren't relative like the
 	 * module ones.
 	 */
-	if (map->dso->kernel)
-		map__reloc_vmlinux(map);
+	if (self->dso->kernel)
+		map__reloc_vmlinux(self);
 
 	return 0;
 }
 
-struct symbol *map__find_symbol(struct map *map, u64 addr,
+struct symbol *map__find_symbol(struct map *self, u64 addr,
 				symbol_filter_t filter)
 {
-	if (map__load(map, filter) < 0)
+	if (map__load(self, filter) < 0)
 		return NULL;
 
-	return dso__find_symbol(map->dso, map->type, addr);
+	return dso__find_symbol(self->dso, self->type, addr);
 }
 
-struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
+struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
 					symbol_filter_t filter)
 {
-	if (map__load(map, filter) < 0)
+	if (map__load(self, filter) < 0)
 		return NULL;
 
-	if (!dso__sorted_by_name(map->dso, map->type))
-		dso__sort_by_name(map->dso, map->type);
+	if (!dso__sorted_by_name(self->dso, self->type))
+		dso__sort_by_name(self->dso, self->type);
 
-	return dso__find_symbol_by_name(map->dso, map->type, name);
+	return dso__find_symbol_by_name(self->dso, self->type, name);
 }
 
-struct map *map__clone(struct map *map)
+struct map *map__clone(struct map *self)
 {
-	return memdup(map, sizeof(*map));
+	struct map *map = malloc(sizeof(*self));
+
+	if (!map)
+		return NULL;
+
+	memcpy(map, self, sizeof(*self));
+
+	return map;
 }
 
 int map__overlap(struct map *l, struct map *r)
@@ -231,10 +236,10 @@ int map__overlap(struct map *l, struct map *r)
 	return 0;
 }
 
-size_t map__fprintf(struct map *map, FILE *fp)
+size_t map__fprintf(struct map *self, FILE *fp)
 {
 	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
-		       map->start, map->end, map->pgoff, map->dso->name);
+		       self->start, self->end, self->pgoff, self->dso->name);
 }
 
 size_t map__fprintf_dsoname(struct map *map, FILE *fp)
@@ -522,9 +527,9 @@ static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
 	return ip - (s64)map->pgoff;
 }
 
-void map__reloc_vmlinux(struct map *map)
+void map__reloc_vmlinux(struct map *self)
 {
-	struct kmap *kmap = map__kmap(map);
+	struct kmap *kmap = map__kmap(self);
 	s64 reloc;
 
 	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
@@ -536,9 +541,9 @@ void map__reloc_vmlinux(struct map *map)
 	if (!reloc)
 		return;
 
-	map->map_ip   = map__reloc_map_ip;
-	map->unmap_ip = map__reloc_unmap_ip;
-	map->pgoff    = reloc;
+	self->map_ip   = map__reloc_map_ip;
+	self->unmap_ip = map__reloc_unmap_ip;
+	self->pgoff    = reloc;
 }
 
 void maps__insert(struct rb_root *maps, struct map *map)
@@ -561,9 +566,9 @@ void maps__insert(struct rb_root *maps, struct map *map)
 	rb_insert_color(&map->rb_node, maps);
 }
 
-void maps__remove(struct rb_root *maps, struct map *map)
+void maps__remove(struct rb_root *self, struct map *map)
 {
-	rb_erase(&map->rb_node, maps);
+	rb_erase(&map->rb_node, self);
 }
 
 struct map *maps__find(struct rb_root *maps, u64 ip)
diff --git a/trunk/tools/perf/util/map.h b/trunk/tools/perf/util/map.h
index a887f2c9dfbb..bcb39e2a6965 100644
--- a/trunk/tools/perf/util/map.h
+++ b/trunk/tools/perf/util/map.h
@@ -57,9 +57,9 @@ struct map_groups {
 	struct machine	 *machine;
 };
 
-static inline struct kmap *map__kmap(struct map *map)
+static inline struct kmap *map__kmap(struct map *self)
 {
-	return (struct kmap *)(map + 1);
+	return (struct kmap *)(self + 1);
 }
 
 static inline u64 map__map_ip(struct map *map, u64 ip)
@@ -85,27 +85,27 @@ struct symbol;
 
 typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
 
-void map__init(struct map *map, enum map_type type,
+void map__init(struct map *self, enum map_type type,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
 		     u64 pgoff, u32 pid, char *filename,
 		     enum map_type type);
 struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
-void map__delete(struct map *map);
-struct map *map__clone(struct map *map);
+void map__delete(struct map *self);
+struct map *map__clone(struct map *self);
 int map__overlap(struct map *l, struct map *r);
-size_t map__fprintf(struct map *map, FILE *fp);
+size_t map__fprintf(struct map *self, FILE *fp);
 size_t map__fprintf_dsoname(struct map *map, FILE *fp);
 
-int map__load(struct map *map, symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *map,
+int map__load(struct map *self, symbol_filter_t filter);
+struct symbol *map__find_symbol(struct map *self,
 				u64 addr, symbol_filter_t filter);
-struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
+struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
 					symbol_filter_t filter);
-void map__fixup_start(struct map *map);
-void map__fixup_end(struct map *map);
+void map__fixup_start(struct map *self);
+void map__fixup_end(struct map *self);
 
-void map__reloc_vmlinux(struct map *map);
+void map__reloc_vmlinux(struct map *self);
 
 size_t __map_groups__fprintf_maps(struct map_groups *mg,
 				  enum map_type type, int verbose, FILE *fp);
diff --git a/trunk/tools/perf/util/parse-events.c b/trunk/tools/perf/util/parse-events.c
index c84f48cf9678..2d8d53bec17e 100644
--- a/trunk/tools/perf/util/parse-events.c
+++ b/trunk/tools/perf/util/parse-events.c
@@ -380,8 +380,8 @@ static int add_tracepoint(struct list_head **listp, int *idx,
 	return 0;
 }
 
-static int add_tracepoint_multi_event(struct list_head **list, int *idx,
-				      char *sys_name, char *evt_name)
+static int add_tracepoint_multi(struct list_head **list, int *idx,
+				char *sys_name, char *evt_name)
 {
 	char evt_path[MAXPATHLEN];
 	struct dirent *evt_ent;
@@ -408,47 +408,6 @@ static int add_tracepoint_multi_event(struct list_head **list, int *idx,
 		ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name);
 	}
 
-	closedir(evt_dir);
-	return ret;
-}
-
-static int add_tracepoint_event(struct list_head **list, int *idx,
-				char *sys_name, char *evt_name)
-{
-	return strpbrk(evt_name, "*?") ?
-	       add_tracepoint_multi_event(list, idx, sys_name, evt_name) :
-	       add_tracepoint(list, idx, sys_name, evt_name);
-}
-
-static int add_tracepoint_multi_sys(struct list_head **list, int *idx,
-				    char *sys_name, char *evt_name)
-{
-	struct dirent *events_ent;
-	DIR *events_dir;
-	int ret = 0;
-
-	events_dir = opendir(tracing_events_path);
-	if (!events_dir) {
-		perror("Can't open event dir");
-		return -1;
-	}
-
-	while (!ret && (events_ent = readdir(events_dir))) {
-		if (!strcmp(events_ent->d_name, ".")
-		    || !strcmp(events_ent->d_name, "..")
-		    || !strcmp(events_ent->d_name, "enable")
-		    || !strcmp(events_ent->d_name, "header_event")
-		    || !strcmp(events_ent->d_name, "header_page"))
-			continue;
-
-		if (!strglobmatch(events_ent->d_name, sys_name))
-			continue;
-
-		ret = add_tracepoint_event(list, idx, events_ent->d_name,
-					   evt_name);
-	}
-
-	closedir(events_dir);
 	return ret;
 }
 
@@ -461,10 +420,9 @@ int parse_events_add_tracepoint(struct list_head **list, int *idx,
 	if (ret)
 		return ret;
 
-	if (strpbrk(sys, "*?"))
-		return add_tracepoint_multi_sys(list, idx, sys, event);
-	else
-		return add_tracepoint_event(list, idx, sys, event);
+	return strpbrk(event, "*?") ?
+	       add_tracepoint_multi(list, idx, sys, event) :
+	       add_tracepoint(list, idx, sys, event);
 }
 
 static int
@@ -534,7 +492,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,
 }
 
 static int config_term(struct perf_event_attr *attr,
-		       struct parse_events_term *term)
+		       struct parse_events__term *term)
 {
 #define CHECK_TYPE_VAL(type)					\
 do {								\
@@ -579,7 +537,7 @@ do {								\
 static int config_attr(struct perf_event_attr *attr,
 		       struct list_head *head, int fail)
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
 	list_for_each_entry(term, head, list)
 		if (config_term(attr, term) && fail)
@@ -605,14 +563,14 @@ int parse_events_add_numeric(struct list_head **list, int *idx,
 	return add_event(list, idx, &attr, NULL);
 }
 
-static int parse_events__is_name_term(struct parse_events_term *term)
+static int parse_events__is_name_term(struct parse_events__term *term)
 {
 	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
 }
 
 static char *pmu_event_name(struct list_head *head_terms)
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
 	list_for_each_entry(term, head_terms, list)
 		if (parse_events__is_name_term(term))
@@ -699,6 +657,14 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	int exclude = eu | ek | eh;
 	int exclude_GH = evsel ? evsel->exclude_GH : 0;
 
+	/*
+	 * We are here for group and 'GH' was not set as event
+	 * modifier and whatever event/group modifier override
+	 * default 'GH' setup.
+	 */
+	if (evsel && !exclude_GH)
+		eH = eG = 0;
+
 	memset(mod, 0, sizeof(*mod));
 
 	while (*str) {
@@ -848,7 +814,7 @@ static int parse_events__scanner(const char *str, void *data, int start_token)
  */
 int parse_events_terms(struct list_head *terms, const char *str)
 {
-	struct parse_events_terms data = {
+	struct parse_events_data__terms data = {
 		.terms = NULL,
 	};
 	int ret;
@@ -864,9 +830,10 @@ int parse_events_terms(struct list_head *terms, const char *str)
 	return ret;
 }
 
-int parse_events(struct perf_evlist *evlist, const char *str)
+int parse_events(struct perf_evlist *evlist, const char *str,
+		 int unset __maybe_unused)
 {
-	struct parse_events_evlist data = {
+	struct parse_events_data__events data = {
 		.list = LIST_HEAD_INIT(data.list),
 		.idx  = evlist->nr_entries,
 	};
@@ -876,7 +843,6 @@ int parse_events(struct perf_evlist *evlist, const char *str)
 	if (!ret) {
 		int entries = data.idx - evlist->nr_entries;
 		perf_evlist__splice_list_tail(evlist, &data.list, entries);
-		evlist->nr_groups += data.nr_groups;
 		return 0;
 	}
 
@@ -892,7 +858,7 @@ int parse_events_option(const struct option *opt, const char *str,
 			int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
-	int ret = parse_events(evlist, str);
+	int ret = parse_events(evlist, str, unset);
 
 	if (ret) {
 		fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
@@ -1155,16 +1121,16 @@ void print_events(const char *event_glob, bool name_only)
 	print_tracepoint_events(NULL, NULL, name_only);
 }
 
-int parse_events__is_hardcoded_term(struct parse_events_term *term)
+int parse_events__is_hardcoded_term(struct parse_events__term *term)
 {
 	return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
 }
 
-static int new_term(struct parse_events_term **_term, int type_val,
+static int new_term(struct parse_events__term **_term, int type_val,
 		    int type_term, char *config,
 		    char *str, u64 num)
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
 	term = zalloc(sizeof(*term));
 	if (!term)
@@ -1190,21 +1156,21 @@ static int new_term(struct parse_events_term **_term, int type_val,
 	return 0;
 }
 
-int parse_events_term__num(struct parse_events_term **term,
+int parse_events__term_num(struct parse_events__term **term,
 			   int type_term, char *config, u64 num)
 {
 	return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
 			config, NULL, num);
 }
 
-int parse_events_term__str(struct parse_events_term **term,
+int parse_events__term_str(struct parse_events__term **term,
 			   int type_term, char *config, char *str)
 {
 	return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
 			config, str, 0);
 }
 
-int parse_events_term__sym_hw(struct parse_events_term **term,
+int parse_events__term_sym_hw(struct parse_events__term **term,
 			      char *config, unsigned idx)
 {
 	struct event_symbol *sym;
@@ -1222,8 +1188,8 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
 				(char *) "event", (char *) sym->symbol, 0);
 }
 
-int parse_events_term__clone(struct parse_events_term **new,
-			     struct parse_events_term *term)
+int parse_events__term_clone(struct parse_events__term **new,
+			     struct parse_events__term *term)
 {
 	return new_term(new, term->type_val, term->type_term, term->config,
 			term->val.str, term->val.num);
@@ -1231,7 +1197,7 @@ int parse_events_term__clone(struct parse_events_term **new,
 
 void parse_events__free_terms(struct list_head *terms)
 {
-	struct parse_events_term *term, *h;
+	struct parse_events__term *term, *h;
 
 	list_for_each_entry_safe(term, h, terms, list)
 		free(term);
diff --git a/trunk/tools/perf/util/parse-events.h b/trunk/tools/perf/util/parse-events.h
index 8a4859315fd9..b7af80b8bdda 100644
--- a/trunk/tools/perf/util/parse-events.h
+++ b/trunk/tools/perf/util/parse-events.h
@@ -29,7 +29,8 @@ const char *event_type(int type);
 
 extern int parse_events_option(const struct option *opt, const char *str,
 			       int unset);
-extern int parse_events(struct perf_evlist *evlist, const char *str);
+extern int parse_events(struct perf_evlist *evlist, const char *str,
+			int unset);
 extern int parse_events_terms(struct list_head *terms, const char *str);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
 
@@ -50,7 +51,7 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
 };
 
-struct parse_events_term {
+struct parse_events__term {
 	char *config;
 	union {
 		char *str;
@@ -61,25 +62,24 @@ struct parse_events_term {
 	struct list_head list;
 };
 
-struct parse_events_evlist {
+struct parse_events_data__events {
 	struct list_head list;
 	int idx;
-	int nr_groups;
 };
 
-struct parse_events_terms {
+struct parse_events_data__terms {
 	struct list_head *terms;
 };
 
-int parse_events__is_hardcoded_term(struct parse_events_term *term);
-int parse_events_term__num(struct parse_events_term **_term,
+int parse_events__is_hardcoded_term(struct parse_events__term *term);
+int parse_events__term_num(struct parse_events__term **_term,
 			   int type_term, char *config, u64 num);
-int parse_events_term__str(struct parse_events_term **_term,
+int parse_events__term_str(struct parse_events__term **_term,
 			   int type_term, char *config, char *str);
-int parse_events_term__sym_hw(struct parse_events_term **term,
+int parse_events__term_sym_hw(struct parse_events__term **term,
 			      char *config, unsigned idx);
-int parse_events_term__clone(struct parse_events_term **new,
-			     struct parse_events_term *term);
+int parse_events__term_clone(struct parse_events__term **new,
+			     struct parse_events__term *term);
 void parse_events__free_terms(struct list_head *terms);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
diff --git a/trunk/tools/perf/util/parse-events.y b/trunk/tools/perf/util/parse-events.y
index afc44c18dfe1..0f9914ae6bac 100644
--- a/trunk/tools/perf/util/parse-events.y
+++ b/trunk/tools/perf/util/parse-events.y
@@ -1,4 +1,5 @@
 %pure-parser
+%name-prefix "parse_events_"
 %parse-param {void *_data}
 %parse-param {void *scanner}
 %lex-param {void* scanner}
@@ -22,14 +23,6 @@ do { \
 		YYABORT; \
 } while (0)
 
-static inc_group_count(struct list_head *list,
-		       struct parse_events_evlist *data)
-{
-	/* Count groups only have more than 1 members */
-	if (!list_is_last(list->next, list))
-		data->nr_groups++;
-}
-
 %}
 
 %token PE_START_EVENTS PE_START_TERMS
@@ -75,7 +68,7 @@ static inc_group_count(struct list_head *list,
 	char *str;
 	u64 num;
 	struct list_head *head;
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 }
 %%
 
@@ -86,7 +79,7 @@ PE_START_TERMS  start_terms
 
 start_events: groups
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 
 	parse_events_update_lists($1, &data->list);
 }
@@ -130,7 +123,6 @@ PE_NAME '{' events '}'
 {
 	struct list_head *list = $3;
 
-	inc_group_count(list, _data);
 	parse_events__set_leader($1, list);
 	$$ = list;
 }
@@ -139,7 +131,6 @@ PE_NAME '{' events '}'
 {
 	struct list_head *list = $2;
 
-	inc_group_count(list, _data);
 	parse_events__set_leader(NULL, list);
 	$$ = list;
 }
@@ -195,7 +186,7 @@ event_def: event_pmu |
 event_pmu:
 PE_NAME '/' event_config '/'
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3));
@@ -211,7 +202,7 @@ PE_VALUE_SYM_SW
 event_legacy_symbol:
 value_sym '/' event_config '/'
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 	int type = $1 >> 16;
 	int config = $1 & 255;
@@ -224,7 +215,7 @@ value_sym '/' event_config '/'
 |
 value_sym sep_slash_dc
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 	int type = $1 >> 16;
 	int config = $1 & 255;
@@ -237,7 +228,7 @@ value_sym sep_slash_dc
 event_legacy_cache:
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5));
@@ -246,7 +237,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
 |
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL));
@@ -255,7 +246,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
 |
 PE_NAME_CACHE_TYPE
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL));
@@ -265,7 +256,7 @@ PE_NAME_CACHE_TYPE
 event_legacy_mem:
 PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
@@ -275,7 +266,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
 |
 PE_PREFIX_MEM PE_VALUE sep_dc
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
@@ -286,7 +277,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
 event_legacy_tracepoint:
 PE_NAME ':' PE_NAME
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3));
@@ -296,7 +287,7 @@ PE_NAME ':' PE_NAME
 event_legacy_numeric:
 PE_VALUE ':' PE_VALUE
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL));
@@ -306,7 +297,7 @@ PE_VALUE ':' PE_VALUE
 event_legacy_raw:
 PE_RAW
 {
-	struct parse_events_evlist *data = _data;
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
 	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
@@ -316,7 +307,7 @@ PE_RAW
 
 start_terms: event_config
 {
-	struct parse_events_terms *data = _data;
+	struct parse_events_data__terms *data = _data;
 	data->terms = $1;
 }
 
@@ -324,7 +315,7 @@ event_config:
 event_config ',' event_term
 {
 	struct list_head *head = $1;
-	struct parse_events_term *term = $3;
+	struct parse_events__term *term = $3;
 
 	ABORT_ON(!head);
 	list_add_tail(&term->list, head);
@@ -334,7 +325,7 @@ event_config ',' event_term
 event_term
 {
 	struct list_head *head = malloc(sizeof(*head));
-	struct parse_events_term *term = $1;
+	struct parse_events__term *term = $1;
 
 	ABORT_ON(!head);
 	INIT_LIST_HEAD(head);
@@ -345,70 +336,70 @@ event_term
 event_term:
 PE_NAME '=' PE_NAME
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
-	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+	ABORT_ON(parse_events__term_str(&term, PARSE_EVENTS__TERM_TYPE_USER,
 					$1, $3));
 	$$ = term;
 }
 |
 PE_NAME '=' PE_VALUE
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
-	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+	ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER,
 					$1, $3));
 	$$ = term;
 }
 |
 PE_NAME '=' PE_VALUE_SYM_HW
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 	int config = $3 & 255;
 
-	ABORT_ON(parse_events_term__sym_hw(&term, $1, config));
+	ABORT_ON(parse_events__term_sym_hw(&term, $1, config));
 	$$ = term;
 }
 |
 PE_NAME
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
-	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+	ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER,
 					$1, 1));
 	$$ = term;
 }
 |
 PE_VALUE_SYM_HW
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 	int config = $1 & 255;
 
-	ABORT_ON(parse_events_term__sym_hw(&term, NULL, config));
+	ABORT_ON(parse_events__term_sym_hw(&term, NULL, config));
 	$$ = term;
 }
 |
 PE_TERM '=' PE_NAME
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
-	ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3));
+	ABORT_ON(parse_events__term_str(&term, (int)$1, NULL, $3));
 	$$ = term;
 }
 |
 PE_TERM '=' PE_VALUE
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
-	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3));
+	ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, $3));
 	$$ = term;
 }
 |
 PE_TERM
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
-	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1));
+	ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, 1));
 	$$ = term;
 }
 
diff --git a/trunk/tools/perf/util/pmu.c b/trunk/tools/perf/util/pmu.c
index 4c6f9c490a8d..9bdc60c6f138 100644
--- a/trunk/tools/perf/util/pmu.c
+++ b/trunk/tools/perf/util/pmu.c
@@ -1,3 +1,4 @@
+
 #include <linux/list.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -10,19 +11,6 @@
 #include "parse-events.h"
 #include "cpumap.h"
 
-struct perf_pmu_alias {
-	char *name;
-	struct list_head terms;
-	struct list_head list;
-};
-
-struct perf_pmu_format {
-	char *name;
-	int value;
-	DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
-	struct list_head list;
-};
-
 #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
 
 int perf_pmu_parse(struct list_head *list, char *name);
@@ -97,7 +85,7 @@ static int pmu_format(char *name, struct list_head *format)
 
 static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file)
 {
-	struct perf_pmu_alias *alias;
+	struct perf_pmu__alias *alias;
 	char buf[256];
 	int ret;
 
@@ -184,15 +172,15 @@ static int pmu_aliases(char *name, struct list_head *head)
 	return 0;
 }
 
-static int pmu_alias_terms(struct perf_pmu_alias *alias,
+static int pmu_alias_terms(struct perf_pmu__alias *alias,
 			   struct list_head *terms)
 {
-	struct parse_events_term *term, *clone;
+	struct parse_events__term *term, *clone;
 	LIST_HEAD(list);
 	int ret;
 
 	list_for_each_entry(term, &alias->terms, list) {
-		ret = parse_events_term__clone(&clone, term);
+		ret = parse_events__term_clone(&clone, term);
 		if (ret) {
 			parse_events__free_terms(&list);
 			return ret;
@@ -372,10 +360,10 @@ struct perf_pmu *perf_pmu__find(char *name)
 	return pmu_lookup(name);
 }
 
-static struct perf_pmu_format *
+static struct perf_pmu__format*
 pmu_find_format(struct list_head *formats, char *name)
 {
-	struct perf_pmu_format *format;
+	struct perf_pmu__format *format;
 
 	list_for_each_entry(format, formats, list)
 		if (!strcmp(format->name, name))
@@ -415,9 +403,9 @@ static __u64 pmu_format_value(unsigned long *format, __u64 value)
  */
 static int pmu_config_term(struct list_head *formats,
 			   struct perf_event_attr *attr,
-			   struct parse_events_term *term)
+			   struct parse_events__term *term)
 {
-	struct perf_pmu_format *format;
+	struct perf_pmu__format *format;
 	__u64 *vp;
 
 	/*
@@ -462,7 +450,7 @@ int perf_pmu__config_terms(struct list_head *formats,
 			   struct perf_event_attr *attr,
 			   struct list_head *head_terms)
 {
-	struct parse_events_term *term;
+	struct parse_events__term *term;
 
 	list_for_each_entry(term, head_terms, list)
 		if (pmu_config_term(formats, attr, term))
@@ -483,10 +471,10 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 	return perf_pmu__config_terms(&pmu->format, attr, head_terms);
 }
 
-static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
-					     struct parse_events_term *term)
+static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu,
+					      struct parse_events__term *term)
 {
-	struct perf_pmu_alias *alias;
+	struct perf_pmu__alias *alias;
 	char *name;
 
 	if (parse_events__is_hardcoded_term(term))
@@ -519,8 +507,8 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
  */
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms)
 {
-	struct parse_events_term *term, *h;
-	struct perf_pmu_alias *alias;
+	struct parse_events__term *term, *h;
+	struct perf_pmu__alias *alias;
 	int ret;
 
 	list_for_each_entry_safe(term, h, head_terms, list) {
@@ -539,7 +527,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms)
 int perf_pmu__new_format(struct list_head *list, char *name,
 			 int config, unsigned long *bits)
 {
-	struct perf_pmu_format *format;
+	struct perf_pmu__format *format;
 
 	format = zalloc(sizeof(*format));
 	if (!format)
@@ -560,7 +548,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
 	if (!to)
 		to = from;
 
-	memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
+	memset(bits, 0, BITS_TO_LONGS(PERF_PMU_FORMAT_BITS) * sizeof(*bits));
 	for (b = from; b <= to; b++)
 		set_bit(b, bits);
 }
diff --git a/trunk/tools/perf/util/pmu.h b/trunk/tools/perf/util/pmu.h
index 32fe55b659fa..a313ed76a49a 100644
--- a/trunk/tools/perf/util/pmu.h
+++ b/trunk/tools/perf/util/pmu.h
@@ -12,6 +12,19 @@ enum {
 
 #define PERF_PMU_FORMAT_BITS 64
 
+struct perf_pmu__format {	/* one entry of a PMU's format list (pmu->format) */
+	char *name;	/* key that pmu_find_format() compares with strcmp() */
+	int value;	/* NOTE(review): looks like the attr config selector - confirm */
+	DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);	/* PERF_PMU_FORMAT_BITS wide */
+	struct list_head list;	/* node in a list of formats */
+};
+
+struct perf_pmu__alias {	/* one named alias, looked up by pmu_find_alias() */
+	char *name;
+	struct list_head terms;	/* parse_events__term entries, cloned by pmu_alias_terms() */
+	struct list_head list;	/* NOTE(review): presumably links the aliases of one PMU - confirm */
+};
+
 struct perf_pmu {
 	char *name;
 	__u32 type;
@@ -29,7 +42,7 @@ int perf_pmu__config_terms(struct list_head *formats,
 			   struct list_head *head_terms);
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms);
 struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
-				  struct list_head *head_terms);
+				struct list_head *head_terms);
 int perf_pmu_wrap(void);
 void perf_pmu_error(struct list_head *list, char *name, char const *msg);
 
diff --git a/trunk/tools/perf/util/pmu.y b/trunk/tools/perf/util/pmu.y
index bfd7e8509869..ec898047ebb9 100644
--- a/trunk/tools/perf/util/pmu.y
+++ b/trunk/tools/perf/util/pmu.y
@@ -1,4 +1,5 @@
 
+%name-prefix "perf_pmu_"
 %parse-param {struct list_head *format}
 %parse-param {char *name}
 
diff --git a/trunk/tools/perf/util/probe-finder.c b/trunk/tools/perf/util/probe-finder.c
index be0329394d56..1daf5c14e751 100644
--- a/trunk/tools/perf/util/probe-finder.c
+++ b/trunk/tools/perf/util/probe-finder.c
@@ -413,12 +413,12 @@ static int convert_variable_type(Dwarf_Die *vr_die,
 				   dwarf_diename(vr_die), dwarf_diename(&type));
 			return -EINVAL;
 		}
-		if (die_get_real_type(&type, &type) == NULL) {
-			pr_warning("Failed to get a type"
-				   " information.\n");
-			return -ENOENT;
-		}
 		if (ret == DW_TAG_pointer_type) {
+			if (die_get_real_type(&type, &type) == NULL) {
+				pr_warning("Failed to get a type"
+					   " information.\n");
+				return -ENOENT;
+			}
 			while (*ref_ptr)
 				ref_ptr = &(*ref_ptr)->next;
 			/* Add new reference with offset +0 */
diff --git a/trunk/tools/perf/util/python-ext-sources b/trunk/tools/perf/util/python-ext-sources
index 64536a993f4a..c40c2d33199e 100644
--- a/trunk/tools/perf/util/python-ext-sources
+++ b/trunk/tools/perf/util/python-ext-sources
@@ -18,5 +18,4 @@ util/cgroup.c
 util/debugfs.c
 util/rblist.c
 util/strlist.c
-util/sysfs.c
 ../../lib/rbtree.c
diff --git a/trunk/tools/perf/util/python.c b/trunk/tools/perf/util/python.c
index 925e0c3e6d91..a2657fd96837 100644
--- a/trunk/tools/perf/util/python.c
+++ b/trunk/tools/perf/util/python.c
@@ -1045,12 +1045,3 @@ PyMODINIT_FUNC initperf(void)
 	if (PyErr_Occurred())
 		PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
 }
-
-/*
- * Dummy, to avoid dragging all the test_attr infrastructure in the python
- * binding.
- */
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
-                     int fd, int group_fd, unsigned long flags)
-{
-}
diff --git a/trunk/tools/perf/util/scripting-engines/trace-event-perl.c b/trunk/tools/perf/util/scripting-engines/trace-event-perl.c
index eacec859f299..f80605eb1855 100644
--- a/trunk/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/trunk/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -292,7 +292,6 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
 	ns = nsecs - s * NSECS_PER_SEC;
 
 	scripting_context->event_data = data;
-	scripting_context->pevent = evsel->tp_format->pevent;
 
 	ENTER;
 	SAVETMPS;
diff --git a/trunk/tools/perf/util/scripting-engines/trace-event-python.c b/trunk/tools/perf/util/scripting-engines/trace-event-python.c
index e87aa5d9696b..14683dfca2ee 100644
--- a/trunk/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/trunk/tools/perf/util/scripting-engines/trace-event-python.c
@@ -265,7 +265,6 @@ static void python_process_tracepoint(union perf_event *perf_event
 	ns = nsecs - s * NSECS_PER_SEC;
 
 	scripting_context->event_data = data;
-	scripting_context->pevent = evsel->tp_format->pevent;
 
 	context = PyCObject_FromVoidPtr(scripting_context, NULL);
 
diff --git a/trunk/tools/perf/util/session.c b/trunk/tools/perf/util/session.c
index bd85280bb6e8..ce6f51162386 100644
--- a/trunk/tools/perf/util/session.c
+++ b/trunk/tools/perf/util/session.c
@@ -16,6 +16,7 @@
 #include "cpumap.h"
 #include "event-parse.h"
 #include "perf_regs.h"
+#include "unwind.h"
 #include "vdso.h"
 
 static int perf_session__open(struct perf_session *self, bool force)
@@ -86,12 +87,13 @@ void perf_session__set_id_hdr_size(struct perf_session *session)
 {
 	u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);
 
+	session->host_machine.id_hdr_size = id_hdr_size;
 	machines__set_id_hdr_size(&session->machines, id_hdr_size);
 }
 
 int perf_session__create_kernel_maps(struct perf_session *self)
 {
-	int ret = machine__create_kernel_maps(&self->machines.host);
+	int ret = machine__create_kernel_maps(&self->host_machine);
 
 	if (ret >= 0)
 		ret = machines__create_guest_kernel_maps(&self->machines);
@@ -100,7 +102,8 @@ int perf_session__create_kernel_maps(struct perf_session *self)
 
 static void perf_session__destroy_kernel_maps(struct perf_session *self)
 {
-	machines__destroy_kernel_maps(&self->machines);
+	machine__destroy_kernel_maps(&self->host_machine);
+	machines__destroy_guest_kernel_maps(&self->machines);
 }
 
 struct perf_session *perf_session__new(const char *filename, int mode,
@@ -125,11 +128,22 @@ struct perf_session *perf_session__new(const char *filename, int mode,
 		goto out;
 
 	memcpy(self->filename, filename, len);
+	/*
+	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
+	 * slices. On 32bit we use 32MB.
+	 */
+#if BITS_PER_LONG == 64
+	self->mmap_window = ULLONG_MAX;
+#else
+	self->mmap_window = 32 * 1024 * 1024ULL;
+#endif
+	self->machines = RB_ROOT;
 	self->repipe = repipe;
 	INIT_LIST_HEAD(&self->ordered_samples.samples);
 	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
 	INIT_LIST_HEAD(&self->ordered_samples.to_free);
-	machines__init(&self->machines);
+	machine__init(&self->host_machine, "", HOST_KERNEL_ID);
+	hists__init(&self->hists);
 
 	if (mode == O_RDONLY) {
 		if (perf_session__open(self, force) < 0)
@@ -157,30 +171,37 @@ struct perf_session *perf_session__new(const char *filename, int mode,
 	return NULL;
 }
 
-static void perf_session__delete_dead_threads(struct perf_session *session)
+static void machine__delete_dead_threads(struct machine *machine)
 {
-	machine__delete_dead_threads(&session->machines.host);
+	struct thread *n, *t;	/* n keeps the next entry while t is being deleted */
+
+	list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
+		list_del(&t->node);	/* unlink from dead_threads before deleting */
+		thread__delete(t);
+	}
 }
 
-static void perf_session__delete_threads(struct perf_session *session)
+static void perf_session__delete_dead_threads(struct perf_session *session)
 {
-	machine__delete_threads(&session->machines.host);
+	machine__delete_dead_threads(&session->host_machine);
 }
 
-static void perf_session_env__delete(struct perf_session_env *env)
+static void machine__delete_threads(struct machine *self)
 {
-	free(env->hostname);
-	free(env->os_release);
-	free(env->version);
-	free(env->arch);
-	free(env->cpu_desc);
-	free(env->cpuid);
+	struct rb_node *nd = rb_first(&self->threads);
 
-	free(env->cmdline);
-	free(env->sibling_cores);
-	free(env->sibling_threads);
-	free(env->numa_nodes);
-	free(env->pmu_mappings);
+	while (nd) {
+		struct thread *t = rb_entry(nd, struct thread, rb_node);
+		/* Step to the successor first: rb_erase() leaves nd's links stale. */
+		nd = rb_next(nd);
+		rb_erase(&t->rb_node, &self->threads);
+		thread__delete(t);
+	}
+}
+
+static void perf_session__delete_threads(struct perf_session *session)
+{
+	machine__delete_threads(&session->host_machine);	/* host machine only */
+}
 
 void perf_session__delete(struct perf_session *self)
@@ -188,13 +209,198 @@ void perf_session__delete(struct perf_session *self)
 	perf_session__destroy_kernel_maps(self);
 	perf_session__delete_dead_threads(self);
 	perf_session__delete_threads(self);
-	perf_session_env__delete(&self->header.env);
-	machines__exit(&self->machines);
+	machine__exit(&self->host_machine);
 	close(self->fd);
 	free(self);
 	vdso__exit();
 }
 
+void machine__remove_thread(struct machine *self, struct thread *th)
+{
+	self->last_match = NULL;	/* presumably a lookup cache that may point at th - confirm */
+	rb_erase(&th->rb_node, &self->threads);
+	/*
+	 * Other objects (hist_entry instances, for example) may still hold
+	 * pointers to th, so park it on dead_threads instead of freeing it.
+	 */
+	list_add_tail(&th->node, &self->dead_threads);
+}
+
+/* Report whether sym's name matches the compiled parent_regex pattern. */
+static bool symbol__match_parent_regex(struct symbol *sym)
+{
+	if (!sym->name)
+		return false;
+	return regexec(&parent_regex, sym->name, 0, NULL, 0) == 0;
+}
+
+static const u8 cpumodes[] = {	/* probed in this order by ip__resolve_ams() */
+	PERF_RECORD_MISC_USER,
+	PERF_RECORD_MISC_KERNEL,
+	PERF_RECORD_MISC_GUEST_USER,
+	PERF_RECORD_MISC_GUEST_KERNEL
+};
+#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))	/* number of table entries */
+
+static void ip__resolve_ams(struct machine *self, struct thread *thread,
+			    struct addr_map_symbol *ams,
+			    u64 ip)
+{
+	struct addr_location al;
+	size_t idx = 0;
+
+	memset(&al, 0, sizeof(al));
+
+	/*
+	 * header.misc is no help for branch stack addresses: a branch can
+	 * cross the user/kernel/guest/hypervisor boundary.  Probe each cpumode
+	 * in table order and stop at the first one that yields a symbol; if
+	 * none does, the symbol stays unknown.
+	 */
+	while (idx < NCPUMODES) {
+		u8 mode = cpumodes[idx++];
+
+		thread__find_addr_location(thread, self, mode, MAP__FUNCTION,
+					   ip, &al, NULL);
+		if (al.sym != NULL)
+			break;
+	}
+
+	/* Record whatever the last lookup left in al, resolved or not. */
+	ams->addr = ip;
+	ams->al_addr = al.addr;
+	ams->sym = al.sym;
+	ams->map = al.map;
+}
+
+struct branch_info *machine__resolve_bstack(struct machine *self,
+					    struct thread *thr,
+					    struct branch_stack *bs)
+{
+	/* One zeroed branch_info per recorded branch, returned to the caller. */
+	struct branch_info *info = calloc(bs->nr, sizeof(*info));
+	unsigned int n;
+
+	if (info == NULL)
+		return NULL;
+
+	for (n = 0; n < bs->nr; n++) {
+		ip__resolve_ams(self, thr, &info[n].to, bs->entries[n].to);
+		ip__resolve_ams(self, thr, &info[n].from, bs->entries[n].from);
+		info[n].flags = bs->entries[n].flags;
+	}
+	return info;
+}
+
+static int machine__resolve_callchain_sample(struct machine *machine,
+					     struct thread *thread,
+					     struct ip_callchain *chain,
+					     struct symbol **parent)
+
+{
+	u8 cpumode = PERF_RECORD_MISC_USER;	/* used until a context marker says otherwise */
+	unsigned int i;
+	int err;
+
+	callchain_cursor_reset(&callchain_cursor);
+
+	if (chain->nr > PERF_MAX_STACK_DEPTH) {
+		pr_warning("corrupted callchain. skipping...\n");
+		return 0;	/* oversized chain is dropped, not reported as an error */
+	}
+
+	for (i = 0; i < chain->nr; i++) {
+		u64 ip;
+		struct addr_location al;
+
+		if (callchain_param.order == ORDER_CALLEE)
+			ip = chain->ips[i];
+		else
+			ip = chain->ips[chain->nr - i - 1];	/* any other order: walk backwards */
+
+		if (ip >= PERF_CONTEXT_MAX) {	/* context marker rather than an address */
+			switch (ip) {
+			case PERF_CONTEXT_HV:
+				cpumode = PERF_RECORD_MISC_HYPERVISOR;
+				break;
+			case PERF_CONTEXT_KERNEL:
+				cpumode = PERF_RECORD_MISC_KERNEL;
+				break;
+			case PERF_CONTEXT_USER:
+				cpumode = PERF_RECORD_MISC_USER;
+				break;
+			default:
+				pr_debug("invalid callchain context: "
+					 "%"PRId64"\n", (s64) ip);
+				/*
+				 * It seems the callchain is corrupted.
+				 * Discard all.
+				 */
+				callchain_cursor_reset(&callchain_cursor);
+				return 0;
+			}
+			continue;	/* markers only switch cpumode; nothing is appended */
+		}
+
+		al.filtered = false;
+		thread__find_addr_location(thread, machine, cpumode,
+					   MAP__FUNCTION, ip, &al, NULL);
+		if (al.sym != NULL) {
+			if (sort__has_parent && !*parent &&
+			    symbol__match_parent_regex(al.sym))
+				*parent = al.sym;	/* only set while *parent is still NULL */
+			if (!symbol_conf.use_callchain)
+				break;	/* callchains not in use: stop at the first resolved symbol */
+		}
+
+		err = callchain_cursor_append(&callchain_cursor,
+					      ip, al.map, al.sym);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* unwind callback: append one unwound frame to the cursor passed via arg */
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+	return callchain_cursor_append((struct callchain_cursor *)arg,
+				       entry->ip, entry->map, entry->sym);
+}
+
+int machine__resolve_callchain(struct machine *machine,
+			       struct perf_evsel *evsel,
+			       struct thread *thread,
+			       struct perf_sample *sample,
+			       struct symbol **parent)
+
+{
+	int err;
+
+	callchain_cursor_reset(&callchain_cursor);
+
+	/* Resolve the chain recorded in the sample; failures go straight up. */
+	err = machine__resolve_callchain_sample(machine, thread,
+						sample->callchain, parent);
+	if (err != 0)
+		return err;
+
+	/* DWARF post-unwind needs both user registers and user stack. */
+	if (!(evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) ||
+	    !(evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))
+		return 0;
+
+	/* Requested but not captured for this sample: nothing to unwind. */
+	if (!sample->user_regs.regs || !sample->user_stack.size)
+		return 0;
+
+	return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
+				   thread, evsel->attr.sample_regs_user,
+				   sample);
+
+}
+
 static int process_event_synth_tracing_data_stub(union perf_event *event
 						 __maybe_unused,
 						 struct perf_session *session
@@ -821,7 +1027,7 @@ static struct machine *
 		return perf_session__findnew_machine(session, pid);
 	}
 
-	return &session->machines.host;
+	return perf_session__find_host_machine(session);
 }
 
 static int perf_session_deliver_event(struct perf_session *session,
@@ -859,11 +1065,11 @@ static int perf_session_deliver_event(struct perf_session *session,
 	case PERF_RECORD_SAMPLE:
 		dump_sample(evsel, event, sample);
 		if (evsel == NULL) {
-			++session->stats.nr_unknown_id;
+			++session->hists.stats.nr_unknown_id;
 			return 0;
 		}
 		if (machine == NULL) {
-			++session->stats.nr_unprocessable_samples;
+			++session->hists.stats.nr_unprocessable_samples;
 			return 0;
 		}
 		return tool->sample(tool, event, sample, evsel, machine);
@@ -877,7 +1083,7 @@ static int perf_session_deliver_event(struct perf_session *session,
 		return tool->exit(tool, event, sample, machine);
 	case PERF_RECORD_LOST:
 		if (tool->lost == perf_event__process_lost)
-			session->stats.total_lost += event->lost.lost;
+			session->hists.stats.total_lost += event->lost.lost;
 		return tool->lost(tool, event, sample, machine);
 	case PERF_RECORD_READ:
 		return tool->read(tool, event, sample, evsel, machine);
@@ -886,7 +1092,7 @@ static int perf_session_deliver_event(struct perf_session *session,
 	case PERF_RECORD_UNTHROTTLE:
 		return tool->unthrottle(tool, event, sample, machine);
 	default:
-		++session->stats.nr_unknown_events;
+		++session->hists.stats.nr_unknown_events;
 		return -1;
 	}
 }
@@ -900,8 +1106,8 @@ static int perf_session__preprocess_sample(struct perf_session *session,
 
 	if (!ip_callchain__valid(sample->callchain, event)) {
 		pr_debug("call-chain problem with event, skipping it.\n");
-		++session->stats.nr_invalid_chains;
-		session->stats.total_invalid_chains += sample->period;
+		++session->hists.stats.nr_invalid_chains;
+		session->hists.stats.total_invalid_chains += sample->period;
 		return -EINVAL;
 	}
 	return 0;
@@ -959,7 +1165,7 @@ static int perf_session__process_event(struct perf_session *session,
 	if (event->header.type >= PERF_RECORD_HEADER_MAX)
 		return -EINVAL;
 
-	events_stats__inc(&session->stats, event->header.type);
+	hists__inc_nr_events(&session->hists, event->header.type);
 
 	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
 		return perf_session__process_user_event(session, event, tool, file_offset);
@@ -995,7 +1201,7 @@ void perf_event_header__bswap(struct perf_event_header *self)
 
 struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
 {
-	return machine__findnew_thread(&session->machines.host, pid);
+	return machine__findnew_thread(&session->host_machine, pid);
 }
 
 static struct thread *perf_session__register_idle_thread(struct perf_session *self)
@@ -1014,39 +1220,39 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
 					    const struct perf_tool *tool)
 {
 	if (tool->lost == perf_event__process_lost &&
-	    session->stats.nr_events[PERF_RECORD_LOST] != 0) {
+	    session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) {
 		ui__warning("Processed %d events and lost %d chunks!\n\n"
 			    "Check IO/CPU overload!\n\n",
-			    session->stats.nr_events[0],
-			    session->stats.nr_events[PERF_RECORD_LOST]);
+			    session->hists.stats.nr_events[0],
+			    session->hists.stats.nr_events[PERF_RECORD_LOST]);
 	}
 
-	if (session->stats.nr_unknown_events != 0) {
+	if (session->hists.stats.nr_unknown_events != 0) {
 		ui__warning("Found %u unknown events!\n\n"
 			    "Is this an older tool processing a perf.data "
 			    "file generated by a more recent tool?\n\n"
 			    "If that is not the case, consider "
 			    "reporting to linux-kernel@vger.kernel.org.\n\n",
-			    session->stats.nr_unknown_events);
+			    session->hists.stats.nr_unknown_events);
 	}
 
-	if (session->stats.nr_unknown_id != 0) {
+	if (session->hists.stats.nr_unknown_id != 0) {
 		ui__warning("%u samples with id not present in the header\n",
-			    session->stats.nr_unknown_id);
+			    session->hists.stats.nr_unknown_id);
 	}
 
- 	if (session->stats.nr_invalid_chains != 0) {
+ 	if (session->hists.stats.nr_invalid_chains != 0) {
  		ui__warning("Found invalid callchains!\n\n"
  			    "%u out of %u events were discarded for this reason.\n\n"
  			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
- 			    session->stats.nr_invalid_chains,
- 			    session->stats.nr_events[PERF_RECORD_SAMPLE]);
+ 			    session->hists.stats.nr_invalid_chains,
+ 			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
  	}
 
-	if (session->stats.nr_unprocessable_samples != 0) {
+	if (session->hists.stats.nr_unprocessable_samples != 0) {
 		ui__warning("%u unprocessable samples recorded.\n"
 			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
-			    session->stats.nr_unprocessable_samples);
+			    session->hists.stats.nr_unprocessable_samples);
 	}
 }
 
@@ -1163,18 +1369,6 @@ fetch_mmaped_event(struct perf_session *session,
 	return event;
 }
 
-/*
- * On 64bit we can mmap the data file in one go. No need for tiny mmap
- * slices. On 32bit we use 32MB.
- */
-#if BITS_PER_LONG == 64
-#define MMAP_SIZE ULLONG_MAX
-#define NUM_MMAPS 1
-#else
-#define MMAP_SIZE (32 * 1024 * 1024ULL)
-#define NUM_MMAPS 128
-#endif
-
 int __perf_session__process_events(struct perf_session *session,
 				   u64 data_offset, u64 data_size,
 				   u64 file_size, struct perf_tool *tool)
@@ -1182,7 +1376,7 @@ int __perf_session__process_events(struct perf_session *session,
 	u64 head, page_offset, file_offset, file_pos, progress_next;
 	int err, mmap_prot, mmap_flags, map_idx = 0;
 	size_t	mmap_size;
-	char *buf, *mmaps[NUM_MMAPS];
+	char *buf, *mmaps[8];
 	union perf_event *event;
 	uint32_t size;
 
@@ -1197,7 +1391,7 @@ int __perf_session__process_events(struct perf_session *session,
 
 	progress_next = file_size / 16;
 
-	mmap_size = MMAP_SIZE;
+	mmap_size = session->mmap_window;
 	if (mmap_size > file_size)
 		mmap_size = file_size;
 
@@ -1332,13 +1526,16 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
 
 size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
 {
-	return machines__fprintf_dsos(&self->machines, fp);
+	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
+	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
+	       machines__fprintf_dsos(&self->machines, fp);
 }
 
 size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
-					  bool (skip)(struct dso *dso, int parm), int parm)
+					  bool with_hits)
 {
-	return machines__fprintf_dsos_buildid(&self->machines, fp, skip, parm);
+	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
+	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
 }
 
 size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
@@ -1346,11 +1543,11 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
 	struct perf_evsel *pos;
 	size_t ret = fprintf(fp, "Aggregated stats:\n");
 
-	ret += events_stats__fprintf(&session->stats, fp);
+	ret += hists__fprintf_nr_events(&session->hists, fp);
 
 	list_for_each_entry(pos, &session->evlist->entries, node) {
 		ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
-		ret += events_stats__fprintf(&pos->hists.stats, fp);
+		ret += hists__fprintf_nr_events(&pos->hists, fp);
 	}
 
 	return ret;
@@ -1362,7 +1559,7 @@ size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
 	 * FIXME: Here we have to actually print all the machines in this
 	 * session, not just the host...
 	 */
-	return machine__fprintf(&session->machines.host, fp);
+	return machine__fprintf(&session->host_machine, fp);
 }
 
 void perf_session__remove_thread(struct perf_session *session,
@@ -1371,10 +1568,10 @@ void perf_session__remove_thread(struct perf_session *session,
 	/*
 	 * FIXME: This one makes no sense, we need to remove the thread from
 	 * the machine it belongs to, perf_session can have many machines, so
-	 * doing it always on ->machines.host is wrong.  Fix when auditing all
+	 * doing it always on ->host_machine is wrong.  Fix when auditing all
 	 * the 'perf kvm' code.
 	 */
-	machine__remove_thread(&session->machines.host, th);
+	machine__remove_thread(&session->host_machine, th);
 }
 
 struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
diff --git a/trunk/tools/perf/util/session.h b/trunk/tools/perf/util/session.h
index b5c0847edfa9..cea133a6bdf1 100644
--- a/trunk/tools/perf/util/session.h
+++ b/trunk/tools/perf/util/session.h
@@ -30,10 +30,16 @@ struct ordered_samples {
 struct perf_session {
 	struct perf_header	header;
 	unsigned long		size;
-	struct machines		machines;
+	unsigned long		mmap_window;
+	struct machine		host_machine;
+	struct rb_root		machines;
 	struct perf_evlist	*evlist;
 	struct pevent		*pevent;
-	struct events_stats	stats;
+	/*
+	 * FIXME: Need to split this up further, we need global
+	 *	  stats + per event stats.
+	 */
+	struct hists		hists;
 	int			fd;
 	bool			fd_pipe;
 	bool			repipe;
@@ -48,7 +54,7 @@ struct perf_tool;
 struct perf_session *perf_session__new(const char *filename, int mode,
 				       bool force, bool repipe,
 				       struct perf_tool *tool);
-void perf_session__delete(struct perf_session *session);
+void perf_session__delete(struct perf_session *self);
 
 void perf_event_header__bswap(struct perf_event_header *self);
 
@@ -74,25 +80,44 @@ int perf_session__create_kernel_maps(struct perf_session *self);
 void perf_session__set_id_hdr_size(struct perf_session *session);
 void perf_session__remove_thread(struct perf_session *self, struct thread *th);
 
+static inline
+struct machine *perf_session__find_host_machine(struct perf_session *self)
+{
+	return &self->host_machine;	/* the machine embedded in the session */
+}
+
 static inline
 struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid)
 {
+	if (pid == HOST_KERNEL_ID)
+		return &self->host_machine;
 	return machines__find(&self->machines, pid);
 }
 
 static inline
 struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid)
 {
+	if (pid == HOST_KERNEL_ID)
+		return &self->host_machine;
 	return machines__findnew(&self->machines, pid);
 }
 
+static inline
+void perf_session__process_machines(struct perf_session *self,
+				    struct perf_tool *tool,
+				    machine__process_t process)
+{
+	process(&self->host_machine, tool);	/* host machine first */
+	machines__process(&self->machines, process, tool);	/* then self->machines */
+}
+
 struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
 size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
 
 size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
 
-size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
-					  bool (fn)(struct dso *dso, int parm), int parm);
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *self,
+					  FILE *fp, bool with_hits);
 
 size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
 
diff --git a/trunk/tools/perf/util/sort.c b/trunk/tools/perf/util/sort.c
index d41926cb9e3f..cfd1c0feb32d 100644
--- a/trunk/tools/perf/util/sort.c
+++ b/trunk/tools/perf/util/sort.c
@@ -60,7 +60,7 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
 				       size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
+	return repsep_snprintf(bf, size, "%*s:%5d", width,
 			      self->thread->comm ?: "", self->thread->pid);
 }
 
@@ -97,16 +97,6 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
 	return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
 }
 
-struct sort_entry sort_comm = {
-	.se_header	= "Command",
-	.se_cmp		= sort__comm_cmp,
-	.se_collapse	= sort__comm_collapse,
-	.se_snprintf	= hist_entry__comm_snprintf,
-	.se_width_idx	= HISTC_COMM,
-};
-
-/* --sort dso */
-
 static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
 {
 	struct dso *dso_l = map_l ? map_l->dso : NULL;
@@ -127,12 +117,40 @@ static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
 	return strcmp(dso_name_l, dso_name_r);
 }
 
+struct sort_entry sort_comm = {	/* keyed on thread->comm, see hist_entry__comm_snprintf() */
+	.se_header	= "Command",
+	.se_cmp		= sort__comm_cmp,
+	.se_collapse	= sort__comm_collapse,
+	.se_snprintf	= hist_entry__comm_snprintf,
+	.se_width_idx	= HISTC_COMM,
+};
+
+/* --sort dso */
+
 static int64_t
 sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	return _sort__dso_cmp(left->ms.map, right->ms.map);
 }
 
+
+static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r,
+			      u64 ip_l, u64 ip_r)
+{
+	/* A missing symbol on either side is ordered by cmp_null(). */
+	if (sym_l == NULL || sym_r == NULL)
+		return cmp_null(sym_l, sym_r);
+	if (sym_r == sym_l)
+		return 0;
+
+	/* Both symbols exist here: the passed-in ip values are overridden. */
+	ip_l = sym_l->start;
+	ip_r = sym_r->start;
+
+	/* Unsigned difference of the start addresses, reinterpreted as signed. */
+	return (int64_t)(ip_r - ip_l);
+}
+
 static int _hist_entry__dso_snprintf(struct map *map, char *bf,
 				     size_t size, unsigned int width)
 {
@@ -151,43 +169,9 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
 	return _hist_entry__dso_snprintf(self->ms.map, bf, size, width);
 }
 
-struct sort_entry sort_dso = {
-	.se_header	= "Shared Object",
-	.se_cmp		= sort__dso_cmp,
-	.se_snprintf	= hist_entry__dso_snprintf,
-	.se_width_idx	= HISTC_DSO,
-};
-
-/* --sort symbol */
-
-static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
-{
-	u64 ip_l, ip_r;
-
-	if (!sym_l || !sym_r)
-		return cmp_null(sym_l, sym_r);
-
-	if (sym_l == sym_r)
-		return 0;
-
-	ip_l = sym_l->start;
-	ip_r = sym_r->start;
-
-	return (int64_t)(ip_r - ip_l);
-}
-
-static int64_t
-sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	if (!left->ms.sym && !right->ms.sym)
-		return right->level - left->level;
-
-	return _sort__sym_cmp(left->ms.sym, right->ms.sym);
-}
-
 static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
 				     u64 ip, char level, char *bf, size_t size,
-				     unsigned int width)
+				     unsigned int width __maybe_unused)
 {
 	size_t ret = 0;
 
@@ -213,13 +197,43 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
 	return ret;
 }
 
+
+struct sort_entry sort_dso = {	/* keyed on the map's dso, see _sort__dso_cmp() */
+	.se_header	= "Shared Object",
+	.se_cmp		= sort__dso_cmp,
+	.se_snprintf	= hist_entry__dso_snprintf,
+	.se_width_idx	= HISTC_DSO,
+};
+
 static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
-				    size_t size, unsigned int width)
+				    size_t size,
+				    unsigned int width __maybe_unused)
 {
 	return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip,
 					 self->level, bf, size, width);
 }
 
+/* --sort symbol */
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct symbol *sym_l = left->ms.sym;
+	struct symbol *sym_r = right->ms.sym;
+
+	/* With no symbol on either side, fall back to comparing levels. */
+	if (sym_l == NULL && sym_r == NULL)
+		return right->level - left->level;
+
+	/* Exactly one side resolved: cmp_null() decides the order. */
+	if (sym_l == NULL || sym_r == NULL)
+		return cmp_null(sym_l, sym_r);
+
+	if (sym_l == sym_r)
+		return 0;
+
+	return _sort__sym_cmp(sym_l, sym_r, sym_l->start, sym_r->start);
+}
+
 struct sort_entry sort_sym = {
 	.se_header	= "Symbol",
 	.se_cmp		= sort__sym_cmp,
@@ -239,7 +253,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
 					size_t size,
 					unsigned int width __maybe_unused)
 {
-	FILE *fp = NULL;
+	FILE *fp;
 	char cmd[PATH_MAX + 2], *path = self->srcline, *nl;
 	size_t line_len;
 
@@ -260,6 +274,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
 
 	if (getline(&path, &line_len, fp) < 0 || !line_len)
 		goto out_ip;
+	pclose(fp);
 	self->srcline = strdup(path);
 	if (self->srcline == NULL)
 		goto out_ip;
@@ -269,12 +284,8 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
 		*nl = '\0';
 	path = self->srcline;
 out_path:
-	if (fp)
-		pclose(fp);
 	return repsep_snprintf(bf, size, "%s", path);
 out_ip:
-	if (fp)
-		pclose(fp);
 	return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip);
 }
 
@@ -324,7 +335,7 @@ sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
 static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
 				       size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%*d", width, self->cpu);
+	return repsep_snprintf(bf, size, "%-*d", width, self->cpu);
 }
 
 struct sort_entry sort_cpu = {
@@ -334,8 +345,6 @@ struct sort_entry sort_cpu = {
 	.se_width_idx	= HISTC_CPU,
 };
 
-/* sort keys for branch stacks */
-
 static int64_t
 sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -350,6 +359,13 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
 					 bf, size, width);
 }
 
+struct sort_entry sort_dso_from = {
+	.se_header	= "Source Shared Object",
+	.se_cmp		= sort__dso_from_cmp,
+	.se_snprintf	= hist_entry__dso_from_snprintf,
+	.se_width_idx	= HISTC_DSO_FROM,
+};
+
 static int64_t
 sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -373,7 +389,8 @@ sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
 	if (!from_l->sym && !from_r->sym)
 		return right->level - left->level;
 
-	return _sort__sym_cmp(from_l->sym, from_r->sym);
+	return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr,
+			     from_r->addr);
 }
 
 static int64_t
@@ -385,11 +402,12 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
 	if (!to_l->sym && !to_r->sym)
 		return right->level - left->level;
 
-	return _sort__sym_cmp(to_l->sym, to_r->sym);
+	return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr);
 }
 
 static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
-					 size_t size, unsigned int width)
+					size_t size,
+					unsigned int width __maybe_unused)
 {
 	struct addr_map_symbol *from = &self->branch_info->from;
 	return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
@@ -398,7 +416,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
 }
 
 static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
-				       size_t size, unsigned int width)
+				       size_t size,
+				       unsigned int width __maybe_unused)
 {
 	struct addr_map_symbol *to = &self->branch_info->to;
 	return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
@@ -406,13 +425,6 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
 
 }
 
-struct sort_entry sort_dso_from = {
-	.se_header	= "Source Shared Object",
-	.se_cmp		= sort__dso_from_cmp,
-	.se_snprintf	= hist_entry__dso_from_snprintf,
-	.se_width_idx	= HISTC_DSO_FROM,
-};
-
 struct sort_entry sort_dso_to = {
 	.se_header	= "Target Shared Object",
 	.se_cmp		= sort__dso_to_cmp,
@@ -472,40 +484,30 @@ struct sort_dimension {
 
 #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
 
-static struct sort_dimension common_sort_dimensions[] = {
+static struct sort_dimension sort_dimensions[] = {
 	DIM(SORT_PID, "pid", sort_thread),
 	DIM(SORT_COMM, "comm", sort_comm),
 	DIM(SORT_DSO, "dso", sort_dso),
+	DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
+	DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
 	DIM(SORT_SYM, "symbol", sort_sym),
+	DIM(SORT_SYM_TO, "symbol_from", sort_sym_from),
+	DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to),
 	DIM(SORT_PARENT, "parent", sort_parent),
 	DIM(SORT_CPU, "cpu", sort_cpu),
-	DIM(SORT_SRCLINE, "srcline", sort_srcline),
-};
-
-#undef DIM
-
-#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
-
-static struct sort_dimension bstack_sort_dimensions[] = {
-	DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
-	DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
-	DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
-	DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
 	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+	DIM(SORT_SRCLINE, "srcline", sort_srcline),
 };
 
-#undef DIM
-
 int sort_dimension__add(const char *tok)
 {
 	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
-		struct sort_dimension *sd = &common_sort_dimensions[i];
+	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
+		struct sort_dimension *sd = &sort_dimensions[i];
 
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
-
 		if (sd->entry == &sort_parent) {
 			int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
 			if (ret) {
@@ -516,7 +518,9 @@ int sort_dimension__add(const char *tok)
 				return -EINVAL;
 			}
 			sort__has_parent = 1;
-		} else if (sd->entry == &sort_sym) {
+		} else if (sd->entry == &sort_sym ||
+			   sd->entry == &sort_sym_from ||
+			   sd->entry == &sort_sym_to) {
 			sort__has_sym = 1;
 		}
 
@@ -526,69 +530,52 @@ int sort_dimension__add(const char *tok)
 		if (sd->entry->se_collapse)
 			sort__need_collapse = 1;
 
-		if (list_empty(&hist_entry__sort_list))
-			sort__first_dimension = i;
-
-		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
-		sd->taken = 1;
-
-		return 0;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
-		struct sort_dimension *sd = &bstack_sort_dimensions[i];
-
-		if (strncasecmp(tok, sd->name, strlen(tok)))
-			continue;
-
-		if (sort__branch_mode != 1)
-			return -EINVAL;
-
-		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
-			sort__has_sym = 1;
-
-		if (sd->taken)
-			return 0;
-
-		if (sd->entry->se_collapse)
-			sort__need_collapse = 1;
-
-		if (list_empty(&hist_entry__sort_list))
-			sort__first_dimension = i + __SORT_BRANCH_STACK;
+		if (list_empty(&hist_entry__sort_list)) {
+			if (!strcmp(sd->name, "pid"))
+				sort__first_dimension = SORT_PID;
+			else if (!strcmp(sd->name, "comm"))
+				sort__first_dimension = SORT_COMM;
+			else if (!strcmp(sd->name, "dso"))
+				sort__first_dimension = SORT_DSO;
+			else if (!strcmp(sd->name, "symbol"))
+				sort__first_dimension = SORT_SYM;
+			else if (!strcmp(sd->name, "parent"))
+				sort__first_dimension = SORT_PARENT;
+			else if (!strcmp(sd->name, "cpu"))
+				sort__first_dimension = SORT_CPU;
+			else if (!strcmp(sd->name, "symbol_from"))
+				sort__first_dimension = SORT_SYM_FROM;
+			else if (!strcmp(sd->name, "symbol_to"))
+				sort__first_dimension = SORT_SYM_TO;
+			else if (!strcmp(sd->name, "dso_from"))
+				sort__first_dimension = SORT_DSO_FROM;
+			else if (!strcmp(sd->name, "dso_to"))
+				sort__first_dimension = SORT_DSO_TO;
+			else if (!strcmp(sd->name, "mispredict"))
+				sort__first_dimension = SORT_MISPREDICT;
+		}
 
 		list_add_tail(&sd->entry->list, &hist_entry__sort_list);
 		sd->taken = 1;
 
 		return 0;
 	}
-
 	return -ESRCH;
 }
 
-int setup_sorting(void)
+void setup_sorting(const char * const usagestr[], const struct option *opts)
 {
 	char *tmp, *tok, *str = strdup(sort_order);
-	int ret = 0;
-
-	if (str == NULL) {
-		error("Not enough memory to setup sort keys");
-		return -ENOMEM;
-	}
 
 	for (tok = strtok_r(str, ", ", &tmp);
 			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		ret = sort_dimension__add(tok);
-		if (ret == -EINVAL) {
-			error("Invalid --sort key: `%s'", tok);
-			break;
-		} else if (ret == -ESRCH) {
+		if (sort_dimension__add(tok) < 0) {
 			error("Unknown --sort key: `%s'", tok);
-			break;
+			usage_with_options(usagestr, opts);
 		}
 	}
 
 	free(str);
-	return ret;
 }
 
 void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
diff --git a/trunk/tools/perf/util/sort.h b/trunk/tools/perf/util/sort.h
index b13e56f6ccbe..b4e8c3ba559d 100644
--- a/trunk/tools/perf/util/sort.h
+++ b/trunk/tools/perf/util/sort.h
@@ -55,6 +55,9 @@ struct he_stat {
 struct hist_entry_diff {
 	bool	computed;
 
+	/* PERF_HPP__DISPL */
+	int	displacement;
+
 	/* PERF_HPP__DELTA */
 	double	period_ratio_delta;
 
@@ -115,29 +118,25 @@ static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
 	return NULL;
 }
 
-static inline void hist_entry__add_pair(struct hist_entry *he,
+static inline void hist__entry_add_pair(struct hist_entry *he,
 					struct hist_entry *pair)
 {
 	list_add_tail(&he->pairs.head, &pair->pairs.node);
 }
 
 enum sort_type {
-	/* common sort keys */
 	SORT_PID,
 	SORT_COMM,
 	SORT_DSO,
 	SORT_SYM,
 	SORT_PARENT,
 	SORT_CPU,
-	SORT_SRCLINE,
-
-	/* branch stack specific sort keys */
-	__SORT_BRANCH_STACK,
-	SORT_DSO_FROM = __SORT_BRANCH_STACK,
+	SORT_DSO_FROM,
 	SORT_DSO_TO,
 	SORT_SYM_FROM,
 	SORT_SYM_TO,
 	SORT_MISPREDICT,
+	SORT_SRCLINE,
 };
 
 /*
@@ -160,7 +159,7 @@ struct sort_entry {
 extern struct sort_entry sort_thread;
 extern struct list_head hist_entry__sort_list;
 
-int setup_sorting(void);
+void setup_sorting(const char * const usagestr[], const struct option *opts);
 extern int sort_dimension__add(const char *);
 void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
 			     const char *list_name, FILE *fp);
diff --git a/trunk/tools/perf/util/string.c b/trunk/tools/perf/util/string.c
index 29c7b2cb2521..346707df04b9 100644
--- a/trunk/tools/perf/util/string.c
+++ b/trunk/tools/perf/util/string.c
@@ -331,24 +331,6 @@ char *strxfrchar(char *s, char from, char to)
 	return s;
 }
 
-/**
- * ltrim - Removes leading whitespace from @s.
- * @s: The string to be stripped.
- *
- * Return pointer to the first non-whitespace character in @s.
- */
-char *ltrim(char *s)
-{
-	int len = strlen(s);
-
-	while (len && isspace(*s)) {
-		len--;
-		s++;
-	}
-
-	return s;
-}
-
 /**
  * rtrim - Removes trailing whitespace from @s.
  * @s: The string to be stripped.
diff --git a/trunk/tools/perf/util/strlist.c b/trunk/tools/perf/util/strlist.c
index 55433aa42c8f..155d8b7078a7 100644
--- a/trunk/tools/perf/util/strlist.c
+++ b/trunk/tools/perf/util/strlist.c
@@ -35,11 +35,11 @@ struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry)
 	return NULL;
 }
 
-static void str_node__delete(struct str_node *snode, bool dupstr)
+static void str_node__delete(struct str_node *self, bool dupstr)
 {
 	if (dupstr)
-		free((void *)snode->s);
-	free(snode);
+		free((void *)self->s);
+	free(self);
 }
 
 static
@@ -59,12 +59,12 @@ static int strlist__node_cmp(struct rb_node *rb_node, const void *entry)
 	return strcmp(snode->s, str);
 }
 
-int strlist__add(struct strlist *slist, const char *new_entry)
+int strlist__add(struct strlist *self, const char *new_entry)
 {
-	return rblist__add_node(&slist->rblist, new_entry);
+	return rblist__add_node(&self->rblist, new_entry);
 }
 
-int strlist__load(struct strlist *slist, const char *filename)
+int strlist__load(struct strlist *self, const char *filename)
 {
 	char entry[1024];
 	int err;
@@ -80,7 +80,7 @@ int strlist__load(struct strlist *slist, const char *filename)
 			continue;
 		entry[len - 1] = '\0';
 
-		err = strlist__add(slist, entry);
+		err = strlist__add(self, entry);
 		if (err != 0)
 			goto out;
 	}
@@ -107,56 +107,56 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry)
 	return snode;
 }
 
-static int strlist__parse_list_entry(struct strlist *slist, const char *s)
+static int strlist__parse_list_entry(struct strlist *self, const char *s)
 {
 	if (strncmp(s, "file://", 7) == 0)
-		return strlist__load(slist, s + 7);
+		return strlist__load(self, s + 7);
 
-	return strlist__add(slist, s);
+	return strlist__add(self, s);
 }
 
-int strlist__parse_list(struct strlist *slist, const char *s)
+int strlist__parse_list(struct strlist *self, const char *s)
 {
 	char *sep;
 	int err;
 
 	while ((sep = strchr(s, ',')) != NULL) {
 		*sep = '\0';
-		err = strlist__parse_list_entry(slist, s);
+		err = strlist__parse_list_entry(self, s);
 		*sep = ',';
 		if (err != 0)
 			return err;
 		s = sep + 1;
 	}
 
-	return *s ? strlist__parse_list_entry(slist, s) : 0;
+	return *s ? strlist__parse_list_entry(self, s) : 0;
 }
 
-struct strlist *strlist__new(bool dupstr, const char *list)
+struct strlist *strlist__new(bool dupstr, const char *slist)
 {
-	struct strlist *slist = malloc(sizeof(*slist));
+	struct strlist *self = malloc(sizeof(*self));
 
-	if (slist != NULL) {
-		rblist__init(&slist->rblist);
-		slist->rblist.node_cmp    = strlist__node_cmp;
-		slist->rblist.node_new    = strlist__node_new;
-		slist->rblist.node_delete = strlist__node_delete;
+	if (self != NULL) {
+		rblist__init(&self->rblist);
+		self->rblist.node_cmp    = strlist__node_cmp;
+		self->rblist.node_new    = strlist__node_new;
+		self->rblist.node_delete = strlist__node_delete;
 
-		slist->dupstr	 = dupstr;
-		if (slist && strlist__parse_list(slist, list) != 0)
+		self->dupstr	 = dupstr;
+		if (slist && strlist__parse_list(self, slist) != 0)
 			goto out_error;
 	}
 
-	return slist;
+	return self;
 out_error:
-	free(slist);
+	free(self);
 	return NULL;
 }
 
-void strlist__delete(struct strlist *slist)
+void strlist__delete(struct strlist *self)
 {
-	if (slist != NULL)
-		rblist__delete(&slist->rblist);
+	if (self != NULL)
+		rblist__delete(&self->rblist);
 }
 
 struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
diff --git a/trunk/tools/perf/util/strlist.h b/trunk/tools/perf/util/strlist.h
index 5c7f87069d9c..dd9f922ec67c 100644
--- a/trunk/tools/perf/util/strlist.h
+++ b/trunk/tools/perf/util/strlist.h
@@ -17,34 +17,34 @@ struct strlist {
 };
 
 struct strlist *strlist__new(bool dupstr, const char *slist);
-void strlist__delete(struct strlist *slist);
+void strlist__delete(struct strlist *self);
 
-void strlist__remove(struct strlist *slist, struct str_node *sn);
-int strlist__load(struct strlist *slist, const char *filename);
-int strlist__add(struct strlist *slist, const char *str);
+void strlist__remove(struct strlist *self, struct str_node *sn);
+int strlist__load(struct strlist *self, const char *filename);
+int strlist__add(struct strlist *self, const char *str);
 
-struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
-struct str_node *strlist__find(struct strlist *slist, const char *entry);
+struct str_node *strlist__entry(const struct strlist *self, unsigned int idx);
+struct str_node *strlist__find(struct strlist *self, const char *entry);
 
-static inline bool strlist__has_entry(struct strlist *slist, const char *entry)
+static inline bool strlist__has_entry(struct strlist *self, const char *entry)
 {
-	return strlist__find(slist, entry) != NULL;
+	return strlist__find(self, entry) != NULL;
 }
 
-static inline bool strlist__empty(const struct strlist *slist)
+static inline bool strlist__empty(const struct strlist *self)
 {
-	return rblist__empty(&slist->rblist);
+	return rblist__empty(&self->rblist);
 }
 
-static inline unsigned int strlist__nr_entries(const struct strlist *slist)
+static inline unsigned int strlist__nr_entries(const struct strlist *self)
 {
-	return rblist__nr_entries(&slist->rblist);
+	return rblist__nr_entries(&self->rblist);
 }
 
 /* For strlist iteration */
-static inline struct str_node *strlist__first(struct strlist *slist)
+static inline struct str_node *strlist__first(struct strlist *self)
 {
-	struct rb_node *rn = rb_first(&slist->rblist.entries);
+	struct rb_node *rn = rb_first(&self->rblist.entries);
 	return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
 }
 static inline struct str_node *strlist__next(struct str_node *sn)
@@ -59,21 +59,21 @@ static inline struct str_node *strlist__next(struct str_node *sn)
 /**
  * strlist_for_each      - iterate over a strlist
  * @pos:	the &struct str_node to use as a loop cursor.
- * @slist:	the &struct strlist for loop.
+ * @self:	the &struct strlist for loop.
  */
-#define strlist__for_each(pos, slist)	\
-	for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
+#define strlist__for_each(pos, self)	\
+	for (pos = strlist__first(self); pos; pos = strlist__next(pos))
 
 /**
  * strlist_for_each_safe - iterate over a strlist safe against removal of
  *                         str_node
  * @pos:	the &struct str_node to use as a loop cursor.
  * @n:		another &struct str_node to use as temporary storage.
- * @slist:	the &struct strlist for loop.
+ * @self:	the &struct strlist for loop.
  */
-#define strlist__for_each_safe(pos, n, slist)	\
-	for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
+#define strlist__for_each_safe(pos, n, self)	\
+	for (pos = strlist__first(self), n = strlist__next(pos); pos;\
 	     pos = n, n = strlist__next(n))
 
-int strlist__parse_list(struct strlist *slist, const char *s);
+int strlist__parse_list(struct strlist *self, const char *s);
 #endif /* __PERF_STRLIST_H */
diff --git a/trunk/tools/perf/util/symbol-elf.c b/trunk/tools/perf/util/symbol-elf.c
index 54efcb5659ac..db0cc92cf2ea 100644
--- a/trunk/tools/perf/util/symbol-elf.c
+++ b/trunk/tools/perf/util/symbol-elf.c
@@ -1,3 +1,6 @@
+#include <libelf.h>
+#include <gelf.h>
+#include <elf.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <errno.h>
@@ -715,17 +718,6 @@ int dso__load_sym(struct dso *dso, struct map *map,
 					sym.st_value);
 			used_opd = true;
 		}
-		/*
-		 * When loading symbols in a data mapping, ABS symbols (which
-		 * has a value of SHN_ABS in its st_shndx) failed at
-		 * elf_getscn().  And it marks the loading as a failure so
-		 * already loaded symbols cannot be fixed up.
-		 *
-		 * I'm not sure what should be done. Just ignore them for now.
-		 * - Namhyung Kim
-		 */
-		if (sym.st_shndx == SHN_ABS)
-			continue;
 
 		sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
 		if (!sec)
diff --git a/trunk/tools/perf/util/symbol-minimal.c b/trunk/tools/perf/util/symbol-minimal.c
index a7390cde63bc..259f8f2ea9c9 100644
--- a/trunk/tools/perf/util/symbol-minimal.c
+++ b/trunk/tools/perf/util/symbol-minimal.c
@@ -1,5 +1,6 @@
 #include "symbol.h"
 
+#include <elf.h>
 #include <stdio.h>
 #include <fcntl.h>
 #include <string.h>
diff --git a/trunk/tools/perf/util/symbol.c b/trunk/tools/perf/util/symbol.c
index e6432d85b43d..295f8d4feedf 100644
--- a/trunk/tools/perf/util/symbol.c
+++ b/trunk/tools/perf/util/symbol.c
@@ -28,8 +28,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 				symbol_filter_t filter);
 static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 			symbol_filter_t filter);
-int vmlinux_path__nr_entries;
-char **vmlinux_path;
+static int vmlinux_path__nr_entries;
+static char **vmlinux_path;
 
 struct symbol_conf symbol_conf = {
 	.exclude_other	  = true,
@@ -202,6 +202,13 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 	curr->end = ~0ULL;
 }
 
+static void map_groups__fixup_end(struct map_groups *mg)
+{
+	int i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		__map_groups__fixup_end(mg, i);
+}
+
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
 {
 	size_t namelen = strlen(name) + 1;
@@ -645,8 +652,8 @@ discard_symbol:		rb_erase(&pos->rb_node, root);
 	return count + moved;
 }
 
-bool symbol__restricted_filename(const char *filename,
-				 const char *restricted_filename)
+static bool symbol__restricted_filename(const char *filename,
+					const char *restricted_filename)
 {
 	bool restricted = false;
 
@@ -768,6 +775,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	else
 		machine = NULL;
 
+	name = malloc(PATH_MAX);
+	if (!name)
+		return -1;
+
 	dso->adjust_symbols = 0;
 
 	if (strncmp(dso->name, "/tmp/perf-", 10) == 0) {
@@ -791,10 +802,6 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	if (machine)
 		root_dir = machine->root_dir;
 
-	name = malloc(PATH_MAX);
-	if (!name)
-		return -1;
-
 	/* Iterate over candidate debug images.
 	 * Keep track of "interesting" ones (those which have a symtab, dynsym,
 	 * and/or opd section) for processing.
@@ -880,6 +887,200 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
 	return NULL;
 }
 
+static int map_groups__set_modules_path_dir(struct map_groups *mg,
+				const char *dir_name)
+{
+	struct dirent *dent;
+	DIR *dir = opendir(dir_name);
+	int ret = 0;
+
+	if (!dir) {
+		pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+		return -1;
+	}
+
+	while ((dent = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+		struct stat st;
+
+		/*sshfs might return bad dent->d_type, so we have to stat*/
+		snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+		if (stat(path, &st))
+			continue;
+
+		if (S_ISDIR(st.st_mode)) {
+			if (!strcmp(dent->d_name, ".") ||
+			    !strcmp(dent->d_name, ".."))
+				continue;
+
+			ret = map_groups__set_modules_path_dir(mg, path);
+			if (ret < 0)
+				goto out;
+		} else {
+			char *dot = strrchr(dent->d_name, '.'),
+			     dso_name[PATH_MAX];
+			struct map *map;
+			char *long_name;
+
+			if (dot == NULL || strcmp(dot, ".ko"))
+				continue;
+			snprintf(dso_name, sizeof(dso_name), "[%.*s]",
+				 (int)(dot - dent->d_name), dent->d_name);
+
+			strxfrchar(dso_name, '-', '_');
+			map = map_groups__find_by_name(mg, MAP__FUNCTION,
+						       dso_name);
+			if (map == NULL)
+				continue;
+
+			long_name = strdup(path);
+			if (long_name == NULL) {
+				ret = -1;
+				goto out;
+			}
+			dso__set_long_name(map->dso, long_name);
+			map->dso->lname_alloc = 1;
+			dso__kernel_module_get_build_id(map->dso, "");
+		}
+	}
+
+out:
+	closedir(dir);
+	return ret;
+}
+
+static char *get_kernel_version(const char *root_dir)
+{
+	char version[PATH_MAX];
+	FILE *file;
+	char *name, *tmp;
+	const char *prefix = "Linux version ";
+
+	sprintf(version, "%s/proc/version", root_dir);
+	file = fopen(version, "r");
+	if (!file)
+		return NULL;
+
+	version[0] = '\0';
+	tmp = fgets(version, sizeof(version), file);
+	fclose(file);
+
+	name = strstr(version, prefix);
+	if (!name)
+		return NULL;
+	name += strlen(prefix);
+	tmp = strchr(name, ' ');
+	if (tmp)
+		*tmp = '\0';
+
+	return strdup(name);
+}
+
+static int machine__set_modules_path(struct machine *machine)
+{
+	char *version;
+	char modules_path[PATH_MAX];
+
+	version = get_kernel_version(machine->root_dir);
+	if (!version)
+		return -1;
+
+	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel",
+		 machine->root_dir, version);
+	free(version);
+
+	return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
+}
+
+struct map *machine__new_module(struct machine *machine, u64 start,
+				const char *filename)
+{
+	struct map *map;
+	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
+
+	if (dso == NULL)
+		return NULL;
+
+	map = map__new2(start, dso, MAP__FUNCTION);
+	if (map == NULL)
+		return NULL;
+
+	if (machine__is_host(machine))
+		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+	else
+		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+	map_groups__insert(&machine->kmaps, map);
+	return map;
+}
+
+static int machine__create_modules(struct machine *machine)
+{
+	char *line = NULL;
+	size_t n;
+	FILE *file;
+	struct map *map;
+	const char *modules;
+	char path[PATH_MAX];
+
+	if (machine__is_default_guest(machine))
+		modules = symbol_conf.default_guest_modules;
+	else {
+		sprintf(path, "%s/proc/modules", machine->root_dir);
+		modules = path;
+	}
+
+	if (symbol__restricted_filename(path, "/proc/modules"))
+		return -1;
+
+	file = fopen(modules, "r");
+	if (file == NULL)
+		return -1;
+
+	while (!feof(file)) {
+		char name[PATH_MAX];
+		u64 start;
+		char *sep;
+		int line_len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		line[--line_len] = '\0'; /* \n */
+
+		sep = strrchr(line, 'x');
+		if (sep == NULL)
+			continue;
+
+		hex2u64(sep + 1, &start);
+
+		sep = strchr(line, ' ');
+		if (sep == NULL)
+			continue;
+
+		*sep = '\0';
+
+		snprintf(name, sizeof(name), "[%s]", line);
+		map = machine__new_module(machine, start, name);
+		if (map == NULL)
+			goto out_delete_line;
+		dso__kernel_module_get_build_id(map->dso, machine->root_dir);
+	}
+
+	free(line);
+	fclose(file);
+
+	return machine__set_modules_path(machine);
+
+out_delete_line:
+	free(line);
+out_failure:
+	return -1;
+}
+
 int dso__load_vmlinux(struct dso *dso, struct map *map,
 		      const char *vmlinux, symbol_filter_t filter)
 {
@@ -923,10 +1124,8 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 	filename = dso__build_id_filename(dso, NULL, 0);
 	if (filename != NULL) {
 		err = dso__load_vmlinux(dso, map, filename, filter);
-		if (err > 0) {
-			dso->lname_alloc = 1;
+		if (err > 0)
 			goto out;
-		}
 		free(filename);
 	}
 
@@ -934,7 +1133,6 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 		err = dso__load_vmlinux(dso, map, vmlinux_path[i], filter);
 		if (err > 0) {
 			dso__set_long_name(dso, strdup(vmlinux_path[i]));
-			dso->lname_alloc = 1;
 			break;
 		}
 	}
@@ -974,7 +1172,6 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 		if (err > 0) {
 			dso__set_long_name(dso,
 					   strdup(symbol_conf.vmlinux_name));
-			dso->lname_alloc = 1;
 			goto out_fixup;
 		}
 		return err;
@@ -1103,6 +1300,195 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 	return err;
 }
 
+size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp)
+{
+	struct rb_node *nd;
+	size_t ret = 0;
+
+	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		ret += __dsos__fprintf(&pos->kernel_dsos, fp);
+		ret += __dsos__fprintf(&pos->user_dsos, fp);
+	}
+
+	return ret;
+}
+
+size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+				     bool with_hits)
+{
+	return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, with_hits) +
+	       __dsos__fprintf_buildid(&machine->user_dsos, fp, with_hits);
+}
+
+size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
+				      FILE *fp, bool with_hits)
+{
+	struct rb_node *nd;
+	size_t ret = 0;
+
+	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
+		ret += machine__fprintf_dsos_buildid(pos, fp, with_hits);
+	}
+	return ret;
+}
+
+static struct dso *machine__get_kernel(struct machine *machine)
+{
+	const char *vmlinux_name = NULL;
+	struct dso *kernel;
+
+	if (machine__is_host(machine)) {
+		vmlinux_name = symbol_conf.vmlinux_name;
+		if (!vmlinux_name)
+			vmlinux_name = "[kernel.kallsyms]";
+
+		kernel = dso__kernel_findnew(machine, vmlinux_name,
+					     "[kernel]",
+					     DSO_TYPE_KERNEL);
+	} else {
+		char bf[PATH_MAX];
+
+		if (machine__is_default_guest(machine))
+			vmlinux_name = symbol_conf.default_guest_vmlinux_name;
+		if (!vmlinux_name)
+			vmlinux_name = machine__mmap_name(machine, bf,
+							  sizeof(bf));
+
+		kernel = dso__kernel_findnew(machine, vmlinux_name,
+					     "[guest.kernel]",
+					     DSO_TYPE_GUEST_KERNEL);
+	}
+
+	if (kernel != NULL && (!kernel->has_build_id))
+		dso__read_running_kernel_build_id(kernel, machine);
+
+	return kernel;
+}
+
+struct process_args {
+	u64 start;
+};
+
+static int symbol__in_kernel(void *arg, const char *name,
+			     char type __maybe_unused, u64 start)
+{
+	struct process_args *args = arg;
+
+	if (strchr(name, '['))
+		return 0;
+
+	args->start = start;
+	return 1;
+}
+
+/* Figure out the start address of kernel map from /proc/kallsyms */
+static u64 machine__get_kernel_start_addr(struct machine *machine)
+{
+	const char *filename;
+	char path[PATH_MAX];
+	struct process_args args;
+
+	if (machine__is_host(machine)) {
+		filename = "/proc/kallsyms";
+	} else {
+		if (machine__is_default_guest(machine))
+			filename = (char *)symbol_conf.default_guest_kallsyms;
+		else {
+			sprintf(path, "%s/proc/kallsyms", machine->root_dir);
+			filename = path;
+		}
+	}
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return 0;
+
+	if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0)
+		return 0;
+
+	return args.start;
+}
+
+int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
+{
+	enum map_type type;
+	u64 start = machine__get_kernel_start_addr(machine);
+
+	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		struct kmap *kmap;
+
+		machine->vmlinux_maps[type] = map__new2(start, kernel, type);
+		if (machine->vmlinux_maps[type] == NULL)
+			return -1;
+
+		machine->vmlinux_maps[type]->map_ip =
+			machine->vmlinux_maps[type]->unmap_ip =
+				identity__map_ip;
+		kmap = map__kmap(machine->vmlinux_maps[type]);
+		kmap->kmaps = &machine->kmaps;
+		map_groups__insert(&machine->kmaps,
+				   machine->vmlinux_maps[type]);
+	}
+
+	return 0;
+}
+
+void machine__destroy_kernel_maps(struct machine *machine)
+{
+	enum map_type type;
+
+	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		struct kmap *kmap;
+
+		if (machine->vmlinux_maps[type] == NULL)
+			continue;
+
+		kmap = map__kmap(machine->vmlinux_maps[type]);
+		map_groups__remove(&machine->kmaps,
+				   machine->vmlinux_maps[type]);
+		if (kmap->ref_reloc_sym) {
+			/*
+			 * ref_reloc_sym is shared among all maps, so free just
+			 * on one of them.
+			 */
+			if (type == MAP__FUNCTION) {
+				free((char *)kmap->ref_reloc_sym->name);
+				kmap->ref_reloc_sym->name = NULL;
+				free(kmap->ref_reloc_sym);
+			}
+			kmap->ref_reloc_sym = NULL;
+		}
+
+		map__delete(machine->vmlinux_maps[type]);
+		machine->vmlinux_maps[type] = NULL;
+	}
+}
+
+int machine__create_kernel_maps(struct machine *machine)
+{
+	struct dso *kernel = machine__get_kernel(machine);
+
+	if (kernel == NULL ||
+	    __machine__create_kernel_maps(machine, kernel) < 0)
+		return -1;
+
+	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
+		if (machine__is_host(machine))
+			pr_debug("Problems creating module maps, "
+				 "continuing anyway...\n");
+		else
+			pr_debug("Problems creating module maps for guest %d, "
+				 "continuing anyway...\n", machine->pid);
+	}
+
+	/*
+	 * Now that we have all the maps created, just set the ->end of them:
+	 */
+	map_groups__fixup_end(&machine->kmaps);
+	return 0;
+}
+
 static void vmlinux_path__exit(void)
 {
 	while (--vmlinux_path__nr_entries >= 0) {
@@ -1163,6 +1549,25 @@ static int vmlinux_path__init(void)
 	return -1;
 }
 
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
+{
+	int i;
+	size_t printed = 0;
+	struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso;
+
+	if (kdso->has_build_id) {
+		char filename[PATH_MAX];
+		if (dso__build_id_filename(kdso, filename, sizeof(filename)))
+			printed += fprintf(fp, "[0] %s\n", filename);
+	}
+
+	for (i = 0; i < vmlinux_path__nr_entries; ++i)
+		printed += fprintf(fp, "[%d] %s\n",
+				   i + kdso->has_build_id, vmlinux_path[i]);
+
+	return printed;
+}
+
 static int setup_list(struct strlist **list, const char *list_str,
 		      const char *list_name)
 {
@@ -1266,3 +1671,108 @@ void symbol__exit(void)
 	symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
 	symbol_conf.initialized = false;
 }
+
+int machines__create_kernel_maps(struct rb_root *machines, pid_t pid)
+{
+	struct machine *machine = machines__findnew(machines, pid);
+
+	if (machine == NULL)
+		return -1;
+
+	return machine__create_kernel_maps(machine);
+}
+
+int machines__create_guest_kernel_maps(struct rb_root *machines)
+{
+	int ret = 0;
+	struct dirent **namelist = NULL;
+	int i, items = 0;
+	char path[PATH_MAX];
+	pid_t pid;
+	char *endp;
+
+	if (symbol_conf.default_guest_vmlinux_name ||
+	    symbol_conf.default_guest_modules ||
+	    symbol_conf.default_guest_kallsyms) {
+		machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
+	}
+
+	if (symbol_conf.guestmount) {
+		items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
+		if (items <= 0)
+			return -ENOENT;
+		for (i = 0; i < items; i++) {
+			if (!isdigit(namelist[i]->d_name[0])) {
+				/* Filter out . and .. */
+				continue;
+			}
+			pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
+			if ((*endp != '\0') ||
+			    (endp == namelist[i]->d_name) ||
+			    (errno == ERANGE)) {
+				pr_debug("invalid directory (%s). Skipping.\n",
+					 namelist[i]->d_name);
+				continue;
+			}
+			sprintf(path, "%s/%s/proc/kallsyms",
+				symbol_conf.guestmount,
+				namelist[i]->d_name);
+			ret = access(path, R_OK);
+			if (ret) {
+				pr_debug("Can't access file %s\n", path);
+				goto failure;
+			}
+			machines__create_kernel_maps(machines, pid);
+		}
+failure:
+		free(namelist);
+	}
+
+	return ret;
+}
+
+void machines__destroy_guest_kernel_maps(struct rb_root *machines)
+{
+	struct rb_node *next = rb_first(machines);
+
+	while (next) {
+		struct machine *pos = rb_entry(next, struct machine, rb_node);
+
+		next = rb_next(&pos->rb_node);
+		rb_erase(&pos->rb_node, machines);
+		machine__delete(pos);
+	}
+}
+
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+			   enum map_type type, symbol_filter_t filter)
+{
+	struct map *map = machine->vmlinux_maps[type];
+	int ret = dso__load_kallsyms(map->dso, filename, map, filter);
+
+	if (ret > 0) {
+		dso__set_loaded(map->dso, type);
+		/*
+		 * Since /proc/kallsyms will have multiple sessions for the
+		 * kernel, with modules between them, fixup the end of all
+		 * sections.
+		 */
+		__map_groups__fixup_end(&machine->kmaps, type);
+	}
+
+	return ret;
+}
+
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
+			       symbol_filter_t filter)
+{
+	struct map *map = machine->vmlinux_maps[type];
+	int ret = dso__load_vmlinux_path(map->dso, map, filter);
+
+	if (ret > 0) {
+		dso__set_loaded(map->dso, type);
+		map__reloc_vmlinux(map);
+	}
+
+	return ret;
+}
diff --git a/trunk/tools/perf/util/symbol.h b/trunk/tools/perf/util/symbol.h
index b62ca37c4b77..de68f98b236d 100644
--- a/trunk/tools/perf/util/symbol.h
+++ b/trunk/tools/perf/util/symbol.h
@@ -16,8 +16,8 @@
 #ifdef LIBELF_SUPPORT
 #include <libelf.h>
 #include <gelf.h>
-#endif
 #include <elf.h>
+#endif
 
 #include "dso.h"
 
@@ -96,8 +96,7 @@ struct symbol_conf {
 			initialized,
 			kptr_restrict,
 			annotate_asm_raw,
-			annotate_src,
-			event_group;
+			annotate_src;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
@@ -121,8 +120,6 @@ struct symbol_conf {
 };
 
 extern struct symbol_conf symbol_conf;
-extern int vmlinux_path__nr_entries;
-extern char **vmlinux_path;
 
 static inline void *symbol__priv(struct symbol *sym)
 {
@@ -226,8 +223,6 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym,
 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
 size_t symbol__fprintf(struct symbol *sym, FILE *fp);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
-bool symbol__restricted_filename(const char *filename,
-				 const char *restricted_filename);
 
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, symbol_filter_t filter,
diff --git a/trunk/tools/perf/util/sysfs.c b/trunk/tools/perf/util/sysfs.c
index f71e9eafe15a..48c6902e749f 100644
--- a/trunk/tools/perf/util/sysfs.c
+++ b/trunk/tools/perf/util/sysfs.c
@@ -8,7 +8,7 @@ static const char * const sysfs_known_mountpoints[] = {
 };
 
 static int sysfs_found;
-char sysfs_mountpoint[PATH_MAX + 1];
+char sysfs_mountpoint[PATH_MAX];
 
 static int sysfs_valid_mountpoint(const char *sysfs)
 {
diff --git a/trunk/tools/perf/util/thread.c b/trunk/tools/perf/util/thread.c
index 632e40e5ceca..df59623ac763 100644
--- a/trunk/tools/perf/util/thread.c
+++ b/trunk/tools/perf/util/thread.c
@@ -54,10 +54,10 @@ int thread__comm_len(struct thread *self)
 	return self->comm_len;
 }
 
-size_t thread__fprintf(struct thread *thread, FILE *fp)
+static size_t thread__fprintf(struct thread *self, FILE *fp)
 {
-	return fprintf(fp, "Thread %d %s\n", thread->pid, thread->comm) +
-	       map_groups__fprintf(&thread->mg, verbose, fp);
+	return fprintf(fp, "Thread %d %s\n", self->pid, self->comm) +
+	       map_groups__fprintf(&self->mg, verbose, fp);
 }
 
 void thread__insert_map(struct thread *self, struct map *map)
@@ -84,3 +84,17 @@ int thread__fork(struct thread *self, struct thread *parent)
 			return -ENOMEM;
 	return 0;
 }
+
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+	/* Print every thread in machine->threads; ret sums the results. */
+	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
+		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+		ret += thread__fprintf(pos, fp);
+	}
+
+	return ret;
+}
diff --git a/trunk/tools/perf/util/thread.h b/trunk/tools/perf/util/thread.h
index 5ad266403098..f2fa17caa7d5 100644
--- a/trunk/tools/perf/util/thread.h
+++ b/trunk/tools/perf/util/thread.h
@@ -30,7 +30,6 @@ int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
-size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 static inline struct map *thread__find_map(struct thread *self,
 					   enum map_type type, u64 addr)
diff --git a/trunk/tools/perf/util/top.c b/trunk/tools/perf/util/top.c
index 54d37a4753c5..884dde9b9bc1 100644
--- a/trunk/tools/perf/util/top.c
+++ b/trunk/tools/perf/util/top.c
@@ -26,8 +26,6 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 	float samples_per_sec = top->samples / top->delay_secs;
 	float ksamples_per_sec = top->kernel_samples / top->delay_secs;
 	float esamples_percent = (100.0 * top->exact_samples) / top->samples;
-	struct perf_record_opts *opts = &top->record_opts;
-	struct perf_target *target = &opts->target;
 	size_t ret = 0;
 
 	if (!perf_guest) {
@@ -63,31 +61,31 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 		struct perf_evsel *first = perf_evlist__first(top->evlist);
 		ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
 				(uint64_t)first->attr.sample_period,
-				opts->freq ? "Hz" : "");
+				top->freq ? "Hz" : "");
 	}
 
 	ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
 
 	ret += SNPRINTF(bf + ret, size - ret, "], ");
 
-	if (target->pid)
+	if (top->target.pid)
 		ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
-				target->pid);
-	else if (target->tid)
+				top->target.pid);
+	else if (top->target.tid)
 		ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
-				target->tid);
-	else if (target->uid_str != NULL)
+				top->target.tid);
+	else if (top->target.uid_str != NULL)
 		ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
-				target->uid_str);
+				top->target.uid_str);
 	else
 		ret += SNPRINTF(bf + ret, size - ret, " (all");
 
-	if (target->cpu_list)
+	if (top->target.cpu_list)
 		ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
 				top->evlist->cpus->nr > 1 ? "s" : "",
-				target->cpu_list);
+				top->target.cpu_list);
 	else {
-		if (target->tid)
+		if (top->target.tid)
 			ret += SNPRINTF(bf + ret, size - ret, ")");
 		else
 			ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
diff --git a/trunk/tools/perf/util/top.h b/trunk/tools/perf/util/top.h
index 7ebf357dc9e1..86ff1b15059b 100644
--- a/trunk/tools/perf/util/top.h
+++ b/trunk/tools/perf/util/top.h
@@ -14,7 +14,7 @@ struct perf_session;
 struct perf_top {
 	struct perf_tool   tool;
 	struct perf_evlist *evlist;
-	struct perf_record_opts record_opts;
+	struct perf_target target;
 	/*
 	 * Symbols will be added here in perf_event__process_sample and will
 	 * get out after decayed.
@@ -24,16 +24,24 @@ struct perf_top {
 	u64		   exact_samples;
 	u64		   guest_us_samples, guest_kernel_samples;
 	int		   print_entries, count_filter, delay_secs;
+	int		   freq;
 	bool		   hide_kernel_symbols, hide_user_symbols, zero;
 	bool		   use_tui, use_stdio;
 	bool		   sort_has_symbols;
+	bool		   dont_use_callchains;
 	bool		   kptr_restrict_warned;
 	bool		   vmlinux_warned;
+	bool		   inherit;
+	bool		   group;
+	bool		   sample_id_all_missing;
+	bool		   exclude_guest_missing;
 	bool		   dump_symtab;
 	struct hist_entry  *sym_filter_entry;
 	struct perf_evsel  *sym_evsel;
 	struct perf_session *session;
 	struct winsize	   winsize;
+	unsigned int	   mmap_pages;
+	int		   default_interval;
 	int		   realtime_prio;
 	int		   sym_pcnt_filter;
 	const char	   *sym_filter;
diff --git a/trunk/tools/perf/util/util.c b/trunk/tools/perf/util/util.c
index 805d1f52c5b4..5906e8426cc7 100644
--- a/trunk/tools/perf/util/util.c
+++ b/trunk/tools/perf/util/util.c
@@ -12,8 +12,6 @@
  */
 unsigned int page_size;
 
-bool test_attr__enabled;
-
 bool perf_host  = true;
 bool perf_guest = false;
 
@@ -220,25 +218,3 @@ void dump_stack(void)
 #else
 void dump_stack(void) {}
 #endif
-
-void get_term_dimensions(struct winsize *ws)
-{
-	char *s = getenv("LINES");
-
-	if (s != NULL) {
-		ws->ws_row = atoi(s);
-		s = getenv("COLUMNS");
-		if (s != NULL) {
-			ws->ws_col = atoi(s);
-			if (ws->ws_row && ws->ws_col)
-				return;
-		}
-	}
-#ifdef TIOCGWINSZ
-	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
-	    ws->ws_row && ws->ws_col)
-		return;
-#endif
-	ws->ws_row = 25;
-	ws->ws_col = 80;
-}
diff --git a/trunk/tools/perf/util/util.h b/trunk/tools/perf/util/util.h
index 09b4c26b71aa..c2330918110c 100644
--- a/trunk/tools/perf/util/util.h
+++ b/trunk/tools/perf/util/util.h
@@ -265,14 +265,10 @@ bool is_power_of_2(unsigned long n)
 size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
-char *ltrim(char *s);
 char *rtrim(char *s);
 
 void dump_stack(void);
 
 extern unsigned int page_size;
 
-struct winsize;
-void get_term_dimensions(struct winsize *ws);
-
 #endif