diff --git a/[refs] b/[refs] index 421a194abe4b..5eaec1b4c319 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: e08fbb78f03fe2c4f88824faf6f51ce6af185e11 +refs/heads/master: e9dbfae53eeb9fc3d4bb7da3df87fa9875f5da02 diff --git a/trunk/Documentation/power/devices.txt b/trunk/Documentation/power/devices.txt index 64565aac6e40..88880839ece4 100644 --- a/trunk/Documentation/power/devices.txt +++ b/trunk/Documentation/power/devices.txt @@ -520,20 +520,59 @@ Support for power domains is provided through the pwr_domain field of struct device. This field is a pointer to an object of type struct dev_power_domain, defined in include/linux/pm.h, providing a set of power management callbacks analogous to the subsystem-level and device driver callbacks that are executed -for the given device during all power transitions, instead of the respective -subsystem-level callbacks. Specifically, if a device's pm_domain pointer is -not NULL, the ->suspend() callback from the object pointed to by it will be -executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and -anlogously for all of the remaining callbacks. In other words, power management -domain callbacks, if defined for the given device, always take precedence over -the callbacks provided by the device's subsystem (e.g. bus type). - -The support for device power management domains is only relevant to platforms -needing to use the same device driver power management callbacks in many -different power domain configurations and wanting to avoid incorporating the -support for power domains into subsystem-level callbacks, for example by -modifying the platform bus type. Other platforms need not implement it or take -it into account in any way. +for the given device during all power transitions, in addition to the respective +subsystem-level callbacks. Specifically, the power domain "suspend" callbacks +(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are +executed after the analogous subsystem-level callbacks, while the power domain +"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore(), +etc.) are executed before the analogous subsystem-level callbacks. Error codes +returned by the "suspend" and "resume" power domain callbacks are ignored. + +Power domain ->runtime_idle() callback is executed before the subsystem-level +->runtime_idle() callback and the result returned by it is not ignored. Namely, +if it returns error code, the subsystem-level ->runtime_idle() callback will not +be called and the helper function rpm_idle() executing it will return error +code. This mechanism is intended to help platforms where saving device state +is a time consuming operation and should only be carried out if all devices +in the power domain are idle, before turning off the shared power resource(s). +Namely, the power domain ->runtime_idle() callback may return error code until +the pm_runtime_idle() helper (or its asynchronous version) has been called for +all devices in the power domain (it is recommended that the returned error code +be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle() +callback from being run prematurely. + +The support for device power domains is only relevant to platforms needing to +use the same subsystem-level (e.g. platform bus type) and device driver power +management callbacks in many different power domain configurations and wanting +to avoid incorporating the support for power domains into the subsystem-level +callbacks.
The other platforms need not implement it or take it into account +in any way. + + +System Devices +-------------- +System devices (sysdevs) follow a slightly different API, which can be found in + + include/linux/sysdev.h + drivers/base/sys.c + +System devices will be suspended with interrupts disabled, and after all other +devices have been suspended. On resume, they will be resumed before any other +devices, and also with interrupts disabled. These things occur in special +"sysdev_driver" phases, which affect only system devices. + +Thus, after the suspend_noirq (or freeze_noirq or poweroff_noirq) phase, when +the non-boot CPUs are all offline and IRQs are disabled on the remaining online +CPU, then a sysdev_driver.suspend phase is carried out, and the system enters a +sleep state (or a system image is created). During resume (or after the image +has been created or loaded) a sysdev_driver.resume phase is carried out, IRQs +are enabled on the only online CPU, the non-boot CPUs are enabled, and the +resume_noirq (or thaw_noirq or restore_noirq) phase begins. + +Code to actually enter and exit the system-wide low power state sometimes +involves hardware details that are only known to the boot firmware, and +may leave a CPU running software (from SRAM or flash memory) that monitors +the system and manages its wakeup sequence. Device Low Power (suspend) States diff --git a/trunk/Documentation/power/runtime_pm.txt b/trunk/Documentation/power/runtime_pm.txt index 22accb3eb40e..654097b130b4 100644 --- a/trunk/Documentation/power/runtime_pm.txt +++ b/trunk/Documentation/power/runtime_pm.txt @@ -566,6 +566,11 @@ to do this is: pm_runtime_set_active(dev); pm_runtime_enable(dev); +The PM core always increments the run-time usage counter before calling the +->prepare() callback and decrements it after calling the ->complete() callback. +Hence disabling run-time PM temporarily like this will not cause any run-time +suspend callbacks to be lost. + 7. Generic subsystem callbacks Subsystems may wish to conserve code space by using the set of generic power diff --git a/trunk/Makefile b/trunk/Makefile index e62e06b89a7a..41330a06e4ec 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc4 NAME = Sneaky Weasel # *DOCUMENTATION* @@ -1290,7 +1290,6 @@ help: @echo ' make O=dir [targets] Locate all output files in "dir", including .config' @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' - @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' @echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where' @echo ' 1: warnings which may be relevant and do not occur too often' @echo ' 2: warnings which occur quite often but may still be relevant' diff --git a/trunk/arch/alpha/include/asm/mmzone.h b/trunk/arch/alpha/include/asm/mmzone.h index 445dc42e0334..8af56ce346ad 100644 --- a/trunk/arch/alpha/include/asm/mmzone.h +++ b/trunk/arch/alpha/include/asm/mmzone.h @@ -56,6 +56,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) * Given a kernel address, find the home node of the underlying memory. 
*/ #define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) /* * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory diff --git a/trunk/arch/alpha/kernel/perf_event.c b/trunk/arch/alpha/kernel/perf_event.c index 8e47709160f8..90561c45e7d8 100644 --- a/trunk/arch/alpha/kernel/perf_event.c +++ b/trunk/arch/alpha/kernel/perf_event.c @@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, data.period = event->hw.last_period; if (alpha_perf_event_set_period(event, hwc, idx)) { - if (perf_event_overflow(event, &data, regs)) { + if (perf_event_overflow(event, 1, &data, regs)) { /* Interrupts coming too quickly; "throttle" the * counter, i.e., disable it for a little while. */ diff --git a/trunk/arch/alpha/kernel/time.c b/trunk/arch/alpha/kernel/time.c index f20d1b5396b8..818e74ed45dc 100644 --- a/trunk/arch/alpha/kernel/time.c +++ b/trunk/arch/alpha/kernel/time.c @@ -91,7 +91,7 @@ DEFINE_PER_CPU(u8, irq_work_pending); #define test_irq_work_pending() __get_cpu_var(irq_work_pending) #define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 -void arch_irq_work_raise(void) +void set_irq_work_pending(void) { set_irq_work_pending_flag(); } diff --git a/trunk/arch/arm/boot/compressed/head.S b/trunk/arch/arm/boot/compressed/head.S index 940b20178107..942fad97e447 100644 --- a/trunk/arch/arm/boot/compressed/head.S +++ b/trunk/arch/arm/boot/compressed/head.S @@ -597,8 +597,6 @@ __common_mmu_cache_on: sub pc, lr, r0, lsr #32 @ properly flush pipeline #endif -#define PROC_ENTRY_SIZE (4*5) - /* * Here follow the relocatable cache support functions for the * various processors. This is a generic hook for locating an @@ -626,7 +624,7 @@ call_cache_fn: adr r12, proc_types ARM( addeq pc, r12, r3 ) @ call cache function THUMB( addeq r12, r3 ) THUMB( moveq pc, r12 ) @ call cache function - add r12, r12, #PROC_ENTRY_SIZE + add r12, r12, #4*5 b 1b /* @@ -796,16 +794,6 @@ proc_types: .size proc_types, . - proc_types - /* - * If you get a "non-constant expression in ".if" statement" - * error from the assembler on this line, check that you have - * not accidentally written a "b" instruction where you should - * have written W(b). - */ - .if (. - proc_types) % PROC_ENTRY_SIZE != 0 - .error "The size of one or more proc_types entries is wrong." - .endif - /* * Turn off the Cache and MMU. ARMv3 does not support * reading the control register, but ARMv4 does. diff --git a/trunk/arch/arm/include/asm/assembler.h b/trunk/arch/arm/include/asm/assembler.h index 65c3f2474f5e..bc2d2d75f706 100644 --- a/trunk/arch/arm/include/asm/assembler.h +++ b/trunk/arch/arm/include/asm/assembler.h @@ -13,9 +13,6 @@ * Do not include any C declarations in this file - it is included by * assembler source. */ -#ifndef __ASM_ASSEMBLER_H__ -#define __ASM_ASSEMBLER_H__ - #ifndef __ASSEMBLY__ #error "Only include this from assembly code" #endif @@ -293,4 +290,3 @@ .macro ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f usracc ldr, \reg, \ptr, \inc, \cond, \rept, \abort .endm -#endif /* __ASM_ASSEMBLER_H__ */ diff --git a/trunk/arch/arm/include/asm/entry-macro-multi.S b/trunk/arch/arm/include/asm/entry-macro-multi.S index 2da8547de6d6..ec0bbf79c71f 100644 --- a/trunk/arch/arm/include/asm/entry-macro-multi.S +++ b/trunk/arch/arm/include/asm/entry-macro-multi.S @@ -1,5 +1,3 @@ -#include - /* * Interrupt handling. 
Preserves r7, r8, r9 */ diff --git a/trunk/arch/arm/kernel/module.c b/trunk/arch/arm/kernel/module.c index 016d6a0830a3..fee7c36349eb 100644 --- a/trunk/arch/arm/kernel/module.c +++ b/trunk/arch/arm/kernel/module.c @@ -193,17 +193,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x02000000; offset += sym->st_value - loc; - /* - * For function symbols, only Thumb addresses are - * allowed (no interworking). - * - * For non-function symbols, the destination - * has no specific ARM/Thumb disposition, so - * the branch is resolved under the assumption - * that interworking is not required. - */ - if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC && - !(offset & 1)) || + /* only Thumb addresses allowed (no interworking) */ + if (!(offset & 1) || offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", diff --git a/trunk/arch/arm/kernel/perf_event_v6.c b/trunk/arch/arm/kernel/perf_event_v6.c index dd7f3b9f4cb3..f1e8dd94afe8 100644 --- a/trunk/arch/arm/kernel/perf_event_v6.c +++ b/trunk/arch/arm/kernel/perf_event_v6.c @@ -173,20 +173,6 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; enum armv6mpcore_perf_types { @@ -324,20 +310,6 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; static inline unsigned long @@ -507,7 +479,7 @@ armv6pmu_handle_irq(int irq_num, if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 0, &data, regs)) armpmu->disable(hwc, idx); } diff --git a/trunk/arch/arm/kernel/perf_event_v7.c b/trunk/arch/arm/kernel/perf_event_v7.c index e20ca9cafef5..4960686afb58 100644 --- a/trunk/arch/arm/kernel/perf_event_v7.c +++ b/trunk/arch/arm/kernel/perf_event_v7.c @@ -255,20 +255,6 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; /* @@ -385,20 +371,6 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = 
CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; /* @@ -815,7 +787,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 0, &data, regs)) armpmu->disable(hwc, idx); } diff --git a/trunk/arch/arm/kernel/perf_event_xscale.c b/trunk/arch/arm/kernel/perf_event_xscale.c index 3c4397491d08..39affbe4fdb2 100644 --- a/trunk/arch/arm/kernel/perf_event_xscale.c +++ b/trunk/arch/arm/kernel/perf_event_xscale.c @@ -144,20 +144,6 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, }; #define XSCALE_PMU_ENABLE 0x001 @@ -265,7 +251,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 0, &data, regs)) armpmu->disable(hwc, idx); } @@ -597,7 +583,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) if (!armpmu_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 0, &data, regs)) armpmu->disable(hwc, idx); } diff --git a/trunk/arch/arm/kernel/ptrace.c b/trunk/arch/arm/kernel/ptrace.c index 5c199610719f..97260060bf26 100644 --- a/trunk/arch/arm/kernel/ptrace.c +++ b/trunk/arch/arm/kernel/ptrace.c @@ -396,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx) /* * Handle hitting a HW-breakpoint. */ -static void ptrace_hbptriggered(struct perf_event *bp, +static void ptrace_hbptriggered(struct perf_event *bp, int unused, struct perf_sample_data *data, struct pt_regs *regs) { @@ -479,8 +479,7 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type) attr.bp_type = type; attr.disabled = 1; - return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, - tsk); + return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk); } static int ptrace_gethbpregs(struct task_struct *tsk, long num, diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c index e7f92a4321f3..344e52b16c8c 100644 --- a/trunk/arch/arm/kernel/smp.c +++ b/trunk/arch/arm/kernel/smp.c @@ -318,13 +318,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void) smp_store_cpu_info(cpu); /* - * OK, now it's safe to let the boot CPU continue. Wait for - * the CPU migration code to notice that the CPU is online - * before we continue. 
+ * OK, now it's safe to let the boot CPU continue */ set_cpu_online(cpu, true); - while (!cpu_active(cpu)) - cpu_relax(); /* * OK, it's off to the idle thread for us diff --git a/trunk/arch/arm/kernel/swp_emulate.c b/trunk/arch/arm/kernel/swp_emulate.c index 5f452f8fde05..40ee7e5045e4 100644 --- a/trunk/arch/arm/kernel/swp_emulate.c +++ b/trunk/arch/arm/kernel/swp_emulate.c @@ -183,7 +183,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) unsigned int address, destreg, data, type; unsigned int res = 0; - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc); if (current->pid != previous_pid) { pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", diff --git a/trunk/arch/arm/mach-h720x/Kconfig b/trunk/arch/arm/mach-h720x/Kconfig index abf356c02343..9b6982efbd22 100644 --- a/trunk/arch/arm/mach-h720x/Kconfig +++ b/trunk/arch/arm/mach-h720x/Kconfig @@ -6,14 +6,12 @@ config ARCH_H7201 bool "gms30c7201" depends on ARCH_H720X select CPU_H7201 - select ZONE_DMA help Say Y here if you are using the Hynix GMS30C7201 Reference Board config ARCH_H7202 bool "hms30c7202" select CPU_H7202 - select ZONE_DMA depends on ARCH_H720X help Say Y here if you are using the Hynix HMS30C7202 Reference Board diff --git a/trunk/arch/arm/mm/fault.c b/trunk/arch/arm/mm/fault.c index 9ea4f7ddd665..bc0e1d88fd3b 100644 --- a/trunk/arch/arm/mm/fault.c +++ b/trunk/arch/arm/mm/fault.c @@ -318,11 +318,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) fault = __do_page_fault(mm, addr, fsr, tsk); up_read(&mm->mmap_sem); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr); if (fault & VM_FAULT_MAJOR) - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr); else if (fault & VM_FAULT_MINOR) - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr); /* * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR diff --git a/trunk/arch/arm/mm/proc-v7.S b/trunk/arch/arm/mm/proc-v7.S index 089c0b5e454f..3c3867850a30 100644 --- a/trunk/arch/arm/mm/proc-v7.S +++ b/trunk/arch/arm/mm/proc-v7.S @@ -210,21 +210,19 @@ cpu_v7_name: /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ .globl cpu_v7_suspend_size -.equ cpu_v7_suspend_size, 4 * 9 +.equ cpu_v7_suspend_size, 4 * 8 #ifdef CONFIG_PM_SLEEP ENTRY(cpu_v7_do_suspend) stmfd sp!, {r4 - r11, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID mrc p15, 0, r5, c13, c0, 1 @ Context ID - mrc p15, 0, r6, c13, c0, 3 @ User r/o thread ID - stmia r0!, {r4 - r6} mrc p15, 0, r6, c3, c0, 0 @ Domain ID mrc p15, 0, r7, c2, c0, 0 @ TTB 0 mrc p15, 0, r8, c2, c0, 1 @ TTB 1 mrc p15, 0, r9, c1, c0, 0 @ Control register mrc p15, 0, r10, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r11, c1, c0, 2 @ Co-processor access control - stmia r0, {r6 - r11} + stmia r0, {r4 - r11} ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_v7_do_suspend) @@ -232,11 +230,9 @@ ENTRY(cpu_v7_do_resume) mov ip, #0 mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - ldmia r0!, {r4 - r6} + ldmia r0, {r4 - r11} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID mcr p15, 0, r5, c13, c0, 1 @ Context ID - mcr p15, 0, r6, c13, c0, 3 @ User r/o thread ID - ldmia r0, {r6 - r11} mcr p15, 0, r6, c3, c0, 0 @ Domain ID mcr p15, 0, r7, c2, c0, 0 @ TTB 0 mcr 
p15, 0, r8, c2, c0, 1 @ TTB 1 @@ -422,9 +418,9 @@ ENTRY(v7_processor_functions) .word cpu_v7_dcache_clean_area .word cpu_v7_switch_mm .word cpu_v7_set_pte_ext - .word cpu_v7_suspend_size - .word cpu_v7_do_suspend - .word cpu_v7_do_resume + .word 0 + .word 0 + .word 0 .size v7_processor_functions, . - v7_processor_functions .section ".rodata" diff --git a/trunk/arch/arm/plat-iop/cp6.c b/trunk/arch/arm/plat-iop/cp6.c index bab73e2c79db..9612a87e2a88 100644 --- a/trunk/arch/arm/plat-iop/cp6.c +++ b/trunk/arch/arm/plat-iop/cp6.c @@ -18,7 +18,6 @@ */ #include #include -#include static int cp6_trap(struct pt_regs *regs, unsigned int instr) { diff --git a/trunk/arch/m32r/include/asm/mmzone.h b/trunk/arch/m32r/include/asm/mmzone.h index 115ced33febd..9f3b5accda88 100644 --- a/trunk/arch/m32r/include/asm/mmzone.h +++ b/trunk/arch/m32r/include/asm/mmzone.h @@ -14,6 +14,12 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) \ +({ \ + pg_data_t *__pgdat = NODE_DATA(nid); \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1; \ +}) #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) /* @@ -38,7 +44,7 @@ static __inline__ int pfn_to_nid(unsigned long pfn) int node; for (node = 0 ; node < MAX_NUMNODES ; node++) - if (pfn >= node_start_pfn(node) && pfn < node_end_pfn(node)) + if (pfn >= node_start_pfn(node) && pfn <= node_end_pfn(node)) break; return node; diff --git a/trunk/arch/mips/include/asm/stacktrace.h b/trunk/arch/mips/include/asm/stacktrace.h index 780ee2c2a2ac..0bf82818aa53 100644 --- a/trunk/arch/mips/include/asm/stacktrace.h +++ b/trunk/arch/mips/include/asm/stacktrace.h @@ -7,10 +7,6 @@ extern int raw_show_trace; extern unsigned long unwind_stack(struct task_struct *task, unsigned long *sp, unsigned long pc, unsigned long *ra); -extern unsigned long unwind_stack_by_address(unsigned long stack_page, - unsigned long *sp, - unsigned long pc, - unsigned long *ra); #else #define raw_show_trace 1 static inline unsigned long unwind_stack(struct task_struct *task, diff --git a/trunk/arch/mips/kernel/perf_event.c b/trunk/arch/mips/kernel/perf_event.c index d0deaab9ace2..a8244854d3dc 100644 --- a/trunk/arch/mips/kernel/perf_event.c +++ b/trunk/arch/mips/kernel/perf_event.c @@ -527,7 +527,7 @@ handle_associated_event(struct cpu_hw_events *cpuc, if (!mipspmu_event_set_period(event, hwc, idx)) return; - if (perf_event_overflow(event, data, regs)) + if (perf_event_overflow(event, 0, data, regs)) mipspmu->disable_event(idx); } diff --git a/trunk/arch/mips/kernel/perf_event_mipsxx.c b/trunk/arch/mips/kernel/perf_event_mipsxx.c index e5ad09a9baf7..75266ff4cc33 100644 --- a/trunk/arch/mips/kernel/perf_event_mipsxx.c +++ b/trunk/arch/mips/kernel/perf_event_mipsxx.c @@ -377,20 +377,6 @@ static const struct mips_perf_event mipsxxcore_cache_map [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, }, }, -[C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { UNSUPPORTED_PERF_EVENT_ID }, - [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { UNSUPPORTED_PERF_EVENT_ID }, - [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = { UNSUPPORTED_PERF_EVENT_ID }, - [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, - }, -}, }; /* 74K core has completely different cache event map. 
*/ @@ -494,20 +480,6 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, }, }, -[C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { UNSUPPORTED_PERF_EVENT_ID }, - [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { UNSUPPORTED_PERF_EVENT_ID }, - [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = { UNSUPPORTED_PERF_EVENT_ID }, - [C(RESULT_MISS)] = { UNSUPPORTED_PERF_EVENT_ID }, - }, -}, }; #ifdef CONFIG_MIPS_MT_SMP diff --git a/trunk/arch/mips/kernel/process.c b/trunk/arch/mips/kernel/process.c index c28fbe6107bc..d2112d3cf115 100644 --- a/trunk/arch/mips/kernel/process.c +++ b/trunk/arch/mips/kernel/process.c @@ -373,18 +373,18 @@ unsigned long thread_saved_pc(struct task_struct *tsk) #ifdef CONFIG_KALLSYMS -/* generic stack unwinding function */ -unsigned long notrace unwind_stack_by_address(unsigned long stack_page, - unsigned long *sp, - unsigned long pc, - unsigned long *ra) +/* used by show_backtrace() */ +unsigned long unwind_stack(struct task_struct *task, unsigned long *sp, + unsigned long pc, unsigned long *ra) { + unsigned long stack_page; struct mips_frame_info info; unsigned long size, ofs; int leaf; extern void ret_from_irq(void); extern void ret_from_exception(void); + stack_page = (unsigned long)task_stack_page(task); if (!stack_page) return 0; @@ -443,15 +443,6 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page, *ra = 0; return __kernel_text_address(pc) ? pc : 0; } -EXPORT_SYMBOL(unwind_stack_by_address); - -/* used by show_backtrace() */ -unsigned long unwind_stack(struct task_struct *task, unsigned long *sp, - unsigned long pc, unsigned long *ra) -{ - unsigned long stack_page = (unsigned long)task_stack_page(task); - return unwind_stack_by_address(stack_page, sp, pc, ra); -} #endif /* diff --git a/trunk/arch/mips/kernel/traps.c b/trunk/arch/mips/kernel/traps.c index b7517e3abc85..e9b3af27d844 100644 --- a/trunk/arch/mips/kernel/traps.c +++ b/trunk/arch/mips/kernel/traps.c @@ -578,12 +578,12 @@ static int simulate_llsc(struct pt_regs *regs, unsigned int opcode) { if ((opcode & OPCODE) == LL) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, - 1, regs, 0); + 1, 0, regs, 0); return simulate_ll(regs, opcode); } if ((opcode & OPCODE) == SC) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, - 1, regs, 0); + 1, 0, regs, 0); return simulate_sc(regs, opcode); } @@ -602,7 +602,7 @@ static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode) int rd = (opcode & RD) >> 11; int rt = (opcode & RT) >> 16; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, - 1, regs, 0); + 1, 0, regs, 0); switch (rd) { case 0: /* CPU number */ regs->regs[rt] = smp_processor_id(); @@ -640,7 +640,7 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) { if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, - 1, regs, 0); + 1, 0, regs, 0); return 0; } diff --git a/trunk/arch/mips/kernel/unaligned.c b/trunk/arch/mips/kernel/unaligned.c index eb319b580353..cfea1adfa153 100644 --- a/trunk/arch/mips/kernel/unaligned.c +++ b/trunk/arch/mips/kernel/unaligned.c @@ -111,7 +111,8 @@ static void emulate_load_store_insn(struct pt_regs *regs, unsigned long value; unsigned int res; - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, + 1, 0, regs, 0); /* * This load never faults. 
@@ -516,7 +517,7 @@ asmlinkage void do_ade(struct pt_regs *regs) mm_segment_t seg; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, - 1, regs, regs->cp0_badvaddr); + 1, 0, regs, regs->cp0_badvaddr); /* * Did we catch a fault trying to load an instruction? * Or are we running in MIPS16 mode? diff --git a/trunk/arch/mips/math-emu/cp1emu.c b/trunk/arch/mips/math-emu/cp1emu.c index dbf2f93a5091..d32cb0503110 100644 --- a/trunk/arch/mips/math-emu/cp1emu.c +++ b/trunk/arch/mips/math-emu/cp1emu.c @@ -272,7 +272,8 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, } emul: - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, + 1, 0, xcp, 0); MIPS_FPU_EMU_INC_STATS(emulated); switch (MIPSInst_OPCODE(ir)) { case ldc1_op:{ diff --git a/trunk/arch/mips/mm/fault.c b/trunk/arch/mips/mm/fault.c index 937cf3368164..137ee76a0045 100644 --- a/trunk/arch/mips/mm/fault.c +++ b/trunk/arch/mips/mm/fault.c @@ -145,7 +145,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ * the fault. */ fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -154,10 +154,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long writ BUG(); } if (fault & VM_FAULT_MAJOR) { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, + 1, 0, regs, address); tsk->maj_flt++; } else { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, + 1, 0, regs, address); tsk->min_flt++; } diff --git a/trunk/arch/mips/oprofile/Makefile b/trunk/arch/mips/oprofile/Makefile index 29f2f13eb31c..4b9d7044e26c 100644 --- a/trunk/arch/mips/oprofile/Makefile +++ b/trunk/arch/mips/oprofile/Makefile @@ -8,7 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) common.o backtrace.o +oprofile-y := $(DRIVER_OBJS) common.o oprofile-$(CONFIG_CPU_MIPS32) += op_model_mipsxx.o oprofile-$(CONFIG_CPU_MIPS64) += op_model_mipsxx.o diff --git a/trunk/arch/mips/oprofile/backtrace.c b/trunk/arch/mips/oprofile/backtrace.c deleted file mode 100644 index 6854ed5097d2..000000000000 --- a/trunk/arch/mips/oprofile/backtrace.c +++ /dev/null @@ -1,175 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct stackframe { - unsigned long sp; - unsigned long pc; - unsigned long ra; -}; - -static inline int get_mem(unsigned long addr, unsigned long *result) -{ - unsigned long *address = (unsigned long *) addr; - if (!access_ok(VERIFY_READ, addr, sizeof(unsigned long))) - return -1; - if (__copy_from_user_inatomic(result, address, sizeof(unsigned long))) - return -3; - return 0; -} - -/* - * These two instruction helpers were taken from process.c - */ -static inline int is_ra_save_ins(union mips_instruction *ip) -{ - /* sw / sd $ra, offset($sp) */ - return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) - && ip->i_format.rs == 29 && ip->i_format.rt == 31; -} - -static inline int is_sp_move_ins(union mips_instruction *ip) -{ - /* addiu/daddiu sp,sp,-imm */ - if (ip->i_format.rs != 29 || ip->i_format.rt != 29) - return 0; - if (ip->i_format.opcode == 
addiu_op || ip->i_format.opcode == daddiu_op) - return 1; - return 0; -} - -/* - * Looks for specific instructions that mark the end of a function. - * This usually means we ran into the code area of the previous function. - */ -static inline int is_end_of_function_marker(union mips_instruction *ip) -{ - /* jr ra */ - if (ip->r_format.func == jr_op && ip->r_format.rs == 31) - return 1; - /* lui gp */ - if (ip->i_format.opcode == lui_op && ip->i_format.rt == 28) - return 1; - return 0; -} - -/* - * TODO for userspace stack unwinding: - * - handle cases where the stack is adjusted inside a function - * (generally doesn't happen) - * - find optimal value for max_instr_check - * - try to find a way to handle leaf functions - */ - -static inline int unwind_user_frame(struct stackframe *old_frame, - const unsigned int max_instr_check) -{ - struct stackframe new_frame = *old_frame; - off_t ra_offset = 0; - size_t stack_size = 0; - unsigned long addr; - - if (old_frame->pc == 0 || old_frame->sp == 0 || old_frame->ra == 0) - return -9; - - for (addr = new_frame.pc; (addr + max_instr_check > new_frame.pc) - && (!ra_offset || !stack_size); --addr) { - union mips_instruction ip; - - if (get_mem(addr, (unsigned long *) &ip)) - return -11; - - if (is_sp_move_ins(&ip)) { - int stack_adjustment = ip.i_format.simmediate; - if (stack_adjustment > 0) - /* This marks the end of the previous function, - which means we overran. */ - break; - stack_size = (unsigned) stack_adjustment; - } else if (is_ra_save_ins(&ip)) { - int ra_slot = ip.i_format.simmediate; - if (ra_slot < 0) - /* This shouldn't happen. */ - break; - ra_offset = ra_slot; - } else if (is_end_of_function_marker(&ip)) - break; - } - - if (!ra_offset || !stack_size) - return -1; - - if (ra_offset) { - new_frame.ra = old_frame->sp + ra_offset; - if (get_mem(new_frame.ra, &(new_frame.ra))) - return -13; - } - - if (stack_size) { - new_frame.sp = old_frame->sp + stack_size; - if (get_mem(new_frame.sp, &(new_frame.sp))) - return -14; - } - - if (new_frame.sp > old_frame->sp) - return -2; - - new_frame.pc = old_frame->ra; - *old_frame = new_frame; - - return 0; -} - -static inline void do_user_backtrace(unsigned long low_addr, - struct stackframe *frame, - unsigned int depth) -{ - const unsigned int max_instr_check = 512; - const unsigned long high_addr = low_addr + THREAD_SIZE; - - while (depth-- && !unwind_user_frame(frame, max_instr_check)) { - oprofile_add_trace(frame->ra); - if (frame->sp < low_addr || frame->sp > high_addr) - break; - } -} - -#ifndef CONFIG_KALLSYMS -static inline void do_kernel_backtrace(unsigned long low_addr, - struct stackframe *frame, - unsigned int depth) { } -#else -static inline void do_kernel_backtrace(unsigned long low_addr, - struct stackframe *frame, - unsigned int depth) -{ - while (depth-- && frame->pc) { - frame->pc = unwind_stack_by_address(low_addr, - &(frame->sp), - frame->pc, - &(frame->ra)); - oprofile_add_trace(frame->ra); - } -} -#endif - -void notrace op_mips_backtrace(struct pt_regs *const regs, unsigned int depth) -{ - struct stackframe frame = { .sp = regs->regs[29], - .pc = regs->cp0_epc, - .ra = regs->regs[31] }; - const int userspace = user_mode(regs); - const unsigned long low_addr = ALIGN(frame.sp, THREAD_SIZE); - - if (userspace) - do_user_backtrace(low_addr, &frame, depth); - else - do_kernel_backtrace(low_addr, &frame, depth); -} diff --git a/trunk/arch/mips/oprofile/common.c b/trunk/arch/mips/oprofile/common.c index d1f2d4c52d42..f9eb1aba6345 100644 --- a/trunk/arch/mips/oprofile/common.c +++ 
b/trunk/arch/mips/oprofile/common.c @@ -115,7 +115,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->start = op_mips_start; ops->stop = op_mips_stop; ops->cpu_type = lmodel->cpu_type; - ops->backtrace = op_mips_backtrace; printk(KERN_INFO "oprofile: using %s performance monitoring.\n", lmodel->cpu_type); diff --git a/trunk/arch/mips/oprofile/op_impl.h b/trunk/arch/mips/oprofile/op_impl.h index 7c2da27ece04..f04b54fb37d1 100644 --- a/trunk/arch/mips/oprofile/op_impl.h +++ b/trunk/arch/mips/oprofile/op_impl.h @@ -36,6 +36,4 @@ struct op_mips_model { unsigned char num_counters; }; -void op_mips_backtrace(struct pt_regs * const regs, unsigned int depth); - #endif diff --git a/trunk/arch/mn10300/include/asm/uaccess.h b/trunk/arch/mn10300/include/asm/uaccess.h index 780560b330d9..3d6e60dad9d9 100644 --- a/trunk/arch/mn10300/include/asm/uaccess.h +++ b/trunk/arch/mn10300/include/asm/uaccess.h @@ -15,7 +15,6 @@ * User space memory access functions */ #include -#include #include #include diff --git a/trunk/arch/parisc/include/asm/mmzone.h b/trunk/arch/parisc/include/asm/mmzone.h index e67eb9c3d1bf..9608d2cf214a 100644 --- a/trunk/arch/parisc/include/asm/mmzone.h +++ b/trunk/arch/parisc/include/asm/mmzone.h @@ -14,6 +14,13 @@ extern struct node_map_data node_data[]; #define NODE_DATA(nid) (&node_data[nid].pg_data) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) \ +({ \ + pg_data_t *__pgdat = NODE_DATA(nid); \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ +}) + /* We have these possible memory map layouts: * Astro: 0-3.75, 67.75-68, 4-64 * zx1: 0-1, 257-260, 4-256 diff --git a/trunk/arch/powerpc/include/asm/emulated_ops.h b/trunk/arch/powerpc/include/asm/emulated_ops.h index 2cc41c715d2b..45921672b97a 100644 --- a/trunk/arch/powerpc/include/asm/emulated_ops.h +++ b/trunk/arch/powerpc/include/asm/emulated_ops.h @@ -78,14 +78,14 @@ extern void ppc_warn_emulated_print(const char *type); #define PPC_WARN_EMULATED(type, regs) \ do { \ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \ - 1, regs, 0); \ + 1, 0, regs, 0); \ __PPC_WARN_EMULATED(type); \ } while (0) #define PPC_WARN_ALIGNMENT(type, regs) \ do { \ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \ - 1, regs, regs->dar); \ + 1, 0, regs, regs->dar); \ __PPC_WARN_EMULATED(type); \ } while (0) diff --git a/trunk/arch/powerpc/include/asm/hw_breakpoint.h b/trunk/arch/powerpc/include/asm/hw_breakpoint.h index 80fd4d2b4a62..1c33ec17ca36 100644 --- a/trunk/arch/powerpc/include/asm/hw_breakpoint.h +++ b/trunk/arch/powerpc/include/asm/hw_breakpoint.h @@ -57,7 +57,7 @@ void hw_breakpoint_pmu_read(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); extern struct pmu perf_ops_bp; -extern void ptrace_triggered(struct perf_event *bp, +extern void ptrace_triggered(struct perf_event *bp, int nmi, struct perf_sample_data *data, struct pt_regs *regs); static inline void hw_breakpoint_disable(void) { diff --git a/trunk/arch/powerpc/include/asm/mmzone.h b/trunk/arch/powerpc/include/asm/mmzone.h index 7b589178be46..fd3fd58bad84 100644 --- a/trunk/arch/powerpc/include/asm/mmzone.h +++ b/trunk/arch/powerpc/include/asm/mmzone.h @@ -38,6 +38,13 @@ u64 memory_hotplug_max(void); #define memory_hotplug_max() memblock_end_of_DRAM() #endif +/* + * Following are macros that each numa implementation must define.
+ */ + +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) + #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif /* CONFIG_NEED_MULTIPLE_NODES */ diff --git a/trunk/arch/powerpc/kernel/e500-pmu.c b/trunk/arch/powerpc/kernel/e500-pmu.c index cb2e2949c8d1..b150b510510f 100644 --- a/trunk/arch/powerpc/kernel/e500-pmu.c +++ b/trunk/arch/powerpc/kernel/e500-pmu.c @@ -75,11 +75,6 @@ static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; static int num_events = 128; diff --git a/trunk/arch/powerpc/kernel/mpc7450-pmu.c b/trunk/arch/powerpc/kernel/mpc7450-pmu.c index 845a58478890..2cc5e0301d0b 100644 --- a/trunk/arch/powerpc/kernel/mpc7450-pmu.c +++ b/trunk/arch/powerpc/kernel/mpc7450-pmu.c @@ -388,11 +388,6 @@ static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; struct power_pmu mpc7450_pmu = { diff --git a/trunk/arch/powerpc/kernel/perf_event.c b/trunk/arch/powerpc/kernel/perf_event.c index 14967de98876..822f63008ae1 100644 --- a/trunk/arch/powerpc/kernel/perf_event.c +++ b/trunk/arch/powerpc/kernel/perf_event.c @@ -1207,7 +1207,7 @@ struct pmu power_pmu = { * here so there is no possibility of being interrupted. */ static void record_and_restart(struct perf_event *event, unsigned long val, - struct pt_regs *regs) + struct pt_regs *regs, int nmi) { u64 period = event->hw.sample_period; s64 prev, delta, left; @@ -1258,7 +1258,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, nmi, &data, regs)) power_pmu_stop(event, 0); } } @@ -1346,7 +1346,7 @@ static void perf_event_interrupt(struct pt_regs *regs) if ((int)val < 0) { /* event has overflowed */ found = 1; - record_and_restart(event, val, regs); + record_and_restart(event, val, regs, nmi); } } diff --git a/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c b/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c index 0a6d2a9d569c..b0dc8f7069cd 100644 --- a/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/trunk/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -568,7 +568,7 @@ static struct pmu fsl_emb_pmu = { * here so there is no possibility of being interrupted. 
*/ static void record_and_restart(struct perf_event *event, unsigned long val, - struct pt_regs *regs) + struct pt_regs *regs, int nmi) { u64 period = event->hw.sample_period; s64 prev, delta, left; @@ -616,7 +616,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, nmi, &data, regs)) fsl_emb_pmu_stop(event, 0); } } @@ -644,7 +644,7 @@ static void perf_event_interrupt(struct pt_regs *regs) if (event) { /* event has overflowed */ found = 1; - record_and_restart(event, val, regs); + record_and_restart(event, val, regs, nmi); } else { /* * Disabled counter is negative, diff --git a/trunk/arch/powerpc/kernel/power4-pmu.c b/trunk/arch/powerpc/kernel/power4-pmu.c index e9dbc2d35c9c..ead8b3c2649e 100644 --- a/trunk/arch/powerpc/kernel/power4-pmu.c +++ b/trunk/arch/powerpc/kernel/power4-pmu.c @@ -587,11 +587,6 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; static struct power_pmu power4_pmu = { diff --git a/trunk/arch/powerpc/kernel/power5+-pmu.c b/trunk/arch/powerpc/kernel/power5+-pmu.c index f58a2bd41b59..eca0ac595cb6 100644 --- a/trunk/arch/powerpc/kernel/power5+-pmu.c +++ b/trunk/arch/powerpc/kernel/power5+-pmu.c @@ -653,11 +653,6 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; static struct power_pmu power5p_pmu = { diff --git a/trunk/arch/powerpc/kernel/power5-pmu.c b/trunk/arch/powerpc/kernel/power5-pmu.c index b1acab684142..d5ff0f64a5e6 100644 --- a/trunk/arch/powerpc/kernel/power5-pmu.c +++ b/trunk/arch/powerpc/kernel/power5-pmu.c @@ -595,11 +595,6 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; static struct power_pmu power5_pmu = { diff --git a/trunk/arch/powerpc/kernel/power6-pmu.c b/trunk/arch/powerpc/kernel/power6-pmu.c index b24a3a23d073..31603927e376 100644 --- a/trunk/arch/powerpc/kernel/power6-pmu.c +++ b/trunk/arch/powerpc/kernel/power6-pmu.c @@ -516,11 +516,6 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; static struct power_pmu power6_pmu = { diff --git a/trunk/arch/powerpc/kernel/power7-pmu.c b/trunk/arch/powerpc/kernel/power7-pmu.c index 6d9dccb2ea59..593740fcb799 100644 --- a/trunk/arch/powerpc/kernel/power7-pmu.c +++ b/trunk/arch/powerpc/kernel/power7-pmu.c @@ -342,11 +342,6 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; static struct power_pmu 
power7_pmu = { diff --git a/trunk/arch/powerpc/kernel/ppc970-pmu.c b/trunk/arch/powerpc/kernel/ppc970-pmu.c index b121de9658eb..9a6e093858fe 100644 --- a/trunk/arch/powerpc/kernel/ppc970-pmu.c +++ b/trunk/arch/powerpc/kernel/ppc970-pmu.c @@ -467,11 +467,6 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { -1, -1 }, }, - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { -1, -1 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, }; static struct power_pmu ppc970_pmu = { diff --git a/trunk/arch/powerpc/kernel/ptrace.c b/trunk/arch/powerpc/kernel/ptrace.c index 05b7dd217f60..cb22024f2b42 100644 --- a/trunk/arch/powerpc/kernel/ptrace.c +++ b/trunk/arch/powerpc/kernel/ptrace.c @@ -882,7 +882,7 @@ void user_disable_single_step(struct task_struct *task) } #ifdef CONFIG_HAVE_HW_BREAKPOINT -void ptrace_triggered(struct perf_event *bp, +void ptrace_triggered(struct perf_event *bp, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct perf_event_attr attr; @@ -973,7 +973,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, &attr.bp_type); thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, - ptrace_triggered, NULL, task); + ptrace_triggered, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; ptrace_put_breakpoints(task); diff --git a/trunk/arch/powerpc/kernel/time.c b/trunk/arch/powerpc/kernel/time.c index 03b29a6759ab..f33acfd872ad 100644 --- a/trunk/arch/powerpc/kernel/time.c +++ b/trunk/arch/powerpc/kernel/time.c @@ -544,7 +544,7 @@ DEFINE_PER_CPU(u8, irq_work_pending); #endif /* 32 vs 64 bit */ -void arch_irq_work_raise(void) +void set_irq_work_pending(void) { preempt_disable(); set_irq_work_pending_flag(); diff --git a/trunk/arch/powerpc/mm/fault.c b/trunk/arch/powerpc/mm/fault.c index dbc48254c6cc..54f4fb994e99 100644 --- a/trunk/arch/powerpc/mm/fault.c +++ b/trunk/arch/powerpc/mm/fault.c @@ -173,7 +173,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the @@ -319,7 +319,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -330,7 +330,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, #endif } else { current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig index c03fef7a9c22..90d77bd078f5 100644 --- a/trunk/arch/s390/Kconfig +++ b/trunk/arch/s390/Kconfig @@ -579,7 +579,6 @@ config S390_GUEST def_bool y prompt "s390 guest support for KVM (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL - select VIRTUALIZATION select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c index 1d55c95f617c..52420d2785b3 100644 --- a/trunk/arch/s390/kernel/smp.c +++ b/trunk/arch/s390/kernel/smp.c @@ -262,7 +262,7 @@ void smp_ctl_set_bit(int cr, int bit) memset(&parms.orvals, 0, sizeof(parms.orvals)); memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.orvals[cr] = 1UL << bit; + parms.orvals[cr] = 1 << bit; on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -276,7 +276,7 @@ void smp_ctl_clear_bit(int cr, int bit) memset(&parms.orvals, 0, sizeof(parms.orvals)); memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.andvals[cr] = ~(1UL << bit); + parms.andvals[cr] = ~(1L << bit); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); diff --git a/trunk/arch/s390/mm/fault.c b/trunk/arch/s390/mm/fault.c index 095f782a5512..fe103e891e7a 100644 --- a/trunk/arch/s390/mm/fault.c +++ b/trunk/arch/s390/mm/fault.c @@ -299,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access, goto out; address = trans_exc_code & __FAIL_ADDR_MASK; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); flags = FAULT_FLAG_ALLOW_RETRY; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; @@ -345,11 +345,11 @@ static inline int do_exception(struct pt_regs *regs, int access, if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } if (fault & VM_FAULT_RETRY) { diff --git a/trunk/arch/s390/oprofile/init.c b/trunk/arch/s390/oprofile/init.c index 0e358c2cffeb..5995e9bc72d9 100644 --- a/trunk/arch/s390/oprofile/init.c +++ b/trunk/arch/s390/oprofile/init.c @@ -25,7 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); #include "hwsampler.h" -#define DEFAULT_INTERVAL 4127518 +#define DEFAULT_INTERVAL 4096 #define DEFAULT_SDBT_BLOCKS 1 #define DEFAULT_SDB_BLOCKS 511 @@ -151,12 +151,6 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) if (oprofile_max_interval == 0) return -ENODEV; - /* The initial value should be sane */ - if (oprofile_hw_interval < oprofile_min_interval) - oprofile_hw_interval 
= oprofile_min_interval; - if (oprofile_hw_interval > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - if (oprofile_timer_init(ops)) return -ENODEV; diff --git a/trunk/arch/sh/include/asm/mmzone.h b/trunk/arch/sh/include/asm/mmzone.h index 15a8496960e6..8887baff5eff 100644 --- a/trunk/arch/sh/include/asm/mmzone.h +++ b/trunk/arch/sh/include/asm/mmzone.h @@ -9,6 +9,10 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ + NODE_DATA(nid)->node_spanned_pages) + static inline int pfn_to_nid(unsigned long pfn) { int nid; diff --git a/trunk/arch/sh/kernel/cpu/sh4/perf_event.c b/trunk/arch/sh/kernel/cpu/sh4/perf_event.c index fa4f724b295a..748955df018d 100644 --- a/trunk/arch/sh/kernel/cpu/sh4/perf_event.c +++ b/trunk/arch/sh/kernel/cpu/sh4/perf_event.c @@ -180,21 +180,6 @@ static const int sh7750_cache_events [ C(RESULT_MISS) ] = -1, }, }, - - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, }; static int sh7750_event_map(int event) diff --git a/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c b/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c index 84a2c396ceee..17e6bebfede0 100644 --- a/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c +++ b/trunk/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -205,21 +205,6 @@ static const int sh4a_cache_events [ C(RESULT_MISS) ] = -1, }, }, - - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, }; static int sh4a_event_map(int event) diff --git a/trunk/arch/sh/kernel/ptrace_32.c b/trunk/arch/sh/kernel/ptrace_32.c index 92b3c276339a..3d7b209b2178 100644 --- a/trunk/arch/sh/kernel/ptrace_32.c +++ b/trunk/arch/sh/kernel/ptrace_32.c @@ -63,7 +63,7 @@ static inline int put_stack_long(struct task_struct *task, int offset, return 0; } -void ptrace_triggered(struct perf_event *bp, +void ptrace_triggered(struct perf_event *bp, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct perf_event_attr attr; @@ -91,8 +91,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr) attr.bp_len = HW_BREAKPOINT_LEN_2; attr.bp_type = HW_BREAKPOINT_R; - bp = register_user_hw_breakpoint(&attr, ptrace_triggered, - NULL, tsk); + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); if (IS_ERR(bp)) return PTR_ERR(bp); diff --git a/trunk/arch/sh/kernel/traps_32.c b/trunk/arch/sh/kernel/traps_32.c index d9006f8ffc14..b51a17104b5f 100644 --- a/trunk/arch/sh/kernel/traps_32.c +++ b/trunk/arch/sh/kernel/traps_32.c @@ -393,7 +393,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs, */ if (!expected) { unaligned_fixups_notify(current, instruction, regs); - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address); } diff --git a/trunk/arch/sh/kernel/traps_64.c b/trunk/arch/sh/kernel/traps_64.c index 67110be83fd7..6713ca97e553 100644 --- a/trunk/arch/sh/kernel/traps_64.c +++ b/trunk/arch/sh/kernel/traps_64.c @@ -434,7 +434,7 @@ static int misaligned_load(struct pt_regs *regs, 
return error; } - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address); destreg = (opcode >> 4) & 0x3f; if (user_mode(regs)) { @@ -512,7 +512,7 @@ static int misaligned_store(struct pt_regs *regs, return error; } - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address); srcreg = (opcode >> 4) & 0x3f; if (user_mode(regs)) { @@ -588,7 +588,7 @@ static int misaligned_fpu_load(struct pt_regs *regs, return error; } - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address); destreg = (opcode >> 4) & 0x3f; if (user_mode(regs)) { @@ -665,7 +665,7 @@ static int misaligned_fpu_store(struct pt_regs *regs, return error; } - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address); srcreg = (opcode >> 4) & 0x3f; if (user_mode(regs)) { diff --git a/trunk/arch/sh/math-emu/math.c b/trunk/arch/sh/math-emu/math.c index 977195210653..f76a5090d5d1 100644 --- a/trunk/arch/sh/math-emu/math.c +++ b/trunk/arch/sh/math-emu/math.c @@ -620,7 +620,7 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs) struct task_struct *tsk = current; struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu); - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); if (!(task_thread_info(tsk)->status & TS_USEDFPU)) { /* initialize once. */ diff --git a/trunk/arch/sh/mm/fault_32.c b/trunk/arch/sh/mm/fault_32.c index 7bebd044f2a1..d4c34d757f0d 100644 --- a/trunk/arch/sh/mm/fault_32.c +++ b/trunk/arch/sh/mm/fault_32.c @@ -160,7 +160,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if ((regs->sr & SR_IMASK) != SR_IMASK) local_irq_enable(); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -210,11 +210,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/trunk/arch/sh/mm/tlbflush_64.c b/trunk/arch/sh/mm/tlbflush_64.c index e3430e093d43..7f5810f5dfdc 100644 --- a/trunk/arch/sh/mm/tlbflush_64.c +++ b/trunk/arch/sh/mm/tlbflush_64.c @@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, /* Not an IO address, so reenable interrupts */ local_irq_enable(); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt or have no user @@ -200,11 +200,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/trunk/arch/sparc/include/asm/mmzone.h 
b/trunk/arch/sparc/include/asm/mmzone.h index 99d9b9f577bf..e8c648741ed4 100644 --- a/trunk/arch/sparc/include/asm/mmzone.h +++ b/trunk/arch/sparc/include/asm/mmzone.h @@ -8,6 +8,8 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) extern int numa_cpu_lookup_table[]; extern cpumask_t numa_cpumask_lookup_table[]; diff --git a/trunk/arch/sparc/kernel/perf_event.c b/trunk/arch/sparc/kernel/perf_event.c index 62a034318b18..2cb0e1c001e2 100644 --- a/trunk/arch/sparc/kernel/perf_event.c +++ b/trunk/arch/sparc/kernel/perf_event.c @@ -246,20 +246,6 @@ static const cache_map_t ultra3_cache_map = { [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, }, }, -[C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, - [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, - [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, - [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, -}, }; static const struct sparc_pmu ultra3_pmu = { @@ -375,20 +361,6 @@ static const cache_map_t niagara1_cache_map = { [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, }, }, -[C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, - [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, - [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, - [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, -}, }; static const struct sparc_pmu niagara1_pmu = { @@ -501,20 +473,6 @@ static const cache_map_t niagara2_cache_map = { [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, }, }, -[C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, - [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, - [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, - [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, - }, -}, }; static const struct sparc_pmu niagara2_pmu = { @@ -1319,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, if (!sparc_perf_event_set_period(event, hwc, idx)) continue; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 1, &data, regs)) sparc_pmu_stop(event, 0); } diff --git a/trunk/arch/sparc/kernel/unaligned_32.c b/trunk/arch/sparc/kernel/unaligned_32.c index 7efbb2f9e77f..4491f4cb2695 100644 --- a/trunk/arch/sparc/kernel/unaligned_32.c +++ b/trunk/arch/sparc/kernel/unaligned_32.c @@ -247,7 +247,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) unsigned long addr = compute_effective_address(regs, insn); int err; - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); switch (dir) { case load: err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), @@ -338,7 +338,7 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) } addr = compute_effective_address(regs, insn); - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); switch(dir) { 
case load: err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), diff --git a/trunk/arch/sparc/kernel/unaligned_64.c b/trunk/arch/sparc/kernel/unaligned_64.c index 35cff1673aa4..b2b019ea8caa 100644 --- a/trunk/arch/sparc/kernel/unaligned_64.c +++ b/trunk/arch/sparc/kernel/unaligned_64.c @@ -317,7 +317,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) addr = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f)); - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); switch (asi) { case ASI_NL: case ASI_AIUPL: @@ -384,7 +384,7 @@ int handle_popc(u32 insn, struct pt_regs *regs) int ret, i, rd = ((insn >> 25) & 0x1f); int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); if (insn & 0x2000) { maybe_flush_windows(0, 0, rd, from_kernel); value = sign_extend_imm13(insn); @@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) int asi = decode_asi(insn, regs); int flag = (freg < 32) ? FPRS_DL : FPRS_DU; - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); save_and_clear_fpu(); current_thread_info()->xfsr[0] &= ~0x1c000; @@ -554,7 +554,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs) int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; unsigned long *reg; - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); maybe_flush_windows(0, 0, rd, from_kernel); reg = fetch_reg_addr(rd, regs); @@ -586,7 +586,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr if (tstate & TSTATE_PRIV) die_if_kernel("lddfmna from kernel", regs); - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar); if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; if (get_user(insn, (u32 __user *) pc) != -EFAULT) { @@ -647,7 +647,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr if (tstate & TSTATE_PRIV) die_if_kernel("stdfmna from kernel", regs); - perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar); if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; if (get_user(insn, (u32 __user *) pc) != -EFAULT) { diff --git a/trunk/arch/sparc/kernel/visemul.c b/trunk/arch/sparc/kernel/visemul.c index 32b626c9d815..36357717d691 100644 --- a/trunk/arch/sparc/kernel/visemul.c +++ b/trunk/arch/sparc/kernel/visemul.c @@ -802,7 +802,7 @@ int vis_emul(struct pt_regs *regs, unsigned int insn) BUG_ON(regs->tstate & TSTATE_PRIV); - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; diff --git a/trunk/arch/sparc/math-emu/math_32.c b/trunk/arch/sparc/math-emu/math_32.c index aa4d55b0bdf0..a3fccde894ec 100644 --- a/trunk/arch/sparc/math-emu/math_32.c +++ b/trunk/arch/sparc/math-emu/math_32.c @@ -164,7 +164,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt) int retcode = 0; /* assume all succeed */ unsigned long insn; - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); #ifdef DEBUG_MATHEMU printk("In do_mathemu()... 
pc is %08lx\n", regs->pc); diff --git a/trunk/arch/sparc/math-emu/math_64.c b/trunk/arch/sparc/math-emu/math_64.c index e575bd2fe381..56d2c44747b8 100644 --- a/trunk/arch/sparc/math-emu/math_64.c +++ b/trunk/arch/sparc/math-emu/math_64.c @@ -184,7 +184,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f) if (tstate & TSTATE_PRIV) die_if_kernel("unfinished/unimplemented FPop from kernel", regs); - perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); if (test_thread_flag(TIF_32BIT)) pc = (u32)pc; if (get_user(insn, (u32 __user *) pc) != -EFAULT) { diff --git a/trunk/arch/sparc/mm/fault_32.c b/trunk/arch/sparc/mm/fault_32.c index aa1c1b1ce5cc..7543ddbdadb2 100644 --- a/trunk/arch/sparc/mm/fault_32.c +++ b/trunk/arch/sparc/mm/fault_32.c @@ -251,7 +251,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, if (in_atomic() || !mm) goto no_context; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); @@ -301,10 +301,12 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, } if (fault & VM_FAULT_MAJOR) { current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); } else { current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); } up_read(&mm->mmap_sem); return; diff --git a/trunk/arch/sparc/mm/fault_64.c b/trunk/arch/sparc/mm/fault_64.c index 504c0622f729..f92ce56a8b22 100644 --- a/trunk/arch/sparc/mm/fault_64.c +++ b/trunk/arch/sparc/mm/fault_64.c @@ -325,7 +325,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) if (in_atomic() || !mm) goto intr_or_no_mm; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); if (!down_read_trylock(&mm->mmap_sem)) { if ((regs->tstate & TSTATE_PRIV) && @@ -433,10 +433,12 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) } if (fault & VM_FAULT_MAJOR) { current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); } else { current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); } up_read(&mm->mmap_sem); diff --git a/trunk/arch/tile/include/asm/mmzone.h b/trunk/arch/tile/include/asm/mmzone.h index 9d3dbce8f953..c6344c4f32ac 100644 --- a/trunk/arch/tile/include/asm/mmzone.h +++ b/trunk/arch/tile/include/asm/mmzone.h @@ -40,6 +40,17 @@ static inline int pfn_to_nid(unsigned long pfn) return highbits_to_node[__pfn_to_highbits(pfn)]; } +/* + * Following are macros that each numa implmentation must define. 
+ */ + +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) \ +({ \ + pg_data_t *__pgdat = NODE_DATA(nid); \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ +}) + #define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr) static inline int pfn_valid(int pfn) diff --git a/trunk/arch/um/include/asm/percpu.h b/trunk/arch/um/include/asm/percpu.h deleted file mode 100644 index efe7508d8abd..000000000000 --- a/trunk/arch/um/include/asm/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __UM_PERCPU_H -#define __UM_PERCPU_H - -#include - -#endif /* __UM_PERCPU_H */ diff --git a/trunk/arch/x86/include/asm/irqflags.h b/trunk/arch/x86/include/asm/irqflags.h index bba3cf88e624..5745ce8bf108 100644 --- a/trunk/arch/x86/include/asm/irqflags.h +++ b/trunk/arch/x86/include/asm/irqflags.h @@ -60,24 +60,23 @@ static inline void native_halt(void) #include #else #ifndef __ASSEMBLY__ -#include -static inline notrace unsigned long arch_local_save_flags(void) +static inline unsigned long arch_local_save_flags(void) { return native_save_fl(); } -static inline notrace void arch_local_irq_restore(unsigned long flags) +static inline void arch_local_irq_restore(unsigned long flags) { native_restore_fl(flags); } -static inline notrace void arch_local_irq_disable(void) +static inline void arch_local_irq_disable(void) { native_irq_disable(); } -static inline notrace void arch_local_irq_enable(void) +static inline void arch_local_irq_enable(void) { native_irq_enable(); } @@ -103,7 +102,7 @@ static inline void halt(void) /* * For spinlocks, etc: */ -static inline notrace unsigned long arch_local_irq_save(void) +static inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); diff --git a/trunk/arch/x86/include/asm/mmzone_32.h b/trunk/arch/x86/include/asm/mmzone_32.h index 224e8c5eb307..5e83a416eca8 100644 --- a/trunk/arch/x86/include/asm/mmzone_32.h +++ b/trunk/arch/x86/include/asm/mmzone_32.h @@ -48,6 +48,17 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } +/* + * Following are macros that each numa implmentation must define. 
+ */ + +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) \ +({ \ + pg_data_t *__pgdat = NODE_DATA(nid); \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ +}) + static inline int pfn_valid(int pfn) { int nid = pfn_to_nid(pfn); diff --git a/trunk/arch/x86/include/asm/mmzone_64.h b/trunk/arch/x86/include/asm/mmzone_64.h index 129d9aa3ceb3..b3f88d7867c7 100644 --- a/trunk/arch/x86/include/asm/mmzone_64.h +++ b/trunk/arch/x86/include/asm/mmzone_64.h @@ -13,5 +13,8 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ + NODE_DATA(nid)->node_spanned_pages) #endif #endif /* _ASM_X86_MMZONE_64_H */ diff --git a/trunk/arch/x86/include/asm/perf_event.h b/trunk/arch/x86/include/asm/perf_event.h index 094fb30817ab..d9d4dae305f6 100644 --- a/trunk/arch/x86/include/asm/perf_event.h +++ b/trunk/arch/x86/include/asm/perf_event.h @@ -152,11 +152,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); (regs)->bp = caller_frame_pointer(); \ (regs)->cs = __KERNEL_CS; \ regs->flags = 0; \ - asm volatile( \ - _ASM_MOV "%%"_ASM_SP ", %0\n" \ - : "=m" ((regs)->sp) \ - :: "memory" \ - ); \ } #else diff --git a/trunk/arch/x86/kernel/cpu/perf_event.c b/trunk/arch/x86/kernel/cpu/perf_event.c index c53d433c3dde..3a0338b4b179 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event.c +++ b/trunk/arch/x86/kernel/cpu/perf_event.c @@ -44,29 +44,6 @@ do { \ } while (0) #endif -/* - * | NHM/WSM | SNB | - * register ------------------------------- - * | HT | no HT | HT | no HT | - *----------------------------------------- - * offcore | core | core | cpu | core | - * lbr_sel | core | core | cpu | core | - * ld_lat | cpu | core | cpu | core | - *----------------------------------------- - * - * Given that there is a small number of shared regs, - * we can pre-allocate their slot in the per-cpu - * per-core reg tables. - */ -enum extra_reg_type { - EXTRA_REG_NONE = -1, /* not used */ - - EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ - EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ - - EXTRA_REG_MAX /* number of entries needed */ -}; - /* * best effort, GUP based copy_from_user() that assumes IRQ or NMI context */ @@ -155,10 +132,11 @@ struct cpu_hw_events { struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; /* - * manage shared (per-core, per-cpu) registers - * used on Intel NHM/WSM/SNB + * Intel percore register state. + * Coordinate shared resources between HT threads. */ - struct intel_shared_regs *shared_regs; + int percore_used; /* Used by this CPU? */ + struct intel_percore *per_core; /* * AMD specific bits @@ -208,46 +186,27 @@ struct cpu_hw_events { #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->weight; (e)++) -/* - * Per register state. - */ -struct er_account { - raw_spinlock_t lock; /* per-core: protect structure */ - u64 config; /* extra MSR config */ - u64 reg; /* extra MSR number */ - atomic_t ref; /* reference count */ -}; - /* * Extra registers for specific events. - * * Some events need large masks and require external MSRs. - * Those extra MSRs end up being shared for all events on - * a PMU and sometimes between PMU of sibling HT threads. - * In either case, the kernel needs to handle conflicting - * accesses to those extra, shared, regs. The data structure - * to manage those registers is stored in cpu_hw_event. + * Define a mapping to these extra registers. 
*/ struct extra_reg { unsigned int event; unsigned int msr; u64 config_mask; u64 valid_mask; - int idx; /* per_xxx->regs[] reg index */ }; -#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ +#define EVENT_EXTRA_REG(e, ms, m, vm) { \ .event = (e), \ .msr = (ms), \ .config_mask = (m), \ .valid_mask = (vm), \ - .idx = EXTRA_REG_##i \ } - -#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ - EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) - -#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) +#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) union perf_capabilities { struct { @@ -274,7 +233,6 @@ struct x86_pmu { void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); - void (*hw_watchdog_set_attr)(struct perf_event_attr *attr); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; @@ -294,6 +252,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + struct event_constraint *percore_constraints; void (*quirks)(void); int perfctr_second_write; @@ -327,12 +286,8 @@ struct x86_pmu { * Extra registers for events */ struct extra_reg *extra_regs; - unsigned int er_flags; }; -#define ERF_NO_HT_SHARING 1 -#define ERF_HAS_RSP_1 2 - static struct x86_pmu x86_pmu __read_mostly; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { @@ -360,12 +315,6 @@ static u64 __read_mostly hw_cache_extra_regs [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -void hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr) -{ - if (x86_pmu.hw_watchdog_set_attr) - x86_pmu.hw_watchdog_set_attr(wd_attr); -} - /* * Propagate event elapsed time into the generic event. * Can only be executed on the CPU where the event is active. 
@@ -444,10 +393,10 @@ static inline unsigned int x86_pmu_event_addr(int index) */ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) { - struct hw_perf_event_extra *reg; struct extra_reg *er; - reg = &event->hw.extra_reg; + event->hw.extra_reg = 0; + event->hw.extra_config = 0; if (!x86_pmu.extra_regs) return 0; @@ -457,10 +406,8 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; - - reg->idx = er->idx; - reg->config = event->attr.config1; - reg->reg = er->msr; + event->hw.extra_reg = er->msr; + event->hw.extra_config = event->attr.config1; break; } return 0; @@ -759,9 +706,6 @@ static int __x86_pmu_event_init(struct perf_event *event) event->hw.last_cpu = -1; event->hw.last_tag = ~0ULL; - /* mark unused */ - event->hw.extra_reg.idx = EXTRA_REG_NONE; - return x86_pmu.hw_config(event); } @@ -803,8 +747,8 @@ static void x86_pmu_disable(struct pmu *pmu) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { - if (hwc->extra_reg.reg) - wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); + if (hwc->extra_reg) + wrmsrl(hwc->extra_reg, hwc->extra_config); wrmsrl(hwc->config_base, hwc->config | enable_mask); } @@ -1388,7 +1332,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 1, &data, regs)) x86_pmu_stop(event, 0); } @@ -1693,40 +1637,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu) perf_pmu_enable(pmu); return 0; } -/* - * a fake_cpuc is used to validate event groups. Due to - * the extra reg logic, we need to also allocate a fake - * per_core and per_cpu structure. Otherwise, group events - * using extra reg may conflict without the kernel being - * able to catch this when the last event gets added to - * the group. 
- */ -static void free_fake_cpuc(struct cpu_hw_events *cpuc) -{ - kfree(cpuc->shared_regs); - kfree(cpuc); -} - -static struct cpu_hw_events *allocate_fake_cpuc(void) -{ - struct cpu_hw_events *cpuc; - int cpu = raw_smp_processor_id(); - - cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); - if (!cpuc) - return ERR_PTR(-ENOMEM); - - /* only needed, if we have extra_regs */ - if (x86_pmu.extra_regs) { - cpuc->shared_regs = allocate_shared_regs(cpu); - if (!cpuc->shared_regs) - goto error; - } - return cpuc; -error: - free_fake_cpuc(cpuc); - return ERR_PTR(-ENOMEM); -} /* * validate that we can schedule this event @@ -1737,9 +1647,9 @@ static int validate_event(struct perf_event *event) struct event_constraint *c; int ret = 0; - fake_cpuc = allocate_fake_cpuc(); - if (IS_ERR(fake_cpuc)) - return PTR_ERR(fake_cpuc); + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + return -ENOMEM; c = x86_pmu.get_event_constraints(fake_cpuc, event); @@ -1749,7 +1659,7 @@ static int validate_event(struct perf_event *event) if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(fake_cpuc, event); - free_fake_cpuc(fake_cpuc); + kfree(fake_cpuc); return ret; } @@ -1769,32 +1679,36 @@ static int validate_group(struct perf_event *event) { struct perf_event *leader = event->group_leader; struct cpu_hw_events *fake_cpuc; - int ret = -ENOSPC, n; + int ret, n; + + ret = -ENOMEM; + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + goto out; - fake_cpuc = allocate_fake_cpuc(); - if (IS_ERR(fake_cpuc)) - return PTR_ERR(fake_cpuc); /* * the event is not yet connected with its * siblings therefore we must first collect * existing siblings, then add the new event * before we can simulate the scheduling */ + ret = -ENOSPC; n = collect_events(fake_cpuc, leader, true); if (n < 0) - goto out; + goto out_free; fake_cpuc->n_events = n; n = collect_events(fake_cpuc, event, false); if (n < 0) - goto out; + goto out_free; fake_cpuc->n_events = n; ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); +out_free: + kfree(fake_cpuc); out: - free_fake_cpuc(fake_cpuc); return ret; } diff --git a/trunk/arch/x86/kernel/cpu/perf_event_amd.c b/trunk/arch/x86/kernel/cpu/perf_event_amd.c index 941caa2e449b..fe29c1d2219e 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_amd.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_amd.c @@ -89,20 +89,6 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ - [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, }; /* diff --git a/trunk/arch/x86/kernel/cpu/perf_event_intel.c b/trunk/arch/x86/kernel/cpu/perf_event_intel.c index 45fbb8f7f549..41178c826c48 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_intel.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_intel.c @@ -1,15 +1,25 @@ #ifdef CONFIG_CPU_SUP_INTEL +#define MAX_EXTRA_REGS 2 + /* - * Per core/cpu state - * - * Used to coordinate shared registers between HT threads or - * among events on a single PMU. + * Per register state. 
*/ -struct intel_shared_regs { - struct er_account regs[EXTRA_REG_MAX]; - int refcnt; /* per-core: #HT threads */ - unsigned core_id; /* per-core: core id */ +struct er_account { + int ref; /* reference count */ + unsigned int extra_reg; /* extra MSR number */ + u64 extra_config; /* extra MSR config */ +}; + +/* + * Per core state + * This used to coordinate shared registers for HT threads. + */ +struct intel_percore { + raw_spinlock_t lock; /* protect structure */ + struct er_account regs[MAX_EXTRA_REGS]; + int refcnt; /* number of threads */ + unsigned core_id; }; /* @@ -78,10 +88,16 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), EVENT_EXTRA_END }; +static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = +{ + INTEL_EVENT_CONSTRAINT(0xb7, 0), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_westmere_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -100,6 +116,8 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ EVENT_CONSTRAINT_END @@ -107,13 +125,15 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = static struct extra_reg intel_westmere_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), EVENT_EXTRA_END }; -static struct event_constraint intel_v1_event_constraints[] __read_mostly = +static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = { + INTEL_EVENT_CONSTRAINT(0xb7, 0), + INTEL_EVENT_CONSTRAINT(0xbb, 0), EVENT_CONSTRAINT_END }; @@ -125,12 +145,6 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -static struct extra_reg intel_snb_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), - EVENT_EXTRA_END -}; - static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -231,21 +245,6 @@ static __initconst const u64 snb_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - }; static __initconst const u64 westmere_hw_cache_event_ids @@ -347,20 +346,6 @@ static __initconst const u64 westmere_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ 
C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - }, }; /* @@ -413,21 +398,7 @@ static __initconst const u64 nehalem_hw_cache_extra_regs [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, }, - }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, - [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, - [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, - [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, - }, - }, + } }; static __initconst const u64 nehalem_hw_cache_event_ids @@ -529,20 +500,6 @@ static __initconst const u64 nehalem_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x01b7, - [ C(RESULT_MISS) ] = 0x01b7, - }, - }, }; static __initconst const u64 core2_hw_cache_event_ids @@ -1046,7 +1003,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) data.period = event->hw.last_period; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 1, &data, regs)) x86_pmu_stop(event, 0); } @@ -1080,121 +1037,65 @@ intel_bts_constraints(struct perf_event *event) return NULL; } -static bool intel_try_alt_er(struct perf_event *event, int orig_idx) -{ - if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) - return false; - - if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; - event->hw.extra_reg.idx = EXTRA_REG_RSP_1; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; - } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01b7; - event->hw.extra_reg.idx = EXTRA_REG_RSP_0; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; - } - - if (event->hw.extra_reg.idx == orig_idx) - return false; - - return true; -} - -/* - * manage allocation of shared extra msr for certain events - * - * sharing can be: - * per-cpu: to be shared between the various events on a single PMU - * per-core: per-cpu + shared by HT threads - */ static struct event_constraint * -__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct event_constraint *c = &emptyconstraint; - struct hw_perf_event_extra *reg = &event->hw.extra_reg; + struct hw_perf_event *hwc = &event->hw; + unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; + struct event_constraint *c; + struct intel_percore *pc; struct er_account *era; - unsigned long flags; - int orig_idx = reg->idx; - - /* already allocated shared msr */ - if (reg->alloc) - return &unconstrained; - -again: - era = &cpuc->shared_regs->regs[reg->idx]; - /* - * we use spin_lock_irqsave() to avoid lockdep issues when - * passing a fake cpuc - */ - raw_spin_lock_irqsave(&era->lock, flags); - - if (!atomic_read(&era->ref) || era->config == reg->config) { - - /* lock in msr value */ - era->config = reg->config; - era->reg = reg->reg; + int i; + int free_slot; + int 
found; - /* one more user */ - atomic_inc(&era->ref); + if (!x86_pmu.percore_constraints || hwc->extra_alloc) + return NULL; - /* no need to reallocate during incremental event scheduling */ - reg->alloc = 1; + for (c = x86_pmu.percore_constraints; c->cmask; c++) { + if (e != c->code) + continue; /* - * All events using extra_reg are unconstrained. - * Avoids calling x86_get_event_constraints() - * - * Must revisit if extra_reg controlling events - * ever have constraints. Worst case we go through - * the regular event constraint table. + * Allocate resource per core. */ - c = &unconstrained; - } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock(&era->lock); - goto again; + pc = cpuc->per_core; + if (!pc) + break; + c = &emptyconstraint; + raw_spin_lock(&pc->lock); + free_slot = -1; + found = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { + /* Allow sharing same config */ + if (hwc->extra_config == era->extra_config) { + era->ref++; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + /* else conflict */ + found = 1; + break; + } else if (era->ref == 0 && free_slot == -1) + free_slot = i; + } + if (!found && free_slot != -1) { + era = &pc->regs[free_slot]; + era->ref = 1; + era->extra_reg = hwc->extra_reg; + era->extra_config = hwc->extra_config; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + raw_spin_unlock(&pc->lock); + return c; } - raw_spin_unlock_irqrestore(&era->lock, flags); - - return c; -} - -static void -__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, - struct hw_perf_event_extra *reg) -{ - struct er_account *era; - - /* - * only put constraint if extra reg was actually - * allocated. Also takes care of event which do - * not use an extra shared reg - */ - if (!reg->alloc) - return; - - era = &cpuc->shared_regs->regs[reg->idx]; - - /* one fewer user */ - atomic_dec(&era->ref); - - /* allocate again next time */ - reg->alloc = 0; -} - -static struct event_constraint * -intel_shared_regs_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - struct event_constraint *c = NULL; - if (event->hw.extra_reg.idx != EXTRA_REG_NONE) - c = __intel_shared_reg_get_constraints(cpuc, event); - - return c; + return NULL; } static struct event_constraint * @@ -1210,28 +1111,49 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event if (c) return c; - c = intel_shared_regs_constraints(cpuc, event); + c = intel_percore_constraints(cpuc, event); if (c) return c; return x86_get_event_constraints(cpuc, event); } -static void -intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, +static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct hw_perf_event_extra *reg; + struct extra_reg *er; + struct intel_percore *pc; + struct er_account *era; + struct hw_perf_event *hwc = &event->hw; + int i, allref; - reg = &event->hw.extra_reg; - if (reg->idx != EXTRA_REG_NONE) - __intel_shared_reg_put_constraints(cpuc, reg); -} + if (!cpuc->percore_used) + return; -static void intel_put_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - intel_put_shared_regs_event_constraints(cpuc, event); + for (er = x86_pmu.extra_regs; er->msr; er++) { + if (er->event != (hwc->config & er->config_mask)) + continue; + + pc = cpuc->per_core; + raw_spin_lock(&pc->lock); + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && + 
era->extra_config == hwc->extra_config && + era->extra_reg == er->msr) { + era->ref--; + hwc->extra_alloc = 0; + break; + } + } + allref = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) + allref += pc->regs[i].ref; + if (allref == 0) + cpuc->percore_used = 0; + raw_spin_unlock(&pc->lock); + break; + } } static int intel_pmu_hw_config(struct perf_event *event) @@ -1309,36 +1231,20 @@ static __initconst const struct x86_pmu core_pmu = { .event_constraints = intel_core_event_constraints, }; -static struct intel_shared_regs *allocate_shared_regs(int cpu) -{ - struct intel_shared_regs *regs; - int i; - - regs = kzalloc_node(sizeof(struct intel_shared_regs), - GFP_KERNEL, cpu_to_node(cpu)); - if (regs) { - /* - * initialize the locks to keep lockdep happy - */ - for (i = 0; i < EXTRA_REG_MAX; i++) - raw_spin_lock_init(®s->regs[i].lock); - - regs->core_id = -1; - } - return regs; -} - static int intel_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - if (!x86_pmu.extra_regs) + if (!cpu_has_ht_siblings()) return NOTIFY_OK; - cpuc->shared_regs = allocate_shared_regs(cpu); - if (!cpuc->shared_regs) + cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), + GFP_KERNEL, cpu_to_node(cpu)); + if (!cpuc->per_core) return NOTIFY_BAD; + raw_spin_lock_init(&cpuc->per_core->lock); + cpuc->per_core->core_id = -1; return NOTIFY_OK; } @@ -1354,34 +1260,32 @@ static void intel_pmu_cpu_starting(int cpu) */ intel_pmu_lbr_reset(); - if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) + if (!cpu_has_ht_siblings()) return; for_each_cpu(i, topology_thread_cpumask(cpu)) { - struct intel_shared_regs *pc; + struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; - pc = per_cpu(cpu_hw_events, i).shared_regs; if (pc && pc->core_id == core_id) { - kfree(cpuc->shared_regs); - cpuc->shared_regs = pc; + kfree(cpuc->per_core); + cpuc->per_core = pc; break; } } - cpuc->shared_regs->core_id = core_id; - cpuc->shared_regs->refcnt++; + cpuc->per_core->core_id = core_id; + cpuc->per_core->refcnt++; } static void intel_pmu_cpu_dying(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - struct intel_shared_regs *pc; + struct intel_percore *pc = cpuc->per_core; - pc = cpuc->shared_regs; if (pc) { if (pc->core_id == -1 || --pc->refcnt == 0) kfree(pc); - cpuc->shared_regs = NULL; + cpuc->per_core = NULL; } fini_debug_store_on_cpu(cpu); @@ -1532,6 +1436,7 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_nehalem_event_constraints; x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; + x86_pmu.percore_constraints = intel_nehalem_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; @@ -1576,10 +1481,10 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_westmere_event_constraints; + x86_pmu.percore_constraints = intel_westmere_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; x86_pmu.extra_regs = intel_westmere_extra_regs; - x86_pmu.er_flags |= ERF_HAS_RSP_1; /* UOPS_ISSUED.STALLED_CYCLES */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; @@ -1597,10 +1502,6 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_events; - x86_pmu.extra_regs = intel_snb_extra_regs; - /* all extra regs are per-cpu when HT is on */ - 
x86_pmu.er_flags |= ERF_HAS_RSP_1; - x86_pmu.er_flags |= ERF_NO_HT_SHARING; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; @@ -1611,19 +1512,11 @@ static __init int intel_pmu_init(void) break; default: - switch (x86_pmu.version) { - case 1: - x86_pmu.event_constraints = intel_v1_event_constraints; - pr_cont("generic architected perfmon v1, "); - break; - default: - /* - * default constraints for v2 and up - */ - x86_pmu.event_constraints = intel_gen_event_constraints; - pr_cont("generic architected perfmon, "); - break; - } + /* + * default constraints for v2 and up + */ + x86_pmu.event_constraints = intel_gen_event_constraints; + pr_cont("generic architected perfmon, "); } return 0; } @@ -1635,8 +1528,4 @@ static int intel_pmu_init(void) return 0; } -static struct intel_shared_regs *allocate_shared_regs(int cpu) -{ - return NULL; -} #endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/trunk/arch/x86/kernel/cpu/perf_event_intel_ds.c b/trunk/arch/x86/kernel/cpu/perf_event_intel_ds.c index 1b1ef3addcfd..bab491b8ee25 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void) */ perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * (top - at))) + if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) return 1; for (; at < top; at++) { @@ -616,7 +616,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, else regs.flags &= ~PERF_EFLAGS_EXACT; - if (perf_event_overflow(event, &data, ®s)) + if (perf_event_overflow(event, 1, &data, ®s)) x86_pmu_stop(event, 0); } diff --git a/trunk/arch/x86/kernel/cpu/perf_event_p4.c b/trunk/arch/x86/kernel/cpu/perf_event_p4.c index fb901c5080f7..ead584fb6a7d 100644 --- a/trunk/arch/x86/kernel/cpu/perf_event_p4.c +++ b/trunk/arch/x86/kernel/cpu/perf_event_p4.c @@ -554,20 +554,6 @@ static __initconst const u64 p4_hw_cache_event_ids [ C(RESULT_MISS) ] = -1, }, }, - [ C(NODE) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, }; static u64 p4_general_events[PERF_COUNT_HW_MAX] = { @@ -719,31 +705,6 @@ static int p4_validate_raw_event(struct perf_event *event) return 0; } -static void p4_hw_watchdog_set_attr(struct perf_event_attr *wd_attr) -{ - /* - * Watchdog ticks are special on Netburst, we use - * that named "non-sleeping" ticks as recommended - * by Intel SDM Vol3b. 
- */ - WARN_ON_ONCE(wd_attr->type != PERF_TYPE_HARDWARE || - wd_attr->config != PERF_COUNT_HW_CPU_CYCLES); - - wd_attr->type = PERF_TYPE_RAW; - wd_attr->config = - p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | - P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3)) | - p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | - P4_CCCR_COMPARE); -} - static int p4_hw_config(struct perf_event *event) { int cpu = get_cpu(); @@ -984,7 +945,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 1, &data, regs)) x86_pmu_stop(event, 0); } @@ -1218,7 +1179,6 @@ static __initconst const struct x86_pmu p4_pmu = { .cntval_bits = ARCH_P4_CNTRVAL_BITS, .cntval_mask = ARCH_P4_CNTRVAL_MASK, .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, - .hw_watchdog_set_attr = p4_hw_watchdog_set_attr, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, /* diff --git a/trunk/arch/x86/kernel/dumpstack_64.c b/trunk/arch/x86/kernel/dumpstack_64.c index 19853ad8afc5..e71c98d3c0d2 100644 --- a/trunk/arch/x86/kernel/dumpstack_64.c +++ b/trunk/arch/x86/kernel/dumpstack_64.c @@ -104,6 +104,34 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack, return (stack >= irq_stack && stack < irq_stack_end); } +/* + * We are returning from the irq stack and go to the previous one. + * If the previous stack is also in the irq stack, then bp in the first + * frame of the irq stack points to the previous, interrupted one. + * Otherwise we have another level of indirection: We first save + * the bp of the previous stack, then we switch the stack to the irq one + * and save a new bp that links to the previous one. 
+ * (See save_args()) + */ +static inline unsigned long +fixup_bp_irq_link(unsigned long bp, unsigned long *stack, + unsigned long *irq_stack, unsigned long *irq_stack_end) +{ +#ifdef CONFIG_FRAME_POINTER + struct stack_frame *frame = (struct stack_frame *)bp; + unsigned long next; + + if (!in_irq_stack(stack, irq_stack, irq_stack_end)) { + if (!probe_kernel_address(&frame->next_frame, next)) + return next; + else + WARN_ONCE(1, "Perf: bad frame pointer = %p in " + "callchain\n", &frame->next_frame); + } +#endif + return bp; +} + /* * x86-64 can have up to three kernel stacks: * process stack @@ -127,12 +155,9 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, task = current; if (!stack) { - if (regs) - stack = (unsigned long *)regs->sp; - else if (task && task != current) + stack = &dummy; + if (task && task != current) stack = (unsigned long *)task->thread.sp; - else - stack = &dummy; } if (!bp) @@ -180,6 +205,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * pointer (index -1 to end) in the IRQ stack: */ stack = (unsigned long *) (irq_stack_end[-1]); + bp = fixup_bp_irq_link(bp, stack, irq_stack, + irq_stack_end); irq_stack_end = NULL; ops->stack(data, "EOI"); continue; diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index d656f68371a4..8a445a0c989e 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -297,26 +297,27 @@ ENDPROC(native_usergs_sysret64) .endm /* save partial stack frame */ - .macro SAVE_ARGS_IRQ + .pushsection .kprobes.text, "ax" +ENTRY(save_args) + XCPT_FRAME cld - /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP - - /* Save rbp so that we can unwind from get_irq_regs() */ - movq_cfi rbp, 0 - - /* Save previous stack value */ - movq %rsp, %rsi - - leaq -RBP(%rsp),%rdi /* arg1 for handler */ + /* + * start from rbp in pt_regs and jump over + * return address. + */ + movq_cfi rdi, RDI+8-RBP + movq_cfi rsi, RSI+8-RBP + movq_cfi rdx, RDX+8-RBP + movq_cfi rcx, RCX+8-RBP + movq_cfi rax, RAX+8-RBP + movq_cfi r8, R8+8-RBP + movq_cfi r9, R9+8-RBP + movq_cfi r10, R10+8-RBP + movq_cfi r11, R11+8-RBP + + leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ + movq_cfi rbp, 8 /* push %rbp */ + leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) je 1f SWAPGS @@ -328,14 +329,19 @@ ENDPROC(native_usergs_sysret64) */ 1: incl PER_CPU_VAR(irq_count) jne 2f + popq_cfi %rax /* move return address... */ mov PER_CPU_VAR(irq_stack_ptr),%rsp EMPTY_FRAME 0 - -2: /* Store previous stack value */ - pushq %rsi - /* We entered an interrupt context - irqs are off: */ - TRACE_IRQS_OFF - .endm + pushq_cfi %rbp /* backlink for unwinder */ + pushq_cfi %rax /* ... 
to the new stack */ + /* + * We entered an interrupt context - irqs are off: + */ +2: TRACE_IRQS_OFF + ret + CFI_ENDPROC +END(save_args) + .popsection ENTRY(save_rest) PARTIAL_FRAME 1 REST_SKIP+8 @@ -785,7 +791,7 @@ END(interrupt) /* reserve pt_regs for scratch regs and rbp */ subq $ORIG_RAX-RBP, %rsp CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP - SAVE_ARGS_IRQ + call save_args PARTIAL_FRAME 0 call \func .endm @@ -808,14 +814,15 @@ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF decl PER_CPU_VAR(irq_count) + leaveq - /* Restore saved previous stack */ - popq %rsi - leaq 16(%rsi), %rsp - + CFI_RESTORE rbp CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET -16 + CFI_ADJUST_CFA_OFFSET -8 + /* we did not save rbx, restore only from ARGOFFSET */ + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) diff --git a/trunk/arch/x86/kernel/kgdb.c b/trunk/arch/x86/kernel/kgdb.c index 00354d4919a9..5f9ecff328b5 100644 --- a/trunk/arch/x86/kernel/kgdb.c +++ b/trunk/arch/x86/kernel/kgdb.c @@ -608,7 +608,7 @@ int kgdb_arch_init(void) return register_die_notifier(&kgdb_notifier); } -static void kgdb_hw_overflow_handler(struct perf_event *event, +static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct task_struct *tsk = current; @@ -638,7 +638,7 @@ void kgdb_arch_late(void) for (i = 0; i < HBP_NUM; i++) { if (breakinfo[i].pev) continue; - breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); + breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); if (IS_ERR((void * __force)breakinfo[i].pev)) { printk(KERN_ERR "kgdb: Could not allocate hw" "breakpoints\nDisabling the kernel debugger\n"); diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c index 82528799c5de..807c2a2b80f1 100644 --- a/trunk/arch/x86/kernel/ptrace.c +++ b/trunk/arch/x86/kernel/ptrace.c @@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target, return ret; } -static void ptrace_triggered(struct perf_event *bp, +static void ptrace_triggered(struct perf_event *bp, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -715,8 +715,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, attr.bp_type = HW_BREAKPOINT_W; attr.disabled = 1; - bp = register_user_hw_breakpoint(&attr, ptrace_triggered, - NULL, tsk); + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); /* * CHECKME: the previous code returned -EIO if the addr wasn't diff --git a/trunk/arch/x86/kernel/stacktrace.c b/trunk/arch/x86/kernel/stacktrace.c index fdd0c6430e5a..55d9bc03f696 100644 --- a/trunk/arch/x86/kernel/stacktrace.c +++ b/trunk/arch/x86/kernel/stacktrace.c @@ -66,7 +66,7 @@ void save_stack_trace(struct stack_trace *trace) } EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) { dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) diff --git a/trunk/arch/x86/mm/fault.c b/trunk/arch/x86/mm/fault.c index 4d09df054e39..2dbf6bf4c7e5 100644 --- a/trunk/arch/x86/mm/fault.c +++ b/trunk/arch/x86/mm/fault.c @@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, 
regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1161,11 +1161,11 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } if (fault & VM_FAULT_RETRY) { diff --git a/trunk/arch/x86/mm/kmemcheck/error.c b/trunk/arch/x86/mm/kmemcheck/error.c index dab41876cdd5..704a37cedddb 100644 --- a/trunk/arch/x86/mm/kmemcheck/error.c +++ b/trunk/arch/x86/mm/kmemcheck/error.c @@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, e->trace.entries = e->trace_entries; e->trace.max_entries = ARRAY_SIZE(e->trace_entries); e->trace.skip = 0; - save_stack_trace_regs(regs, &e->trace); + save_stack_trace_regs(&e->trace, regs); /* Round address down to nearest 16 bytes */ shadow_copy = kmemcheck_shadow_lookup(address diff --git a/trunk/arch/x86/oprofile/backtrace.c b/trunk/arch/x86/oprofile/backtrace.c index 32f78eb46744..a5b64ab4cd6e 100644 --- a/trunk/arch/x86/oprofile/backtrace.c +++ b/trunk/arch/x86/oprofile/backtrace.c @@ -11,12 +11,10 @@ #include #include #include -#include -#include - #include #include #include +#include static int backtrace_stack(void *data, char *name) { @@ -38,53 +36,17 @@ static struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack, }; -/* from arch/x86/kernel/cpu/perf_event.c: */ - -/* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context - */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ - unsigned long offset, addr = (unsigned long)from; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page); - memcpy(to, map+offset, size); - kunmap_atomic(map); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; -} - #ifdef CONFIG_COMPAT static struct stack_frame_ia32 * dump_user_backtrace_32(struct stack_frame_ia32 *head) { - /* Also check accessibility of one struct frame_head beyond: */ struct stack_frame_ia32 bufhead[2]; struct stack_frame_ia32 *fp; - unsigned long bytes; - bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != sizeof(bufhead)) + /* Also check accessibility of one struct frame_head beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) return NULL; fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); @@ -125,12 +87,12 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) static struct stack_frame *dump_user_backtrace(struct stack_frame *head) { - /* Also check accessibility of one struct frame_head beyond: */ struct stack_frame bufhead[2]; - unsigned long bytes; - bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != sizeof(bufhead)) + /* Also check accessibility of one struct stack_frame beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) return NULL; 
oprofile_add_trace(bufhead[0].return_address); diff --git a/trunk/arch/x86/pci/acpi.c b/trunk/arch/x86/pci/acpi.c index 68c3c1395202..0972315c3860 100644 --- a/trunk/arch/x86/pci/acpi.c +++ b/trunk/arch/x86/pci/acpi.c @@ -188,7 +188,7 @@ static bool resource_contains(struct resource *res, resource_size_t point) return false; } -static void coalesce_windows(struct pci_root_info *info, unsigned long type) +static void coalesce_windows(struct pci_root_info *info, int type) { int i, j; struct resource *res1, *res2; diff --git a/trunk/block/blk-throttle.c b/trunk/block/blk-throttle.c index 3689f833afdc..a62be8d0dc1b 100644 --- a/trunk/block/blk-throttle.c +++ b/trunk/block/blk-throttle.c @@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q) bio_list_init(&bio_list_on_stack); - throtl_log(td, "dispatch nr_queued=%d read=%u write=%u", + throtl_log(td, "dispatch nr_queued=%lu read=%u write=%u", total_nr_queued(td), td->nr_queued[READ], td->nr_queued[WRITE]); @@ -1204,7 +1204,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) } queue_bio: - throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu" + throtl_log_tg(td, tg, "[%c] bio. bdisp=%u sz=%u bps=%llu" " iodisp=%u iops=%u queued=%d/%d", rw == READ ? 'R' : 'W', tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], diff --git a/trunk/block/cfq-iosched.c b/trunk/block/cfq-iosched.c index f3799432676d..3c7b537bf908 100644 --- a/trunk/block/cfq-iosched.c +++ b/trunk/block/cfq-iosched.c @@ -988,10 +988,9 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); - cfq_log_cfqq(cfqq->cfqd, cfqq, - "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", - used_sl, cfqq->slice_dispatch, charge, - iops_mode(cfqd), cfqq->nr_sectors); + cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" + " sect=%u", used_sl, cfqq->slice_dispatch, charge, + iops_mode(cfqd), cfqq->nr_sectors); cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl, unaccounted_sl); cfq_blkiocg_set_start_empty_time(&cfqg->blkg); @@ -2024,8 +2023,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) */ if (sample_valid(cic->ttime_samples) && (cfqq->slice_end - jiffies < cic->ttime_mean)) { - cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", - cic->ttime_mean); + cfq_log_cfqq(cfqd, cfqq, "Not idling. 
think_time:%d", + cic->ttime_mean); return; } @@ -2773,11 +2772,8 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, smp_wmb(); cic->key = cfqd_dead_key(cfqd); - if (rcu_dereference(ioc->ioc_data) == cic) { - spin_lock(&ioc->lock); + if (ioc->ioc_data == cic) rcu_assign_pointer(ioc->ioc_data, NULL); - spin_unlock(&ioc->lock); - } if (cic->cfqq[BLK_RW_ASYNC]) { cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); diff --git a/trunk/block/genhd.c b/trunk/block/genhd.c index 3608289c8ecd..95822ae25cfe 100644 --- a/trunk/block/genhd.c +++ b/trunk/block/genhd.c @@ -1371,7 +1371,6 @@ struct disk_events { struct gendisk *disk; /* the associated disk */ spinlock_t lock; - struct mutex block_mutex; /* protects blocking */ int block; /* event blocking depth */ unsigned int pending; /* events already sent out */ unsigned int clearing; /* events being cleared */ @@ -1415,44 +1414,22 @@ static unsigned long disk_events_poll_jiffies(struct gendisk *disk) return msecs_to_jiffies(intv_msecs); } -/** - * disk_block_events - block and flush disk event checking - * @disk: disk to block events for - * - * On return from this function, it is guaranteed that event checking - * isn't in progress and won't happen until unblocked by - * disk_unblock_events(). Events blocking is counted and the actual - * unblocking happens after the matching number of unblocks are done. - * - * Note that this intentionally does not block event checking from - * disk_clear_events(). - * - * CONTEXT: - * Might sleep. - */ -void disk_block_events(struct gendisk *disk) +static void __disk_block_events(struct gendisk *disk, bool sync) { struct disk_events *ev = disk->ev; unsigned long flags; bool cancel; - if (!ev) - return; - - /* - * Outer mutex ensures that the first blocker completes canceling - * the event work before further blockers are allowed to finish. - */ - mutex_lock(&ev->block_mutex); - spin_lock_irqsave(&ev->lock, flags); cancel = !ev->block++; spin_unlock_irqrestore(&ev->lock, flags); - if (cancel) - cancel_delayed_work_sync(&disk->ev->dwork); - - mutex_unlock(&ev->block_mutex); + if (cancel) { + if (sync) + cancel_delayed_work_sync(&disk->ev->dwork); + else + cancel_delayed_work(&disk->ev->dwork); + } } static void __disk_unblock_events(struct gendisk *disk, bool check_now) @@ -1483,6 +1460,27 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) spin_unlock_irqrestore(&ev->lock, flags); } +/** + * disk_block_events - block and flush disk event checking + * @disk: disk to block events for + * + * On return from this function, it is guaranteed that event checking + * isn't in progress and won't happen until unblocked by + * disk_unblock_events(). Events blocking is counted and the actual + * unblocking happens after the matching number of unblocks are done. + * + * Note that this intentionally does not block event checking from + * disk_clear_events(). + * + * CONTEXT: + * Might sleep. 
+ */ +void disk_block_events(struct gendisk *disk) +{ + if (disk->ev) + __disk_block_events(disk, true); +} + /** * disk_unblock_events - unblock disk event checking * @disk: disk to unblock events for @@ -1510,18 +1508,10 @@ void disk_unblock_events(struct gendisk *disk) */ void disk_check_events(struct gendisk *disk) { - struct disk_events *ev = disk->ev; - unsigned long flags; - - if (!ev) - return; - - spin_lock_irqsave(&ev->lock, flags); - if (!ev->block) { - cancel_delayed_work(&ev->dwork); - queue_delayed_work(system_nrt_wq, &ev->dwork, 0); + if (disk->ev) { + __disk_block_events(disk, false); + __disk_unblock_events(disk, true); } - spin_unlock_irqrestore(&ev->lock, flags); } EXPORT_SYMBOL_GPL(disk_check_events); @@ -1556,7 +1546,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) spin_unlock_irq(&ev->lock); /* uncondtionally schedule event check and wait for it to finish */ - disk_block_events(disk); + __disk_block_events(disk, true); queue_delayed_work(system_nrt_wq, &ev->dwork, 0); flush_delayed_work(&ev->dwork); __disk_unblock_events(disk, false); @@ -1674,7 +1664,7 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev, if (intv < 0 && intv != -1) return -EINVAL; - disk_block_events(disk); + __disk_block_events(disk, true); disk->ev->poll_msecs = intv; __disk_unblock_events(disk, true); @@ -1760,7 +1750,6 @@ static void disk_add_events(struct gendisk *disk) INIT_LIST_HEAD(&ev->node); ev->disk = disk; spin_lock_init(&ev->lock); - mutex_init(&ev->block_mutex); ev->block = 1; ev->poll_msecs = -1; INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); @@ -1781,7 +1770,7 @@ static void disk_del_events(struct gendisk *disk) if (!disk->ev) return; - disk_block_events(disk); + __disk_block_events(disk, true); mutex_lock(&disk_events_mutex); list_del_init(&disk->ev->node); diff --git a/trunk/drivers/ata/libata-core.c b/trunk/drivers/ata/libata-core.c index 000d03ae6653..736bee5dafeb 100644 --- a/trunk/drivers/ata/libata-core.c +++ b/trunk/drivers/ata/libata-core.c @@ -4143,9 +4143,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { * Devices which choke on SETXFER. Applies only if both the * device and controller are SATA. 
*/ - { "PIONEER DVD-RW DVRTD08", NULL, ATA_HORKAGE_NOSETXFER }, - { "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER }, - { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, + { "PIONEER DVD-RW DVRTD08", "1.00", ATA_HORKAGE_NOSETXFER }, + { "PIONEER DVD-RW DVR-212D", "1.28", ATA_HORKAGE_NOSETXFER }, + { "PIONEER DVD-RW DVR-216D", "1.08", ATA_HORKAGE_NOSETXFER }, /* End Marker */ { } diff --git a/trunk/drivers/ata/libata-scsi.c b/trunk/drivers/ata/libata-scsi.c index 927f968e99d9..d51f9795c064 100644 --- a/trunk/drivers/ata/libata-scsi.c +++ b/trunk/drivers/ata/libata-scsi.c @@ -3797,12 +3797,6 @@ EXPORT_SYMBOL_GPL(ata_sas_port_alloc); */ int ata_sas_port_start(struct ata_port *ap) { - /* - * the port is marked as frozen at allocation time, but if we don't - * have new eh, we won't thaw it - */ - if (!ap->ops->error_handler) - ap->pflags &= ~ATA_PFLAG_FROZEN; return 0; } EXPORT_SYMBOL_GPL(ata_sas_port_start); diff --git a/trunk/drivers/ata/pata_marvell.c b/trunk/drivers/ata/pata_marvell.c index 5d7f58a7e34d..75a6a0c0094f 100644 --- a/trunk/drivers/ata/pata_marvell.c +++ b/trunk/drivers/ata/pata_marvell.c @@ -161,9 +161,6 @@ static const struct pci_device_id marvell_pci_tbl[] = { { PCI_DEVICE(0x11AB, 0x6121), }, { PCI_DEVICE(0x11AB, 0x6123), }, { PCI_DEVICE(0x11AB, 0x6145), }, - { PCI_DEVICE(0x1B4B, 0x91A0), }, - { PCI_DEVICE(0x1B4B, 0x91A4), }, - { } /* terminate list */ }; diff --git a/trunk/drivers/ata/sata_dwc_460ex.c b/trunk/drivers/ata/sata_dwc_460ex.c index dc88a39e7db8..1c4b3aa4c7c4 100644 --- a/trunk/drivers/ata/sata_dwc_460ex.c +++ b/trunk/drivers/ata/sata_dwc_460ex.c @@ -389,7 +389,7 @@ static void sata_dwc_tf_dump(struct ata_taskfile *tf) /* * Function: get_burst_length_encode * arguments: datalength: length in bytes of data - * returns value to be programmed in register corresponding to data length + * returns value to be programmed in register corrresponding to data length * This value is effectively the log(base 2) of the length */ static int get_burst_length_encode(int datalength) diff --git a/trunk/drivers/base/power/clock_ops.c b/trunk/drivers/base/power/clock_ops.c index ad367c4139b1..eaa8a854af03 100644 --- a/trunk/drivers/base/power/clock_ops.c +++ b/trunk/drivers/base/power/clock_ops.c @@ -387,7 +387,7 @@ static int pm_runtime_clk_notify(struct notifier_block *nb, clknb = container_of(nb, struct pm_clk_notifier_block, nb); switch (action) { - case BUS_NOTIFY_BIND_DRIVER: + case BUS_NOTIFY_ADD_DEVICE: if (clknb->con_ids[0]) { for (con_id = clknb->con_ids; *con_id; con_id++) enable_clock(dev, *con_id); @@ -395,7 +395,7 @@ static int pm_runtime_clk_notify(struct notifier_block *nb, enable_clock(dev, NULL); } break; - case BUS_NOTIFY_UNBOUND_DRIVER: + case BUS_NOTIFY_DEL_DEVICE: if (clknb->con_ids[0]) { for (con_id = clknb->con_ids; *con_id; con_id++) disable_clock(dev, *con_id); diff --git a/trunk/drivers/base/power/main.c b/trunk/drivers/base/power/main.c index 06f09bf89cb2..aa6320207745 100644 --- a/trunk/drivers/base/power/main.c +++ b/trunk/drivers/base/power/main.c @@ -57,8 +57,7 @@ static int async_error; */ void device_pm_init(struct device *dev) { - dev->power.is_prepared = false; - dev->power.is_suspended = false; + dev->power.in_suspend = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; @@ -92,7 +91,7 @@ void device_pm_add(struct device *dev) pr_debug("PM: Adding info for %s:%s\n", dev->bus ? 
dev->bus->name : "No Bus", dev_name(dev)); mutex_lock(&dpm_list_mtx); - if (dev->parent && dev->parent->power.is_prepared) + if (dev->parent && dev->parent->power.in_suspend) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); @@ -512,14 +511,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) dpm_wait(dev->parent, async); device_lock(dev); - /* - * This is a fib. But we'll allow new children to be added below - * a resumed device, even if the device hasn't been completed yet. - */ - dev->power.is_prepared = false; - - if (!dev->power.is_suspended) - goto Unlock; + dev->power.in_suspend = false; if (dev->pwr_domain) { pm_dev_dbg(dev, state, "power domain "); @@ -556,9 +548,6 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) } End: - dev->power.is_suspended = false; - - Unlock: device_unlock(dev); complete_all(&dev->power.completion); @@ -681,7 +670,7 @@ void dpm_complete(pm_message_t state) struct device *dev = to_device(dpm_prepared_list.prev); get_device(dev); - dev->power.is_prepared = false; + dev->power.in_suspend = false; list_move(&dev->power.entry, &list); mutex_unlock(&dpm_list_mtx); @@ -846,11 +835,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) device_lock(dev); if (async_error) - goto Unlock; + goto End; if (pm_wakeup_pending()) { async_error = -EBUSY; - goto Unlock; + goto End; } if (dev->pwr_domain) { @@ -888,9 +877,6 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } End: - dev->power.is_suspended = !error; - - Unlock: device_unlock(dev); complete_all(&dev->power.completion); @@ -1056,7 +1042,7 @@ int dpm_prepare(pm_message_t state) put_device(dev); break; } - dev->power.is_prepared = true; + dev->power.in_suspend = true; if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); put_device(dev); diff --git a/trunk/drivers/gpu/drm/drm_gem.c b/trunk/drivers/gpu/drm/drm_gem.c index 4012fe423460..74e4ff578017 100644 --- a/trunk/drivers/gpu/drm/drm_gem.c +++ b/trunk/drivers/gpu/drm/drm_gem.c @@ -34,7 +34,6 @@ #include #include #include -#include #include "drmP.h" /** @file drm_gem.c diff --git a/trunk/drivers/gpu/drm/i915/i915_dma.c b/trunk/drivers/gpu/drm/i915/i915_dma.c index 2b79588541e7..0239e9974bf2 100644 --- a/trunk/drivers/gpu/drm/i915/i915_dma.c +++ b/trunk/drivers/gpu/drm/i915/i915_dma.c @@ -2182,8 +2182,9 @@ int i915_driver_unload(struct drm_device *dev) /* Flush any outstanding unpin_work. 
*/ flush_workqueue(dev_priv->wq); - mutex_lock(&dev->struct_mutex); i915_gem_free_all_phys_object(dev); + + mutex_lock(&dev->struct_mutex); i915_gem_cleanup_ringbuffer(dev); mutex_unlock(&dev->struct_mutex); if (I915_HAS_FBC(dev) && i915_powersave) diff --git a/trunk/drivers/gpu/drm/i915/i915_gem.c b/trunk/drivers/gpu/drm/i915/i915_gem.c index 85f713746a1f..94c84d744100 100644 --- a/trunk/drivers/gpu/drm/i915/i915_gem.c +++ b/trunk/drivers/gpu/drm/i915/i915_gem.c @@ -31,7 +31,6 @@ #include "i915_drv.h" #include "i915_trace.h" #include "intel_drv.h" -#include #include #include #include @@ -360,7 +359,8 @@ i915_gem_shmem_pread_fast(struct drm_device *dev, if ((page_offset + remain) > PAGE_SIZE) page_length = PAGE_SIZE - page_offset; - page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); + page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, + GFP_HIGHUSER | __GFP_RECLAIMABLE); if (IS_ERR(page)) return PTR_ERR(page); @@ -463,7 +463,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); + page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, + GFP_HIGHUSER | __GFP_RECLAIMABLE); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; @@ -796,7 +797,8 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev, if ((page_offset + remain) > PAGE_SIZE) page_length = PAGE_SIZE - page_offset; - page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); + page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, + GFP_HIGHUSER | __GFP_RECLAIMABLE); if (IS_ERR(page)) return PTR_ERR(page); @@ -905,7 +907,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); + page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT, + GFP_HIGHUSER | __GFP_RECLAIMABLE); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; @@ -1216,12 +1219,12 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = i915_gem_object_bind_to_gtt(obj, 0, true); if (ret) goto unlock; - - ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) - goto unlock; } + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto unlock; + if (obj->tiling_mode == I915_TILING_NONE) ret = i915_gem_object_put_fence(obj); else @@ -1555,10 +1558,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, inode = obj->base.filp->f_path.dentry->d_inode; mapping = inode->i_mapping; - gfpmask |= mapping_gfp_mask(mapping); - for (i = 0; i < page_count; i++) { - page = shmem_read_mapping_page_gfp(mapping, i, gfpmask); + page = read_cache_page_gfp(mapping, i, + GFP_HIGHUSER | + __GFP_COLD | + __GFP_RECLAIMABLE | + gfpmask); if (IS_ERR(page)) goto err_pages; @@ -1696,10 +1701,13 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) /* Our goal here is to return as much of the memory as * is possible back to the system as we are called from OOM. * To do this we must instruct the shmfs to drop all of its - * backing pages, *now*. + * backing pages, *now*. Here we mirror the actions taken + * when by shmem_delete_inode() to release the backing store. 
*/ inode = obj->base.filp->f_path.dentry->d_inode; - shmem_truncate_range(inode, 0, (loff_t)-1); + truncate_inode_pages(inode->i_mapping, 0); + if (inode->i_op->truncate_range) + inode->i_op->truncate_range(inode, 0, (loff_t)-1); obj->madv = __I915_MADV_PURGED; } @@ -2918,6 +2926,8 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) */ wmb(); + i915_gem_release_mmap(obj); + old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3557,7 +3567,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; - struct address_space *mapping; obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (obj == NULL) @@ -3568,9 +3577,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, return NULL; } - mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; - mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); - i915_gem_info_add_obj(dev_priv, size); obj->base.write_domain = I915_GEM_DOMAIN_CPU; @@ -3946,7 +3952,8 @@ void i915_gem_detach_phys_object(struct drm_device *dev, page_count = obj->base.size / PAGE_SIZE; for (i = 0; i < page_count; i++) { - struct page *page = shmem_read_mapping_page(mapping, i); + struct page *page = read_cache_page_gfp(mapping, i, + GFP_HIGHUSER | __GFP_RECLAIMABLE); if (!IS_ERR(page)) { char *dst = kmap_atomic(page); memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); @@ -4007,7 +4014,8 @@ i915_gem_attach_phys_object(struct drm_device *dev, struct page *page; char *dst, *src; - page = shmem_read_mapping_page(mapping, i); + page = read_cache_page_gfp(mapping, i, + GFP_HIGHUSER | __GFP_RECLAIMABLE); if (IS_ERR(page)) return PTR_ERR(page); diff --git a/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4934cf84c320..20a4cc5b818f 100644 --- a/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/trunk/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -187,6 +187,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj); + /* blow away mappings if mapped through GTT */ + if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT) + i915_gem_release_mmap(obj); + if (obj->base.pending_write_domain) cd->flips |= atomic_read(&obj->pending_flip); diff --git a/trunk/drivers/gpu/drm/i915/i915_irq.c b/trunk/drivers/gpu/drm/i915/i915_irq.c index ae2b49969b99..9e34a1abeb61 100644 --- a/trunk/drivers/gpu/drm/i915/i915_irq.c +++ b/trunk/drivers/gpu/drm/i915/i915_irq.c @@ -1749,7 +1749,6 @@ void ironlake_irq_preinstall(struct drm_device *dev) * happens. 
*/ I915_WRITE(GEN6_BLITTER_HWSTAM, ~GEN6_BLITTER_USER_INTERRUPT); - I915_WRITE(GEN6_BSD_HWSTAM, ~GEN6_BSD_USER_INTERRUPT); } /* XXX hotplug from PCH */ diff --git a/trunk/drivers/gpu/drm/i915/i915_reg.h b/trunk/drivers/gpu/drm/i915/i915_reg.h index 5d5def756c9e..2f967af8e62e 100644 --- a/trunk/drivers/gpu/drm/i915/i915_reg.h +++ b/trunk/drivers/gpu/drm/i915/i915_reg.h @@ -531,7 +531,6 @@ #define GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE 0 #define GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR (1 << 3) -#define GEN6_BSD_HWSTAM 0x12098 #define GEN6_BSD_IMR 0x120a8 #define GEN6_BSD_USER_INTERRUPT (1 << 12) diff --git a/trunk/drivers/gpu/drm/i915/i915_suspend.c b/trunk/drivers/gpu/drm/i915/i915_suspend.c index e8152d23d5b6..60a94d2b5264 100644 --- a/trunk/drivers/gpu/drm/i915/i915_suspend.c +++ b/trunk/drivers/gpu/drm/i915/i915_suspend.c @@ -678,7 +678,6 @@ void i915_save_display(struct drm_device *dev) } /* VGA state */ - mutex_lock(&dev->struct_mutex); dev_priv->saveVGA0 = I915_READ(VGA0); dev_priv->saveVGA1 = I915_READ(VGA1); dev_priv->saveVGA_PD = I915_READ(VGA_PD); @@ -688,7 +687,6 @@ void i915_save_display(struct drm_device *dev) dev_priv->saveVGACNTRL = I915_READ(VGACNTRL); i915_save_vga(dev); - mutex_unlock(&dev->struct_mutex); } void i915_restore_display(struct drm_device *dev) @@ -782,8 +780,6 @@ void i915_restore_display(struct drm_device *dev) I915_WRITE(CPU_VGACNTRL, dev_priv->saveVGACNTRL); else I915_WRITE(VGACNTRL, dev_priv->saveVGACNTRL); - - mutex_lock(&dev->struct_mutex); I915_WRITE(VGA0, dev_priv->saveVGA0); I915_WRITE(VGA1, dev_priv->saveVGA1); I915_WRITE(VGA_PD, dev_priv->saveVGA_PD); @@ -791,7 +787,6 @@ void i915_restore_display(struct drm_device *dev) udelay(150); i915_restore_vga(dev); - mutex_unlock(&dev->struct_mutex); } int i915_save_state(struct drm_device *dev) diff --git a/trunk/drivers/gpu/drm/i915/intel_display.c b/trunk/drivers/gpu/drm/i915/intel_display.c index aa43e7be6053..81a9059b6a94 100644 --- a/trunk/drivers/gpu/drm/i915/intel_display.c +++ b/trunk/drivers/gpu/drm/i915/intel_display.c @@ -4687,7 +4687,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(DSPCNTR(plane), dspcntr); POSTING_READ(DSPCNTR(plane)); - intel_enable_plane(dev_priv, plane, pipe); ret = intel_pipe_set_base(crtc, x, y, old_fb); @@ -5218,6 +5217,8 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(DSPCNTR(plane), dspcntr); POSTING_READ(DSPCNTR(plane)); + if (!HAS_PCH_SPLIT(dev)) + intel_enable_plane(dev_priv, plane, pipe); ret = intel_pipe_set_base(crtc, x, y, old_fb); diff --git a/trunk/drivers/gpu/drm/i915/intel_overlay.c b/trunk/drivers/gpu/drm/i915/intel_overlay.c index 56a8e2aea19c..a670c006982e 100644 --- a/trunk/drivers/gpu/drm/i915/intel_overlay.c +++ b/trunk/drivers/gpu/drm/i915/intel_overlay.c @@ -1416,8 +1416,6 @@ void intel_setup_overlay(struct drm_device *dev) goto out_free; overlay->reg_bo = reg_bo; - mutex_lock(&dev->struct_mutex); - if (OVERLAY_NEEDS_PHYSICAL(dev)) { ret = i915_gem_attach_phys_object(dev, reg_bo, I915_GEM_PHYS_OVERLAY_REGS, @@ -1442,8 +1440,6 @@ void intel_setup_overlay(struct drm_device *dev) } } - mutex_unlock(&dev->struct_mutex); - /* init all values */ overlay->color_key = 0x0101fe; overlay->brightness = -19; @@ -1468,7 +1464,6 @@ void intel_setup_overlay(struct drm_device *dev) i915_gem_object_unpin(reg_bo); out_free_bo: drm_gem_object_unreference(®_bo->base); - mutex_unlock(&dev->struct_mutex); out_free: kfree(overlay); return; diff --git a/trunk/drivers/gpu/drm/radeon/evergreen.c 
b/trunk/drivers/gpu/drm/radeon/evergreen.c index 12d2fdc52414..445af7981637 100644 --- a/trunk/drivers/gpu/drm/radeon/evergreen.c +++ b/trunk/drivers/gpu/drm/radeon/evergreen.c @@ -2013,9 +2013,9 @@ static void evergreen_gpu_init(struct radeon_device *rdev) rdev->config.evergreen.tile_config |= (3 << 0); break; } - /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */ + /* num banks is 8 on all fusion asics */ if (rdev->flags & RADEON_IS_IGP) - rdev->config.evergreen.tile_config |= 1 << 4; + rdev->config.evergreen.tile_config |= 8 << 4; else rdev->config.evergreen.tile_config |= ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; diff --git a/trunk/drivers/gpu/drm/radeon/radeon.h b/trunk/drivers/gpu/drm/radeon/radeon.h index ef0e0e016914..27f45579e64b 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon.h +++ b/trunk/drivers/gpu/drm/radeon/radeon.h @@ -179,7 +179,6 @@ void radeon_pm_resume(struct radeon_device *rdev); void radeon_combios_get_power_modes(struct radeon_device *rdev); void radeon_atombios_get_power_modes(struct radeon_device *rdev); void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type); -int radeon_atom_get_max_vddc(struct radeon_device *rdev, u16 *voltage); void rs690_pm_info(struct radeon_device *rdev); extern int rv6xx_get_temp(struct radeon_device *rdev); extern int rv770_get_temp(struct radeon_device *rdev); diff --git a/trunk/drivers/gpu/drm/radeon/radeon_atombios.c b/trunk/drivers/gpu/drm/radeon/radeon_atombios.c index bf2b61584cdb..1e725d9f767f 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_atombios.c @@ -2320,14 +2320,6 @@ static bool radeon_atombios_parse_pplib_clock_info(struct radeon_device *rdev, le16_to_cpu(clock_info->r600.usVDDC); } - /* patch up vddc if necessary */ - if (rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage == 0xff01) { - u16 vddc; - - if (radeon_atom_get_max_vddc(rdev, &vddc) == 0) - rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage = vddc; - } - if (rdev->flags & RADEON_IS_IGP) { /* skip invalid modes */ if (rdev->pm.power_state[state_index].clock_info[mode_index].sclk == 0) @@ -2638,35 +2630,7 @@ void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 v atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } -int radeon_atom_get_max_vddc(struct radeon_device *rdev, - u16 *voltage) -{ - union set_voltage args; - int index = GetIndexIntoMasterTable(COMMAND, SetVoltage); - u8 frev, crev; - - if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev)) - return -EINVAL; - - switch (crev) { - case 1: - return -EINVAL; - case 2: - args.v2.ucVoltageType = SET_VOLTAGE_GET_MAX_VOLTAGE; - args.v2.ucVoltageMode = 0; - args.v2.usVoltageLevel = 0; - - atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); - - *voltage = le16_to_cpu(args.v2.usVoltageLevel); - break; - default: - DRM_ERROR("Unknown table version %d, %d\n", frev, crev); - return -EINVAL; - } - return 0; -} void radeon_atom_initialize_bios_scratch_regs(struct drm_device *dev) { diff --git a/trunk/drivers/gpu/drm/ttm/ttm_tt.c b/trunk/drivers/gpu/drm/ttm/ttm_tt.c index 58c271ebc0f7..90e23e0bfadb 100644 --- a/trunk/drivers/gpu/drm/ttm/ttm_tt.c +++ b/trunk/drivers/gpu/drm/ttm/ttm_tt.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -485,7 +484,7 @@ static int ttm_tt_swapin(struct ttm_tt *ttm) swap_space = 
swap_storage->f_path.dentry->d_inode->i_mapping; for (i = 0; i < ttm->num_pages; ++i) { - from_page = shmem_read_mapping_page(swap_space, i); + from_page = read_mapping_page(swap_space, i, NULL); if (IS_ERR(from_page)) { ret = PTR_ERR(from_page); goto out_err; @@ -558,7 +557,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage) from_page = ttm->pages[i]; if (unlikely(from_page == NULL)) continue; - to_page = shmem_read_mapping_page(swap_space, i); + to_page = read_mapping_page(swap_space, i, NULL); if (unlikely(IS_ERR(to_page))) { ret = PTR_ERR(to_page); goto out_err; diff --git a/trunk/drivers/hid/hid-core.c b/trunk/drivers/hid/hid-core.c index 6f3289a57888..f7440e8ce3e7 100644 --- a/trunk/drivers/hid/hid-core.c +++ b/trunk/drivers/hid/hid-core.c @@ -1423,7 +1423,6 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR) }, { HID_USB_DEVICE(USB_VENDOR_ID_LUMIO, USB_DEVICE_ID_CRYSTALTOUCH) }, - { HID_USB_DEVICE(USB_VENDOR_ID_LUMIO, USB_DEVICE_ID_CRYSTALTOUCH_DUAL) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV) }, diff --git a/trunk/drivers/hid/hid-ids.h b/trunk/drivers/hid/hid-ids.h index a756ee6c7df5..aecb5a4b8d6d 100644 --- a/trunk/drivers/hid/hid-ids.h +++ b/trunk/drivers/hid/hid-ids.h @@ -449,7 +449,6 @@ #define USB_VENDOR_ID_LUMIO 0x202e #define USB_DEVICE_ID_CRYSTALTOUCH 0x0006 -#define USB_DEVICE_ID_CRYSTALTOUCH_DUAL 0x0007 #define USB_VENDOR_ID_MCC 0x09db #define USB_DEVICE_ID_MCC_PMD1024LS 0x0076 diff --git a/trunk/drivers/hid/hid-multitouch.c b/trunk/drivers/hid/hid-multitouch.c index 62cac4dc3b62..0b2dcd0ee591 100644 --- a/trunk/drivers/hid/hid-multitouch.c +++ b/trunk/drivers/hid/hid-multitouch.c @@ -271,8 +271,6 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, } return 1; case HID_DG_CONTACTID: - if (!td->maxcontacts) - td->maxcontacts = MT_DEFAULT_MAXCONTACT; input_mt_init_slots(hi->input, td->maxcontacts); td->last_slot_field = usage->hid; td->last_field_index = field->index; @@ -549,6 +547,9 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) if (ret) goto fail; + if (!td->maxcontacts) + td->maxcontacts = MT_DEFAULT_MAXCONTACT; + td->slots = kzalloc(td->maxcontacts * sizeof(struct mt_slot), GFP_KERNEL); if (!td->slots) { @@ -676,9 +677,6 @@ static const struct hid_device_id mt_devices[] = { { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, HID_USB_DEVICE(USB_VENDOR_ID_LUMIO, USB_DEVICE_ID_CRYSTALTOUCH) }, - { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, - HID_USB_DEVICE(USB_VENDOR_ID_LUMIO, - USB_DEVICE_ID_CRYSTALTOUCH_DUAL) }, /* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, @@ -709,10 +707,10 @@ static const struct hid_device_id mt_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_STANTUM, USB_DEVICE_ID_MTP)}, { .driver_data = MT_CLS_CONFIDENCE, - HID_USB_DEVICE(USB_VENDOR_ID_STANTUM_STM, + HID_USB_DEVICE(USB_VENDOR_ID_STANTUM, USB_DEVICE_ID_MTP_STM)}, { .driver_data = MT_CLS_CONFIDENCE, - HID_USB_DEVICE(USB_VENDOR_ID_STANTUM_SITRONIX, + HID_USB_DEVICE(USB_VENDOR_ID_STANTUM, USB_DEVICE_ID_MTP_SITRONIX)}, /* Touch International panels */ diff --git a/trunk/drivers/leds/leds-lp5521.c b/trunk/drivers/leds/leds-lp5521.c index cc1dc4817fac..c0cff64a1ae6 100644 --- 
a/trunk/drivers/leds/leds-lp5521.c +++ b/trunk/drivers/leds/leds-lp5521.c @@ -593,7 +593,7 @@ static void lp5521_unregister_sysfs(struct i2c_client *client) &lp5521_led_attribute_group); } -static int __devinit lp5521_init_led(struct lp5521_led *led, +static int __init lp5521_init_led(struct lp5521_led *led, struct i2c_client *client, int chan, struct lp5521_platform_data *pdata) { @@ -637,7 +637,7 @@ static int __devinit lp5521_init_led(struct lp5521_led *led, return 0; } -static int __devinit lp5521_probe(struct i2c_client *client, +static int lp5521_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct lp5521_chip *chip; diff --git a/trunk/drivers/leds/leds-lp5523.c b/trunk/drivers/leds/leds-lp5523.c index 5971e309b234..e19fed25f137 100644 --- a/trunk/drivers/leds/leds-lp5523.c +++ b/trunk/drivers/leds/leds-lp5523.c @@ -826,7 +826,7 @@ static int __init lp5523_init_engine(struct lp5523_engine *engine, int id) return 0; } -static int __devinit lp5523_init_led(struct lp5523_led *led, struct device *dev, +static int __init lp5523_init_led(struct lp5523_led *led, struct device *dev, int chan, struct lp5523_platform_data *pdata) { char name[32]; @@ -872,7 +872,7 @@ static int __devinit lp5523_init_led(struct lp5523_led *led, struct device *dev, static struct i2c_driver lp5523_driver; -static int __devinit lp5523_probe(struct i2c_client *client, +static int lp5523_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct lp5523_chip *chip; diff --git a/trunk/drivers/misc/cb710/sgbuf2.c b/trunk/drivers/misc/cb710/sgbuf2.c index 2a40d0efdff5..d019746551f3 100644 --- a/trunk/drivers/misc/cb710/sgbuf2.c +++ b/trunk/drivers/misc/cb710/sgbuf2.c @@ -47,7 +47,7 @@ static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter) static inline bool needs_unaligned_copy(const void *ptr) { -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS return false; #else return ((ptr - NULL) & 3) != 0; diff --git a/trunk/drivers/misc/ioc4.c b/trunk/drivers/misc/ioc4.c index df03dd3bd0e2..668d41e594a9 100644 --- a/trunk/drivers/misc/ioc4.c +++ b/trunk/drivers/misc/ioc4.c @@ -270,7 +270,7 @@ ioc4_variant(struct ioc4_driver_data *idd) return IOC4_VARIANT_PCI_RT; } -static void +static void __devinit ioc4_load_modules(struct work_struct *work) { request_module("sgiioc4"); diff --git a/trunk/drivers/misc/lkdtm.c b/trunk/drivers/misc/lkdtm.c index 150cd7061b80..81d7fa4ec0db 100644 --- a/trunk/drivers/misc/lkdtm.c +++ b/trunk/drivers/misc/lkdtm.c @@ -120,7 +120,6 @@ static int recur_count = REC_NUM_DEFAULT; static enum cname cpoint = CN_INVALID; static enum ctype cptype = CT_NONE; static int count = DEFAULT_COUNT; -static DEFINE_SPINLOCK(count_lock); module_param(recur_count, int, 0644); MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\ @@ -231,14 +230,11 @@ static const char *cp_name_to_str(enum cname name) static int lkdtm_parse_commandline(void) { int i; - unsigned long flags; if (cpoint_count < 1 || recur_count < 1) return -EINVAL; - spin_lock_irqsave(&count_lock, flags); count = cpoint_count; - spin_unlock_irqrestore(&count_lock, flags); /* No special parameters */ if (!cpoint_type && !cpoint_name) @@ -353,9 +349,6 @@ static void lkdtm_do_action(enum ctype which) static void lkdtm_handler(void) { - unsigned long flags; - - spin_lock_irqsave(&count_lock, flags); count--; printk(KERN_INFO "lkdtm: Crash point %s of type %s hit, trigger in %d rounds\n", cp_name_to_str(cpoint), cp_type_to_str(cptype), count); @@ -364,7 
+357,6 @@ static void lkdtm_handler(void) lkdtm_do_action(cptype); count = cpoint_count; } - spin_unlock_irqrestore(&count_lock, flags); } static int lkdtm_register_cpoint(enum cname which) diff --git a/trunk/drivers/mmc/card/block.c b/trunk/drivers/mmc/card/block.c index f85e42224559..71da5641e258 100644 --- a/trunk/drivers/mmc/card/block.c +++ b/trunk/drivers/mmc/card/block.c @@ -1024,7 +1024,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, INIT_LIST_HEAD(&md->part); md->usage = 1; - ret = mmc_init_queue(&md->queue, card, &md->lock, subname); + ret = mmc_init_queue(&md->queue, card, &md->lock); if (ret) goto err_putdisk; @@ -1297,9 +1297,6 @@ static void mmc_blk_remove(struct mmc_card *card) struct mmc_blk_data *md = mmc_get_drvdata(card); mmc_blk_remove_parts(card, md); - mmc_claim_host(card->host); - mmc_blk_part_switch(card, md); - mmc_release_host(card->host); mmc_blk_remove_req(md); mmc_set_drvdata(card, NULL); } diff --git a/trunk/drivers/mmc/card/queue.c b/trunk/drivers/mmc/card/queue.c index 6413afa318d2..c07322c2658c 100644 --- a/trunk/drivers/mmc/card/queue.c +++ b/trunk/drivers/mmc/card/queue.c @@ -106,12 +106,10 @@ static void mmc_request(struct request_queue *q) * @mq: mmc queue * @card: mmc card to attach this queue * @lock: queue lock - * @subname: partition subname * * Initialise a MMC card request queue. */ -int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, - spinlock_t *lock, const char *subname) +int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock) { struct mmc_host *host = card->host; u64 limit = BLK_BOUNCE_HIGH; @@ -135,7 +133,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, mq->queue->limits.max_discard_sectors = UINT_MAX; if (card->erased_byte == 0) mq->queue->limits.discard_zeroes_data = 1; - mq->queue->limits.discard_granularity = card->pref_erase << 9; + if (!mmc_can_trim(card) && is_power_of_2(card->erase_size)) { + mq->queue->limits.discard_granularity = + card->erase_size << 9; + mq->queue->limits.discard_alignment = + card->erase_size << 9; + } if (mmc_can_secure_erase_trim(card)) queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, mq->queue); @@ -206,8 +209,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, sema_init(&mq->thread_sem, 1); - mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd/%d%s", - host->index, subname ? 
subname : ""); + mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd/%d", + host->index); if (IS_ERR(mq->thread)) { ret = PTR_ERR(mq->thread); diff --git a/trunk/drivers/mmc/card/queue.h b/trunk/drivers/mmc/card/queue.h index 6223ef8dc9cd..64e66e0d4994 100644 --- a/trunk/drivers/mmc/card/queue.h +++ b/trunk/drivers/mmc/card/queue.h @@ -19,8 +19,7 @@ struct mmc_queue { unsigned int bounce_sg_len; }; -extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, - const char *); +extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *); extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); diff --git a/trunk/drivers/mmc/core/core.c b/trunk/drivers/mmc/core/core.c index 7843efe22359..68091dda3f31 100644 --- a/trunk/drivers/mmc/core/core.c +++ b/trunk/drivers/mmc/core/core.c @@ -1245,7 +1245,7 @@ static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, */ timeout_clks <<= 1; timeout_us += (timeout_clks * 1000) / - (mmc_host_clk_rate(card->host) / 1000); + (card->host->ios.clock / 1000); erase_timeout = timeout_us / 1000; diff --git a/trunk/drivers/mmc/core/sdio.c b/trunk/drivers/mmc/core/sdio.c index 262fff019177..4d0c15bfa514 100644 --- a/trunk/drivers/mmc/core/sdio.c +++ b/trunk/drivers/mmc/core/sdio.c @@ -691,54 +691,15 @@ static int mmc_sdio_resume(struct mmc_host *host) static int mmc_sdio_power_restore(struct mmc_host *host) { int ret; - u32 ocr; BUG_ON(!host); BUG_ON(!host->card); mmc_claim_host(host); - - /* - * Reset the card by performing the same steps that are taken by - * mmc_rescan_try_freq() and mmc_attach_sdio() during a "normal" probe. - * - * sdio_reset() is technically not needed. Having just powered up the - * hardware, it should already be in reset state. However, some - * platforms (such as SD8686 on OLPC) do not instantly cut power, - * meaning that a reset is required when restoring power soon after - * powering off. It is harmless in other cases. - * - * The CMD5 reset (mmc_send_io_op_cond()), according to the SDIO spec, - * is not necessary for non-removable cards. However, it is required - * for OLPC SD8686 (which expects a [CMD5,5,3,7] init sequence), and - * harmless in other situations. - * - * With these steps taken, mmc_select_voltage() is also required to - * restore the correct voltage setting of the card. 
- */ - sdio_reset(host); - mmc_go_idle(host); - mmc_send_if_cond(host, host->ocr_avail); - - ret = mmc_send_io_op_cond(host, 0, &ocr); - if (ret) - goto out; - - if (host->ocr_avail_sdio) - host->ocr_avail = host->ocr_avail_sdio; - - host->ocr = mmc_select_voltage(host, ocr & ~0x7F); - if (!host->ocr) { - ret = -EINVAL; - goto out; - } - ret = mmc_sdio_init_card(host, host->ocr, host->card, mmc_card_keep_power(host)); if (!ret && host->sdio_irqs) mmc_signal_sdio_irq(host); - -out: mmc_release_host(host); return ret; diff --git a/trunk/drivers/mmc/core/sdio_bus.c b/trunk/drivers/mmc/core/sdio_bus.c index d2565df8a7fb..d29b9c36919a 100644 --- a/trunk/drivers/mmc/core/sdio_bus.c +++ b/trunk/drivers/mmc/core/sdio_bus.c @@ -189,7 +189,7 @@ static int sdio_bus_remove(struct device *dev) /* Then undo the runtime PM settings in sdio_bus_probe() */ if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) - pm_runtime_put_sync(dev); + pm_runtime_put_noidle(dev); out: return ret; diff --git a/trunk/drivers/mmc/host/of_mmc_spi.c b/trunk/drivers/mmc/host/of_mmc_spi.c index ab66f2454dc4..e2aecb7f1d5c 100644 --- a/trunk/drivers/mmc/host/of_mmc_spi.c +++ b/trunk/drivers/mmc/host/of_mmc_spi.c @@ -25,11 +25,6 @@ #include #include -/* For archs that don't support NO_IRQ (such as mips), provide a dummy value */ -#ifndef NO_IRQ -#define NO_IRQ 0 -#endif - MODULE_LICENSE("GPL"); enum { diff --git a/trunk/drivers/mmc/host/omap_hsmmc.c b/trunk/drivers/mmc/host/omap_hsmmc.c index dedf3dab8a3b..5b2e2155b413 100644 --- a/trunk/drivers/mmc/host/omap_hsmmc.c +++ b/trunk/drivers/mmc/host/omap_hsmmc.c @@ -429,6 +429,7 @@ static int omap_hsmmc_reg_get(struct omap_hsmmc_host *host) return -EINVAL; } } + mmc_slot(host).ocr_mask = mmc_regulator_get_ocrmask(reg); /* Allow an aux regulator */ reg = regulator_get(host->dev, "vmmc_aux"); @@ -961,8 +962,7 @@ static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno) spin_unlock(&host->irq_lock); if (host->use_dma && dma_ch != -1) { - dma_unmap_sg(mmc_dev(host->mmc), host->data->sg, - host->data->sg_len, + dma_unmap_sg(mmc_dev(host->mmc), host->data->sg, host->dma_len, omap_hsmmc_get_dma_dir(host, host->data)); omap_free_dma(dma_ch); } @@ -1346,7 +1346,7 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) return; } - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->dma_len, omap_hsmmc_get_dma_dir(host, data)); req_in_progress = host->req_in_progress; diff --git a/trunk/drivers/mmc/host/sh_mobile_sdhi.c b/trunk/drivers/mmc/host/sh_mobile_sdhi.c index ce500f03df85..b3654293017b 100644 --- a/trunk/drivers/mmc/host/sh_mobile_sdhi.c +++ b/trunk/drivers/mmc/host/sh_mobile_sdhi.c @@ -92,7 +92,7 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->ocr_mask = p->tmio_ocr_mask; mmc_data->capabilities |= p->tmio_caps; - if (p->dma_slave_tx > 0 && p->dma_slave_rx > 0) { + if (p->dma_slave_tx >= 0 && p->dma_slave_rx >= 0) { priv->param_tx.slave_id = p->dma_slave_tx; priv->param_rx.slave_id = p->dma_slave_rx; priv->dma_priv.chan_priv_tx = &priv->param_tx; @@ -165,14 +165,13 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev) p->pdata = NULL; - tmio_mmc_host_remove(host); - for (i = 0; i < 3; i++) { irq = platform_get_irq(pdev, i); if (irq >= 0) free_irq(irq, host); } + tmio_mmc_host_remove(host); clk_disable(priv->clk); clk_put(priv->clk); kfree(priv); diff --git a/trunk/drivers/mmc/host/tmio_mmc_pio.c b/trunk/drivers/mmc/host/tmio_mmc_pio.c index 
0b09e8239aa0..ad6347bb02dd 100644 --- a/trunk/drivers/mmc/host/tmio_mmc_pio.c +++ b/trunk/drivers/mmc/host/tmio_mmc_pio.c @@ -824,8 +824,8 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc) struct tmio_mmc_host *host = mmc_priv(mmc); struct tmio_mmc_data *pdata = host->pdata; - return !((pdata->flags & TMIO_MMC_WRPROTECT_DISABLE) || - (sd_ctrl_read32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT)); + return ((pdata->flags & TMIO_MMC_WRPROTECT_DISABLE) || + !(sd_ctrl_read32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT)); } static int tmio_mmc_get_cd(struct mmc_host *mmc) diff --git a/trunk/drivers/mmc/host/vub300.c b/trunk/drivers/mmc/host/vub300.c index d4455ffbefd8..cbb03305b77b 100644 --- a/trunk/drivers/mmc/host/vub300.c +++ b/trunk/drivers/mmc/host/vub300.c @@ -2096,7 +2096,7 @@ static struct mmc_host_ops vub300_mmc_ops = { static int vub300_probe(struct usb_interface *interface, const struct usb_device_id *id) { /* NOT irq */ - struct vub300_mmc_host *vub300; + struct vub300_mmc_host *vub300 = NULL; struct usb_host_interface *iface_desc; struct usb_device *udev = usb_get_dev(interface_to_usbdev(interface)); int i; @@ -2118,20 +2118,23 @@ static int vub300_probe(struct usb_interface *interface, command_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!command_out_urb) { retval = -ENOMEM; - dev_err(&udev->dev, "not enough memory for command_out_urb\n"); + dev_err(&vub300->udev->dev, + "not enough memory for the command_out_urb\n"); goto error0; } command_res_urb = usb_alloc_urb(0, GFP_KERNEL); if (!command_res_urb) { retval = -ENOMEM; - dev_err(&udev->dev, "not enough memory for command_res_urb\n"); + dev_err(&vub300->udev->dev, + "not enough memory for the command_res_urb\n"); goto error1; } /* this also allocates memory for our VUB300 mmc host device */ mmc = mmc_alloc_host(sizeof(struct vub300_mmc_host), &udev->dev); if (!mmc) { retval = -ENOMEM; - dev_err(&udev->dev, "not enough memory for the mmc_host\n"); + dev_err(&vub300->udev->dev, + "not enough memory for the mmc_host\n"); goto error4; } /* MMC core transfer sizes tunable parameters */ diff --git a/trunk/drivers/oprofile/oprofile_perf.c b/trunk/drivers/oprofile/oprofile_perf.c index 59acf9ef78a4..9046f7b2ed79 100644 --- a/trunk/drivers/oprofile/oprofile_perf.c +++ b/trunk/drivers/oprofile/oprofile_perf.c @@ -79,7 +79,7 @@ static int op_create_counter(int cpu, int event) pevent = perf_event_create_kernel_counter(&counter_config[event].attr, cpu, NULL, - op_overflow_handler, NULL); + op_overflow_handler); if (IS_ERR(pevent)) return PTR_ERR(pevent); diff --git a/trunk/drivers/pci/pci-driver.c b/trunk/drivers/pci/pci-driver.c index 46767c53917a..135df164a4c1 100644 --- a/trunk/drivers/pci/pci-driver.c +++ b/trunk/drivers/pci/pci-driver.c @@ -624,7 +624,7 @@ static int pci_pm_prepare(struct device *dev) * system from the sleep state, we'll have to prevent it from signaling * wake-up. 
*/ - pm_runtime_get_sync(dev); + pm_runtime_resume(dev); if (drv && drv->pm && drv->pm->prepare) error = drv->pm->prepare(dev); @@ -638,8 +638,6 @@ static void pci_pm_complete(struct device *dev) if (drv && drv->pm && drv->pm->complete) drv->pm->complete(dev); - - pm_runtime_put_sync(dev); } #else /* !CONFIG_PM_SLEEP */ diff --git a/trunk/drivers/pci/pci.c b/trunk/drivers/pci/pci.c index 2c5b9b991279..5f10c23dff94 100644 --- a/trunk/drivers/pci/pci.c +++ b/trunk/drivers/pci/pci.c @@ -3284,7 +3284,7 @@ static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode, * @dev: the PCI device * @decode: true = enable decoding, false = disable decoding * @command_bits: PCI_COMMAND_IO and/or PCI_COMMAND_MEMORY - * @flags: traverse ancestors and change bridges + * @change_bridge_flags: traverse ancestors and change bridges * CHANGE_BRIDGE_ONLY / CHANGE_BRIDGE */ int pci_set_vga_state(struct pci_dev *dev, bool decode, diff --git a/trunk/drivers/pci/probe.c b/trunk/drivers/pci/probe.c index bafb3c3d4a89..48849ffdd672 100644 --- a/trunk/drivers/pci/probe.c +++ b/trunk/drivers/pci/probe.c @@ -168,7 +168,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; if (type == pci_bar_io) { l &= PCI_BASE_ADDRESS_IO_MASK; - mask = PCI_BASE_ADDRESS_IO_MASK & (u32) IO_SPACE_LIMIT; + mask = PCI_BASE_ADDRESS_IO_MASK & IO_SPACE_LIMIT; } else { l &= PCI_BASE_ADDRESS_MEM_MASK; mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; diff --git a/trunk/drivers/pci/quirks.c b/trunk/drivers/pci/quirks.c index 02145e9697a9..e8a140669f90 100644 --- a/trunk/drivers/pci/quirks.c +++ b/trunk/drivers/pci/quirks.c @@ -2761,8 +2761,6 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832); -DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832); #endif /*CONFIG_MMC_RICOH_MMC*/ #if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) diff --git a/trunk/drivers/rtc/rtc-ds1307.c b/trunk/drivers/rtc/rtc-ds1307.c index b2005b44e4f7..4724ba3acf1a 100644 --- a/trunk/drivers/rtc/rtc-ds1307.c +++ b/trunk/drivers/rtc/rtc-ds1307.c @@ -149,7 +149,6 @@ static const struct i2c_device_id ds1307_id[] = { { "ds1340", ds_1340 }, { "ds3231", ds_3231 }, { "m41t00", m41t00 }, - { "pt7c4338", ds_1307 }, { "rx8025", rx_8025 }, { } }; diff --git a/trunk/drivers/rtc/rtc-vt8500.c b/trunk/drivers/rtc/rtc-vt8500.c index efd6066b5cd2..b8bc862903ae 100644 --- a/trunk/drivers/rtc/rtc-vt8500.c +++ b/trunk/drivers/rtc/rtc-vt8500.c @@ -78,6 +78,7 @@ struct vt8500_rtc { void __iomem *regbase; struct resource *res; int irq_alarm; + int irq_hz; struct rtc_device *rtc; spinlock_t lock; /* Protects this structure */ }; @@ -99,6 +100,10 @@ static irqreturn_t vt8500_rtc_irq(int irq, void *dev_id) if (isr & 1) events |= RTC_AF | RTC_IRQF; + /* Only second/minute interrupts are supported */ + if (isr & 2) + events |= RTC_UF | RTC_IRQF; + rtc_update_irq(vt8500_rtc->rtc, 1, events); return IRQ_HANDLED; @@ -194,12 +199,27 @@ static int vt8500_alarm_irq_enable(struct device *dev, unsigned int enabled) return 0; } +static int vt8500_update_irq_enable(struct device *dev, unsigned int enabled) +{ + struct vt8500_rtc *vt8500_rtc = dev_get_drvdata(dev); + unsigned 
long tmp = readl(vt8500_rtc->regbase + VT8500_RTC_CR); + + if (enabled) + tmp |= VT8500_RTC_CR_SM_SEC | VT8500_RTC_CR_SM_ENABLE; + else + tmp &= ~VT8500_RTC_CR_SM_ENABLE; + + writel(tmp, vt8500_rtc->regbase + VT8500_RTC_CR); + return 0; +} + static const struct rtc_class_ops vt8500_rtc_ops = { .read_time = vt8500_rtc_read_time, .set_time = vt8500_rtc_set_time, .read_alarm = vt8500_rtc_read_alarm, .set_alarm = vt8500_rtc_set_alarm, .alarm_irq_enable = vt8500_alarm_irq_enable, + .update_irq_enable = vt8500_update_irq_enable, }; static int __devinit vt8500_rtc_probe(struct platform_device *pdev) @@ -228,6 +248,13 @@ static int __devinit vt8500_rtc_probe(struct platform_device *pdev) goto err_free; } + vt8500_rtc->irq_hz = platform_get_irq(pdev, 1); + if (vt8500_rtc->irq_hz < 0) { + dev_err(&pdev->dev, "No 1Hz IRQ resource defined\n"); + ret = -ENXIO; + goto err_free; + } + vt8500_rtc->res = request_mem_region(vt8500_rtc->res->start, resource_size(vt8500_rtc->res), "vt8500-rtc"); @@ -245,8 +272,9 @@ static int __devinit vt8500_rtc_probe(struct platform_device *pdev) goto err_release; } - /* Enable RTC and set it to 24-hour mode */ - writel(VT8500_RTC_CR_ENABLE | VT8500_RTC_CR_24H, + /* Enable the second/minute interrupt generation and enable RTC */ + writel(VT8500_RTC_CR_ENABLE | VT8500_RTC_CR_24H + | VT8500_RTC_CR_SM_ENABLE | VT8500_RTC_CR_SM_SEC, vt8500_rtc->regbase + VT8500_RTC_CR); vt8500_rtc->rtc = rtc_device_register("vt8500-rtc", &pdev->dev, @@ -258,16 +286,26 @@ static int __devinit vt8500_rtc_probe(struct platform_device *pdev) goto err_unmap; } + ret = request_irq(vt8500_rtc->irq_hz, vt8500_rtc_irq, 0, + "rtc 1Hz", vt8500_rtc); + if (ret < 0) { + dev_err(&pdev->dev, "can't get irq %i, err %d\n", + vt8500_rtc->irq_hz, ret); + goto err_unreg; + } + ret = request_irq(vt8500_rtc->irq_alarm, vt8500_rtc_irq, 0, "rtc alarm", vt8500_rtc); if (ret < 0) { dev_err(&pdev->dev, "can't get irq %i, err %d\n", vt8500_rtc->irq_alarm, ret); - goto err_unreg; + goto err_free_hz; } return 0; +err_free_hz: + free_irq(vt8500_rtc->irq_hz, vt8500_rtc); err_unreg: rtc_device_unregister(vt8500_rtc->rtc); err_unmap: @@ -285,6 +323,7 @@ static int __devexit vt8500_rtc_remove(struct platform_device *pdev) struct vt8500_rtc *vt8500_rtc = platform_get_drvdata(pdev); free_irq(vt8500_rtc->irq_alarm, vt8500_rtc); + free_irq(vt8500_rtc->irq_hz, vt8500_rtc); rtc_device_unregister(vt8500_rtc->rtc); diff --git a/trunk/drivers/target/loopback/tcm_loop.c b/trunk/drivers/target/loopback/tcm_loop.c index 70c2e7fa6664..dee2a2c909f5 100644 --- a/trunk/drivers/target/loopback/tcm_loop.c +++ b/trunk/drivers/target/loopback/tcm_loop.c @@ -386,7 +386,7 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) */ se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd, (void *)tl_tmr, TMR_LUN_RESET); - if (IS_ERR(se_cmd->se_tmr_req)) + if (!se_cmd->se_tmr_req) goto release; /* * Locate the underlying TCM struct se_lun from sc->device->lun @@ -1017,7 +1017,6 @@ static int tcm_loop_make_nexus( struct se_portal_group *se_tpg; struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; struct tcm_loop_nexus *tl_nexus; - int ret = -ENOMEM; if (tl_tpg->tl_hba->tl_nexus) { printk(KERN_INFO "tl_tpg->tl_hba->tl_nexus already exists\n"); @@ -1034,10 +1033,8 @@ static int tcm_loop_make_nexus( * Initialize the struct se_session pointer */ tl_nexus->se_sess = transport_init_session(); - if (IS_ERR(tl_nexus->se_sess)) { - ret = PTR_ERR(tl_nexus->se_sess); + if (!tl_nexus->se_sess) goto out; - } /* * Since we are running in 'demo mode' this call with generate a * 
struct se_node_acl for the tcm_loop struct se_portal_group with the SCSI @@ -1063,7 +1060,7 @@ static int tcm_loop_make_nexus( out: kfree(tl_nexus); - return ret; + return -ENOMEM; } static int tcm_loop_drop_nexus( @@ -1143,7 +1140,7 @@ static ssize_t tcm_loop_tpg_store_nexus( * the fabric protocol_id set in tcm_loop_make_scsi_hba(), and call * tcm_loop_make_nexus() */ - if (strlen(page) >= TL_WWN_ADDR_LEN) { + if (strlen(page) > TL_WWN_ADDR_LEN) { printk(KERN_ERR "Emulated NAA Sas Address: %s, exceeds" " max: %d\n", page, TL_WWN_ADDR_LEN); return -EINVAL; @@ -1324,7 +1321,7 @@ struct se_wwn *tcm_loop_make_scsi_hba( return ERR_PTR(-EINVAL); check_len: - if (strlen(name) >= TL_WWN_ADDR_LEN) { + if (strlen(name) > TL_WWN_ADDR_LEN) { printk(KERN_ERR "Emulated NAA %s Address: %s, exceeds" " max: %d\n", name, tcm_loop_dump_proto_id(tl_hba), TL_WWN_ADDR_LEN); diff --git a/trunk/drivers/target/target_core_configfs.c b/trunk/drivers/target/target_core_configfs.c index 25c1f49a7d8b..ee6fad979b50 100644 --- a/trunk/drivers/target/target_core_configfs.c +++ b/trunk/drivers/target/target_core_configfs.c @@ -304,7 +304,7 @@ struct target_fabric_configfs *target_fabric_configfs_init( printk(KERN_ERR "Unable to locate passed fabric name\n"); return NULL; } - if (strlen(name) >= TARGET_FABRIC_NAME_SIZE) { + if (strlen(name) > TARGET_FABRIC_NAME_SIZE) { printk(KERN_ERR "Passed name: %s exceeds TARGET_FABRIC" "_NAME_SIZE\n", name); return NULL; @@ -312,7 +312,7 @@ struct target_fabric_configfs *target_fabric_configfs_init( tf = kzalloc(sizeof(struct target_fabric_configfs), GFP_KERNEL); if (!(tf)) - return NULL; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&tf->tf_list); atomic_set(&tf->tf_access_cnt, 0); @@ -851,7 +851,7 @@ static ssize_t target_core_dev_wwn_store_attr_vpd_unit_serial( return -EOPNOTSUPP; } - if (strlen(page) >= INQUIRY_VPD_SERIAL_LEN) { + if ((strlen(page) + 1) > INQUIRY_VPD_SERIAL_LEN) { printk(KERN_ERR "Emulated VPD Unit Serial exceeds" " INQUIRY_VPD_SERIAL_LEN: %d\n", INQUIRY_VPD_SERIAL_LEN); return -EOVERFLOW; @@ -917,7 +917,7 @@ static ssize_t target_core_dev_wwn_show_attr_vpd_protocol_identifier( transport_dump_vpd_proto_id(vpd, buf, VPD_TMP_BUF_SIZE); - if ((len + strlen(buf) >= PAGE_SIZE)) + if ((len + strlen(buf) > PAGE_SIZE)) break; len += sprintf(page+len, "%s", buf); @@ -962,19 +962,19 @@ static ssize_t target_core_dev_wwn_show_attr_##_name( \ \ memset(buf, 0, VPD_TMP_BUF_SIZE); \ transport_dump_vpd_assoc(vpd, buf, VPD_TMP_BUF_SIZE); \ - if ((len + strlen(buf) >= PAGE_SIZE)) \ + if ((len + strlen(buf) > PAGE_SIZE)) \ break; \ len += sprintf(page+len, "%s", buf); \ \ memset(buf, 0, VPD_TMP_BUF_SIZE); \ transport_dump_vpd_ident_type(vpd, buf, VPD_TMP_BUF_SIZE); \ - if ((len + strlen(buf) >= PAGE_SIZE)) \ + if ((len + strlen(buf) > PAGE_SIZE)) \ break; \ len += sprintf(page+len, "%s", buf); \ \ memset(buf, 0, VPD_TMP_BUF_SIZE); \ transport_dump_vpd_ident(vpd, buf, VPD_TMP_BUF_SIZE); \ - if ((len + strlen(buf) >= PAGE_SIZE)) \ + if ((len + strlen(buf) > PAGE_SIZE)) \ break; \ len += sprintf(page+len, "%s", buf); \ } \ @@ -1299,7 +1299,7 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_registered_i_pts( &i_buf[0] : "", pr_reg->pr_res_key, pr_reg->pr_res_generation); - if ((len + strlen(buf) >= PAGE_SIZE)) + if ((len + strlen(buf) > PAGE_SIZE)) break; len += sprintf(page+len, "%s", buf); @@ -1496,7 +1496,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( ret = -ENOMEM; goto out; } - if (strlen(i_port) >= PR_APTPL_MAX_IPORT_LEN) { + if (strlen(i_port) > 
PR_APTPL_MAX_IPORT_LEN) { printk(KERN_ERR "APTPL metadata initiator_node=" " exceeds PR_APTPL_MAX_IPORT_LEN: %d\n", PR_APTPL_MAX_IPORT_LEN); @@ -1510,7 +1510,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( ret = -ENOMEM; goto out; } - if (strlen(isid) >= PR_REG_ISID_LEN) { + if (strlen(isid) > PR_REG_ISID_LEN) { printk(KERN_ERR "APTPL metadata initiator_isid" "= exceeds PR_REG_ISID_LEN: %d\n", PR_REG_ISID_LEN); @@ -1571,7 +1571,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( ret = -ENOMEM; goto out; } - if (strlen(t_port) >= PR_APTPL_MAX_TPORT_LEN) { + if (strlen(t_port) > PR_APTPL_MAX_TPORT_LEN) { printk(KERN_ERR "APTPL metadata target_node=" " exceeds PR_APTPL_MAX_TPORT_LEN: %d\n", PR_APTPL_MAX_TPORT_LEN); @@ -3052,7 +3052,7 @@ static struct config_group *target_core_call_addhbatotarget( int ret; memset(buf, 0, TARGET_CORE_NAME_MAX_LEN); - if (strlen(name) >= TARGET_CORE_NAME_MAX_LEN) { + if (strlen(name) > TARGET_CORE_NAME_MAX_LEN) { printk(KERN_ERR "Passed *name strlen(): %d exceeds" " TARGET_CORE_NAME_MAX_LEN: %d\n", (int)strlen(name), TARGET_CORE_NAME_MAX_LEN); diff --git a/trunk/drivers/target/target_core_device.c b/trunk/drivers/target/target_core_device.c index ba698ea62bb2..8407f9ca2b31 100644 --- a/trunk/drivers/target/target_core_device.c +++ b/trunk/drivers/target/target_core_device.c @@ -192,7 +192,7 @@ int transport_get_lun_for_tmr( &SE_NODE_ACL(se_sess)->device_list[unpacked_lun]; if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { se_lun = se_cmd->se_lun = se_tmr->tmr_lun = deve->se_lun; - dev = se_lun->lun_se_dev; + dev = se_tmr->tmr_dev = se_lun->lun_se_dev; se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_orig_obj_ptr = SE_LUN(se_cmd)->lun_se_dev; @@ -216,7 +216,6 @@ int transport_get_lun_for_tmr( se_cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; return -1; } - se_tmr->tmr_dev = dev; spin_lock(&dev->se_tmr_lock); list_add_tail(&se_tmr->tmr_list, &dev->dev_tmr_list); @@ -1431,7 +1430,7 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl( struct se_lun_acl *lacl; struct se_node_acl *nacl; - if (strlen(initiatorname) >= TRANSPORT_IQN_LEN) { + if (strlen(initiatorname) > TRANSPORT_IQN_LEN) { printk(KERN_ERR "%s InitiatorName exceeds maximum size.\n", TPG_TFO(tpg)->get_fabric_name()); *ret = -EOVERFLOW; diff --git a/trunk/drivers/target/target_core_pr.c b/trunk/drivers/target/target_core_pr.c index b662db3a320b..a79f518ca6e2 100644 --- a/trunk/drivers/target/target_core_pr.c +++ b/trunk/drivers/target/target_core_pr.c @@ -1916,7 +1916,7 @@ static int __core_scsi3_update_aptpl_buf( pr_reg->pr_res_mapped_lun); } - if ((len + strlen(tmp) >= pr_aptpl_buf_len)) { + if ((len + strlen(tmp) > pr_aptpl_buf_len)) { printk(KERN_ERR "Unable to update renaming" " APTPL metadata\n"); spin_unlock(&T10_RES(su_dev)->registration_lock); @@ -1934,7 +1934,7 @@ static int __core_scsi3_update_aptpl_buf( TPG_TFO(tpg)->tpg_get_tag(tpg), lun->lun_sep->sep_rtpi, lun->unpacked_lun, reg_count); - if ((len + strlen(tmp) >= pr_aptpl_buf_len)) { + if ((len + strlen(tmp) > pr_aptpl_buf_len)) { printk(KERN_ERR "Unable to update renaming" " APTPL metadata\n"); spin_unlock(&T10_RES(su_dev)->registration_lock); @@ -1986,7 +1986,7 @@ static int __core_scsi3_write_aptpl_to_file( memset(iov, 0, sizeof(struct iovec)); memset(path, 0, 512); - if (strlen(&wwn->unit_serial[0]) >= 512) { + if (strlen(&wwn->unit_serial[0]) > 512) { printk(KERN_ERR "WWN value for struct se_device does not fit" " into path buffer\n"); return 
-1; diff --git a/trunk/drivers/target/target_core_tmr.c b/trunk/drivers/target/target_core_tmr.c index 179063d81cdd..59b8b9c5ad72 100644 --- a/trunk/drivers/target/target_core_tmr.c +++ b/trunk/drivers/target/target_core_tmr.c @@ -75,16 +75,10 @@ void core_tmr_release_req( { struct se_device *dev = tmr->tmr_dev; - if (!dev) { - kmem_cache_free(se_tmr_req_cache, tmr); - return; - } - spin_lock(&dev->se_tmr_lock); list_del(&tmr->tmr_list); - spin_unlock(&dev->se_tmr_lock); - kmem_cache_free(se_tmr_req_cache, tmr); + spin_unlock(&dev->se_tmr_lock); } static void core_tmr_handle_tas_abort( diff --git a/trunk/drivers/target/target_core_transport.c b/trunk/drivers/target/target_core_transport.c index 4b9b7169bdd9..4dafeb8b5638 100644 --- a/trunk/drivers/target/target_core_transport.c +++ b/trunk/drivers/target/target_core_transport.c @@ -536,13 +536,13 @@ EXPORT_SYMBOL(transport_register_session); void transport_deregister_session_configfs(struct se_session *se_sess) { struct se_node_acl *se_nacl; - unsigned long flags; + /* * Used by struct se_node_acl's under ConfigFS to locate active struct se_session */ se_nacl = se_sess->se_node_acl; if ((se_nacl)) { - spin_lock_irqsave(&se_nacl->nacl_sess_lock, flags); + spin_lock_irq(&se_nacl->nacl_sess_lock); list_del(&se_sess->sess_acl_list); /* * If the session list is empty, then clear the pointer. @@ -556,7 +556,7 @@ void transport_deregister_session_configfs(struct se_session *se_sess) se_nacl->acl_sess_list.prev, struct se_session, sess_acl_list); } - spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags); + spin_unlock_irq(&se_nacl->nacl_sess_lock); } } EXPORT_SYMBOL(transport_deregister_session_configfs); diff --git a/trunk/drivers/target/tcm_fc/tcm_fc.h b/trunk/drivers/target/tcm_fc/tcm_fc.h index 7b82f1b7fef8..defff32b7880 100644 --- a/trunk/drivers/target/tcm_fc/tcm_fc.h +++ b/trunk/drivers/target/tcm_fc/tcm_fc.h @@ -144,7 +144,7 @@ enum ft_cmd_state { */ struct ft_cmd { enum ft_cmd_state state; - u32 lun; /* LUN from request */ + u16 lun; /* LUN from request */ struct ft_sess *sess; /* session held for cmd */ struct fc_seq *seq; /* sequence in exchange mgr */ struct se_cmd se_cmd; /* Local TCM I/O descriptor */ diff --git a/trunk/drivers/target/tcm_fc/tfc_cmd.c b/trunk/drivers/target/tcm_fc/tfc_cmd.c index b2a106729d49..c056a1132ae1 100644 --- a/trunk/drivers/target/tcm_fc/tfc_cmd.c +++ b/trunk/drivers/target/tcm_fc/tfc_cmd.c @@ -94,6 +94,29 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller) 16, 4, cmd->cdb, MAX_COMMAND_SIZE, 0); } +/* + * Get LUN from CDB. 
+ */ +static int ft_get_lun_for_cmd(struct ft_cmd *cmd, u8 *lunp) +{ + u64 lun; + + lun = lunp[1]; + switch (lunp[0] >> 6) { + case 0: + break; + case 1: + lun |= (lunp[0] & 0x3f) << 8; + break; + default: + return -1; + } + if (lun >= TRANSPORT_MAX_LUNS_PER_TPG) + return -1; + cmd->lun = lun; + return transport_get_lun_for_cmd(&cmd->se_cmd, NULL, lun); +} + static void ft_queue_cmd(struct ft_sess *sess, struct ft_cmd *cmd) { struct se_queue_obj *qobj; @@ -395,7 +418,6 @@ static void ft_send_tm(struct ft_cmd *cmd) { struct se_tmr_req *tmr; struct fcp_cmnd *fcp; - struct ft_sess *sess; u8 tm_func; fcp = fc_frame_payload_get(cmd->req_frame, sizeof(*fcp)); @@ -403,6 +425,13 @@ static void ft_send_tm(struct ft_cmd *cmd) switch (fcp->fc_tm_flags) { case FCP_TMF_LUN_RESET: tm_func = TMR_LUN_RESET; + if (ft_get_lun_for_cmd(cmd, fcp->fc_lun) < 0) { + ft_dump_cmd(cmd, __func__); + transport_send_check_condition_and_sense(&cmd->se_cmd, + cmd->se_cmd.scsi_sense_reason, 0); + ft_sess_put(cmd->sess); + return; + } break; case FCP_TMF_TGT_RESET: tm_func = TMR_TARGET_WARM_RESET; @@ -434,36 +463,6 @@ static void ft_send_tm(struct ft_cmd *cmd) return; } cmd->se_cmd.se_tmr_req = tmr; - - switch (fcp->fc_tm_flags) { - case FCP_TMF_LUN_RESET: - cmd->lun = scsilun_to_int((struct scsi_lun *)fcp->fc_lun); - if (transport_get_lun_for_tmr(&cmd->se_cmd, cmd->lun) < 0) { - /* - * Make sure to clean up newly allocated TMR request - * since "unable to handle TMR request because failed - * to get to LUN" - */ - FT_TM_DBG("Failed to get LUN for TMR func %d, " - "se_cmd %p, unpacked_lun %d\n", - tm_func, &cmd->se_cmd, cmd->lun); - ft_dump_cmd(cmd, __func__); - sess = cmd->sess; - transport_send_check_condition_and_sense(&cmd->se_cmd, - cmd->se_cmd.scsi_sense_reason, 0); - transport_generic_free_cmd(&cmd->se_cmd, 0, 1, 0); - ft_sess_put(sess); - return; - } - break; - case FCP_TMF_TGT_RESET: - case FCP_TMF_CLR_TASK_SET: - case FCP_TMF_ABT_TASK_SET: - case FCP_TMF_CLR_ACA: - break; - default: - return; - } transport_generic_handle_tmr(&cmd->se_cmd); } @@ -636,8 +635,7 @@ static void ft_send_cmd(struct ft_cmd *cmd) fc_seq_exch(cmd->seq)->lp->tt.seq_set_resp(cmd->seq, ft_recv_seq, cmd); - cmd->lun = scsilun_to_int((struct scsi_lun *)fcp->fc_lun); - ret = transport_get_lun_for_cmd(&cmd->se_cmd, NULL, cmd->lun); + ret = ft_get_lun_for_cmd(cmd, fcp->fc_lun); if (ret < 0) { ft_dump_cmd(cmd, __func__); transport_send_check_condition_and_sense(&cmd->se_cmd, diff --git a/trunk/drivers/target/tcm_fc/tfc_io.c b/trunk/drivers/target/tcm_fc/tfc_io.c index 8c4a24077d9d..4c3c0efbe13f 100644 --- a/trunk/drivers/target/tcm_fc/tfc_io.c +++ b/trunk/drivers/target/tcm_fc/tfc_io.c @@ -203,7 +203,7 @@ int ft_queue_data_in(struct se_cmd *se_cmd) /* XXX For now, initiator will retry */ if (printk_ratelimit()) printk(KERN_ERR "%s: Failed to send frame %p, " - "xid <0x%x>, remaining %zu, " + "xid <0x%x>, remaining <0x%x>, " "lso_max <0x%x>\n", __func__, fp, ep->xid, remaining, lport->lso_max); diff --git a/trunk/drivers/target/tcm_fc/tfc_sess.c b/trunk/drivers/target/tcm_fc/tfc_sess.c index 7491e21cc6ae..a3bd57f2ea32 100644 --- a/trunk/drivers/target/tcm_fc/tfc_sess.c +++ b/trunk/drivers/target/tcm_fc/tfc_sess.c @@ -229,7 +229,7 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, return NULL; sess->se_sess = transport_init_session(); - if (IS_ERR(sess->se_sess)) { + if (!sess->se_sess) { kfree(sess); return NULL; } @@ -332,7 +332,7 @@ void ft_sess_close(struct se_session *se_sess) lport = sess->tport->lport; port_id = 
sess->port_id; if (port_id == -1) { - mutex_unlock(&ft_lport_lock); + mutex_lock(&ft_lport_lock); return; } FT_SESS_DBG("port_id %x\n", port_id); diff --git a/trunk/drivers/tty/serial/8250_pci.c b/trunk/drivers/tty/serial/8250_pci.c index 78e98a5cef96..4b4968a294b2 100644 --- a/trunk/drivers/tty/serial/8250_pci.c +++ b/trunk/drivers/tty/serial/8250_pci.c @@ -973,7 +973,7 @@ ce4100_serial_setup(struct serial_private *priv, static int pci_omegapci_setup(struct serial_private *priv, - const struct pciserial_board *board, + struct pciserial_board *board, struct uart_port *port, int idx) { return setup_port(priv, port, 2, idx * 8, 0); diff --git a/trunk/drivers/tty/serial/mrst_max3110.c b/trunk/drivers/tty/serial/mrst_max3110.c index a764bf99743b..1bd28450ca40 100644 --- a/trunk/drivers/tty/serial/mrst_max3110.c +++ b/trunk/drivers/tty/serial/mrst_max3110.c @@ -421,6 +421,7 @@ static int max3110_main_thread(void *_max) int ret = 0; struct circ_buf *xmit = &max->con_xmit; + init_waitqueue_head(wq); pr_info(PR_FMT "start main thread\n"); do { @@ -822,7 +823,7 @@ static int __devinit serial_m3110_probe(struct spi_device *spi) res = RC_TAG; ret = max3110_write_then_read(max, (u8 *)&res, (u8 *)&res, 2, 0); if (ret < 0 || res == 0 || res == 0xffff) { - dev_dbg(&spi->dev, "MAX3111 deemed not present (conf reg %04x)", + printk(KERN_ERR "MAX3111 deemed not present (conf reg %04x)", res); ret = -ENODEV; goto err_get_page; @@ -837,8 +838,6 @@ static int __devinit serial_m3110_probe(struct spi_device *spi) max->con_xmit.head = 0; max->con_xmit.tail = 0; - init_waitqueue_head(&max->wq); - max->main_thread = kthread_run(max3110_main_thread, max, "max3110_main"); if (IS_ERR(max->main_thread)) { diff --git a/trunk/drivers/usb/core/driver.c b/trunk/drivers/usb/core/driver.c index aa3cc465a601..e35a17687c05 100644 --- a/trunk/drivers/usb/core/driver.c +++ b/trunk/drivers/usb/core/driver.c @@ -375,7 +375,7 @@ static int usb_unbind_interface(struct device *dev) * Just re-enable it without affecting the endpoint toggles. */ usb_enable_interface(udev, intf, false); - } else if (!error && !intf->dev.power.is_prepared) { + } else if (!error && !intf->dev.power.in_suspend) { r = usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); if (r < 0) @@ -960,7 +960,7 @@ void usb_rebind_intf(struct usb_interface *intf) } /* Try to rebind the interface */ - if (!intf->dev.power.is_prepared) { + if (!intf->dev.power.in_suspend) { intf->needs_binding = 0; rc = device_attach(&intf->dev); if (rc < 0) @@ -1107,7 +1107,7 @@ static int usb_resume_interface(struct usb_device *udev, if (intf->condition == USB_INTERFACE_UNBOUND) { /* Carry out a deferred switch to altsetting 0 */ - if (intf->needs_altsetting0 && !intf->dev.power.is_prepared) { + if (intf->needs_altsetting0 && !intf->dev.power.in_suspend) { usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); intf->needs_altsetting0 = 0; diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c index 610e8e0b04b8..1a2421f908f0 100644 --- a/trunk/fs/block_dev.c +++ b/trunk/fs/block_dev.c @@ -762,19 +762,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, if (!disk) return ERR_PTR(-ENXIO); - /* - * Normally, @bdev should equal what's returned from bdget_disk() - * if partno is 0; however, some drivers (floppy) use multiple - * bdev's for the same physical device and @bdev may be one of the - * aliases. Keep @bdev if partno is 0. This means claimer - * tracking is broken for those devices but it has always been that - * way. 
- */ - if (partno) - whole = bdget_disk(disk, 0); - else - whole = bdgrab(bdev); - + whole = bdget_disk(disk, 0); module_put(disk->fops->owner); put_disk(disk); if (!whole) diff --git a/trunk/fs/btrfs/ctree.h b/trunk/fs/btrfs/ctree.h index f30ac05dbda7..300628795fdb 100644 --- a/trunk/fs/btrfs/ctree.h +++ b/trunk/fs/btrfs/ctree.h @@ -19,6 +19,7 @@ #ifndef __BTRFS_CTREE__ #define __BTRFS_CTREE__ +#include #include #include #include diff --git a/trunk/fs/btrfs/delayed-inode.c b/trunk/fs/btrfs/delayed-inode.c index 98c68e658a9b..f1cbd028f7b3 100644 --- a/trunk/fs/btrfs/delayed-inode.c +++ b/trunk/fs/btrfs/delayed-inode.c @@ -82,16 +82,19 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root( return root->fs_info->delayed_root; } -static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) +static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( + struct inode *inode) { + struct btrfs_delayed_node *node; struct btrfs_inode *btrfs_inode = BTRFS_I(inode); struct btrfs_root *root = btrfs_inode->root; u64 ino = btrfs_ino(inode); - struct btrfs_delayed_node *node; + int ret; +again: node = ACCESS_ONCE(btrfs_inode->delayed_node); if (node) { - atomic_inc(&node->refs); + atomic_inc(&node->refs); /* can be accessed */ return node; } @@ -99,10 +102,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) node = radix_tree_lookup(&root->delayed_nodes_tree, ino); if (node) { if (btrfs_inode->delayed_node) { - atomic_inc(&node->refs); /* can be accessed */ - BUG_ON(btrfs_inode->delayed_node != node); spin_unlock(&root->inode_lock); - return node; + goto again; } btrfs_inode->delayed_node = node; atomic_inc(&node->refs); /* can be accessed */ @@ -112,23 +113,6 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) } spin_unlock(&root->inode_lock); - return NULL; -} - -static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( - struct inode *inode) -{ - struct btrfs_delayed_node *node; - struct btrfs_inode *btrfs_inode = BTRFS_I(inode); - struct btrfs_root *root = btrfs_inode->root; - u64 ino = btrfs_ino(inode); - int ret; - -again: - node = btrfs_get_delayed_node(inode); - if (node) - return node; - node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS); if (!node) return ERR_PTR(-ENOMEM); @@ -564,6 +548,19 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item( return next; } +static inline struct btrfs_delayed_node *btrfs_get_delayed_node( + struct inode *inode) +{ + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); + struct btrfs_delayed_node *delayed_node; + + delayed_node = btrfs_inode->delayed_node; + if (delayed_node) + atomic_inc(&delayed_node->refs); + + return delayed_node; +} + static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, u64 root_id) { @@ -1407,7 +1404,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, int btrfs_inode_delayed_dir_index_count(struct inode *inode) { - struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); + struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node; + int ret = 0; if (!delayed_node) return -ENOENT; @@ -1417,14 +1415,11 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) * a new directory index is added into the delayed node and index_cnt * is updated now. So we needn't lock the delayed node. 
*/ - if (!delayed_node->index_cnt) { - btrfs_release_delayed_node(delayed_node); + if (!delayed_node->index_cnt) return -EINVAL; - } BTRFS_I(inode)->index_cnt = delayed_node->index_cnt; - btrfs_release_delayed_node(delayed_node); - return 0; + return ret; } void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, @@ -1618,57 +1613,6 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, inode->i_ctime.tv_nsec); } -int btrfs_fill_inode(struct inode *inode, u32 *rdev) -{ - struct btrfs_delayed_node *delayed_node; - struct btrfs_inode_item *inode_item; - struct btrfs_timespec *tspec; - - delayed_node = btrfs_get_delayed_node(inode); - if (!delayed_node) - return -ENOENT; - - mutex_lock(&delayed_node->mutex); - if (!delayed_node->inode_dirty) { - mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); - return -ENOENT; - } - - inode_item = &delayed_node->inode_item; - - inode->i_uid = btrfs_stack_inode_uid(inode_item); - inode->i_gid = btrfs_stack_inode_gid(inode_item); - btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); - inode->i_mode = btrfs_stack_inode_mode(inode_item); - inode->i_nlink = btrfs_stack_inode_nlink(inode_item); - inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); - BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); - BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); - inode->i_rdev = 0; - *rdev = btrfs_stack_inode_rdev(inode_item); - BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); - - tspec = btrfs_inode_atime(inode_item); - inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec); - inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec); - - tspec = btrfs_inode_mtime(inode_item); - inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec); - inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec); - - tspec = btrfs_inode_ctime(inode_item); - inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec); - inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec); - - inode->i_generation = BTRFS_I(inode)->generation; - BTRFS_I(inode)->index_cnt = (u64)-1; - - mutex_unlock(&delayed_node->mutex); - btrfs_release_delayed_node(delayed_node); - return 0; -} - int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { diff --git a/trunk/fs/btrfs/delayed-inode.h b/trunk/fs/btrfs/delayed-inode.h index 8d27af4bd8b9..d1a6a2915c66 100644 --- a/trunk/fs/btrfs/delayed-inode.h +++ b/trunk/fs/btrfs/delayed-inode.h @@ -119,7 +119,6 @@ void btrfs_kill_delayed_inode_items(struct inode *inode); int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); -int btrfs_fill_inode(struct inode *inode, u32 *rdev); /* Used for drop dead root */ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); diff --git a/trunk/fs/btrfs/extent-tree.c b/trunk/fs/btrfs/extent-tree.c index 71cd456fdb60..1f61bf5b4960 100644 --- a/trunk/fs/btrfs/extent-tree.c +++ b/trunk/fs/btrfs/extent-tree.c @@ -4842,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, - u64 data) + int data) { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; @@ -4869,7 +4869,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, space_info = __find_space_info(root->fs_info, data); if (!space_info) { - printk(KERN_ERR "No space info for 
%llu\n", data); + printk(KERN_ERR "No space info for %d\n", data); return -ENOSPC; } diff --git a/trunk/fs/btrfs/free-space-cache.c b/trunk/fs/btrfs/free-space-cache.c index bf0d61567f3d..9f985a429877 100644 --- a/trunk/fs/btrfs/free-space-cache.c +++ b/trunk/fs/btrfs/free-space-cache.c @@ -1893,12 +1893,9 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) while ((node = rb_last(&ctl->free_space_offset)) != NULL) { info = rb_entry(node, struct btrfs_free_space, offset_index); - if (!info->bitmap) { - unlink_free_space(ctl, info); - kmem_cache_free(btrfs_free_space_cachep, info); - } else { - free_bitmap(ctl, info); - } + unlink_free_space(ctl, info); + kfree(info->bitmap); + kmem_cache_free(btrfs_free_space_cachep, info); if (need_resched()) { spin_unlock(&ctl->tree_lock); cond_resched(); diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index d340f63d8f07..0a9b10c5b0a7 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -2509,11 +2509,6 @@ static void btrfs_read_locked_inode(struct inode *inode) int maybe_acls; u32 rdev; int ret; - bool filled = false; - - ret = btrfs_fill_inode(inode, &rdev); - if (!ret) - filled = true; path = btrfs_alloc_path(); BUG_ON(!path); @@ -2525,10 +2520,6 @@ static void btrfs_read_locked_inode(struct inode *inode) goto make_bad; leaf = path->nodes[0]; - - if (filled) - goto cache_acl; - inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); if (!leaf->map_token) @@ -2565,7 +2556,7 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->index_cnt = (u64)-1; BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); -cache_acl: + /* * try to precache a NULL acl entry for files that don't have * any xattrs or acls @@ -2581,6 +2572,7 @@ static void btrfs_read_locked_inode(struct inode *inode) } btrfs_free_path(path); + inode_item = NULL; switch (inode->i_mode & S_IFMT) { case S_IFREG: @@ -4528,7 +4520,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode_tree_add(inode); trace_btrfs_inode_new(inode); - btrfs_set_inode_last_trans(trans, inode); return inode; fail: diff --git a/trunk/fs/cifs/Kconfig b/trunk/fs/cifs/Kconfig index f66cc1625150..53ed1ad2c112 100644 --- a/trunk/fs/cifs/Kconfig +++ b/trunk/fs/cifs/Kconfig @@ -156,6 +156,6 @@ config CIFS_ACL config CIFS_NFSD_EXPORT bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" - depends on CIFS && EXPERIMENTAL && BROKEN + depends on CIFS && EXPERIMENTAL help Allows NFS server to export a CIFS mounted share (nfsd over cifs) diff --git a/trunk/fs/cifs/cifs_fs_sb.h b/trunk/fs/cifs/cifs_fs_sb.h index 7260e11e21f8..ffb1459dc6ec 100644 --- a/trunk/fs/cifs/cifs_fs_sb.h +++ b/trunk/fs/cifs/cifs_fs_sb.h @@ -42,7 +42,6 @@ #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ -#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ struct cifs_sb_info { struct rb_root tlink_tree; diff --git a/trunk/fs/cifs/cifsfs.c b/trunk/fs/cifs/cifsfs.c index 35f9154615fa..2f0c58646c10 100644 --- a/trunk/fs/cifs/cifsfs.c +++ b/trunk/fs/cifs/cifsfs.c @@ -104,7 +104,8 @@ cifs_sb_deactive(struct super_block *sb) } static int -cifs_read_super(struct super_block *sb) +cifs_read_super(struct super_block *sb, struct smb_vol *volume_info, + const char *devname, int silent) { struct inode *inode; struct cifs_sb_info *cifs_sb; @@ -112,16 +113,22 @@ 
cifs_read_super(struct super_block *sb) cifs_sb = CIFS_SB(sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) - sb->s_flags |= MS_POSIXACL; + spin_lock_init(&cifs_sb->tlink_tree_lock); + cifs_sb->tlink_tree = RB_ROOT; - if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES) - sb->s_maxbytes = MAX_LFS_FILESIZE; - else - sb->s_maxbytes = MAX_NON_LFS; + rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); + if (rc) + return rc; + + cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; - /* BB FIXME fix time_gran to be larger for LANMAN sessions */ - sb->s_time_gran = 100; + rc = cifs_mount(sb, cifs_sb, volume_info, devname); + + if (rc) { + if (!silent) + cERROR(1, "cifs_mount failed w/return code = %d", rc); + goto out_mount_failed; + } sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; @@ -163,14 +170,37 @@ cifs_read_super(struct super_block *sb) if (inode) iput(inode); + cifs_umount(sb, cifs_sb); + +out_mount_failed: + bdi_destroy(&cifs_sb->bdi); return rc; } -static void cifs_kill_sb(struct super_block *sb) +static void +cifs_put_super(struct super_block *sb) { - struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - kill_anon_super(sb); - cifs_umount(cifs_sb); + int rc = 0; + struct cifs_sb_info *cifs_sb; + + cFYI(1, "In cifs_put_super"); + cifs_sb = CIFS_SB(sb); + if (cifs_sb == NULL) { + cFYI(1, "Empty cifs superblock info passed to unmount"); + return; + } + + rc = cifs_umount(sb, cifs_sb); + if (rc) + cERROR(1, "cifs_umount failed with return code %d", rc); + if (cifs_sb->mountdata) { + kfree(cifs_sb->mountdata); + cifs_sb->mountdata = NULL; + } + + unload_nls(cifs_sb->local_nls); + bdi_destroy(&cifs_sb->bdi); + kfree(cifs_sb); } static int @@ -518,6 +548,7 @@ static int cifs_drop_inode(struct inode *inode) } static const struct super_operations cifs_super_ops = { + .put_super = cifs_put_super, .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, .destroy_inode = cifs_destroy_inode, @@ -554,7 +585,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) full_path = cifs_build_path_to_root(vol, cifs_sb, cifs_sb_master_tcon(cifs_sb)); if (full_path == NULL) - return ERR_PTR(-ENOMEM); + return NULL; cFYI(1, "Get root dentry for %s", full_path); @@ -583,7 +614,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dchild = d_alloc(dparent, &name); if (dchild == NULL) { dput(dparent); - dparent = ERR_PTR(-ENOMEM); + dparent = NULL; goto out; } } @@ -601,7 +632,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) if (rc) { dput(dchild); dput(dparent); - dparent = ERR_PTR(rc); + dparent = NULL; goto out; } alias = d_materialise_unique(dchild, inode); @@ -609,7 +640,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dput(dchild); if (IS_ERR(alias)) { dput(dparent); - dparent = ERR_PTR(-EINVAL); /* XXX */ + dparent = NULL; goto out; } dchild = alias; @@ -629,13 +660,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) return dparent; } -static int cifs_set_super(struct super_block *sb, void *data) -{ - struct cifs_mnt_data *mnt_data = data; - sb->s_fs_info = mnt_data->cifs_sb; - return set_anon_super(sb, NULL); -} - static struct dentry * cifs_do_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -656,73 +680,75 @@ cifs_do_mount(struct file_system_type *fs_type, cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); if (cifs_sb == NULL) { root = ERR_PTR(-ENOMEM); - goto out_nls; - } - - cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); - if 
(cifs_sb->mountdata == NULL) { - root = ERR_PTR(-ENOMEM); - goto out_cifs_sb; + goto out; } cifs_setup_cifs_sb(volume_info, cifs_sb); - rc = cifs_mount(cifs_sb, volume_info); - if (rc) { - if (!(flags & MS_SILENT)) - cERROR(1, "cifs_mount failed w/return code = %d", rc); - root = ERR_PTR(rc); - goto out_mountdata; - } - mnt_data.vol = volume_info; mnt_data.cifs_sb = cifs_sb; mnt_data.flags = flags; - sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data); + sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data); if (IS_ERR(sb)) { root = ERR_CAST(sb); - cifs_umount(cifs_sb); - goto out; + goto out_cifs_sb; } - if (sb->s_root) { + if (sb->s_fs_info) { cFYI(1, "Use existing superblock"); - cifs_umount(cifs_sb); - } else { - sb->s_flags = flags; - /* BB should we make this contingent on mount parm? */ - sb->s_flags |= MS_NODIRATIME | MS_NOATIME; - - rc = cifs_read_super(sb); - if (rc) { - root = ERR_PTR(rc); - goto out_super; - } + goto out_shared; + } + + /* + * Copy mount params for use in submounts. Better to do + * the copy here and deal with the error before cleanup gets + * complicated post-mount. + */ + cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); + if (cifs_sb->mountdata == NULL) { + root = ERR_PTR(-ENOMEM); + goto out_super; + } + + sb->s_flags = flags; + /* BB should we make this contingent on mount parm? */ + sb->s_flags |= MS_NODIRATIME | MS_NOATIME; + sb->s_fs_info = cifs_sb; - sb->s_flags |= MS_ACTIVE; + rc = cifs_read_super(sb, volume_info, dev_name, + flags & MS_SILENT ? 1 : 0); + if (rc) { + root = ERR_PTR(rc); + goto out_super; } + sb->s_flags |= MS_ACTIVE; + root = cifs_get_root(volume_info, sb); - if (IS_ERR(root)) + if (root == NULL) goto out_super; cFYI(1, "dentry root is: %p", root); goto out; +out_shared: + root = cifs_get_root(volume_info, sb); + if (root) + cFYI(1, "dentry root is: %p", root); + goto out; + out_super: + kfree(cifs_sb->mountdata); deactivate_locked_super(sb); -out: - cifs_cleanup_volume_info(&volume_info); - return root; -out_mountdata: - kfree(cifs_sb->mountdata); out_cifs_sb: + unload_nls(cifs_sb->local_nls); kfree(cifs_sb); -out_nls: - unload_nls(volume_info->local_nls); - goto out; + +out: + cifs_cleanup_volume_info(&volume_info); + return root; } static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, @@ -811,7 +837,7 @@ struct file_system_type cifs_fs_type = { .owner = THIS_MODULE, .name = "cifs", .mount = cifs_do_mount, - .kill_sb = cifs_kill_sb, + .kill_sb = kill_anon_super, /* .fs_flags */ }; const struct inode_operations cifs_dir_inode_ops = { diff --git a/trunk/fs/cifs/cifsproto.h b/trunk/fs/cifs/cifsproto.h index 257f312ede42..953f84413c77 100644 --- a/trunk/fs/cifs/cifsproto.h +++ b/trunk/fs/cifs/cifsproto.h @@ -157,8 +157,9 @@ extern int cifs_match_super(struct super_block *, void *); extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info); extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, const char *devname); -extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); -extern void cifs_umount(struct cifs_sb_info *); +extern int cifs_mount(struct super_block *, struct cifs_sb_info *, + struct smb_vol *, const char *); +extern int cifs_umount(struct super_block *, struct cifs_sb_info *); extern void cifs_dfs_release_automount_timer(void); void cifs_proc_init(void); void cifs_proc_clean(void); @@ -217,8 +218,7 @@ extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo, struct dfs_info3_param **preferrals, int remap); extern void 
reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - struct smb_vol *vol); + struct super_block *sb, struct smb_vol *vol); extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, diff --git a/trunk/fs/cifs/connect.c b/trunk/fs/cifs/connect.c index 7f540df52527..12cf72dd0c42 100644 --- a/trunk/fs/cifs/connect.c +++ b/trunk/fs/cifs/connect.c @@ -2546,7 +2546,7 @@ ip_connect(struct TCP_Server_Info *server) } void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info) + struct super_block *sb, struct smb_vol *vol_info) { /* if we are reconnecting then should we check to see if * any requested capabilities changed locally e.g. via @@ -2600,23 +2600,22 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, cap &= ~CIFS_UNIX_POSIX_ACL_CAP; else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { cFYI(1, "negotiated posix acl support"); - if (cifs_sb) - cifs_sb->mnt_cifs_flags |= - CIFS_MOUNT_POSIXACL; + if (sb) + sb->s_flags |= MS_POSIXACL; } if (vol_info && vol_info->posix_paths == 0) cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { cFYI(1, "negotiate posix pathnames"); - if (cifs_sb) - cifs_sb->mnt_cifs_flags |= + if (sb) + CIFS_SB(sb)->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; } - if (cifs_sb && (cifs_sb->rsize > 127 * 1024)) { + if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { - cifs_sb->rsize = 127 * 1024; + CIFS_SB(sb)->rsize = 127 * 1024; cFYI(DBG2, "larger reads not supported by srv"); } } @@ -2663,9 +2662,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, { INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); - spin_lock_init(&cifs_sb->tlink_tree_lock); - cifs_sb->tlink_tree = RB_ROOT; - if (pvolume_info->rsize > CIFSMaxBufSize) { cERROR(1, "rsize %d too large, using MaxBufSize", pvolume_info->rsize); @@ -2754,21 +2750,21 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, /* * When the server supports very large writes via POSIX extensions, we can - * allow up to 2^24-1, minus the size of a WRITE_AND_X header, not including - * the RFC1001 length. + * allow up to 2^24 - PAGE_CACHE_SIZE. * * Note that this might make for "interesting" allocation problems during - * writeback however as we have to allocate an array of pointers for the - * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. + * writeback however (as we have to allocate an array of pointers for the + * pages). A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. */ -#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_WSIZE ((1<<24) - PAGE_CACHE_SIZE) /* - * When the server doesn't allow large posix writes, only allow a wsize of - * 128k minus the size of the WRITE_AND_X header. That allows for a write up - * to the maximum size described by RFC1002. + * When the server doesn't allow large posix writes, default to a wsize of + * 128k - PAGE_CACHE_SIZE -- one page less than the largest frame size + * described in RFC1001. This allows space for the header without going over + * that by default. */ -#define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4) +#define CIFS_MAX_RFC1001_WSIZE (128 * 1024 - PAGE_CACHE_SIZE) /* * The default wsize is 1M. 
find_get_pages seems to return a maximum of 256 @@ -2787,18 +2783,11 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) /* can server support 24-bit write sizes? (via UNIX extensions) */ if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) - wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); + wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1001_WSIZE); - /* - * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? - * Limit it to max buffer offered by the server, minus the size of the - * WRITEX header, not including the 4 byte RFC1001 length. - */ - if (!(server->capabilities & CAP_LARGE_WRITE_X) || - (!(server->capabilities & CAP_UNIX) && - (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) - wsize = min_t(unsigned int, wsize, - server->maxBuf - sizeof(WRITE_REQ) + 4); + /* no CAP_LARGE_WRITE_X? Limit it to 16 bits */ + if (!(server->capabilities & CAP_LARGE_WRITE_X)) + wsize = min_t(unsigned int, wsize, USHRT_MAX); /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -2948,11 +2937,7 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, if (volume_info->nullauth) { cFYI(1, "null user"); - volume_info->username = kzalloc(1, GFP_KERNEL); - if (volume_info->username == NULL) { - rc = -ENOMEM; - goto out; - } + volume_info->username = ""; } else if (volume_info->username) { /* BB fixme parse for domain name here */ cFYI(1, "Username: %s", volume_info->username); @@ -2986,7 +2971,8 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, } int -cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) +cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, + struct smb_vol *volume_info, const char *devname) { int rc = 0; int xid; @@ -2997,13 +2983,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) struct tcon_link *tlink; #ifdef CONFIG_CIFS_DFS_UPCALL int referral_walks_count = 0; - - rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); - if (rc) - return rc; - - cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; - try_mount_again: /* cleanup activities if we're chasing a referral */ if (referral_walks_count) { @@ -3028,7 +3007,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) srvTcp = cifs_get_tcp_session(volume_info); if (IS_ERR(srvTcp)) { rc = PTR_ERR(srvTcp); - bdi_destroy(&cifs_sb->bdi); goto out; } @@ -3040,6 +3018,14 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) goto mount_fail_check; } + if (pSesInfo->capabilities & CAP_LARGE_FILES) + sb->s_maxbytes = MAX_LFS_FILESIZE; + else + sb->s_maxbytes = MAX_NON_LFS; + + /* BB FIXME fix time_gran to be larger for LANMAN sessions */ + sb->s_time_gran = 100; + /* search for existing tcon to this server share */ tcon = cifs_get_tcon(pSesInfo, volume_info); if (IS_ERR(tcon)) { @@ -3052,7 +3038,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) if (tcon->ses->capabilities & CAP_UNIX) { /* reset of caps checks mount to see if unix extensions disabled for just this mount */ - reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info); + reset_cifs_unix_caps(xid, tcon, sb, volume_info); if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && (le64_to_cpu(tcon->fsUnixInfo.Capability) & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { @@ -3175,7 +3161,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) cifs_put_smb_ses(pSesInfo); 
else cifs_put_tcp_session(srvTcp); - bdi_destroy(&cifs_sb->bdi); goto out; } @@ -3350,8 +3335,8 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, return rc; } -void -cifs_umount(struct cifs_sb_info *cifs_sb) +int +cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) { struct rb_root *root = &cifs_sb->tlink_tree; struct rb_node *node; @@ -3372,10 +3357,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); - bdi_destroy(&cifs_sb->bdi); - kfree(cifs_sb->mountdata); - unload_nls(cifs_sb->local_nls); - kfree(cifs_sb); + return 0; } int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) diff --git a/trunk/fs/cifs/smbencrypt.c b/trunk/fs/cifs/smbencrypt.c index 1c5b770c3141..1525d5e662b6 100644 --- a/trunk/fs/cifs/smbencrypt.c +++ b/trunk/fs/cifs/smbencrypt.c @@ -90,10 +90,12 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) sg_init_one(&sgout, out, 8); rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8); - if (rc) + if (rc) { cERROR(1, "could not encrypt crypt key rc: %d\n", rc); + crypto_free_blkcipher(tfm_des); + goto smbhash_err; + } - crypto_free_blkcipher(tfm_des); smbhash_err: return rc; } diff --git a/trunk/fs/inode.c b/trunk/fs/inode.c index 43566d17d1b8..0f7e88a7803f 100644 --- a/trunk/fs/inode.c +++ b/trunk/fs/inode.c @@ -423,14 +423,7 @@ EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { might_sleep(); - /* - * We have to cycle tree_lock here because reclaim can be still in the - * process of removing the last page (in __delete_from_page_cache()) - * and we must not free mapping under it. - */ - spin_lock_irq(&inode->i_data.tree_lock); BUG_ON(inode->i_data.nrpages); - spin_unlock_irq(&inode->i_data.tree_lock); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); diff --git a/trunk/fs/jfs/file.c b/trunk/fs/jfs/file.c index 2f3f531f3606..c5ce6c1d1ff4 100644 --- a/trunk/fs/jfs/file.c +++ b/trunk/fs/jfs/file.c @@ -66,9 +66,9 @@ static int jfs_open(struct inode *inode, struct file *file) struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag == -1) { - struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); - ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); - atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); + ji->active_ag = ji->agno; + atomic_inc( + &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]); } spin_unlock_irq(&ji->ag_lock); } diff --git a/trunk/fs/jfs/jfs_imap.c b/trunk/fs/jfs/jfs_imap.c index b78b2f978f04..ed53a4740168 100644 --- a/trunk/fs/jfs/jfs_imap.c +++ b/trunk/fs/jfs/jfs_imap.c @@ -397,7 +397,7 @@ int diRead(struct inode *ip) release_metapage(mp); /* set the ag for the inode */ - JFS_IP(ip)->agstart = agstart; + JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); JFS_IP(ip)->active_ag = -1; return (rc); @@ -901,7 +901,7 @@ int diFree(struct inode *ip) /* get the allocation group for this ino. 
*/ - agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); + agno = JFS_IP(ip)->agno; /* Lock the AG specific inode map information */ @@ -1315,11 +1315,12 @@ int diFree(struct inode *ip) static inline void diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) { + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); struct jfs_inode_info *jfs_ip = JFS_IP(ip); ip->i_ino = (iagno << L2INOSPERIAG) + ino; jfs_ip->ixpxd = iagp->inoext[extno]; - jfs_ip->agstart = le64_to_cpu(iagp->agstart); + jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); jfs_ip->active_ag = -1; } @@ -1378,7 +1379,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) */ /* get the ag number of this iag */ - agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); + agno = JFS_IP(pip)->agno; if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { /* @@ -2920,9 +2921,10 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) continue; } + /* agstart that computes to the same ag is treated as same; */ agstart = le64_to_cpu(iagp->agstart); + /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ n = agstart >> mp->db_agl2size; - iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); /* compute backed inodes */ numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) diff --git a/trunk/fs/jfs/jfs_incore.h b/trunk/fs/jfs/jfs_incore.h index 584a4a1a6e81..1439f119ec83 100644 --- a/trunk/fs/jfs/jfs_incore.h +++ b/trunk/fs/jfs/jfs_incore.h @@ -50,9 +50,8 @@ struct jfs_inode_info { short btindex; /* btpage entry index*/ struct inode *ipimap; /* inode map */ unsigned long cflag; /* commit flags */ - u64 agstart; /* agstart of the containing IAG */ u16 bxflag; /* xflag of pseudo buffer? */ - unchar pad; + unchar agno; /* ag number */ signed char active_ag; /* ag currently allocating from */ lid_t blid; /* lid of pseudo buffer? 
*/ lid_t atlhead; /* anonymous tlock list head */ diff --git a/trunk/fs/jfs/resize.c b/trunk/fs/jfs/resize.c index 8d0c1c7c0820..8ea5efb5a34e 100644 --- a/trunk/fs/jfs/resize.c +++ b/trunk/fs/jfs/resize.c @@ -80,7 +80,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) int log_formatted = 0; struct inode *iplist[1]; struct jfs_superblock *j_sb, *j_sb2; - s64 old_agsize; + uint old_agsize; int agsizechanged = 0; struct buffer_head *bh, *bh2; diff --git a/trunk/fs/lockd/clntproc.c b/trunk/fs/lockd/clntproc.c index e374050a911c..adb45ec9038c 100644 --- a/trunk/fs/lockd/clntproc.c +++ b/trunk/fs/lockd/clntproc.c @@ -708,13 +708,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) if (task->tk_status < 0) { dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); - switch (task->tk_status) { - case -EACCES: - case -EIO: - goto die; - default: - goto retry_rebind; - } + goto retry_rebind; } if (status == NLM_LCK_DENIED_GRACE_PERIOD) { rpc_delay(task, NLMCLNT_GRACE_WAIT); diff --git a/trunk/fs/nfs/inode.c b/trunk/fs/nfs/inode.c index 6f4850deb272..144f2a3c7185 100644 --- a/trunk/fs/nfs/inode.c +++ b/trunk/fs/nfs/inode.c @@ -256,8 +256,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfs_attr_check_mountpoint(sb, fattr); - if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) && - !nfs_attr_use_mounted_on_fileid(fattr)) + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) goto out_no_inode; if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; @@ -1295,8 +1294,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) || - new_isize > cur_isize) { + if (nfsi->npages == 0 || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } diff --git a/trunk/fs/nfs/internal.h b/trunk/fs/nfs/internal.h index 2a55347a2daa..b9056cbe68d6 100644 --- a/trunk/fs/nfs/internal.h +++ b/trunk/fs/nfs/internal.h @@ -45,17 +45,6 @@ static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; } -static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) -{ - if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) || - (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) && - ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0))) - return 0; - - fattr->fileid = fattr->mounted_on_fileid; - return 1; -} - struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; diff --git a/trunk/fs/nfs/nfs4filelayout.c b/trunk/fs/nfs/nfs4filelayout.c index 0bafcc91c27f..426908809c97 100644 --- a/trunk/fs/nfs/nfs4filelayout.c +++ b/trunk/fs/nfs/nfs4filelayout.c @@ -30,7 +30,6 @@ */ #include -#include #include "internal.h" #include "nfs4filelayout.h" @@ -553,18 +552,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, __func__, nfl_util, fl->num_fh, fl->first_stripe_index, fl->pattern_offset); - /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. 
- * Futher checking is done in filelayout_check_layout */ - if (fl->num_fh < 0 || fl->num_fh > - max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) + if (!fl->num_fh) goto out_err; - if (fl->num_fh > 0) { - fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), - gfp_flags); - if (!fl->fh_array) - goto out_err; - } + fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), + gfp_flags); + if (!fl->fh_array) + goto out_err; for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? */ @@ -667,9 +661,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, u64 p_stripe, r_stripe; u32 stripe_unit; - if (!pnfs_generic_pg_test(pgio, prev, req) || - !nfs_generic_pg_test(pgio, prev, req)) - return false; + if (!pnfs_generic_pg_test(pgio, prev, req)) + return 0; if (!pgio->pg_lseg) return 1; diff --git a/trunk/fs/nfs/nfs4proc.c b/trunk/fs/nfs/nfs4proc.c index 5879b23e0c99..d2c4b59c896d 100644 --- a/trunk/fs/nfs/nfs4proc.c +++ b/trunk/fs/nfs/nfs4proc.c @@ -2265,14 +2265,12 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, return nfs4_map_errors(status); } -static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); /* * Get locations and (maybe) other attributes of a referral. * Note that we'll actually follow the referral later when * we detect fsid mismatch in inode revalidation */ -static int nfs4_get_referral(struct inode *dir, const struct qstr *name, - struct nfs_fattr *fattr, struct nfs_fh *fhandle) +static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle) { int status = -ENOMEM; struct page *page = NULL; @@ -2290,16 +2288,15 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, goto out; /* Make sure server returned a different fsid for the referral */ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { - dprintk("%s: server did not return a different fsid for" - " a referral at %s\n", __func__, name->name); + dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name); status = -EIO; goto out; } - /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ - nfs_fixup_referral_attributes(&locations->fattr); - /* replace the lookup nfs_fattr with the locations nfs_fattr */ memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); + fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL; + if (!fattr->mode) + fattr->mode = S_IFDIR; memset(fhandle, 0, sizeof(struct nfs_fh)); out: if (page) @@ -4670,15 +4667,11 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, return len; } -/* - * nfs_fhget will use either the mounted_on_fileid or the fileid - */ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) { - if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || - (fattr->valid & NFS_ATTR_FATTR_FILEID)) && - (fattr->valid & NFS_ATTR_FATTR_FSID) && - (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) + if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) && + (fattr->valid & NFS_ATTR_FATTR_FSID) && + (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) return; fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | @@ -4693,6 +4686,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs_server *server = NFS_SERVER(dir); u32 bitmask[2] = { [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + [1] = FATTR4_WORD1_MOUNTED_ON_FILEID, }; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), @@ -4711,18 
+4705,11 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, int status; dprintk("%s: start\n", __func__); - - /* Ask for the fileid of the absent filesystem if mounted_on_fileid - * is not supported */ - if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; - else - bitmask[0] |= FATTR4_WORD0_FILEID; - nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + nfs_fixup_referral_attributes(&fs_locations->fattr); dprintk("%s: returned status = %d\n", __func__, status); return status; } @@ -5111,6 +5098,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) if (mxresp_sz == 0) mxresp_sz = NFS_MAX_FILE_IO_SIZE; /* Fore channel attributes */ + args->fc_attrs.headerpadsz = 0; args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; @@ -5123,6 +5111,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.max_ops, args->fc_attrs.max_reqs); /* Back channel attributes */ + args->bc_attrs.headerpadsz = 0; args->bc_attrs.max_rqst_sz = PAGE_SIZE; args->bc_attrs.max_resp_sz = PAGE_SIZE; args->bc_attrs.max_resp_sz_cached = 0; @@ -5142,6 +5131,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args struct nfs4_channel_attrs *sent = &args->fc_attrs; struct nfs4_channel_attrs *rcvd = &session->fc_attrs; + if (rcvd->headerpadsz > sent->headerpadsz) + return -EINVAL; if (rcvd->max_resp_sz > sent->max_resp_sz) return -EINVAL; /* @@ -5706,7 +5697,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; dprintk("--> %s\n", __func__); @@ -5718,15 +5708,16 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) nfs_restart_rpc(task, lrp->clp); return; } - spin_lock(&lo->plh_inode->i_lock); if (task->tk_status == 0) { + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + if (lrp->res.lrs_present) { + spin_lock(&lo->plh_inode->i_lock); pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + spin_unlock(&lo->plh_inode->i_lock); } else BUG_ON(!list_empty(&lo->plh_segs)); } - lo->plh_block_lgets--; - spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); } diff --git a/trunk/fs/nfs/nfs4xdr.c b/trunk/fs/nfs/nfs4xdr.c index 6870bc61ceec..d869a5e5464b 100644 --- a/trunk/fs/nfs/nfs4xdr.c +++ b/trunk/fs/nfs/nfs4xdr.c @@ -255,7 +255,7 @@ static int nfs4_stat_to_errno(int); #define decode_fs_locations_maxsz \ (0) #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) -#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4)) +#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN))) #if defined(CONFIG_NFS_V4_1) #define NFS4_MAX_MACHINE_NAME_LEN (64) @@ -1725,7 +1725,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(args->flags); /*flags */ /* Fore Channel */ - *p++ = cpu_to_be32(0); /* header padding size */ + *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ *p++ 
= cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ @@ -1734,7 +1734,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ /* Back Channel */ - *p++ = cpu_to_be32(0); /* header padding size */ + *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ @@ -3098,7 +3098,7 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint return -EIO; } -static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res) +static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) { __be32 *p; @@ -3109,7 +3109,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t * if (unlikely(!p)) goto out_overflow; bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; - *res = -be32_to_cpup(p); + return -be32_to_cpup(p); } return 0; out_overflow: @@ -4070,7 +4070,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, int status; umode_t fmode = 0; uint32_t type; - int32_t err; status = decode_attr_type(xdr, bitmap, &type); if (status < 0) @@ -4096,12 +4095,13 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - err = 0; - status = decode_attr_error(xdr, bitmap, &err); + status = decode_attr_error(xdr, bitmap); + if (status == -NFS4ERR_WRONGSEC) { + nfs_fixup_secinfo_attributes(fattr, fh); + status = 0; + } if (status < 0) goto xdr_error; - if (err == -NFS4ERR_WRONGSEC) - nfs_fixup_secinfo_attributes(fattr, fh); status = decode_attr_filehandle(xdr, bitmap, fh); if (status < 0) @@ -4997,14 +4997,12 @@ static int decode_chan_attrs(struct xdr_stream *xdr, struct nfs4_channel_attrs *attrs) { __be32 *p; - u32 nr_attrs, val; + u32 nr_attrs; p = xdr_inline_decode(xdr, 28); if (unlikely(!p)) goto out_overflow; - val = be32_to_cpup(p++); /* headerpadsz */ - if (val) - return -EINVAL; /* no support for header padding yet */ + attrs->headerpadsz = be32_to_cpup(p++); attrs->max_rqst_sz = be32_to_cpup(p++); attrs->max_resp_sz = be32_to_cpup(p++); attrs->max_resp_sz_cached = be32_to_cpup(p++); diff --git a/trunk/fs/nfs/objlayout/objio_osd.c b/trunk/fs/nfs/objlayout/objio_osd.c index 8ff2ea3f10ef..9cf208df1f25 100644 --- a/trunk/fs/nfs/objlayout/objio_osd.c +++ b/trunk/fs/nfs/objlayout/objio_osd.c @@ -108,6 +108,7 @@ _dev_list_add(const struct nfs_server *nfss, de = n; } + atomic_inc(&de->id_node.ref); return de; } @@ -1000,9 +1001,6 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, if (!pnfs_generic_pg_test(pgio, prev, req)) return false; - if (pgio->pg_lseg == NULL) - return true; - return pgio->pg_count + req->wb_bytes <= OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } diff --git a/trunk/fs/nfs/objlayout/objlayout.c b/trunk/fs/nfs/objlayout/objlayout.c index 1d06f8e2adea..dc3956c0de80 100644 --- a/trunk/fs/nfs/objlayout/objlayout.c +++ b/trunk/fs/nfs/objlayout/objlayout.c @@ -291,7 +291,7 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) struct nfs_read_data *rdata; state->status = status; - dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); + dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); rdata = state->rpcdata; rdata->task.tk_status = status; if (status >= 0) { diff --git a/trunk/fs/nfs/pagelist.c 
b/trunk/fs/nfs/pagelist.c index 009855716286..7913961aff22 100644 --- a/trunk/fs/nfs/pagelist.c +++ b/trunk/fs/nfs/pagelist.c @@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req) TASK_UNINTERRUPTIBLE); } -bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { /* * FIXME: ideally we should be able to coalesce all requests @@ -218,7 +218,6 @@ bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *pr return desc->pg_count + req->wb_bytes <= desc->pg_bsize; } -EXPORT_SYMBOL_GPL(nfs_generic_pg_test); /** * nfs_pageio_init - initialise a page io descriptor diff --git a/trunk/fs/nfs/pnfs.c b/trunk/fs/nfs/pnfs.c index 29c0ca7fc347..8c1309d852a6 100644 --- a/trunk/fs/nfs/pnfs.c +++ b/trunk/fs/nfs/pnfs.c @@ -634,16 +634,14 @@ _pnfs_return_layout(struct inode *ino) spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo) { + if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { spin_unlock(&ino->i_lock); - dprintk("%s: no layout to return\n", __func__); - return status; + dprintk("%s: no layout segments to return\n", __func__); + goto out; } stateid = nfsi->layout->plh_stateid; /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); - lo->plh_block_lgets++; spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); @@ -652,9 +650,6 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; - set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); - set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); - put_layout_hdr(lo); goto out; } @@ -892,7 +887,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, ret = get_lseg(lseg); break; } - if (lseg->pls_range.offset > range->offset) + if (cmp_layout(range, &lseg->pls_range) > 0) break; } @@ -1064,36 +1059,23 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, gfp_flags = GFP_NOFS; } - if (pgio->pg_lseg == NULL) { - if (pgio->pg_count != prev->wb_bytes) - return true; + if (pgio->pg_count == prev->wb_bytes) { /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - req_offset(prev), + req_offset(req), pgio->pg_count, access_type, gfp_flags); - if (pgio->pg_lseg == NULL) - return true; + return true; } - /* - * Test if a nfs_page is fully contained in the pnfs_layout_range. - * Note that this test makes several assumptions: - * - that the previous nfs_page in the struct nfs_pageio_descriptor - * is known to lie within the range. - * - that the nfs_page being tested is known to be contiguous with the - * previous nfs_page. - * - Layout ranges are page aligned, so we only have to test the - * start offset of the request. - * - * Please also note that 'end_offset' is actually the offset of the - * first byte that lies outside the pnfs_layout_range. FIXME? 
- * - */ - return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); + if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return false; + + return true; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); diff --git a/trunk/fs/nfs/pnfs.h b/trunk/fs/nfs/pnfs.h index 96bf4e6f45be..48d0a8e4d062 100644 --- a/trunk/fs/nfs/pnfs.h +++ b/trunk/fs/nfs/pnfs.h @@ -186,7 +186,6 @@ int pnfs_ld_read_done(struct nfs_read_data *); /* pnfs_dev.c */ struct nfs4_deviceid_node { struct hlist_node node; - struct hlist_node tmpnode; const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; struct nfs4_deviceid deviceid; diff --git a/trunk/fs/nfs/pnfs_dev.c b/trunk/fs/nfs/pnfs_dev.c index f0f8e1e22f6c..c65e133ce9c0 100644 --- a/trunk/fs/nfs/pnfs_dev.c +++ b/trunk/fs/nfs/pnfs_dev.c @@ -174,7 +174,6 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, const struct nfs4_deviceid *id) { INIT_HLIST_NODE(&d->node); - INIT_HLIST_NODE(&d->tmpnode); d->ld = ld; d->nfs_client = nfs_client; d->deviceid = *id; @@ -209,7 +208,6 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); spin_unlock(&nfs4_deviceid_lock); - atomic_inc(&new->ref); return new; } @@ -240,29 +238,24 @@ static void _deviceid_purge_client(const struct nfs_client *clp, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; + struct hlist_node *n, *next; HLIST_HEAD(tmp); - spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); - hlist_add_head(&d->tmpnode, &tmp); + hlist_add_head(&d->node, &tmp); } rcu_read_unlock(); - spin_unlock(&nfs4_deviceid_lock); if (hlist_empty(&tmp)) return; synchronize_rcu(); - while (!hlist_empty(&tmp)) { - d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); - hlist_del(&d->tmpnode); + hlist_for_each_entry_safe(d, n, next, &tmp, node) if (atomic_dec_and_test(&d->ref)) d->ld->free_deviceid_node(d); - } } void @@ -270,8 +263,8 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) { long h; - if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) - return; + spin_lock(&nfs4_deviceid_lock); for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) _deviceid_purge_client(clp, h); + spin_unlock(&nfs4_deviceid_lock); } diff --git a/trunk/fs/omfs/file.c b/trunk/fs/omfs/file.c index 2c6d95257a4d..d738a7e493dd 100644 --- a/trunk/fs/omfs/file.c +++ b/trunk/fs/omfs/file.c @@ -4,6 +4,7 @@ * Released under GPL v2. 
*/ +#include #include #include #include diff --git a/trunk/fs/romfs/mmap-nommu.c b/trunk/fs/romfs/mmap-nommu.c index eed99428f104..f0511e816967 100644 --- a/trunk/fs/romfs/mmap-nommu.c +++ b/trunk/fs/romfs/mmap-nommu.c @@ -27,18 +27,14 @@ static unsigned long romfs_get_unmapped_area(struct file *file, { struct inode *inode = file->f_mapping->host; struct mtd_info *mtd = inode->i_sb->s_mtd; - unsigned long isize, offset, maxpages, lpages; + unsigned long isize, offset; if (!mtd) goto cant_map_directly; - /* the mapping mustn't extend beyond the EOF */ - lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; isize = i_size_read(inode); offset = pgoff << PAGE_SHIFT; - - maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT; - if ((pgoff >= maxpages) || (maxpages - pgoff < lpages)) + if (offset > isize || len > isize || offset > isize - len) return (unsigned long) -EINVAL; /* we need to call down to the MTD layer to do the actual mapping */ diff --git a/trunk/fs/xfs/xfs_attr.c b/trunk/fs/xfs/xfs_attr.c index 01d2072fb6d4..c86375378810 100644 --- a/trunk/fs/xfs/xfs_attr.c +++ b/trunk/fs/xfs/xfs_attr.c @@ -489,13 +489,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) args.total = 0; args.whichfork = XFS_ATTR_FORK; - /* - * we have no control over the attribute names that userspace passes us - * to remove, so we have to allow the name lookup prior to attribute - * removal to fail. - */ - args.op_flags = XFS_DA_OP_OKNOENT; - /* * Attach the dquots to the inode. */ diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index 3631783b2b53..cb9b6d1469f7 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -253,21 +253,16 @@ xfs_iget_cache_hit( rcu_read_lock(); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); - ASSERT(ip->i_flags & XFS_IRECLAIMABLE); + ip->i_flags &= ~XFS_INEW; + ip->i_flags |= XFS_IRECLAIMABLE; + __xfs_inode_set_reclaim_tag(pag, ip); trace_xfs_iget_reclaim_fail(ip); goto out_error; } spin_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); - - /* - * Clear the per-lifetime state in the inode as we are now - * effectively a new inode and need to return to the initial - * state before reuse occurs. - */ - ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; + ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); ip->i_flags |= XFS_INEW; __xfs_inode_clear_reclaim_tag(mp, pag, ip); inode->i_state = I_NEW; diff --git a/trunk/fs/xfs/xfs_inode.h b/trunk/fs/xfs/xfs_inode.h index 964cfea77686..3ae6d58e5473 100644 --- a/trunk/fs/xfs/xfs_inode.h +++ b/trunk/fs/xfs/xfs_inode.h @@ -383,16 +383,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ -/* - * Per-lifetime flags need to be reset when re-using a reclaimable inode during - * inode lookup. Thi prevents unintended behaviour on the new inode from - * ocurring. - */ -#define XFS_IRECLAIM_RESET_FLAGS \ - (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ - XFS_IFILESTREAM); - /* * Flags for inode locking. * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index 619720705bc6..b7a5fe7c52c8 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -960,11 +960,8 @@ xfs_release( * be exposed to that problem. 
*/ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated) { - xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); - if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) - xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); - } + if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) + xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); } if (ip->i_d.di_nlink == 0) diff --git a/trunk/include/linux/blk_types.h b/trunk/include/linux/blk_types.h index 6395692b2e7a..2a7cea53ca0d 100644 --- a/trunk/include/linux/blk_types.h +++ b/trunk/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ - REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) + REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/trunk/include/linux/blktrace_api.h b/trunk/include/linux/blktrace_api.h index 8c7c2de7631a..b22fb0d3db0f 100644 --- a/trunk/include/linux/blktrace_api.h +++ b/trunk/include/linux/blktrace_api.h @@ -169,8 +169,7 @@ extern void blk_trace_shutdown(struct request_queue *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, struct blk_user_trace_setup *buts); -extern __attribute__((format(printf, 2, 3))) -void __trace_note_message(struct blk_trace *, const char *fmt, ...); +extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** * blk_add_trace_msg - Add a (simple) message to the blktrace stream diff --git a/trunk/include/linux/compat.h b/trunk/include/linux/compat.h index 846bb1792572..ddcb7db38e67 100644 --- a/trunk/include/linux/compat.h +++ b/trunk/include/linux/compat.h @@ -467,8 +467,6 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen); asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags); -asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned vlen, unsigned int flags); asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags); asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, diff --git a/trunk/include/linux/device.h b/trunk/include/linux/device.h index e4f62d8896b7..c66111affca9 100644 --- a/trunk/include/linux/device.h +++ b/trunk/include/linux/device.h @@ -530,6 +530,7 @@ struct device_dma_parameters { * @dma_mem: Internal for coherent mem override. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. + * @of_match: Matching of_device_id from driver. * @devt: For creating the sysfs "dev". * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. 
@@ -653,13 +654,13 @@ static inline int device_is_registered(struct device *dev) static inline void device_enable_async_suspend(struct device *dev) { - if (!dev->power.is_prepared) + if (!dev->power.in_suspend) dev->power.async_suspend = true; } static inline void device_disable_async_suspend(struct device *dev) { - if (!dev->power.is_prepared) + if (!dev->power.in_suspend) dev->power.async_suspend = false; } diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index b5b979247863..6e73e2e9ae33 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -639,7 +639,6 @@ struct address_space { struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ - /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ diff --git a/trunk/include/linux/ftrace_event.h b/trunk/include/linux/ftrace_event.h index b1e69eefc203..59d3ef100eb9 100644 --- a/trunk/include/linux/ftrace_event.h +++ b/trunk/include/linux/ftrace_event.h @@ -129,10 +129,6 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs); void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event); diff --git a/trunk/include/linux/hrtimer.h b/trunk/include/linux/hrtimer.h index fd0dc30c9f15..51932e5acf7c 100644 --- a/trunk/include/linux/hrtimer.h +++ b/trunk/include/linux/hrtimer.h @@ -135,7 +135,6 @@ struct hrtimer_sleeper { * @cpu_base: per cpu clock base * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. 
- * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock diff --git a/trunk/include/linux/hw_breakpoint.h b/trunk/include/linux/hw_breakpoint.h index 6ae9c631a1be..d1e55fed2c7d 100644 --- a/trunk/include/linux/hw_breakpoint.h +++ b/trunk/include/linux/hw_breakpoint.h @@ -73,7 +73,6 @@ static inline unsigned long hw_breakpoint_len(struct perf_event *bp) extern struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, - void *context, struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ @@ -86,13 +85,11 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr); extern struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, - void *context, int cpu); extern struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, - perf_overflow_handler_t triggered, - void *context); + perf_overflow_handler_t triggered); extern int register_perf_hw_breakpoint(struct perf_event *bp); extern int __register_perf_hw_breakpoint(struct perf_event *bp); @@ -118,7 +115,6 @@ static inline int __init init_hw_breakpoint(void) { return 0; } static inline struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, - void *context, struct task_struct *tsk) { return NULL; } static inline int modify_user_hw_breakpoint(struct perf_event *bp, @@ -126,12 +122,10 @@ modify_user_hw_breakpoint(struct perf_event *bp, static inline struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, - void *context, int cpu) { return NULL; } static inline struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, - perf_overflow_handler_t triggered, - void *context) { return NULL; } + perf_overflow_handler_t triggered) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline int diff --git a/trunk/include/linux/mmzone.h b/trunk/include/linux/mmzone.h index 9f7c3ebcbbad..c928dac6cad0 100644 --- a/trunk/include/linux/mmzone.h +++ b/trunk/include/linux/mmzone.h @@ -647,13 +647,6 @@ typedef struct pglist_data { #endif #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) - -#define node_end_pfn(nid) ({\ - pg_data_t *__pgdat = NODE_DATA(nid);\ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\ -}) - #include extern struct mutex zonelists_mutex; diff --git a/trunk/include/linux/nfs_page.h b/trunk/include/linux/nfs_page.h index 25311b3bedf8..3a34e80ae92f 100644 --- a/trunk/include/linux/nfs_page.h +++ b/trunk/include/linux/nfs_page.h @@ -92,9 +92,6 @@ extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); -extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, - struct nfs_page *prev, - struct nfs_page *req); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); diff --git 
a/trunk/include/linux/nfs_xdr.h b/trunk/include/linux/nfs_xdr.h index 00848d86ffb2..5e8444a11adf 100644 --- a/trunk/include/linux/nfs_xdr.h +++ b/trunk/include/linux/nfs_xdr.h @@ -158,6 +158,7 @@ struct nfs_seqid; /* nfs41 sessions channel attributes */ struct nfs4_channel_attrs { + u32 headerpadsz; u32 max_rqst_sz; u32 max_resp_sz; u32 max_resp_sz_cached; diff --git a/trunk/include/linux/pci_ids.h b/trunk/include/linux/pci_ids.h index f8910e155566..a311008af5e1 100644 --- a/trunk/include/linux/pci_ids.h +++ b/trunk/include/linux/pci_ids.h @@ -1537,7 +1537,6 @@ #define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 #define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 #define PCI_DEVICE_ID_RICOH_R5C822 0x0822 -#define PCI_DEVICE_ID_RICOH_R5CE823 0xe823 #define PCI_DEVICE_ID_RICOH_R5C832 0x0832 #define PCI_DEVICE_ID_RICOH_R5C843 0x0843 diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h index 3f2711ccf910..e0786e35f247 100644 --- a/trunk/include/linux/perf_event.h +++ b/trunk/include/linux/perf_event.h @@ -61,7 +61,7 @@ enum perf_hw_id { /* * Generalized hardware cache events: * - * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x * { read, write, prefetch } x * { accesses, misses } */ @@ -72,7 +72,6 @@ enum perf_hw_cache_id { PERF_COUNT_HW_CACHE_DTLB = 3, PERF_COUNT_HW_CACHE_ITLB = 4, PERF_COUNT_HW_CACHE_BPU = 5, - PERF_COUNT_HW_CACHE_NODE = 6, PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ }; @@ -537,16 +536,6 @@ struct perf_branch_stack { struct task_struct; -/* - * extra PMU register associated with an event - */ -struct hw_perf_event_extra { - u64 config; /* register value */ - unsigned int reg; /* register address or index */ - int alloc; /* extra register already allocated */ - int idx; /* index in shared_regs->regs[] */ -}; - /** * struct hw_perf_event - performance event hardware details: */ @@ -560,7 +549,9 @@ struct hw_perf_event { unsigned long event_base; int idx; int last_cpu; - struct hw_perf_event_extra extra_reg; + unsigned int extra_reg; + u64 extra_config; + int extra_alloc; }; struct { /* software */ struct hrtimer hrtimer; @@ -689,9 +680,36 @@ enum perf_event_active_state { }; struct file; + +#define PERF_BUFFER_WRITABLE 0x01 + +struct perf_buffer { + atomic_t refcount; + struct rcu_head rcu_head; +#ifdef CONFIG_PERF_USE_VMALLOC + struct work_struct work; + int page_order; /* allocation order */ +#endif + int nr_pages; /* nr of data pages */ + int writable; /* are we writable */ + + atomic_t poll; /* POLL_ for wakeups */ + + local_t head; /* write position */ + local_t nest; /* nested writers */ + local_t events; /* event limit */ + local_t wakeup; /* wakeup stamp */ + local_t lost; /* nr records lost */ + + long watermark; /* wakeup watermark */ + + struct perf_event_mmap_page *user_page; + void *data_pages[0]; +}; + struct perf_sample_data; -typedef void (*perf_overflow_handler_t)(struct perf_event *, +typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct perf_sample_data *, struct pt_regs *regs); @@ -727,8 +745,6 @@ struct perf_cgroup { }; #endif -struct ring_buffer; - /** * struct perf_event - performance event kernel representation: */ @@ -818,7 +834,7 @@ struct perf_event { atomic_t mmap_count; int mmap_locked; struct user_struct *mmap_user; - struct ring_buffer *rb; + struct perf_buffer *buffer; /* poll related */ wait_queue_head_t waitq; @@ -839,7 +855,6 @@ struct perf_event { u64 id; perf_overflow_handler_t overflow_handler; - void *overflow_handler_context; #ifdef CONFIG_EVENT_TRACING struct 
ftrace_event_call *tp_event; @@ -904,8 +919,8 @@ struct perf_event_context { u64 parent_gen; u64 generation; int pin_count; - int nr_cgroups; /* cgroup events present */ struct rcu_head rcu_head; + int nr_cgroups; /* cgroup events present */ }; /* @@ -930,11 +945,13 @@ struct perf_cpu_context { struct perf_output_handle { struct perf_event *event; - struct ring_buffer *rb; + struct perf_buffer *buffer; unsigned long wakeup; unsigned long size; void *addr; int page; + int nmi; + int sample; }; #ifdef CONFIG_PERF_EVENTS @@ -955,15 +972,13 @@ extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); -extern int perf_event_refresh(struct perf_event *event, int refresh); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, struct task_struct *task, - perf_overflow_handler_t callback, - void *context); + perf_overflow_handler_t callback); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1003,7 +1018,7 @@ extern void perf_prepare_sample(struct perf_event_header *header, struct perf_event *event, struct pt_regs *regs); -extern int perf_event_overflow(struct perf_event *event, +extern int perf_event_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs); @@ -1022,7 +1037,7 @@ static inline int is_software_event(struct perf_event *event) extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; -extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); +extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } @@ -1044,7 +1059,7 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) } static __always_inline void -perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) +perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct pt_regs hot_regs; @@ -1053,7 +1068,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) perf_fetch_caller_regs(&hot_regs); regs = &hot_regs; } - __perf_sw_event(event_id, nr, regs, addr); + __perf_sw_event(event_id, nr, nmi, regs, addr); } } @@ -1067,7 +1082,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task) static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); __perf_event_task_sched_out(task, next); } @@ -1128,7 +1143,8 @@ extern void perf_bp_event(struct perf_event *event, void *data); #endif extern int perf_output_begin(struct perf_output_handle *handle, - struct perf_event *event, unsigned int size); + struct perf_event *event, unsigned int size, + int nmi, int sample); extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); @@ -1150,13 +1166,10 @@ static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { 
return -EINVAL; } -static inline int perf_event_refresh(struct perf_event *event, int refresh) -{ - return -EINVAL; -} static inline void -perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } +perf_sw_event(u32 event_id, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } static inline void perf_bp_event(struct perf_event *event, void *data) { } diff --git a/trunk/include/linux/pm.h b/trunk/include/linux/pm.h index 411e4f4be52b..3160648ccdda 100644 --- a/trunk/include/linux/pm.h +++ b/trunk/include/linux/pm.h @@ -425,8 +425,7 @@ struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; unsigned int async_suspend:1; - bool is_prepared:1; /* Owned by the PM core */ - bool is_suspended:1; /* Ditto */ + unsigned int in_suspend:1; /* Owned by the PM core */ spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; diff --git a/trunk/include/linux/ring_buffer.h b/trunk/include/linux/ring_buffer.h index b891de96000f..ab38ac80b0f9 100644 --- a/trunk/include/linux/ring_buffer.h +++ b/trunk/include/linux/ring_buffer.h @@ -169,7 +169,7 @@ void ring_buffer_set_clock(struct ring_buffer *buffer, size_t ring_buffer_page_len(void *page); -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu); +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); diff --git a/trunk/include/linux/shmem_fs.h b/trunk/include/linux/shmem_fs.h index aa08fa8fd79b..2b7fec840517 100644 --- a/trunk/include/linux/shmem_fs.h +++ b/trunk/include/linux/shmem_fs.h @@ -3,7 +3,6 @@ #include #include -#include #include /* inode in-kernel data */ @@ -46,27 +45,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) return container_of(inode, struct shmem_inode_info, vfs_inode); } -/* - * Functions in mm/shmem.c called directly from elsewhere: - */ extern int init_tmpfs(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); -extern struct file *shmem_file_setup(const char *name, - loff_t size, unsigned long flags); -extern int shmem_zero_setup(struct vm_area_struct *); -extern int shmem_lock(struct file *file, int lock, struct user_struct *user); -extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); -extern int shmem_unuse(swp_entry_t entry, struct page *page); -extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent); - -static inline struct page *shmem_read_mapping_page( - struct address_space *mapping, pgoff_t index) -{ - return shmem_read_mapping_page_gfp(mapping, index, - mapping_gfp_mask(mapping)); -} #endif diff --git a/trunk/include/linux/stacktrace.h b/trunk/include/linux/stacktrace.h index 115b570e3bff..25310f1d7f37 100644 --- a/trunk/include/linux/stacktrace.h +++ b/trunk/include/linux/stacktrace.h @@ -14,8 +14,8 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); -extern void save_stack_trace_regs(struct pt_regs *regs, - struct stack_trace *trace); +extern void save_stack_trace_regs(struct stack_trace *trace, + struct pt_regs *regs); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); diff --git a/trunk/include/linux/sunrpc/sched.h b/trunk/include/linux/sunrpc/sched.h index 
fe2d8e6b923b..f73c482ec9c6 100644 --- a/trunk/include/linux/sunrpc/sched.h +++ b/trunk/include/linux/sunrpc/sched.h @@ -84,8 +84,7 @@ struct rpc_task { #endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, - tk_cred_retry : 2, - tk_rebind_retry : 2; + tk_cred_retry : 2; }; #define tk_xprt tk_client->cl_xprt diff --git a/trunk/include/linux/swap.h b/trunk/include/linux/swap.h index a273468f8285..e70564647039 100644 --- a/trunk/include/linux/swap.h +++ b/trunk/include/linux/swap.h @@ -300,6 +300,16 @@ static inline void scan_unevictable_unregister_node(struct node *node) extern int kswapd_run(int nid); extern void kswapd_stop(int nid); +#ifdef CONFIG_MMU +/* linux/mm/shmem.c */ +extern int shmem_unuse(swp_entry_t entry, struct page *page); +#endif /* CONFIG_MMU */ + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, + struct page **pagep, swp_entry_t *ent); +#endif + #ifdef CONFIG_SWAP /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); diff --git a/trunk/include/net/sock.h b/trunk/include/net/sock.h index c0b938cb4b1a..f2046e404a61 100644 --- a/trunk/include/net/sock.h +++ b/trunk/include/net/sock.h @@ -178,6 +178,7 @@ struct sock_common { * @sk_dst_cache: destination cache * @sk_dst_lock: destination cache lock * @sk_policy: flow policy + * @sk_rmem_alloc: receive queue bytes committed * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_write_queue: Packet sending queue diff --git a/trunk/include/sound/soc.h b/trunk/include/sound/soc.h index 3a4bd3a3c68d..f1de3e0c75bc 100644 --- a/trunk/include/sound/soc.h +++ b/trunk/include/sound/soc.h @@ -248,7 +248,8 @@ typedef int (*hw_write_t)(void *,const char* ,int); extern struct snd_ac97_bus_ops soc_ac97_ops; enum snd_soc_control_type { - SND_SOC_I2C = 1, + SND_SOC_CUSTOM = 1, + SND_SOC_I2C, SND_SOC_SPI, }; diff --git a/trunk/init/calibrate.c b/trunk/init/calibrate.c index aae2f40fea4c..2568d22a304e 100644 --- a/trunk/init/calibrate.c +++ b/trunk/init/calibrate.c @@ -245,32 +245,30 @@ static unsigned long __cpuinit calibrate_delay_converge(void) void __cpuinit calibrate_delay(void) { - unsigned long lpj; static bool printed; if (preset_lpj) { - lpj = preset_lpj; + loops_per_jiffy = preset_lpj; if (!printed) pr_info("Calibrating delay loop (skipped) " "preset value.. "); } else if ((!printed) && lpj_fine) { - lpj = lpj_fine; + loops_per_jiffy = lpj_fine; pr_info("Calibrating delay loop (skipped), " "value calculated using timer frequency.. "); - } else if ((lpj = calibrate_delay_direct()) != 0) { + } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { if (!printed) pr_info("Calibrating delay using timer " "specific routine.. "); } else { if (!printed) pr_info("Calibrating delay loop... "); - lpj = calibrate_delay_converge(); + loops_per_jiffy = calibrate_delay_converge(); } if (!printed) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", - lpj/(500000/HZ), - (lpj/(5000/HZ)) % 100, lpj); + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); - loops_per_jiffy = lpj; printed = true; } diff --git a/trunk/kernel/async.c b/trunk/kernel/async.c index d5fe7af0de2e..cd9dbb913c77 100644 --- a/trunk/kernel/async.c +++ b/trunk/kernel/async.c @@ -49,13 +49,12 @@ asynchronous and synchronous parts of the kernel. 
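The calibrate.c hunk above changes which variable carries the calibration result, but the BogoMIPS line printed at the end is the same arithmetic in both versions. A quick userspace rendering of that conversion; HZ and the lpj value are picked purely for illustration:

  #include <stdio.h>

  #define HZ 250  /* illustrative; in the kernel this is a config option */

  int main(void)
  {
          unsigned long lpj = 4963532;  /* example loops_per_jiffy */

          /* Same expressions as the pr_cont() in the hunk:
           * whole BogoMIPS, then hundredths. */
          unsigned long whole = lpj / (500000 / HZ);
          unsigned long frac  = (lpj / (5000 / HZ)) % 100;

          printf("%lu.%02lu BogoMIPS (lpj=%lu)\n", whole, frac, lpj);
          return 0;
  }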
*/ #include -#include -#include #include #include #include #include #include +#include static async_cookie_t next_cookie = 1; @@ -129,8 +128,7 @@ static void async_run_entry_fn(struct work_struct *work) /* 2) run (and print duration) */ if (initcall_debug && system_state == SYSTEM_BOOTING) { - printk(KERN_DEBUG "calling %lli_%pF @ %i\n", - (long long)entry->cookie, + printk("calling %lli_%pF @ %i\n", (long long)entry->cookie, entry->func, task_pid_nr(current)); calltime = ktime_get(); } @@ -138,7 +136,7 @@ static void async_run_entry_fn(struct work_struct *work) if (initcall_debug && system_state == SYSTEM_BOOTING) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); - printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n", + printk("initcall %lli_%pF returned 0 after %lld usecs\n", (long long)entry->cookie, entry->func, (long long)ktime_to_ns(delta) >> 10); @@ -272,7 +270,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, ktime_t starttime, delta, endtime; if (initcall_debug && system_state == SYSTEM_BOOTING) { - printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); + printk("async_waiting @ %i\n", task_pid_nr(current)); starttime = ktime_get(); } @@ -282,7 +280,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, endtime = ktime_get(); delta = ktime_sub(endtime, starttime); - printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n", + printk("async_continuing @ %i after %lli usec\n", task_pid_nr(current), (long long)ktime_to_ns(delta) >> 10); } diff --git a/trunk/kernel/events/Makefile b/trunk/kernel/events/Makefile index 89e5e8aa4c36..1ce23d3d8394 100644 --- a/trunk/kernel/events/Makefile +++ b/trunk/kernel/events/Makefile @@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_core.o = -pg endif -obj-y := core.o ring_buffer.o +obj-y := core.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/trunk/kernel/events/core.c b/trunk/kernel/events/core.c index 0567e32d71aa..9efe7108ccaf 100644 --- a/trunk/kernel/events/core.c +++ b/trunk/kernel/events/core.c @@ -36,8 +36,6 @@ #include #include -#include "internal.h" - #include struct remote_function_call { @@ -202,22 +200,6 @@ __get_cpu_context(struct perf_event_context *ctx) return this_cpu_ptr(ctx->pmu->pmu_cpu_context); } -static void perf_ctx_lock(struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - raw_spin_lock(&cpuctx->ctx.lock); - if (ctx) - raw_spin_lock(&ctx->lock); -} - -static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - if (ctx) - raw_spin_unlock(&ctx->lock); - raw_spin_unlock(&cpuctx->ctx.lock); -} - #ifdef CONFIG_CGROUP_PERF /* @@ -358,8 +340,11 @@ void perf_cgroup_switch(struct task_struct *task, int mode) rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_pmu_disable(cpuctx->ctx.pmu); + /* * perf_cgroup_events says at least one * context on this CPU has cgroup events. @@ -368,8 +353,6 @@ void perf_cgroup_switch(struct task_struct *task, int mode) * events for a context. 
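The perf_ctx_lock()/perf_ctx_unlock() helpers removed in the hunk above encode a fixed lock order: the per-CPU context lock is taken first, the task context lock (if present) second, and both are released in reverse. A userspace pthread sketch of the same discipline, with hypothetical names; build with -pthread:

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t cpu_ctx_lock  = PTHREAD_MUTEX_INITIALIZER;
  static pthread_mutex_t task_ctx_lock = PTHREAD_MUTEX_INITIALIZER;

  /* Outer lock first, inner lock only when a task context exists. */
  static void ctx_lock_pair(int have_task_ctx)
  {
          pthread_mutex_lock(&cpu_ctx_lock);
          if (have_task_ctx)
                  pthread_mutex_lock(&task_ctx_lock);
  }

  /* Release in the reverse order of acquisition. */
  static void ctx_unlock_pair(int have_task_ctx)
  {
          if (have_task_ctx)
                  pthread_mutex_unlock(&task_ctx_lock);
          pthread_mutex_unlock(&cpu_ctx_lock);
  }

  int main(void)
  {
          ctx_lock_pair(1);
          printf("both contexts held; reschedule events here\n");
          ctx_unlock_pair(1);
          return 0;
  }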
*/ if (cpuctx->ctx.nr_cgroups > 0) { - perf_ctx_lock(cpuctx, cpuctx->task_ctx); - perf_pmu_disable(cpuctx->ctx.pmu); if (mode & PERF_CGROUP_SWOUT) { cpu_ctx_sched_out(cpuctx, EVENT_ALL); @@ -389,9 +372,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode) cpuctx->cgrp = perf_cgroup_from_task(task); cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); } - perf_pmu_enable(cpuctx->ctx.pmu); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } + + perf_pmu_enable(cpuctx->ctx.pmu); } rcu_read_unlock(); @@ -748,7 +731,6 @@ static u64 perf_event_time(struct perf_event *event) /* * Update the total_time_enabled and total_time_running fields for a event. - * The caller of this function needs to hold the ctx->lock. */ static void update_event_times(struct perf_event *event) { @@ -1123,10 +1105,6 @@ static int __perf_remove_from_context(void *info) raw_spin_lock(&ctx->lock); event_sched_out(event, cpuctx, ctx); list_del_event(event, ctx); - if (!ctx->nr_events && cpuctx->task_ctx == ctx) { - ctx->is_active = 0; - cpuctx->task_ctx = NULL; - } raw_spin_unlock(&ctx->lock); return 0; @@ -1476,24 +1454,8 @@ static void add_event_to_ctx(struct perf_event *event, event->tstamp_stopped = tstamp; } -static void task_ctx_sched_out(struct perf_event_context *ctx); -static void -ctx_sched_in(struct perf_event_context *ctx, - struct perf_cpu_context *cpuctx, - enum event_type_t event_type, - struct task_struct *task); - -static void perf_event_sched_in(struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, - struct task_struct *task) -{ - cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); - if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); - if (ctx) - ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); -} +static void perf_event_context_sched_in(struct perf_event_context *ctx, + struct task_struct *tsk); /* * Cross CPU call to install and enable a performance event @@ -1504,37 +1466,20 @@ static int __perf_install_in_context(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_event *leader = event->group_leader; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); - struct perf_event_context *task_ctx = cpuctx->task_ctx; - struct task_struct *task = current; - - perf_ctx_lock(cpuctx, task_ctx); - perf_pmu_disable(cpuctx->ctx.pmu); - - /* - * If there was an active task_ctx schedule it out. - */ - if (task_ctx) - task_ctx_sched_out(task_ctx); + int err; /* - * If the context we're installing events in is not the - * active task_ctx, flip them. + * In case we're installing a new context to an already running task, + * could also happen before perf_event_task_sched_in() on architectures + * which do context switches with IRQs enabled. */ - if (ctx->task && task_ctx != ctx) { - if (task_ctx) - raw_spin_unlock(&task_ctx->lock); - raw_spin_lock(&ctx->lock); - task_ctx = ctx; - } - - if (task_ctx) { - cpuctx->task_ctx = task_ctx; - task = task_ctx->task; - } - - cpu_ctx_sched_out(cpuctx, EVENT_ALL); + if (ctx->task && !cpuctx->task_ctx) + perf_event_context_sched_in(ctx, ctx->task); + raw_spin_lock(&ctx->lock); + ctx->is_active = 1; update_context_time(ctx); /* * update cgrp time only if current cgrp @@ -1545,13 +1490,43 @@ static int __perf_install_in_context(void *info) add_event_to_ctx(event, ctx); + if (!event_filter_match(event)) + goto unlock; + /* - * Schedule everything back in + * Don't put the event on if it is disabled or if + * it is in a group and the group isn't on. 
*/ - perf_event_sched_in(cpuctx, task_ctx, task); + if (event->state != PERF_EVENT_STATE_INACTIVE || + (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) + goto unlock; - perf_pmu_enable(cpuctx->ctx.pmu); - perf_ctx_unlock(cpuctx, task_ctx); + /* + * An exclusive event can't go on if there are already active + * hardware events, and no hardware event can go on if there + * is already an exclusive event on. + */ + if (!group_can_go_on(event, cpuctx, 1)) + err = -EEXIST; + else + err = event_sched_in(event, cpuctx, ctx); + + if (err) { + /* + * This event couldn't go on. If it is in a group + * then we have to pull the whole group off. + * If the event group is pinned then put it in error state. + */ + if (leader != event) + group_sched_out(leader, cpuctx, ctx); + if (leader->attr.pinned) { + update_group_times(leader); + leader->state = PERF_EVENT_STATE_ERROR; + } + } + +unlock: + raw_spin_unlock(&ctx->lock); return 0; } @@ -1764,7 +1739,7 @@ void perf_event_enable(struct perf_event *event) raw_spin_unlock_irq(&ctx->lock); } -int perf_event_refresh(struct perf_event *event, int refresh) +static int perf_event_refresh(struct perf_event *event, int refresh) { /* * not supported on inherited events @@ -1777,35 +1752,36 @@ int perf_event_refresh(struct perf_event *event, int refresh) return 0; } -EXPORT_SYMBOL_GPL(perf_event_refresh); static void ctx_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type) { struct perf_event *event; - int is_active = ctx->is_active; - ctx->is_active &= ~event_type; + raw_spin_lock(&ctx->lock); + perf_pmu_disable(ctx->pmu); + ctx->is_active = 0; if (likely(!ctx->nr_events)) - return; - + goto out; update_context_time(ctx); update_cgrp_time_from_cpuctx(cpuctx); + if (!ctx->nr_active) - return; + goto out; - perf_pmu_disable(ctx->pmu); - if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) { + if (event_type & EVENT_PINNED) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) group_sched_out(event, cpuctx, ctx); } - if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) { + if (event_type & EVENT_FLEXIBLE) { list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } +out: perf_pmu_enable(ctx->pmu); + raw_spin_unlock(&ctx->lock); } /* @@ -1953,10 +1929,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, rcu_read_unlock(); if (do_switch) { - raw_spin_lock(&ctx->lock); ctx_sched_out(ctx, cpuctx, EVENT_ALL); cpuctx->task_ctx = NULL; - raw_spin_unlock(&ctx->lock); } } @@ -1991,7 +1965,8 @@ void __perf_event_task_sched_out(struct task_struct *task, perf_cgroup_sched_out(task); } -static void task_ctx_sched_out(struct perf_event_context *ctx) +static void task_ctx_sched_out(struct perf_event_context *ctx, + enum event_type_t event_type) { struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -2001,7 +1976,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx) if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) return; - ctx_sched_out(ctx, cpuctx, EVENT_ALL); + ctx_sched_out(ctx, cpuctx, event_type); cpuctx->task_ctx = NULL; } @@ -2080,11 +2055,11 @@ ctx_sched_in(struct perf_event_context *ctx, struct task_struct *task) { u64 now; - int is_active = ctx->is_active; - ctx->is_active |= event_type; + raw_spin_lock(&ctx->lock); + ctx->is_active = 1; if (likely(!ctx->nr_events)) - return; + goto out; now = perf_clock(); ctx->timestamp = now; @@ -2093,12 +2068,15 @@ ctx_sched_in(struct perf_event_context 
*ctx, * First go through the list and put on any pinned groups * in order to give them the best chance of going on. */ - if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) + if (event_type & EVENT_PINNED) ctx_pinned_sched_in(ctx, cpuctx); /* Then walk through the lower prio flexible groups */ - if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) + if (event_type & EVENT_FLEXIBLE) ctx_flexible_sched_in(ctx, cpuctx); + +out: + raw_spin_unlock(&ctx->lock); } static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, @@ -2110,6 +2088,19 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, event_type, task); } +static void task_ctx_sched_in(struct perf_event_context *ctx, + enum event_type_t event_type) +{ + struct perf_cpu_context *cpuctx; + + cpuctx = __get_cpu_context(ctx); + if (cpuctx->task_ctx == ctx) + return; + + ctx_sched_in(ctx, cpuctx, event_type, NULL); + cpuctx->task_ctx = ctx; +} + static void perf_event_context_sched_in(struct perf_event_context *ctx, struct task_struct *task) { @@ -2119,7 +2110,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, if (cpuctx->task_ctx == ctx) return; - perf_ctx_lock(cpuctx, ctx); perf_pmu_disable(ctx->pmu); /* * We want to keep the following priority order: @@ -2128,18 +2118,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, */ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - perf_event_sched_in(cpuctx, ctx, task); + ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); cpuctx->task_ctx = ctx; - perf_pmu_enable(ctx->pmu); - perf_ctx_unlock(cpuctx, ctx); - /* * Since these rotations are per-cpu, we need to ensure the * cpu-context we got scheduled on is actually rotating. */ perf_pmu_rotate_start(ctx->pmu); + perf_pmu_enable(ctx->pmu); } /* @@ -2279,6 +2269,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) u64 interrupts, now; s64 delta; + raw_spin_lock(&ctx->lock); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -2310,6 +2301,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) if (delta > 0) perf_adjust_period(event, period, delta); } + raw_spin_unlock(&ctx->lock); } /* @@ -2317,12 +2309,16 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) */ static void rotate_ctx(struct perf_event_context *ctx) { + raw_spin_lock(&ctx->lock); + /* * Rotate the first entry last of non-pinned groups. Rotation might be * disabled by the inheritance code. 
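The is_active checks visible above (on the removed side of the hunk) let ctx_sched_in() and ctx_sched_out() act only on the event classes, pinned or flexible, whose state actually changes. A minimal sketch of that bitmask pattern; the flag values are illustrative, not the kernel's:

  #include <stdio.h>

  #define EVENT_PINNED    0x1
  #define EVENT_FLEXIBLE  0x2

  /* Return the classes that are requested but not yet active, and mark
   * them active -- the same bookkeeping as the is_active handling above. */
  static int classes_to_schedule_in(int *is_active, int event_type)
  {
          int newly_active = event_type & ~*is_active;

          *is_active |= event_type;
          return newly_active;
  }

  int main(void)
  {
          int is_active = 0;

          printf("%#x\n", classes_to_schedule_in(&is_active, EVENT_PINNED));
          /* pinned is already active, so only flexible is new this time */
          printf("%#x\n", classes_to_schedule_in(&is_active,
                                                 EVENT_PINNED | EVENT_FLEXIBLE));
          return 0;
  }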
*/ if (!ctx->rotate_disable) list_rotate_left(&ctx->flexible_groups); + + raw_spin_unlock(&ctx->lock); } /* @@ -2349,7 +2345,6 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) rotate = 1; } - perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); perf_ctx_adjust_freq(&cpuctx->ctx, interval); if (ctx) @@ -2360,20 +2355,21 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); + task_ctx_sched_out(ctx, EVENT_FLEXIBLE); rotate_ctx(&cpuctx->ctx); if (ctx) rotate_ctx(ctx); - perf_event_sched_in(cpuctx, ctx, current); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); + if (ctx) + task_ctx_sched_in(ctx, EVENT_FLEXIBLE); done: if (remove) list_del_init(&cpuctx->rotation_list); perf_pmu_enable(cpuctx->ctx.pmu); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } void perf_event_task_tick(void) @@ -2428,9 +2424,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) * in. */ perf_cgroup_sched_out(current); + task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); - task_ctx_sched_out(ctx); list_for_each_entry(event, &ctx->pinned_groups, group_entry) { ret = event_enable_on_exec(event, ctx); @@ -2839,12 +2835,16 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) unclone_ctx(ctx); ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); - } else { + } + + if (!ctx) { ctx = alloc_perf_context(pmu, task); err = -ENOMEM; if (!ctx) goto errout; + get_ctx(ctx); + err = 0; mutex_lock(&task->perf_event_mutex); /* @@ -2856,14 +2856,14 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) else if (task->perf_event_ctxp[ctxn]) err = -EAGAIN; else { - get_ctx(ctx); ++ctx->pin_count; rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); } mutex_unlock(&task->perf_event_mutex); if (unlikely(err)) { - put_ctx(ctx); + put_task_struct(task); + kfree(ctx); if (err == -EAGAIN) goto retry; @@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head) kfree(event); } -static void ring_buffer_put(struct ring_buffer *rb); +static void perf_buffer_put(struct perf_buffer *buffer); static void free_event(struct perf_event *event) { @@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event) } } - if (event->rb) { - ring_buffer_put(event->rb); - event->rb = NULL; + if (event->buffer) { + perf_buffer_put(event->buffer); + event->buffer = NULL; } if (is_cgroup_event(event)) @@ -2934,6 +2934,12 @@ int perf_event_release_kernel(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; + /* + * Remove from the PMU, can't get re-enabled since we got + * here because the last ref went. 
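rotate_ctx() above implements the round-robin policy for flexible groups by moving the first list entry to the tail (list_rotate_left()), so a different group gets first claim on the PMU at each tick. An array-based sketch of the same rotation policy, not the kernel list implementation:

  #include <stdio.h>

  /* Move arr[0] to the end and shift everything else left by one --
   * the array analogue of list_rotate_left(). */
  static void rotate_left(int *arr, int n)
  {
          int first = arr[0];
          int i;

          for (i = 0; i < n - 1; i++)
                  arr[i] = arr[i + 1];
          arr[n - 1] = first;
  }

  int main(void)
  {
          int groups[] = { 1, 2, 3, 4 };
          int tick;

          for (tick = 0; tick < 4; tick++) {
                  printf("tick %d: group %d schedules first\n", tick, groups[0]);
                  rotate_left(groups, 4);
          }
          return 0;
  }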
+ */ + perf_event_disable(event); + WARN_ON_ONCE(ctx->parent_ctx); /* * There are two ways this annotation is useful: @@ -2950,8 +2956,8 @@ int perf_event_release_kernel(struct perf_event *event) mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); + list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); - perf_remove_from_context(event); mutex_unlock(&ctx->mutex); free_event(event); @@ -3143,13 +3149,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_event *event = file->private_data; - struct ring_buffer *rb; + struct perf_buffer *buffer; unsigned int events = POLL_HUP; rcu_read_lock(); - rb = rcu_dereference(event->rb); - if (rb) - events = atomic_xchg(&rb->poll, 0); + buffer = rcu_dereference(event->buffer); + if (buffer) + events = atomic_xchg(&buffer->poll, 0); rcu_read_unlock(); poll_wait(file, &event->waitq, wait); @@ -3352,18 +3358,6 @@ static int perf_event_index(struct perf_event *event) return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; } -static void calc_timer_values(struct perf_event *event, - u64 *running, - u64 *enabled) -{ - u64 now, ctx_time; - - now = perf_clock(); - ctx_time = event->shadow_ctx_time + now; - *enabled = ctx_time - event->tstamp_enabled; - *running = ctx_time - event->tstamp_running; -} - /* * Callers need to ensure there can be no nesting of this function, otherwise * the seqlock logic goes bad. We can not serialize this because the arch @@ -3372,25 +3366,14 @@ static void calc_timer_values(struct perf_event *event, void perf_event_update_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; - struct ring_buffer *rb; - u64 enabled, running; + struct perf_buffer *buffer; rcu_read_lock(); - /* - * compute total_time_enabled, total_time_running - * based on snapshot values taken when the event - * was last scheduled in. - * - * we cannot simply called update_context_time() - * because of locking issue as we can be called in - * NMI context - */ - calc_timer_values(event, &enabled, &running); - rb = rcu_dereference(event->rb); - if (!rb) + buffer = rcu_dereference(event->buffer); + if (!buffer) goto unlock; - userpg = rb->user_page; + userpg = buffer->user_page; /* * Disable preemption so as to not let the corresponding user-space @@ -3404,10 +3387,10 @@ void perf_event_update_userpage(struct perf_event *event) if (event->state == PERF_EVENT_STATE_ACTIVE) userpg->offset -= local64_read(&event->hw.prev_count); - userpg->time_enabled = enabled + + userpg->time_enabled = event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); - userpg->time_running = running + + userpg->time_running = event->total_time_running + atomic64_read(&event->child_total_time_running); barrier(); @@ -3417,10 +3400,220 @@ void perf_event_update_userpage(struct perf_event *event) rcu_read_unlock(); } +static unsigned long perf_data_size(struct perf_buffer *buffer); + +static void +perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags) +{ + long max_size = perf_data_size(buffer); + + if (watermark) + buffer->watermark = min(max_size, watermark); + + if (!buffer->watermark) + buffer->watermark = max_size / 2; + + if (flags & PERF_BUFFER_WRITABLE) + buffer->writable = 1; + + atomic_set(&buffer->refcount, 1); +} + +#ifndef CONFIG_PERF_USE_VMALLOC + +/* + * Back perf_mmap() with regular GFP_KERNEL-0 pages. 
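perf_buffer_init() above derives the wakeup watermark: a caller-supplied value is clamped to the buffer's data size, and when none is supplied, half of the buffer is used. A tiny sketch of that defaulting rule; the 4 KiB page size is an assumption for the example:

  #include <stdio.h>

  #define PAGE_SIZE 4096UL

  /* Mirror of the watermark logic above: clamp a requested watermark to
   * the data size, and default to half the buffer when none is given. */
  static long pick_watermark(long requested, unsigned long data_size)
  {
          long watermark = 0;

          if (requested)
                  watermark = requested < (long)data_size ?
                              requested : (long)data_size;
          if (!watermark)
                  watermark = data_size / 2;

          return watermark;
  }

  int main(void)
  {
          unsigned long size = 8 * PAGE_SIZE;

          printf("%ld\n", pick_watermark(0, size));        /* 16384 */
          printf("%ld\n", pick_watermark(1024, size));     /* 1024  */
          printf("%ld\n", pick_watermark(1 << 20, size));  /* 32768 */
          return 0;
  }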
+ */ + +static struct page * +perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) +{ + if (pgoff > buffer->nr_pages) + return NULL; + + if (pgoff == 0) + return virt_to_page(buffer->user_page); + + return virt_to_page(buffer->data_pages[pgoff - 1]); +} + +static void *perf_mmap_alloc_page(int cpu) +{ + struct page *page; + int node; + + node = (cpu == -1) ? cpu : cpu_to_node(cpu); + page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!page) + return NULL; + + return page_address(page); +} + +static struct perf_buffer * +perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) +{ + struct perf_buffer *buffer; + unsigned long size; + int i; + + size = sizeof(struct perf_buffer); + size += nr_pages * sizeof(void *); + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto fail; + + buffer->user_page = perf_mmap_alloc_page(cpu); + if (!buffer->user_page) + goto fail_user_page; + + for (i = 0; i < nr_pages; i++) { + buffer->data_pages[i] = perf_mmap_alloc_page(cpu); + if (!buffer->data_pages[i]) + goto fail_data_pages; + } + + buffer->nr_pages = nr_pages; + + perf_buffer_init(buffer, watermark, flags); + + return buffer; + +fail_data_pages: + for (i--; i >= 0; i--) + free_page((unsigned long)buffer->data_pages[i]); + + free_page((unsigned long)buffer->user_page); + +fail_user_page: + kfree(buffer); + +fail: + return NULL; +} + +static void perf_mmap_free_page(unsigned long addr) +{ + struct page *page = virt_to_page((void *)addr); + + page->mapping = NULL; + __free_page(page); +} + +static void perf_buffer_free(struct perf_buffer *buffer) +{ + int i; + + perf_mmap_free_page((unsigned long)buffer->user_page); + for (i = 0; i < buffer->nr_pages; i++) + perf_mmap_free_page((unsigned long)buffer->data_pages[i]); + kfree(buffer); +} + +static inline int page_order(struct perf_buffer *buffer) +{ + return 0; +} + +#else + +/* + * Back perf_mmap() with vmalloc memory. + * + * Required for architectures that have d-cache aliasing issues. 
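perf_buffer_alloc() above (the page-array variant) sizes one allocation as the struct header plus a pointer per data page and then fills the flexible data_pages[] array. A userspace model of that allocation pattern; the struct is a stand-in and error unwinding is reduced to the happy path:

  #include <stdio.h>
  #include <stdlib.h>

  #define PAGE_SIZE 4096

  struct buffer_stub {
          int nr_pages;
          void *user_page;
          void *data_pages[];   /* flexible array, one slot per data page */
  };

  static struct buffer_stub *buffer_alloc(int nr_pages)
  {
          size_t size = sizeof(struct buffer_stub) + nr_pages * sizeof(void *);
          struct buffer_stub *buf = calloc(1, size);
          int i;

          if (!buf)
                  return NULL;

          buf->user_page = calloc(1, PAGE_SIZE);
          for (i = 0; i < nr_pages; i++)
                  buf->data_pages[i] = calloc(1, PAGE_SIZE);
          buf->nr_pages = nr_pages;
          return buf;
  }

  static void buffer_free(struct buffer_stub *buf)
  {
          int i;

          for (i = 0; i < buf->nr_pages; i++)
                  free(buf->data_pages[i]);
          free(buf->user_page);
          free(buf);
  }

  int main(void)
  {
          struct buffer_stub *buf = buffer_alloc(8);

          if (!buf)
                  return 1;
          printf("allocated %d data pages\n", buf->nr_pages);
          buffer_free(buf);
          return 0;
  }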
+ */ + +static inline int page_order(struct perf_buffer *buffer) +{ + return buffer->page_order; +} + +static struct page * +perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) +{ + if (pgoff > (1UL << page_order(buffer))) + return NULL; + + return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE); +} + +static void perf_mmap_unmark_page(void *addr) +{ + struct page *page = vmalloc_to_page(addr); + + page->mapping = NULL; +} + +static void perf_buffer_free_work(struct work_struct *work) +{ + struct perf_buffer *buffer; + void *base; + int i, nr; + + buffer = container_of(work, struct perf_buffer, work); + nr = 1 << page_order(buffer); + + base = buffer->user_page; + for (i = 0; i < nr + 1; i++) + perf_mmap_unmark_page(base + (i * PAGE_SIZE)); + + vfree(base); + kfree(buffer); +} + +static void perf_buffer_free(struct perf_buffer *buffer) +{ + schedule_work(&buffer->work); +} + +static struct perf_buffer * +perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) +{ + struct perf_buffer *buffer; + unsigned long size; + void *all_buf; + + size = sizeof(struct perf_buffer); + size += sizeof(void *); + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto fail; + + INIT_WORK(&buffer->work, perf_buffer_free_work); + + all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); + if (!all_buf) + goto fail_all_buf; + + buffer->user_page = all_buf; + buffer->data_pages[0] = all_buf + PAGE_SIZE; + buffer->page_order = ilog2(nr_pages); + buffer->nr_pages = 1; + + perf_buffer_init(buffer, watermark, flags); + + return buffer; + +fail_all_buf: + kfree(buffer); + +fail: + return NULL; +} + +#endif + +static unsigned long perf_data_size(struct perf_buffer *buffer) +{ + return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer)); +} + static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct perf_event *event = vma->vm_file->private_data; - struct ring_buffer *rb; + struct perf_buffer *buffer; int ret = VM_FAULT_SIGBUS; if (vmf->flags & FAULT_FLAG_MKWRITE) { @@ -3430,14 +3623,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } rcu_read_lock(); - rb = rcu_dereference(event->rb); - if (!rb) + buffer = rcu_dereference(event->buffer); + if (!buffer) goto unlock; if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) goto unlock; - vmf->page = perf_mmap_to_page(rb, vmf->pgoff); + vmf->page = perf_mmap_to_page(buffer, vmf->pgoff); if (!vmf->page) goto unlock; @@ -3452,35 +3645,35 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return ret; } -static void rb_free_rcu(struct rcu_head *rcu_head) +static void perf_buffer_free_rcu(struct rcu_head *rcu_head) { - struct ring_buffer *rb; + struct perf_buffer *buffer; - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); + buffer = container_of(rcu_head, struct perf_buffer, rcu_head); + perf_buffer_free(buffer); } -static struct ring_buffer *ring_buffer_get(struct perf_event *event) +static struct perf_buffer *perf_buffer_get(struct perf_event *event) { - struct ring_buffer *rb; + struct perf_buffer *buffer; rcu_read_lock(); - rb = rcu_dereference(event->rb); - if (rb) { - if (!atomic_inc_not_zero(&rb->refcount)) - rb = NULL; + buffer = rcu_dereference(event->buffer); + if (buffer) { + if (!atomic_inc_not_zero(&buffer->refcount)) + buffer = NULL; } rcu_read_unlock(); - return rb; + return buffer; } -static void ring_buffer_put(struct ring_buffer *rb) +static void perf_buffer_put(struct perf_buffer *buffer) { - if 
(!atomic_dec_and_test(&rb->refcount)) + if (!atomic_dec_and_test(&buffer->refcount)) return; - call_rcu(&rb->rcu_head, rb_free_rcu); + call_rcu(&buffer->rcu_head, perf_buffer_free_rcu); } static void perf_mmap_open(struct vm_area_struct *vma) @@ -3495,16 +3688,16 @@ static void perf_mmap_close(struct vm_area_struct *vma) struct perf_event *event = vma->vm_file->private_data; if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { - unsigned long size = perf_data_size(event->rb); + unsigned long size = perf_data_size(event->buffer); struct user_struct *user = event->mmap_user; - struct ring_buffer *rb = event->rb; + struct perf_buffer *buffer = event->buffer; atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); vma->vm_mm->locked_vm -= event->mmap_locked; - rcu_assign_pointer(event->rb, NULL); + rcu_assign_pointer(event->buffer, NULL); mutex_unlock(&event->mmap_mutex); - ring_buffer_put(rb); + perf_buffer_put(buffer); free_uid(user); } } @@ -3522,7 +3715,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); unsigned long locked, lock_limit; - struct ring_buffer *rb; + struct perf_buffer *buffer; unsigned long vma_size; unsigned long nr_pages; long user_extra, extra; @@ -3531,7 +3724,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) /* * Don't allow mmap() of inherited per-task counters. This would * create a performance issue due to all children writing to the - * same rb. + * same buffer. */ if (event->cpu == -1 && event->attr.inherit) return -EINVAL; @@ -3543,7 +3736,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) nr_pages = (vma_size / PAGE_SIZE) - 1; /* - * If we have rb pages ensure they're a power-of-two number, so we + * If we have buffer pages ensure they're a power-of-two number, so we * can do bitmasks instead of modulo. */ if (nr_pages != 0 && !is_power_of_2(nr_pages)) @@ -3557,9 +3750,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) WARN_ON_ONCE(event->ctx->parent_ctx); mutex_lock(&event->mmap_mutex); - if (event->rb) { - if (event->rb->nr_pages == nr_pages) - atomic_inc(&event->rb->refcount); + if (event->buffer) { + if (event->buffer->nr_pages == nr_pages) + atomic_inc(&event->buffer->refcount); else ret = -EINVAL; goto unlock; @@ -3589,20 +3782,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) goto unlock; } - WARN_ON(event->rb); + WARN_ON(event->buffer); if (vma->vm_flags & VM_WRITE) - flags |= RING_BUFFER_WRITABLE; + flags |= PERF_BUFFER_WRITABLE; - rb = rb_alloc(nr_pages, - event->attr.watermark ? 
event->attr.wakeup_watermark : 0, - event->cpu, flags); - - if (!rb) { + buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark, + event->cpu, flags); + if (!buffer) { ret = -ENOMEM; goto unlock; } - rcu_assign_pointer(event->rb, rb); + rcu_assign_pointer(event->buffer, buffer); atomic_long_add(user_extra, &user->locked_vm); event->mmap_locked = extra; @@ -3701,6 +3892,117 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); +/* + * Output + */ +static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail, + unsigned long offset, unsigned long head) +{ + unsigned long mask; + + if (!buffer->writable) + return true; + + mask = perf_data_size(buffer) - 1; + + offset = (offset - tail) & mask; + head = (head - tail) & mask; + + if ((int)(head - offset) < 0) + return false; + + return true; +} + +static void perf_output_wakeup(struct perf_output_handle *handle) +{ + atomic_set(&handle->buffer->poll, POLL_IN); + + if (handle->nmi) { + handle->event->pending_wakeup = 1; + irq_work_queue(&handle->event->pending); + } else + perf_event_wakeup(handle->event); +} + +/* + * We need to ensure a later event_id doesn't publish a head when a former + * event isn't done writing. However since we need to deal with NMIs we + * cannot fully serialize things. + * + * We only publish the head (and generate a wakeup) when the outer-most + * event completes. + */ +static void perf_output_get_handle(struct perf_output_handle *handle) +{ + struct perf_buffer *buffer = handle->buffer; + + preempt_disable(); + local_inc(&buffer->nest); + handle->wakeup = local_read(&buffer->wakeup); +} + +static void perf_output_put_handle(struct perf_output_handle *handle) +{ + struct perf_buffer *buffer = handle->buffer; + unsigned long head; + +again: + head = local_read(&buffer->head); + + /* + * IRQ/NMI can happen here, which means we can miss a head update. + */ + + if (!local_dec_and_test(&buffer->nest)) + goto out; + + /* + * Publish the known good head. Rely on the full barrier implied + * by atomic_dec_and_test() order the buffer->head read and this + * write. + */ + buffer->user_page->data_head = head; + + /* + * Now check if we missed an update, rely on the (compiler) + * barrier in atomic_dec_and_test() to re-read buffer->head. 
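perf_output_space() above is the overrun check for a writable buffer: the producer head and the record being reserved are both taken relative to the reader's tail, masked to the power-of-two data size, and compared. Combined with the local_cmpxchg() loop that appears further below in perf_output_begin(), this yields lock-free space reservation. A single-threaded userspace sketch of both pieces with C11 atomics; watermark, lost-record and NMI handling are omitted and all names are illustrative:

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  #define BUF_SIZE 4096UL     /* power of two, like the perf data area */

  static atomic_ulong head = 0;   /* producer position */
  static atomic_ulong tail = 0;   /* consumer position (reader updates it) */

  /* Does [offset, new_head) still fit between tail and the producer? */
  static bool output_space(unsigned long t, unsigned long offset,
                           unsigned long new_head)
  {
          unsigned long mask = BUF_SIZE - 1;

          offset   = (offset   - t) & mask;
          new_head = (new_head - t) & mask;
          return (long)(new_head - offset) >= 0;
  }

  /* Reserve size bytes; returns the record's start offset, or -1. */
  static long output_begin(unsigned long size)
  {
          unsigned long t, offset, new_head;

          do {
                  t = atomic_load(&tail);
                  offset = atomic_load(&head);
                  new_head = offset + size;
                  if (!output_space(t, offset, new_head))
                          return -1;
          } while (!atomic_compare_exchange_weak(&head, &offset, new_head));

          return (long)(offset & (BUF_SIZE - 1));
  }

  int main(void)
  {
          printf("%ld\n", output_begin(512));    /* 0 */
          printf("%ld\n", output_begin(512));    /* 512 */
          atomic_store(&tail, 1024);             /* reader caught up */
          printf("%ld\n", output_begin(2048));   /* 1024 */
          printf("%ld\n", output_begin(4000));   /* -1: would overrun tail */
          return 0;
  }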
+ */ + if (unlikely(head != local_read(&buffer->head))) { + local_inc(&buffer->nest); + goto again; + } + + if (handle->wakeup != local_read(&buffer->wakeup)) + perf_output_wakeup(handle); + +out: + preempt_enable(); +} + +__always_inline void perf_output_copy(struct perf_output_handle *handle, + const void *buf, unsigned int len) +{ + do { + unsigned long size = min_t(unsigned long, handle->size, len); + + memcpy(handle->addr, buf, size); + + len -= size; + handle->addr += size; + buf += size; + handle->size -= size; + if (!handle->size) { + struct perf_buffer *buffer = handle->buffer; + + handle->page++; + handle->page &= buffer->nr_pages - 1; + handle->addr = buffer->data_pages[handle->page]; + handle->size = PAGE_SIZE << page_order(buffer); + } + } while (len); +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -3731,9 +4033,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header, } } -void perf_event_header__init_id(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_event *event) +static void perf_event_header__init_id(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event) { if (event->attr.sample_id_all) __perf_event_header__init_id(header, data, event); @@ -3760,14 +4062,121 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle, perf_output_put(handle, data->cpu_entry); } -void perf_event__output_id_sample(struct perf_event *event, - struct perf_output_handle *handle, - struct perf_sample_data *sample) +static void perf_event__output_id_sample(struct perf_event *event, + struct perf_output_handle *handle, + struct perf_sample_data *sample) { if (event->attr.sample_id_all) __perf_event__output_id_sample(handle, sample); } +int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size, + int nmi, int sample) +{ + struct perf_buffer *buffer; + unsigned long tail, offset, head; + int have_lost; + struct perf_sample_data sample_data; + struct { + struct perf_event_header header; + u64 id; + u64 lost; + } lost_event; + + rcu_read_lock(); + /* + * For inherited events we send all the output towards the parent. + */ + if (event->parent) + event = event->parent; + + buffer = rcu_dereference(event->buffer); + if (!buffer) + goto out; + + handle->buffer = buffer; + handle->event = event; + handle->nmi = nmi; + handle->sample = sample; + + if (!buffer->nr_pages) + goto out; + + have_lost = local_read(&buffer->lost); + if (have_lost) { + lost_event.header.size = sizeof(lost_event); + perf_event_header__init_id(&lost_event.header, &sample_data, + event); + size += lost_event.header.size; + } + + perf_output_get_handle(handle); + + do { + /* + * Userspace could choose to issue a mb() before updating the + * tail pointer. So that all reads will be completed before the + * write is issued. 
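perf_output_copy() above streams a record into the mapped data pages chunk by chunk, wrapping to the next page with a power-of-two mask whenever the current page fills up. A userspace sketch of that page-wrapping copy, using deliberately tiny pages so the wrap is easy to see; everything here is a stand-in for the real structures:

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #define PAGE_SIZE 64    /* tiny pages, just for the demonstration */

  struct out_handle {
          char **pages;
          int nr_pages;    /* power of two */
          int page;        /* current page index */
          size_t off;      /* offset within the current page */
  };

  static void output_copy(struct out_handle *h, const void *buf, size_t len)
  {
          while (len) {
                  size_t space = PAGE_SIZE - h->off;
                  size_t chunk = len < space ? len : space;

                  memcpy(h->pages[h->page] + h->off, buf, chunk);
                  buf = (const char *)buf + chunk;
                  len -= chunk;
                  h->off += chunk;
                  if (h->off == PAGE_SIZE) {
                          /* wrap to the next page, masking as the hunk does */
                          h->page = (h->page + 1) & (h->nr_pages - 1);
                          h->off = 0;
                  }
          }
  }

  int main(void)
  {
          char *pages[4];
          struct out_handle h = { pages, 4, 0, 0 };
          char record[100];
          int i;

          for (i = 0; i < 4; i++)
                  pages[i] = calloc(1, PAGE_SIZE);
          memset(record, 'x', sizeof(record));

          output_copy(&h, record, sizeof(record));
          printf("ended on page %d at offset %zu\n", h.page, h.off);

          for (i = 0; i < 4; i++)
                  free(pages[i]);
          return 0;
  }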
+ */ + tail = ACCESS_ONCE(buffer->user_page->data_tail); + smp_rmb(); + offset = head = local_read(&buffer->head); + head += size; + if (unlikely(!perf_output_space(buffer, tail, offset, head))) + goto fail; + } while (local_cmpxchg(&buffer->head, offset, head) != offset); + + if (head - local_read(&buffer->wakeup) > buffer->watermark) + local_add(buffer->watermark, &buffer->wakeup); + + handle->page = offset >> (PAGE_SHIFT + page_order(buffer)); + handle->page &= buffer->nr_pages - 1; + handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1); + handle->addr = buffer->data_pages[handle->page]; + handle->addr += handle->size; + handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size; + + if (have_lost) { + lost_event.header.type = PERF_RECORD_LOST; + lost_event.header.misc = 0; + lost_event.id = event->id; + lost_event.lost = local_xchg(&buffer->lost, 0); + + perf_output_put(handle, lost_event); + perf_event__output_id_sample(event, handle, &sample_data); + } + + return 0; + +fail: + local_inc(&buffer->lost); + perf_output_put_handle(handle); +out: + rcu_read_unlock(); + + return -ENOSPC; +} + +void perf_output_end(struct perf_output_handle *handle) +{ + struct perf_event *event = handle->event; + struct perf_buffer *buffer = handle->buffer; + + int wakeup_events = event->attr.wakeup_events; + + if (handle->sample && wakeup_events) { + int events = local_inc_return(&buffer->events); + if (events >= wakeup_events) { + local_sub(wakeup_events, &buffer->events); + local_inc(&buffer->wakeup); + } + } + + perf_output_put_handle(handle); + rcu_read_unlock(); +} + static void perf_output_read_one(struct perf_output_handle *handle, struct perf_event *event, u64 enabled, u64 running) @@ -3788,7 +4197,7 @@ static void perf_output_read_one(struct perf_output_handle *handle, if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); - __output_copy(handle, values, n * sizeof(u64)); + perf_output_copy(handle, values, n * sizeof(u64)); } /* @@ -3818,7 +4227,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); - __output_copy(handle, values, n * sizeof(u64)); + perf_output_copy(handle, values, n * sizeof(u64)); list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; @@ -3830,7 +4239,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); - __output_copy(handle, values, n * sizeof(u64)); + perf_output_copy(handle, values, n * sizeof(u64)); } } @@ -3840,7 +4249,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { - u64 enabled = 0, running = 0; + u64 enabled = 0, running = 0, now, ctx_time; u64 read_format = event->attr.read_format; /* @@ -3852,8 +4261,12 @@ static void perf_output_read(struct perf_output_handle *handle, * because of locking issue as we are called in * NMI context */ - if (read_format & PERF_FORMAT_TOTAL_TIMES) - calc_timer_values(event, &enabled, &running); + if (read_format & PERF_FORMAT_TOTAL_TIMES) { + now = perf_clock(); + ctx_time = event->shadow_ctx_time + now; + enabled = ctx_time - event->tstamp_enabled; + running = ctx_time - event->tstamp_running; + } if (event->attr.read_format & PERF_FORMAT_GROUP) perf_output_read_group(handle, event, enabled, running); @@ -3906,7 +4319,7 @@ void perf_output_sample(struct perf_output_handle *handle, size *= 
sizeof(u64); - __output_copy(handle, data->callchain, size); + perf_output_copy(handle, data->callchain, size); } else { u64 nr = 0; perf_output_put(handle, nr); @@ -3916,8 +4329,8 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_RAW) { if (data->raw) { perf_output_put(handle, data->raw->size); - __output_copy(handle, data->raw->data, - data->raw->size); + perf_output_copy(handle, data->raw->data, + data->raw->size); } else { struct { u32 size; @@ -3929,20 +4342,6 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_put(handle, raw); } } - - if (!event->attr.watermark) { - int wakeup_events = event->attr.wakeup_events; - - if (wakeup_events) { - struct ring_buffer *rb = handle->rb; - int events = local_inc_return(&rb->events); - - if (events >= wakeup_events) { - local_sub(wakeup_events, &rb->events); - local_inc(&rb->wakeup); - } - } - } } void perf_prepare_sample(struct perf_event_header *header, @@ -3987,7 +4386,7 @@ void perf_prepare_sample(struct perf_event_header *header, } } -static void perf_event_output(struct perf_event *event, +static void perf_event_output(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -3999,7 +4398,7 @@ static void perf_event_output(struct perf_event *event, perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, event, header.size, nmi, 1)) goto exit; perf_output_sample(&handle, &header, data, event); @@ -4039,7 +4438,7 @@ perf_event_read_event(struct perf_event *event, int ret; perf_event_header__init_id(&read_event.header, &sample, event); - ret = perf_output_begin(&handle, event, read_event.header.size); + ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); if (ret) return; @@ -4082,7 +4481,7 @@ static void perf_event_task_output(struct perf_event *event, perf_event_header__init_id(&task_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, - task_event->event_id.header.size); + task_event->event_id.header.size, 0, 0); if (ret) goto out; @@ -4219,7 +4618,7 @@ static void perf_event_comm_output(struct perf_event *event, perf_event_header__init_id(&comm_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, - comm_event->event_id.header.size); + comm_event->event_id.header.size, 0, 0); if (ret) goto out; @@ -4228,7 +4627,7 @@ static void perf_event_comm_output(struct perf_event *event, comm_event->event_id.tid = perf_event_tid(event, comm_event->task); perf_output_put(&handle, comm_event->event_id); - __output_copy(&handle, comm_event->comm, + perf_output_copy(&handle, comm_event->comm, comm_event->comm_size); perf_event__output_id_sample(event, &handle, &sample); @@ -4366,7 +4765,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, - mmap_event->event_id.header.size); + mmap_event->event_id.header.size, 0, 0); if (ret) goto out; @@ -4374,7 +4773,7 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.tid = perf_event_tid(event, current); perf_output_put(&handle, mmap_event->event_id); - __output_copy(&handle, mmap_event->file_name, + perf_output_copy(&handle, mmap_event->file_name, mmap_event->file_size); perf_event__output_id_sample(event, &handle, &sample); @@ -4430,7 +4829,7 @@ static void perf_event_mmap_event(struct perf_mmap_event 
*mmap_event) if (file) { /* - * d_path works from the end of the rb backwards, so we + * d_path works from the end of the buffer backwards, so we * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ @@ -4561,7 +4960,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_event_header__init_id(&throttle_event.header, &sample, event); ret = perf_output_begin(&handle, event, - throttle_event.header.size); + throttle_event.header.size, 1, 0); if (ret) return; @@ -4574,7 +4973,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) * Generic event overflow handling, sampling. */ -static int __perf_event_overflow(struct perf_event *event, +static int __perf_event_overflow(struct perf_event *event, int nmi, int throttle, struct perf_sample_data *data, struct pt_regs *regs) { @@ -4617,28 +5016,34 @@ static int __perf_event_overflow(struct perf_event *event, if (events && atomic_dec_and_test(&event->event_limit)) { ret = 1; event->pending_kill = POLL_HUP; - event->pending_disable = 1; - irq_work_queue(&event->pending); + if (nmi) { + event->pending_disable = 1; + irq_work_queue(&event->pending); + } else + perf_event_disable(event); } if (event->overflow_handler) - event->overflow_handler(event, data, regs); + event->overflow_handler(event, nmi, data, regs); else - perf_event_output(event, data, regs); + perf_event_output(event, nmi, data, regs); if (event->fasync && event->pending_kill) { - event->pending_wakeup = 1; - irq_work_queue(&event->pending); + if (nmi) { + event->pending_wakeup = 1; + irq_work_queue(&event->pending); + } else + perf_event_wakeup(event); } return ret; } -int perf_event_overflow(struct perf_event *event, +int perf_event_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - return __perf_event_overflow(event, 1, data, regs); + return __perf_event_overflow(event, nmi, 1, data, regs); } /* @@ -4687,7 +5092,7 @@ static u64 perf_swevent_set_period(struct perf_event *event) } static void perf_swevent_overflow(struct perf_event *event, u64 overflow, - struct perf_sample_data *data, + int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; @@ -4701,7 +5106,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, return; for (; overflow; overflow--) { - if (__perf_event_overflow(event, throttle, + if (__perf_event_overflow(event, nmi, throttle, data, regs)) { /* * We inhibit the overflow from happening when @@ -4714,7 +5119,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, } static void perf_swevent_event(struct perf_event *event, u64 nr, - struct perf_sample_data *data, + int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; @@ -4728,12 +5133,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, return; if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) - return perf_swevent_overflow(event, 1, data, regs); + return perf_swevent_overflow(event, 1, nmi, data, regs); if (local64_add_negative(nr, &hwc->period_left)) return; - perf_swevent_overflow(event, 0, data, regs); + perf_swevent_overflow(event, 0, nmi, data, regs); } static int perf_exclude_event(struct perf_event *event, @@ -4821,7 +5226,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) } static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, + u64 nr, int nmi, struct 
perf_sample_data *data, struct pt_regs *regs) { @@ -4837,7 +5242,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) - perf_swevent_event(event, nr, data, regs); + perf_swevent_event(event, nr, nmi, data, regs); } end: rcu_read_unlock(); @@ -4858,7 +5263,8 @@ inline void perf_swevent_put_recursion_context(int rctx) put_recursion_context(swhash->recursion, rctx); } -void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) +void __perf_sw_event(u32 event_id, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { struct perf_sample_data data; int rctx; @@ -4870,7 +5276,7 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) perf_sample_data_init(&data, addr); - do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); @@ -5118,7 +5524,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) - perf_swevent_event(event, count, &data, regs); + perf_swevent_event(event, count, 1, &data, regs); } perf_swevent_put_recursion_context(rctx); @@ -5211,7 +5617,7 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_sample_data_init(&sample, bp->attr.bp_addr); if (!bp->hw.state && !perf_exclude_event(bp, regs)) - perf_swevent_event(bp, 1, &sample, regs); + perf_swevent_event(bp, 1, 1, &sample, regs); } #endif @@ -5240,7 +5646,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) if (regs && !perf_exclude_event(event, regs)) { if (!(event->attr.exclude_idle && current->pid == 0)) - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, 0, &data, regs)) ret = HRTIMER_NORESTART; } @@ -5580,7 +5986,6 @@ static int pmu_dev_alloc(struct pmu *pmu) } static struct lock_class_key cpuctx_mutex; -static struct lock_class_key cpuctx_lock; int perf_pmu_register(struct pmu *pmu, char *name, int type) { @@ -5631,7 +6036,6 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type) cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx); lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); - lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); cpuctx->ctx.type = cpu_context; cpuctx->ctx.pmu = pmu; cpuctx->jiffies_interval = 1; @@ -5746,8 +6150,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, struct task_struct *task, struct perf_event *group_leader, struct perf_event *parent_event, - perf_overflow_handler_t overflow_handler, - void *context) + perf_overflow_handler_t overflow_handler) { struct pmu *pmu; struct perf_event *event; @@ -5805,13 +6208,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, #endif } - if (!overflow_handler && parent_event) { + if (!overflow_handler && parent_event) overflow_handler = parent_event->overflow_handler; - context = parent_event->overflow_handler_context; - } event->overflow_handler = overflow_handler; - event->overflow_handler_context = context; if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -5954,7 +6354,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { - struct ring_buffer *rb = NULL, *old_rb = NULL; + 
struct perf_buffer *buffer = NULL, *old_buffer = NULL; int ret = -EINVAL; if (!output_event) @@ -5971,7 +6371,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) goto out; /* - * If its not a per-cpu rb, it must be the same task. + * If its not a per-cpu buffer, it must be the same task. */ if (output_event->cpu == -1 && output_event->ctx != event->ctx) goto out; @@ -5983,20 +6383,20 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) goto unlock; if (output_event) { - /* get the rb we want to redirect to */ - rb = ring_buffer_get(output_event); - if (!rb) + /* get the buffer we want to redirect to */ + buffer = perf_buffer_get(output_event); + if (!buffer) goto unlock; } - old_rb = event->rb; - rcu_assign_pointer(event->rb, rb); + old_buffer = event->buffer; + rcu_assign_pointer(event->buffer, buffer); ret = 0; unlock: mutex_unlock(&event->mmap_mutex); - if (old_rb) - ring_buffer_put(old_rb); + if (old_buffer) + perf_buffer_put(old_buffer); out: return ret; } @@ -6078,8 +6478,7 @@ SYSCALL_DEFINE5(perf_event_open, } } - event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, - NULL, NULL); + event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_task; @@ -6264,8 +6663,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, struct task_struct *task, - perf_overflow_handler_t overflow_handler, - void *context) + perf_overflow_handler_t overflow_handler) { struct perf_event_context *ctx; struct perf_event *event; @@ -6275,8 +6673,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, * Get the target context (task or percpu): */ - event = perf_event_alloc(attr, cpu, task, NULL, NULL, - overflow_handler, context); + event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler); if (IS_ERR(event)) { err = PTR_ERR(event); goto err; @@ -6383,6 +6780,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) * our context. */ child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); + task_ctx_sched_out(child_ctx, EVENT_ALL); /* * Take the context lock here so that if find_get_context is @@ -6390,7 +6788,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) * incremented the context's refcount before we do put_ctx below. 
*/ raw_spin_lock(&child_ctx->lock); - task_ctx_sched_out(child_ctx); child->perf_event_ctxp[ctxn] = NULL; /* * If this context is a clone; unclone it so it can't get @@ -6560,7 +6957,7 @@ inherit_event(struct perf_event *parent_event, parent_event->cpu, child, group_leader, parent_event, - NULL, NULL); + NULL); if (IS_ERR(child_event)) return child_event; get_ctx(child_ctx); @@ -6587,8 +6984,6 @@ inherit_event(struct perf_event *parent_event, child_event->ctx = child_ctx; child_event->overflow_handler = parent_event->overflow_handler; - child_event->overflow_handler_context - = parent_event->overflow_handler_context; /* * Precalculate sample_data sizes diff --git a/trunk/kernel/events/hw_breakpoint.c b/trunk/kernel/events/hw_breakpoint.c index b7971d6f38bf..086adf25a55e 100644 --- a/trunk/kernel/events/hw_breakpoint.c +++ b/trunk/kernel/events/hw_breakpoint.c @@ -431,11 +431,9 @@ int register_perf_hw_breakpoint(struct perf_event *bp) struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, - void *context, struct task_struct *tsk) { - return perf_event_create_kernel_counter(attr, -1, tsk, triggered, - context); + return perf_event_create_kernel_counter(attr, -1, tsk, triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); @@ -504,8 +502,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); */ struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, - perf_overflow_handler_t triggered, - void *context) + perf_overflow_handler_t triggered) { struct perf_event * __percpu *cpu_events, **pevent, *bp; long err; @@ -518,8 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, get_online_cpus(); for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - bp = perf_event_create_kernel_counter(attr, cpu, NULL, - triggered, context); + bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); *pevent = bp; diff --git a/trunk/kernel/events/internal.h b/trunk/kernel/events/internal.h deleted file mode 100644 index 09097dd8116c..000000000000 --- a/trunk/kernel/events/internal.h +++ /dev/null @@ -1,96 +0,0 @@ -#ifndef _KERNEL_EVENTS_INTERNAL_H -#define _KERNEL_EVENTS_INTERNAL_H - -#define RING_BUFFER_WRITABLE 0x01 - -struct ring_buffer { - atomic_t refcount; - struct rcu_head rcu_head; -#ifdef CONFIG_PERF_USE_VMALLOC - struct work_struct work; - int page_order; /* allocation order */ -#endif - int nr_pages; /* nr of data pages */ - int writable; /* are we writable */ - - atomic_t poll; /* POLL_ for wakeups */ - - local_t head; /* write position */ - local_t nest; /* nested writers */ - local_t events; /* event limit */ - local_t wakeup; /* wakeup stamp */ - local_t lost; /* nr records lost */ - - long watermark; /* wakeup watermark */ - - struct perf_event_mmap_page *user_page; - void *data_pages[0]; -}; - -extern void rb_free(struct ring_buffer *rb); -extern struct ring_buffer * -rb_alloc(int nr_pages, long watermark, int cpu, int flags); -extern void perf_event_wakeup(struct perf_event *event); - -extern void -perf_event_header__init_id(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_event *event); -extern void -perf_event__output_id_sample(struct perf_event *event, - struct perf_output_handle *handle, - struct perf_sample_data *sample); - -extern struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff); - -#ifdef CONFIG_PERF_USE_VMALLOC -/* - * Back perf_mmap() with vmalloc memory. 
- * - * Required for architectures that have d-cache aliasing issues. - */ - -static inline int page_order(struct ring_buffer *rb) -{ - return rb->page_order; -} - -#else - -static inline int page_order(struct ring_buffer *rb) -{ - return 0; -} -#endif - -static unsigned long perf_data_size(struct ring_buffer *rb) -{ - return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); -} - -static inline void -__output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) -{ - do { - unsigned long size = min_t(unsigned long, handle->size, len); - - memcpy(handle->addr, buf, size); - - len -= size; - handle->addr += size; - buf += size; - handle->size -= size; - if (!handle->size) { - struct ring_buffer *rb = handle->rb; - - handle->page++; - handle->page &= rb->nr_pages - 1; - handle->addr = rb->data_pages[handle->page]; - handle->size = PAGE_SIZE << page_order(rb); - } - } while (len); -} - -#endif /* _KERNEL_EVENTS_INTERNAL_H */ diff --git a/trunk/kernel/events/ring_buffer.c b/trunk/kernel/events/ring_buffer.c deleted file mode 100644 index a2a29205cc0f..000000000000 --- a/trunk/kernel/events/ring_buffer.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Performance events ring-buffer code: - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra - * Copyright © 2009 Paul Mackerras, IBM Corp. - * - * For licensing details see kernel-base/COPYING - */ - -#include -#include -#include - -#include "internal.h" - -static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, - unsigned long offset, unsigned long head) -{ - unsigned long mask; - - if (!rb->writable) - return true; - - mask = perf_data_size(rb) - 1; - - offset = (offset - tail) & mask; - head = (head - tail) & mask; - - if ((int)(head - offset) < 0) - return false; - - return true; -} - -static void perf_output_wakeup(struct perf_output_handle *handle) -{ - atomic_set(&handle->rb->poll, POLL_IN); - - handle->event->pending_wakeup = 1; - irq_work_queue(&handle->event->pending); -} - -/* - * We need to ensure a later event_id doesn't publish a head when a former - * event isn't done writing. However since we need to deal with NMIs we - * cannot fully serialize things. - * - * We only publish the head (and generate a wakeup) when the outer-most - * event completes. - */ -static void perf_output_get_handle(struct perf_output_handle *handle) -{ - struct ring_buffer *rb = handle->rb; - - preempt_disable(); - local_inc(&rb->nest); - handle->wakeup = local_read(&rb->wakeup); -} - -static void perf_output_put_handle(struct perf_output_handle *handle) -{ - struct ring_buffer *rb = handle->rb; - unsigned long head; - -again: - head = local_read(&rb->head); - - /* - * IRQ/NMI can happen here, which means we can miss a head update. - */ - - if (!local_dec_and_test(&rb->nest)) - goto out; - - /* - * Publish the known good head. Rely on the full barrier implied - * by atomic_dec_and_test() order the rb->head read and this - * write. - */ - rb->user_page->data_head = head; - - /* - * Now check if we missed an update, rely on the (compiler) - * barrier in atomic_dec_and_test() to re-read rb->head. 
- */ - if (unlikely(head != local_read(&rb->head))) { - local_inc(&rb->nest); - goto again; - } - - if (handle->wakeup != local_read(&rb->wakeup)) - perf_output_wakeup(handle); - -out: - preempt_enable(); -} - -int perf_output_begin(struct perf_output_handle *handle, - struct perf_event *event, unsigned int size) -{ - struct ring_buffer *rb; - unsigned long tail, offset, head; - int have_lost; - struct perf_sample_data sample_data; - struct { - struct perf_event_header header; - u64 id; - u64 lost; - } lost_event; - - rcu_read_lock(); - /* - * For inherited events we send all the output towards the parent. - */ - if (event->parent) - event = event->parent; - - rb = rcu_dereference(event->rb); - if (!rb) - goto out; - - handle->rb = rb; - handle->event = event; - - if (!rb->nr_pages) - goto out; - - have_lost = local_read(&rb->lost); - if (have_lost) { - lost_event.header.size = sizeof(lost_event); - perf_event_header__init_id(&lost_event.header, &sample_data, - event); - size += lost_event.header.size; - } - - perf_output_get_handle(handle); - - do { - /* - * Userspace could choose to issue a mb() before updating the - * tail pointer. So that all reads will be completed before the - * write is issued. - */ - tail = ACCESS_ONCE(rb->user_page->data_tail); - smp_rmb(); - offset = head = local_read(&rb->head); - head += size; - if (unlikely(!perf_output_space(rb, tail, offset, head))) - goto fail; - } while (local_cmpxchg(&rb->head, offset, head) != offset); - - if (head - local_read(&rb->wakeup) > rb->watermark) - local_add(rb->watermark, &rb->wakeup); - - handle->page = offset >> (PAGE_SHIFT + page_order(rb)); - handle->page &= rb->nr_pages - 1; - handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1); - handle->addr = rb->data_pages[handle->page]; - handle->addr += handle->size; - handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; - - if (have_lost) { - lost_event.header.type = PERF_RECORD_LOST; - lost_event.header.misc = 0; - lost_event.id = event->id; - lost_event.lost = local_xchg(&rb->lost, 0); - - perf_output_put(handle, lost_event); - perf_event__output_id_sample(event, handle, &sample_data); - } - - return 0; - -fail: - local_inc(&rb->lost); - perf_output_put_handle(handle); -out: - rcu_read_unlock(); - - return -ENOSPC; -} - -void perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) -{ - __output_copy(handle, buf, len); -} - -void perf_output_end(struct perf_output_handle *handle) -{ - perf_output_put_handle(handle); - rcu_read_unlock(); -} - -static void -ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) -{ - long max_size = perf_data_size(rb); - - if (watermark) - rb->watermark = min(max_size, watermark); - - if (!rb->watermark) - rb->watermark = max_size / 2; - - if (flags & RING_BUFFER_WRITABLE) - rb->writable = 1; - - atomic_set(&rb->refcount, 1); -} - -#ifndef CONFIG_PERF_USE_VMALLOC - -/* - * Back perf_mmap() with regular GFP_KERNEL-0 pages. - */ - -struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) -{ - if (pgoff > rb->nr_pages) - return NULL; - - if (pgoff == 0) - return virt_to_page(rb->user_page); - - return virt_to_page(rb->data_pages[pgoff - 1]); -} - -static void *perf_mmap_alloc_page(int cpu) -{ - struct page *page; - int node; - - node = (cpu == -1) ? 
cpu : cpu_to_node(cpu); - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); - if (!page) - return NULL; - - return page_address(page); -} - -struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) -{ - struct ring_buffer *rb; - unsigned long size; - int i; - - size = sizeof(struct ring_buffer); - size += nr_pages * sizeof(void *); - - rb = kzalloc(size, GFP_KERNEL); - if (!rb) - goto fail; - - rb->user_page = perf_mmap_alloc_page(cpu); - if (!rb->user_page) - goto fail_user_page; - - for (i = 0; i < nr_pages; i++) { - rb->data_pages[i] = perf_mmap_alloc_page(cpu); - if (!rb->data_pages[i]) - goto fail_data_pages; - } - - rb->nr_pages = nr_pages; - - ring_buffer_init(rb, watermark, flags); - - return rb; - -fail_data_pages: - for (i--; i >= 0; i--) - free_page((unsigned long)rb->data_pages[i]); - - free_page((unsigned long)rb->user_page); - -fail_user_page: - kfree(rb); - -fail: - return NULL; -} - -static void perf_mmap_free_page(unsigned long addr) -{ - struct page *page = virt_to_page((void *)addr); - - page->mapping = NULL; - __free_page(page); -} - -void rb_free(struct ring_buffer *rb) -{ - int i; - - perf_mmap_free_page((unsigned long)rb->user_page); - for (i = 0; i < rb->nr_pages; i++) - perf_mmap_free_page((unsigned long)rb->data_pages[i]); - kfree(rb); -} - -#else - -struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) -{ - if (pgoff > (1UL << page_order(rb))) - return NULL; - - return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE); -} - -static void perf_mmap_unmark_page(void *addr) -{ - struct page *page = vmalloc_to_page(addr); - - page->mapping = NULL; -} - -static void rb_free_work(struct work_struct *work) -{ - struct ring_buffer *rb; - void *base; - int i, nr; - - rb = container_of(work, struct ring_buffer, work); - nr = 1 << page_order(rb); - - base = rb->user_page; - for (i = 0; i < nr + 1; i++) - perf_mmap_unmark_page(base + (i * PAGE_SIZE)); - - vfree(base); - kfree(rb); -} - -void rb_free(struct ring_buffer *rb) -{ - schedule_work(&rb->work); -} - -struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) -{ - struct ring_buffer *rb; - unsigned long size; - void *all_buf; - - size = sizeof(struct ring_buffer); - size += sizeof(void *); - - rb = kzalloc(size, GFP_KERNEL); - if (!rb) - goto fail; - - INIT_WORK(&rb->work, rb_free_work); - - all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); - if (!all_buf) - goto fail_all_buf; - - rb->user_page = all_buf; - rb->data_pages[0] = all_buf + PAGE_SIZE; - rb->page_order = ilog2(nr_pages); - rb->nr_pages = 1; - - ring_buffer_init(rb, watermark, flags); - - return rb; - -fail_all_buf: - kfree(rb); - -fail: - return NULL; -} - -#endif diff --git a/trunk/kernel/jump_label.c b/trunk/kernel/jump_label.c index fa27e750dbc0..a8ce45097f3d 100644 --- a/trunk/kernel/jump_label.c +++ b/trunk/kernel/jump_label.c @@ -375,15 +375,19 @@ int jump_label_text_reserved(void *start, void *end) static void jump_label_update(struct jump_label_key *key, int enable) { - struct jump_entry *entry = key->entries; - - /* if there are no users, entry can be NULL */ - if (entry) - __jump_label_update(key, entry, __stop___jump_table, enable); + struct jump_entry *entry = key->entries, *stop = __stop___jump_table; #ifdef CONFIG_MODULES + struct module *mod = __module_address((jump_label_t)key); + __jump_label_mod_update(key, enable); + + if (mod) + stop = mod->jump_entries + mod->num_jump_entries; #endif + /* if there are no users, entry can be NULL */ + if (entry) + 
__jump_label_update(key, entry, stop, enable); } #endif diff --git a/trunk/kernel/power/user.c b/trunk/kernel/power/user.c index 42ddbc6f0de6..7d02d33be699 100644 --- a/trunk/kernel/power/user.c +++ b/trunk/kernel/power/user.c @@ -113,10 +113,8 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_RESTORE); } - if (error) { - free_basic_memory_bitmaps(); + if (error) atomic_inc(&snapshot_device_available); - } data->frozen = 0; data->ready = 0; data->platform_support = 0; diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index d08d110b8976..3f2e502d609b 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -2220,7 +2220,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_cpu(p) != new_cpu) { p->se.nr_migrations++; - perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); } __set_task_cpu(p, new_cpu); diff --git a/trunk/kernel/stacktrace.c b/trunk/kernel/stacktrace.c index d20c6983aad9..eb212f8f8bc8 100644 --- a/trunk/kernel/stacktrace.c +++ b/trunk/kernel/stacktrace.c @@ -26,18 +26,12 @@ void print_stack_trace(struct stack_trace *trace, int spaces) EXPORT_SYMBOL_GPL(print_stack_trace); /* - * Architectures that do not implement save_stack_trace_tsk or - * save_stack_trace_regs get this weak alias and a once-per-bootup warning - * (whenever this facility is utilized - for example by procfs): + * Architectures that do not implement save_stack_trace_tsk get this + * weak alias and a once-per-bootup warning (whenever this facility + * is utilized - for example by procfs): */ __weak void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n"); } - -__weak void -save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n"); -} diff --git a/trunk/kernel/taskstats.c b/trunk/kernel/taskstats.c index fc0f22005417..9ffea360a778 100644 --- a/trunk/kernel/taskstats.c +++ b/trunk/kernel/taskstats.c @@ -285,18 +285,16 @@ static void fill_tgid_exit(struct task_struct *tsk) static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) { struct listener_list *listeners; - struct listener *s, *tmp, *s2; + struct listener *s, *tmp; unsigned int cpu; if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; - s = NULL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - if (!s) - s = kmalloc_node(sizeof(struct listener), - GFP_KERNEL, cpu_to_node(cpu)); + s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, + cpu_to_node(cpu)); if (!s) goto cleanup; s->pid = pid; @@ -305,16 +303,9 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); - list_for_each_entry_safe(s2, tmp, &listeners->list, list) { - if (s2->pid == pid) - goto next_cpu; - } list_add(&s->list, &listeners->list); - s = NULL; -next_cpu: up_write(&listeners->sem); } - kfree(s); return 0; } diff --git a/trunk/kernel/time/alarmtimer.c b/trunk/kernel/time/alarmtimer.c index 59f369f98a04..2d966244ea60 100644 --- a/trunk/kernel/time/alarmtimer.c +++ b/trunk/kernel/time/alarmtimer.c @@ -42,76 +42,16 @@ static struct alarm_base { clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; -/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ -static ktime_t freezer_delta; -static 
DEFINE_SPINLOCK(freezer_delta_lock); - #ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; -static DEFINE_SPINLOCK(rtcdev_lock); - -/** - * has_wakealarm - check rtc device has wakealarm ability - * @dev: current device - * @name_ptr: name to be returned - * - * This helper function checks to see if the rtc device can wake - * from suspend. - */ -static int has_wakealarm(struct device *dev, void *name_ptr) -{ - struct rtc_device *candidate = to_rtc_device(dev); - - if (!candidate->ops->set_alarm) - return 0; - if (!device_may_wakeup(candidate->dev.parent)) - return 0; - - *(const char **)name_ptr = dev_name(dev); - return 1; -} - -/** - * alarmtimer_get_rtcdev - Return selected rtcdevice - * - * This function returns the rtc device to use for wakealarms. - * If one has not already been chosen, it checks to see if a - * functional rtc device is available. - */ -static struct rtc_device *alarmtimer_get_rtcdev(void) -{ - struct device *dev; - char *str; - unsigned long flags; - struct rtc_device *ret; - - spin_lock_irqsave(&rtcdev_lock, flags); - if (!rtcdev) { - /* Find an rtc device and init the rtc_timer */ - dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); - /* If we have a device then str is valid. See has_wakealarm() */ - if (dev) { - rtcdev = rtc_class_open(str); - /* - * Drop the reference we got in class_find_device, - * rtc_open takes its own. - */ - put_device(dev); - rtc_timer_init(&rtctimer, NULL, NULL); - } - } - ret = rtcdev; - spin_unlock_irqrestore(&rtcdev_lock, flags); - - return ret; -} -#else -#define alarmtimer_get_rtcdev() (0) -#define rtcdev (0) #endif +/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ +static ktime_t freezer_delta; +static DEFINE_SPINLOCK(freezer_delta_lock); + /** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue @@ -226,7 +166,6 @@ static int alarmtimer_suspend(struct device *dev) struct rtc_time tm; ktime_t min, now; unsigned long flags; - struct rtc_device *rtc; int i; spin_lock_irqsave(&freezer_delta_lock, flags); @@ -234,9 +173,8 @@ static int alarmtimer_suspend(struct device *dev) freezer_delta = ktime_set(0, 0); spin_unlock_irqrestore(&freezer_delta_lock, flags); - rtc = rtcdev; /* If we have no rtcdev, just return */ - if (!rtc) + if (!rtcdev) return 0; /* Find the soonest timer to expire*/ @@ -261,12 +199,12 @@ static int alarmtimer_suspend(struct device *dev) WARN_ON(min.tv64 < NSEC_PER_SEC); /* Setup an rtc timer to fire that far in the future */ - rtc_timer_cancel(rtc, &rtctimer); - rtc_read_time(rtc, &tm); + rtc_timer_cancel(rtcdev, &rtctimer); + rtc_read_time(rtcdev, &tm); now = rtc_tm_to_ktime(tm); now = ktime_add(now, min); - rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); + rtc_timer_start(rtcdev, &rtctimer, now, ktime_set(0, 0)); return 0; } @@ -384,9 +322,6 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) { clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; - if (!alarmtimer_get_rtcdev()) - return -ENOTSUPP; - return hrtimer_get_res(baseid, tp); } @@ -401,9 +336,6 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) { struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; - if (!alarmtimer_get_rtcdev()) - return -ENOTSUPP; - *tp = ktime_to_timespec(base->gettime()); return 0; } @@ -419,9 +351,6 @@ static int alarm_timer_create(struct k_itimer *new_timer) enum alarmtimer_type 
type; struct alarm_base *base; - if (!alarmtimer_get_rtcdev()) - return -ENOTSUPP; - if (!capable(CAP_WAKE_ALARM)) return -EPERM; @@ -456,9 +385,6 @@ static void alarm_timer_get(struct k_itimer *timr, */ static int alarm_timer_del(struct k_itimer *timr) { - if (!rtcdev) - return -ENOTSUPP; - alarm_cancel(&timr->it.alarmtimer); return 0; } @@ -476,9 +402,6 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { - if (!rtcdev) - return -ENOTSUPP; - /* Save old values */ old_setting->it_interval = ktime_to_timespec(timr->it.alarmtimer.period); @@ -618,9 +541,6 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, int ret = 0; struct restart_block *restart; - if (!alarmtimer_get_rtcdev()) - return -ENOTSUPP; - if (!capable(CAP_WAKE_ALARM)) return -EPERM; @@ -718,3 +638,65 @@ static int __init alarmtimer_init(void) } device_initcall(alarmtimer_init); +#ifdef CONFIG_RTC_CLASS +/** + * has_wakealarm - check rtc device has wakealarm ability + * @dev: current device + * @name_ptr: name to be returned + * + * This helper function checks to see if the rtc device can wake + * from suspend. + */ +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/** + * alarmtimer_init_late - Late initializing of alarmtimer code + * + * This function locates a rtc device to use for wakealarms. + * Run as late_initcall to make sure rtc devices have been + * registered. + */ +static int __init alarmtimer_init_late(void) +{ + struct device *dev; + char *str; + + /* Find an rtc device and init the rtc_timer */ + dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); + /* If we have a device then str is valid. See has_wakealarm() */ + if (dev) { + rtcdev = rtc_class_open(str); + /* + * Drop the reference we got in class_find_device, + * rtc_open takes its own. 
+ */ + put_device(dev); + } + if (!rtcdev) { + printk(KERN_WARNING "No RTC device found, ALARM timers will" + " not wake from suspend"); + } + rtc_timer_init(&rtctimer, NULL, NULL); + + return 0; +} +#else +static int __init alarmtimer_init_late(void) +{ + printk(KERN_WARNING "Kernel not built with RTC support, ALARM timers" + " will not wake from suspend"); + return 0; +} +#endif +late_initcall(alarmtimer_init_late); diff --git a/trunk/kernel/trace/ftrace.c b/trunk/kernel/trace/ftrace.c index c997f7371c65..908038f57440 100644 --- a/trunk/kernel/trace/ftrace.c +++ b/trunk/kernel/trace/ftrace.c @@ -32,6 +32,7 @@ #include +#include #include #include "trace_output.h" @@ -81,7 +82,8 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops ftrace_list_end __read_mostly = { +static struct ftrace_ops ftrace_list_end __read_mostly = +{ .func = ftrace_stub, }; @@ -783,7 +785,8 @@ static void unregister_ftrace_profiler(void) unregister_ftrace_graph(); } #else -static struct ftrace_ops ftrace_profile_ops __read_mostly = { +static struct ftrace_ops ftrace_profile_ops __read_mostly = +{ .func = function_profile_call, }; @@ -803,10 +806,19 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long val; + char buf[64]; /* big enough to hold a number */ int ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; val = !!val; diff --git a/trunk/kernel/trace/ring_buffer.c b/trunk/kernel/trace/ring_buffer.c index 731201bf4acc..b0c7aa407943 100644 --- a/trunk/kernel/trace/ring_buffer.c +++ b/trunk/kernel/trace/ring_buffer.c @@ -997,21 +997,15 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) { struct buffer_page *bpage, *tmp; + unsigned long addr; LIST_HEAD(pages); unsigned i; WARN_ON(!nr_pages); for (i = 0; i < nr_pages; i++) { - struct page *page; - /* - * __GFP_NORETRY flag makes sure that the allocation fails - * gracefully without invoking oom-killer and the system is - * not destabilized. 
- */ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), - GFP_KERNEL | __GFP_NORETRY, - cpu_to_node(cpu_buffer->cpu)); + GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); if (!bpage) goto free_pages; @@ -1019,11 +1013,10 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, list_add(&bpage->list, &pages); - page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), - GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) + addr = __get_free_page(GFP_KERNEL); + if (!addr) goto free_pages; - bpage->page = page_address(page); + bpage->page = (void *)addr; rb_init_page(bpage->page); } @@ -1052,7 +1045,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; - struct page *page; + unsigned long addr; int ret; cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), @@ -1074,10 +1067,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) rb_check_bpage(cpu_buffer, bpage); cpu_buffer->reader_page = bpage; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); - if (!page) + addr = __get_free_page(GFP_KERNEL); + if (!addr) goto fail_free_reader; - bpage->page = page_address(page); + bpage->page = (void *)addr; rb_init_page(bpage->page); INIT_LIST_HEAD(&cpu_buffer->reader_page->list); @@ -1321,6 +1314,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) unsigned nr_pages, rm_pages, new_pages; struct buffer_page *bpage, *tmp; unsigned long buffer_size; + unsigned long addr; LIST_HEAD(pages); int i, cpu; @@ -1381,24 +1375,16 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) for_each_buffer_cpu(buffer, cpu) { for (i = 0; i < new_pages; i++) { - struct page *page; - /* - * __GFP_NORETRY flag makes sure that the allocation - * fails gracefully without invoking oom-killer and - * the system is not destabilized. - */ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), - GFP_KERNEL | __GFP_NORETRY, - cpu_to_node(cpu)); + GFP_KERNEL, cpu_to_node(cpu)); if (!bpage) goto free_pages; list_add(&bpage->list, &pages); - page = alloc_pages_node(cpu_to_node(cpu), - GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) + addr = __get_free_page(GFP_KERNEL); + if (!addr) goto free_pages; - bpage->page = page_address(page); + bpage->page = (void *)addr; rb_init_page(bpage->page); } } @@ -3744,17 +3730,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); * Returns: * The page allocated, or NULL on error. 
*/ -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) { struct buffer_data_page *bpage; - struct page *page; + unsigned long addr; - page = alloc_pages_node(cpu_to_node(cpu), - GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) + addr = __get_free_page(GFP_KERNEL); + if (!addr) return NULL; - bpage = page_address(page); + bpage = (void *)addr; rb_init_page(bpage); @@ -3993,11 +3978,20 @@ rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long *p = filp->private_data; + char buf[64]; unsigned long val; int ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; if (val) diff --git a/trunk/kernel/trace/ring_buffer_benchmark.c b/trunk/kernel/trace/ring_buffer_benchmark.c index a5457d577b98..302f8a614635 100644 --- a/trunk/kernel/trace/ring_buffer_benchmark.c +++ b/trunk/kernel/trace/ring_buffer_benchmark.c @@ -106,7 +106,7 @@ static enum event_status read_page(int cpu) int inc; int i; - bpage = ring_buffer_alloc_read_page(buffer, cpu); + bpage = ring_buffer_alloc_read_page(buffer); if (!bpage) return EVENT_DROPPED; diff --git a/trunk/kernel/trace/trace.c b/trunk/kernel/trace/trace.c index d9c16123f6e2..ee9c921d7f21 100644 --- a/trunk/kernel/trace/trace.c +++ b/trunk/kernel/trace/trace.c @@ -343,27 +343,26 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | static int trace_stop_count; static DEFINE_SPINLOCK(tracing_start_lock); -static void wakeup_work_handler(struct work_struct *work) -{ - wake_up(&trace_wait); -} - -static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); - /** * trace_wake_up - wake up tasks waiting for trace input * - * Schedules a delayed work to wake up any task that is blocked on the - * trace_wait queue. These is used with trace_poll for tasks polling the - * trace. + * Simply wakes up any task that is blocked on the trace_wait + * queue. These is used with trace_poll for tasks polling the trace. 
*/ void trace_wake_up(void) { - const unsigned long delay = msecs_to_jiffies(2); + int cpu; if (trace_flags & TRACE_ITER_BLOCK) return; - schedule_delayed_work(&wakeup_work, delay); + /* + * The runqueue_is_locked() can fail, but this is the best we + * have for now: + */ + cpu = get_cpu(); + if (!runqueue_is_locked(cpu)) + wake_up(&trace_wait); + put_cpu(); } static int __init set_buf_size(char *str) @@ -425,7 +424,6 @@ static const char *trace_options[] = { "graph-time", "record-cmd", "overwrite", - "disable_on_free", NULL }; @@ -1193,18 +1191,6 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); -void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs) -{ - ring_buffer_unlock_commit(buffer, event); - - ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); - ftrace_trace_userstack(buffer, flags, pc); -} -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); - void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) { @@ -1250,7 +1236,7 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, #ifdef CONFIG_STACKTRACE static void __ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, - int skip, int pc, struct pt_regs *regs) + int skip, int pc) { struct ftrace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; @@ -1269,36 +1255,24 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, trace.skip = skip; trace.entries = entry->caller; - if (regs) - save_stack_trace_regs(regs, &trace); - else - save_stack_trace(&trace); + save_stack_trace(&trace); if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); } -void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, - int skip, int pc, struct pt_regs *regs) -{ - if (!(trace_flags & TRACE_ITER_STACKTRACE)) - return; - - __ftrace_trace_stack(buffer, flags, skip, pc, regs); -} - void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, int skip, int pc) { if (!(trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(buffer, flags, skip, pc); } void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL); + __ftrace_trace_stack(tr->buffer, flags, skip, pc); } /** @@ -1314,7 +1288,7 @@ void trace_dump_stack(void) local_save_flags(flags); /* skipping 3 traces, seems to get us at the caller of this function */ - __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL); + __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); } static DEFINE_PER_CPU(int, user_stack_count); @@ -2077,9 +2051,6 @@ void trace_default_header(struct seq_file *m) { struct trace_iterator *iter = m->private; - if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) - return; - if (iter->iter_flags & TRACE_FILE_LAT_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) @@ -2730,11 +2701,20 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; + char buf[64]; unsigned long val; int ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] 
= 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; val = !!val; @@ -2787,7 +2767,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr) return t->init(tr); } -static int __tracing_resize_ring_buffer(unsigned long size) +static int tracing_resize_ring_buffer(unsigned long size) { int ret; @@ -2839,41 +2819,6 @@ static int __tracing_resize_ring_buffer(unsigned long size) return ret; } -static ssize_t tracing_resize_ring_buffer(unsigned long size) -{ - int cpu, ret = size; - - mutex_lock(&trace_types_lock); - - tracing_stop(); - - /* disable all cpu buffers */ - for_each_tracing_cpu(cpu) { - if (global_trace.data[cpu]) - atomic_inc(&global_trace.data[cpu]->disabled); - if (max_tr.data[cpu]) - atomic_inc(&max_tr.data[cpu]->disabled); - } - - if (size != global_trace.entries) - ret = __tracing_resize_ring_buffer(size); - - if (ret < 0) - ret = -ENOMEM; - - for_each_tracing_cpu(cpu) { - if (global_trace.data[cpu]) - atomic_dec(&global_trace.data[cpu]->disabled); - if (max_tr.data[cpu]) - atomic_dec(&max_tr.data[cpu]->disabled); - } - - tracing_start(); - mutex_unlock(&trace_types_lock); - - return ret; -} - /** * tracing_update_buffers - used by tracing facility to expand ring buffers @@ -2891,7 +2836,7 @@ int tracing_update_buffers(void) mutex_lock(&trace_types_lock); if (!ring_buffer_expanded) - ret = __tracing_resize_ring_buffer(trace_buf_size); + ret = tracing_resize_ring_buffer(trace_buf_size); mutex_unlock(&trace_types_lock); return ret; @@ -2915,7 +2860,7 @@ static int tracing_set_tracer(const char *buf) mutex_lock(&trace_types_lock); if (!ring_buffer_expanded) { - ret = __tracing_resize_ring_buffer(trace_buf_size); + ret = tracing_resize_ring_buffer(trace_buf_size); if (ret < 0) goto out; ret = 0; @@ -3021,11 +2966,20 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long *ptr = filp->private_data; + char buf[64]; unsigned long val; int ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; *ptr = val * 1000; @@ -3480,52 +3434,65 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long val; - int ret; + char buf[64]; + int ret, cpu; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; /* must have at least 1 entry */ if (!val) return -EINVAL; - /* value is in KB */ - val <<= 10; + mutex_lock(&trace_types_lock); - ret = tracing_resize_ring_buffer(val); - if (ret < 0) - return ret; + tracing_stop(); - *ppos += cnt; + /* disable all cpu buffers */ + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_inc(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_inc(&max_tr.data[cpu]->disabled); + } - return cnt; -} + /* value is in KB */ + val <<= 10; -static ssize_t -tracing_free_buffer_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - /* - * There is no need to read what the user has written, this function - * is just to make sure that there is no error when "echo" is used - */ + if (val != global_trace.entries) { + ret = tracing_resize_ring_buffer(val); + if (ret < 0) { + cnt = ret; + goto out; + } + } 
*ppos += cnt; - return cnt; -} + /* If check pages failed, return ENOMEM */ + if (tracing_disabled) + cnt = -ENOMEM; + out: + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_dec(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_dec(&max_tr.data[cpu]->disabled); + } -static int -tracing_free_buffer_release(struct inode *inode, struct file *filp) -{ - /* disable tracing ? */ - if (trace_flags & TRACE_ITER_STOP_ON_FREE) - tracing_off(); - /* resize the ring buffer to 0 */ - tracing_resize_ring_buffer(0); + tracing_start(); + mutex_unlock(&trace_types_lock); - return 0; + return cnt; } static int mark_printk(const char *fmt, ...) @@ -3673,11 +3640,6 @@ static const struct file_operations tracing_entries_fops = { .llseek = generic_file_llseek, }; -static const struct file_operations tracing_free_buffer_fops = { - .write = tracing_free_buffer_write, - .release = tracing_free_buffer_release, -}; - static const struct file_operations tracing_mark_fops = { .open = tracing_open_generic, .write = tracing_mark_write, @@ -3734,7 +3696,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return 0; if (!info->spare) - info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu); + info->spare = ring_buffer_alloc_read_page(info->tr->buffer); if (!info->spare) return -ENOMEM; @@ -3891,7 +3853,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, ref->ref = 1; ref->buffer = info->tr->buffer; - ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu); + ref->page = ring_buffer_alloc_read_page(ref->buffer); if (!ref->page) { kfree(ref); break; @@ -3900,7 +3862,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, r = ring_buffer_read_page(ref->buffer, &ref->page, len, info->cpu, 1); if (r < 0) { - ring_buffer_free_read_page(ref->buffer, ref->page); + ring_buffer_free_read_page(ref->buffer, + ref->page); kfree(ref); break; } @@ -4136,10 +4099,19 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, { struct trace_option_dentry *topt = filp->private_data; unsigned long val; + char buf[64]; int ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; if (val != 0 && val != 1) @@ -4187,11 +4159,20 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { long index = (long)filp->private_data; + char buf[64]; unsigned long val; int ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; if (val != 0 && val != 1) @@ -4384,9 +4365,6 @@ static __init int tracer_init_debugfs(void) trace_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); - trace_create_file("free_buffer", 0644, d_tracer, - &global_trace, &tracing_free_buffer_fops); - trace_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); diff --git a/trunk/kernel/trace/trace.h b/trunk/kernel/trace/trace.h index a3e2db708072..f8074072d111 100644 --- a/trunk/kernel/trace/trace.h +++ b/trunk/kernel/trace/trace.h @@ -389,9 +389,6 @@ void update_max_tr_single(struct trace_array *tr, void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, int skip, int pc); 
-void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, - int skip, int pc, struct pt_regs *regs); - void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc); @@ -403,12 +400,6 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer, { } -static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer, - unsigned long flags, int skip, - int pc, struct pt_regs *regs) -{ -} - static inline void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { @@ -618,7 +609,6 @@ enum trace_iterator_flags { TRACE_ITER_GRAPH_TIME = 0x80000, TRACE_ITER_RECORD_CMD = 0x100000, TRACE_ITER_OVERWRITE = 0x200000, - TRACE_ITER_STOP_ON_FREE = 0x400000, }; /* @@ -687,6 +677,7 @@ struct event_subsystem { struct dentry *entry; struct event_filter *filter; int nr_events; + int ref_count; }; #define FILTER_PRED_INVALID ((unsigned short)-1) diff --git a/trunk/kernel/trace/trace_events.c b/trunk/kernel/trace/trace_events.c index 4d7e1498ae91..ffc5b2884af1 100644 --- a/trunk/kernel/trace/trace_events.c +++ b/trunk/kernel/trace/trace_events.c @@ -244,6 +244,35 @@ static void ftrace_clear_events(void) mutex_unlock(&event_mutex); } +static void __put_system(struct event_subsystem *system) +{ + struct event_filter *filter = system->filter; + + WARN_ON_ONCE(system->ref_count == 0); + if (--system->ref_count) + return; + + if (filter) { + kfree(filter->filter_string); + kfree(filter); + } + kfree(system->name); + kfree(system); +} + +static void __get_system(struct event_subsystem *system) +{ + WARN_ON_ONCE(system->ref_count == 0); + system->ref_count++; +} + +static void put_system(struct event_subsystem *system) +{ + mutex_lock(&event_mutex); + __put_system(system); + mutex_unlock(&event_mutex); +} + /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. 
*/ @@ -486,11 +515,20 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_call *call = filp->private_data; + char buf[64]; unsigned long val; int ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; ret = tracing_update_buffers(); @@ -562,10 +600,19 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, { const char *system = filp->private_data; unsigned long val; + char buf[64]; ssize_t ret; - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; ret = tracing_update_buffers(); @@ -808,6 +855,47 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static LIST_HEAD(event_subsystems); + +static int subsystem_open(struct inode *inode, struct file *filp) +{ + struct event_subsystem *system = NULL; + int ret; + + /* Make sure the system still exists */ + mutex_lock(&event_mutex); + list_for_each_entry(system, &event_subsystems, list) { + if (system == inode->i_private) { + /* Don't open systems with no events */ + if (!system->nr_events) { + system = NULL; + break; + } + __get_system(system); + break; + } + } + mutex_unlock(&event_mutex); + + if (system != inode->i_private) + return -ENODEV; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + put_system(system); + + return ret; +} + +static int subsystem_release(struct inode *inode, struct file *file) +{ + struct event_subsystem *system = inode->i_private; + + put_system(system); + + return 0; +} + static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -945,10 +1033,11 @@ static const struct file_operations ftrace_event_filter_fops = { }; static const struct file_operations ftrace_subsystem_filter_fops = { - .open = tracing_open_generic, + .open = subsystem_open, .read = subsystem_filter_read, .write = subsystem_filter_write, .llseek = default_llseek, + .release = subsystem_release, }; static const struct file_operations ftrace_system_enable_fops = { @@ -984,8 +1073,6 @@ static struct dentry *event_trace_events_dir(void) return d_events; } -static LIST_HEAD(event_subsystems); - static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { @@ -995,6 +1082,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { if (strcmp(system->name, name) == 0) { + __get_system(system); system->nr_events++; return system->entry; } @@ -1017,6 +1105,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) } system->nr_events = 1; + system->ref_count = 1; system->name = kstrdup(name, GFP_KERNEL); if (!system->name) { debugfs_remove(system->entry); @@ -1166,16 +1255,9 @@ static void remove_subsystem_dir(const char *name) list_for_each_entry(system, &event_subsystems, list) { if (strcmp(system->name, name) == 0) { if (!--system->nr_events) { - struct event_filter *filter = system->filter; - debugfs_remove_recursive(system->entry); list_del(&system->list); - if (filter) { - kfree(filter->filter_string); - kfree(filter); - } - 
kfree(system->name); - kfree(system); + __put_system(system); } break; } diff --git a/trunk/kernel/trace/trace_events_filter.c b/trunk/kernel/trace/trace_events_filter.c index 8008ddcfbf20..256764ecccd6 100644 --- a/trunk/kernel/trace/trace_events_filter.c +++ b/trunk/kernel/trace/trace_events_filter.c @@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system, mutex_lock(&event_mutex); + /* Make sure the system still has events */ + if (!system->nr_events) { + err = -ENODEV; + goto out_unlock; + } + if (!strcmp(strstrip(filter_string), "0")) { filter_free_subsystem_preds(system); remove_filter_string(system->filter); diff --git a/trunk/kernel/trace/trace_functions_graph.c b/trunk/kernel/trace/trace_functions_graph.c index e8d6bb55d719..962cdb24ed81 100644 --- a/trunk/kernel/trace/trace_functions_graph.c +++ b/trunk/kernel/trace/trace_functions_graph.c @@ -74,20 +74,6 @@ static struct tracer_flags tracer_flags = { static struct trace_array *graph_array; -/* - * DURATION column is being also used to display IRQ signs, - * following values are used by print_graph_irq and others - * to fill in space into DURATION column. - */ -enum { - DURATION_FILL_FULL = -1, - DURATION_FILL_START = -2, - DURATION_FILL_END = -3, -}; - -static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s, - u32 flags); /* Add a function return address to the trace stack on thread info.*/ int @@ -591,6 +577,32 @@ get_return_for_leaf(struct trace_iterator *iter, return next; } +/* Signal a overhead of time execution to the output */ +static int +print_graph_overhead(unsigned long long duration, struct trace_seq *s, + u32 flags) +{ + /* If duration disappear, we don't need anything */ + if (!(flags & TRACE_GRAPH_PRINT_DURATION)) + return 1; + + /* Non nested entry or return */ + if (duration == -1) + return trace_seq_printf(s, " "); + + if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { + /* Duration exceeded 100 msecs */ + if (duration > 100000ULL) + return trace_seq_printf(s, "! 
"); + + /* Duration exceeded 10 msecs */ + if (duration > 10000ULL) + return trace_seq_printf(s, "+ "); + } + + return trace_seq_printf(s, " "); +} + static int print_graph_abs_time(u64 t, struct trace_seq *s) { unsigned long usecs_rem; @@ -613,36 +625,34 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, addr >= (unsigned long)__irqentry_text_end) return TRACE_TYPE_UNHANDLED; - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - /* Absolute time */ - if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { - ret = print_graph_abs_time(iter->ts, s); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + /* Absolute time */ + if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { + ret = print_graph_abs_time(iter->ts, s); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } - /* Cpu */ - if (flags & TRACE_GRAPH_PRINT_CPU) { - ret = print_graph_cpu(s, cpu); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - } + /* Cpu */ + if (flags & TRACE_GRAPH_PRINT_CPU) { + ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } - /* Proc */ - if (flags & TRACE_GRAPH_PRINT_PROC) { - ret = print_graph_proc(s, pid); - if (ret == TRACE_TYPE_PARTIAL_LINE) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " | "); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + /* Proc */ + if (flags & TRACE_GRAPH_PRINT_PROC) { + ret = print_graph_proc(s, pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; } /* No overhead */ - ret = print_graph_duration(DURATION_FILL_START, s, flags); - if (ret != TRACE_TYPE_HANDLED) - return ret; + ret = print_graph_overhead(-1, s, flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; if (type == TRACE_GRAPH_ENT) ret = trace_seq_printf(s, "==========>"); @@ -652,10 +662,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, if (!ret) return TRACE_TYPE_PARTIAL_LINE; - ret = print_graph_duration(DURATION_FILL_END, s, flags); - if (ret != TRACE_TYPE_HANDLED) - return ret; - + /* Don't close the duration column if haven't one */ + if (flags & TRACE_GRAPH_PRINT_DURATION) + trace_seq_printf(s, " |"); ret = trace_seq_printf(s, "\n"); if (!ret) @@ -707,49 +716,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) } static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s, - u32 flags) +print_graph_duration(unsigned long long duration, struct trace_seq *s) { - int ret = -1; - - if (!(flags & TRACE_GRAPH_PRINT_DURATION) || - !(trace_flags & TRACE_ITER_CONTEXT_INFO)) - return TRACE_TYPE_HANDLED; - - /* No real adata, just filling the column with spaces */ - switch (duration) { - case DURATION_FILL_FULL: - ret = trace_seq_printf(s, " | "); - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; - case DURATION_FILL_START: - ret = trace_seq_printf(s, " "); - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; - case DURATION_FILL_END: - ret = trace_seq_printf(s, " |"); - return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; - } - - /* Signal a overhead of time execution to the output */ - if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { - /* Duration exceeded 100 msecs */ - if (duration > 100000ULL) - ret = trace_seq_printf(s, "! 
"); - /* Duration exceeded 10 msecs */ - else if (duration > 10000ULL) - ret = trace_seq_printf(s, "+ "); - } - - /* - * The -1 means we either did not exceed the duration tresholds - * or we dont want to print out the overhead. Either way we need - * to fill out the space. - */ - if (ret == -1) - ret = trace_seq_printf(s, " "); - - /* Catching here any failure happenned above */ - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + int ret; ret = trace_print_graph_duration(duration, s); if (ret != TRACE_TYPE_HANDLED) @@ -798,11 +767,18 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data->enter_funcs[call->depth] = 0; } - /* Overhead and duration */ - ret = print_graph_duration(duration, s, flags); - if (ret == TRACE_TYPE_PARTIAL_LINE) + /* Overhead */ + ret = print_graph_overhead(duration, s, flags); + if (!ret) return TRACE_TYPE_PARTIAL_LINE; + /* Duration */ + if (flags & TRACE_GRAPH_PRINT_DURATION) { + ret = print_graph_duration(duration, s); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + /* Function */ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { ret = trace_seq_printf(s, " "); @@ -839,10 +815,17 @@ print_graph_entry_nested(struct trace_iterator *iter, cpu_data->enter_funcs[call->depth] = call->func; } + /* No overhead */ + ret = print_graph_overhead(-1, s, flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + /* No time */ - ret = print_graph_duration(DURATION_FILL_FULL, s, flags); - if (ret != TRACE_TYPE_HANDLED) - return ret; + if (flags & TRACE_GRAPH_PRINT_DURATION) { + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } /* Function */ for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { @@ -882,9 +865,6 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } - if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) - return 0; - /* Absolute time */ if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { ret = print_graph_abs_time(iter->ts, s); @@ -1098,11 +1078,18 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; - /* Overhead and duration */ - ret = print_graph_duration(duration, s, flags); - if (ret == TRACE_TYPE_PARTIAL_LINE) + /* Overhead */ + ret = print_graph_overhead(duration, s, flags); + if (!ret) return TRACE_TYPE_PARTIAL_LINE; + /* Duration */ + if (flags & TRACE_GRAPH_PRINT_DURATION) { + ret = print_graph_duration(duration, s); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + /* Closing brace */ for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { ret = trace_seq_printf(s, " "); @@ -1159,10 +1146,17 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, if (print_graph_prologue(iter, s, 0, 0, flags)) return TRACE_TYPE_PARTIAL_LINE; + /* No overhead */ + ret = print_graph_overhead(-1, s, flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + /* No time */ - ret = print_graph_duration(DURATION_FILL_FULL, s, flags); - if (ret != TRACE_TYPE_HANDLED) - return ret; + if (flags & TRACE_GRAPH_PRINT_DURATION) { + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } /* Indentation */ if (depth > 0) @@ -1213,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, enum print_line_t -print_graph_function_flags(struct trace_iterator *iter, u32 flags) +__print_graph_function_flags(struct trace_iterator *iter, u32 flags) { struct ftrace_graph_ent_entry *field; struct 
fgraph_data *data = iter->private; @@ -1276,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) static enum print_line_t print_graph_function(struct trace_iterator *iter) { - return print_graph_function_flags(iter, tracer_flags.val); + return __print_graph_function_flags(iter, tracer_flags.val); +} + +enum print_line_t print_graph_function_flags(struct trace_iterator *iter, + u32 flags) +{ + if (trace_flags & TRACE_ITER_LATENCY_FMT) + flags |= TRACE_GRAPH_PRINT_DURATION; + else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; + + return __print_graph_function_flags(iter, flags); } static enum print_line_t @@ -1304,7 +1309,8 @@ static void print_lat_header(struct seq_file *s, u32 flags) seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); - seq_printf(s, "#%.*s||| / \n", size, spaces); + seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces); + seq_printf(s, "#%.*s|||| / \n", size, spaces); } static void __print_graph_headers_flags(struct seq_file *s, u32 flags) @@ -1323,7 +1329,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " TASK/PID "); if (lat) - seq_printf(s, "||||"); + seq_printf(s, "|||||"); if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " DURATION "); seq_printf(s, " FUNCTION CALLS\n"); @@ -1337,7 +1343,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) if (flags & TRACE_GRAPH_PRINT_PROC) seq_printf(s, " | | "); if (lat) - seq_printf(s, "||||"); + seq_printf(s, "|||||"); if (flags & TRACE_GRAPH_PRINT_DURATION) seq_printf(s, " | | "); seq_printf(s, " | | | |\n"); @@ -1352,16 +1358,15 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) { struct trace_iterator *iter = s->private; - if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) - return; - if (trace_flags & TRACE_ITER_LATENCY_FMT) { /* print nothing if the buffers are empty */ if (trace_empty(iter)) return; print_trace_header(s, iter); - } + flags |= TRACE_GRAPH_PRINT_DURATION; + } else + flags |= TRACE_GRAPH_PRINT_ABS_TIME; __print_graph_headers_flags(s, flags); } diff --git a/trunk/kernel/trace/trace_irqsoff.c b/trunk/kernel/trace/trace_irqsoff.c index 667aa8cc0cfc..c77424be284d 100644 --- a/trunk/kernel/trace/trace_irqsoff.c +++ b/trunk/kernel/trace/trace_irqsoff.c @@ -226,9 +226,7 @@ static void irqsoff_trace_close(struct trace_iterator *iter) } #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ - TRACE_GRAPH_PRINT_PROC | \ - TRACE_GRAPH_PRINT_ABS_TIME | \ - TRACE_GRAPH_PRINT_DURATION) + TRACE_GRAPH_PRINT_PROC) static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) { diff --git a/trunk/kernel/trace/trace_kprobe.c b/trunk/kernel/trace/trace_kprobe.c index 7db7b68c6c37..27d13b36b8be 100644 --- a/trunk/kernel/trace/trace_kprobe.c +++ b/trunk/kernel/trace/trace_kprobe.c @@ -1397,8 +1397,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); } /* Kretprobe handler */ @@ -1430,8 +1429,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 
if (!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); } /* Event entry printers */ diff --git a/trunk/kernel/trace/trace_sched_wakeup.c b/trunk/kernel/trace/trace_sched_wakeup.c index e4a70c0c71b6..f029dd4fd2ca 100644 --- a/trunk/kernel/trace/trace_sched_wakeup.c +++ b/trunk/kernel/trace/trace_sched_wakeup.c @@ -227,9 +227,7 @@ static void wakeup_trace_close(struct trace_iterator *iter) graph_trace_close(iter); } -#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \ - TRACE_GRAPH_PRINT_ABS_TIME | \ - TRACE_GRAPH_PRINT_DURATION) +#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) static enum print_line_t wakeup_print_line(struct trace_iterator *iter) { diff --git a/trunk/kernel/trace/trace_stack.c b/trunk/kernel/trace/trace_stack.c index 77575b386d97..b0b53b8e4c25 100644 --- a/trunk/kernel/trace/trace_stack.c +++ b/trunk/kernel/trace/trace_stack.c @@ -156,11 +156,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, { long *ptr = filp->private_data; unsigned long val, flags; + char buf[64]; int ret; int cpu; - ret = kstrtoul_from_user(ubuf, count, 10, &val); - if (ret) + if (count >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, count)) + return -EFAULT; + + buf[count] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; local_irq_save(flags); diff --git a/trunk/kernel/watchdog.c b/trunk/kernel/watchdog.c index a933e3a0398b..3d0c56ad4792 100644 --- a/trunk/kernel/watchdog.c +++ b/trunk/kernel/watchdog.c @@ -200,8 +200,6 @@ static int is_softlockup(unsigned long touch_ts) } #ifdef CONFIG_HARDLOCKUP_DETECTOR -void __weak hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr) { } - static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -211,7 +209,7 @@ static struct perf_event_attr wd_hw_attr = { }; /* Callback function for perf event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, +static void watchdog_overflow_callback(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -370,12 +368,10 @@ static int watchdog_nmi_enable(int cpu) if (event != NULL) goto out_enable; + /* Try to register using hardware perf events */ wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); - hw_nmi_watchdog_set_attr(wd_attr); - - /* Try to register using hardware perf events */ - event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); + event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); if (!IS_ERR(event)) { printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); goto out_save; diff --git a/trunk/mm/memcontrol.c b/trunk/mm/memcontrol.c index ddffc74cdebe..cf7d027a8844 100644 --- a/trunk/mm/memcontrol.c +++ b/trunk/mm/memcontrol.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/trunk/mm/memory-failure.c b/trunk/mm/memory-failure.c index 740c4f52059c..eac0ba561491 100644 --- a/trunk/mm/memory-failure.c +++ b/trunk/mm/memory-failure.c @@ -391,11 +391,10 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, struct task_struct *tsk; struct anon_vma *av; + read_lock(&tasklist_lock); av = page_lock_anon_vma(page); if (av == NULL) /* Not actually mapped anymore */ - return; - - 
read_lock(&tasklist_lock); + goto out; for_each_process (tsk) { struct anon_vma_chain *vmac; @@ -409,8 +408,9 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, add_to_kill(tsk, page, vma, to_kill, tkc); } } - read_unlock(&tasklist_lock); page_unlock_anon_vma(av); +out: + read_unlock(&tasklist_lock); } /* @@ -424,8 +424,17 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, struct prio_tree_iter iter; struct address_space *mapping = page->mapping; - mutex_lock(&mapping->i_mmap_mutex); + /* + * A note on the locking order between the two locks. + * We don't rely on this particular order. + * If you have some other code that needs a different order + * feel free to switch them around. Or add a reverse link + * from mm_struct to task_struct, then this could be all + * done without taking tasklist_lock and looping over all tasks. + */ + read_lock(&tasklist_lock); + mutex_lock(&mapping->i_mmap_mutex); for_each_process(tsk) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -445,8 +454,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill(tsk, page, vma, to_kill, tkc); } } - read_unlock(&tasklist_lock); mutex_unlock(&mapping->i_mmap_mutex); + read_unlock(&tasklist_lock); } /* diff --git a/trunk/mm/memory.c b/trunk/mm/memory.c index 40b7531ee8ba..87d935333f0d 100644 --- a/trunk/mm/memory.c +++ b/trunk/mm/memory.c @@ -2798,6 +2798,30 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); +int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) +{ + struct address_space *mapping = inode->i_mapping; + + /* + * If the underlying filesystem is not going to provide + * a way to truncate a range of blocks (punch a hole) - + * we should return failure right now. + */ + if (!inode->i_op->truncate_range) + return -ENOSYS; + + mutex_lock(&inode->i_mutex); + down_write(&inode->i_alloc_sem); + unmap_mapping_range(mapping, offset, (end - offset), 1); + truncate_inode_pages_range(mapping, offset, end); + unmap_mapping_range(mapping, offset, (end - offset), 1); + inode->i_op->truncate_range(inode, offset, end); + up_write(&inode->i_alloc_sem); + mutex_unlock(&inode->i_mutex); + + return 0; +} + /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. diff --git a/trunk/mm/memory_hotplug.c b/trunk/mm/memory_hotplug.c index c46887b5a11e..02159c755136 100644 --- a/trunk/mm/memory_hotplug.c +++ b/trunk/mm/memory_hotplug.c @@ -498,9 +498,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) * The node we allocated has no zone fallback lists. For avoiding * to access not-initialized zonelist, build here. 
*/ - mutex_lock(&zonelists_mutex); build_all_zonelists(NULL); - mutex_unlock(&zonelists_mutex); return pgdat; } @@ -523,7 +521,7 @@ int mem_online_node(int nid) lock_memory_hotplug(); pgdat = hotadd_new_pgdat(nid, 0); - if (!pgdat) { + if (pgdat) { ret = -ENOMEM; goto out; } diff --git a/trunk/mm/rmap.c b/trunk/mm/rmap.c index 23295f65ae43..27dfd3b82b0f 100644 --- a/trunk/mm/rmap.c +++ b/trunk/mm/rmap.c @@ -38,8 +38,9 @@ * in arch-dependent flush_dcache_mmap_lock, * within inode_wb_list_lock in __sync_single_inode) * - * anon_vma->mutex,mapping->i_mutex (memory_failure, collect_procs_anon) - * ->tasklist_lock + * (code doesn't rely on that order so it could be switched around) + * ->tasklist_lock + * anon_vma->mutex (memory_failure, collect_procs_anon) * pte map lock */ diff --git a/trunk/mm/shmem.c b/trunk/mm/shmem.c index fcedf5464eb7..d221a1cfd7b1 100644 --- a/trunk/mm/shmem.c +++ b/trunk/mm/shmem.c @@ -539,7 +539,7 @@ static void shmem_free_pages(struct list_head *next) } while (next); } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) { struct shmem_inode_info *info = SHMEM_I(inode); unsigned long idx; @@ -562,8 +562,6 @@ void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) spinlock_t *punch_lock; unsigned long upper_limit; - truncate_inode_pages_range(inode->i_mapping, start, end); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (idx >= info->next_index) @@ -740,8 +738,16 @@ void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) * lowered next_index. Also, though shmem_getpage checks * i_size before adding to cache, no recheck after: so fix the * narrow window there too. + * + * Recalling truncate_inode_pages_range and unmap_mapping_range + * every time for punch_hole (which never got a chance to clear + * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive, + * yet hardly ever necessary: try to optimize them out later. 
*/ truncate_inode_pages_range(inode->i_mapping, start, end); + if (punch_hole) + unmap_mapping_range(inode->i_mapping, start, + end - start, 1); } spin_lock(&info->lock); @@ -760,23 +766,22 @@ void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) shmem_free_pages(pages_to_free.next); } } -EXPORT_SYMBOL_GPL(shmem_truncate_range); -static int shmem_setattr(struct dentry *dentry, struct iattr *attr) +static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + loff_t newsize = attr->ia_size; int error; error = inode_change_ok(inode, attr); if (error) return error; - if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - loff_t oldsize = inode->i_size; - loff_t newsize = attr->ia_size; + if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE) + && newsize != inode->i_size) { struct page *page = NULL; - if (newsize < oldsize) { + if (newsize < inode->i_size) { /* * If truncating down to a partial page, then * if that page is already allocated, hold it @@ -805,19 +810,12 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) spin_unlock(&info->lock); } } - if (newsize != oldsize) { - i_size_write(inode, newsize); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - } - if (newsize < oldsize) { - loff_t holebegin = round_up(newsize, PAGE_SIZE); - unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); - shmem_truncate_range(inode, newsize, (loff_t)-1); - /* unmap again to remove racily COWed private pages */ - unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); - } + + /* XXX(truncate): truncate_setsize should be called last */ + truncate_setsize(inode, newsize); if (page) page_cache_release(page); + shmem_truncate_range(inode, newsize, (loff_t)-1); } setattr_copy(inode, attr); @@ -834,6 +832,7 @@ static void shmem_evict_inode(struct inode *inode) struct shmem_xattr *xattr, *nxattr; if (inode->i_mapping->a_ops == &shmem_aops) { + truncate_inode_pages(inode->i_mapping, 0); shmem_unacct_size(info->flags, inode->i_size); inode->i_size = 0; shmem_truncate_range(inode, 0, (loff_t)-1); @@ -2707,7 +2706,7 @@ static const struct file_operations shmem_file_operations = { }; static const struct inode_operations shmem_inode_operations = { - .setattr = shmem_setattr, + .setattr = shmem_notify_change, .truncate_range = shmem_truncate_range, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, @@ -2740,7 +2739,7 @@ static const struct inode_operations shmem_dir_inode_operations = { .removexattr = shmem_removexattr, #endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_setattr, + .setattr = shmem_notify_change, .check_acl = generic_check_acl, #endif }; @@ -2753,7 +2752,7 @@ static const struct inode_operations shmem_special_inode_operations = { .removexattr = shmem_removexattr, #endif #ifdef CONFIG_TMPFS_POSIX_ACL - .setattr = shmem_setattr, + .setattr = shmem_notify_change, .check_acl = generic_check_acl, #endif }; @@ -2909,12 +2908,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) return 0; } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) -{ - truncate_inode_pages_range(inode->i_mapping, start, end); -} -EXPORT_SYMBOL_GPL(shmem_truncate_range); - #ifdef CONFIG_CGROUP_MEM_RES_CTLR /** * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file @@ -3035,26 +3028,3 @@ int shmem_zero_setup(struct vm_area_struct *vma) vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } - -/** - * shmem_read_mapping_page_gfp - read into page cache, using 
specified page allocation flags. - * @mapping: the page's address_space - * @index: the page index - * @gfp: the page allocator flags to use if allocating - * - * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", - * with any new page allocations done using the specified allocation flags. - * But read_cache_page_gfp() uses the ->readpage() method: which does not - * suit tmpfs, since it may have pages in swapcache, and needs to find those - * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. - * - * Provide a stub for those callers to start using now, then later - * flesh it out to call shmem_getpage() with additional gfp mask, when - * shmem_file_splice_read() is added and shmem_readpage() is removed. - */ -struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, - pgoff_t index, gfp_t gfp) -{ - return read_cache_page_gfp(mapping, index, gfp); -} -EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp); diff --git a/trunk/mm/swapfile.c b/trunk/mm/swapfile.c index ff8dc1a18cb4..d537d29e9b7b 100644 --- a/trunk/mm/swapfile.c +++ b/trunk/mm/swapfile.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/trunk/mm/truncate.c b/trunk/mm/truncate.c index e13f22efaad7..3a29a6180212 100644 --- a/trunk/mm/truncate.c +++ b/trunk/mm/truncate.c @@ -304,11 +304,6 @@ EXPORT_SYMBOL(truncate_inode_pages_range); * @lstart: offset from which to truncate * * Called under (and serialised by) inode->i_mutex. - * - * Note: When this function returns, there can be a page in the process of - * deletion (inside __delete_from_page_cache()) in the specified range. Thus - * mapping->nrpages can be non-zero when this function returns even after - * truncation of the whole mapping. */ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) { @@ -608,27 +603,3 @@ int vmtruncate(struct inode *inode, loff_t offset) return 0; } EXPORT_SYMBOL(vmtruncate); - -int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) -{ - struct address_space *mapping = inode->i_mapping; - - /* - * If the underlying filesystem is not going to provide - * a way to truncate a range of blocks (punch a hole) - - * we should return failure right now. - */ - if (!inode->i_op->truncate_range) - return -ENOSYS; - - mutex_lock(&inode->i_mutex); - down_write(&inode->i_alloc_sem); - unmap_mapping_range(mapping, offset, (end - offset), 1); - inode->i_op->truncate_range(inode, offset, end); - /* unmap again to remove racily COWed private pages */ - unmap_mapping_range(mapping, offset, (end - offset), 1); - up_write(&inode->i_alloc_sem); - mutex_unlock(&inode->i_mutex); - - return 0; -} diff --git a/trunk/mm/vmscan.c b/trunk/mm/vmscan.c index 4f49535d4cd3..8ff834e19c24 100644 --- a/trunk/mm/vmscan.c +++ b/trunk/mm/vmscan.c @@ -1995,13 +1995,14 @@ static void shrink_zone(int priority, struct zone *zone, * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. 
*/ -static void shrink_zones(int priority, struct zonelist *zonelist, +static unsigned long shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; + unsigned long total_scanned = 0; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -2016,23 +2017,19 @@ static void shrink_zones(int priority, struct zonelist *zonelist, continue; if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ - /* - * This steals pages from memory cgroups over softlimit - * and returns the number of reclaimed pages and - * scanned pages. This works for global memory pressure - * and balancing, not for a memcg's limit. - */ - nr_soft_scanned = 0; - nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, - sc->order, sc->gfp_mask, - &nr_soft_scanned); - sc->nr_reclaimed += nr_soft_reclaimed; - sc->nr_scanned += nr_soft_scanned; - /* need some check for avoid more shrink_zone() */ } + nr_soft_scanned = 0; + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, + sc->order, sc->gfp_mask, + &nr_soft_scanned); + sc->nr_reclaimed += nr_soft_reclaimed; + total_scanned += nr_soft_scanned; + shrink_zone(priority, zone, sc); } + + return total_scanned; } static bool zone_reclaimable(struct zone *zone) @@ -2097,7 +2094,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->nr_scanned = 0; if (!priority) disable_swap_token(sc->mem_cgroup); - shrink_zones(priority, zonelist, sc); + total_scanned += shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from * over limit cgroups diff --git a/trunk/net/sunrpc/auth_gss/auth_gss.c b/trunk/net/sunrpc/auth_gss/auth_gss.c index 5daf6cc4faea..339ba64cce1e 100644 --- a/trunk/net/sunrpc/auth_gss/auth_gss.c +++ b/trunk/net/sunrpc/auth_gss/auth_gss.c @@ -577,13 +577,13 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) } inode = &gss_msg->inode->vfs_inode; for (;;) { - prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); + prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } spin_unlock(&inode->i_lock); - if (fatal_signal_pending(current)) { + if (signalled()) { err = -ERESTARTSYS; goto out_intr; } diff --git a/trunk/net/sunrpc/clnt.c b/trunk/net/sunrpc/clnt.c index 8c9141583d6f..b84d7395535e 100644 --- a/trunk/net/sunrpc/clnt.c +++ b/trunk/net/sunrpc/clnt.c @@ -1061,7 +1061,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { + if (RPC_IS_ASYNC(task) || !signalled()) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; @@ -1175,9 +1175,6 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } - if (task->tk_rebind_retry == 0) - break; - task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: diff --git a/trunk/net/sunrpc/sched.c b/trunk/net/sunrpc/sched.c index a27406b1654f..6b43ee7221d5 100644 --- a/trunk/net/sunrpc/sched.c +++ b/trunk/net/sunrpc/sched.c @@ -792,7 +792,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; - task->tk_rebind_retry = 2; task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; 
task->tk_owner = current->tgid; diff --git a/trunk/samples/hw_breakpoint/data_breakpoint.c b/trunk/samples/hw_breakpoint/data_breakpoint.c index ef7f32291852..063653955f9f 100644 --- a/trunk/samples/hw_breakpoint/data_breakpoint.c +++ b/trunk/samples/hw_breakpoint/data_breakpoint.c @@ -41,7 +41,7 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" " write operations on the kernel symbol"); -static void sample_hbp_handler(struct perf_event *bp, +static void sample_hbp_handler(struct perf_event *bp, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -60,7 +60,7 @@ static int __init hw_break_module_init(void) attr.bp_len = HW_BREAKPOINT_LEN_4; attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL); + sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler); if (IS_ERR((void __force *)sample_hbp)) { ret = PTR_ERR((void __force *)sample_hbp); goto fail; diff --git a/trunk/security/keys/request_key.c b/trunk/security/keys/request_key.c index 82465328c39b..8e319a416eec 100644 --- a/trunk/security/keys/request_key.c +++ b/trunk/security/keys/request_key.c @@ -469,7 +469,7 @@ static struct key *construct_key_and_link(struct key_type *type, } else if (ret == -EINPROGRESS) { ret = 0; } else { - goto couldnt_alloc_key; + key = ERR_PTR(ret); } key_put(dest_keyring); @@ -479,7 +479,6 @@ static struct key *construct_key_and_link(struct key_type *type, construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); -couldnt_alloc_key: key_put(dest_keyring); kleave(" = %d", ret); return ERR_PTR(ret); diff --git a/trunk/sound/pci/asihpi/asihpi.c b/trunk/sound/pci/asihpi/asihpi.c index e3569bdd3b64..2ca6f4f85b41 100644 --- a/trunk/sound/pci/asihpi/asihpi.c +++ b/trunk/sound/pci/asihpi/asihpi.c @@ -27,6 +27,7 @@ #include "hpioctl.h" #include +#include #include #include #include diff --git a/trunk/sound/pci/hda/patch_realtek.c b/trunk/sound/pci/hda/patch_realtek.c index d21191dcfe88..61a774b3d3cb 100644 --- a/trunk/sound/pci/hda/patch_realtek.c +++ b/trunk/sound/pci/hda/patch_realtek.c @@ -4883,6 +4883,7 @@ static const struct snd_pci_quirk alc880_cfg_tbl[] = { SND_PCI_QUIRK(0x1025, 0xe309, "ULI", ALC880_3ST_DIG), SND_PCI_QUIRK(0x1025, 0xe310, "ULI", ALC880_3ST), SND_PCI_QUIRK(0x1039, 0x1234, NULL, ALC880_6ST_DIG), + SND_PCI_QUIRK(0x103c, 0x2a09, "HP", ALC880_5ST), SND_PCI_QUIRK(0x1043, 0x10b3, "ASUS W1V", ALC880_ASUS_W1V), SND_PCI_QUIRK(0x1043, 0x10c2, "ASUS W6A", ALC880_ASUS_DIG), SND_PCI_QUIRK(0x1043, 0x10c3, "ASUS Wxx", ALC880_ASUS_DIG), @@ -12599,7 +12600,6 @@ static const struct hda_verb alc262_toshiba_rx1_unsol_verbs[] = { */ enum { PINFIX_FSC_H270, - PINFIX_HP_Z200, }; static const struct alc_fixup alc262_fixups[] = { @@ -12612,17 +12612,9 @@ static const struct alc_fixup alc262_fixups[] = { { } } }, - [PINFIX_HP_Z200] = { - .type = ALC_FIXUP_PINS, - .v.pins = (const struct alc_pincfg[]) { - { 0x16, 0x99130120 }, /* internal speaker */ - { } - } - }, }; static const struct snd_pci_quirk alc262_fixup_tbl[] = { - SND_PCI_QUIRK(0x103c, 0x170b, "HP Z200", PINFIX_HP_Z200), SND_PCI_QUIRK(0x1734, 0x1147, "FSC Celsius H270", PINFIX_FSC_H270), {} }; @@ -12739,8 +12731,6 @@ static const struct snd_pci_quirk alc262_cfg_tbl[] = { ALC262_HP_BPC), SND_PCI_QUIRK_MASK(0x103c, 0xff00, 0x1500, "HP z series", ALC262_HP_BPC), - SND_PCI_QUIRK(0x103c, 0x170b, "HP Z200", - ALC262_AUTO), 
SND_PCI_QUIRK_MASK(0x103c, 0xff00, 0x1700, "HP xw series", ALC262_HP_BPC), SND_PCI_QUIRK(0x103c, 0x2800, "HP D7000", ALC262_HP_BPC_D7000_WL), @@ -13882,6 +13872,7 @@ static const struct snd_pci_quirk alc268_cfg_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST), SND_PCI_QUIRK(0x1170, 0x0040, "ZEPTO", ALC268_ZEPTO), SND_PCI_QUIRK(0x14c0, 0x0025, "COMPAL IFL90/JFL-92", ALC268_TOSHIBA), + SND_PCI_QUIRK(0x152d, 0x0763, "Diverse (CPR2000)", ALC268_ACER), SND_PCI_QUIRK(0x152d, 0x0771, "Quanta IL1", ALC267_QUANTA_IL1), {} }; diff --git a/trunk/sound/pci/hda/patch_via.c b/trunk/sound/pci/hda/patch_via.c index f43bb0eaed8b..c952582fb218 100644 --- a/trunk/sound/pci/hda/patch_via.c +++ b/trunk/sound/pci/hda/patch_via.c @@ -745,23 +745,12 @@ static int via_independent_hp_put(struct snd_kcontrol *kcontrol, struct via_spec *spec = codec->spec; hda_nid_t nid = kcontrol->private_value; unsigned int pinsel = ucontrol->value.enumerated.item[0]; - unsigned int parm0, parm1; /* Get Independent Mode index of headphone pin widget */ spec->hp_independent_mode = spec->hp_independent_mode_index == pinsel ? 1 : 0; - if (spec->codec_type == VT1718S) { + if (spec->codec_type == VT1718S) snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_CONNECT_SEL, pinsel ? 2 : 0); - /* Set correct mute switch for MW3 */ - parm0 = spec->hp_independent_mode ? - AMP_IN_UNMUTE(0) : AMP_IN_MUTE(0); - parm1 = spec->hp_independent_mode ? - AMP_IN_MUTE(1) : AMP_IN_UNMUTE(1); - snd_hda_codec_write(codec, 0x1b, 0, - AC_VERB_SET_AMP_GAIN_MUTE, parm0); - snd_hda_codec_write(codec, 0x1b, 0, - AC_VERB_SET_AMP_GAIN_MUTE, parm1); - } else snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_CONNECT_SEL, pinsel); @@ -4294,6 +4283,9 @@ static const struct hda_verb vt1718S_volume_init_verbs[] = { {0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)}, {0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)}, {0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(5)}, + + /* Setup default input of Front HP to MW9 */ + {0x28, AC_VERB_SET_CONNECT_SEL, 0x1}, /* PW9 PW10 Output enable */ {0x2d, AC_VERB_SET_PIN_WIDGET_CONTROL, AC_PINCTL_OUT_EN}, {0x2e, AC_VERB_SET_PIN_WIDGET_CONTROL, AC_PINCTL_OUT_EN}, @@ -4302,10 +4294,10 @@ static const struct hda_verb vt1718S_volume_init_verbs[] = { /* Enable Boost Volume backdoor */ {0x1, 0xf88, 0x8}, /* MW0/1/2/3/4: un-mute index 0 (AOWx), mute index 1 (MW9) */ - {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, + {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, - {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, + {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, {0x1c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)}, @@ -4315,6 +4307,8 @@ static const struct hda_verb vt1718S_volume_init_verbs[] = { /* set MUX1 = 2 (AOW4), MUX2 = 1 (AOW3) */ {0x34, AC_VERB_SET_CONNECT_SEL, 0x2}, {0x35, AC_VERB_SET_CONNECT_SEL, 0x1}, + /* Unmute MW4's index 0 */ + {0x1c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, { } }; @@ -4462,19 +4456,6 @@ static int vt1718S_auto_create_multi_out_ctls(struct via_spec *spec, if (err < 0) return err; } else if (i == AUTO_SEQ_FRONT) { - /* add control to mixer index 0 */ - err = via_add_control(spec, VIA_CTL_WIDGET_VOL, - "Master Front Playback Volume", - HDA_COMPOSE_AMP_VAL(0x21, 3, 5, - HDA_INPUT)); - if (err < 0) - return err; - err = via_add_control(spec, VIA_CTL_WIDGET_MUTE, - "Master Front 
Playback Switch", - HDA_COMPOSE_AMP_VAL(0x21, 3, 5, - HDA_INPUT)); - if (err < 0) - return err; /* Front */ sprintf(name, "%s Playback Volume", chname[i]); err = via_add_control( diff --git a/trunk/sound/soc/codecs/wm8991.c b/trunk/sound/soc/codecs/wm8991.c index 6af23d06870f..3c2ee1bb73cd 100644 --- a/trunk/sound/soc/codecs/wm8991.c +++ b/trunk/sound/soc/codecs/wm8991.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/trunk/sound/soc/imx/Kconfig b/trunk/sound/soc/imx/Kconfig index bb699bb55a50..d8f130d39dd9 100644 --- a/trunk/sound/soc/imx/Kconfig +++ b/trunk/sound/soc/imx/Kconfig @@ -11,6 +11,9 @@ menuconfig SND_IMX_SOC if SND_IMX_SOC +config SND_MXC_SOC_SSI + tristate + config SND_MXC_SOC_FIQ tristate @@ -21,6 +24,7 @@ config SND_MXC_SOC_WM1133_EV1 tristate "Audio on the the i.MX31ADS with WM1133-EV1 fitted" depends on MACH_MX31ADS_WM1133_EV1 && EXPERIMENTAL select SND_SOC_WM8350 + select SND_MXC_SOC_SSI select SND_MXC_SOC_FIQ help Enable support for audio on the i.MX31ADS with the WM1133-EV1 @@ -30,6 +34,7 @@ config SND_SOC_MX27VIS_AIC32X4 tristate "SoC audio support for Visstrim M10 boards" depends on MACH_IMX27_VISSTRIM_M10 select SND_SOC_TVL320AIC32X4 + select SND_MXC_SOC_SSI select SND_MXC_SOC_MX2 help Say Y if you want to add support for SoC audio on Visstrim SM10 @@ -39,6 +44,7 @@ config SND_SOC_PHYCORE_AC97 tristate "SoC Audio support for Phytec phyCORE (and phyCARD) boards" depends on MACH_PCM043 || MACH_PCA100 select SND_SOC_WM9712 + select SND_MXC_SOC_SSI select SND_MXC_SOC_FIQ help Say Y if you want to add support for SoC audio on Phytec phyCORE @@ -51,6 +57,7 @@ config SND_SOC_EUKREA_TLV320 || MACH_EUKREA_MBIMXSD35_BASEBOARD \ || MACH_EUKREA_MBIMXSD51_BASEBOARD select SND_SOC_TLV320AIC23 + select SND_MXC_SOC_SSI select SND_MXC_SOC_FIQ help Enable I2S based access to the TLV320AIC23B codec attached diff --git a/trunk/sound/soc/imx/imx-pcm-dma-mx2.c b/trunk/sound/soc/imx/imx-pcm-dma-mx2.c index 4173b3d87f97..aab7765f401a 100644 --- a/trunk/sound/soc/imx/imx-pcm-dma-mx2.c +++ b/trunk/sound/soc/imx/imx-pcm-dma-mx2.c @@ -337,5 +337,3 @@ static void __exit snd_imx_pcm_exit(void) platform_driver_unregister(&imx_pcm_driver); } module_exit(snd_imx_pcm_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:imx-pcm-audio"); diff --git a/trunk/sound/soc/imx/imx-ssi.c b/trunk/sound/soc/imx/imx-ssi.c index 61fceb09cdb5..5b13feca7537 100644 --- a/trunk/sound/soc/imx/imx-ssi.c +++ b/trunk/sound/soc/imx/imx-ssi.c @@ -774,4 +774,4 @@ module_exit(imx_ssi_exit); MODULE_AUTHOR("Sascha Hauer, "); MODULE_DESCRIPTION("i.MX I2S/ac97 SoC Interface"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:imx-ssi"); + diff --git a/trunk/sound/soc/pxa/pxa2xx-pcm.c b/trunk/sound/soc/pxa/pxa2xx-pcm.c index fab20a54e863..2ce0b2d891d5 100644 --- a/trunk/sound/soc/pxa/pxa2xx-pcm.c +++ b/trunk/sound/soc/pxa/pxa2xx-pcm.c @@ -95,14 +95,14 @@ static int pxa2xx_soc_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, if (!card->dev->coherent_dma_mask) card->dev->coherent_dma_mask = DMA_BIT_MASK(32); - if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) { + if (dai->driver->playback.channels_min) { ret = pxa2xx_pcm_preallocate_dma_buffer(pcm, SNDRV_PCM_STREAM_PLAYBACK); if (ret) goto out; } - if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) { + if (dai->driver->capture.channels_min) { ret = pxa2xx_pcm_preallocate_dma_buffer(pcm, SNDRV_PCM_STREAM_CAPTURE); if (ret) diff --git a/trunk/sound/soc/soc-cache.c b/trunk/sound/soc/soc-cache.c index 039b9532b270..c005ceb70c9d 100644 
--- a/trunk/sound/soc/soc-cache.c +++ b/trunk/sound/soc/soc-cache.c @@ -409,6 +409,9 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec, codec->bulk_write_raw = snd_soc_hw_bulk_write_raw; switch (control) { + case SND_SOC_CUSTOM: + break; + case SND_SOC_I2C: #if defined(CONFIG_I2C) || (defined(CONFIG_I2C_MODULE) && defined(MODULE)) codec->hw_write = (hw_write_t)i2c_master_send; diff --git a/trunk/tools/perf/Documentation/perf-annotate.txt b/trunk/tools/perf/Documentation/perf-annotate.txt index 85c5f026930d..6f5a498608b2 100644 --- a/trunk/tools/perf/Documentation/perf-annotate.txt +++ b/trunk/tools/perf/Documentation/perf-annotate.txt @@ -66,12 +66,6 @@ OPTIONS used. This interfaces starts by centering on the line with more samples, TAB/UNTAB cycles through the lines with more samples. --c:: ---cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can - be provided as a comma-separated list with no space: 0,1. Ranges of - CPUs are specified with -: 0-2. Default is to report samples on all - CPUs. - SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/trunk/tools/perf/Documentation/perf-report.txt b/trunk/tools/perf/Documentation/perf-report.txt index 04253c07d19a..8ba03d6e5398 100644 --- a/trunk/tools/perf/Documentation/perf-report.txt +++ b/trunk/tools/perf/Documentation/perf-report.txt @@ -80,24 +80,15 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min,order]:: +-g [type,min]:: --call-graph:: - Display call chains using type, min percent threshold and order. + Display call chains using type and min percent threshold. type can be either: - flat: single column, linear exposure of call chains. - graph: use a graph tree, displaying absolute overhead rates. - fractal: like graph, but displays relative rates. Each branch of the tree is considered as a new profiled object. + - - order can be either: - - callee: callee based call graph. - - caller: inverted caller based call graph. - - Default: fractal,0.5,callee. - --G:: ---inverted:: - alias for inverted caller based call graph. + Default: fractal,0.5. --pretty=:: Pretty printing style. key: normal, raw @@ -128,12 +119,6 @@ OPTIONS --symfs=:: Look for files with symbols relative to this directory. --c:: ---cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can - be provided as a comma-separated list with no space: 0,1. Ranges of - CPUs are specified with -: 0-2. Default is to report samples on all - CPUs. - SEE ALSO -------- linkperf:perf-stat[1] diff --git a/trunk/tools/perf/Documentation/perf-script.txt b/trunk/tools/perf/Documentation/perf-script.txt index db017867d9e8..86c87e214b11 100644 --- a/trunk/tools/perf/Documentation/perf-script.txt +++ b/trunk/tools/perf/Documentation/perf-script.txt @@ -115,10 +115,10 @@ OPTIONS -f:: --fields:: Comma separated list of fields to print. Options are: - comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. - Field list can be prepended with the type, trace, sw or hw, + comm, tid, pid, time, cpu, event, trace, sym. Field + list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. - e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace + e.g., -f sw:comm,tid,time,sym and -f trace:time,cpu,trace perf script -f @@ -132,17 +132,17 @@ OPTIONS The arguments are processed in the order received. A later usage can reset a prior request. 
e.g.: - -f trace: -f comm,tid,time,ip,sym + -f trace: -f comm,tid,time,sym The first -f suppresses trace events (field list is ""), but then the - second invocation sets the fields to comm,tid,time,ip,sym. In this case a + second invocation sets the fields to comm,tid,time,sym. In this case a warning is given to the user: "Overriding previous field request for all events." Alternativey, consider the order: - -f comm,tid,time,ip,sym -f trace: + -f comm,tid,time,sym -f trace: The first -f sets the fields for all events and the second -f suppresses trace events. The user is given a warning message about @@ -182,12 +182,6 @@ OPTIONS --hide-call-graph:: When printing symbols do not display call chain. --c:: ---cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can - be provided as a comma-separated list with no space: 0,1. Ranges of - CPUs are specified with -: 0-2. Default is to report samples on all - CPUs. - SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/trunk/tools/perf/builtin-annotate.c b/trunk/tools/perf/builtin-annotate.c index 555aefd7fe01..7b139e1e7e86 100644 --- a/trunk/tools/perf/builtin-annotate.c +++ b/trunk/tools/perf/builtin-annotate.c @@ -28,8 +28,6 @@ #include "util/hist.h" #include "util/session.h" -#include - static char const *input_name = "perf.data"; static bool force, use_tui, use_stdio; @@ -40,9 +38,6 @@ static bool print_line; static const char *sym_hist_filter; -static const char *cpu_list; -static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); - static int perf_evlist__add_sample(struct perf_evlist *evlist, struct perf_sample *sample, struct perf_evsel *evsel, @@ -95,9 +90,6 @@ static int process_sample_event(union perf_event *event, return -1; } - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) - return 0; - if (!al.filtered && perf_evlist__add_sample(session->evlist, sample, evsel, &al)) { pr_warning("problem incrementing symbol count, " @@ -185,12 +177,6 @@ static int __cmd_annotate(void) if (session == NULL) return -ENOMEM; - if (cpu_list) { - ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); - if (ret) - goto out_delete; - } - ret = perf_session__process_events(session, &event_ops); if (ret) goto out_delete; @@ -266,7 +252,6 @@ static const struct option options[] = { "print matching source lines (may be slow)"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the displayed pathnames"), - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_END() }; diff --git a/trunk/tools/perf/builtin-report.c b/trunk/tools/perf/builtin-report.c index f854efda7686..287a173523a7 100644 --- a/trunk/tools/perf/builtin-report.c +++ b/trunk/tools/perf/builtin-report.c @@ -33,8 +33,6 @@ #include "util/sort.h" #include "util/hist.h" -#include - static char const *input_name = "perf.data"; static bool force, use_tui, use_stdio; @@ -47,13 +45,9 @@ static struct perf_read_values show_threads_values; static const char default_pretty_printing_style[] = "normal"; static const char *pretty_printing_style = default_pretty_printing_style; -static char callchain_default_opt[] = "fractal,0.5,callee"; -static bool inverted_callchain; +static char callchain_default_opt[] = "fractal,0.5"; static symbol_filter_t annotate_init; -static const char *cpu_list; -static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); - static int perf_session__add_hist_entry(struct perf_session *session, struct addr_location *al, struct perf_sample *sample, @@ -122,9 +116,6 @@ static int process_sample_event(union perf_event 
*event, if (al.filtered || (hide_unresolved && al.sym == NULL)) return 0; - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) - return 0; - if (al.map != NULL) al.map->dso->hit = 1; @@ -271,12 +262,6 @@ static int __cmd_report(void) if (session == NULL) return -ENOMEM; - if (cpu_list) { - ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); - if (ret) - goto out_delete; - } - if (show_threads) perf_read_values_init(&show_threads_values); @@ -401,29 +386,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, if (!tok) goto setup; + tok2 = strtok(NULL, ","); callchain_param.min_percent = strtod(tok, &endptr); if (tok == endptr) return -1; - /* get the print limit */ - tok2 = strtok(NULL, ","); - if (!tok2) - goto setup; - - if (tok2[0] != 'c') { + if (tok2) callchain_param.print_limit = strtod(tok2, &endptr); - tok2 = strtok(NULL, ","); - if (!tok2) - goto setup; - } - - /* get the call chain order */ - if (!strcmp(tok2, "caller")) - callchain_param.order = ORDER_CALLER; - else if (!strcmp(tok2, "callee")) - callchain_param.order = ORDER_CALLEE; - else - return -1; setup: if (callchain_register_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain params\n"); @@ -467,10 +436,9 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent, call_order", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold and callchain order. " - "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), - OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"), + OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent", + "Display callchains using output_type (graph, flat, fractal, or none) and min percent threshold. " + "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", @@ -487,7 +455,6 @@ static const struct option options[] = { "Only display entries resolved to a symbol"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_END() }; @@ -500,9 +467,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) else if (use_tui) use_browser = 1; - if (inverted_callchain) - callchain_param.order = ORDER_CALLER; - if (strcmp(input_name, "-") != 0) setup_browser(true); else @@ -540,14 +504,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) return -1; - - /* - * Only show the parent fields if we explicitly - * sort that way. If we only use parent machinery - * for filtering, we don't want it. 
- */ - if (!strstr(sort_order, "parent")) - sort_parent.elide = 1; + sort_parent.elide = 1; } else symbol_conf.exclude_other = false; diff --git a/trunk/tools/perf/builtin-script.c b/trunk/tools/perf/builtin-script.c index 09024ec2ab2e..22747de7234b 100644 --- a/trunk/tools/perf/builtin-script.c +++ b/trunk/tools/perf/builtin-script.c @@ -13,7 +13,6 @@ #include "util/util.h" #include "util/evlist.h" #include "util/evsel.h" -#include static char const *script_name; static char const *generate_script_lang; @@ -22,8 +21,6 @@ static u64 last_timestamp; static u64 nr_unordered; extern const struct option record_options[]; static bool no_callchain; -static const char *cpu_list; -static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, @@ -33,10 +30,7 @@ enum perf_output_field { PERF_OUTPUT_CPU = 1U << 4, PERF_OUTPUT_EVNAME = 1U << 5, PERF_OUTPUT_TRACE = 1U << 6, - PERF_OUTPUT_IP = 1U << 7, - PERF_OUTPUT_SYM = 1U << 8, - PERF_OUTPUT_DSO = 1U << 9, - PERF_OUTPUT_ADDR = 1U << 10, + PERF_OUTPUT_SYM = 1U << 7, }; struct output_option { @@ -50,10 +44,7 @@ struct output_option { {.str = "cpu", .field = PERF_OUTPUT_CPU}, {.str = "event", .field = PERF_OUTPUT_EVNAME}, {.str = "trace", .field = PERF_OUTPUT_TRACE}, - {.str = "ip", .field = PERF_OUTPUT_IP}, {.str = "sym", .field = PERF_OUTPUT_SYM}, - {.str = "dso", .field = PERF_OUTPUT_DSO}, - {.str = "addr", .field = PERF_OUTPUT_ADDR}, }; /* default set to maintain compatibility with current format */ @@ -69,8 +60,7 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | - PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO, + PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -80,8 +70,7 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | - PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO, + PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -99,8 +88,7 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | - PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO, + PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -169,9 +157,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, !perf_session__has_traces(session, "record -R")) return -EINVAL; - if (PRINT_FIELD(IP)) { + if (PRINT_FIELD(SYM)) { if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP", - PERF_OUTPUT_IP)) + PERF_OUTPUT_SYM)) return -EINVAL; if (!no_callchain && @@ -179,24 +167,6 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, symbol_conf.use_callchain = false; } - if (PRINT_FIELD(ADDR) && - perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR", - PERF_OUTPUT_ADDR)) - return -EINVAL; - - if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { - pr_err("Display of symbols requested but neither sample IP nor " - "sample address\nis selected. Hence, no addresses to convert " - "to symbols.\n"); - return -EINVAL; - } - if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { - pr_err("Display of DSO requested but neither sample IP nor " - "sample address\nis selected. 
Hence, no addresses to convert " - "to DSO.\n"); - return -EINVAL; - } - if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) @@ -260,7 +230,7 @@ static void print_sample_start(struct perf_sample *sample, if (PRINT_FIELD(COMM)) { if (latency_format) printf("%8.8s ", thread->comm); - else if (PRINT_FIELD(IP) && symbol_conf.use_callchain) + else if (PRINT_FIELD(SYM) && symbol_conf.use_callchain) printf("%s ", thread->comm); else printf("%16s ", thread->comm); @@ -301,63 +271,6 @@ static void print_sample_start(struct perf_sample *sample, } } -static bool sample_addr_correlates_sym(struct perf_event_attr *attr) -{ - if ((attr->type == PERF_TYPE_SOFTWARE) && - ((attr->config == PERF_COUNT_SW_PAGE_FAULTS) || - (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN) || - (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) - return true; - - return false; -} - -static void print_sample_addr(union perf_event *event, - struct perf_sample *sample, - struct perf_session *session, - struct thread *thread, - struct perf_event_attr *attr) -{ - struct addr_location al; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - const char *symname, *dsoname; - - printf("%16" PRIx64, sample->addr); - - if (!sample_addr_correlates_sym(attr)) - return; - - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, - event->ip.pid, sample->addr, &al); - if (!al.map) - thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE, - event->ip.pid, sample->addr, &al); - - al.cpu = sample->cpu; - al.sym = NULL; - - if (al.map) - al.sym = map__find_symbol(al.map, al.addr, NULL); - - if (PRINT_FIELD(SYM)) { - if (al.sym && al.sym->name) - symname = al.sym->name; - else - symname = ""; - - printf(" %16s", symname); - } - - if (PRINT_FIELD(DSO)) { - if (al.map && al.map->dso && al.map->dso->name) - dsoname = al.map->dso->name; - else - dsoname = ""; - - printf(" (%s)", dsoname); - } -} - static void process_event(union perf_event *event __unused, struct perf_sample *sample, struct perf_evsel *evsel, @@ -375,16 +288,12 @@ static void process_event(union perf_event *event __unused, print_trace_event(sample->cpu, sample->raw_data, sample->raw_size); - if (PRINT_FIELD(ADDR)) - print_sample_addr(event, sample, session, thread, attr); - - if (PRINT_FIELD(IP)) { + if (PRINT_FIELD(SYM)) { if (!symbol_conf.use_callchain) printf(" "); else printf("\n"); - perf_session__print_ip(event, sample, session, - PRINT_FIELD(SYM), PRINT_FIELD(DSO)); + perf_session__print_symbols(event, sample, session); } printf("\n"); @@ -456,10 +365,6 @@ static int process_sample_event(union perf_event *event, last_timestamp = sample->time; return 0; } - - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) - return 0; - scripting_ops->process_event(event, sample, evsel, session, thread); session->hists.stats.total_period += sample->period; @@ -1080,9 +985,8 @@ static const struct option options[] = { OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), OPT_CALLBACK('f', "fields", NULL, "str", - "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", + "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. 
Fields: comm,tid,pid,time,cpu,event,trace,sym", parse_output_fields), - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_END() }; @@ -1263,11 +1167,6 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (session == NULL) return -ENOMEM; - if (cpu_list) { - if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) - return -1; - } - if (!no_callchain) symbol_conf.use_callchain = true; else diff --git a/trunk/tools/perf/builtin-stat.c b/trunk/tools/perf/builtin-stat.c index 1d08c8084cc4..a9f06715e44d 100644 --- a/trunk/tools/perf/builtin-stat.c +++ b/trunk/tools/perf/builtin-stat.c @@ -61,8 +61,6 @@ #include #define DEFAULT_SEPARATOR " " -#define CNTR_NOT_SUPPORTED "" -#define CNTR_NOT_COUNTED "" static struct perf_event_attr default_attrs[] = { @@ -450,7 +448,6 @@ static int run_perf_stat(int argc __used, const char **argv) if (verbose) ui__warning("%s event is not supported by the kernel.\n", event_name(counter)); - counter->supported = false; continue; } @@ -469,7 +466,6 @@ static int run_perf_stat(int argc __used, const char **argv) die("Not all events could be opened.\n"); return -1; } - counter->supported = true; } if (perf_evlist__set_filters(evsel_list)) { @@ -517,10 +513,7 @@ static void print_noise_pct(double total, double avg) if (avg) pct = 100.0*total/avg; - if (csv_output) - fprintf(stderr, "%s%.2f%%", csv_sep, pct); - else - fprintf(stderr, " ( +-%6.2f%% )", pct); + fprintf(stderr, " ( +-%6.2f%% )", pct); } static void print_noise(struct perf_evsel *evsel, double avg) @@ -868,7 +861,7 @@ static void print_counter_aggr(struct perf_evsel *counter) if (scaled == -1) { fprintf(stderr, "%*s%s%*s", csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + "", csv_sep, csv_output ? 0 : -24, event_name(counter)); @@ -885,13 +878,13 @@ static void print_counter_aggr(struct perf_evsel *counter) else abs_printout(-1, counter, avg); - print_noise(counter, avg); - if (csv_output) { fputc('\n', stderr); return; } + print_noise(counter, avg); + if (scaled) { double avg_enabled, avg_running; @@ -921,8 +914,7 @@ static void print_counter(struct perf_evsel *counter) csv_output ? 0 : -4, evsel_list->cpus->map[cpu], csv_sep, csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, + "", csv_sep, csv_output ? 
0 : -24, event_name(counter)); diff --git a/trunk/tools/perf/util/callchain.h b/trunk/tools/perf/util/callchain.h index 9b4ff16cac96..1a79df9f739f 100644 --- a/trunk/tools/perf/util/callchain.h +++ b/trunk/tools/perf/util/callchain.h @@ -14,11 +14,6 @@ enum chain_mode { CHAIN_GRAPH_REL }; -enum chain_order { - ORDER_CALLER, - ORDER_CALLEE -}; - struct callchain_node { struct callchain_node *parent; struct list_head siblings; @@ -46,7 +41,6 @@ struct callchain_param { u32 print_limit; double min_percent; sort_chain_func_t sort; - enum chain_order order; }; struct callchain_list { diff --git a/trunk/tools/perf/util/evsel.c b/trunk/tools/perf/util/evsel.c index a03a36b7908a..0239eb87b232 100644 --- a/trunk/tools/perf/util/evsel.c +++ b/trunk/tools/perf/util/evsel.c @@ -377,7 +377,6 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, array++; } - data->addr = 0; if (type & PERF_SAMPLE_ADDR) { data->addr = *array; array++; diff --git a/trunk/tools/perf/util/evsel.h b/trunk/tools/perf/util/evsel.h index e9a31554e265..7e9366e4490b 100644 --- a/trunk/tools/perf/util/evsel.h +++ b/trunk/tools/perf/util/evsel.h @@ -61,7 +61,6 @@ struct perf_evsel { off_t id_offset; }; struct cgroup_sel *cgrp; - bool supported; }; struct cpu_map; diff --git a/trunk/tools/perf/util/hist.c b/trunk/tools/perf/util/hist.c index 677e1da6bb3e..627a02e03c57 100644 --- a/trunk/tools/perf/util/hist.c +++ b/trunk/tools/perf/util/hist.c @@ -14,8 +14,7 @@ enum hist_filter { struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, - .min_percent = 0.5, - .order = ORDER_CALLEE + .min_percent = 0.5 }; u16 hists__col_len(struct hists *self, enum hist_column col) @@ -847,9 +846,6 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (h->filtered) - continue; - if (show_displacement) { if (h->pair != NULL) displacement = ((long)h->pair->position - diff --git a/trunk/tools/perf/util/python.c b/trunk/tools/perf/util/python.c index 8e0b5a39d8a7..a9ac0504aabd 100644 --- a/trunk/tools/perf/util/python.c +++ b/trunk/tools/perf/util/python.c @@ -247,7 +247,7 @@ struct pyrf_cpu_map { static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "cpustr", NULL }; + static char *kwlist[] = { "cpustr", NULL, NULL, }; char *cpustr = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s", @@ -316,7 +316,7 @@ struct pyrf_thread_map { static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "pid", "tid", NULL }; + static char *kwlist[] = { "pid", "tid", NULL, NULL, }; int pid = -1, tid = -1; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", @@ -418,9 +418,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, "wakeup_events", "bp_type", "bp_addr", - "bp_len", - NULL - }; + "bp_len", NULL, NULL, }; u64 sample_period = 0; u32 disabled = 0, inherit = 0, @@ -501,7 +499,7 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, struct thread_map *threads = NULL; PyObject *pcpus = NULL, *pthreads = NULL; int group = 0, inherit = 0; - static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL }; + static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, &pcpus, &pthreads, &group, &inherit)) @@ -584,7 +582,8 @@ static PyObject 
*pyrf_evlist__mmap(struct pyrf_evlist *pevlist, PyObject *args, PyObject *kwargs) { struct perf_evlist *evlist = &pevlist->evlist; - static char *kwlist[] = { "pages", "overwrite", NULL }; + static char *kwlist[] = {"pages", "overwrite", + NULL, NULL}; int pages = 128, overwrite = false; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist, @@ -604,7 +603,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, PyObject *args, PyObject *kwargs) { struct perf_evlist *evlist = &pevlist->evlist; - static char *kwlist[] = { "timeout", NULL }; + static char *kwlist[] = {"timeout", NULL, NULL}; int timeout = -1, n; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) @@ -675,7 +674,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, struct perf_evlist *evlist = &pevlist->evlist; union perf_event *event; int sample_id_all = 1, cpu; - static char *kwlist[] = { "cpu", "sample_id_all", NULL }; + static char *kwlist[] = {"cpu", "sample_id_all", NULL, NULL}; int err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, diff --git a/trunk/tools/perf/util/session.c b/trunk/tools/perf/util/session.c index 080e5336d89f..f5a8fbdd3f76 100644 --- a/trunk/tools/perf/util/session.c +++ b/trunk/tools/perf/util/session.c @@ -12,7 +12,6 @@ #include "session.h" #include "sort.h" #include "util.h" -#include "cpumap.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -248,14 +247,9 @@ int perf_session__resolve_callchain(struct perf_session *self, callchain_cursor_reset(&self->callchain_cursor); for (i = 0; i < chain->nr; i++) { - u64 ip; + u64 ip = chain->ips[i]; struct addr_location al; - if (callchain_param.order == ORDER_CALLEE) - ip = chain->ips[i]; - else - ip = chain->ips[chain->nr - i - 1]; - if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: @@ -714,9 +708,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, if (!dump_trace) return; - printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n", + printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n", event->header.misc, sample->pid, sample->tid, sample->ip, - sample->period, sample->addr); + sample->period); if (session->sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); @@ -1208,10 +1202,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, return NULL; } -void perf_session__print_ip(union perf_event *event, - struct perf_sample *sample, - struct perf_session *session, - int print_sym, int print_dso) +void perf_session__print_symbols(union perf_event *event, + struct perf_sample *sample, + struct perf_session *session) { struct addr_location al; const char *symname, *dsoname; @@ -1240,83 +1233,32 @@ void perf_session__print_ip(union perf_event *event, if (!node) break; - printf("\t%16" PRIx64, node->ip); - if (print_sym) { - if (node->sym && node->sym->name) - symname = node->sym->name; - else - symname = ""; - - printf(" %s", symname); - } - if (print_dso) { - if (node->map && node->map->dso && node->map->dso->name) - dsoname = node->map->dso->name; - else - dsoname = ""; - - printf(" (%s)", dsoname); - } - printf("\n"); - - callchain_cursor_advance(cursor); - } - - } else { - printf("%16" PRIx64, al.addr); - if (print_sym) { - if (al.sym && al.sym->name) - symname = al.sym->name; + if (node->sym && node->sym->name) + symname = node->sym->name; else symname = ""; - printf(" %s", symname); - } - - if (print_dso) { - if (al.map && al.map->dso && 
al.map->dso->name) - dsoname = al.map->dso->name; + if (node->map && node->map->dso && node->map->dso->name) + dsoname = node->map->dso->name; else dsoname = ""; - printf(" (%s)", dsoname); - } - } -} - -int perf_session__cpu_bitmap(struct perf_session *session, - const char *cpu_list, unsigned long *cpu_bitmap) -{ - int i; - struct cpu_map *map; - - for (i = 0; i < PERF_TYPE_MAX; ++i) { - struct perf_evsel *evsel; - - evsel = perf_session__find_first_evtype(session, i); - if (!evsel) - continue; + printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname); - if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { - pr_err("File does not contain CPU events. " - "Remove -c option to proceed.\n"); - return -1; + callchain_cursor_advance(cursor); } - } - map = cpu_map__new(cpu_list); - - for (i = 0; i < map->nr; i++) { - int cpu = map->map[i]; + } else { + if (al.sym && al.sym->name) + symname = al.sym->name; + else + symname = ""; - if (cpu >= MAX_NR_CPUS) { - pr_err("Requested CPU %d too large. " - "Consider raising MAX_NR_CPUS\n", cpu); - return -1; - } + if (al.map && al.map->dso && al.map->dso->name) + dsoname = al.map->dso->name; + else + dsoname = ""; - set_bit(cpu, cpu_bitmap); + printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname); } - - return 0; } diff --git a/trunk/tools/perf/util/session.h b/trunk/tools/perf/util/session.h index 5de754f4b7f3..66d4e1490879 100644 --- a/trunk/tools/perf/util/session.h +++ b/trunk/tools/perf/util/session.h @@ -167,12 +167,8 @@ static inline int perf_session__parse_sample(struct perf_session *session, struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_session__print_ip(union perf_event *event, +void perf_session__print_symbols(union perf_event *event, struct perf_sample *sample, - struct perf_session *session, - int print_sym, int print_dso); - -int perf_session__cpu_bitmap(struct perf_session *session, - const char *cpu_list, unsigned long *cpu_bitmap); + struct perf_session *session); #endif /* __PERF_SESSION_H */ diff --git a/trunk/tools/perf/util/sort.c b/trunk/tools/perf/util/sort.c index 401e220566fd..f44fa541d56e 100644 --- a/trunk/tools/perf/util/sort.c +++ b/trunk/tools/perf/util/sort.c @@ -15,6 +15,95 @@ char * field_sep; LIST_HEAD(hist_entry__sort_list); +static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width); +static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width); +static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width); +static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width); +static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width); +static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width); + +struct sort_entry sort_thread = { + .se_header = "Command: Pid", + .se_cmp = sort__thread_cmp, + .se_snprintf = hist_entry__thread_snprintf, + .se_width_idx = HISTC_THREAD, +}; + +struct sort_entry sort_comm = { + .se_header = "Command", + .se_cmp = sort__comm_cmp, + .se_collapse = sort__comm_collapse, + .se_snprintf = hist_entry__comm_snprintf, + .se_width_idx = HISTC_COMM, +}; + +struct sort_entry sort_dso = { + .se_header = "Shared Object", + .se_cmp = sort__dso_cmp, + .se_snprintf = hist_entry__dso_snprintf, + .se_width_idx = HISTC_DSO, +}; + +struct sort_entry sort_sym = { 
+ .se_header = "Symbol", + .se_cmp = sort__sym_cmp, + .se_snprintf = hist_entry__sym_snprintf, + .se_width_idx = HISTC_SYMBOL, +}; + +struct sort_entry sort_parent = { + .se_header = "Parent symbol", + .se_cmp = sort__parent_cmp, + .se_snprintf = hist_entry__parent_snprintf, + .se_width_idx = HISTC_PARENT, +}; + +struct sort_entry sort_cpu = { + .se_header = "CPU", + .se_cmp = sort__cpu_cmp, + .se_snprintf = hist_entry__cpu_snprintf, + .se_width_idx = HISTC_CPU, +}; + +struct sort_dimension { + const char *name; + struct sort_entry *entry; + int taken; +}; + +static struct sort_dimension sort_dimensions[] = { + { .name = "pid", .entry = &sort_thread, }, + { .name = "comm", .entry = &sort_comm, }, + { .name = "dso", .entry = &sort_dso, }, + { .name = "symbol", .entry = &sort_sym, }, + { .name = "parent", .entry = &sort_parent, }, + { .name = "cpu", .entry = &sort_cpu, }, +}; + +int64_t cmp_null(void *l, void *r) +{ + if (!l && !r) + return 0; + else if (!l) + return -1; + else + return 1; +} + +/* --sort pid */ + +int64_t +sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) { int n; @@ -36,24 +125,6 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) return n; } -static int64_t cmp_null(void *l, void *r) -{ - if (!l && !r) - return 0; - else if (!l) - return -1; - else - return 1; -} - -/* --sort pid */ - -static int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { @@ -61,50 +132,15 @@ static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, self->thread->comm ?: "", self->thread->pid); } -struct sort_entry sort_thread = { - .se_header = "Command: Pid", - .se_cmp = sort__thread_cmp, - .se_snprintf = hist_entry__thread_snprintf, - .se_width_idx = HISTC_THREAD, -}; - -/* --sort comm */ - -static int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; - - if (!comm_l || !comm_r) - return cmp_null(comm_l, comm_r); - - return strcmp(comm_l, comm_r); -} - static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); } -struct sort_entry sort_comm = { - .se_header = "Command", - .se_cmp = sort__comm_cmp, - .se_collapse = sort__comm_collapse, - .se_snprintf = hist_entry__comm_snprintf, - .se_width_idx = HISTC_COMM, -}; - /* --sort dso */ -static int64_t +int64_t sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) { struct dso *dso_l = left->ms.map ? 
left->ms.map->dso : NULL; @@ -137,16 +173,9 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); } -struct sort_entry sort_dso = { - .se_header = "Shared Object", - .se_cmp = sort__dso_cmp, - .se_snprintf = hist_entry__dso_snprintf, - .se_width_idx = HISTC_DSO, -}; - /* --sort symbol */ -static int64_t +int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { u64 ip_l, ip_r; @@ -182,16 +211,29 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, return ret; } -struct sort_entry sort_sym = { - .se_header = "Symbol", - .se_cmp = sort__sym_cmp, - .se_snprintf = hist_entry__sym_snprintf, - .se_width_idx = HISTC_SYMBOL, -}; +/* --sort comm */ + +int64_t +sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + +int64_t +sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) +{ + char *comm_l = left->thread->comm; + char *comm_r = right->thread->comm; + + if (!comm_l || !comm_r) + return cmp_null(comm_l, comm_r); + + return strcmp(comm_l, comm_r); +} /* --sort parent */ -static int64_t +int64_t sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) { struct symbol *sym_l = left->parent; @@ -210,16 +252,9 @@ static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf, self->parent ? self->parent->name : "[other]"); } -struct sort_entry sort_parent = { - .se_header = "Parent symbol", - .se_cmp = sort__parent_cmp, - .se_snprintf = hist_entry__parent_snprintf, - .se_width_idx = HISTC_PARENT, -}; - /* --sort cpu */ -static int64_t +int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right) { return right->cpu - left->cpu; @@ -231,28 +266,6 @@ static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%-*d", width, self->cpu); } -struct sort_entry sort_cpu = { - .se_header = "CPU", - .se_cmp = sort__cpu_cmp, - .se_snprintf = hist_entry__cpu_snprintf, - .se_width_idx = HISTC_CPU, -}; - -struct sort_dimension { - const char *name; - struct sort_entry *entry; - int taken; -}; - -static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, - { .name = "parent", .entry = &sort_parent, }, - { .name = "cpu", .entry = &sort_cpu, }, -}; - int sort_dimension__add(const char *tok) { unsigned int i; @@ -260,9 +273,15 @@ int sort_dimension__add(const char *tok) for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { struct sort_dimension *sd = &sort_dimensions[i]; + if (sd->taken) + continue; + if (strncasecmp(tok, sd->name, strlen(tok))) continue; + if (sd->entry->se_collapse) + sort__need_collapse = 1; + if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { @@ -275,12 +294,6 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } - if (sd->taken) - return 0; - - if (sd->entry->se_collapse) - sort__need_collapse = 1; - if (list_empty(&hist_entry__sort_list)) { if (!strcmp(sd->name, "pid")) sort__first_dimension = SORT_PID; diff --git a/trunk/tools/perf/util/sort.h b/trunk/tools/perf/util/sort.h index 77d0388ad415..0b91053a7d11 100644 --- a/trunk/tools/perf/util/sort.h +++ b/trunk/tools/perf/util/sort.h @@ -103,6 +103,20 @@ extern struct sort_entry sort_thread; extern struct list_head 
hist_entry__sort_list; void setup_sorting(const char * const usagestr[], const struct option *opts); + +extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used); +extern int64_t cmp_null(void *, void *); +extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *); +extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *); +int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right); +extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int); extern int sort_dimension__add(const char *); void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, const char *list_name, FILE *fp);
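The sort.c hunks above move the sort_entry definitions and the sort_dimensions table ahead of their users and reshuffle the checks in sort_dimension__add(): a dimension already taken is skipped, the token is matched as a case-insensitive prefix of the dimension name, and se_collapse flips sort__need_collapse. The following is a minimal, self-contained sketch of that table-driven registration pattern, under the assumption that only the matching logic is of interest; the names (demo_dimension, demo_dimension__add) are hypothetical and this is not the perf source:

    /*
     * Sketch of table-driven sort-key registration in the style of
     * tools/perf/util/sort.c.  Hypothetical names; only the lookup
     * logic mirrors the diff: skip dimensions already taken, accept
     * any case-insensitive prefix of the name, mark the hit as taken.
     */
    #include <stdio.h>
    #include <string.h>
    #include <strings.h>            /* strncasecmp() */

    struct demo_dimension {
            const char *name;
            int taken;
    };

    static struct demo_dimension demo_dimensions[] = {
            { .name = "pid"    },
            { .name = "comm"   },
            { .name = "dso"    },
            { .name = "symbol" },
            { .name = "parent" },
            { .name = "cpu"    },
    };

    /* Returns 0 if tok matched a free dimension, -1 otherwise. */
    static int demo_dimension__add(const char *tok)
    {
            unsigned int i;

            for (i = 0; i < sizeof(demo_dimensions) / sizeof(demo_dimensions[0]); i++) {
                    struct demo_dimension *d = &demo_dimensions[i];

                    if (d->taken)
                            continue;       /* already on the sort list */

                    /* tok must be a case-insensitive prefix of the name */
                    if (strncasecmp(tok, d->name, strlen(tok)))
                            continue;

                    d->taken = 1;
                    printf("added sort key: %s\n", d->name);
                    return 0;
            }
            return -1;
    }

    int main(void)
    {
            /* "sym" selects "symbol" by prefix, as in the perf option parser. */
            demo_dimension__add("sym");
            demo_dimension__add("pid");
            if (demo_dimension__add("bogus"))
                    fprintf(stderr, "unknown sort key\n");
            return 0;
    }

The prefix match is what lets users abbreviate keys on the command line (e.g. "sym" for "symbol"); the taken flag prevents the same dimension from being appended to the sort list twice when it is named more than once.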