diff --git a/[refs] b/[refs]
index 187643d72154..9d20dc00a25a 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 8b6b4628126fd73d0a53b499a26133c15b73c1e6
+refs/heads/master: da5aa861bea09197e6ae4d7c46618616064891e4
diff --git a/trunk/Documentation/acpi/apei/einj.txt b/trunk/Documentation/acpi/apei/einj.txt
index 5cc699ba5453..dfab71848dc8 100644
--- a/trunk/Documentation/acpi/apei/einj.txt
+++ b/trunk/Documentation/acpi/apei/einj.txt
@@ -48,19 +48,12 @@ directory apei/einj. The following files are provided.
 
 - param1
   This file is used to set the first error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address. Only available if param_extension module
-  parameter is specified.
+  physical memory address.
 
 - param2
   This file is used to set the second error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address mask. Only available if param_extension
-  module parameter is specified.
-
-Injecting parameter support is a BIOS version specific extension, that
-is, it only works on some BIOS version. If you want to use it, please
-make sure your BIOS version has the proper support and specify
-"param_extension=y" in module parameter.
 
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5.
diff --git a/trunk/Documentation/device-mapper/dm-crypt.txt b/trunk/Documentation/device-mapper/dm-crypt.txt
index 2c656ae43ba7..6b5c42dbbe84 100644
--- a/trunk/Documentation/device-mapper/dm-crypt.txt
+++ b/trunk/Documentation/device-mapper/dm-crypt.txt
@@ -4,8 +4,7 @@ dm-crypt
 Device-Mapper's "crypt" target provides transparent encryption of block
 devices using the kernel crypto API.
 
-Parameters: <cipher> <key> <iv_offset> <device path> <offset> \
-              [<#opt_params> <opt_params>]
+Parameters: <cipher> <key> <iv_offset> <device path> <offset>
 
 <cipher>
     Encryption cipher and an optional IV generation mode.
@@ -38,24 +37,6 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset> \
 <offset>
     Starting sector within the device where the encrypted data begins.
 
-<#opt_params>
-    Number of optional parameters. If there are no optional parameters,
-    the optional parameters section can be skipped or #opt_params can be zero.
-    Otherwise #opt_params is the number of following arguments.
-
-    Example of optional parameters section:
-        1 allow_discards
-
-allow_discards
-    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
-    The default is to ignore discard requests.
-
-    WARNING: Assess the specific security risks carefully before enabling this
-    option.  For example, allowing discards on encrypted devices may lead to
-    the leak of information about the ciphertext device (filesystem type,
-    used space etc.) if the discarded blocks can be located easily on the
-    device later.
-
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
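For orientation, a minimal sketch of loading the plain (no optional
parameters) crypt table that this revert documents; the mapping name,
device path, cipher, and hex key below are illustrative placeholders,
not values taken from this patch:

  # <start> <length> crypt <cipher> <key> <iv_offset> <device path> <offset>
  dmsetup create crypt0 --table "0 $(blockdev --getsz /dev/sdb1) crypt \
      aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 /dev/sdb1 0"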
diff --git a/trunk/Documentation/device-mapper/dm-flakey.txt b/trunk/Documentation/device-mapper/dm-flakey.txt
index 6ff5c2327227..c8efdfd19a65 100644
--- a/trunk/Documentation/device-mapper/dm-flakey.txt
+++ b/trunk/Documentation/device-mapper/dm-flakey.txt
@@ -1,53 +1,17 @@
 dm-flakey
 =========
 
-This target is the same as the linear target except that it exhibits
-unreliable behaviour periodically.  It's been found useful in simulating
-failing devices for testing purposes.
+This target is the same as the linear target except that it returns I/O
+errors periodically.  It's been found useful in simulating failing
+devices for testing purposes.
 
 Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then exhibits unreliable behaviour for <down
-interval> seconds, and then this cycle repeats.
+<up interval> seconds, then returns errors for <down interval> seconds,
+and then this cycle repeats.
 
-Also, consider using this in combination with the dm-delay target too,
-which can delay reads and writes and/or send them to different
-underlying devices.
-
-Table parameters
-----------------
-  <dev path> <offset> <up interval> <down interval> \
-    [<num_features> [<feature arguments>]]
-
-Mandatory parameters:
+Parameters: <dev path> <offset> <up interval> <down interval>
     <dev path>: Full pathname to the underlying block-device, or a
                 "major:minor" device-number.
     <offset>: Starting sector within the device.
     <up interval>: Number of seconds device is available.
     <down interval>: Number of seconds device returns errors.
-
-Optional feature parameters:
-  If no feature parameters are present, during the periods of
-  unreliability, all I/O returns errors.
-
-  drop_writes:
-       All write I/O is silently ignored.
-       Read I/O is handled correctly.
-
-  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
-       During <down interval>, replace <Nth_byte> of the data of
-       each matching bio with <value>.
-
-    <Nth_byte>: The offset of the byte to replace.
-               Counting starts at 1, to replace the first byte.
-    <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
-                'w' is incompatible with drop_writes.
-    <value>: The value (from 0-255) to write.
-    <flags>: Perform the replacement only if bio->bi_rw has all the
-            selected flags set.
-
-Examples:
-  corrupt_bio_byte 32 r 1 0
-       - replaces the 32nd byte of READ bios with the value 1
-
-  corrupt_bio_byte 224 w 0 32
-       - replaces the 224th byte of REQ_META (=32) bios with the value 0
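As a usage sketch for the parameter list above (the device path,
mapping name, and intervals are illustrative, not values from this
patch):

  # Device that is healthy for 180s, then errors for 30s, repeating:
  # <start> <length> flakey <dev path> <offset> <up interval> <down interval>
  dmsetup create flaky0 --table "0 409600 flakey /dev/sdc 0 180 30"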
diff --git a/trunk/Documentation/device-mapper/dm-raid.txt b/trunk/Documentation/device-mapper/dm-raid.txt
index 2a8c11331d2d..33b6b7071ac8 100644
--- a/trunk/Documentation/device-mapper/dm-raid.txt
+++ b/trunk/Documentation/device-mapper/dm-raid.txt
@@ -1,108 +1,70 @@
-dm-raid
--------
+Device-mapper RAID (dm-raid) is a bridge from DM to MD.  It
+provides a way to use device-mapper interfaces to access the MD RAID
+drivers.
 
-The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
-It allows the MD RAID drivers to be accessed using a device-mapper
-interface.
+As with all device-mapper targets, the nominal public interfaces are the
+constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO
+and STATUSTYPE_TABLE).  The CTR table looks like the following:
 
-The target is named "raid" and it accepts the following parameters:
-
-  <raid_type> <#raid_params> <raid_params> \
-    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
-
-<raid_type>:
-  raid1                RAID1 mirroring
-  raid4                RAID4 dedicated parity disk
-  raid5_la     RAID5 left asymmetric
-               - rotating parity 0 with data continuation
-  raid5_ra     RAID5 right asymmetric
-               - rotating parity N with data continuation
-  raid5_ls     RAID5 left symmetric
-               - rotating parity 0 with data restart
-  raid5_rs     RAID5 right symmetric
-               - rotating parity N with data restart
-  raid6_zr     RAID6 zero restart
-               - rotating parity zero (left-to-right) with data restart
-  raid6_nr     RAID6 N restart
-               - rotating parity N (right-to-left) with data restart
-  raid6_nc     RAID6 N continue
-               - rotating parity N (right-to-left) with data continuation
-
-  Reference: Chapter 4 of
-  http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
-
-<#raid_params>: The number of parameters that follow.
-
-<raid_params> consists of
-    Mandatory parameters:
-        <chunk_size>: Chunk size in sectors.  This parameter is often known as
-                      "stripe size".  It is the only mandatory parameter and
-                      is placed first.
-
-    followed by optional parameters (in any order):
-       [sync|nosync]   Force or prevent RAID initialization.
-
-       [rebuild <idx>] Rebuild drive number idx (first drive is 0).
-
-       [daemon_sleep <ms>]
-               Interval between runs of the bitmap daemon that
-               clear bits.  A longer interval means less bitmap I/O but
-               resyncing after a failure is likely to take longer.
-
-       [min_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
-       [max_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
-       [write_mostly <idx>]               Drive index is write-mostly
-       [max_write_behind <sectors>]       See '--write-behind=' (man mdadm)
-       [stripe_cache <sectors>]           Stripe cache size (higher RAIDs only)
-       [region_size <sectors>]
-               The region_size multiplied by the number of regions is the
-               logical size of the array.  The bitmap records the device
-               synchronisation state for each region.
-
-<#raid_devs>: The number of devices composing the array.
-       Each device consists of two entries.  The first is the device
-       containing the metadata (if any); the second is the one containing the
-       data.
-
-       If a drive has failed or is missing at creation time, a '-' can be
-       given for both the metadata and data drives for a given position.
-
-
-Example tables
---------------
-# RAID4 - 4 data drives, 1 parity (no metadata devices)
+1: <s> <l> raid \
+2:     <raid_type> <#raid_params> <raid_params> \
+3:     <#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN>
+
+Line 1 contains the standard first three arguments to any device-mapper
+target - the start, length, and target type fields.  The target type in
+this case is "raid".
+
+Line 2 contains the arguments that define the particular raid
+type/personality/level, the required arguments for that raid type, and
+any optional arguments.  Possible raid types include: raid4, raid5_la,
+raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc.  (raid1 is
+planned for the future.)  The list of required and optional parameters
+is the same for all the current raid types.  The required parameters are
+positional, while the optional parameters are given as key/value pairs.
+The possible parameters are as follows:
+ <chunk_size>           Chunk size in sectors.
+ [[no]sync]             Force/Prevent RAID initialization
+ [rebuild <idx>]        Rebuild the drive indicated by the index
+ [daemon_sleep <ms>]    Time between bitmap daemon work to clear bits
+ [min_recovery_rate <kB/sec/disk>]      Throttle RAID initialization
+ [max_recovery_rate <kB/sec/disk>]      Throttle RAID initialization
+ [max_write_behind <sectors>]           See '--write-behind=' (man mdadm)
+ [stripe_cache <sectors>]               Stripe cache size for higher RAIDs
+
+Line 3 contains the list of devices that compose the array in
+metadata/data device pairs.  If the metadata is stored separately, a '-'
+is given for the metadata device position.  If a drive has failed or is
+missing at creation time, a '-' can be given for both the metadata and
+data drives for a given position.
+
+NB. Currently all metadata devices must be specified as '-'.
+
+Examples:
+# RAID4 - 4 data drives, 1 parity
 # No metadata devices specified to hold superblock/bitmap info
 # Chunk size of 1MiB
 # (Lines separated for easy reading)
-
 0 1960893648 raid \
        raid4 1 2048 \
        5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
 
-# RAID4 - 4 data drives, 1 parity (with metadata devices)
+# RAID4 - 4 data drives, 1 parity (no metadata devices)
 # Chunk size of 1MiB, force RAID initialization,
 #      min recovery rate at 20 kiB/sec/disk
 
 0 1960893648 raid \
-       raid4 4 2048 sync min_recovery_rate 20 \
-       5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
+        raid4 4 2048 min_recovery_rate 20 sync \
+        5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
 
-'dmsetup table' displays the table used to construct the mapping.
-The optional parameters are always printed in the order listed
-above with "sync" or "nosync" always output ahead of the other
-arguments, regardless of the order used when originally loading the table.
-Arguments that can be repeated are ordered by value.
+Performing a 'dmsetup table' should display the CTR table used to
+construct the mapping (with possible reordering of optional
+parameters).
 
-'dmsetup status' yields information on the state and health of the
-array.
-The output is as follows:
+Performing a 'dmsetup status' will yield information on the state and
+health of the array.  The output is as follows:
        1: <s> <l> raid \
        2:      <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
 
-Line 1 is the standard output produced by device-mapper.
-Line 2 is produced by the raid target, and best explained by example:
+Line 1 is standard DM output.  Line 2 is best shown by example:
        0 1960893648 raid raid4 5 AAAAA 2/490221568
 Here we can see the RAID type is raid4, there are 5 devices - all of
 which are 'A'live, and the array is 2/490221568 complete with recovery.
-Faulty or missing devices are marked 'D'.  Devices that are out-of-sync
-are marked 'a'.
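To drive the first example table above through dmsetup (the mapping
name is an illustrative placeholder; the drive pairs are the ones from
the example):

  dmsetup create my_raid4 --table "0 1960893648 raid raid4 1 2048 \
      5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81"
  dmsetup status my_raid4   # e.g. "0 1960893648 raid raid4 5 AAAAA 2/490221568"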
diff --git a/trunk/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/trunk/Documentation/devicetree/bindings/gpio/gpio_keys.txt
index 5c2c02140a62..7190c99d7611 100644
--- a/trunk/Documentation/devicetree/bindings/gpio/gpio_keys.txt
+++ b/trunk/Documentation/devicetree/bindings/gpio/gpio_keys.txt
@@ -10,7 +10,7 @@ Optional properties:
 Each button (key) is represented as a sub-node of "gpio-keys":
 Subnode properties:
 
-       - gpios: OF device-tree gpio specification.
+       - gpios: OF devcie-tree gpio specificatin.
 
        - label: Descriptive name of the key.
        - linux,code: Keycode to emit.
diff --git a/trunk/Documentation/devicetree/bindings/input/fsl-mma8450.txt b/trunk/Documentation/devicetree/bindings/input/fsl-mma8450.txt
deleted file mode 100644
index a00c94ccbdee..000000000000
--- a/trunk/Documentation/devicetree/bindings/input/fsl-mma8450.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-* Freescale MMA8450 3-Axis Accelerometer
-
-Required properties:
-- compatible : "fsl,mma8450".
-
-Example:
-
-accelerometer: mma8450@1c {
-       compatible = "fsl,mma8450";
-       reg = <0x1c>;
-};
diff --git a/trunk/Documentation/fault-injection/fault-injection.txt b/trunk/Documentation/fault-injection/fault-injection.txt
index 82a5d250d75e..7be15e44d481 100644
--- a/trunk/Documentation/fault-injection/fault-injection.txt
+++ b/trunk/Documentation/fault-injection/fault-injection.txt
@@ -143,7 +143,8 @@ o provide a way to configure fault attributes
 failslab, fail_page_alloc, and fail_make_request use this way.
 Helper functions:
 
-       fault_create_debugfs_attr(name, parent, attr);
+       init_fault_attr_dentries(entries, attr, name);
+       void cleanup_fault_attr_dentries(entries);
 
 - module parameters
 
diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt
index 43f48098220d..ea0bace0124a 100644
--- a/trunk/Documentation/feature-removal-schedule.txt
+++ b/trunk/Documentation/feature-removal-schedule.txt
@@ -296,6 +296,15 @@ Who:   Ravikiran Thirumalai
 
 ---------------------------
 
+What:  CONFIG_THERMAL_HWMON
+When:  January 2009
+Why:   This option was introduced just to allow older lm-sensors userspace
+       to keep working over the upgrade to 2.6.26. At the scheduled time of
+       removal fixed lm-sensors (2.x or 3.x) should be readily available.
+Who:   Rene Herman
+
+---------------------------
+
 What:  Code that is now under CONFIG_WIRELESS_EXT_SYSFS
        (in net/core/net-sysfs.c)
 When:  After the only user (hal) has seen a release with the patches
diff --git a/trunk/Documentation/frv/booting.txt b/trunk/Documentation/frv/booting.txt
index 37c4d84a0e57..ace200b7c214 100644
--- a/trunk/Documentation/frv/booting.txt
+++ b/trunk/Documentation/frv/booting.txt
@@ -106,20 +106,13 @@ separated by spaces:
      To use the first on-chip serial port at baud rate 115200, no parity, 8
      bits, and no flow control.
 
- (*) root=<xxxx>
+ (*) root=/dev/<xxxx>
 
-     This specifies the device upon which the root filesystem resides. It
-     may be specified by major and minor number, device path, or even
-     partition uuid, if supported. For example:
+     This specifies the device upon which the root filesystem resides. For
+     example:
 
        /dev/nfs        NFS root filesystem
        /dev/mtdblock3  Fourth RedBoot partition on the System Flash
-       PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1
-               first partition after the partition with the given UUID
-       253:0           Device with major 253 and minor 0
-
-     Authoritative information can be found in
-     "Documentation/kernel-parameters.txt".
 
  (*) rw
 
diff --git a/trunk/Documentation/ioctl/ioctl-number.txt b/trunk/Documentation/ioctl/ioctl-number.txt
index 845a191004b1..72ba8d51dbc1 100644
--- a/trunk/Documentation/ioctl/ioctl-number.txt
+++ b/trunk/Documentation/ioctl/ioctl-number.txt
@@ -292,7 +292,6 @@ Code  Seq#(hex) Include File            Comments
 0xA0   all     linux/sdp/sdp.h         Industrial Device Project
-0xA2   00-0F   arch/tile/include/asm/hardwall.h
 0xA3   80-8F   Port ACL                in development:
 0xA3   90-9F   linux/dtlk.h
diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt
index e279b7242912..26a83743af19 100644
--- a/trunk/Documentation/kernel-parameters.txt
+++ b/trunk/Documentation/kernel-parameters.txt
@@ -163,11 +163,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
                        See also Documentation/power/pm.txt, pci=noacpi
 
-       acpi_rsdp=      [ACPI,EFI,KEXEC]
-                       Pass the RSDP address to the kernel, mostly used
-                       on machines running EFI runtime service to boot the
-                       second kernel for kdump.
-
        acpi_apic_instance=     [ACPI, IOAPIC]
                        Format: <int>
                        2: use 2nd APIC table, if available
@@ -551,9 +546,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        /proc/<pid>/coredump_filter.
                        See also Documentation/filesystems/proc.txt.
 
-       cpuidle.off=1   [CPU_IDLE]
-                       disable the cpuidle sub-system
-
        cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
                        Format: <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
 
@@ -2248,7 +2240,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        ro              [KNL] Mount root device read-only on boot
 
        root=           [KNL] Root filesystem
-                       See name_to_dev_t comment in init/do_mounts.c.
 
        rootdelay=      [KNL] Delay (in seconds) to pause before attempting to
                        mount the root filesystem
diff --git a/trunk/Documentation/m68k/kernel-options.txt b/trunk/Documentation/m68k/kernel-options.txt
index 97d45f276fe6..c93bed66e25d 100644
--- a/trunk/Documentation/m68k/kernel-options.txt
+++ b/trunk/Documentation/m68k/kernel-options.txt
@@ -129,20 +129,6 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
 the first of these. You can find out all valid major numbers by
 looking into include/linux/major.h.
 
-In addition to major and minor numbers, if the device containing your
-root partition uses a partition table format with unique partition
-identifiers, then you may use them. For instance,
-"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF".  It is also
-possible to reference another partition on the same device using a
-known partition UUID as the starting point. For example,
-if partition 5 of the device has the UUID of
-00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
-follows:
-    PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
-
-Authoritative information can be found in
-"Documentation/kernel-parameters.txt".
-
 
 2.2) ro, rw
 -----------
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS
index 07cfd8deaad5..c9c6324a7a9f 100644
--- a/trunk/MAINTAINERS
+++ b/trunk/MAINTAINERS
@@ -3367,12 +3367,6 @@ F:      drivers/net/ixgb/
 F:     drivers/net/ixgbe/
 F:     drivers/net/ixgbevf/
 
-INTEL MRST PMU DRIVER
-M:     Len Brown
-L:     linux-pm@lists.linux-foundation.org
-S:     Supported
-F:     arch/x86/platform/mrst/pmu.*
-
 INTEL PRO/WIRELESS 2100 NETWORK CONNECTION SUPPORT
 L:     linux-wireless@vger.kernel.org
 S:     Orphan
@@ -4728,7 +4722,6 @@ S:     Maintained
 F:     drivers/of
 F:     include/linux/of*.h
 K:     of_get_property
-K:     of_match_table
 
 OPENRISC ARCHITECTURE
 M:     Jonas Bonn
@@ -6325,7 +6318,6 @@ F:     include/linux/sysv_fs.h
 TARGET SUBSYSTEM
 M:     Nicholas A. Bellinger
 L:     linux-scsi@vger.kernel.org
-L:     target-devel@vger.kernel.org
 L:     http://groups.google.com/group/linux-iscsi-target-dev
 W:     http://www.linux-iscsi.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master
diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig
index 4b0669cbb3b0..26b0e2397a57 100644
--- a/trunk/arch/Kconfig
+++ b/trunk/arch/Kconfig
@@ -178,7 +178,4 @@ config HAVE_ARCH_MUTEX_CPU_RELAX
 config HAVE_RCU_TABLE_FREE
        bool
 
-config ARCH_HAVE_NMI_SAFE_CMPXCHG
-       bool
-
 source "kernel/gcov/Kconfig"
diff --git a/trunk/arch/alpha/Kconfig b/trunk/arch/alpha/Kconfig
index 60cde53d266c..ca2da8da6e9c 100644
--- a/trunk/arch/alpha/Kconfig
+++ b/trunk/arch/alpha/Kconfig
@@ -14,7 +14,6 @@ config ALPHA
        select AUTO_IRQ_AFFINITY if SMP
        select GENERIC_IRQ_SHOW
        select ARCH_WANT_OPTIONAL_GPIOLIB
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
        help
          The Alpha is a 64-bit general-purpose processor designed and
          marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/trunk/arch/arm/kernel/process.c b/trunk/arch/arm/kernel/process.c
index 1a347f481e5e..5e1e54197227 100644
--- a/trunk/arch/arm/kernel/process.c
+++ b/trunk/arch/arm/kernel/process.c
@@ -30,7 +30,6 @@
 #include
 #include
 #include
-#include <linux/cpuidle.h>
 
 #include
 #include
@@ -197,8 +196,7 @@ void cpu_idle(void)
                                cpu_relax();
                        } else {
                                stop_critical_timings();
-                               if (cpuidle_idle_call())
-                                       pm_idle();
+                               pm_idle();
                                start_critical_timings();
                                /*
                                 * This will eventually be removed - pm_idle
diff --git a/trunk/arch/avr32/Kconfig b/trunk/arch/avr32/Kconfig
index 197e96f70405..e9d689b7c833 100644
--- a/trunk/arch/avr32/Kconfig
+++ b/trunk/arch/avr32/Kconfig
@@ -10,7 +10,6 @@ config AVR32
        select GENERIC_IRQ_PROBE
        select HARDIRQS_SW_RESEND
        select GENERIC_IRQ_SHOW
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
        help
          AVR32 is a high-performance 32-bit RISC microprocessor core,
         designed for cost-sensitive embedded applications, with particular
diff --git a/trunk/arch/cris/arch-v10/drivers/sync_serial.c b/trunk/arch/cris/arch-v10/drivers/sync_serial.c
index 466af40c5822..850265373611 100644
--- a/trunk/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/trunk/arch/cris/arch-v10/drivers/sync_serial.c
@@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file);
 static int sync_serial_release(struct inode *inode, struct file *file);
 static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
 
-static long sync_serial_ioctl(struct file *file,
+static int sync_serial_ioctl(struct file *file,
                  unsigned int cmd, unsigned long arg);
 static ssize_t sync_serial_write(struct file *file, const char *buf,
        size_t count, loff_t *ppos);
@@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file)
                *R_IRQ_MASK1_SET = 1 << port->data_avail_bit;
                DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev));
        }
-       err = 0;
+       ret = 0;
 out:
        mutex_unlock(&sync_serial_mutex);
-       return err;
+       return ret;
 }
 
 static int sync_serial_release(struct inode *inode, struct file *file)
diff --git a/trunk/arch/cris/arch-v10/kernel/irq.c b/trunk/arch/cris/arch-v10/kernel/irq.c
index ba0e5965d6e3..907cfb5a873d 100644
--- a/trunk/arch/cris/arch-v10/kernel/irq.c
+++ b/trunk/arch/cris/arch-v10/kernel/irq.c
@@ -20,9 +20,6 @@
 #define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr));
 #define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr));
 
-extern void kgdb_init(void);
-extern void breakpoint(void);
-
 /* don't use set_int_vector, it bypasses the linux interrupt handlers. it is
  * global just so that the kernel gdb can use it.
  */
diff --git a/trunk/arch/cris/include/asm/thread_info.h b/trunk/arch/cris/include/asm/thread_info.h
index 332f19c54557..29b74a105830 100644
--- a/trunk/arch/cris/include/asm/thread_info.h
+++ b/trunk/arch/cris/include/asm/thread_info.h
@@ -11,6 +11,8 @@
 
 #ifdef __KERNEL__
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
 #ifndef __ASSEMBLY__
 #include
 #include
@@ -65,10 +67,8 @@ struct thread_info {
 
 #define init_thread_info       (init_thread_union.thread_info)
 
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 /* thread information allocation */
-#define alloc_thread_info_node(tsk, node)      \
-       ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1))
+#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
 #define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
 
 #endif /* !__ASSEMBLY__ */
diff --git a/trunk/arch/frv/Kconfig b/trunk/arch/frv/Kconfig
index bad27a6ff407..cb884e489425 100644
--- a/trunk/arch/frv/Kconfig
+++ b/trunk/arch/frv/Kconfig
@@ -7,7 +7,6 @@ config FRV
        select HAVE_PERF_EVENTS
        select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ZONE_DMA
        bool
diff --git a/trunk/arch/ia64/Kconfig b/trunk/arch/ia64/Kconfig
index 124854714958..64c7ab7e7a81 100644
--- a/trunk/arch/ia64/Kconfig
+++ b/trunk/arch/ia64/Kconfig
@@ -28,7 +28,6 @@ config IA64
        select IRQ_PER_CPU
        select GENERIC_IRQ_SHOW
        select ARCH_WANT_OPTIONAL_GPIOLIB
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
        default y
        help
          The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/trunk/arch/ia64/kernel/efi.c b/trunk/arch/ia64/kernel/efi.c
index c38d22e5e902..6fc03aff046c 100644
--- a/trunk/arch/ia64/kernel/efi.c
+++ b/trunk/arch/ia64/kernel/efi.c
@@ -156,7 +156,7 @@ prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \
 #define STUB_SET_VARIABLE(prefix, adjust_arg)                                 \
 static efi_status_t                                                           \
 prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor,                \
-                      u32 attr, unsigned long data_size,                      \
+                      unsigned long attr, unsigned long data_size,            \
                       void *data)                                             \
 {                                                                             \
        struct ia64_fpreg fr[6];                                               \
diff --git a/trunk/arch/m68k/Kconfig b/trunk/arch/m68k/Kconfig
index 9e8ee9d2b8ca..284cd3771eaa 100644
--- a/trunk/arch/m68k/Kconfig
+++ b/trunk/arch/m68k/Kconfig
@@ -6,7 +6,6 @@ config M68K
        select GENERIC_ATOMIC64 if MMU
        select HAVE_GENERIC_HARDIRQS if !MMU
        select GENERIC_IRQ_SHOW if !MMU
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 
 config RWSEM_GENERIC_SPINLOCK
        bool
diff --git a/trunk/arch/parisc/Kconfig b/trunk/arch/parisc/Kconfig
index e077b0bf56ca..65adc86a230e 100644
--- a/trunk/arch/parisc/Kconfig
+++ b/trunk/arch/parisc/Kconfig
@@ -15,7 +15,6 @@ config PARISC
        select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_PROBE
        select IRQ_PER_CPU
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
        help
          The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/trunk/arch/parisc/include/asm/atomic.h b/trunk/arch/parisc/include/asm/atomic.h
index 4054b31e0fa9..b1dc71f5534e 100644
--- a/trunk/arch/parisc/include/asm/atomic.h
+++ b/trunk/arch/parisc/include/asm/atomic.h
@@ -258,10 +258,10 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 
 #define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
 
-static __inline__ s64
+static __inline__ int
 __atomic64_add_return(s64 i, atomic64_t *v)
 {
-       s64 ret;
+       int ret;
        unsigned long flags;
        _atomic_spin_lock_irqsave(v, flags);
 
diff --git a/trunk/arch/parisc/include/asm/futex.h b/trunk/arch/parisc/include/asm/futex.h
index 2388bdb32832..67a33cc27ef2 100644
--- a/trunk/arch/parisc/include/asm/futex.h
+++ b/trunk/arch/parisc/include/asm/futex.h
@@ -5,14 +5,11 @@
 
 #include
 #include
-#include
 #include
 
 static inline int
 futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 {
-       unsigned long int flags;
-       u32 val;
        int op = (encoded_op >> 28) & 7;
        int cmp = (encoded_op >> 24) & 15;
        int oparg = (encoded_op << 8) >> 20;
@@ -21,58 +18,21 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
        if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
                oparg = 1 << oparg;
 
-       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)))
+       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
                return -EFAULT;
 
        pagefault_disable();
 
-       _atomic_spin_lock_irqsave(uaddr, flags);
-
        switch (op) {
        case FUTEX_OP_SET:
-               /* *(int *)UADDR2 = OPARG; */
-               ret = get_user(oldval, uaddr);
-               if (!ret)
-                       ret = put_user(oparg, uaddr);
-               break;
        case FUTEX_OP_ADD:
-               /* *(int *)UADDR2 += OPARG; */
-               ret = get_user(oldval, uaddr);
-               if (!ret) {
-                       val = oldval + oparg;
-                       ret = put_user(val, uaddr);
-               }
-               break;
        case FUTEX_OP_OR:
-               /* *(int *)UADDR2 |= OPARG; */
-               ret = get_user(oldval, uaddr);
-               if (!ret) {
-                       val = oldval | oparg;
-                       ret = put_user(val, uaddr);
-               }
-               break;
        case FUTEX_OP_ANDN:
-               /* *(int *)UADDR2 &= ~OPARG; */
-               ret = get_user(oldval, uaddr);
-               if (!ret) {
-                       val = oldval & ~oparg;
-                       ret = put_user(val, uaddr);
-               }
-               break;
        case FUTEX_OP_XOR:
-               /* *(int *)UADDR2 ^= OPARG; */
-               ret = get_user(oldval, uaddr);
-               if (!ret) {
-                       val = oldval ^ oparg;
-                       ret = put_user(val, uaddr);
-               }
-               break;
        default:
                ret = -ENOSYS;
        }
 
-       _atomic_spin_unlock_irqrestore(uaddr, flags);
-
        pagefault_enable();
 
        if (!ret) {
@@ -94,9 +54,7 @@ static inline int
 futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                              u32 oldval, u32 newval)
 {
-       int ret;
        u32 val;
-       unsigned long flags;
 
        /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
         * our gateway page, and causes no end of trouble...
@@ -107,24 +65,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
                return -EFAULT;
 
-       /* HPPA has no cmpxchg in hardware and therefore the
-        * best we can do here is use an array of locks. The
-        * lock selected is based on a hash of the userspace
-        * address. This should scale to a couple of CPUs.
-        */
-
-       _atomic_spin_lock_irqsave(uaddr, flags);
-
-       ret = get_user(val, uaddr);
-
-       if (!ret && val == oldval)
-               ret = put_user(newval, uaddr);
-
+       if (get_user(val, uaddr))
+               return -EFAULT;
+       if (val == oldval && put_user(newval, uaddr))
+               return -EFAULT;
        *uval = val;
-
-       _atomic_spin_unlock_irqrestore(uaddr, flags);
-
-       return ret;
+       return 0;
 }
 
 #endif /*__KERNEL__*/
diff --git a/trunk/arch/parisc/include/asm/unistd.h b/trunk/arch/parisc/include/asm/unistd.h
index d61de64f990a..3392de3e7be0 100644
--- a/trunk/arch/parisc/include/asm/unistd.h
+++ b/trunk/arch/parisc/include/asm/unistd.h
@@ -821,9 +821,8 @@
 #define __NR_open_by_handle_at (__NR_Linux + 326)
 #define __NR_syncfs            (__NR_Linux + 327)
 #define __NR_setns             (__NR_Linux + 328)
-#define __NR_sendmmsg          (__NR_Linux + 329)
 
-#define __NR_Linux_syscalls    (__NR_sendmmsg + 1)
+#define __NR_Linux_syscalls    (__NR_setns + 1)
 
 #define __IGNORE_select                /* newselect */
diff --git a/trunk/arch/parisc/kernel/syscall_table.S b/trunk/arch/parisc/kernel/syscall_table.S
index e66366fd2abc..34a4f5a2fffb 100644
--- a/trunk/arch/parisc/kernel/syscall_table.S
+++ b/trunk/arch/parisc/kernel/syscall_table.S
@@ -427,7 +427,6 @@
        ENTRY_COMP(open_by_handle_at)
        ENTRY_SAME(syncfs)
        ENTRY_SAME(setns)
-       ENTRY_COMP(sendmmsg)
 
        /* Nothing yet */
 
diff --git a/trunk/arch/powerpc/Kconfig b/trunk/arch/powerpc/Kconfig
index 6926b61acfea..374c475e56a3 100644
--- a/trunk/arch/powerpc/Kconfig
+++ b/trunk/arch/powerpc/Kconfig
@@ -136,7 +136,6 @@ config PPC
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_BPF_JIT if (PPC64 && NET)
        select HAVE_ARCH_JUMP_LABEL
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config EARLY_PRINTK
        bool
diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig
index ed5cb5af5281..c03fef7a9c22 100644
--- a/trunk/arch/s390/Kconfig
+++ b/trunk/arch/s390/Kconfig
@@ -81,7 +81,6 @@ config S390
        select INIT_ALL_POSSIBLE
        select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_LZMA
@@ -274,11 +273,11 @@ config MARCH_Z10
          on older machines.
 
 config MARCH_Z196
-       bool "IBM zEnterprise 114 and 196"
+       bool "IBM zEnterprise 196"
        help
-         Select this to enable optimizations for IBM zEnterprise 114 and 196
-         (2818 and 2817 series). The kernel will be slightly faster but will
-         not work on older machines.
+         Select this to enable optimizations for IBM zEnterprise 196
+         (2817 series). The kernel will be slightly faster but will not work
+         on older machines.
 
 endchoice
diff --git a/trunk/arch/s390/include/asm/ipl.h b/trunk/arch/s390/include/asm/ipl.h
index 97cc4403fabf..5e95d95450b3 100644
--- a/trunk/arch/s390/include/asm/ipl.h
+++ b/trunk/arch/s390/include/asm/ipl.h
@@ -167,6 +167,5 @@ enum diag308_rc {
 };
 
 extern int diag308(unsigned long subcode, void *addr);
-extern void diag308_reset(void);
 
 #endif /* _ASM_S390_IPL_H */
diff --git a/trunk/arch/s390/include/asm/lowcore.h b/trunk/arch/s390/include/asm/lowcore.h
index e85c911aabf0..f26280d9e88d 100644
--- a/trunk/arch/s390/include/asm/lowcore.h
+++ b/trunk/arch/s390/include/asm/lowcore.h
@@ -18,7 +18,6 @@
 void system_call(void);
 void pgm_check_handler(void);
 void mcck_int_handler(void);
 void io_int_handler(void);
-void psw_restart_int_handler(void);
 
 #ifdef CONFIG_32BIT
 
@@ -151,10 +150,7 @@ struct _lowcore {
         */
        __u32   ipib;                   /* 0x0e00 */
        __u32   ipib_checksum;          /* 0x0e04 */
-
-       /* 64 bit save area */
-       __u64   save_area_64;           /* 0x0e08 */
-       __u8    pad_0x0e10[0x0f00-0x0e10];      /* 0x0e10 */
+       __u8    pad_0x0e08[0x0f00-0x0e08];      /* 0x0e08 */
 
        /* Extended facility list */
        __u64   stfle_fac_list[32];     /* 0x0f00 */
@@ -290,10 +286,7 @@ struct _lowcore {
         */
        __u64   ipib;                   /* 0x0e00 */
        __u32   ipib_checksum;          /* 0x0e08 */
-
-       /* 64 bit save area */
-       __u64   save_area_64;           /* 0x0e0c */
-       __u8    pad_0x0e14[0x0f00-0x0e14];      /* 0x0e14 */
+       __u8    pad_0x0e0c[0x0f00-0x0e0c];      /* 0x0e0c */
 
        /* Extended facility list */
        __u64   stfle_fac_list[32];     /* 0x0f00 */
diff --git a/trunk/arch/s390/include/asm/processor.h b/trunk/arch/s390/include/asm/processor.h
index a4b6229e5d4b..55dfcc8bdc0d 100644
--- a/trunk/arch/s390/include/asm/processor.h
+++ b/trunk/arch/s390/include/asm/processor.h
@@ -119,12 +119,14 @@ struct stack_frame {
  * Do necessary setup to start up a new thread.
  */
 #define start_thread(regs, new_psw, new_stackp) do {          \
+       set_fs(USER_DS);                                        \
        regs->psw.mask  = psw_user_bits;                        \
        regs->psw.addr  = new_psw | PSW_ADDR_AMODE;             \
        regs->gprs[15]  = new_stackp;                           \
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {         \
+       set_fs(USER_DS);                                        \
        regs->psw.mask  = psw_user32_bits;                      \
        regs->psw.addr  = new_psw | PSW_ADDR_AMODE;             \
        regs->gprs[15]  = new_stackp;                           \
diff --git a/trunk/arch/s390/include/asm/system.h b/trunk/arch/s390/include/asm/system.h
index 6582f69f2389..d382629a0172 100644
--- a/trunk/arch/s390/include/asm/system.h
+++ b/trunk/arch/s390/include/asm/system.h
@@ -113,7 +113,6 @@ extern void pfault_fini(void);
 extern void cmma_init(void);
 
 extern int memcpy_real(void *, void *, size_t);
-extern void copy_to_absolute_zero(void *dest, void *src, size_t count);
 
 #define finish_arch_switch(prev) do {                                       \
        set_fs(current->thread.mm_segment);                                  \
diff --git a/trunk/arch/s390/kernel/asm-offsets.c b/trunk/arch/s390/kernel/asm-offsets.c
index 532fd4322156..05d8f38734ec 100644
--- a/trunk/arch/s390/kernel/asm-offsets.c
+++ b/trunk/arch/s390/kernel/asm-offsets.c
@@ -27,9 +27,12 @@ int main(void)
        BLANK();
        DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
        BLANK();
-       DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
-       DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
-       DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
+       DEFINE(__THREAD_per_cause,
+              offsetof(struct task_struct, thread.per_event.cause));
+       DEFINE(__THREAD_per_address,
+              offsetof(struct task_struct, thread.per_event.address));
+       DEFINE(__THREAD_per_paid,
+              offsetof(struct task_struct, thread.per_event.paid));
        BLANK();
        DEFINE(__TI_task, offsetof(struct thread_info, task));
        DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
@@ -139,7 +142,6 @@ int main(void)
        DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
        DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
        DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
-       DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64));
 #ifdef CONFIG_32BIT
        DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
 #else /* CONFIG_32BIT */
diff --git a/trunk/arch/s390/kernel/base.S b/trunk/arch/s390/kernel/base.S
index 255435663bf8..209938c1dfc8 100644
--- a/trunk/arch/s390/kernel/base.S
+++ b/trunk/arch/s390/kernel/base.S
@@ -76,42 +76,6 @@ s390_base_pgm_handler_fn:
        .quad   0
        .previous
 
-#
-# Calls diag 308 subcode 1 and continues execution
-#
-# The following conditions must be ensured before calling this function:
-# * Prefix register = 0
-# * Lowcore protection is disabled
-#
-ENTRY(diag308_reset)
-       larl    %r4,.Lctlregs           # Save control registers
-       stctg   %c0,%c15,0(%r4)
-       larl    %r4,.Lrestart_psw       # Setup restart PSW at absolute 0
-       lghi    %r3,0
-       lg      %r4,0(%r4)              # Save PSW
-       sturg   %r4,%r3                 # Use sturg, because of large pages
-       lghi    %r1,1
-       diag    %r1,%r1,0x308
-.Lrestart_part2:
-       lhi     %r0,0                   # Load r0 with zero
-       lhi     %r1,2                   # Use mode 2 = ESAME (dump)
-       sigp    %r1,%r0,0x12            # Switch to ESAME mode
-       sam64                           # Switch to 64 bit addressing mode
-       larl    %r4,.Lctlregs           # Restore control registers
-       lctlg   %c0,%c15,0(%r4)
-       br      %r14
-.align 16
-.Lrestart_psw:
-       .long   0x00080000,0x80000000 + .Lrestart_part2
-
-       .section .bss
-.align 8
-.Lctlregs:
-       .rept   16
-       .quad   0
-       .endr
-       .previous
-
 #else /* CONFIG_64BIT */
 
 ENTRY(s390_base_mcck_handler)
diff --git a/trunk/arch/s390/kernel/compat_signal.c b/trunk/arch/s390/kernel/compat_signal.c
index a9a285b8c4ad..eee999853a7c 100644
--- a/trunk/arch/s390/kernel/compat_signal.c
+++ b/trunk/arch/s390/kernel/compat_signal.c
@@ -380,13 +380,20 @@ asmlinkage long sys32_sigreturn(void)
                goto badframe;
        if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
                goto badframe;
+
        sigdelsetmask(&set, ~_BLOCKABLE);
-       set_current_blocked(&set);
+       spin_lock_irq(&current->sighand->siglock);
+       current->blocked = set;
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
+
        if (restore_sigregs32(regs, &frame->sregs))
                goto badframe;
        if (restore_sigregs_gprs_high(regs, frame->gprs_high))
                goto badframe;
+
        return regs->gprs[2];
+
 badframe:
        force_sig(SIGSEGV, current);
        return 0;
@@ -406,22 +413,31 @@ asmlinkage long sys32_rt_sigreturn(void)
                goto badframe;
        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
+
        sigdelsetmask(&set, ~_BLOCKABLE);
-       set_current_blocked(&set);
+       spin_lock_irq(&current->sighand->siglock);
+       current->blocked = set;
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
+
        if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
                goto badframe;
        if (restore_sigregs_gprs_high(regs, frame->gprs_high))
                goto badframe;
+
        err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp);
        st.ss_sp = compat_ptr(ss_sp);
        err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size);
        err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags);
        if (err)
                goto badframe;
+
        set_fs (KERNEL_DS);
        do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]);
        set_fs (old_fs);
+
        return regs->gprs[2];
+
 badframe:
        force_sig(SIGSEGV, current);
        return 0;
@@ -589,10 +605,10 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
  * OK, we're invoking a handler
  */
 
-int handle_signal32(unsigned long sig, struct k_sigaction *ka,
-                   siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+int
+handle_signal32(unsigned long sig, struct k_sigaction *ka,
+               siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
 {
-       sigset_t blocked;
        int ret;
 
        /* Set up the stack frame */
@@ -600,12 +616,15 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
                ret = setup_rt_frame32(sig, ka, info, oldset, regs);
        else
                ret = setup_frame32(sig, ka, oldset, regs);
-       if (ret)
-               return ret;
-       sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
-       if (!(ka->sa.sa_flags & SA_NODEFER))
-               sigaddset(&blocked, sig);
-       set_current_blocked(&blocked);
-       return 0;
+
+       if (ret == 0) {
+               spin_lock_irq(&current->sighand->siglock);
+               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+               if (!(ka->sa.sa_flags & SA_NODEFER))
+                       sigaddset(&current->blocked,sig);
+               recalc_sigpending();
+               spin_unlock_irq(&current->sighand->siglock);
+       }
+       return ret;
 }
diff --git a/trunk/arch/s390/kernel/entry.S b/trunk/arch/s390/kernel/entry.S
index 02ec8fe7d03f..3eab7cfab07c 100644
--- a/trunk/arch/s390/kernel/entry.S
+++ b/trunk/arch/s390/kernel/entry.S
@@ -849,34 +849,6 @@ restart_crash:
 restart_go:
 #endif
 
-#
-# PSW restart interrupt handler
-#
-ENTRY(psw_restart_int_handler)
-       st      %r15,__LC_SAVE_AREA_64(%r0)     # save r15
-       basr    %r15,0
-0:     l       %r15,.Lrestart_stack-0b(%r15)   # load restart stack
-       l       %r15,0(%r15)
-       ahi     %r15,-SP_SIZE                   # make room for pt_regs
-       stm     %r0,%r14,SP_R0(%r15)            # store gprs %r0-%r14 to stack
-       mvc     SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack
-       mvc     SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw
-       xc      __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0
-       basr    %r14,0
-1:     l       %r14,.Ldo_restart-1b(%r14)
-       basr    %r14,%r14
-
-       basr    %r14,0                          # load disabled wait PSW if
-2:     lpsw    restart_psw_crash-2b(%r14)      # do_restart returns
-       .align 4
-.Ldo_restart:
-       .long   do_restart
-.Lrestart_stack:
-       .long   restart_stack
-       .align 8
-restart_psw_crash:
-       .long   0x000a0000,0x00000000 + restart_psw_crash
-
        .section .kprobes.text, "ax"
 
 #ifdef CONFIG_CHECK_STACK
diff --git a/trunk/arch/s390/kernel/entry64.S b/trunk/arch/s390/kernel/entry64.S
index 5f729d627cef..7a0fd426ca92 100644
--- a/trunk/arch/s390/kernel/entry64.S
+++ b/trunk/arch/s390/kernel/entry64.S
@@ -865,26 +865,6 @@ restart_crash:
 restart_go:
 #endif
 
-#
-# PSW restart interrupt handler
-#
-ENTRY(psw_restart_int_handler)
-       stg     %r15,__LC_SAVE_AREA_64(%r0)     # save r15
-       larl    %r15,restart_stack              # load restart stack
-       lg      %r15,0(%r15)
-       aghi    %r15,-SP_SIZE                   # make room for pt_regs
-       stmg    %r0,%r14,SP_R0(%r15)            # store gprs %r0-%r14 to stack
-       mvc     SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack
-       mvc     SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw
-       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0
-       brasl   %r14,do_restart
-
-       larl    %r14,restart_psw_crash          # load disabled wait PSW if
-       lpswe   0(%r14)                         # do_restart returns
-       .align 8
-restart_psw_crash:
-       .quad   0x0002000080000000,0x0000000000000000 + restart_psw_crash
-
        .section .kprobes.text, "ax"
 
 #ifdef CONFIG_CHECK_STACK
diff --git a/trunk/arch/s390/kernel/ipl.c b/trunk/arch/s390/kernel/ipl.c
index 04361d5a4279..a689070be287 100644
--- a/trunk/arch/s390/kernel/ipl.c
+++ b/trunk/arch/s390/kernel/ipl.c
@@ -45,13 +45,11 @@
  *  - halt
  *  - power off
  *  - reipl
- *  - restart
  */
 #define ON_PANIC_STR           "on_panic"
 #define ON_HALT_STR            "on_halt"
 #define ON_POFF_STR            "on_poff"
 #define ON_REIPL_STR           "on_reboot"
-#define ON_RESTART_STR         "on_restart"
 
 struct shutdown_action;
 struct shutdown_trigger {
@@ -1546,20 +1544,17 @@ static char vmcmd_on_reboot[128];
 static char vmcmd_on_panic[128];
 static char vmcmd_on_halt[128];
 static char vmcmd_on_poff[128];
-static char vmcmd_on_restart[128];
 
 DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
 DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
 DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
 DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
-DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart);
 
 static struct attribute *vmcmd_attrs[] = {
        &sys_vmcmd_on_reboot_attr.attr,
        &sys_vmcmd_on_panic_attr.attr,
        &sys_vmcmd_on_halt_attr.attr,
        &sys_vmcmd_on_poff_attr.attr,
-       &sys_vmcmd_on_restart_attr.attr,
        NULL,
 };
 
@@ -1581,8 +1576,6 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
                cmd = vmcmd_on_halt;
        else if (strcmp(trigger->name, ON_POFF_STR) == 0)
                cmd = vmcmd_on_poff;
-       else if (strcmp(trigger->name, ON_RESTART_STR) == 0)
-               cmd = vmcmd_on_restart;
        else
                return;
 
@@ -1714,34 +1707,6 @@ static void do_panic(void)
        stop_run(&on_panic_trigger);
 }
 
-/* on restart */
-
-static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
-       &reipl_action};
-
-static ssize_t on_restart_show(struct kobject *kobj,
-                              struct kobj_attribute *attr, char *page)
-{
-       return sprintf(page, "%s\n", on_restart_trigger.action->name);
-}
-
-static ssize_t on_restart_store(struct kobject *kobj,
-                               struct kobj_attribute *attr,
-                               const char *buf, size_t len)
-{
-       return set_trigger(buf, &on_restart_trigger, len);
-}
-
-static struct kobj_attribute on_restart_attr =
-       __ATTR(on_restart, 0644, on_restart_show, on_restart_store);
-
-void do_restart(void)
-{
-       smp_send_stop();
-       on_restart_trigger.action->fn(&on_restart_trigger);
-       stop_run(&on_restart_trigger);
-}
-
 /* on halt */
 
 static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
@@ -1818,9 +1783,7 @@ static void __init shutdown_triggers_init(void)
        if (sysfs_create_file(&shutdown_actions_kset->kobj,
                              &on_poff_attr.attr))
                goto fail;
-       if (sysfs_create_file(&shutdown_actions_kset->kobj,
-                             &on_restart_attr.attr))
-               goto fail;
+
        return;
 fail:
        panic("shutdown_triggers_init failed\n");
@@ -1996,12 +1959,6 @@ static void do_reset_calls(void)
 {
        struct reset_call *reset;
 
-#ifdef CONFIG_64BIT
-       if (diag308_set_works) {
-               diag308_reset();
-               return;
-       }
-#endif
        list_for_each_entry(reset, &rcall, list)
                reset->fn();
 }
diff --git a/trunk/arch/s390/kernel/reipl64.S b/trunk/arch/s390/kernel/reipl64.S
index e690975403f4..78eb7cfbd3d1 100644
--- a/trunk/arch/s390/kernel/reipl64.S
+++ b/trunk/arch/s390/kernel/reipl64.S
@@ -1,5 +1,5 @@
 /*
- * Copyright IBM Corp 2000,2011
+ * Copyright IBM Corp 2000,2009
  * Author(s): Holger Smolinski,
  *           Denis Joseph Barrow,
  */
@@ -7,64 +7,6 @@
 #include
 #include
 
-#
-# store_status
-#
-# Prerequisites to run this function:
-# - Prefix register is set to zero
-# - Original prefix register is stored in "dump_prefix_page"
-# - Lowcore protection is off
-#
-ENTRY(store_status)
-       /* Save register one and load save area base */
-       stg     %r1,__LC_SAVE_AREA_64(%r0)
-       lghi    %r1,SAVE_AREA_BASE
-       /* General purpose registers */
-       stmg    %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       lg      %r2,__LC_SAVE_AREA_64(%r0)
-       stg     %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
-       /* Control registers */
-       stctg   %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       /* Access registers */
-       stam    %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       /* Floating point registers */
-       std     %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       std     %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       /* Floating point control register */
-       stfpc   __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       /* CPU timer */
-       stpt    __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
-       /* Saved prefix register */
-       larl    %r2,dump_prefix_page
-       mvc     __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
-       /* Clock comparator - seven bytes */
-       larl    %r2,.Lclkcmp
-       stckc   0(%r2)
-       mvc     __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
-       /* Program status word */
-       epsw    %r2,%r3
-       st      %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
-       st      %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
-       larl    %r2,store_status
-       stg     %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
-       br      %r14
-.align 8
-.Lclkcmp:      .quad   0x0000000000000000
-
 #
 # do_reipl_asm
 # Parameter: r2 = schid of reipl device
 #
 ENTRY(do_reipl_asm)
        basr    %r13,0
 .Lpg0: lpswe   .Lnewpsw-.Lpg0(%r13)
-.Lpg1: brasl   %r14,store_status
+.Lpg1: # do store status of all registers
+
+       stg     %r1,.Lregsave-.Lpg0(%r13)
+       lghi    %r1,0x1000
+       stmg    %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1)
+       lg      %r0,.Lregsave-.Lpg0(%r13)
+       stg     %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1)
+       stctg   %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1)
+       stam    %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1)
+       lg      %r10,.Ldump_pfx-.Lpg0(%r13)
+       mvc     __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10)
+       stfpc   __LC_FP_CREG_SAVE_AREA-0x1000(%r1)
+       stckc   .Lclkcmp-.Lpg0(%r13)
+       mvc     __LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13)
+       stpt    __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1)
+       stg     %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1)
 
        lctlg   %c6,%c6,.Lall-.Lpg0(%r13)
        lgr     %r1,%r2
@@ -110,7 +67,10 @@ ENTRY(do_reipl_asm)
        st      %r14,.Ldispsw+12-.Lpg0(%r13)
        lpswe   .Ldispsw-.Lpg0(%r13)
        .align  8
+.Lclkcmp:      .quad   0x0000000000000000
 .Lall:         .quad   0x00000000ff000000
+.Ldump_pfx:    .quad   dump_prefix_page
+.Lregsave:     .quad   0x0000000000000000
        .align  16
 /*
  * These addresses have to be 31 bit otherwise
diff --git a/trunk/arch/s390/kernel/setup.c b/trunk/arch/s390/kernel/setup.c
index 7b371c37061d..0c35dee10b00 100644
--- a/trunk/arch/s390/kernel/setup.c
+++ b/trunk/arch/s390/kernel/setup.c
@@ -346,7 +346,7 @@ setup_lowcore(void)
        lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
        lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
        lc->restart_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
+               PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
        if (user_mode != HOME_SPACE_MODE)
                lc->restart_psw.mask |= PSW_ASC_HOME;
        lc->external_new_psw.mask = psw_kernel_bits;
@@ -529,27 +529,6 @@ static void __init setup_memory_end(void)
        memory_end = memory_size;
 }
 
-void *restart_stack __attribute__((__section__(".data")));
-
-/*
- * Setup new PSW and allocate stack for PSW restart interrupt
- */
-static void __init setup_restart_psw(void)
-{
-       psw_t psw;
-
-       restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
-       restart_stack += ASYNC_SIZE;
-
-       /*
-        * Setup restart PSW for absolute zero lowcore. This is necessary
-        * if PSW restart is done on an offline CPU that has lowcore zero
-        */
-       psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
-       psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
-       copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw));
-}
-
 static void __init
 setup_memory(void)
 {
@@ -752,7 +731,6 @@ static void __init setup_hwcaps(void)
                strcpy(elf_platform, "z10");
                break;
        case 0x2817:
-       case 0x2818:
                strcpy(elf_platform, "z196");
                break;
        }
@@ -814,7 +792,6 @@ setup_arch(char **cmdline_p)
        setup_addressing_mode();
        setup_memory();
        setup_resources();
-       setup_restart_psw();
        setup_lowcore();
 
        cpu_init();
diff --git a/trunk/arch/s390/kernel/signal.c b/trunk/arch/s390/kernel/signal.c
index 9a40e1cc5ec3..abbb3c3c7aab 100644
--- a/trunk/arch/s390/kernel/signal.c
+++ b/trunk/arch/s390/kernel/signal.c
@@ -57,15 +57,17 @@ typedef struct
  */
 SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask)
 {
-       sigset_t blocked;
-
-       current->saved_sigmask = current->blocked;
        mask &= _BLOCKABLE;
-       siginitset(&blocked, mask);
-       set_current_blocked(&blocked);
+       spin_lock_irq(&current->sighand->siglock);
+       current->saved_sigmask = current->blocked;
+       siginitset(&current->blocked, mask);
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
+
        set_current_state(TASK_INTERRUPTIBLE);
        schedule();
-       set_restore_sigmask();
+       set_thread_flag(TIF_RESTORE_SIGMASK);
+
        return -ERESTARTNOHAND;
 }
 
@@ -170,11 +172,18 @@ SYSCALL_DEFINE0(sigreturn)
                goto badframe;
        if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
                goto badframe;
+
        sigdelsetmask(&set, ~_BLOCKABLE);
-       set_current_blocked(&set);
+       spin_lock_irq(&current->sighand->siglock);
+       current->blocked = set;
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
+
        if (restore_sigregs(regs, &frame->sregs))
                goto badframe;
+
        return regs->gprs[2];
+
 badframe:
        force_sig(SIGSEGV, current);
        return 0;
@@ -190,14 +199,21 @@ SYSCALL_DEFINE0(rt_sigreturn)
                goto badframe;
        if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
+
        sigdelsetmask(&set, ~_BLOCKABLE);
-       set_current_blocked(&set);
+       spin_lock_irq(&current->sighand->siglock);
+       current->blocked = set;
+       recalc_sigpending();
+       spin_unlock_irq(&current->sighand->siglock);
+
        if (restore_sigregs(regs, &frame->uc.uc_mcontext))
                goto badframe;
+
        if (do_sigaltstack(&frame->uc.uc_stack, NULL,
                           regs->gprs[15]) == -EFAULT)
                goto badframe;
        return regs->gprs[2];
+
 badframe:
        force_sig(SIGSEGV, current);
        return 0;
@@ -369,11 +385,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
        return -EFAULT;
 }
 
-static int handle_signal(unsigned long sig, struct k_sigaction *ka,
-                        siginfo_t *info, sigset_t *oldset,
-                        struct pt_regs *regs)
+/*
+ * OK, we're invoking a handler
+ */
+
+static int
+handle_signal(unsigned long sig, struct k_sigaction *ka,
+             siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
 {
-       sigset_t blocked;
        int ret;
 
        /* Set up the stack frame */
@@ -381,13 +400,17 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
                ret = setup_rt_frame(sig, ka, info, oldset, regs);
        else
                ret = setup_frame(sig, ka, oldset, regs);
-       if (ret)
-               return ret;
-       sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
-       if (!(ka->sa.sa_flags & SA_NODEFER))
-               sigaddset(&blocked, sig);
-       set_current_blocked(&blocked);
-       return 0;
+
+       if (ret == 0) {
+               spin_lock_irq(&current->sighand->siglock);
+               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+               if (!(ka->sa.sa_flags & SA_NODEFER))
+                       sigaddset(&current->blocked,sig);
+               recalc_sigpending();
+               spin_unlock_irq(&current->sighand->siglock);
+       }
+
+       return ret;
 }
 
 /*
diff --git a/trunk/arch/s390/kernel/smp.c b/trunk/arch/s390/kernel/smp.c
index 6ab16ac64d29..a6d85c0a7f20 100644
--- a/trunk/arch/s390/kernel/smp.c
+++ b/trunk/arch/s390/kernel/smp.c
@@ -452,27 +452,23 @@ static void __init smp_detect_cpus(void)
  */
 int __cpuinit start_secondary(void *cpuvoid)
 {
+       /* Setup the cpu */
        cpu_init();
        preempt_disable();
+       /* Enable TOD clock interrupts on the secondary cpu. */
        init_cpu_timer();
+       /* Enable cpu timer interrupts on the secondary cpu. */
        init_cpu_vtimer();
+       /* Enable pfault pseudo page faults on this cpu. */
        pfault_init();
+       /* call cpu notifiers */
        notify_cpu_starting(smp_processor_id());
+       /* Mark this cpu as online */
        ipi_call_lock();
        set_cpu_online(smp_processor_id(), true);
        ipi_call_unlock();
-       __ctl_clear_bit(0, 28); /* Disable lowcore protection */
-       S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
-       S390_lowcore.restart_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
-       __ctl_set_bit(0, 28); /* Enable lowcore protection */
-       /*
-        * Wait until the cpu which brought this one up marked it
-        * active before enabling interrupts.
-        */
-       while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
-               cpu_relax();
+       /* Switch on interrupts */
        local_irq_enable();
        /* cpu_idle will call schedule for us */
        cpu_idle();
@@ -511,11 +507,7 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
        memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512);
        lowcore->async_stack = async_stack + ASYNC_SIZE;
        lowcore->panic_stack = panic_stack + PAGE_SIZE;
-       lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
-       lowcore->restart_psw.addr =
-               PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
-       if (user_mode != HOME_SPACE_MODE)
-               lowcore->restart_psw.mask |= PSW_ASC_HOME;
+
 #ifndef CONFIG_64BIT
        if (MACHINE_HAS_IEEE) {
                unsigned long save_area;
diff --git a/trunk/arch/s390/mm/maccess.c b/trunk/arch/s390/mm/maccess.c
index 5dbbaa6e594c..51e5cd9b906a 100644
--- a/trunk/arch/s390/mm/maccess.c
+++ b/trunk/arch/s390/mm/maccess.c
@@ -85,19 +85,3 @@ int memcpy_real(void *dest, void *src, size_t count)
        arch_local_irq_restore(flags);
        return rc;
 }
-
-/*
- * Copy memory to absolute zero
- */
-void copy_to_absolute_zero(void *dest, void *src, size_t count)
-{
-       unsigned long cr0;
-
-       BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore));
-       preempt_disable();
-       __ctl_store(cr0, 0, 0);
-       __ctl_clear_bit(0, 28); /* disable lowcore protection */
-       memcpy_real(dest + store_prefix(), src, count);
-       __ctl_load(cr0, 0, 0);
-       preempt_enable();
-}
diff --git a/trunk/arch/s390/mm/pgtable.c b/trunk/arch/s390/mm/pgtable.c
index 4d1f2bce87b3..2adb23938a7f 100644
--- a/trunk/arch/s390/mm/pgtable.c
+++ b/trunk/arch/s390/mm/pgtable.c
@@ -528,7 +528,6 @@ static inline void page_table_free_pgste(unsigned long *table)
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
                                                    unsigned long vmaddr)
 {
-       return NULL;
 }
 
 static inline void page_table_free_pgste(unsigned long *table)
diff --git a/trunk/arch/sh/Kconfig b/trunk/arch/sh/Kconfig
index ff9177c8f643..748ff1920068 100644
--- a/trunk/arch/sh/Kconfig
+++ b/trunk/arch/sh/Kconfig
@@ -11,7 +11,6 @@ config SUPERH
        select HAVE_DMA_ATTRS
        select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
        select PERF_USE_VMALLOC
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_BZIP2
diff --git a/trunk/arch/sh/kernel/idle.c b/trunk/arch/sh/kernel/idle.c
index 32114e0941ae..84db0d6ccd0d 100644
--- a/trunk/arch/sh/kernel/idle.c
+++ b/trunk/arch/sh/kernel/idle.c
@@ -16,13 +16,12 @@
 #include
 #include
 #include
-#include <linux/cpuidle.h>
 #include
 #include
 #include
 #include
 
-static void (*pm_idle)(void);
+void (*pm_idle)(void) = NULL;
 
 static int hlt_counter;
 
@@ -101,8 +100,7 @@ void cpu_idle(void)
                        local_irq_disable();
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
-                       if (cpuidle_idle_call())
-                               pm_idle();
+                       pm_idle();
                        /*
                         * Sanity check to ensure that pm_idle() returns
                         * with IRQs enabled
diff --git a/trunk/arch/sparc/Kconfig b/trunk/arch/sparc/Kconfig
index 42c67beadcae..1074dddcb104 100644
--- a/trunk/arch/sparc/Kconfig
+++ b/trunk/arch/sparc/Kconfig
@@ -54,7 +54,6 @@ config SPARC64
        select HAVE_PERF_EVENTS
        select PERF_USE_VMALLOC
        select IRQ_PREFLOW_FASTEOI
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ARCH_DEFCONFIG
        string
diff --git a/trunk/arch/tile/Kconfig b/trunk/arch/tile/Kconfig
index b30f71ac0d06..0249b8b4db54 100644
--- a/trunk/arch/tile/Kconfig
+++ b/trunk/arch/tile/Kconfig
@@ -12,7 +12,6 @@ config TILE
        select GENERIC_PENDING_IRQ if SMP
        select GENERIC_IRQ_SHOW
        select SYS_HYPERVISOR
-       select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
 
 # FIXME: investigate whether we need/want these options.
 #      select HAVE_IOREMAP_PROT
diff --git a/trunk/arch/tile/include/asm/Kbuild b/trunk/arch/tile/include/asm/Kbuild
index aec60dc06007..849ab2fa1f5c 100644
--- a/trunk/arch/tile/include/asm/Kbuild
+++ b/trunk/arch/tile/include/asm/Kbuild
@@ -2,41 +2,3 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ucontext.h
 header-y += hardwall.h
-
-generic-y += bug.h
-generic-y += bugs.h
-generic-y += cputime.h
-generic-y += device.h
-generic-y += div64.h
-generic-y += emergency-restart.h
-generic-y += errno.h
-generic-y += fb.h
-generic-y += fcntl.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipc.h
-generic-y += ipcbuf.h
-generic-y += irq_regs.h
-generic-y += kdebug.h
-generic-y += local.h
-generic-y += module.h
-generic-y += msgbuf.h
-generic-y += mutex.h
-generic-y += param.h
-generic-y += parport.h
-generic-y += poll.h
-generic-y += posix_types.h
-generic-y += resource.h
-generic-y += scatterlist.h
-generic-y += sembuf.h
-generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
-generic-y += types.h
-generic-y += ucontext.h
-generic-y += xor.h
diff --git a/trunk/arch/tile/include/asm/bug.h b/trunk/arch/tile/include/asm/bug.h
new file mode 100644
index 000000000000..b12fd89e42e9
--- /dev/null
+++ b/trunk/arch/tile/include/asm/bug.h
@@ -0,0 +1 @@
+#include <asm-generic/bug.h>
diff --git a/trunk/arch/tile/include/asm/bugs.h b/trunk/arch/tile/include/asm/bugs.h
new file mode 100644
index 000000000000..61791e1ad9f5
--- /dev/null
+++ b/trunk/arch/tile/include/asm/bugs.h
@@ -0,0 +1 @@
+#include <asm-generic/bugs.h>
diff --git a/trunk/arch/tile/include/asm/cputime.h b/trunk/arch/tile/include/asm/cputime.h
new file mode 100644
index 000000000000..6d68ad7e0ea3
--- /dev/null
+++ b/trunk/arch/tile/include/asm/cputime.h
@@ -0,0 +1 @@
+#include <asm-generic/cputime.h>
diff --git a/trunk/arch/tile/include/asm/device.h b/trunk/arch/tile/include/asm/device.h
new file mode 100644
index 000000000000..f0a4c256403b
--- /dev/null
+++ b/trunk/arch/tile/include/asm/device.h
@@ -0,0 +1 @@
+#include <asm-generic/device.h>
diff --git a/trunk/arch/tile/include/asm/div64.h b/trunk/arch/tile/include/asm/div64.h
new file mode 100644
index 000000000000..6cd978cefb28
--- /dev/null
+++ b/trunk/arch/tile/include/asm/div64.h
@@ -0,0 +1 @@
+#include <asm-generic/div64.h>
diff --git a/trunk/arch/tile/include/asm/emergency-restart.h b/trunk/arch/tile/include/asm/emergency-restart.h
new file mode 100644
index 000000000000..3711bd9d50bd
--- /dev/null
+++ b/trunk/arch/tile/include/asm/emergency-restart.h
@@ -0,0 +1 @@
+#include <asm-generic/emergency-restart.h>
diff --git a/trunk/arch/tile/include/asm/errno.h b/trunk/arch/tile/include/asm/errno.h
new file mode 100644
index 000000000000..4c82b503d92f
--- /dev/null
+++ b/trunk/arch/tile/include/asm/errno.h
@@ -0,0 +1 @@
+#include <asm-generic/errno.h>
diff --git a/trunk/arch/tile/include/asm/fb.h b/trunk/arch/tile/include/asm/fb.h
new file mode 100644
index 000000000000..3a4988e8df45
--- /dev/null
+++ b/trunk/arch/tile/include/asm/fb.h
@@ -0,0 +1 @@
+#include <asm-generic/fb.h>
diff --git a/trunk/arch/tile/include/asm/fcntl.h b/trunk/arch/tile/include/asm/fcntl.h
new file mode 100644
index 000000000000..46ab12db5739
--- /dev/null
+++ b/trunk/arch/tile/include/asm/fcntl.h
@@ -0,0 +1 @@
+#include <asm-generic/fcntl.h>
diff --git a/trunk/arch/tile/include/asm/fixmap.h b/trunk/arch/tile/include/asm/fixmap.h
index c66f7933beaa..51537ff9265a 100644
--- a/trunk/arch/tile/include/asm/fixmap.h
+++ b/trunk/arch/tile/include/asm/fixmap.h
@@ -75,6 +75,12 @@ extern void __set_fixmap(enum fixed_addresses idx,
 #define set_fixmap(idx, phys) \
                __set_fixmap(idx, phys, PAGE_KERNEL)
 
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+               __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
 #define clear_fixmap(idx) \
                __set_fixmap(idx, 0, __pgprot(0))
 
diff --git a/trunk/arch/tile/include/asm/ioctl.h b/trunk/arch/tile/include/asm/ioctl.h
new file mode 100644
index 000000000000..b279fe06dfe5
--- /dev/null
+++ b/trunk/arch/tile/include/asm/ioctl.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctl.h>
diff --git a/trunk/arch/tile/include/asm/ioctls.h b/trunk/arch/tile/include/asm/ioctls.h
new file mode 100644
index 000000000000..ec34c760665e
--- /dev/null
+++ b/trunk/arch/tile/include/asm/ioctls.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctls.h>
diff --git a/trunk/arch/tile/include/asm/ipc.h b/trunk/arch/tile/include/asm/ipc.h
new file mode 100644
index 000000000000..a46e3d9c2a3f
--- /dev/null
+++ b/trunk/arch/tile/include/asm/ipc.h
@@ -0,0 +1 @@
+#include <asm-generic/ipc.h>
diff --git a/trunk/arch/tile/include/asm/ipcbuf.h b/trunk/arch/tile/include/asm/ipcbuf.h
new file mode 100644
index 000000000000..84c7e51cb6d0
--- /dev/null
+++ b/trunk/arch/tile/include/asm/ipcbuf.h
@@ -0,0 +1 @@
+#include <asm-generic/ipcbuf.h>
diff --git a/trunk/arch/tile/include/asm/irq_regs.h b/trunk/arch/tile/include/asm/irq_regs.h
new file mode 100644
index 000000000000..3dd9c0b70270
--- /dev/null
+++ b/trunk/arch/tile/include/asm/irq_regs.h
@@ -0,0 +1 @@
+#include <asm-generic/irq_regs.h>
diff --git a/trunk/arch/tile/include/asm/kdebug.h b/trunk/arch/tile/include/asm/kdebug.h
new file mode 100644
index 000000000000..6ece1b037665
--- /dev/null
+++ b/trunk/arch/tile/include/asm/kdebug.h
@@ -0,0 +1 @@
+#include <asm-generic/kdebug.h>
diff --git a/trunk/arch/tile/include/asm/local.h b/trunk/arch/tile/include/asm/local.h
new file mode 100644
index 000000000000..c11c530f74d0
--- /dev/null
+++ b/trunk/arch/tile/include/asm/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/trunk/arch/tile/include/asm/module.h b/trunk/arch/tile/include/asm/module.h
new file mode 100644
index 000000000000..1e4b79fe8584
--- /dev/null
+++ b/trunk/arch/tile/include/asm/module.h
@@ -0,0 +1 @@
+#include diff --git a/trunk/arch/tile/include/asm/msgbuf.h b/trunk/arch/tile/include/asm/msgbuf.h new file mode 100644 index 000000000000..809134c644a6 --- /dev/null +++ b/trunk/arch/tile/include/asm/msgbuf.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/mutex.h b/trunk/arch/tile/include/asm/mutex.h new file mode 100644 index 000000000000..ff6101aa2c71 --- /dev/null +++ b/trunk/arch/tile/include/asm/mutex.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/param.h b/trunk/arch/tile/include/asm/param.h new file mode 100644 index 000000000000..965d45427975 --- /dev/null +++ b/trunk/arch/tile/include/asm/param.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/parport.h b/trunk/arch/tile/include/asm/parport.h new file mode 100644 index 000000000000..cf252af64590 --- /dev/null +++ b/trunk/arch/tile/include/asm/parport.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/poll.h b/trunk/arch/tile/include/asm/poll.h new file mode 100644 index 000000000000..c98509d3149e --- /dev/null +++ b/trunk/arch/tile/include/asm/poll.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/posix_types.h b/trunk/arch/tile/include/asm/posix_types.h new file mode 100644 index 000000000000..22cae6230ceb --- /dev/null +++ b/trunk/arch/tile/include/asm/posix_types.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/resource.h b/trunk/arch/tile/include/asm/resource.h new file mode 100644 index 000000000000..04bc4db8921b --- /dev/null +++ b/trunk/arch/tile/include/asm/resource.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/scatterlist.h b/trunk/arch/tile/include/asm/scatterlist.h new file mode 100644 index 000000000000..35d786fe93ae --- /dev/null +++ b/trunk/arch/tile/include/asm/scatterlist.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/sembuf.h b/trunk/arch/tile/include/asm/sembuf.h new file mode 100644 index 000000000000..7673b83cfef7 --- /dev/null +++ b/trunk/arch/tile/include/asm/sembuf.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/serial.h b/trunk/arch/tile/include/asm/serial.h new file mode 100644 index 000000000000..a0cb0caff152 --- /dev/null +++ b/trunk/arch/tile/include/asm/serial.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/shmbuf.h b/trunk/arch/tile/include/asm/shmbuf.h new file mode 100644 index 000000000000..83c05fc2de38 --- /dev/null +++ b/trunk/arch/tile/include/asm/shmbuf.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/shmparam.h b/trunk/arch/tile/include/asm/shmparam.h new file mode 100644 index 000000000000..93f30deb95d0 --- /dev/null +++ b/trunk/arch/tile/include/asm/shmparam.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/socket.h b/trunk/arch/tile/include/asm/socket.h new file mode 100644 index 000000000000..6b71384b9d8b --- /dev/null +++ b/trunk/arch/tile/include/asm/socket.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/sockios.h b/trunk/arch/tile/include/asm/sockios.h new file mode 100644 index 000000000000..def6d4746ee7 --- /dev/null +++ b/trunk/arch/tile/include/asm/sockios.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/statfs.h b/trunk/arch/tile/include/asm/statfs.h new file mode 100644 index 000000000000..0b91fe198c20 --- /dev/null +++ b/trunk/arch/tile/include/asm/statfs.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/termbits.h b/trunk/arch/tile/include/asm/termbits.h new file mode 100644 index 000000000000..3935b106de79 
--- /dev/null +++ b/trunk/arch/tile/include/asm/termbits.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/termios.h b/trunk/arch/tile/include/asm/termios.h new file mode 100644 index 000000000000..280d78a9d966 --- /dev/null +++ b/trunk/arch/tile/include/asm/termios.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/types.h b/trunk/arch/tile/include/asm/types.h new file mode 100644 index 000000000000..b9e79bc580dd --- /dev/null +++ b/trunk/arch/tile/include/asm/types.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/ucontext.h b/trunk/arch/tile/include/asm/ucontext.h new file mode 100644 index 000000000000..9bc07b9f30fb --- /dev/null +++ b/trunk/arch/tile/include/asm/ucontext.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/asm/xor.h b/trunk/arch/tile/include/asm/xor.h new file mode 100644 index 000000000000..c82eb12a5b18 --- /dev/null +++ b/trunk/arch/tile/include/asm/xor.h @@ -0,0 +1 @@ +#include diff --git a/trunk/arch/tile/include/hv/drv_srom_intf.h b/trunk/arch/tile/include/hv/drv_srom_intf.h deleted file mode 100644 index 6395faa6d9e6..000000000000 --- a/trunk/arch/tile/include/hv/drv_srom_intf.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -/** - * @file drv_srom_intf.h - * Interface definitions for the SPI Flash ROM driver. - */ - -#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H -#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H - -/** Read this offset to get the total device size. */ -#define SROM_TOTAL_SIZE_OFF 0xF0000000 - -/** Read this offset to get the device sector size. */ -#define SROM_SECTOR_SIZE_OFF 0xF0000004 - -/** Read this offset to get the device page size. */ -#define SROM_PAGE_SIZE_OFF 0xF0000008 - -/** Write this offset to flush any pending writes. */ -#define SROM_FLUSH_OFF 0xF1000000 - -/** Write this offset, plus the byte offset of the start of a sector, to - * erase a sector. Any write data is ignored, but there must be at least - * one byte of write data. Only applies when the driver is in MTD mode. - */ -#define SROM_ERASE_OFF 0xF2000000 - -#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/trunk/arch/tile/kernel/time.c b/trunk/arch/tile/kernel/time.c index f6f50f2a5e37..c4be58cc5d50 100644 --- a/trunk/arch/tile/kernel/time.c +++ b/trunk/arch/tile/kernel/time.c @@ -78,6 +78,7 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), + .shift = 22, /* typical value, e.g. x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -90,6 +91,8 @@ void __init setup_clock(void) cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); sched_clock_mult = clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); + cycle_counter_cs.mult = + clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -104,7 +107,7 @@ void __init calibrate_delay(void) void __init time_init(void) { /* Initialize and register the clock source. 
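A note on the mult/shift pair restored in the tile time.c hunk above: the timekeeping core converts cycle counts to nanoseconds as ns = (cycles * mult) >> shift, and clocksource_hz2mult() derives mult from the counter frequency. A minimal user-space sketch of that arithmetic, assuming a hypothetical 1 GHz counter together with the shift of 22 used above:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the clocksource_hz2mult() idea: pick mult so that
     * ns = (cycles * mult) >> shift, i.e. mult = (NSEC_PER_SEC << shift) / hz,
     * rounded to nearest. */
    static uint32_t hz2mult(uint32_t hz, uint32_t shift)
    {
        uint64_t tmp = (1000000000ULL << shift) + hz / 2;
        return (uint32_t)(tmp / hz);
    }

    int main(void)
    {
        uint32_t shift = 22;                 /* value set in the hunk above */
        uint32_t hz = 1000000000;            /* assumed 1 GHz cycle counter */
        uint32_t mult = hz2mult(hz, shift);
        uint64_t cycles = 4096;

        printf("mult=%u -> %llu ns\n", mult,
               (unsigned long long)(((uint64_t)cycles * mult) >> shift));
        return 0;
    }

At exactly 1 GHz the derived mult is 1 << 22, so cycles map one-to-one to nanoseconds; other frequencies simply yield a fractional fixed-point multiplier.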
*/ - clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); + clocksource_register(&cycle_counter_cs); /* Start up the tile-timer interrupt source on the boot cpu. */ setup_tile_timer(); diff --git a/trunk/arch/tile/mm/init.c b/trunk/arch/tile/mm/init.c index 7309988c9794..4e10c4023028 100644 --- a/trunk/arch/tile/mm/init.c +++ b/trunk/arch/tile/mm/init.c @@ -836,7 +836,8 @@ void __init mem_init(void) #endif #ifdef CONFIG_FLATMEM - BUG_ON(!mem_map); + if (!mem_map) + BUG(); #endif #ifdef CONFIG_HIGHMEM diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 6a47bb22657f..7cf916fc1ce7 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -72,7 +72,6 @@ config X86 select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if (X86_64 && NET) select CLKEVT_I8253 - select ARCH_HAVE_NMI_SAFE_CMPXCHG config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/trunk/arch/x86/include/asm/io.h b/trunk/arch/x86/include/asm/io.h index d8e8eefbe24c..d02804d650c4 100644 --- a/trunk/arch/x86/include/asm/io.h +++ b/trunk/arch/x86/include/asm/io.h @@ -40,6 +40,8 @@ #include #include +#include + #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ @@ -332,7 +334,6 @@ extern void fixup_early_ioremap(void); extern bool is_early_ioremap_ptep(pte_t *ptep); #ifdef CONFIG_XEN -#include struct bio_vec; extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, diff --git a/trunk/arch/x86/include/asm/processor.h b/trunk/arch/x86/include/asm/processor.h index 0d1171c97729..219371546afd 100644 --- a/trunk/arch/x86/include/asm/processor.h +++ b/trunk/arch/x86/include/asm/processor.h @@ -751,6 +751,8 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) :: "a" (eax), "c" (ecx)); } +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void init_amd_e400_c1e_mask(void); diff --git a/trunk/arch/x86/kernel/acpi/cstate.c b/trunk/arch/x86/kernel/acpi/cstate.c index f50e7fb2a201..5812404a0d4c 100644 --- a/trunk/arch/x86/kernel/acpi/cstate.c +++ b/trunk/arch/x86/kernel/acpi/cstate.c @@ -149,29 +149,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. 
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-	if (!need_resched()) {
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__mwait(ax, cx);
-	}
-}
-
 void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c
index e7e3b019c439..e1ba8cb24e4e 100644
--- a/trunk/arch/x86/kernel/process.c
+++ b/trunk/arch/x86/kernel/process.c
@@ -438,6 +438,29 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+	if (!need_resched()) {
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__mwait(ax, cx);
+	}
+}
+
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c
index 7a3b65107a27..a3d0dc59067b 100644
--- a/trunk/arch/x86/kernel/process_32.c
+++ b/trunk/arch/x86/kernel/process_32.c
@@ -38,7 +38,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -110,8 +109,7 @@ void cpu_idle(void)
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			if (cpuidle_idle_call())
-				pm_idle();
+			pm_idle();
 			start_critical_timings();
 		}
 		tick_nohz_restart_sched_tick();
diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c
index f693e44e1bf6..ca6f7ab8df33 100644
--- a/trunk/arch/x86/kernel/process_64.c
+++ b/trunk/arch/x86/kernel/process_64.c
@@ -37,7 +37,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -137,8 +136,7 @@ void cpu_idle(void)
 			enter_idle();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			if (cpuidle_idle_call())
-				pm_idle();
+			pm_idle();
 			start_critical_timings();
 
 			/* In many cases the interrupt that ended idle
diff --git a/trunk/arch/x86/platform/mrst/Makefile b/trunk/arch/x86/platform/mrst/Makefile
index 1ea38775a6d3..f61ccdd49341 100644
--- a/trunk/arch/x86/platform/mrst/Makefile
+++ b/trunk/arch/x86/platform/mrst/Makefile
@@ -1,4 +1,3 @@
 obj-$(CONFIG_X86_MRST)		+= mrst.o
 obj-$(CONFIG_X86_MRST)		+= vrtc.o
 obj-$(CONFIG_EARLY_PRINTK_MRST)	+= early_printk_mrst.o
-obj-$(CONFIG_X86_MRST)		+= pmu.o
diff --git a/trunk/arch/x86/platform/mrst/pmu.c b/trunk/arch/x86/platform/mrst/pmu.c
deleted file mode 100644
index 9281da7d91bd..000000000000
--- a/trunk/arch/x86/platform/mrst/pmu.c
+++ /dev/null
@@ -1,817 +0,0 @@
-/*
- * mrst/pmu.c - driver for MRST Power Management Unit
- *
- * Copyright (c) 2011, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "pmu.h" - -#define IPCMSG_FW_REVISION 0xF4 - -struct mrst_device { - u16 pci_dev_num; /* DEBUG only */ - u16 lss; - u16 latest_request; - unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */ -}; - -/* - * comlete list of MRST PCI devices - */ -static struct mrst_device mrst_devs[] = { -/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */ -/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */ -/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */ -/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */ -/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */ -/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */ -/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */ -/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */ -/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */ -/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */ -/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */ -/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */ -/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */ -/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */ -/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */ -/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */ -/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */ -/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */ -/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */ -/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */ -/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */ -/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */ - -/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */ - -/* 23 */ { 0x4102, 0 }, /* Lincroft */ -/* 24 */ { 0x4110, 0 }, /* Lincroft */ -}; - -/* n.b. 
We ignore PCI-id 0x815 in LSS9 b/c MeeGo has no driver for it */ -static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0}; -static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803, - 0x0804, 0x0805, 0x080f, 0}; - -/* handle concurrent SMP invokations of pmu_pci_set_power_state() */ -static spinlock_t mrst_pmu_power_state_lock; - -static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */ -static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */ - -static int graphics_is_off; -static int lss_s0i3_enabled; -static bool mrst_pmu_s0i3_enable; - -/* debug counters */ -static u32 pmu_wait_ready_calls; -static u32 pmu_wait_ready_udelays; -static u32 pmu_wait_ready_udelays_max; -static u32 pmu_wait_done_calls; -static u32 pmu_wait_done_udelays; -static u32 pmu_wait_done_udelays_max; -static u32 pmu_set_power_state_entry; -static u32 pmu_set_power_state_send_cmd; - -static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num) -{ - int index = 0; - - if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815)) - index = pci_dev_num - 0x800; - else if (pci_dev_num == 0x084F) - index = 22; - else if (pci_dev_num == 0x4102) - index = 23; - else if (pci_dev_num == 0x4110) - index = 24; - - if (pci_dev_num != mrst_devs[index].pci_dev_num) { - WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num); - return 0; - } - - return &mrst_devs[index]; -} - -/** - * mrst_pmu_validate_cstates - * @dev: cpuidle_device - * - * Certain states are not appropriate for governor to pick in some cases. - * This function will be called as cpuidle_device's prepare callback and - * thus tells governor to ignore such states when selecting the next state - * to enter. - */ - -#define IDLE_STATE4_IS_C6 4 -#define IDLE_STATE5_IS_S0I3 5 - -int mrst_pmu_invalid_cstates(void) -{ - int cpu = smp_processor_id(); - - /* - * Demote to C4 if the PMU is busy. - * Since LSS changes leave the busy bit clear... - * busy means either the PMU is waiting for an ACK-C6 that - * isn't coming due to an MWAIT that returned immediately; - * or we returned from S0i3 successfully, and the PMU - * is not done sending us interrupts. - */ - if (pmu_read_busy_status()) - return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3; - - /* - * Disallow S0i3 if: PMU is not initialized, or CPU1 is active, - * or if device LSS is insufficient, or the GPU is active, - * or if it has been explicitly disabled. - */ - if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) || - !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable) - return 1 << IDLE_STATE5_IS_S0I3; - else - return 0; -} - -/* - * pmu_update_wake_counters(): read PM_WKS, update wake_counters[] - * DEBUG only. 
- */ -static void pmu_update_wake_counters(void) -{ - int lss; - u32 wake_status; - - wake_status = pmu_read_wks(); - - for (lss = 0; lss < MRST_NUM_LSS; ++lss) { - if (wake_status & (1 << lss)) - wake_counters[lss]++; - } -} - -int mrst_pmu_s0i3_entry(void) -{ - int status; - - /* Clear any possible error conditions */ - pmu_write_ics(0x300); - - /* set wake control to current D-states */ - pmu_write_wssc(S0I3_SSS_TARGET); - - status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd); - pmu_update_wake_counters(); - return status; -} - -/* poll for maximum of 5ms for busy bit to clear */ -static int pmu_wait_ready(void) -{ - int udelays; - - pmu_wait_ready_calls++; - - for (udelays = 0; udelays < 500; ++udelays) { - if (udelays > pmu_wait_ready_udelays_max) - pmu_wait_ready_udelays_max = udelays; - - if (pmu_read_busy_status() == 0) - return 0; - - udelay(10); - pmu_wait_ready_udelays++; - } - - /* - * if this fires, observe - * /sys/kernel/debug/mrst_pmu_wait_ready_calls - * /sys/kernel/debug/mrst_pmu_wait_ready_udelays - */ - WARN_ONCE(1, "SCU not ready for 5ms"); - return -EBUSY; -} -/* poll for maximum of 50ms us for busy bit to clear */ -static int pmu_wait_done(void) -{ - int udelays; - - pmu_wait_done_calls++; - - for (udelays = 0; udelays < 500; ++udelays) { - if (udelays > pmu_wait_done_udelays_max) - pmu_wait_done_udelays_max = udelays; - - if (pmu_read_busy_status() == 0) - return 0; - - udelay(100); - pmu_wait_done_udelays++; - } - - /* - * if this fires, observe - * /sys/kernel/debug/mrst_pmu_wait_done_calls - * /sys/kernel/debug/mrst_pmu_wait_done_udelays - */ - WARN_ONCE(1, "SCU not done for 50ms"); - return -EBUSY; -} - -u32 mrst_pmu_msi_is_disabled(void) -{ - return pmu_msi_is_disabled(); -} - -void mrst_pmu_enable_msi(void) -{ - pmu_msi_enable(); -} - -/** - * pmu_irq - pmu driver interrupt handler - * Context: interrupt context - */ -static irqreturn_t pmu_irq(int irq, void *dummy) -{ - union pmu_pm_ics pmu_ics; - - pmu_ics.value = pmu_read_ics(); - - if (!pmu_ics.bits.pending) - return IRQ_NONE; - - switch (pmu_ics.bits.cause) { - case INT_SPURIOUS: - case INT_CMD_DONE: - case INT_CMD_ERR: - case INT_WAKE_RX: - case INT_SS_ERROR: - case INT_S0IX_MISS: - case INT_NO_ACKC6: - pmu_irq_stats[pmu_ics.bits.cause]++; - break; - default: - pmu_irq_stats[INT_INVALID]++; - } - - pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */ - - return IRQ_HANDLED; -} - -/* - * Translate PCI power management to MRST LSS D-states - */ -static int pci_2_mrst_state(int lss, pci_power_t pci_state) -{ - switch (pci_state) { - case PCI_D0: - if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET) - return D0i1; - else - return D0; - case PCI_D1: - return D0i1; - case PCI_D2: - return D0i2; - case PCI_D3hot: - case PCI_D3cold: - return D0i3; - default: - WARN(1, "pci_state %d\n", pci_state); - return 0; - } -} - -static int pmu_issue_command(u32 pm_ssc) -{ - union pmu_pm_set_cfg_cmd_t command; - - if (pmu_read_busy_status()) { - pr_debug("pmu is busy, Operation not permitted\n"); - return -1; - } - - /* - * enable interrupts in PMU so that interrupts are - * propagated when ioc bit for a particular set - * command is set - */ - - pmu_irq_enable(); - - /* Configure the sub systems for pmu2 */ - - pmu_write_ssc(pm_ssc); - - /* - * Send the set config command for pmu its configured - * for mode CM_IMMEDIATE & hence with No Trigger - */ - - command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE; - command.pmu2_params.d_param.cfg_delay = 0; - command.pmu2_params.d_param.rsvd = 0; - - /* construct the 
command to send SET_CFG to particular PMU */ - command.pmu2_params.d_param.cmd = SET_CFG_CMD; - command.pmu2_params.d_param.ioc = 0; - command.pmu2_params.d_param.mode_id = 0; - command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0; - - /* write the value of PM_CMD into particular PMU */ - pr_debug("pmu command being written %x\n", - command.pmu_pm_set_cfg_cmd_value); - - pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value); - - return 0; -} - -static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state) -{ - u16 existing_request; - int i; - - for (i = 0; ids[i]; ++i) { - struct mrst_device *mrst_dev; - - mrst_dev = pci_id_2_mrst_dev(ids[i]); - if (unlikely(!mrst_dev)) - continue; - - existing_request = mrst_dev->latest_request; - if (existing_request < pci_state) - pci_state = existing_request; - } - return pci_state; -} - -/** - * pmu_pci_set_power_state - Callback function is used by all the PCI devices - * for a platform specific device power on/shutdown. - */ - -int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state) -{ - u32 old_sss, new_sss; - int status = 0; - struct mrst_device *mrst_dev; - - pmu_set_power_state_entry++; - - BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL); - BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold); - - mrst_dev = pci_id_2_mrst_dev(pdev->device); - if (unlikely(!mrst_dev)) - return -ENODEV; - - mrst_dev->pci_state_counts[pci_state]++; /* count invocations */ - - /* PMU driver calls self as part of PCI initialization, ignore */ - if (pdev->device == PCI_DEV_ID_MRST_PMU) - return 0; - - BUG_ON(!pmu_reg); /* SW bug if called before initialized */ - - spin_lock(&mrst_pmu_power_state_lock); - - if (pdev->d3_delay) { - dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n", - pdev->d3_delay); - pdev->d3_delay = 0; - } - /* - * If Lincroft graphics, simply remember state - */ - if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY - && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) { - if (pci_state == PCI_D0) - graphics_is_off = 0; - else - graphics_is_off = 1; - goto ret; - } - - if (!mrst_dev->lss) - goto ret; /* device with no LSS */ - - if (mrst_dev->latest_request == pci_state) - goto ret; /* no change */ - - mrst_dev->latest_request = pci_state; /* record latest request */ - - /* - * LSS9 and LSS10 contain multiple PCI devices. 
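Since the comment above concerns shared LSSes: when several PCI functions sit behind one logical subsystem, the effective state must be the shallowest (highest-power) outstanding request, which is what pmu_min_lss_pci_req() above computes over the per-LSS device lists. A stand-alone sketch of that arbitration, with an illustrative request set rather than real driver state:

    #include <stdio.h>

    /* PCI D-states in power order: a numerically lower state draws more
     * power, so the minimum across the group is the binding request. */
    enum pci_power { PCI_D0, PCI_D1, PCI_D2, PCI_D3hot, PCI_D3cold };

    static enum pci_power lss_effective_state(const enum pci_power *reqs, int n)
    {
        enum pci_power state = PCI_D3cold;   /* deepest possible */
        for (int i = 0; i < n; i++)
            if (reqs[i] < state)
                state = reqs[i];             /* shallower request wins */
        return state;
    }

    int main(void)
    {
        /* e.g. LSS9: LPE DMA asks for D3hot while the audio
         * controller is still running in D0 */
        enum pci_power reqs[] = { PCI_D3hot, PCI_D0 };
        printf("effective LSS state: D%d\n", lss_effective_state(reqs, 2));
        return 0;
    }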
- * Use the lowest numbered (highest power) state in the LSS - */ - if (mrst_dev->lss == 9) - pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state); - else if (mrst_dev->lss == 10) - pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state); - - status = pmu_wait_ready(); - if (status) - goto ret; - - old_sss = pmu_read_sss(); - new_sss = old_sss & ~SSMSK(3, mrst_dev->lss); - new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state), - mrst_dev->lss); - - if (new_sss == old_sss) - goto ret; /* nothing to do */ - - pmu_set_power_state_send_cmd++; - - status = pmu_issue_command(new_sss); - - if (unlikely(status != 0)) { - dev_err(&pdev->dev, "Failed to Issue a PM command\n"); - goto ret; - } - - if (pmu_wait_done()) - goto ret; - - lss_s0i3_enabled = - ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET); -ret: - spin_unlock(&mrst_pmu_power_state_lock); - return status; -} - -#ifdef CONFIG_DEBUG_FS -static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"}; - -static inline const char *d0ix_name(int state) -{ - return d0ix_names[(int) state]; -} - -static int debug_mrst_pmu_show(struct seq_file *s, void *unused) -{ - struct pci_dev *pdev = NULL; - u32 cur_pmsss; - int lss; - - seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET); - - cur_pmsss = pmu_read_sss(); - - seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET); - - seq_printf(s, "0x%08X Current SSS ", cur_pmsss); - seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n"); - - if (cpumask_equal(cpu_online_mask, cpumask_of(0))) - seq_printf(s, "cpu0 is only cpu online\n"); - else - seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n"); - - seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]"); - - - for_each_pci_dev(pdev) { - int pos; - u16 pmcsr; - struct mrst_device *mrst_dev; - int i; - - mrst_dev = pci_id_2_mrst_dev(pdev->device); - - seq_printf(s, "%s %04x/%04X %-16.16s ", - dev_name(&pdev->dev), - pdev->vendor, pdev->device, - dev_driver_string(&pdev->dev)); - - if (unlikely (!mrst_dev)) { - seq_printf(s, " UNKNOWN\n"); - continue; - } - - if (mrst_dev->lss) - seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss, - d0ix_name(((cur_pmsss >> - (mrst_dev->lss * 2)) & 0x3))); - else - seq_printf(s, " "); - - /* PCI PM config space setting */ - pos = pci_find_capability(pdev, PCI_CAP_ID_PM); - if (pos != 0) { - pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr); - seq_printf(s, "PCI-%-4s", - pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK)); - } else { - seq_printf(s, " "); - } - - seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request)); - for (i = 0; i <= PCI_D3cold; ++i) - seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]); - - if (mrst_dev->lss) { - unsigned int lssmask; - - lssmask = SSMSK(D0i3, mrst_dev->lss); - - if ((lssmask & S0I3_SSS_TARGET) && - ((lssmask & cur_pmsss) != - (lssmask & S0I3_SSS_TARGET))) - seq_printf(s , "[BLOCKS s0i3]"); - } - - seq_printf(s, "\n"); - } - seq_printf(s, "Wake Counters:\n"); - for (lss = 0; lss < MRST_NUM_LSS; ++lss) - seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]); - - seq_printf(s, "Interrupt Counters:\n"); - seq_printf(s, - "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n" - "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n" - "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n" - "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n", - pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE], - pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX], - pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS], - pmu_irq_stats[INT_NO_ACKC6], 
pmu_irq_stats[INT_INVALID]); - - seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n", - pmu_wait_ready_calls); - seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n", - pmu_wait_ready_udelays); - seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n", - pmu_wait_ready_udelays_max); - seq_printf(s, "mrst_pmu_wait_done_calls %8d\n", - pmu_wait_done_calls); - seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n", - pmu_wait_done_udelays); - seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n", - pmu_wait_done_udelays_max); - seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n", - pmu_set_power_state_entry); - seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n", - pmu_set_power_state_send_cmd); - seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status()); - - return 0; -} - -static int debug_mrst_pmu_open(struct inode *inode, struct file *file) -{ - return single_open(file, debug_mrst_pmu_show, NULL); -} - -static const struct file_operations devices_state_operations = { - .open = debug_mrst_pmu_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif /* DEBUG_FS */ - -/* - * Validate SCU PCI shim PCI vendor capability byte - * against LSS hard-coded in mrst_devs[] above. - * DEBUG only. - */ -static void pmu_scu_firmware_debug(void) -{ - struct pci_dev *pdev = NULL; - - for_each_pci_dev(pdev) { - struct mrst_device *mrst_dev; - u8 pci_config_lss; - int pos; - - mrst_dev = pci_id_2_mrst_dev(pdev->device); - if (unlikely(!mrst_dev)) { - printk(KERN_ERR FW_BUG "pmu: Unknown " - "PCI device 0x%04X\n", pdev->device); - continue; - } - - if (mrst_dev->lss == 0) - continue; /* no LSS in our table */ - - pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR); - if (!pos != 0) { - printk(KERN_ERR FW_BUG "pmu: 0x%04X " - "missing PCI Vendor Capability\n", - pdev->device); - continue; - } - pci_read_config_byte(pdev, pos + 4, &pci_config_lss); - if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) { - printk(KERN_ERR FW_BUG "pmu: 0x%04X " - "invalid PCI Vendor Capability 0x%x " - " expected LSS 0x%X\n", - pdev->device, pci_config_lss, mrst_dev->lss); - continue; - } - pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK; - - if (mrst_dev->lss == pci_config_lss) - continue; - - printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n", - pdev->device, pci_config_lss, mrst_dev->lss); - } -} - -/** - * pmu_probe - */ -static int __devinit pmu_probe(struct pci_dev *pdev, - const struct pci_device_id *pci_id) -{ - int ret; - struct mrst_pmu_reg *pmu; - - /* Init the device */ - ret = pci_enable_device(pdev); - if (ret) { - dev_err(&pdev->dev, "Unable to Enable PCI device\n"); - return ret; - } - - ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME); - if (ret < 0) { - dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n"); - goto out_err1; - } - - /* Map the memory of PMU reg base */ - pmu = pci_iomap(pdev, 0, 0); - if (!pmu) { - dev_err(&pdev->dev, "Unable to map the PMU address space\n"); - ret = -ENOMEM; - goto out_err2; - } - -#ifdef CONFIG_DEBUG_FS - /* /sys/kernel/debug/mrst_pmu */ - (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO, - NULL, NULL, &devices_state_operations); -#endif - pmu_reg = pmu; /* success */ - - if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) { - dev_err(&pdev->dev, "Registering isr has failed\n"); - ret = -1; - goto out_err3; - } - - pmu_scu_firmware_debug(); - - pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */ - - pmu_wait_ready(); - - pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */ - 
pmu_write_cmd(0x201); - - spin_lock_init(&mrst_pmu_power_state_lock); - - /* Enable the hardware interrupt */ - pmu_irq_enable(); - return 0; - -out_err3: - free_irq(pdev->irq, NULL); - pci_iounmap(pdev, pmu_reg); - pmu_reg = NULL; -out_err2: - pci_release_region(pdev, 0); -out_err1: - pci_disable_device(pdev); - return ret; -} - -static void __devexit pmu_remove(struct pci_dev *pdev) -{ - dev_err(&pdev->dev, "Mid PM pmu_remove called\n"); - - /* Freeing up the irq */ - free_irq(pdev->irq, NULL); - - pci_iounmap(pdev, pmu_reg); - pmu_reg = NULL; - - /* disable the current PCI device */ - pci_release_region(pdev, 0); - pci_disable_device(pdev); -} - -static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = { - { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 }, - { } -}; - -MODULE_DEVICE_TABLE(pci, pmu_pci_ids); - -static struct pci_driver driver = { - .name = MRST_PMU_DRV_NAME, - .id_table = pmu_pci_ids, - .probe = pmu_probe, - .remove = __devexit_p(pmu_remove), -}; - -/** - * pmu_pci_register - register the PMU driver as PCI device - */ -static int __init pmu_pci_register(void) -{ - return pci_register_driver(&driver); -} - -/* Register and probe via fs_initcall() to preceed device_initcall() */ -fs_initcall(pmu_pci_register); - -static void __exit mid_pci_cleanup(void) -{ - pci_unregister_driver(&driver); -} - -static int ia_major; -static int ia_minor; - -static int pmu_sfi_parse_oem(struct sfi_table_header *table) -{ - struct sfi_table_simple *sb; - - sb = (struct sfi_table_simple *)table; - ia_major = (sb->pentry[1] >> 0) & 0xFFFF; - ia_minor = (sb->pentry[1] >> 16) & 0xFFFF; - printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n", - ia_major, ia_minor); - - return 0; -} - -static int __init scu_fw_check(void) -{ - int ret; - u32 fw_version; - - if (!pmu_reg) - return 0; /* this driver didn't probe-out */ - - sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem); - - if (ia_major < 0x6005 || ia_minor < 0x1525) { - WARN(1, "mrst_pmu: IA FW version too old\n"); - return -1; - } - - ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0, - &fw_version, 1); - - if (ret) { - WARN(1, "mrst_pmu: IPC FW version? %d\n", ret); - } else { - int scu_major = (fw_version >> 8) & 0xFF; - int scu_minor = (fw_version >> 0) & 0xFF; - - printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version); - - if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) { - printk(KERN_INFO "mrst_pmu: enabling S0i3\n"); - mrst_pmu_s0i3_enable = true; - } else { - WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X", - scu_major, scu_minor); - } - } - return 0; -} -late_initcall(scu_fw_check); -module_exit(mid_pci_cleanup); diff --git a/trunk/arch/x86/platform/mrst/pmu.h b/trunk/arch/x86/platform/mrst/pmu.h deleted file mode 100644 index bfbfe64b167b..000000000000 --- a/trunk/arch/x86/platform/mrst/pmu.h +++ /dev/null @@ -1,234 +0,0 @@ -/* - * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c - * - * Copyright (c) 2011, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef _MRST_PMU_H_ -#define _MRST_PMU_H_ - -#define PCI_DEV_ID_MRST_PMU 0x0810 -#define MRST_PMU_DRV_NAME "mrst_pmu" -#define PCI_SUB_CLASS_MASK 0xFF00 - -#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F -#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80 - -#define SUB_SYS_ALL_D0I1 0x01155555 -#define S0I3_WAKE_SOURCES 0x00001FFF - -#define PM_S0I3_COMMAND \ - ((0 << 31) | /* Reserved */ \ - (0 << 30) | /* Core must be idle */ \ - (0xc2 << 22) | /* ACK C6 trigger */ \ - (3 << 19) | /* Trigger on DMI message */ \ - (3 << 16) | /* Enter S0i3 */ \ - (0 << 13) | /* Numeric mode ID (sw) */ \ - (3 << 9) | /* Trigger mode */ \ - (0 << 8) | /* Do not interrupt */ \ - (1 << 0)) /* Set configuration */ - -#define LSS_DMI 0 -#define LSS_SD_HC0 1 -#define LSS_SD_HC1 2 -#define LSS_NAND 3 -#define LSS_IMAGING 4 -#define LSS_SECURITY 5 -#define LSS_DISPLAY 6 -#define LSS_USB_HC 7 -#define LSS_USB_OTG 8 -#define LSS_AUDIO 9 -#define LSS_AUDIO_LPE 9 -#define LSS_AUDIO_SSP 9 -#define LSS_I2C0 10 -#define LSS_I2C1 10 -#define LSS_I2C2 10 -#define LSS_KBD 10 -#define LSS_SPI0 10 -#define LSS_SPI1 10 -#define LSS_SPI2 10 -#define LSS_GPIO 10 -#define LSS_SRAM 11 /* used by SCU, do not touch */ -#define LSS_SD_HC2 12 -/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */ -#define MRST_NUM_LSS 13 - -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) - -#define SSMSK(mask, lss) ((mask) << ((lss) * 2)) -#define D0 0 -#define D0i1 1 -#define D0i2 2 -#define D0i3 3 - -#define S0I3_SSS_TARGET ( \ - SSMSK(D0i1, LSS_DMI) | \ - SSMSK(D0i3, LSS_SD_HC0) | \ - SSMSK(D0i3, LSS_SD_HC1) | \ - SSMSK(D0i3, LSS_NAND) | \ - SSMSK(D0i3, LSS_SD_HC2) | \ - SSMSK(D0i3, LSS_IMAGING) | \ - SSMSK(D0i3, LSS_SECURITY) | \ - SSMSK(D0i3, LSS_DISPLAY) | \ - SSMSK(D0i3, LSS_USB_HC) | \ - SSMSK(D0i3, LSS_USB_OTG) | \ - SSMSK(D0i3, LSS_AUDIO) | \ - SSMSK(D0i1, LSS_I2C0)) - -/* - * D0i1 on Langwell is Autonomous Clock Gating (ACG). 
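The sub-system state words manipulated throughout this driver pack one 2-bit D-state per LSS, and SSMSK() above builds the shifted mask for a given LSS. A small stand-alone sketch of the encode/decode, reusing the macro verbatim (the LSS number is just an example):

    #include <stdint.h>
    #include <stdio.h>

    #define SSMSK(mask, lss)	((mask) << ((lss) * 2))
    enum { D0, D0i1, D0i2, D0i3 };           /* 2-bit field values */

    int main(void)
    {
        uint32_t sss = 0;
        int lss = 6;                         /* e.g. LSS_DISPLAY above */

        /* clear the 2-bit field for this LSS, then set it to D0i3 */
        sss = (sss & ~SSMSK(3, lss)) | SSMSK(D0i3, lss);

        /* decode: the same shift-and-mask used on cur_pmsss earlier */
        printf("LSS %d field = %u (0=D0 ... 3=D0i3)\n",
               lss, (sss >> (lss * 2)) & 0x3);
        return 0;
    }

This is exactly the decode applied to cur_pmsss in the debugfs dump earlier in the file, and the composition used by S0I3_SSS_TARGET above.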
- * Enable ACG on every LSS except camera and audio - */ -#define D0I1_ACG_SSS_TARGET \ - (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO)) - -enum cm_mode { - CM_NOP, /* ignore the config mode value */ - CM_IMMEDIATE, - CM_DELAY, - CM_TRIGGER, - CM_INVALID -}; - -enum sys_state { - SYS_STATE_S0I0, - SYS_STATE_S0I1, - SYS_STATE_S0I2, - SYS_STATE_S0I3, - SYS_STATE_S3, - SYS_STATE_S5 -}; - -#define SET_CFG_CMD 1 - -enum int_status { - INT_SPURIOUS = 0, - INT_CMD_DONE = 1, - INT_CMD_ERR = 2, - INT_WAKE_RX = 3, - INT_SS_ERROR = 4, - INT_S0IX_MISS = 5, - INT_NO_ACKC6 = 6, - INT_INVALID = 7, -}; - -/* PMU register interface */ -static struct mrst_pmu_reg { - u32 pm_sts; /* 0x00 */ - u32 pm_cmd; /* 0x04 */ - u32 pm_ics; /* 0x08 */ - u32 _resv1; /* 0x0C */ - u32 pm_wkc[2]; /* 0x10 */ - u32 pm_wks[2]; /* 0x18 */ - u32 pm_ssc[4]; /* 0x20 */ - u32 pm_sss[4]; /* 0x30 */ - u32 pm_wssc[4]; /* 0x40 */ - u32 pm_c3c4; /* 0x50 */ - u32 pm_c5c6; /* 0x54 */ - u32 pm_msi_disable; /* 0x58 */ -} *pmu_reg; - -static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); } -static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); } -static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); } -static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); } - -static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); } -static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); } -static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); } -static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); } -static inline void pmu_write_wssc(u32 arg) - { writel(arg, &pmu_reg->pm_wssc[0]); } - -static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); } -static inline u32 pmu_msi_is_disabled(void) - { return readl(&pmu_reg->pm_msi_disable); } - -union pmu_pm_ics { - struct { - u32 cause:8; - u32 enable:1; - u32 pending:1; - u32 reserved:22; - } bits; - u32 value; -}; - -static inline void pmu_irq_enable(void) -{ - union pmu_pm_ics pmu_ics; - - pmu_ics.value = pmu_read_ics(); - pmu_ics.bits.enable = 1; - pmu_write_ics(pmu_ics.value); -} - -union pmu_pm_status { - struct { - u32 pmu_rev:8; - u32 pmu_busy:1; - u32 mode_id:4; - u32 Reserved:19; - } pmu_status_parts; - u32 pmu_status_value; -}; - -static inline int pmu_read_busy_status(void) -{ - union pmu_pm_status result; - - result.pmu_status_value = pmu_read_sts(); - - return result.pmu_status_parts.pmu_busy; -} - -/* pmu set config parameters */ -struct cfg_delay_param_t { - u32 cmd:8; - u32 ioc:1; - u32 cfg_mode:4; - u32 mode_id:3; - u32 sys_state:3; - u32 cfg_delay:8; - u32 rsvd:5; -}; - -struct cfg_trig_param_t { - u32 cmd:8; - u32 ioc:1; - u32 cfg_mode:4; - u32 mode_id:3; - u32 sys_state:3; - u32 cfg_trig_type:3; - u32 cfg_trig_val:8; - u32 cmbi:1; - u32 rsvd1:1; -}; - -union pmu_pm_set_cfg_cmd_t { - union { - struct cfg_delay_param_t d_param; - struct cfg_trig_param_t t_param; - } pmu2_params; - u32 pmu_pm_set_cfg_cmd_value; -}; - -#ifdef FUTURE_PATCH -extern int mrst_s0i3_entry(u32 regval, u32 *regaddr); -#else -static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; } -#endif -#endif diff --git a/trunk/arch/x86/xen/setup.c b/trunk/arch/x86/xen/setup.c index a9627e2e3295..60aeeb56948f 100644 --- a/trunk/arch/x86/xen/setup.c +++ b/trunk/arch/x86/xen/setup.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -427,7 +426,7 @@ void __init xen_arch_setup(void) #ifdef 
CONFIG_X86_32 boot_cpu_data.hlt_works_ok = 1; #endif - disable_cpuidle(); + pm_idle = default_idle; boot_option_idle_override = IDLE_HALT; fiddle_vdso(); diff --git a/trunk/block/blk-core.c b/trunk/block/blk-core.c index b627558c461f..b850bedad229 100644 --- a/trunk/block/blk-core.c +++ b/trunk/block/blk-core.c @@ -1368,10 +1368,8 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes) static int __init fail_make_request_debugfs(void) { - struct dentry *dir = fault_create_debugfs_attr("fail_make_request", - NULL, &fail_make_request); - - return IS_ERR(dir) ? PTR_ERR(dir) : 0; + return init_fault_attr_dentries(&fail_make_request, + "fail_make_request"); } late_initcall(fail_make_request_debugfs); diff --git a/trunk/block/blk-timeout.c b/trunk/block/blk-timeout.c index 780354888958..4f0c06c7a338 100644 --- a/trunk/block/blk-timeout.c +++ b/trunk/block/blk-timeout.c @@ -28,10 +28,7 @@ int blk_should_fake_timeout(struct request_queue *q) static int __init fail_io_timeout_debugfs(void) { - struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout", - NULL, &fail_io_timeout); - - return IS_ERR(dir) ? PTR_ERR(dir) : 0; + return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); } late_initcall(fail_io_timeout_debugfs); diff --git a/trunk/drivers/acpi/acpica/acglobal.h b/trunk/drivers/acpi/acpica/acglobal.h index 76dc02f15574..73863d86f022 100644 --- a/trunk/drivers/acpi/acpica/acglobal.h +++ b/trunk/drivers/acpi/acpica/acglobal.h @@ -126,12 +126,6 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE); */ u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE); -/* - * Disable runtime checking and repair of values returned by control methods. - * Use only if the repair is causing a problem on a particular machine. - */ -u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE); - /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ struct acpi_table_fadt acpi_gbl_FADT; diff --git a/trunk/drivers/acpi/acpica/aclocal.h b/trunk/drivers/acpi/acpica/aclocal.h index 5552125d8340..c7f743ca395b 100644 --- a/trunk/drivers/acpi/acpica/aclocal.h +++ b/trunk/drivers/acpi/acpica/aclocal.h @@ -357,7 +357,6 @@ struct acpi_predefined_data { char *pathname; const union acpi_predefined_info *predefined; union acpi_operand_object *parent_package; - struct acpi_namespace_node *node; u32 flags; u8 node_flags; }; diff --git a/trunk/drivers/acpi/acpica/acpredef.h b/trunk/drivers/acpi/acpica/acpredef.h index c445cca490ea..94e73c97cf85 100644 --- a/trunk/drivers/acpi/acpica/acpredef.h +++ b/trunk/drivers/acpi/acpica/acpredef.h @@ -468,7 +468,6 @@ static const union acpi_predefined_info predefined_names[] = {{"_SWS", 0, ACPI_RTYPE_INTEGER}}, {{"_TC1", 0, ACPI_RTYPE_INTEGER}}, {{"_TC2", 0, ACPI_RTYPE_INTEGER}}, - {{"_TDL", 0, ACPI_RTYPE_INTEGER}}, {{"_TIP", 1, ACPI_RTYPE_INTEGER}}, {{"_TIV", 1, ACPI_RTYPE_INTEGER}}, {{"_TMP", 0, ACPI_RTYPE_INTEGER}}, diff --git a/trunk/drivers/acpi/acpica/nspredef.c b/trunk/drivers/acpi/acpica/nspredef.c index c845c8089f39..9fb03fa8ffde 100644 --- a/trunk/drivers/acpi/acpica/nspredef.c +++ b/trunk/drivers/acpi/acpica/nspredef.c @@ -193,20 +193,14 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, } /* - * Return value validation and possible repair. + * 1) We have a return value, but if one wasn't expected, just exit, this is + * not a problem. For example, if the "Implicit Return" feature is + * enabled, methods will always return a value. 
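The check being simplified in this nspredef.c hunk keys off expected_btypes, a bitmask of return types a predefined name may produce. A rough stand-alone sketch of that mask test; the constant values below are placeholders for illustration, not ACPICA's real ACPI_RTYPE_* encodings:

    #include <stdint.h>
    #include <stdio.h>

    #define RTYPE_INTEGER   0x01    /* illustrative values only */
    #define RTYPE_STRING    0x02
    #define RTYPE_BUFFER    0x04
    #define RTYPE_PACKAGE   0x08
    #define RTYPE_ALL       0xFF    /* "any type is acceptable" */

    static int return_type_ok(uint8_t expected, uint8_t actual)
    {
        /* no expected type, or all types allowed: nothing to validate */
        if (!expected || expected == RTYPE_ALL)
            return 1;
        return (expected & actual) != 0;
    }

    int main(void)
    {
        /* _TMP is declared above as returning an integer */
        printf("%d\n", return_type_ok(RTYPE_INTEGER, RTYPE_INTEGER)); /* 1 */
        printf("%d\n", return_type_ok(RTYPE_INTEGER, RTYPE_BUFFER));  /* 0 */
        return 0;
    }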
* - * 1) Don't perform return value validation/repair if this feature - * has been disabled via a global option. - * - * 2) We have a return value, but if one wasn't expected, just exit, - * this is not a problem. For example, if the "Implicit Return" - * feature is enabled, methods will always return a value. - * - * 3) If the return value can be of any type, then we cannot perform - * any validation, just exit. + * 2) If the return value can be of any type, then we cannot perform any + * validation, exit. */ - if (acpi_gbl_disable_auto_repair || - (!predefined->info.expected_btypes) || + if ((!predefined->info.expected_btypes) || (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) { goto cleanup; } @@ -218,7 +212,6 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, goto cleanup; } data->predefined = predefined; - data->node = node; data->node_flags = node->flags; data->pathname = pathname; diff --git a/trunk/drivers/acpi/acpica/nsrepair2.c b/trunk/drivers/acpi/acpica/nsrepair2.c index 024c4f263f87..973883babee1 100644 --- a/trunk/drivers/acpi/acpica/nsrepair2.c +++ b/trunk/drivers/acpi/acpica/nsrepair2.c @@ -503,21 +503,6 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data, { union acpi_operand_object *return_object = *return_object_ptr; acpi_status status; - struct acpi_namespace_node *node; - - /* - * We can only sort the _TSS return package if there is no _PSS in the - * same scope. This is because if _PSS is present, the ACPI specification - * dictates that the _TSS Power Dissipation field is to be ignored, and - * therefore some BIOSs leave garbage values in the _TSS Power field(s). - * In this case, it is best to just return the _TSS package as-is. - * (May, 2011) - */ - status = - acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node); - if (ACPI_SUCCESS(status)) { - return (AE_OK); - } status = acpi_ns_check_sorted_list(data, return_object, 5, 1, ACPI_SORT_DESCENDING, diff --git a/trunk/drivers/acpi/acpica/tbinstal.c b/trunk/drivers/acpi/acpica/tbinstal.c index 62365f6075dd..48db0944ce4a 100644 --- a/trunk/drivers/acpi/acpica/tbinstal.c +++ b/trunk/drivers/acpi/acpica/tbinstal.c @@ -126,29 +126,12 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index) } /* - * Validate the incoming table signature. - * - * 1) Originally, we checked the table signature for "SSDT" or "PSDT". - * 2) We added support for OEMx tables, signature "OEM". - * 3) Valid tables were encountered with a null signature, so we just - * gave up on validating the signature, (05/2008). - * 4) We encountered non-AML tables such as the MADT, which caused - * interpreter errors and kernel faults. So now, we once again allow - * only "SSDT", "OEMx", and now, also a null signature. (05/2011). + * Originally, we checked the table signature for "SSDT" or "PSDT" here. + * Next, we added support for OEMx tables, signature "OEM". + * Valid tables were encountered with a null signature, so we've just + * given up on validating the signature, since it seems to be a waste + * of code. The original code was removed (05/2008). */ - if ((table_desc->pointer->signature[0] != 0x00) && - (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT)) - && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) { - ACPI_ERROR((AE_INFO, - "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx", - acpi_ut_valid_acpi_name(*(u32 *)table_desc-> - pointer-> - signature) ? 
table_desc-> - pointer->signature : "????", - *(u32 *)table_desc->pointer->signature)); - - return_ACPI_STATUS(AE_BAD_SIGNATURE); - } (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); diff --git a/trunk/drivers/acpi/apei/Kconfig b/trunk/drivers/acpi/apei/Kconfig index c34aa51af4ee..f739a70b1c70 100644 --- a/trunk/drivers/acpi/apei/Kconfig +++ b/trunk/drivers/acpi/apei/Kconfig @@ -10,11 +10,9 @@ config ACPI_APEI error injection. config ACPI_APEI_GHES - bool "APEI Generic Hardware Error Source" + tristate "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED - select LLIST - select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). It @@ -32,13 +30,6 @@ config ACPI_APEI_PCIEAER PCIe AER errors may be reported via APEI firmware first mode. Turn on this option to enable the corresponding support. -config ACPI_APEI_MEMORY_FAILURE - bool "APEI memory error recovering support" - depends on ACPI_APEI && MEMORY_FAILURE - help - Memory errors may be reported via APEI firmware first mode. - Turn on this option to enable the memory recovering support. - config ACPI_APEI_EINJ tristate "APEI Error INJection (EINJ)" depends on ACPI_APEI && DEBUG_FS diff --git a/trunk/drivers/acpi/apei/apei-base.c b/trunk/drivers/acpi/apei/apei-base.c index 8041248fce9b..4a904a4bf05f 100644 --- a/trunk/drivers/acpi/apei/apei-base.c +++ b/trunk/drivers/acpi/apei/apei-base.c @@ -157,10 +157,9 @@ EXPORT_SYMBOL_GPL(apei_exec_noop); * Interpret the specified action. Go through whole action table, * execute all instructions belong to the action. */ -int __apei_exec_run(struct apei_exec_context *ctx, u8 action, - bool optional) +int apei_exec_run(struct apei_exec_context *ctx, u8 action) { - int rc = -ENOENT; + int rc; u32 i, ip; struct acpi_whea_header *entry; apei_exec_ins_func_t run; @@ -199,9 +198,9 @@ int __apei_exec_run(struct apei_exec_context *ctx, u8 action, goto rewind; } - return !optional && rc < 0 ? 
rc : 0; + return 0; } -EXPORT_SYMBOL_GPL(__apei_exec_run); +EXPORT_SYMBOL_GPL(apei_exec_run); typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, struct acpi_whea_header *entry, @@ -604,29 +603,3 @@ struct dentry *apei_get_debugfs_dir(void) return dapei; } EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); - -int apei_osc_setup(void) -{ - static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; - acpi_handle handle; - u32 capbuf[3]; - struct acpi_osc_context context = { - .uuid_str = whea_uuid_str, - .rev = 1, - .cap.length = sizeof(capbuf), - .cap.pointer = capbuf, - }; - - capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - capbuf[OSC_SUPPORT_TYPE] = 0; - capbuf[OSC_CONTROL_TYPE] = 0; - - if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) - || ACPI_FAILURE(acpi_run_osc(handle, &context))) - return -EIO; - else { - kfree(context.ret.pointer); - return 0; - } -} -EXPORT_SYMBOL_GPL(apei_osc_setup); diff --git a/trunk/drivers/acpi/apei/apei-internal.h b/trunk/drivers/acpi/apei/apei-internal.h index f57050e7a5e7..ef0581f2094d 100644 --- a/trunk/drivers/acpi/apei/apei-internal.h +++ b/trunk/drivers/acpi/apei/apei-internal.h @@ -50,18 +50,7 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx) return ctx->value; } -int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional); - -static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action) -{ - return __apei_exec_run(ctx, action, 0); -} - -/* It is optional whether the firmware provides the action */ -static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action) -{ - return __apei_exec_run(ctx, action, 1); -} +int apei_exec_run(struct apei_exec_context *ctx, u8 action); /* Common instruction implementation */ @@ -124,6 +113,4 @@ void apei_estatus_print(const char *pfx, const struct acpi_hest_generic_status *estatus); int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); int apei_estatus_check(const struct acpi_hest_generic_status *estatus); - -int apei_osc_setup(void); #endif diff --git a/trunk/drivers/acpi/apei/einj.c b/trunk/drivers/acpi/apei/einj.c index 589b96c38704..f74b2ea11f21 100644 --- a/trunk/drivers/acpi/apei/einj.c +++ b/trunk/drivers/acpi/apei/einj.c @@ -46,8 +46,7 @@ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as * most will ignore the parameter and make their own choice of address - * for error injection. This extension is used only if - * param_extension module parameter is specified. + * for error injection. 
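For context, the param1/param2 debugfs files that this einj.c change creates unconditionally (rather than behind param_extension) are normally driven from user space. A hedged sketch of such a driver program; the /sys/kernel/debug mount point, the 0x8 (memory correctable) error type, and the addresses are examples, not values mandated by this patch:

    #include <stdio.h>
    #include <stdlib.h>

    static void write_file(const char *path, const char *val)
    {
        FILE *f = fopen(path, "w");
        if (!f) { perror(path); exit(1); }
        fprintf(f, "%s\n", val);
        fclose(f);
    }

    int main(void)
    {
        const char *dir = "/sys/kernel/debug/apei/einj";
        char path[256];

        snprintf(path, sizeof(path), "%s/error_type", dir);
        write_file(path, "0x8");                 /* memory correctable */
        snprintf(path, sizeof(path), "%s/param1", dir);
        write_file(path, "0x100000000");         /* physical address (example) */
        snprintf(path, sizeof(path), "%s/param2", dir);
        write_file(path, "0xfffffffffffff000");  /* address mask (example) */
        snprintf(path, sizeof(path), "%s/error_inject", dir);
        write_file(path, "1");                   /* trigger the injection */
        return 0;
    }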
*/ struct einj_parameter { u64 type; @@ -66,9 +65,6 @@ struct einj_parameter { ((struct acpi_whea_header *)((char *)(tab) + \ sizeof(struct acpi_table_einj))) -static bool param_extension; -module_param(param_extension, bool, 0); - static struct acpi_table_einj *einj_tab; static struct apei_resources einj_resources; @@ -289,7 +285,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) einj_exec_ctx_init(&ctx); - rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION); + rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION); if (rc) return rc; apei_exec_ctx_set_input(&ctx, type); @@ -327,7 +323,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) rc = __einj_error_trigger(trigger_paddr); if (rc) return rc; - rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); + rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION); return rc; } @@ -493,6 +489,14 @@ static int __init einj_init(void) einj_debug_dir, NULL, &error_type_fops); if (!fentry) goto err_cleanup; + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_cleanup; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_cleanup; fentry = debugfs_create_file("error_inject", S_IWUSR, einj_debug_dir, NULL, &error_inject_fops); if (!fentry) @@ -509,23 +513,12 @@ static int __init einj_init(void) rc = apei_exec_pre_map_gars(&ctx); if (rc) goto err_release; - if (param_extension) { - param_paddr = einj_get_parameter_address(); - if (param_paddr) { - einj_param = ioremap(param_paddr, sizeof(*einj_param)); - rc = -ENOMEM; - if (!einj_param) - goto err_unmap; - fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param1); - if (!fentry) - goto err_unmap; - fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param2); - if (!fentry) - goto err_unmap; - } else - pr_warn(EINJ_PFX "Parameter extension is not supported.\n"); + param_paddr = einj_get_parameter_address(); + if (param_paddr) { + einj_param = ioremap(param_paddr, sizeof(*einj_param)); + rc = -ENOMEM; + if (!einj_param) + goto err_unmap; } pr_info(EINJ_PFX "Error INJection is initialized.\n"); @@ -533,8 +526,6 @@ static int __init einj_init(void) return 0; err_unmap: - if (einj_param) - iounmap(einj_param); apei_exec_post_unmap_gars(&ctx); err_release: apei_resources_release(&einj_resources); diff --git a/trunk/drivers/acpi/apei/erst-dbg.c b/trunk/drivers/acpi/apei/erst-dbg.c index 903549df809b..a4cfb64c86a1 100644 --- a/trunk/drivers/acpi/apei/erst-dbg.c +++ b/trunk/drivers/acpi/apei/erst-dbg.c @@ -33,7 +33,7 @@ #define ERST_DBG_PFX "ERST DBG: " -#define ERST_DBG_RECORD_LEN_MAX 0x4000 +#define ERST_DBG_RECORD_LEN_MAX 4096 static void *erst_dbg_buf; static unsigned int erst_dbg_buf_len; @@ -213,10 +213,6 @@ static struct miscdevice erst_dbg_dev = { static __init int erst_dbg_init(void) { - if (erst_disable) { - pr_info(ERST_DBG_PFX "ERST support is disabled.\n"); - return -ENODEV; - } return misc_register(&erst_dbg_dev); } diff --git a/trunk/drivers/acpi/apei/erst.c b/trunk/drivers/acpi/apei/erst.c index 2ca59dc69f7f..6053f4780df9 100644 --- a/trunk/drivers/acpi/apei/erst.c +++ b/trunk/drivers/acpi/apei/erst.c @@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE); + rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE); if (rc) return rc; apei_exec_ctx_set_input(&ctx, 
offset); @@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); + rc = apei_exec_run(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ); + rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); + rc = apei_exec_run(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR); + rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR); if (rc) return rc; apei_exec_ctx_set_input(&ctx, record_id); @@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); + rc = apei_exec_run(&ctx, ACPI_ERST_END); if (rc) return rc; diff --git a/trunk/drivers/acpi/apei/ghes.c b/trunk/drivers/acpi/apei/ghes.c index 0784f99a4665..f703b2881153 100644 --- a/trunk/drivers/acpi/apei/ghes.c +++ b/trunk/drivers/acpi/apei/ghes.c @@ -12,7 +12,7 @@ * For more information about Generic Hardware Error Source, please * refer to ACPI Specification version 4.0, section 17.3.2.6 * - * Copyright 2010,2011 Intel Corp. + * Copyright 2010 Intel Corp. * Author: Huang Ying * * This program is free software; you can redistribute it and/or @@ -42,9 +42,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -56,30 +53,6 @@ #define GHES_PFX "GHES: " #define GHES_ESTATUS_MAX_SIZE 65536 -#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 - -#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 - -/* This is just an estimation for memory pool allocation */ -#define GHES_ESTATUS_CACHE_AVG_SIZE 512 - -#define GHES_ESTATUS_CACHES_SIZE 4 - -#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL -/* Prevent too many caches are allocated because of RCU */ -#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) - -#define GHES_ESTATUS_CACHE_LEN(estatus_len) \ - (sizeof(struct ghes_estatus_cache) + (estatus_len)) -#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ - ((struct acpi_hest_generic_status *) \ - ((struct ghes_estatus_cache *)(estatus_cache) + 1)) - -#define GHES_ESTATUS_NODE_LEN(estatus_len) \ - (sizeof(struct ghes_estatus_node) + (estatus_len)) -#define GHES_ESTATUS_FROM_NODE(estatus_node) \ - ((struct acpi_hest_generic_status *) \ - ((struct ghes_estatus_node *)(estatus_node) + 1)) /* * One struct ghes is created for each generic hardware error source. 
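Every ERST storage helper in the hunks above follows the same fixed shape once apei_exec_run_optional() is reverted away: run the BEGIN action, exchange values through the context, then run END. A minimal sketch of that sequence, reusing only names visible in this diff (erst_exec_ctx_init() and the intermediate actions are assumed to behave as in erst.c):

static int erst_action_sketch(u64 offset, u64 *out)
{
	struct apei_exec_context ctx;
	int rc;

	erst_exec_ctx_init(&ctx);
	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);	/* now mandatory, not optional */
	if (rc)
		return rc;
	apei_exec_ctx_set_input(&ctx, offset);
	/* intermediate actions (set offset, execute, check busy/status) elided */
	*out = apei_exec_ctx_get_output(&ctx);
	return apei_exec_run(&ctx, ACPI_ERST_END);
}
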
@@ -104,22 +77,6 @@ struct ghes { }; }; -struct ghes_estatus_node { - struct llist_node llnode; - struct acpi_hest_generic *generic; -}; - -struct ghes_estatus_cache { - u32 estatus_len; - atomic_t count; - struct acpi_hest_generic *generic; - unsigned long long time_in; - struct rcu_head rcu; -}; - -int ghes_disable; -module_param_named(disable, ghes_disable, bool, 0); - static int ghes_panic_timeout __read_mostly = 30; /* @@ -164,22 +121,6 @@ static struct vm_struct *ghes_ioremap_area; static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); -/* - * printk is not safe in NMI context. So in NMI handler, we allocate - * required memory from lock-less memory allocator - * (ghes_estatus_pool), save estatus into it, put them into lock-less - * list (ghes_estatus_llist), then delay printk into IRQ context via - * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record - * required pool size by all NMI error source. - */ -static struct gen_pool *ghes_estatus_pool; -static unsigned long ghes_estatus_pool_size_request; -static struct llist_head ghes_estatus_llist; -static struct irq_work ghes_proc_irq_work; - -struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; -static atomic_t ghes_estatus_cache_alloced; - static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -239,55 +180,6 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr) __flush_tlb_one(vaddr); } -static int ghes_estatus_pool_init(void) -{ - ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); - if (!ghes_estatus_pool) - return -ENOMEM; - return 0; -} - -static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, - struct gen_pool_chunk *chunk, - void *data) -{ - free_page(chunk->start_addr); -} - -static void ghes_estatus_pool_exit(void) -{ - gen_pool_for_each_chunk(ghes_estatus_pool, - ghes_estatus_pool_free_chunk_page, NULL); - gen_pool_destroy(ghes_estatus_pool); -} - -static int ghes_estatus_pool_expand(unsigned long len) -{ - unsigned long i, pages, size, addr; - int ret; - - ghes_estatus_pool_size_request += PAGE_ALIGN(len); - size = gen_pool_size(ghes_estatus_pool); - if (size >= ghes_estatus_pool_size_request) - return 0; - pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; - for (i = 0; i < pages; i++) { - addr = __get_free_page(GFP_KERNEL); - if (!addr) - return -ENOMEM; - ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); - if (ret) - return ret; - } - - return 0; -} - -static void ghes_estatus_pool_shrink(unsigned long len) -{ - ghes_estatus_pool_size_request -= PAGE_ALIGN(len); -} - static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -449,196 +341,43 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) +static void ghes_do_proc(struct ghes *ghes) { - int sev, sec_sev; + int sev, processed = 0; struct acpi_hest_generic_data *gdata; - sev = ghes_severity(estatus->error_severity); - apei_estatus_for_each_section(estatus, gdata) { - sec_sev = ghes_severity(gdata->error_severity); + sev = ghes_severity(ghes->estatus->error_severity); + apei_estatus_for_each_section(ghes->estatus, gdata) { +#ifdef CONFIG_X86_MCE if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, CPER_SEC_PLATFORM_MEM)) { - struct cper_sec_mem_err *mem_err; - mem_err = (struct cper_sec_mem_err *)(gdata+1); -#ifdef CONFIG_X86_MCE - 
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, - mem_err); -#endif -#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE - if (sev == GHES_SEV_RECOVERABLE && - sec_sev == GHES_SEV_RECOVERABLE && - mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { - unsigned long pfn; - pfn = mem_err->physical_addr >> PAGE_SHIFT; - memory_failure_queue(pfn, 0, 0); - } -#endif + apei_mce_report_mem_error( + sev == GHES_SEV_CORRECTED, + (struct cper_sec_mem_err *)(gdata+1)); + processed = 1; } +#endif } } -static void __ghes_print_estatus(const char *pfx, - const struct acpi_hest_generic *generic, - const struct acpi_hest_generic_status *estatus) +static void ghes_print_estatus(const char *pfx, struct ghes *ghes) { + /* Not more than 2 messages every 5 seconds */ + static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); + if (pfx == NULL) { - if (ghes_severity(estatus->error_severity) <= + if (ghes_severity(ghes->estatus->error_severity) <= GHES_SEV_CORRECTED) pfx = KERN_WARNING HW_ERR; else pfx = KERN_ERR HW_ERR; } - printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, generic->header.source_id); - apei_estatus_print(pfx, estatus); -} - -static int ghes_print_estatus(const char *pfx, - const struct acpi_hest_generic *generic, - const struct acpi_hest_generic_status *estatus) -{ - /* Not more than 2 messages every 5 seconds */ - static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); - static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); - struct ratelimit_state *ratelimit; - - if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) - ratelimit = &ratelimit_corrected; - else - ratelimit = &ratelimit_uncorrected; - if (__ratelimit(ratelimit)) { - __ghes_print_estatus(pfx, generic, estatus); - return 1; + if (__ratelimit(&ratelimit)) { + printk( + "%s""Hardware error from APEI Generic Hardware Error Source: %d\n", + pfx, ghes->generic->header.source_id); + apei_estatus_print(pfx, ghes->estatus); } - return 0; -} - -/* - * GHES error status reporting throttle, to report more kinds of - * errors, instead of just most frequently occurred errors. 
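The reverted ghes_print_estatus() above folds the two per-severity ratelimit states back into a single one. A sketch of the kernel ratelimit idiom it relies on (the wrapper function and message text are illustrative):

static void ghes_report_sketch(const char *msg)
{
	/* at most 2 messages every 5 seconds, as in ghes_print_estatus() */
	static DEFINE_RATELIMIT_STATE(rs, 5 * HZ, 2);

	if (__ratelimit(&rs))	/* nonzero when a slot is available */
		printk(KERN_ERR HW_ERR "%s\n", msg);
}
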
- */ -static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) -{ - u32 len; - int i, cached = 0; - unsigned long long now; - struct ghes_estatus_cache *cache; - struct acpi_hest_generic_status *cache_estatus; - - len = apei_estatus_len(estatus); - rcu_read_lock(); - for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { - cache = rcu_dereference(ghes_estatus_caches[i]); - if (cache == NULL) - continue; - if (len != cache->estatus_len) - continue; - cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); - if (memcmp(estatus, cache_estatus, len)) - continue; - atomic_inc(&cache->count); - now = sched_clock(); - if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) - cached = 1; - break; - } - rcu_read_unlock(); - return cached; -} - -static struct ghes_estatus_cache *ghes_estatus_cache_alloc( - struct acpi_hest_generic *generic, - struct acpi_hest_generic_status *estatus) -{ - int alloced; - u32 len, cache_len; - struct ghes_estatus_cache *cache; - struct acpi_hest_generic_status *cache_estatus; - - alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); - if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { - atomic_dec(&ghes_estatus_cache_alloced); - return NULL; - } - len = apei_estatus_len(estatus); - cache_len = GHES_ESTATUS_CACHE_LEN(len); - cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); - if (!cache) { - atomic_dec(&ghes_estatus_cache_alloced); - return NULL; - } - cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); - memcpy(cache_estatus, estatus, len); - cache->estatus_len = len; - atomic_set(&cache->count, 0); - cache->generic = generic; - cache->time_in = sched_clock(); - return cache; -} - -static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) -{ - u32 len; - - len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); - len = GHES_ESTATUS_CACHE_LEN(len); - gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); - atomic_dec(&ghes_estatus_cache_alloced); -} - -static void ghes_estatus_cache_rcu_free(struct rcu_head *head) -{ - struct ghes_estatus_cache *cache; - - cache = container_of(head, struct ghes_estatus_cache, rcu); - ghes_estatus_cache_free(cache); -} - -static void ghes_estatus_cache_add( - struct acpi_hest_generic *generic, - struct acpi_hest_generic_status *estatus) -{ - int i, slot = -1, count; - unsigned long long now, duration, period, max_period = 0; - struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; - - new_cache = ghes_estatus_cache_alloc(generic, estatus); - if (new_cache == NULL) - return; - rcu_read_lock(); - now = sched_clock(); - for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { - cache = rcu_dereference(ghes_estatus_caches[i]); - if (cache == NULL) { - slot = i; - slot_cache = NULL; - break; - } - duration = now - cache->time_in; - if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { - slot = i; - slot_cache = cache; - break; - } - count = atomic_read(&cache->count); - period = duration; - do_div(period, (count + 1)); - if (period > max_period) { - max_period = period; - slot = i; - slot_cache = cache; - } - } - /* new_cache must be put into array after its contents are written */ - smp_wmb(); - if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, - slot_cache, new_cache) == slot_cache) { - if (slot_cache) - call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); - } else - ghes_estatus_cache_free(new_cache); - rcu_read_unlock(); } static int ghes_proc(struct ghes *ghes) @@ -648,11 +387,9 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; - if 
(!ghes_estatus_cached(ghes->estatus)) { - if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) - ghes_estatus_cache_add(ghes->generic, ghes->estatus); - } - ghes_do_proc(ghes->estatus); + ghes_print_estatus(NULL, ghes); + ghes_do_proc(ghes); + out: ghes_clear_estatus(ghes); return 0; @@ -710,45 +447,6 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } -static void ghes_proc_in_irq(struct irq_work *irq_work) -{ - struct llist_node *llnode, *next, *tail = NULL; - struct ghes_estatus_node *estatus_node; - struct acpi_hest_generic *generic; - struct acpi_hest_generic_status *estatus; - u32 len, node_len; - - /* - * Because the time order of estatus in list is reversed, - * revert it back to proper order. - */ - llnode = llist_del_all(&ghes_estatus_llist); - while (llnode) { - next = llnode->next; - llnode->next = tail; - tail = llnode; - llnode = next; - } - llnode = tail; - while (llnode) { - next = llnode->next; - estatus_node = llist_entry(llnode, struct ghes_estatus_node, - llnode); - estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - len = apei_estatus_len(estatus); - node_len = GHES_ESTATUS_NODE_LEN(len); - ghes_do_proc(estatus); - if (!ghes_estatus_cached(estatus)) { - generic = estatus_node->generic; - if (ghes_print_estatus(NULL, generic, estatus)) - ghes_estatus_cache_add(generic, estatus); - } - gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, - node_len); - llnode = next; - } -} - static int ghes_notify_nmi(struct notifier_block *this, unsigned long cmd, void *data) { @@ -778,8 +476,7 @@ static int ghes_notify_nmi(struct notifier_block *this, if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, - ghes_global->estatus); + ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); /* reboot to log the error! 
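The removed ghes_proc_in_irq() above contains a subtle step worth spelling out: llist_del_all() hands the list back newest-first (llist_add() pushes like a stack), so the handler reverses it before processing to preserve arrival order. A self-contained sketch of just that reversal:

static struct llist_node *llist_reverse_sketch(struct llist_head *head)
{
	struct llist_node *node = llist_del_all(head), *next, *tail = NULL;

	while (node) {
		next = node->next;
		node->next = tail;	/* relink each node in front of tail */
		tail = node;
		node = next;
	}
	return tail;	/* oldest entry now first */
}
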
*/ if (panic_timeout == 0) panic_timeout = ghes_panic_timeout; @@ -787,34 +484,12 @@ static int ghes_notify_nmi(struct notifier_block *this, } list_for_each_entry_rcu(ghes, &ghes_nmi, list) { -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - u32 len, node_len; - struct ghes_estatus_node *estatus_node; - struct acpi_hest_generic_status *estatus; -#endif if (!(ghes->flags & GHES_TO_CLEAR)) continue; -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - if (ghes_estatus_cached(ghes->estatus)) - goto next; - /* Save estatus for further processing in IRQ context */ - len = apei_estatus_len(ghes->estatus); - node_len = GHES_ESTATUS_NODE_LEN(len); - estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, - node_len); - if (estatus_node) { - estatus_node->generic = ghes->generic; - estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - memcpy(estatus, ghes->estatus, len); - llist_add(&estatus_node->llnode, &ghes_estatus_llist); - } -next: -#endif + /* Do not print estatus because printk is not NMI safe */ + ghes_do_proc(ghes); ghes_clear_estatus(ghes); } -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - irq_work_queue(&ghes_proc_irq_work); -#endif out: raw_spin_unlock(&ghes_nmi_lock); @@ -829,26 +504,10 @@ static struct notifier_block ghes_notifier_nmi = { .notifier_call = ghes_notify_nmi, }; -static unsigned long ghes_esource_prealloc_size( - const struct acpi_hest_generic *generic) -{ - unsigned long block_length, prealloc_records, prealloc_size; - - block_length = min_t(unsigned long, generic->error_block_length, - GHES_ESTATUS_MAX_SIZE); - prealloc_records = max_t(unsigned long, - generic->records_to_preallocate, 1); - prealloc_size = min_t(unsigned long, block_length * prealloc_records, - GHES_ESOURCE_PREALLOC_MAX_SIZE); - - return prealloc_size; -} - static int __devinit ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; - unsigned long len; int rc = -EINVAL; generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; @@ -914,8 +573,6 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: - len = ghes_esource_prealloc_size(generic); - ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) register_die_notifier(&ghes_notifier_nmi); @@ -940,7 +597,6 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) { struct ghes *ghes; struct acpi_hest_generic *generic; - unsigned long len; ghes = platform_get_drvdata(ghes_dev); generic = ghes->generic; @@ -971,8 +627,6 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) * freed after NMI handler finishes. 
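The replacement comment above ("printk is not NMI safe") is the crux of this hunk: the deleted code never printed from NMI context, it queued the work instead. A sketch of the irq_work deferral pattern the removed lines used (ghes_irq_work and the handler body are illustrative names):

static struct irq_work ghes_irq_work;

static void ghes_irq_fn(struct irq_work *work)
{
	/* IRQ context: printk and allocations are safe here */
}

static void ghes_from_nmi(void)
{
	/* NMI context: only touch pre-allocated, lock-less storage ... */
	irq_work_queue(&ghes_irq_work);	/* ... then defer the rest to IRQ */
}

/* once, at init time: init_irq_work(&ghes_irq_work, ghes_irq_fn); */
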
*/ synchronize_rcu(); - len = ghes_esource_prealloc_size(generic); - ghes_estatus_pool_shrink(len); break; default: BUG(); @@ -1008,43 +662,15 @@ static int __init ghes_init(void) return -EINVAL; } - if (ghes_disable) { - pr_info(GHES_PFX "GHES is not enabled!\n"); - return -EINVAL; - } - - init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); - rc = ghes_ioremap_init(); if (rc) goto err; - rc = ghes_estatus_pool_init(); - if (rc) - goto err_ioremap_exit; - - rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * - GHES_ESTATUS_CACHE_ALLOCED_MAX); - if (rc) - goto err_pool_exit; - rc = platform_driver_register(&ghes_platform_driver); if (rc) - goto err_pool_exit; - - rc = apei_osc_setup(); - if (rc == 0 && osc_sb_apei_support_acked) - pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); - else if (rc == 0 && !osc_sb_apei_support_acked) - pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); - else if (rc && osc_sb_apei_support_acked) - pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); - else - pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); + goto err_ioremap_exit; return 0; -err_pool_exit: - ghes_estatus_pool_exit(); err_ioremap_exit: ghes_ioremap_exit(); err: @@ -1054,7 +680,6 @@ static int __init ghes_init(void) static void __exit ghes_exit(void) { platform_driver_unregister(&ghes_platform_driver); - ghes_estatus_pool_exit(); ghes_ioremap_exit(); } diff --git a/trunk/drivers/acpi/apei/hest.c b/trunk/drivers/acpi/apei/hest.c index 05fee06f4d6e..181bc2f7bb74 100644 --- a/trunk/drivers/acpi/apei/hest.c +++ b/trunk/drivers/acpi/apei/hest.c @@ -231,17 +231,16 @@ void __init acpi_hest_init(void) goto err; } - if (!ghes_disable) { - rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); - if (rc) - goto err; - rc = hest_ghes_dev_register(ghes_count); - if (rc) - goto err; + rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); + if (rc) + goto err; + + rc = hest_ghes_dev_register(ghes_count); + if (!rc) { + pr_info(HEST_PFX "Table parsing has been initialized.\n"); + return; } - pr_info(HEST_PFX "Table parsing has been initialized.\n"); - return; err: hest_disable = 1; } diff --git a/trunk/drivers/acpi/battery.c b/trunk/drivers/acpi/battery.c index 87c0a8daa99a..2c661353e8f2 100644 --- a/trunk/drivers/acpi/battery.c +++ b/trunk/drivers/acpi/battery.c @@ -55,9 +55,6 @@ #define ACPI_BATTERY_NOTIFY_INFO 0x81 #define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82 -/* Battery power unit: 0 means mW, 1 means mA */ -#define ACPI_BATTERY_POWER_UNIT_MA 1 - #define _COMPONENT ACPI_BATTERY_COMPONENT ACPI_MODULE_NAME("battery"); @@ -94,6 +91,11 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids); enum { ACPI_BATTERY_ALARM_PRESENT, ACPI_BATTERY_XINFO_PRESENT, + /* For buggy DSDTs that report negative 16-bit values for either + * charging or discharging current and/or report 0 as 65536 + * due to bad math. + */ + ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, }; @@ -299,8 +301,7 @@ static enum power_supply_property energy_battery_props[] = { #ifdef CONFIG_ACPI_PROCFS_POWER inline char *acpi_battery_units(struct acpi_battery *battery) { - return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ? 
- "mA" : "mW"; + return (battery->power_unit)?"mA":"mW"; } #endif @@ -460,17 +461,9 @@ static int acpi_battery_get_state(struct acpi_battery *battery) battery->update_time = jiffies; kfree(buffer.pointer); - /* For buggy DSDTs that report negative 16-bit values for either - * charging or discharging current and/or report 0 as 65536 - * due to bad math. - */ - if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA && - battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN && - (s16)(battery->rate_now) < 0) { + if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) && + battery->rate_now != -1) battery->rate_now = abs((s16)battery->rate_now); - printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate" - " invalid.\n"); - } if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags) && battery->capacity_now >= 0 && battery->capacity_now <= 100) @@ -551,7 +544,7 @@ static int sysfs_add_battery(struct acpi_battery *battery) { int result; - if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { + if (battery->power_unit) { battery->bat.properties = charge_battery_props; battery->bat.num_properties = ARRAY_SIZE(charge_battery_props); @@ -573,16 +566,18 @@ static int sysfs_add_battery(struct acpi_battery *battery) static void sysfs_remove_battery(struct acpi_battery *battery) { - mutex_lock(&battery->lock); - if (!battery->bat.dev) { - mutex_unlock(&battery->lock); + if (!battery->bat.dev) return; - } - device_remove_file(battery->bat.dev, &alarm_attr); power_supply_unregister(&battery->bat); battery->bat.dev = NULL; - mutex_unlock(&battery->lock); +} + +static void acpi_battery_quirks(struct acpi_battery *battery) +{ + if (dmi_name_in_vendors("Acer") && battery->power_unit) { + set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags); + } } /* @@ -597,7 +592,7 @@ static void sysfs_remove_battery(struct acpi_battery *battery) * * Handle this correctly so that they won't break userspace. 
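The quirk shuffled around in the battery hunks above exists because some DSDTs return the (dis)charge rate as an unsigned 16-bit value that has wrapped. Reinterpreting the low 16 bits as signed and taking the absolute value recovers the intended reading; a sketch with illustrative values:

static int battery_rate_fixup_sketch(u32 raw)
{
	/* buggy firmware: 65535 really means -1 mA, 65534 means -2 mA, ... */
	return abs((s16)raw);	/* 65535 -> 1, 65534 -> 2, 100 -> 100 */
}
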
*/ -static void acpi_battery_quirks(struct acpi_battery *battery) +static void acpi_battery_quirks2(struct acpi_battery *battery) { if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)) return ; @@ -628,15 +623,13 @@ static int acpi_battery_update(struct acpi_battery *battery) result = acpi_battery_get_info(battery); if (result) return result; + acpi_battery_quirks(battery); acpi_battery_init_alarm(battery); } - if (!battery->bat.dev) { - result = sysfs_add_battery(battery); - if (result) - return result; - } + if (!battery->bat.dev) + sysfs_add_battery(battery); result = acpi_battery_get_state(battery); - acpi_battery_quirks(battery); + acpi_battery_quirks2(battery); return result; } @@ -870,7 +863,7 @@ DECLARE_FILE_FUNCTIONS(alarm); }, \ } -static const struct battery_file { +static struct battery_file { struct file_operations ops; mode_t mode; const char *name; @@ -955,12 +948,9 @@ static int battery_notify(struct notifier_block *nb, struct acpi_battery *battery = container_of(nb, struct acpi_battery, pm_nb); switch (mode) { - case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - if (battery->bat.dev) { - sysfs_remove_battery(battery); - sysfs_add_battery(battery); - } + sysfs_remove_battery(battery); + sysfs_add_battery(battery); break; } @@ -985,33 +975,25 @@ static int acpi_battery_add(struct acpi_device *device) if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, "_BIX", &handle))) set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); - result = acpi_battery_update(battery); - if (result) - goto fail; + acpi_battery_update(battery); #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); #endif - if (result) { + if (!result) { + printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", + ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), + device->status.battery_present ? "present" : "absent"); + } else { #ifdef CONFIG_ACPI_PROCFS_POWER acpi_battery_remove_fs(device); #endif - goto fail; + kfree(battery); } - printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", - ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), - device->status.battery_present ? 
"present" : "absent"); - battery->pm_nb.notifier_call = battery_notify; register_pm_notifier(&battery->pm_nb); return result; - -fail: - sysfs_remove_battery(battery); - mutex_destroy(&battery->lock); - kfree(battery); - return result; } static int acpi_battery_remove(struct acpi_device *device, int type) diff --git a/trunk/drivers/acpi/bus.c b/trunk/drivers/acpi/bus.c index 437ddbf0c49a..d1e06c182cdb 100644 --- a/trunk/drivers/acpi/bus.c +++ b/trunk/drivers/acpi/bus.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -520,7 +519,6 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) } EXPORT_SYMBOL(acpi_run_osc); -bool osc_sb_apei_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_support(void) { @@ -543,19 +541,11 @@ static void acpi_bus_osc_support(void) #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; #endif - - if (!ghes_disable) - capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; - if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) { - u32 *capbuf_ret = context.ret.pointer; - if (context.ret.length > OSC_SUPPORT_TYPE) - osc_sb_apei_support_acked = - capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT; + if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) kfree(context.ret.pointer); - } - /* do we need to check other returned cap? Sounds no */ + /* do we need to check the returned cap? Sounds no */ } /* -------------------------------------------------------------------------- diff --git a/trunk/drivers/acpi/dock.c b/trunk/drivers/acpi/dock.c index 19a61136d848..1864ad3cf895 100644 --- a/trunk/drivers/acpi/dock.c +++ b/trunk/drivers/acpi/dock.c @@ -77,7 +77,7 @@ struct dock_dependent_device { struct list_head list; struct list_head hotplug_list; acpi_handle handle; - const struct acpi_dock_ops *ops; + struct acpi_dock_ops *ops; void *context; }; @@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); * the dock driver after _DCK is executed. 
*/ int -register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops, +register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, void *context) { struct dock_dependent_device *dd; diff --git a/trunk/drivers/acpi/ec_sys.c b/trunk/drivers/acpi/ec_sys.c index 22f918bacd35..05b44201a614 100644 --- a/trunk/drivers/acpi/ec_sys.c +++ b/trunk/drivers/acpi/ec_sys.c @@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf, return count; } -static const struct file_operations acpi_ec_io_ops = { +static struct file_operations acpi_ec_io_ops = { .owner = THIS_MODULE, .open = acpi_ec_open_io, .read = acpi_ec_read_io, diff --git a/trunk/drivers/acpi/fan.c b/trunk/drivers/acpi/fan.c index 0f0356ca1a9e..467479f07c1f 100644 --- a/trunk/drivers/acpi/fan.c +++ b/trunk/drivers/acpi/fan.c @@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) return result; } -static const struct thermal_cooling_device_ops fan_cooling_ops = { +static struct thermal_cooling_device_ops fan_cooling_ops = { .get_max_state = fan_get_max_state, .get_cur_state = fan_get_cur_state, .set_cur_state = fan_set_cur_state, diff --git a/trunk/drivers/acpi/osl.c b/trunk/drivers/acpi/osl.c index fa32f584229f..372f9b70f7f4 100644 --- a/trunk/drivers/acpi/osl.c +++ b/trunk/drivers/acpi/osl.c @@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported) { if (!strcmp("Linux", interface)) { - printk_once(KERN_NOTICE FW_BUG PREFIX + printk(KERN_NOTICE FW_BUG PREFIX "BIOS _OSI(Linux) query %s%s\n", osi_linux.enable ? "honored" : "ignored", osi_linux.cmdline ? " via cmdline" : @@ -237,23 +237,8 @@ void acpi_os_vprintf(const char *fmt, va_list args) #endif } -#ifdef CONFIG_KEXEC -static unsigned long acpi_rsdp; -static int __init setup_acpi_rsdp(char *arg) -{ - acpi_rsdp = simple_strtoul(arg, NULL, 16); - return 0; -} -early_param("acpi_rsdp", setup_acpi_rsdp); -#endif - acpi_physical_address __init acpi_os_get_root_pointer(void) { -#ifdef CONFIG_KEXEC - if (acpi_rsdp) - return acpi_rsdp; -#endif - if (efi_enabled) { if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) return efi.acpi20; @@ -1098,13 +1083,7 @@ struct osi_setup_entry { bool enable; }; -static struct osi_setup_entry __initdata - osi_setup_entries[OSI_STRING_ENTRIES_MAX] = { - {"Module Device", true}, - {"Processor Device", true}, - {"3.0 _SCP Extensions", true}, - {"Processor Aggregator Device", true}, -}; +static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX]; void __init acpi_osi_setup(char *str) { diff --git a/trunk/drivers/acpi/pci_irq.c b/trunk/drivers/acpi/pci_irq.c index 7f9eba9a0b02..f907cfbfa13c 100644 --- a/trunk/drivers/acpi/pci_irq.c +++ b/trunk/drivers/acpi/pci_irq.c @@ -303,61 +303,6 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus) /* -------------------------------------------------------------------------- PCI Interrupt Routing Support -------------------------------------------------------------------------- */ -#ifdef CONFIG_X86_IO_APIC -extern int noioapicquirk; -extern int noioapicreroute; - -static int bridge_has_boot_interrupt_variant(struct pci_bus *bus) -{ - struct pci_bus *bus_it; - - for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { - if (!bus_it->self) - return 0; - if (bus_it->self->irq_reroute_variant) - return bus_it->self->irq_reroute_variant; - } - return 0; -} - -/* - * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ - * entry in the chipset's IO-APIC is masked (as, e.g. 
the RT kernel does - * during interrupt handling). When this INTx generation cannot be disabled, - * we reroute these interrupts to their legacy equivalent to get rid of - * spurious interrupts. - */ -static int acpi_reroute_boot_interrupt(struct pci_dev *dev, - struct acpi_prt_entry *entry) -{ - if (noioapicquirk || noioapicreroute) { - return 0; - } else { - switch (bridge_has_boot_interrupt_variant(dev->bus)) { - case 0: - /* no rerouting necessary */ - return 0; - case INTEL_IRQ_REROUTE_VARIANT: - /* - * Remap according to INTx routing table in 6700PXH - * specs, intel order number 302628-002, section - * 2.15.2. Other chipsets (80332, ...) have the same - * mapping and are handled here as well. - */ - dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy " - "IRQ %d\n", entry->index, - (entry->index % 4) + 16); - entry->index = (entry->index % 4) + 16; - return 1; - default: - dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy " - "IRQ: unknown mapping\n", entry->index); - return -1; - } - } -} -#endif /* CONFIG_X86_IO_APIC */ - static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) { struct acpi_prt_entry *entry; @@ -366,9 +311,6 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) entry = acpi_pci_irq_find_prt_entry(dev, pin); if (entry) { -#ifdef CONFIG_X86_IO_APIC - acpi_reroute_boot_interrupt(dev, entry); -#endif /* CONFIG_X86_IO_APIC */ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n", pci_name(dev), pin_name(pin))); return entry; diff --git a/trunk/drivers/acpi/pci_root.c b/trunk/drivers/acpi/pci_root.c index 2672c798272f..d06078d660ad 100644 --- a/trunk/drivers/acpi/pci_root.c +++ b/trunk/drivers/acpi/pci_root.c @@ -485,8 +485,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) root->secondary.end = 0xFF; printk(KERN_WARNING FW_BUG PREFIX "no secondary bus range in _CRS\n"); - status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, - NULL, &bus); + status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); if (ACPI_SUCCESS(status)) root->secondary.start = bus; else if (status == AE_NOT_FOUND) diff --git a/trunk/drivers/acpi/processor_thermal.c b/trunk/drivers/acpi/processor_thermal.c index 870550d6a4bf..79cb65332894 100644 --- a/trunk/drivers/acpi/processor_thermal.c +++ b/trunk/drivers/acpi/processor_thermal.c @@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev, return result; } -const struct thermal_cooling_device_ops processor_cooling_ops = { +struct thermal_cooling_device_ops processor_cooling_ops = { .get_max_state = processor_get_max_state, .get_cur_state = processor_get_cur_state, .set_cur_state = processor_set_cur_state, diff --git a/trunk/drivers/acpi/sbs.c b/trunk/drivers/acpi/sbs.c index 6e36d0c0057c..50658ff887d9 100644 --- a/trunk/drivers/acpi/sbs.c +++ b/trunk/drivers/acpi/sbs.c @@ -130,9 +130,6 @@ struct acpi_sbs { #define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger) -static int acpi_sbs_remove(struct acpi_device *device, int type); -static int acpi_battery_get_state(struct acpi_battery *battery); - static inline int battery_scale(int log) { int scale = 1; @@ -198,8 +195,6 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT) return -ENODEV; - - acpi_battery_get_state(battery); switch (psp) { case POWER_SUPPLY_PROP_STATUS: if (battery->rate_now < 0) @@ -230,17 +225,11 @@ static int acpi_sbs_battery_get_property(struct power_supply 
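The deleted acpi_reroute_boot_interrupt() above boils down to one arithmetic remap for the Intel 6700PXH family: a PRT entry's boot interrupt collapses onto one of the four legacy IRQs 16-19. As a standalone sketch:

static int boot_irq_remap_sketch(int prt_index)
{
	/* per Intel order no. 302628-002, section 2.15.2 */
	return (prt_index % 4) + 16;	/* e.g. 37 -> 17, 16 -> 16 */
}
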
*psy, case POWER_SUPPLY_PROP_POWER_NOW: val->intval = abs(battery->rate_now) * acpi_battery_ipscale(battery) * 1000; - val->intval *= (acpi_battery_mode(battery)) ? - (battery->voltage_now * - acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CURRENT_AVG: case POWER_SUPPLY_PROP_POWER_AVG: val->intval = abs(battery->rate_avg) * acpi_battery_ipscale(battery) * 1000; - val->intval *= (acpi_battery_mode(battery)) ? - (battery->voltage_now * - acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery->state_of_charge; @@ -914,6 +903,8 @@ static void acpi_sbs_callback(void *context) } } +static int acpi_sbs_remove(struct acpi_device *device, int type); + static int acpi_sbs_add(struct acpi_device *device) { struct acpi_sbs *sbs; diff --git a/trunk/drivers/acpi/sleep.c b/trunk/drivers/acpi/sleep.c index 3ed80b2ca907..6c949602cbd1 100644 --- a/trunk/drivers/acpi/sleep.c +++ b/trunk/drivers/acpi/sleep.c @@ -428,22 +428,6 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"), }, }, - { - .callback = init_old_suspend_ordering, - .ident = "Asus A8N-SLI DELUXE", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"), - }, - }, - { - .callback = init_old_suspend_ordering, - .ident = "Asus A8N-SLI Premium", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"), - }, - }, {}, }; #endif /* CONFIG_SUSPEND */ diff --git a/trunk/drivers/acpi/sysfs.c b/trunk/drivers/acpi/sysfs.c index c538d0ef10ff..77255f250dbb 100644 --- a/trunk/drivers/acpi/sysfs.c +++ b/trunk/drivers/acpi/sysfs.c @@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp) return result; } -static const struct kernel_param_ops param_ops_debug_layer = { +static struct kernel_param_ops param_ops_debug_layer = { .set = param_set_uint, .get = param_get_debug_layer, }; -static const struct kernel_param_ops param_ops_debug_level = { +static struct kernel_param_ops param_ops_debug_level = { .set = param_set_uint, .get = param_get_debug_level, }; diff --git a/trunk/drivers/acpi/thermal.c b/trunk/drivers/acpi/thermal.c index 48fbc647b178..2607e17b520f 100644 --- a/trunk/drivers/acpi/thermal.c +++ b/trunk/drivers/acpi/thermal.c @@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal, thermal_zone_unbind_cooling_device); } -static const struct thermal_zone_device_ops acpi_thermal_zone_ops = { +static struct thermal_zone_device_ops acpi_thermal_zone_ops = { .bind = acpi_thermal_bind_cooling_device, .unbind = acpi_thermal_unbind_cooling_device, .get_temp = thermal_get_temp, diff --git a/trunk/drivers/acpi/video.c b/trunk/drivers/acpi/video.c index 08a44b532f7c..ada4b4d9bdc8 100644 --- a/trunk/drivers/acpi/video.c +++ b/trunk/drivers/acpi/video.c @@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st return acpi_video_device_lcd_set_level(video, level); } -static const struct thermal_cooling_device_ops video_cooling_ops = { +static struct thermal_cooling_device_ops video_cooling_ops = { .get_max_state = video_get_max_state, .get_cur_state = video_get_cur_state, .set_cur_state = video_set_cur_state, diff --git a/trunk/drivers/ata/libata-acpi.c b/trunk/drivers/ata/libata-acpi.c index bb7c5f1085cc..e0a5b555cee1 100644 --- a/trunk/drivers/ata/libata-acpi.c +++ b/trunk/drivers/ata/libata-acpi.c 
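Several hunks below drop const from struct acpi_dock_ops (dock.c, then libata-acpi.c). A sketch of what a caller registering dock callbacks looks like against the reverted, non-const signature (the handler bodies and names are illustrative stubs):

static void my_dock_handler(acpi_handle handle, u32 event, void *data)
{
	/* react to the dock/undock notification */
}

static void my_dock_uevent(acpi_handle handle, u32 event, void *data)
{
	/* emit the uevent after _DCK has run */
}

static struct acpi_dock_ops my_dock_ops = {	/* no longer const */
	.handler = my_dock_handler,
	.uevent  = my_dock_uevent,
};

/* register_hotplug_dock_device(handle, &my_dock_ops, context); */
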
@@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data) ata_acpi_uevent(dev->link->ap, dev, event); } -static const struct acpi_dock_ops ata_acpi_dev_dock_ops = { +static struct acpi_dock_ops ata_acpi_dev_dock_ops = { .handler = ata_acpi_dev_notify_dock, .uevent = ata_acpi_dev_uevent, }; -static const struct acpi_dock_ops ata_acpi_ap_dock_ops = { +static struct acpi_dock_ops ata_acpi_ap_dock_ops = { .handler = ata_acpi_ap_notify_dock, .uevent = ata_acpi_ap_uevent, }; diff --git a/trunk/drivers/char/Kconfig b/trunk/drivers/char/Kconfig index 423fd56bf612..49502bc5360a 100644 --- a/trunk/drivers/char/Kconfig +++ b/trunk/drivers/char/Kconfig @@ -616,16 +616,5 @@ config MSM_SMD_PKT Enables userspace clients to read and write to some packet SMD ports via device interface for MSM chipset. -config TILE_SROM - bool "Character-device access via hypervisor to the Tilera SPI ROM" - depends on TILE - default y - ---help--- - This device provides character-level read-write access - to the SROM, typically via the "0", "1", and "2" devices - in /dev/srom/. The Tilera hypervisor makes the flash - device appear much like a simple EEPROM, and knows - how to partition a single ROM for multiple purposes. - endmenu diff --git a/trunk/drivers/char/Makefile b/trunk/drivers/char/Makefile index 32762ba769c2..7a00672bd85d 100644 --- a/trunk/drivers/char/Makefile +++ b/trunk/drivers/char/Makefile @@ -63,5 +63,3 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o obj-$(CONFIG_JS_RTC) += js-rtc.o js-rtc-y = rtc.o - -obj-$(CONFIG_TILE_SROM) += tile-srom.o diff --git a/trunk/drivers/char/ramoops.c b/trunk/drivers/char/ramoops.c index 810aff9e750f..fca0c51bbc90 100644 --- a/trunk/drivers/char/ramoops.c +++ b/trunk/drivers/char/ramoops.c @@ -147,14 +147,6 @@ static int __init ramoops_probe(struct platform_device *pdev) cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->dump_oops = pdata->dump_oops; - /* - * Update the module parameter variables as well so they are visible - * through /sys/module/ramoops/parameters/ - */ - mem_size = pdata->mem_size; - mem_address = pdata->mem_address; - record_size = pdata->record_size; - dump_oops = pdata->dump_oops; if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { pr_err("request mem region failed\n"); diff --git a/trunk/drivers/char/tile-srom.c b/trunk/drivers/char/tile-srom.c deleted file mode 100644 index cf3ee008dca2..000000000000 --- a/trunk/drivers/char/tile-srom.c +++ /dev/null @@ -1,481 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * SPI Flash ROM driver - * - * This source code is derived from code provided in "Linux Device - * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and - * Greg Kroah-Hartman, published by O'Reilly Media, Inc. - */ - -#include -#include -#include -#include /* printk() */ -#include /* kmalloc() */ -#include /* everything... 
*/ -#include /* error codes */ -#include /* size_t */ -#include -#include /* O_ACCMODE */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Size of our hypervisor I/O requests. We break up large transfers - * so that we don't spend large uninterrupted spans of time in the - * hypervisor. Erasing an SROM sector takes a significant fraction of - * a second, so if we allowed the user to, say, do one I/O to write the - * entire ROM, we'd get soft lockup timeouts, or worse. - */ -#define SROM_CHUNK_SIZE ((size_t)4096) - -/* - * When hypervisor is busy (e.g. erasing), poll the status periodically. - */ - -/* - * Interval to poll the state in msec - */ -#define SROM_WAIT_TRY_INTERVAL 20 - -/* - * Maximum times to poll the state - */ -#define SROM_MAX_WAIT_TRY_TIMES 1000 - -struct srom_dev { - int hv_devhdl; /* Handle for hypervisor device */ - u32 total_size; /* Size of this device */ - u32 sector_size; /* Size of a sector */ - u32 page_size; /* Size of a page */ - struct mutex lock; /* Allow only one accessor at a time */ -}; - -static int srom_major; /* Dynamic major by default */ -module_param(srom_major, int, 0); -MODULE_AUTHOR("Tilera Corporation"); -MODULE_LICENSE("GPL"); - -static int srom_devs; /* Number of SROM partitions */ -static struct cdev srom_cdev; -static struct class *srom_class; -static struct srom_dev *srom_devices; - -/* - * Handle calling the hypervisor and managing EAGAIN/EBUSY. - */ - -static ssize_t _srom_read(int hv_devhdl, void *buf, - loff_t off, size_t count) -{ - int retval, retries = SROM_MAX_WAIT_TRY_TIMES; - for (;;) { - retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf, - count, off); - if (retval >= 0) - return retval; - if (retval == HV_EAGAIN) - continue; - if (retval == HV_EBUSY && --retries > 0) { - msleep(SROM_WAIT_TRY_INTERVAL); - continue; - } - pr_err("_srom_read: error %d\n", retval); - return -EIO; - } -} - -static ssize_t _srom_write(int hv_devhdl, const void *buf, - loff_t off, size_t count) -{ - int retval, retries = SROM_MAX_WAIT_TRY_TIMES; - for (;;) { - retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf, - count, off); - if (retval >= 0) - return retval; - if (retval == HV_EAGAIN) - continue; - if (retval == HV_EBUSY && --retries > 0) { - msleep(SROM_WAIT_TRY_INTERVAL); - continue; - } - pr_err("_srom_write: error %d\n", retval); - return -EIO; - } -} - -/** - * srom_open() - Device open routine. - * @inode: Inode for this device. - * @filp: File for this specific open of the device. - * - * Returns zero, or an error code. - */ -static int srom_open(struct inode *inode, struct file *filp) -{ - filp->private_data = &srom_devices[iminor(inode)]; - return 0; -} - - -/** - * srom_release() - Device release routine. - * @inode: Inode for this device. - * @filp: File for this specific open of the device. - * - * Returns zero, or an error code. - */ -static int srom_release(struct inode *inode, struct file *filp) -{ - struct srom_dev *srom = filp->private_data; - char dummy; - - /* Make sure we've flushed anything written to the ROM. */ - mutex_lock(&srom->lock); - if (srom->hv_devhdl >= 0) - _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1); - mutex_unlock(&srom->lock); - - filp->private_data = NULL; - - return 0; -} - - -/** - * srom_read() - Read data from the device. - * @filp: File for this specific open of the device. - * @buf: User's data buffer. - * @count: Number of bytes requested. - * @f_pos: File position. - * - * Returns number of bytes read, or an error code. 
- */ -static ssize_t srom_read(struct file *filp, char __user *buf, - size_t count, loff_t *f_pos) -{ - int retval = 0; - void *kernbuf; - struct srom_dev *srom = filp->private_data; - - kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); - if (!kernbuf) - return -ENOMEM; - - if (mutex_lock_interruptible(&srom->lock)) { - retval = -ERESTARTSYS; - kfree(kernbuf); - return retval; - } - - while (count) { - int hv_retval; - int bytes_this_pass = min(count, SROM_CHUNK_SIZE); - - hv_retval = _srom_read(srom->hv_devhdl, kernbuf, - *f_pos, bytes_this_pass); - if (hv_retval > 0) { - if (copy_to_user(buf, kernbuf, hv_retval) != 0) { - retval = -EFAULT; - break; - } - } else if (hv_retval <= 0) { - if (retval == 0) - retval = hv_retval; - break; - } - - retval += hv_retval; - *f_pos += hv_retval; - buf += hv_retval; - count -= hv_retval; - } - - mutex_unlock(&srom->lock); - kfree(kernbuf); - - return retval; -} - -/** - * srom_write() - Write data to the device. - * @filp: File for this specific open of the device. - * @buf: User's data buffer. - * @count: Number of bytes requested. - * @f_pos: File position. - * - * Returns number of bytes written, or an error code. - */ -static ssize_t srom_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) -{ - int retval = 0; - void *kernbuf; - struct srom_dev *srom = filp->private_data; - - kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); - if (!kernbuf) - return -ENOMEM; - - if (mutex_lock_interruptible(&srom->lock)) { - retval = -ERESTARTSYS; - kfree(kernbuf); - return retval; - } - - while (count) { - int hv_retval; - int bytes_this_pass = min(count, SROM_CHUNK_SIZE); - - if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) { - retval = -EFAULT; - break; - } - - hv_retval = _srom_write(srom->hv_devhdl, kernbuf, - *f_pos, bytes_this_pass); - if (hv_retval <= 0) { - if (retval == 0) - retval = hv_retval; - break; - } - - retval += hv_retval; - *f_pos += hv_retval; - buf += hv_retval; - count -= hv_retval; - } - - mutex_unlock(&srom->lock); - kfree(kernbuf); - - return retval; -} - -/* Provide our own implementation so we can use srom->total_size. 
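srom_read() and srom_write() above share one load-bearing idea, spelled out by the SROM_CHUNK_SIZE comment: never hand the hypervisor more than one chunk per call, so a large transfer cannot stall the CPU for an uninterrupted span. The loop skeleton, reduced to its essentials (error handling and locking as in the driver are assumed):

static ssize_t srom_chunked_sketch(struct srom_dev *srom, void *kernbuf,
				   loff_t *f_pos, size_t count)
{
	ssize_t done = 0;

	while (count) {
		size_t n = min(count, SROM_CHUNK_SIZE);	/* cap each call at 4 KiB */
		int rc = _srom_read(srom->hv_devhdl, kernbuf, *f_pos, n);

		if (rc <= 0)
			return done ? done : rc;	/* report partial progress first */
		done += rc;
		*f_pos += rc;
		count -= rc;
	}
	return done;
}
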
*/ -loff_t srom_llseek(struct file *filp, loff_t offset, int origin) -{ - struct srom_dev *srom = filp->private_data; - - if (mutex_lock_interruptible(&srom->lock)) - return -ERESTARTSYS; - - switch (origin) { - case SEEK_END: - offset += srom->total_size; - break; - case SEEK_CUR: - offset += filp->f_pos; - break; - } - - if (offset < 0 || offset > srom->total_size) { - offset = -EINVAL; - } else { - filp->f_pos = offset; - filp->f_version = 0; - } - - mutex_unlock(&srom->lock); - - return offset; -} - -static ssize_t total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct srom_dev *srom = dev_get_drvdata(dev); - return sprintf(buf, "%u\n", srom->total_size); -} - -static ssize_t sector_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct srom_dev *srom = dev_get_drvdata(dev); - return sprintf(buf, "%u\n", srom->sector_size); -} - -static ssize_t page_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct srom_dev *srom = dev_get_drvdata(dev); - return sprintf(buf, "%u\n", srom->page_size); -} - -static struct device_attribute srom_dev_attrs[] = { - __ATTR(total_size, S_IRUGO, total_show, NULL), - __ATTR(sector_size, S_IRUGO, sector_show, NULL), - __ATTR(page_size, S_IRUGO, page_show, NULL), - __ATTR_NULL -}; - -static char *srom_devnode(struct device *dev, mode_t *mode) -{ - *mode = S_IRUGO | S_IWUSR; - return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev)); -} - -/* - * The fops - */ -static const struct file_operations srom_fops = { - .owner = THIS_MODULE, - .llseek = srom_llseek, - .read = srom_read, - .write = srom_write, - .open = srom_open, - .release = srom_release, -}; - -/** - * srom_setup_minor() - Initialize per-minor information. - * @srom: Per-device SROM state. - * @index: Device to set up. - */ -static int srom_setup_minor(struct srom_dev *srom, int index) -{ - struct device *dev; - int devhdl = srom->hv_devhdl; - - mutex_init(&srom->lock); - - if (_srom_read(devhdl, &srom->total_size, - SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0) - return -EIO; - if (_srom_read(devhdl, &srom->sector_size, - SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0) - return -EIO; - if (_srom_read(devhdl, &srom->page_size, - SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0) - return -EIO; - - dev = device_create(srom_class, &platform_bus, - MKDEV(srom_major, index), srom, "%d", index); - return IS_ERR(dev) ? PTR_ERR(dev) : 0; -} - -/** srom_init() - Initialize the driver's module. */ -static int srom_init(void) -{ - int result, i; - dev_t dev = MKDEV(srom_major, 0); - - /* - * Start with a plausible number of partitions; the krealloc() call - * below will yield about log(srom_devs) additional allocations. - */ - srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL); - - /* Discover the number of srom partitions. */ - for (i = 0; ; i++) { - int devhdl; - char buf[20]; - struct srom_dev *new_srom_devices = - krealloc(srom_devices, (i+1) * sizeof(struct srom_dev), - GFP_KERNEL | __GFP_ZERO); - if (!new_srom_devices) { - result = -ENOMEM; - goto fail_mem; - } - srom_devices = new_srom_devices; - sprintf(buf, "srom/0/%d", i); - devhdl = hv_dev_open((HV_VirtAddr)buf, 0); - if (devhdl < 0) { - if (devhdl != HV_ENODEV) - pr_notice("srom/%d: hv_dev_open failed: %d.\n", - i, devhdl); - break; - } - srom_devices[i].hv_devhdl = devhdl; - } - srom_devs = i; - - /* Bail out early if we have no partitions at all. 
*/ - if (srom_devs == 0) { - result = -ENODEV; - goto fail_mem; - } - - /* Register our major, and accept a dynamic number. */ - if (srom_major) - result = register_chrdev_region(dev, srom_devs, "srom"); - else { - result = alloc_chrdev_region(&dev, 0, srom_devs, "srom"); - srom_major = MAJOR(dev); - } - if (result < 0) - goto fail_mem; - - /* Register a character device. */ - cdev_init(&srom_cdev, &srom_fops); - srom_cdev.owner = THIS_MODULE; - srom_cdev.ops = &srom_fops; - result = cdev_add(&srom_cdev, dev, srom_devs); - if (result < 0) - goto fail_chrdev; - - /* Create a sysfs class. */ - srom_class = class_create(THIS_MODULE, "srom"); - if (IS_ERR(srom_class)) { - result = PTR_ERR(srom_class); - goto fail_cdev; - } - srom_class->dev_attrs = srom_dev_attrs; - srom_class->devnode = srom_devnode; - - /* Do per-partition initialization */ - for (i = 0; i < srom_devs; i++) { - result = srom_setup_minor(srom_devices + i, i); - if (result < 0) - goto fail_class; - } - - return 0; - -fail_class: - for (i = 0; i < srom_devs; i++) - device_destroy(srom_class, MKDEV(srom_major, i)); - class_destroy(srom_class); -fail_cdev: - cdev_del(&srom_cdev); -fail_chrdev: - unregister_chrdev_region(dev, srom_devs); -fail_mem: - kfree(srom_devices); - return result; -} - -/** srom_cleanup() - Clean up the driver's module. */ -static void srom_cleanup(void) -{ - int i; - for (i = 0; i < srom_devs; i++) - device_destroy(srom_class, MKDEV(srom_major, i)); - class_destroy(srom_class); - cdev_del(&srom_cdev); - unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs); - kfree(srom_devices); -} - -module_init(srom_init); -module_exit(srom_cleanup); diff --git a/trunk/drivers/char/tpm/tpm_tis.c b/trunk/drivers/char/tpm/tpm_tis.c index 3f4051a7c5a7..7fc2f108f490 100644 --- a/trunk/drivers/char/tpm/tpm_tis.c +++ b/trunk/drivers/char/tpm/tpm_tis.c @@ -80,7 +80,7 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); -#if defined(CONFIG_PNP) && defined(CONFIG_ACPI) +#ifdef CONFIG_PNP static int is_itpm(struct pnp_dev *dev) { struct acpi_device *acpi = pnp_acpi_device(dev); @@ -93,11 +93,6 @@ static int is_itpm(struct pnp_dev *dev) return 0; } -#else -static inline int is_itpm(struct pnp_dev *dev) -{ - return 0; -} #endif static int check_locality(struct tpm_chip *chip, int l) diff --git a/trunk/drivers/cpuidle/cpuidle.c b/trunk/drivers/cpuidle/cpuidle.c index d4c542372886..bf5092455a8f 100644 --- a/trunk/drivers/cpuidle/cpuidle.c +++ b/trunk/drivers/cpuidle/cpuidle.c @@ -25,19 +25,9 @@ DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DEFINE_MUTEX(cpuidle_lock); LIST_HEAD(cpuidle_detected_devices); +static void (*pm_idle_old)(void); static int enabled_devices; -static int off __read_mostly; -static int initialized __read_mostly; - -int cpuidle_disabled(void) -{ - return off; -} -void disable_cpuidle(void) -{ - off = 1; -} #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT) static void cpuidle_kick_cpus(void) @@ -56,23 +46,25 @@ static int __cpuidle_register_device(struct cpuidle_device *dev); * cpuidle_idle_call - the main idle loop * * NOTE: no locks or semaphores should be used here - * return non-zero on failure */ -int cpuidle_idle_call(void) +static void cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_state *target_state; int next_state; - if (off) - return -ENODEV; - - if (!initialized) - return -ENODEV; - /* check if the device is ready */ - if (!dev || !dev->enabled) - return -EBUSY; + if (!dev || !dev->enabled) { + if 
(pm_idle_old) + pm_idle_old(); + else +#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE) + default_idle(); +#else + local_irq_enable(); +#endif + return; + } #if 0 /* shows regressions, re-enable for 2.6.29 */ @@ -97,7 +89,7 @@ int cpuidle_idle_call(void) next_state = cpuidle_curr_governor->select(dev); if (need_resched()) { local_irq_enable(); - return 0; + return; } target_state = &dev->states[next_state]; @@ -122,8 +114,6 @@ int cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); - - return 0; } /** @@ -131,10 +121,10 @@ int cpuidle_idle_call(void) */ void cpuidle_install_idle_handler(void) { - if (enabled_devices) { + if (enabled_devices && (pm_idle != cpuidle_idle_call)) { /* Make sure all changes finished before we switch to new idle */ smp_wmb(); - initialized = 1; + pm_idle = cpuidle_idle_call; } } @@ -143,8 +133,8 @@ void cpuidle_install_idle_handler(void) */ void cpuidle_uninstall_idle_handler(void) { - if (enabled_devices) { - initialized = 0; + if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) { + pm_idle = pm_idle_old; cpuidle_kick_cpus(); } } @@ -437,8 +427,7 @@ static int __init cpuidle_init(void) { int ret; - if (cpuidle_disabled()) - return -ENODEV; + pm_idle_old = pm_idle; ret = cpuidle_add_class_sysfs(&cpu_sysdev_class); if (ret) @@ -449,5 +438,4 @@ static int __init cpuidle_init(void) return 0; } -module_param(off, int, 0444); core_initcall(cpuidle_init); diff --git a/trunk/drivers/cpuidle/cpuidle.h b/trunk/drivers/cpuidle/cpuidle.h index 38c3fd8b9d76..33e50d556f17 100644 --- a/trunk/drivers/cpuidle/cpuidle.h +++ b/trunk/drivers/cpuidle/cpuidle.h @@ -13,7 +13,6 @@ extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; extern struct mutex cpuidle_lock; extern spinlock_t cpuidle_driver_lock; -extern int cpuidle_disabled(void); /* idle loop */ extern void cpuidle_install_idle_handler(void); diff --git a/trunk/drivers/cpuidle/driver.c b/trunk/drivers/cpuidle/driver.c index 3f7e3cedd133..fd1601e3d125 100644 --- a/trunk/drivers/cpuidle/driver.c +++ b/trunk/drivers/cpuidle/driver.c @@ -26,9 +26,6 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) if (!drv) return -EINVAL; - if (cpuidle_disabled()) - return -ENODEV; - spin_lock(&cpuidle_driver_lock); if (cpuidle_curr_driver) { spin_unlock(&cpuidle_driver_lock); diff --git a/trunk/drivers/cpuidle/governor.c b/trunk/drivers/cpuidle/governor.c index ea2f8e7aa24a..724c164d31c9 100644 --- a/trunk/drivers/cpuidle/governor.c +++ b/trunk/drivers/cpuidle/governor.c @@ -81,9 +81,6 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) if (!gov || !gov->select) return -EINVAL; - if (cpuidle_disabled()) - return -ENODEV; - mutex_lock(&cpuidle_lock); if (__cpuidle_find_governor(gov->name) == NULL) { ret = 0; diff --git a/trunk/drivers/dma/dmaengine.c b/trunk/drivers/dma/dmaengine.c index b48967b499da..26374b2a55a2 100644 --- a/trunk/drivers/dma/dmaengine.c +++ b/trunk/drivers/dma/dmaengine.c @@ -62,9 +62,9 @@ #include static DEFINE_MUTEX(dma_list_mutex); -static DEFINE_IDR(dma_idr); static LIST_HEAD(dma_device_list); static long dmaengine_ref_count; +static struct idr dma_idr; /* --- sysfs implementation --- */ @@ -1050,6 +1050,8 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies); static int __init dma_bus_init(void) { + idr_init(&dma_idr); + mutex_init(&dma_list_mutex); return class_register(&dma_devclass); } arch_initcall(dma_bus_init); diff --git 
a/trunk/drivers/dma/ioat/dma_v3.c b/trunk/drivers/dma/ioat/dma_v3.c index f519c93a61e7..d845dc4b7103 100644 --- a/trunk/drivers/dma/ioat/dma_v3.c +++ b/trunk/drivers/dma/ioat/dma_v3.c @@ -73,10 +73,10 @@ /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor */ -static const u8 xor_idx_to_desc = 0xe0; -static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc = 0xf8; -static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 xor_idx_to_desc __read_mostly = 0xd0; +static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc __read_mostly = 0xf8; +static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) { diff --git a/trunk/drivers/dma/ioat/pci.c b/trunk/drivers/dma/ioat/pci.c index 5e3a40f79945..fab37d1cf48d 100644 --- a/trunk/drivers/dma/ioat/pci.c +++ b/trunk/drivers/dma/ioat/pci.c @@ -72,17 +72,6 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, - { 0, } }; MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); diff --git a/trunk/drivers/eisa/pci_eisa.c b/trunk/drivers/eisa/pci_eisa.c index cdae207028a7..30da70d06a6d 100644 --- a/trunk/drivers/eisa/pci_eisa.c +++ b/trunk/drivers/eisa/pci_eisa.c @@ -45,13 +45,13 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return 0; } -static struct pci_device_id pci_eisa_pci_tbl[] = { +static struct pci_device_id __initdata pci_eisa_pci_tbl[] = { { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, { 0, } }; -static struct pci_driver __refdata pci_eisa_driver = { +static struct pci_driver __initdata pci_eisa_driver = { .name = "pci_eisa", .id_table = pci_eisa_pci_tbl, .probe = pci_eisa_init, diff --git a/trunk/drivers/firmware/efivars.c b/trunk/drivers/firmware/efivars.c index eb80b549ed8d..eacb05e6cfb3 100644 --- a/trunk/drivers/firmware/efivars.c +++ b/trunk/drivers/firmware/efivars.c @@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength) return length; } -static inline unsigned long +static unsigned long utf16_strlen(efi_char16_t *s) { return utf16_strnlen(s, ~0UL); @@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, return -1; } -static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi) +static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, + struct pstore_info *psi) { return 0; } diff --git a/trunk/drivers/input/keyboard/gpio_keys.c b/trunk/drivers/input/keyboard/gpio_keys.c index 67df91af8424..ce281d152275 100644 --- a/trunk/drivers/input/keyboard/gpio_keys.c +++ b/trunk/drivers/input/keyboard/gpio_keys.c @@ -483,7 +483,7 @@ static int 
gpio_keys_get_devtree_pdata(struct device *dev, buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL); if (!buttons) - return -ENOMEM; + return -ENODEV; pp = NULL; i = 0; diff --git a/trunk/drivers/input/keyboard/lm8323.c b/trunk/drivers/input/keyboard/lm8323.c index 756348a7f93a..ab0acaf7fe8f 100644 --- a/trunk/drivers/input/keyboard/lm8323.c +++ b/trunk/drivers/input/keyboard/lm8323.c @@ -754,11 +754,8 @@ static int __devinit lm8323_probe(struct i2c_client *client, device_remove_file(&client->dev, &dev_attr_disable_kp); fail2: while (--pwm >= 0) - if (lm->pwm[pwm].enabled) { - device_remove_file(lm->pwm[pwm].cdev.dev, - &dev_attr_time); + if (lm->pwm[pwm].enabled) led_classdev_unregister(&lm->pwm[pwm].cdev); - } fail1: input_free_device(idev); kfree(lm); @@ -778,10 +775,8 @@ static int __devexit lm8323_remove(struct i2c_client *client) device_remove_file(&lm->client->dev, &dev_attr_disable_kp); for (i = 0; i < 3; i++) - if (lm->pwm[i].enabled) { - device_remove_file(lm->pwm[i].cdev.dev, &dev_attr_time); + if (lm->pwm[i].enabled) led_classdev_unregister(&lm->pwm[i].cdev); - } kfree(lm); diff --git a/trunk/drivers/input/keyboard/tegra-kbc.c b/trunk/drivers/input/keyboard/tegra-kbc.c index f270447ba951..da3828fc2c09 100644 --- a/trunk/drivers/input/keyboard/tegra-kbc.c +++ b/trunk/drivers/input/keyboard/tegra-kbc.c @@ -19,7 +19,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include #include #include #include @@ -38,7 +37,7 @@ #define KBC_ROW_SCAN_DLY 5 /* KBC uses a 32KHz clock so a cycle = 1/32Khz */ -#define KBC_CYCLE_MS 32 +#define KBC_CYCLE_USEC 32 /* KBC Registers */ @@ -648,7 +647,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev) debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT); scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows; kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt; - kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS); + kbc->repoll_dly = ((kbc->repoll_dly * KBC_CYCLE_USEC) + 999) / 1000; input_dev->name = pdev->name; input_dev->id.bustype = BUS_HOST; diff --git a/trunk/drivers/input/misc/kxtj9.c b/trunk/drivers/input/misc/kxtj9.c index 783597a9a64a..c456f63b6bae 100644 --- a/trunk/drivers/input/misc/kxtj9.c +++ b/trunk/drivers/input/misc/kxtj9.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/trunk/drivers/input/misc/mma8450.c b/trunk/drivers/input/misc/mma8450.c index 6c76cf792991..20f8f9284f02 100644 --- a/trunk/drivers/input/misc/mma8450.c +++ b/trunk/drivers/input/misc/mma8450.c @@ -24,7 +24,6 @@ #include #include #include -#include #define MMA8450_DRV_NAME "mma8450" @@ -230,17 +229,10 @@ static const struct i2c_device_id mma8450_id[] = { }; MODULE_DEVICE_TABLE(i2c, mma8450_id); -static const struct of_device_id mma8450_dt_ids[] = { - { .compatible = "fsl,mma8450", }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids); - static struct i2c_driver mma8450_driver = { .driver = { .name = MMA8450_DRV_NAME, .owner = THIS_MODULE, - .of_match_table = mma8450_dt_ids, }, .probe = mma8450_probe, .remove = __devexit_p(mma8450_remove), diff --git a/trunk/drivers/input/mouse/hgpk.c b/trunk/drivers/input/mouse/hgpk.c index 4d17d9f3320b..95577c15ae56 100644 --- a/trunk/drivers/input/mouse/hgpk.c +++ b/trunk/drivers/input/mouse/hgpk.c @@ -32,7 +32,6 @@ #define DEBUG #include #include -#include #include #include #include diff --git a/trunk/drivers/input/serio/xilinx_ps2.c 
b/trunk/drivers/input/serio/xilinx_ps2.c index d64c5a43aaad..80baa53da5b1 100644 --- a/trunk/drivers/input/serio/xilinx_ps2.c +++ b/trunk/drivers/input/serio/xilinx_ps2.c @@ -23,7 +23,7 @@ #include #include #include -#include + #include #include diff --git a/trunk/drivers/input/touchscreen/ad7879.c b/trunk/drivers/input/touchscreen/ad7879.c index 131f9d1c921b..bc3b5187f3a3 100644 --- a/trunk/drivers/input/touchscreen/ad7879.c +++ b/trunk/drivers/input/touchscreen/ad7879.c @@ -249,14 +249,12 @@ static void __ad7879_enable(struct ad7879 *ts) static void __ad7879_disable(struct ad7879 *ts) { - u16 reg = (ts->cmd_crtl2 & ~AD7879_PM(-1)) | - AD7879_PM(AD7879_PM_SHUTDOWN); disable_irq(ts->irq); if (del_timer_sync(&ts->timer)) ad7879_ts_event_release(ts); - ad7879_write(ts, AD7879_REG_CTRL2, reg); + ad7879_write(ts, AD7879_REG_CTRL2, AD7879_PM(AD7879_PM_SHUTDOWN)); } diff --git a/trunk/drivers/md/Kconfig b/trunk/drivers/md/Kconfig index f75a66e7d312..8420129fc5ee 100644 --- a/trunk/drivers/md/Kconfig +++ b/trunk/drivers/md/Kconfig @@ -241,13 +241,12 @@ config DM_MIRROR needed for live data migration tools such as 'pvmove'. config DM_RAID - tristate "RAID 1/4/5/6 target (EXPERIMENTAL)" + tristate "RAID 4/5/6 target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL - select MD_RAID1 select MD_RAID456 select BLK_DEV_MD ---help--- - A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings + A dm target that supports RAID4, RAID5 and RAID6 mappings A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure diff --git a/trunk/drivers/md/dm-crypt.c b/trunk/drivers/md/dm-crypt.c index 49da55c1528a..bae6c4e23d3f 100644 --- a/trunk/drivers/md/dm-crypt.c +++ b/trunk/drivers/md/dm-crypt.c @@ -30,6 +30,7 @@ #include #define DM_MSG_PREFIX "crypt" +#define MESG_STR(x) x, sizeof(x) /* * context holding the current state of a multi-part conversion @@ -238,7 +239,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); + *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); return 0; } @@ -247,7 +248,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); return 0; } @@ -414,7 +415,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; memset(iv, 0, cc->iv_size); - *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); crypto_cipher_encrypt_one(essiv_tfm, iv, iv); return 0; @@ -1574,17 +1575,11 @@ static int crypt_ctr_cipher(struct dm_target *ti, static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - unsigned int key_size, opt_params; + unsigned int key_size; unsigned long long tmpll; int ret; - struct dm_arg_set as; - const char *opt_string; - - static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, - }; - if (argc < 5) { + if (argc != 5) { ti->error = "Not enough arguments"; return -EINVAL; } @@ -1653,30 +1648,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; - argv += 5; - argc -= 5; - - /* Optional parameters */ - if (argc) { - as.argc = argc; - as.argv = argv; - - ret = 
dm_read_arg_group(_args, &as, &opt_params, &ti->error); - if (ret) - goto bad; - - opt_string = dm_shift_arg(&as); - - if (opt_params == 1 && opt_string && - !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_requests = 1; - else if (opt_params) { - ret = -EINVAL; - ti->error = "Invalid feature arguments"; - goto bad; - } - } - ret = -ENOMEM; cc->io_queue = alloc_workqueue("kcryptd_io", WQ_NON_REENTRANT| @@ -1711,16 +1682,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, struct dm_crypt_io *io; struct crypt_config *cc; - /* - * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. - * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight - * - for REQ_DISCARD caller must use flush if IO ordering matters - */ - if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { + if (bio->bi_rw & REQ_FLUSH) { cc = ti->private; bio->bi_bdev = cc->dev->bdev; - if (bio_sectors(bio)) - bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); return DM_MAPIO_REMAPPED; } @@ -1763,10 +1727,6 @@ static int crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - - if (ti->num_discard_requests) - DMEMIT(" 1 allow_discards"); - break; } return 0; @@ -1810,12 +1770,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) if (argc < 2) goto error; - if (!strcasecmp(argv[0], "key")) { + if (!strnicmp(argv[0], MESG_STR("key"))) { if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) { DMWARN("not suspended during key manipulation."); return -EINVAL; } - if (argc == 3 && !strcasecmp(argv[1], "set")) { + if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) { ret = crypt_set_key(cc, argv[2]); if (ret) return ret; @@ -1823,7 +1783,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) ret = cc->iv_gen_ops->init(cc); return ret; } - if (argc == 2 && !strcasecmp(argv[1], "wipe")) { + if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) { if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { ret = cc->iv_gen_ops->wipe(cc); if (ret) @@ -1863,7 +1823,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 11, 0}, + .version = {1, 10, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/trunk/drivers/md/dm-flakey.c b/trunk/drivers/md/dm-flakey.c index 89f73ca22cfa..ea790623c30b 100644 --- a/trunk/drivers/md/dm-flakey.c +++ b/trunk/drivers/md/dm-flakey.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2003 Sistina Software (UK) Limited. - * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -15,9 +15,6 @@ #define DM_MSG_PREFIX "flakey" -#define all_corrupt_bio_flags_match(bio, fc) \ - (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) - /* * Flakey: Used for testing only, simulates intermittent, * catastrophic device failure. 
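
/*
 * Editor's note: a minimal userspace model of the flag-matching test that
 * the all_corrupt_bio_flags_match() macro removed above performs. The
 * values used here are illustrative, not the kernel's REQ_* constants.
 */
#include <assert.h>

static int flags_match(unsigned long bi_rw, unsigned long corrupt_bio_flags)
{
	/* Corrupt only when every selected flag is set on the bio. */
	return (bi_rw & corrupt_bio_flags) == corrupt_bio_flags;
}

int main(void)
{
	assert(flags_match(0x32, 0x32));  /* all selected flags present */
	assert(!flags_match(0x30, 0x32)); /* one selected flag missing */
	assert(flags_match(0x7f, 0x0));   /* mask 0 matches everything */
	return 0;
}
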
@@ -28,189 +25,60 @@ struct flakey_c { sector_t start; unsigned up_interval; unsigned down_interval; - unsigned long flags; - unsigned corrupt_bio_byte; - unsigned corrupt_bio_rw; - unsigned corrupt_bio_value; - unsigned corrupt_bio_flags; -}; - -enum feature_flag_bits { - DROP_WRITES }; -static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, - struct dm_target *ti) -{ - int r; - unsigned argc; - const char *arg_name; - - static struct dm_arg _args[] = { - {0, 6, "Invalid number of feature args"}, - {1, UINT_MAX, "Invalid corrupt bio byte"}, - {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, - {0, UINT_MAX, "Invalid corrupt bio flags mask"}, - }; - - /* No feature arguments supplied. */ - if (!as->argc) - return 0; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) - return r; - - while (argc) { - arg_name = dm_shift_arg(as); - argc--; - - /* - * drop_writes - */ - if (!strcasecmp(arg_name, "drop_writes")) { - if (test_and_set_bit(DROP_WRITES, &fc->flags)) { - ti->error = "Feature drop_writes duplicated"; - return -EINVAL; - } - - continue; - } - - /* - * corrupt_bio_byte - */ - if (!strcasecmp(arg_name, "corrupt_bio_byte")) { - if (!argc) - ti->error = "Feature corrupt_bio_byte requires parameters"; - - r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error); - if (r) - return r; - argc--; - - /* - * Direction r or w? - */ - arg_name = dm_shift_arg(as); - if (!strcasecmp(arg_name, "w")) - fc->corrupt_bio_rw = WRITE; - else if (!strcasecmp(arg_name, "r")) - fc->corrupt_bio_rw = READ; - else { - ti->error = "Invalid corrupt bio direction (r or w)"; - return -EINVAL; - } - argc--; - - /* - * Value of byte (0-255) to write in place of correct one. - */ - r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error); - if (r) - return r; - argc--; - - /* - * Only corrupt bios with these flags set. - */ - r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); - if (r) - return r; - argc--; - - continue; - } - - ti->error = "Unrecognised flakey feature requested"; - return -EINVAL; - } - - if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { - ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; - return -EINVAL; - } - - return 0; -} - /* - * Construct a flakey mapping: - * [<#feature args> []*] - * - * Feature args: - * [drop_writes] - * [corrupt_bio_byte ] - * - * Nth_byte starts from 1 for the first byte. - * Direction is r for READ or w for WRITE. - * bio_flags is ignored if 0. 
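
/*
 * Editor's note: the {min, max, error} triples consumed by dm_read_arg()
 * in the feature parser above reduce to a bounded strtoul. A sketch of
 * that contract as a plain userspace function; the struct and field
 * names are illustrative, not the dm_arg API itself.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

struct arg_bounds {
	unsigned min;
	unsigned max;
	const char *error;
};

static int read_bounded_arg(const struct arg_bounds *b, const char *str,
			    unsigned *v, const char **error)
{
	char *end;
	unsigned long val;

	val = str ? strtoul(str, &end, 10) : 0;
	if (!str || *end != '\0' || val < b->min || val > b->max) {
		*error = b->error;
		return -1;
	}
	*v = (unsigned)val;
	return 0;
}

int main(void)
{
	static const struct arg_bounds up = {0, UINT_MAX, "Invalid up interval"};
	const char *err;
	unsigned v;

	if (read_bounded_arg(&up, "60", &v, &err) == 0)
		printf("up_interval = %u\n", v);
	return 0;
}
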
+ * Construct a flakey mapping: */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - static struct dm_arg _args[] = { - {0, UINT_MAX, "Invalid up interval"}, - {0, UINT_MAX, "Invalid down interval"}, - }; - - int r; struct flakey_c *fc; - unsigned long long tmpll; - struct dm_arg_set as; - const char *devname; + unsigned long long tmp; - as.argc = argc; - as.argv = argv; - - if (argc < 4) { - ti->error = "Invalid argument count"; + if (argc != 4) { + ti->error = "dm-flakey: Invalid argument count"; return -EINVAL; } - fc = kzalloc(sizeof(*fc), GFP_KERNEL); + fc = kmalloc(sizeof(*fc), GFP_KERNEL); if (!fc) { - ti->error = "Cannot allocate linear context"; + ti->error = "dm-flakey: Cannot allocate linear context"; return -ENOMEM; } fc->start_time = jiffies; - devname = dm_shift_arg(&as); - - if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) { - ti->error = "Invalid device sector"; + if (sscanf(argv[1], "%llu", &tmp) != 1) { + ti->error = "dm-flakey: Invalid device sector"; goto bad; } - fc->start = tmpll; + fc->start = tmp; - r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error); - if (r) + if (sscanf(argv[2], "%u", &fc->up_interval) != 1) { + ti->error = "dm-flakey: Invalid up interval"; goto bad; + } - r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error); - if (r) + if (sscanf(argv[3], "%u", &fc->down_interval) != 1) { + ti->error = "dm-flakey: Invalid down interval"; goto bad; + } if (!(fc->up_interval + fc->down_interval)) { - ti->error = "Total (up + down) interval is zero"; + ti->error = "dm-flakey: Total (up + down) interval is zero"; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { - ti->error = "Interval overflow"; + ti->error = "dm-flakey: Interval overflow"; goto bad; } - r = parse_features(&as, fc, ti); - if (r) - goto bad; - - if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) { - ti->error = "Device lookup failed"; + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) { + ti->error = "dm-flakey: Device lookup failed"; goto bad; } ti->num_flush_requests = 1; - ti->num_discard_requests = 1; ti->private = fc; return 0; @@ -231,7 +99,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector) { struct flakey_c *fc = ti->private; - return fc->start + dm_target_offset(ti, bi_sector); + return fc->start + (bi_sector - ti->begin); } static void flakey_map_bio(struct dm_target *ti, struct bio *bio) @@ -243,25 +111,6 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); } -static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) -{ - unsigned bio_bytes = bio_cur_bytes(bio); - char *data = bio_data(bio); - - /* - * Overwrite the Nth byte of the data returned. - */ - if (data && bio_bytes >= fc->corrupt_bio_byte) { - data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; - - DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", - bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, - (bio_data_dir(bio) == WRITE) ? 'w' : 'r', - bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes); - } -} - static int flakey_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { @@ -270,71 +119,18 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* Are we alive ? 
*/ elapsed = (jiffies - fc->start_time) / HZ; - if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { - /* - * Flag this bio as submitted while down. - */ - map_context->ll = 1; - - /* - * Map reads as normal. - */ - if (bio_data_dir(bio) == READ) - goto map_bio; - - /* - * Drop writes? - */ - if (test_bit(DROP_WRITES, &fc->flags)) { - bio_endio(bio, 0); - return DM_MAPIO_SUBMITTED; - } - - /* - * Corrupt matching writes. - */ - if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { - if (all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); - goto map_bio; - } - - /* - * By default, error all I/O. - */ + if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) return -EIO; - } -map_bio: flakey_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } -static int flakey_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) -{ - struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; - - /* - * Corrupt successful READs while in down state. - * If flags were specified, only corrupt those that match. - */ - if (!error && bio_submitted_while_down && - (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); - - return error; -} - static int flakey_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { - unsigned sz = 0; struct flakey_c *fc = ti->private; - unsigned drop_writes; switch (type) { case STATUSTYPE_INFO: @@ -342,22 +138,9 @@ static int flakey_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - DMEMIT("%s %llu %u %u ", fc->dev->name, - (unsigned long long)fc->start, fc->up_interval, - fc->down_interval); - - drop_writes = test_bit(DROP_WRITES, &fc->flags); - DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); - - if (drop_writes) - DMEMIT("drop_writes "); - - if (fc->corrupt_bio_byte) - DMEMIT("corrupt_bio_byte %u %c %u %u ", - fc->corrupt_bio_byte, - (fc->corrupt_bio_rw == WRITE) ? 
'w' : 'r', - fc->corrupt_bio_value, fc->corrupt_bio_flags); - + snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name, + (unsigned long long)fc->start, fc->up_interval, + fc->down_interval); break; } return 0; @@ -394,12 +177,11 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 2, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, .map = flakey_map, - .end_io = flakey_end_io, .status = flakey_status, .ioctl = flakey_ioctl, .merge = flakey_merge, diff --git a/trunk/drivers/md/dm-io.c b/trunk/drivers/md/dm-io.c index ad2eba40e319..2067288f61f9 100644 --- a/trunk/drivers/md/dm-io.c +++ b/trunk/drivers/md/dm-io.c @@ -38,8 +38,6 @@ struct io { struct dm_io_client *client; io_notify_fn callback; void *context; - void *vma_invalidate_address; - unsigned long vma_invalidate_size; } __attribute__((aligned(DM_IO_MAX_REGIONS))); static struct kmem_cache *_dm_io_cache; @@ -118,10 +116,6 @@ static void dec_count(struct io *io, unsigned int region, int error) set_bit(region, &io->error_bits); if (atomic_dec_and_test(&io->count)) { - if (io->vma_invalidate_size) - invalidate_kernel_vmap_range(io->vma_invalidate_address, - io->vma_invalidate_size); - if (io->sleeper) wake_up_process(io->sleeper); @@ -165,9 +159,6 @@ struct dpages { unsigned context_u; void *context_ptr; - - void *vma_invalidate_address; - unsigned long vma_invalidate_size; }; /* @@ -386,9 +377,6 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->sleeper = current; io->client = client; - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(rw, num_regions, where, dp, io, 1); while (1) { @@ -427,21 +415,13 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->callback = fn; io->context = context; - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(rw, num_regions, where, dp, io, 0); return 0; } -static int dp_init(struct dm_io_request *io_req, struct dpages *dp, - unsigned long size) +static int dp_init(struct dm_io_request *io_req, struct dpages *dp) { /* Set up dpages based on memory type */ - - dp->vma_invalidate_address = NULL; - dp->vma_invalidate_size = 0; - switch (io_req->mem.type) { case DM_IO_PAGE_LIST: list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); @@ -452,11 +432,6 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, break; case DM_IO_VMA: - flush_kernel_vmap_range(io_req->mem.ptr.vma, size); - if ((io_req->bi_rw & RW_MASK) == READ) { - dp->vma_invalidate_address = io_req->mem.ptr.vma; - dp->vma_invalidate_size = size; - } vm_dp_init(dp, io_req->mem.ptr.vma); break; @@ -485,7 +460,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, int r; struct dpages dp; - r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); + r = dp_init(io_req, &dp); if (r) return r; diff --git a/trunk/drivers/md/dm-ioctl.c b/trunk/drivers/md/dm-ioctl.c index 2e9a3ca37bdd..4cacdad2270a 100644 --- a/trunk/drivers/md/dm-ioctl.c +++ b/trunk/drivers/md/dm-ioctl.c @@ -128,24 +128,6 @@ static struct hash_cell *__get_uuid_cell(const char *str) return NULL; } -static struct hash_cell *__get_dev_cell(uint64_t dev) -{ - struct mapped_device *md; - struct hash_cell *hc; - - md = dm_get_md(huge_decode_dev(dev)); - if (!md) - return NULL; - - hc = 
dm_get_mdptr(md); - if (!hc) { - dm_put(md); - return NULL; - } - - return hc; -} - /*----------------------------------------------------------------- * Inserting, removing and renaming a device. *---------------------------------------------------------------*/ @@ -736,45 +718,25 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) */ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) { - struct hash_cell *hc = NULL; + struct mapped_device *md; + void *mdptr = NULL; - if (*param->uuid) { - if (*param->name || param->dev) - return NULL; + if (*param->uuid) + return __get_uuid_cell(param->uuid); - hc = __get_uuid_cell(param->uuid); - if (!hc) - return NULL; - } else if (*param->name) { - if (param->dev) - return NULL; + if (*param->name) + return __get_name_cell(param->name); - hc = __get_name_cell(param->name); - if (!hc) - return NULL; - } else if (param->dev) { - hc = __get_dev_cell(param->dev); - if (!hc) - return NULL; - } else - return NULL; - - /* - * Sneakily write in both the name and the uuid - * while we have the cell. - */ - strlcpy(param->name, hc->name, sizeof(param->name)); - if (hc->uuid) - strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); - else - param->uuid[0] = '\0'; + md = dm_get_md(huge_decode_dev(param->dev)); + if (!md) + goto out; - if (hc->new_map) - param->flags |= DM_INACTIVE_PRESENT_FLAG; - else - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; + mdptr = dm_get_mdptr(md); + if (!mdptr) + dm_put(md); - return hc; +out: + return mdptr; } static struct mapped_device *find_device(struct dm_ioctl *param) @@ -784,8 +746,24 @@ static struct mapped_device *find_device(struct dm_ioctl *param) down_read(&_hash_lock); hc = __find_device_hash_cell(param); - if (hc) + if (hc) { md = hc->md; + + /* + * Sneakily write in both the name and the uuid + * while we have the cell. 
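
/*
 * Editor's note: the two variants of __find_device_hash_cell() above
 * differ in how uuid, name and dev select a device. A compact userspace
 * model of the stricter (removed) variant, in which exactly one selector
 * may be supplied; all names here are illustrative.
 */
#include <stdio.h>

static const char *pick_selector(int has_uuid, int has_name, int has_dev)
{
	if (has_uuid)
		return (has_name || has_dev) ? NULL : "uuid";
	if (has_name)
		return has_dev ? NULL : "name";
	return has_dev ? "dev" : NULL; /* NULL: ambiguous or empty ioctl */
}

int main(void)
{
	printf("%s\n", pick_selector(1, 0, 0));          /* "uuid" */
	printf("%s\n", pick_selector(0, 1, 0));          /* "name" */
	printf("%p\n", (void *)pick_selector(1, 1, 0));  /* NULL: rejected */
	return 0;
}
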
+ */ + strlcpy(param->name, hc->name, sizeof(param->name)); + if (hc->uuid) + strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); + else + param->uuid[0] = '\0'; + + if (hc->new_map) + param->flags |= DM_INACTIVE_PRESENT_FLAG; + else + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; + } up_read(&_hash_lock); return md; @@ -1424,11 +1402,6 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out; } - if (!argc) { - DMWARN("Empty message received."); - goto out; - } - table = dm_get_live_table(md); if (!table) goto out_argv; diff --git a/trunk/drivers/md/dm-kcopyd.c b/trunk/drivers/md/dm-kcopyd.c index f82147029636..320401dec104 100644 --- a/trunk/drivers/md/dm-kcopyd.c +++ b/trunk/drivers/md/dm-kcopyd.c @@ -224,6 +224,8 @@ struct kcopyd_job { unsigned int num_dests; struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; + sector_t offset; + unsigned int nr_pages; struct page_list *pages; /* @@ -378,7 +380,7 @@ static int run_io_job(struct kcopyd_job *job) .bi_rw = job->rw, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, - .mem.offset = 0, + .mem.offset = job->offset, .notify.fn = complete_io, .notify.context = job, .client = job->kc->io_client, @@ -395,9 +397,10 @@ static int run_io_job(struct kcopyd_job *job) static int run_pages_job(struct kcopyd_job *job) { int r; - unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); - r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); + job->nr_pages = dm_div_up(job->dests[0].count + job->offset, + PAGE_SIZE >> 9); + r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); if (!r) { /* this job is ready for io */ push(&job->kc->io_jobs, job); @@ -599,6 +602,8 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->num_dests = num_dests; memcpy(&job->dests, dests, sizeof(*dests) * num_dests); + job->offset = 0; + job->nr_pages = 0; job->pages = NULL; job->fn = fn; @@ -617,37 +622,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, } EXPORT_SYMBOL(dm_kcopyd_copy); -void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, - dm_kcopyd_notify_fn fn, void *context) -{ - struct kcopyd_job *job; - - job = mempool_alloc(kc->job_pool, GFP_NOIO); - - memset(job, 0, sizeof(struct kcopyd_job)); - job->kc = kc; - job->fn = fn; - job->context = context; - - atomic_inc(&kc->nr_jobs); - - return job; -} -EXPORT_SYMBOL(dm_kcopyd_prepare_callback); - -void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) -{ - struct kcopyd_job *job = j; - struct dm_kcopyd_client *kc = job->kc; - - job->read_err = read_err; - job->write_err = write_err; - - push(&kc->complete_jobs, job); - wake(kc); -} -EXPORT_SYMBOL(dm_kcopyd_do_callback); - /* * Cancels a kcopyd job, eg. someone might be deactivating a * mirror. 
diff --git a/trunk/drivers/md/dm-log-userspace-base.c b/trunk/drivers/md/dm-log-userspace-base.c index 1021c8986011..aa2e0c374ab3 100644 --- a/trunk/drivers/md/dm-log-userspace-base.c +++ b/trunk/drivers/md/dm-log-userspace-base.c @@ -394,7 +394,8 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list) group[count] = fe->region; count++; - list_move(&fe->list, &tmp_list); + list_del(&fe->list); + list_add(&fe->list, &tmp_list); type = fe->type; if (count >= MAX_FLUSH_GROUP_COUNT) diff --git a/trunk/drivers/md/dm-log.c b/trunk/drivers/md/dm-log.c index 3b52bb72bd1f..948e3f4925bf 100644 --- a/trunk/drivers/md/dm-log.c +++ b/trunk/drivers/md/dm-log.c @@ -197,21 +197,15 @@ EXPORT_SYMBOL(dm_dirty_log_destroy); #define MIRROR_DISK_VERSION 2 #define LOG_OFFSET 2 -struct log_header_disk { - __le32 magic; +struct log_header { + uint32_t magic; /* * Simple, incrementing version. no backward * compatibility. */ - __le32 version; - __le64 nr_regions; -} __packed; - -struct log_header_core { - uint32_t magic; uint32_t version; - uint64_t nr_regions; + sector_t nr_regions; }; struct log_c { @@ -245,10 +239,10 @@ struct log_c { int log_dev_failed; int log_dev_flush_failed; struct dm_dev *log_dev; - struct log_header_core header; + struct log_header header; struct dm_io_region header_location; - struct log_header_disk *disk_header; + struct log_header *disk_header; }; /* @@ -257,34 +251,34 @@ struct log_c { */ static inline int log_test_bit(uint32_t *bs, unsigned bit) { - return test_bit_le(bit, bs) ? 1 : 0; + return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0; } static inline void log_set_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __set_bit_le(bit, bs); + __test_and_set_bit_le(bit, (unsigned long *) bs); l->touched_cleaned = 1; } static inline void log_clear_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __clear_bit_le(bit, bs); + __test_and_clear_bit_le(bit, (unsigned long *) bs); l->touched_dirtied = 1; } /*---------------------------------------------------------------- * Header IO *--------------------------------------------------------------*/ -static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk) +static void header_to_disk(struct log_header *core, struct log_header *disk) { disk->magic = cpu_to_le32(core->magic); disk->version = cpu_to_le32(core->version); disk->nr_regions = cpu_to_le64(core->nr_regions); } -static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk) +static void header_from_disk(struct log_header *core, struct log_header *disk) { core->magic = le32_to_cpu(disk->magic); core->version = le32_to_cpu(disk->version); @@ -492,7 +486,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); lc->sync_count = (sync == NOSYNC) ? 
region_count : 0; - lc->recovering_bits = vzalloc(bitset_size); + lc->recovering_bits = vmalloc(bitset_size); if (!lc->recovering_bits) { DMWARN("couldn't allocate sync bitset"); vfree(lc->sync_bits); @@ -504,6 +498,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, kfree(lc); return -ENOMEM; } + memset(lc->recovering_bits, 0, bitset_size); lc->sync_search = 0; log->context = lc; @@ -744,7 +739,8 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) return 0; do { - *region = find_next_zero_bit_le(lc->sync_bits, + *region = find_next_zero_bit_le( + (unsigned long *) lc->sync_bits, lc->region_count, lc->sync_search); lc->sync_search = *region + 1; diff --git a/trunk/drivers/md/dm-mpath.c b/trunk/drivers/md/dm-mpath.c index 5e0090ef4182..c3547016f0f1 100644 --- a/trunk/drivers/md/dm-mpath.c +++ b/trunk/drivers/md/dm-mpath.c @@ -22,6 +22,7 @@ #include #define DM_MSG_PREFIX "multipath" +#define MESG_STR(x) x, sizeof(x) #define DM_PG_INIT_DELAY_MSECS 2000 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) @@ -504,29 +505,80 @@ static void trigger_event(struct work_struct *work) * <#paths> <#per-path selector args> * [ []* ]+ ]+ *---------------------------------------------------------------*/ -static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, +struct param { + unsigned min; + unsigned max; + char *error; +}; + +static int read_param(struct param *param, char *str, unsigned *v, char **error) +{ + if (!str || + (sscanf(str, "%u", v) != 1) || + (*v < param->min) || + (*v > param->max)) { + *error = param->error; + return -EINVAL; + } + + return 0; +} + +struct arg_set { + unsigned argc; + char **argv; +}; + +static char *shift(struct arg_set *as) +{ + char *r; + + if (as->argc) { + as->argc--; + r = *as->argv; + as->argv++; + return r; + } + + return NULL; +} + +static void consume(struct arg_set *as, unsigned n) +{ + BUG_ON (as->argc < n); + as->argc -= n; + as->argv += n; +} + +static int parse_path_selector(struct arg_set *as, struct priority_group *pg, struct dm_target *ti) { int r; struct path_selector_type *pst; unsigned ps_argc; - static struct dm_arg _args[] = { + static struct param _params[] = { {0, 1024, "invalid number of path selector args"}, }; - pst = dm_get_path_selector(dm_shift_arg(as)); + pst = dm_get_path_selector(shift(as)); if (!pst) { ti->error = "unknown path selector type"; return -EINVAL; } - r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); + r = read_param(_params, shift(as), &ps_argc, &ti->error); if (r) { dm_put_path_selector(pst); return -EINVAL; } + if (ps_argc > as->argc) { + dm_put_path_selector(pst); + ti->error = "not enough arguments for path selector"; + return -EINVAL; + } + r = pst->create(&pg->ps, ps_argc, as->argv); if (r) { dm_put_path_selector(pst); @@ -535,12 +587,12 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, } pg->ps.type = pst; - dm_consume_args(as, ps_argc); + consume(as, ps_argc); return 0; } -static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, +static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, struct dm_target *ti) { int r; @@ -557,7 +609,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps if (!p) return ERR_PTR(-ENOMEM); - r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), + r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), &p->path.dev); if (r) { ti->error = "error getting device"; @@ -608,16 
+660,16 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps return ERR_PTR(r); } -static struct priority_group *parse_priority_group(struct dm_arg_set *as, +static struct priority_group *parse_priority_group(struct arg_set *as, struct multipath *m) { - static struct dm_arg _args[] = { + static struct param _params[] = { {1, 1024, "invalid number of paths"}, {0, 1024, "invalid number of selector args"} }; int r; - unsigned i, nr_selector_args, nr_args; + unsigned i, nr_selector_args, nr_params; struct priority_group *pg; struct dm_target *ti = m->ti; @@ -641,26 +693,26 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, /* * read the paths */ - r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); + r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error); if (r) goto bad; - r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); + r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error); if (r) goto bad; - nr_args = 1 + nr_selector_args; + nr_params = 1 + nr_selector_args; for (i = 0; i < pg->nr_pgpaths; i++) { struct pgpath *pgpath; - struct dm_arg_set path_args; + struct arg_set path_args; - if (as->argc < nr_args) { + if (as->argc < nr_params) { ti->error = "not enough path parameters"; r = -EINVAL; goto bad; } - path_args.argc = nr_args; + path_args.argc = nr_params; path_args.argv = as->argv; pgpath = parse_path(&path_args, &pg->ps, ti); @@ -671,7 +723,7 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, pgpath->pg = pg; list_add_tail(&pgpath->list, &pg->pgpaths); - dm_consume_args(as, nr_args); + consume(as, nr_params); } return pg; @@ -681,23 +733,28 @@ static struct priority_group *parse_priority_group(struct dm_arg_set *as, return ERR_PTR(r); } -static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) +static int parse_hw_handler(struct arg_set *as, struct multipath *m) { unsigned hw_argc; int ret; struct dm_target *ti = m->ti; - static struct dm_arg _args[] = { + static struct param _params[] = { {0, 1024, "invalid number of hardware handler args"}, }; - if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) + if (read_param(_params, shift(as), &hw_argc, &ti->error)) return -EINVAL; if (!hw_argc) return 0; - m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); + if (hw_argc > as->argc) { + ti->error = "not enough arguments for hardware handler"; + return -EINVAL; + } + + m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); request_module("scsi_dh_%s", m->hw_handler_name); if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { ti->error = "unknown hardware handler type"; @@ -721,7 +778,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) j = sprintf(p, "%s", as->argv[i]); } - dm_consume_args(as, hw_argc - 1); + consume(as, hw_argc - 1); return 0; fail: @@ -730,20 +787,20 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) return ret; } -static int parse_features(struct dm_arg_set *as, struct multipath *m) +static int parse_features(struct arg_set *as, struct multipath *m) { int r; unsigned argc; struct dm_target *ti = m->ti; - const char *arg_name; + const char *param_name; - static struct dm_arg _args[] = { + static struct param _params[] = { {0, 5, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; - r = dm_read_arg_group(_args, as, &argc, 
&ti->error); + r = read_param(_params, shift(as), &argc, &ti->error); if (r) return -EINVAL; @@ -751,24 +808,26 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) return 0; do { - arg_name = dm_shift_arg(as); + param_name = shift(as); argc--; - if (!strcasecmp(arg_name, "queue_if_no_path")) { + if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { r = queue_if_no_path(m, 1, 0); continue; } - if (!strcasecmp(arg_name, "pg_init_retries") && + if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && (argc >= 1)) { - r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); + r = read_param(_params + 1, shift(as), + &m->pg_init_retries, &ti->error); argc--; continue; } - if (!strcasecmp(arg_name, "pg_init_delay_msecs") && + if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) && (argc >= 1)) { - r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); + r = read_param(_params + 2, shift(as), + &m->pg_init_delay_msecs, &ti->error); argc--; continue; } @@ -783,15 +842,15 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - /* target arguments */ - static struct dm_arg _args[] = { + /* target parameters */ + static struct param _params[] = { {0, 1024, "invalid number of priority groups"}, {0, 1024, "invalid initial priority group number"}, }; int r; struct multipath *m; - struct dm_arg_set as; + struct arg_set as; unsigned pg_count = 0; unsigned next_pg_num; @@ -812,11 +871,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, if (r) goto bad; - r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); + r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error); if (r) goto bad; - r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); + r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error); if (r) goto bad; @@ -1446,10 +1505,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) } if (argc == 1) { - if (!strcasecmp(argv[0], "queue_if_no_path")) { + if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) { r = queue_if_no_path(m, 1, 0); goto out; - } else if (!strcasecmp(argv[0], "fail_if_no_path")) { + } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) { r = queue_if_no_path(m, 0, 0); goto out; } @@ -1460,18 +1519,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) goto out; } - if (!strcasecmp(argv[0], "disable_group")) { + if (!strnicmp(argv[0], MESG_STR("disable_group"))) { r = bypass_pg_num(m, argv[1], 1); goto out; - } else if (!strcasecmp(argv[0], "enable_group")) { + } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) { r = bypass_pg_num(m, argv[1], 0); goto out; - } else if (!strcasecmp(argv[0], "switch_group")) { + } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) { r = switch_pg_num(m, argv[1]); goto out; - } else if (!strcasecmp(argv[0], "reinstate_path")) + } else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) action = reinstate_path; - else if (!strcasecmp(argv[0], "fail_path")) + else if (!strnicmp(argv[0], MESG_STR("fail_path"))) action = fail_path; else { DMWARN("Unrecognised multipath message received."); diff --git a/trunk/drivers/md/dm-raid.c b/trunk/drivers/md/dm-raid.c index a002dd85db1e..e5d8904fc8f6 100644 --- a/trunk/drivers/md/dm-raid.c +++ b/trunk/drivers/md/dm-raid.c @@ -8,19 +8,19 @@ #include #include "md.h" -#include "raid1.h" #include "raid5.h" +#include "dm.h" #include 
"bitmap.h" -#include - #define DM_MSG_PREFIX "raid" /* - * The following flags are used by dm-raid.c to set up the array state. - * They must be cleared before md_run is called. + * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then + * make it so the flag doesn't set anything. */ -#define FirstUse 10 /* rdev flag */ +#ifndef MD_SYNC_STATE_FORCED +#define MD_SYNC_STATE_FORCED 0 +#endif struct raid_dev { /* @@ -43,15 +43,14 @@ struct raid_dev { /* * Flags for rs->print_flags field. */ -#define DMPF_SYNC 0x1 -#define DMPF_NOSYNC 0x2 -#define DMPF_REBUILD 0x4 -#define DMPF_DAEMON_SLEEP 0x8 -#define DMPF_MIN_RECOVERY_RATE 0x10 -#define DMPF_MAX_RECOVERY_RATE 0x20 -#define DMPF_MAX_WRITE_BEHIND 0x40 -#define DMPF_STRIPE_CACHE 0x80 -#define DMPF_REGION_SIZE 0X100 +#define DMPF_DAEMON_SLEEP 0x1 +#define DMPF_MAX_WRITE_BEHIND 0x2 +#define DMPF_SYNC 0x4 +#define DMPF_NOSYNC 0x8 +#define DMPF_STRIPE_CACHE 0x10 +#define DMPF_MIN_RECOVERY_RATE 0x20 +#define DMPF_MAX_RECOVERY_RATE 0x40 + struct raid_set { struct dm_target *ti; @@ -73,7 +72,6 @@ static struct raid_type { const unsigned level; /* RAID level. */ const unsigned algorithm; /* RAID algorithm. */ } raid_types[] = { - {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, @@ -107,8 +105,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra } sectors_per_dev = ti->len; - if ((raid_type->level > 1) && - sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { + if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { ti->error = "Target length not divisible by number of data devices"; return ERR_PTR(-EINVAL); } @@ -150,16 +147,9 @@ static void context_free(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { - if (rs->dev[i].meta_dev) - dm_put_device(rs->ti, rs->dev[i].meta_dev); - if (rs->dev[i].rdev.sb_page) - put_page(rs->dev[i].rdev.sb_page); - rs->dev[i].rdev.sb_page = NULL; - rs->dev[i].rdev.sb_loaded = 0; + for (i = 0; i < rs->md.raid_disks; i++) if (rs->dev[i].data_dev) dm_put_device(rs->ti, rs->dev[i].data_dev); - } kfree(rs); } @@ -169,16 +159,7 @@ static void context_free(struct raid_set *rs) * : meta device name or '-' if missing * : data device name or '-' if missing * - * The following are permitted: - * - - - * - - * - * - * The following is not allowed: - * - - * - * This code parses those words. If there is a failure, - * the caller must use context_free to unwind the operations. + * This code parses those words. 
*/ static int dev_parms(struct raid_set *rs, char **argv) { @@ -201,16 +182,8 @@ static int dev_parms(struct raid_set *rs, char **argv) rs->dev[i].rdev.mddev = &rs->md; if (strcmp(argv[0], "-")) { - ret = dm_get_device(rs->ti, argv[0], - dm_table_get_mode(rs->ti->table), - &rs->dev[i].meta_dev); - rs->ti->error = "RAID metadata device lookup failure"; - if (ret) - return ret; - - rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); - if (!rs->dev[i].rdev.sb_page) - return -ENOMEM; + rs->ti->error = "Metadata devices not supported"; + return -EINVAL; } if (!strcmp(argv[1], "-")) { @@ -220,10 +193,6 @@ static int dev_parms(struct raid_set *rs, char **argv) return -EINVAL; } - rs->ti->error = "No data device supplied with metadata device"; - if (rs->dev[i].meta_dev) - return -EINVAL; - continue; } @@ -235,10 +204,6 @@ static int dev_parms(struct raid_set *rs, char **argv) return ret; } - if (rs->dev[i].meta_dev) { - metadata_available = 1; - rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev; - } rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) @@ -269,110 +234,34 @@ static int dev_parms(struct raid_set *rs, char **argv) return 0; } -/* - * validate_region_size - * @rs - * @region_size: region size in sectors. If 0, pick a size (4MiB default). - * - * Set rs->md.bitmap_info.chunksize (which really refers to 'region size'). - * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap. - * - * Returns: 0 on success, -EINVAL on failure. - */ -static int validate_region_size(struct raid_set *rs, unsigned long region_size) -{ - unsigned long min_region_size = rs->ti->len / (1 << 21); - - if (!region_size) { - /* - * Choose a reasonable default. All figures in sectors. - */ - if (min_region_size > (1 << 13)) { - DMINFO("Choosing default region size of %lu sectors", - region_size); - region_size = min_region_size; - } else { - DMINFO("Choosing default region size of 4MiB"); - region_size = 1 << 13; /* sectors */ - } - } else { - /* - * Validate user-supplied value. - */ - if (region_size > rs->ti->len) { - rs->ti->error = "Supplied region size is too large"; - return -EINVAL; - } - - if (region_size < min_region_size) { - DMERR("Supplied region_size (%lu sectors) below minimum (%lu)", - region_size, min_region_size); - rs->ti->error = "Supplied region size is too small"; - return -EINVAL; - } - - if (!is_power_of_2(region_size)) { - rs->ti->error = "Region size is not a power of 2"; - return -EINVAL; - } - - if (region_size < rs->md.chunk_sectors) { - rs->ti->error = "Region size is smaller than the chunk size"; - return -EINVAL; - } - } - - /* - * Convert sectors to bytes. - */ - rs->md.bitmap_info.chunksize = (region_size << 9); - - return 0; -} - /* * Possible arguments are... 
+ * RAID456: * [optional_args] * - * Argument definitions - * The number of sectors per disk that - * will form the "stripe" - * [[no]sync] Force or prevent recovery of the - * entire array + * Optional args: + * [[no]sync] Force or prevent recovery of the entire array * [rebuild ] Rebuild the drive indicated by the index - * [daemon_sleep ] Time between bitmap daemon work to - * clear bits + * [daemon_sleep ] Time between bitmap daemon work to clear bits * [min_recovery_rate ] Throttle RAID initialization * [max_recovery_rate ] Throttle RAID initialization - * [write_mostly ] Indicate a write mostly drive via index * [max_write_behind ] See '-write-behind=' (man mdadm) * [stripe_cache ] Stripe cache size for higher RAIDs - * [region_size ] Defines granularity of bitmap */ static int parse_raid_params(struct raid_set *rs, char **argv, unsigned num_raid_params) { unsigned i, rebuild_cnt = 0; - unsigned long value, region_size = 0; + unsigned long value; char *key; /* * First, parse the in-order required arguments - * "chunk_size" is the only argument of this type. */ - if ((strict_strtoul(argv[0], 10, &value) < 0)) { + if ((strict_strtoul(argv[0], 10, &value) < 0) || + !is_power_of_2(value) || (value < 8)) { rs->ti->error = "Bad chunk size"; return -EINVAL; - } else if (rs->raid_type->level == 1) { - if (value) - DMERR("Ignoring chunk size parameter for RAID 1"); - value = 0; - } else if (!is_power_of_2(value)) { - rs->ti->error = "Chunk size must be a power of 2"; - return -EINVAL; - } else if (value < 8) { - rs->ti->error = "Chunk size value is too small"; - return -EINVAL; } rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; @@ -380,39 +269,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, num_raid_params--; /* - * We set each individual device as In_sync with a completed - * 'recovery_offset'. If there has been a device failure or - * replacement then one of the following cases applies: - * - * 1) User specifies 'rebuild'. - * - Device is reset when param is read. - * 2) A new device is supplied. - * - No matching superblock found, resets device. - * 3) Device failure was transient and returns on reload. - * - Failure noticed, resets device for bitmap replay. - * 4) Device hadn't completed recovery after previous failure. - * - Superblock is read and overrides recovery_offset. - * - * What is found in the superblocks of the devices is always - * authoritative, unless 'rebuild' or '[no]sync' was specified. 
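
/*
 * Editor's note: a userspace model of the chunk-size rules enforced by
 * the two variants of parse_raid_params() above: RAID1 ignores the value,
 * everything else requires a power of two of at least 8 sectors.
 */
#include <assert.h>

static int chunk_size_ok(int raid_level, unsigned long sectors)
{
	if (raid_level == 1)
		return 1; /* chunk size is ignored for RAID1 */
	if (sectors < 8)
		return 0;
	return (sectors & (sectors - 1)) == 0; /* is_power_of_2() */
}

int main(void)
{
	assert(chunk_size_ok(5, 8));    /* 4KiB stripes: minimum allowed */
	assert(chunk_size_ok(5, 128));  /* 64KiB: a common choice */
	assert(!chunk_size_ok(5, 96));  /* not a power of two */
	assert(!chunk_size_ok(5, 4));   /* too small */
	assert(chunk_size_ok(1, 0));    /* RAID1: value ignored */
	return 0;
}
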
+ * Second, parse the unordered optional arguments */ - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->md.raid_disks; i++) set_bit(In_sync, &rs->dev[i].rdev.flags); - rs->dev[i].rdev.recovery_offset = MaxSector; - } - /* - * Second, parse the unordered optional arguments - */ for (i = 0; i < num_raid_params; i++) { - if (!strcasecmp(argv[i], "nosync")) { + if (!strcmp(argv[i], "nosync")) { rs->md.recovery_cp = MaxSector; rs->print_flags |= DMPF_NOSYNC; + rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } - if (!strcasecmp(argv[i], "sync")) { + if (!strcmp(argv[i], "sync")) { rs->md.recovery_cp = 0; rs->print_flags |= DMPF_SYNC; + rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } @@ -428,13 +300,9 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } - if (!strcasecmp(key, "rebuild")) { - rebuild_cnt++; - if (((rs->raid_type->level != 1) && - (rebuild_cnt > rs->raid_type->parity_devs)) || - ((rs->raid_type->level == 1) && - (rebuild_cnt > (rs->md.raid_disks - 1)))) { - rs->ti->error = "Too many rebuild devices specified for given RAID type"; + if (!strcmp(key, "rebuild")) { + if (++rebuild_cnt > rs->raid_type->parity_devs) { + rs->ti->error = "Too many rebuild drives given"; return -EINVAL; } if (value > rs->md.raid_disks) { @@ -443,22 +311,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } clear_bit(In_sync, &rs->dev[value].rdev.flags); rs->dev[value].rdev.recovery_offset = 0; - rs->print_flags |= DMPF_REBUILD; - } else if (!strcasecmp(key, "write_mostly")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "write_mostly option is only valid for RAID1"; - return -EINVAL; - } - if (value > rs->md.raid_disks) { - rs->ti->error = "Invalid write_mostly drive index given"; - return -EINVAL; - } - set_bit(WriteMostly, &rs->dev[value].rdev.flags); - } else if (!strcasecmp(key, "max_write_behind")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "max_write_behind option is only valid for RAID1"; - return -EINVAL; - } + } else if (!strcmp(key, "max_write_behind")) { rs->print_flags |= DMPF_MAX_WRITE_BEHIND; /* @@ -471,14 +324,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } rs->md.bitmap_info.max_write_behind = value; - } else if (!strcasecmp(key, "daemon_sleep")) { + } else if (!strcmp(key, "daemon_sleep")) { rs->print_flags |= DMPF_DAEMON_SLEEP; if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { rs->ti->error = "daemon sleep period out of range"; return -EINVAL; } rs->md.bitmap_info.daemon_sleep = value; - } else if (!strcasecmp(key, "stripe_cache")) { + } else if (!strcmp(key, "stripe_cache")) { rs->print_flags |= DMPF_STRIPE_CACHE; /* @@ -495,23 +348,20 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->ti->error = "Bad stripe_cache size"; return -EINVAL; } - } else if (!strcasecmp(key, "min_recovery_rate")) { + } else if (!strcmp(key, "min_recovery_rate")) { rs->print_flags |= DMPF_MIN_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "min_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_min = (int)value; - } else if (!strcasecmp(key, "max_recovery_rate")) { + } else if (!strcmp(key, "max_recovery_rate")) { rs->print_flags |= DMPF_MAX_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "max_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_max = (int)value; - } else if (!strcasecmp(key, "region_size")) { - rs->print_flags |= DMPF_REGION_SIZE; - region_size = value; } else { DMERR("Unable to parse RAID parameter: %s", key); 
rs->ti->error = "Unable to parse RAID parameters"; @@ -519,19 +369,6 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } } - if (validate_region_size(rs, region_size)) - return -EINVAL; - - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; - - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; - /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; @@ -550,351 +387,17 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) { struct raid_set *rs = container_of(cb, struct raid_set, callbacks); - if (rs->raid_type->level == 1) - return md_raid1_congested(&rs->md, bits); - return md_raid5_congested(&rs->md, bits); } -/* - * This structure is never routinely used by userspace, unlike md superblocks. - * Devices with this superblock should only ever be accessed via device-mapper. - */ -#define DM_RAID_MAGIC 0x64526D44 -struct dm_raid_superblock { - __le32 magic; /* "DmRd" */ - __le32 features; /* Used to indicate possible future changes */ - - __le32 num_devices; /* Number of devices in this array. (Max 64) */ - __le32 array_position; /* The position of this drive in the array */ - - __le64 events; /* Incremented by md when superblock updated */ - __le64 failed_devices; /* Bit field of devices to indicate failures */ - - /* - * This offset tracks the progress of the repair or replacement of - * an individual drive. - */ - __le64 disk_recovery_offset; - - /* - * This offset tracks the progress of the initial array - * synchronisation/parity calculation. - */ - __le64 array_resync_offset; - - /* - * RAID characteristics - */ - __le32 level; - __le32 layout; - __le32 stripe_sectors; - - __u8 pad[452]; /* Round struct to 512 bytes. */ - /* Always set to 0 when writing. */ -} __packed; - -static int read_disk_sb(mdk_rdev_t *rdev, int size) -{ - BUG_ON(!rdev->sb_page); - - if (rdev->sb_loaded) - return 0; - - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { - DMERR("Failed to read device superblock"); - return -EINVAL; - } - - rdev->sb_loaded = 1; - - return 0; -} - -static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev) -{ - mdk_rdev_t *r, *t; - uint64_t failed_devices; - struct dm_raid_superblock *sb; - - sb = page_address(rdev->sb_page); - failed_devices = le64_to_cpu(sb->failed_devices); - - rdev_for_each(r, t, mddev) - if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) - failed_devices |= (1ULL << r->raid_disk); - - memset(sb, 0, sizeof(*sb)); - - sb->magic = cpu_to_le32(DM_RAID_MAGIC); - sb->features = cpu_to_le32(0); /* No features yet */ - - sb->num_devices = cpu_to_le32(mddev->raid_disks); - sb->array_position = cpu_to_le32(rdev->raid_disk); - - sb->events = cpu_to_le64(mddev->events); - sb->failed_devices = cpu_to_le64(failed_devices); - - sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); - sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); - - sb->level = cpu_to_le32(mddev->level); - sb->layout = cpu_to_le32(mddev->layout); - sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); -} - -/* - * super_load - * - * This function creates a superblock if one is not found on the device - * and will decide which superblock to use if there's a choice. 
- * - * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise - */ -static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev) -{ - int ret; - struct dm_raid_superblock *sb; - struct dm_raid_superblock *refsb; - uint64_t events_sb, events_refsb; - - rdev->sb_start = 0; - rdev->sb_size = sizeof(*sb); - - ret = read_disk_sb(rdev, rdev->sb_size); - if (ret) - return ret; - - sb = page_address(rdev->sb_page); - if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) { - super_sync(rdev->mddev, rdev); - - set_bit(FirstUse, &rdev->flags); - - /* Force writing of superblocks to disk */ - set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); - - /* Any superblock is better than none, choose that if given */ - return refdev ? 0 : 1; - } - - if (!refdev) - return 1; - - events_sb = le64_to_cpu(sb->events); - - refsb = page_address(refdev->sb_page); - events_refsb = le64_to_cpu(refsb->events); - - return (events_sb > events_refsb) ? 1 : 0; -} - -static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev) -{ - int role; - struct raid_set *rs = container_of(mddev, struct raid_set, md); - uint64_t events_sb; - uint64_t failed_devices; - struct dm_raid_superblock *sb; - uint32_t new_devs = 0; - uint32_t rebuilds = 0; - mdk_rdev_t *r, *t; - struct dm_raid_superblock *sb2; - - sb = page_address(rdev->sb_page); - events_sb = le64_to_cpu(sb->events); - failed_devices = le64_to_cpu(sb->failed_devices); - - /* - * Initialise to 1 if this is a new superblock. - */ - mddev->events = events_sb ? : 1; - - /* - * Reshaping is not currently allowed - */ - if ((le32_to_cpu(sb->level) != mddev->level) || - (le32_to_cpu(sb->layout) != mddev->layout) || - (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { - DMERR("Reshaping arrays not yet supported."); - return -EINVAL; - } - - /* We can only change the number of devices in RAID1 right now */ - if ((rs->raid_type->level != 1) && - (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { - DMERR("Reshaping arrays not yet supported."); - return -EINVAL; - } - - if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))) - mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); - - /* - * During load, we set FirstUse if a new superblock was written. - * There are two reasons we might not have a superblock: - * 1) The array is brand new - in which case, all of the - * devices must have their In_sync bit set. Also, - * recovery_cp must be 0, unless forced. - * 2) This is a new device being added to an old array - * and the new device needs to be rebuilt - in which - * case the In_sync bit will /not/ be set and - * recovery_cp must be MaxSector. 
- */ - rdev_for_each(r, t, mddev) { - if (!test_bit(In_sync, &r->flags)) { - if (!test_bit(FirstUse, &r->flags)) - DMERR("Superblock area of " - "rebuild device %d should have been " - "cleared.", r->raid_disk); - set_bit(FirstUse, &r->flags); - rebuilds++; - } else if (test_bit(FirstUse, &r->flags)) - new_devs++; - } - - if (!rebuilds) { - if (new_devs == mddev->raid_disks) { - DMINFO("Superblocks created for new array"); - set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - } else if (new_devs) { - DMERR("New device injected " - "into existing array without 'rebuild' " - "parameter specified"); - return -EINVAL; - } - } else if (new_devs) { - DMERR("'rebuild' devices cannot be " - "injected into an array with other first-time devices"); - return -EINVAL; - } else if (mddev->recovery_cp != MaxSector) { - DMERR("'rebuild' specified while array is not in-sync"); - return -EINVAL; - } - - /* - * Now we set the Faulty bit for those devices that are - * recorded in the superblock as failed. - */ - rdev_for_each(r, t, mddev) { - if (!r->sb_page) - continue; - sb2 = page_address(r->sb_page); - sb2->failed_devices = 0; - - /* - * Check for any device re-ordering. - */ - if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { - role = le32_to_cpu(sb2->array_position); - if (role != r->raid_disk) { - if (rs->raid_type->level != 1) { - rs->ti->error = "Cannot change device " - "positions in RAID array"; - return -EINVAL; - } - DMINFO("RAID1 device #%d now at position #%d", - role, r->raid_disk); - } - - /* - * Partial recovery is performed on - * returning failed devices. - */ - if (failed_devices & (1 << role)) - set_bit(Faulty, &r->flags); - } - } - - return 0; -} - -static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev) -{ - struct dm_raid_superblock *sb = page_address(rdev->sb_page); - - /* - * If mddev->events is not set, we know we have not yet initialized - * the array. - */ - if (!mddev->events && super_init_validation(mddev, rdev)) - return -EINVAL; - - mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */ - rdev->mddev->bitmap_info.default_offset = 4096 >> 9; - if (!test_bit(FirstUse, &rdev->flags)) { - rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); - if (rdev->recovery_offset != MaxSector) - clear_bit(In_sync, &rdev->flags); - } - - /* - * If a device comes back, set it as not In_sync and no longer faulty. - */ - if (test_bit(Faulty, &rdev->flags)) { - clear_bit(Faulty, &rdev->flags); - clear_bit(In_sync, &rdev->flags); - rdev->saved_raid_disk = rdev->raid_disk; - rdev->recovery_offset = 0; - } - - clear_bit(FirstUse, &rdev->flags); - - return 0; -} - -/* - * Analyse superblocks and select the freshest. - */ -static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) -{ - int ret; - mdk_rdev_t *rdev, *freshest, *tmp; - mddev_t *mddev = &rs->md; - - freshest = NULL; - rdev_for_each(rdev, tmp, mddev) { - if (!rdev->meta_bdev) - continue; - - ret = super_load(rdev, freshest); - - switch (ret) { - case 1: - freshest = rdev; - break; - case 0: - break; - default: - ti->error = "Failed to load superblock"; - return ret; - } - } - - if (!freshest) - return 0; - - /* - * Validation of the freshest device provides the source of - * validation for the remaining devices. 
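The selection step analyse_superblocks() performs above reduces to a scan for the highest event counter. This sketch shows only the shape, with stand-in types (member, member_events) in place of mdk_rdev_t and the real superblock accessors:

    #include <linux/types.h>

    struct member;				/* stands in for mdk_rdev_t */
    extern u64 member_events(struct member *m);	/* le64_to_cpu(sb->events) */

    static struct member *pick_freshest(struct member **devs, int n)
    {
    	struct member *freshest = NULL;
    	int i;

    	for (i = 0; i < n; i++) {
    		if (!freshest ||
    		    member_events(devs[i]) > member_events(freshest))
    			freshest = devs[i];
    	}
    	return freshest;	/* NULL only if n == 0 */
    }
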
- */ - ti->error = "Unable to assemble array: Invalid superblocks"; - if (super_validate(mddev, freshest)) - return -EINVAL; - - rdev_for_each(rdev, tmp, mddev) - if ((rdev != freshest) && super_validate(mddev, rdev)) - return -EINVAL; - - return 0; -} - /* * Construct a RAID4/5/6 mapping: * Args: * <#raid_params> \ * <#raid_devs> { .. } * + * ** metadata devices are not supported yet, use '-' instead ** + * * varies by . See 'parse_raid_params' for * details on possible . */ @@ -962,12 +465,8 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) if (ret) goto bad; - rs->md.sync_super = super_sync; - ret = analyse_superblocks(ti, rs); - if (ret) - goto bad; - INIT_WORK(&rs->md.event_work, do_table_event); + ti->split_io = rs->md.chunk_sectors; ti->private = rs; mutex_lock(&rs->md.reconfig_mutex); @@ -983,7 +482,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) rs->callbacks.congested_fn = raid_is_congested; dm_table_add_target_callbacks(ti->table, &rs->callbacks); - mddev_suspend(&rs->md); return 0; bad: @@ -1048,17 +546,12 @@ static int raid_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: /* The string you would use to construct this array */ - for (i = 0; i < rs->md.raid_disks; i++) { - if ((rs->print_flags & DMPF_REBUILD) && - rs->dev[i].data_dev && - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; /* for rebuilds */ + for (i = 0; i < rs->md.raid_disks; i++) if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; - } + !test_bit(In_sync, &rs->dev[i].rdev.flags)) + raid_param_cnt++; /* for rebuilds */ - raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2); + raid_param_cnt += (hweight64(rs->print_flags) * 2); if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) raid_param_cnt--; @@ -1072,8 +565,7 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" nosync"); for (i = 0; i < rs->md.raid_disks; i++) - if ((rs->print_flags & DMPF_REBUILD) && - rs->dev[i].data_dev && + if (rs->dev[i].data_dev && !test_bit(In_sync, &rs->dev[i].rdev.flags)) DMEMIT(" rebuild %u", i); @@ -1087,11 +579,6 @@ static int raid_status(struct dm_target *ti, status_type_t type, if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); - for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - DMEMIT(" write_mostly %u", i); - if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) DMEMIT(" max_write_behind %lu", rs->md.bitmap_info.max_write_behind); @@ -1104,16 +591,9 @@ static int raid_status(struct dm_target *ti, status_type_t type, conf ? 
conf->max_nr_stripes * 2 : 0); } - if (rs->print_flags & DMPF_REGION_SIZE) - DMEMIT(" region_size %lu", - rs->md.bitmap_info.chunksize >> 9); - DMEMIT(" %d", rs->md.raid_disks); for (i = 0; i < rs->md.raid_disks; i++) { - if (rs->dev[i].meta_dev) - DMEMIT(" %s", rs->dev[i].meta_dev->name); - else - DMEMIT(" -"); + DMEMIT(" -"); /* metadata device */ if (rs->dev[i].data_dev) DMEMIT(" %s", rs->dev[i].data_dev->name); @@ -1170,13 +650,12 @@ static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; - bitmap_load(&rs->md); mddev_resume(&rs->md); } static struct target_type raid_target = { .name = "raid", - .version = {1, 1, 0}, + .version = {1, 0, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/trunk/drivers/md/dm-snap-persistent.c b/trunk/drivers/md/dm-snap-persistent.c index d1f1d7017103..135c2f1fdbfc 100644 --- a/trunk/drivers/md/dm-snap-persistent.c +++ b/trunk/drivers/md/dm-snap-persistent.c @@ -58,30 +58,25 @@ #define NUM_SNAPSHOT_HDR_CHUNKS 1 struct disk_header { - __le32 magic; + uint32_t magic; /* * Is this snapshot valid. There is no way of recovering * an invalid snapshot. */ - __le32 valid; + uint32_t valid; /* * Simple, incrementing version. no backward * compatibility. */ - __le32 version; + uint32_t version; /* In sectors */ - __le32 chunk_size; -} __packed; + uint32_t chunk_size; +}; struct disk_exception { - __le64 old_chunk; - __le64 new_chunk; -} __packed; - -struct core_exception { uint64_t old_chunk; uint64_t new_chunk; }; @@ -174,9 +169,10 @@ static int alloc_area(struct pstore *ps) if (!ps->area) goto err_area; - ps->zero_area = vzalloc(len); + ps->zero_area = vmalloc(len); if (!ps->zero_area) goto err_zero_area; + memset(ps->zero_area, 0, len); ps->header_area = vmalloc(len); if (!ps->header_area) @@ -400,32 +396,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) } static void read_exception(struct pstore *ps, - uint32_t index, struct core_exception *result) + uint32_t index, struct disk_exception *result) { - struct disk_exception *de = get_exception(ps, index); + struct disk_exception *e = get_exception(ps, index); /* copy it */ - result->old_chunk = le64_to_cpu(de->old_chunk); - result->new_chunk = le64_to_cpu(de->new_chunk); + result->old_chunk = le64_to_cpu(e->old_chunk); + result->new_chunk = le64_to_cpu(e->new_chunk); } static void write_exception(struct pstore *ps, - uint32_t index, struct core_exception *e) + uint32_t index, struct disk_exception *de) { - struct disk_exception *de = get_exception(ps, index); + struct disk_exception *e = get_exception(ps, index); /* copy it */ - de->old_chunk = cpu_to_le64(e->old_chunk); - de->new_chunk = cpu_to_le64(e->new_chunk); + e->old_chunk = cpu_to_le64(de->old_chunk); + e->new_chunk = cpu_to_le64(de->new_chunk); } static void clear_exception(struct pstore *ps, uint32_t index) { - struct disk_exception *de = get_exception(ps, index); + struct disk_exception *e = get_exception(ps, index); /* clear it */ - de->old_chunk = 0; - de->new_chunk = 0; + e->old_chunk = 0; + e->new_chunk = 0; } /* @@ -441,13 +437,13 @@ static int insert_exceptions(struct pstore *ps, { int r; unsigned int i; - struct core_exception e; + struct disk_exception de; /* presume the area is full */ *full = 1; for (i = 0; i < ps->exceptions_per_area; i++) { - read_exception(ps, i, &e); + read_exception(ps, i, &de); /* * If the new_chunk is pointing at the start of @@ -455,7 +451,7 @@ static int insert_exceptions(struct pstore *ps, * is we know that we've hit the end of the 
* exceptions. Therefore the area is not full. */ - if (e.new_chunk == 0LL) { + if (de.new_chunk == 0LL) { ps->current_committed = i; *full = 0; break; @@ -464,13 +460,13 @@ static int insert_exceptions(struct pstore *ps, /* * Keep track of the start of the free chunks. */ - if (ps->next_free <= e.new_chunk) - ps->next_free = e.new_chunk + 1; + if (ps->next_free <= de.new_chunk) + ps->next_free = de.new_chunk + 1; /* * Otherwise we add the exception to the snapshot. */ - r = callback(callback_context, e.old_chunk, e.new_chunk); + r = callback(callback_context, de.old_chunk, de.new_chunk); if (r) return r; } @@ -567,7 +563,7 @@ static int persistent_read_metadata(struct dm_exception_store *store, ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / sizeof(struct disk_exception); ps->callbacks = dm_vcalloc(ps->exceptions_per_area, - sizeof(*ps->callbacks)); + sizeof(*ps->callbacks)); if (!ps->callbacks) return -ENOMEM; @@ -645,12 +641,12 @@ static void persistent_commit_exception(struct dm_exception_store *store, { unsigned int i; struct pstore *ps = get_info(store); - struct core_exception ce; + struct disk_exception de; struct commit_callback *cb; - ce.old_chunk = e->old_chunk; - ce.new_chunk = e->new_chunk; - write_exception(ps, ps->current_committed++, &ce); + de.old_chunk = e->old_chunk; + de.new_chunk = e->new_chunk; + write_exception(ps, ps->current_committed++, &de); /* * Add the callback to the back of the array. This code @@ -674,7 +670,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, * If we completely filled the current area, then wipe the next one. */ if ((ps->current_committed == ps->exceptions_per_area) && - zero_disk_area(ps, ps->current_area + 1)) + zero_disk_area(ps, ps->current_area + 1)) ps->valid = 0; /* @@ -705,7 +701,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, chunk_t *last_new_chunk) { struct pstore *ps = get_info(store); - struct core_exception ce; + struct disk_exception de; int nr_consecutive; int r; @@ -726,9 +722,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, ps->current_committed = ps->exceptions_per_area; } - read_exception(ps, ps->current_committed - 1, &ce); - *last_old_chunk = ce.old_chunk; - *last_new_chunk = ce.new_chunk; + read_exception(ps, ps->current_committed - 1, &de); + *last_old_chunk = de.old_chunk; + *last_new_chunk = de.new_chunk; /* * Find number of consecutive chunks within the current area, @@ -737,9 +733,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, for (nr_consecutive = 1; nr_consecutive < ps->current_committed; nr_consecutive++) { read_exception(ps, ps->current_committed - 1 - nr_consecutive, - &ce); - if (ce.old_chunk != *last_old_chunk - nr_consecutive || - ce.new_chunk != *last_new_chunk - nr_consecutive) + &de); + if (de.old_chunk != *last_old_chunk - nr_consecutive || + de.new_chunk != *last_new_chunk - nr_consecutive) break; } @@ -757,7 +753,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, WRITE_FLUSH_FUA); + r = area_io(ps, WRITE); if (r < 0) return r; diff --git a/trunk/drivers/md/dm-snap.c b/trunk/drivers/md/dm-snap.c index 6f758870fc19..9ecff5f3023a 100644 --- a/trunk/drivers/md/dm-snap.c +++ b/trunk/drivers/md/dm-snap.c @@ -29,6 +29,16 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; #define dm_target_is_snapshot_merge(ti) \ ((ti)->type->name == 
dm_snapshot_merge_target_name) +/* + * The percentage increment we will wake up users at + */ +#define WAKE_UP_PERCENT 5 + +/* + * kcopyd priority of snapshot operations + */ +#define SNAPSHOT_COPY_PRIORITY 2 + /* * The size of the mempool used to track chunks in use. */ @@ -170,13 +180,6 @@ struct dm_snap_pending_exception { * kcopyd. */ int started; - - /* - * For writing a complete chunk, bypassing the copy. - */ - struct bio *full_bio; - bio_end_io_t *full_bio_end_io; - void *full_bio_private; }; /* @@ -1052,7 +1055,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) { - ti->error = "Cannot allocate private snapshot structure"; + ti->error = "Cannot allocate snapshot context private " + "structure"; r = -ENOMEM; goto bad; } @@ -1376,7 +1380,6 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; struct bio *snapshot_bios = NULL; - struct bio *full_bio = NULL; int error = 0; if (!success) { @@ -1412,15 +1415,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) */ dm_insert_exception(&s->complete, e); -out: + out: dm_remove_exception(&pe->e); snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); - full_bio = pe->full_bio; - if (full_bio) { - full_bio->bi_end_io = pe->full_bio_end_io; - full_bio->bi_private = pe->full_bio_private; - } free_pending_exception(pe); increment_pending_exceptions_done_count(); @@ -1428,15 +1426,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) up_write(&s->lock); /* Submit any pending write bios */ - if (error) { - if (full_bio) - bio_io_error(full_bio); + if (error) error_bios(snapshot_bios); - } else { - if (full_bio) - bio_endio(full_bio, 0); + else flush_bios(snapshot_bios); - } retry_origin_bios(s, origin_bios); } @@ -1487,33 +1480,8 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); -} - -static void full_bio_end_io(struct bio *bio, int error) -{ - void *callback_data = bio->bi_private; - - dm_kcopyd_do_callback(callback_data, 0, error ? 
1 : 0); -} - -static void start_full_bio(struct dm_snap_pending_exception *pe, - struct bio *bio) -{ - struct dm_snapshot *s = pe->snap; - void *callback_data; - - pe->full_bio = bio; - pe->full_bio_end_io = bio->bi_end_io; - pe->full_bio_private = bio->bi_private; - - callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, - copy_callback, pe); - - bio->bi_end_io = full_bio_end_io; - bio->bi_private = callback_data; - - generic_make_request(bio); + dm_kcopyd_copy(s->kcopyd_client, + &src, 1, &dest, 0, copy_callback, pe); } static struct dm_snap_pending_exception * @@ -1551,7 +1519,6 @@ __find_pending_exception(struct dm_snapshot *s, bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); pe->started = 0; - pe->full_bio = NULL; if (s->store->type->prepare_exception(s->store, &pe->e)) { free_pending_exception(pe); @@ -1645,19 +1612,10 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } remap_exception(s, &pe->e, bio, chunk); + bio_list_add(&pe->snapshot_bios, bio); r = DM_MAPIO_SUBMITTED; - if (!pe->started && - bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { - pe->started = 1; - up_write(&s->lock); - start_full_bio(pe, bio); - goto out; - } - - bio_list_add(&pe->snapshot_bios, bio); - if (!pe->started) { /* this is protected by snap->lock */ pe->started = 1; @@ -1670,9 +1628,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, map_context->ptr = track_chunk(s, chunk); } -out_unlock: + out_unlock: up_write(&s->lock); -out: + out: return r; } @@ -2016,7 +1974,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, pe_to_start_now = pe; } -next_snapshot: + next_snapshot: up_write(&snap->lock); if (pe_to_start_now) { diff --git a/trunk/drivers/md/dm-table.c b/trunk/drivers/md/dm-table.c index 986b8754bb08..bfe9c2333cea 100644 --- a/trunk/drivers/md/dm-table.c +++ b/trunk/drivers/md/dm-table.c @@ -54,6 +54,7 @@ struct dm_table { sector_t *highs; struct dm_target *targets; + unsigned discards_supported:1; unsigned integrity_supported:1; /* @@ -153,11 +154,12 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) return NULL; size = nmemb * elem_size; - addr = vzalloc(size); + addr = vmalloc(size); + if (addr) + memset(addr, 0, size); return addr; } -EXPORT_SYMBOL(dm_vcalloc); /* * highs, and targets are managed as dynamic arrays during a @@ -207,6 +209,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->target_callbacks); atomic_set(&t->holders, 0); + t->discards_supported = 1; if (!num_targets) num_targets = KEYS_PER_NODE; @@ -278,7 +281,6 @@ void dm_table_get(struct dm_table *t) { atomic_inc(&t->holders); } -EXPORT_SYMBOL(dm_table_get); void dm_table_put(struct dm_table *t) { @@ -288,7 +290,6 @@ void dm_table_put(struct dm_table *t) smp_mb__before_atomic_dec(); atomic_dec(&t->holders); } -EXPORT_SYMBOL(dm_table_put); /* * Checks to see if we need to extend highs or targets. @@ -454,14 +455,13 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, * Add a device to the list, or just increment the usage count if * it's already present. 
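The lookup-or-insert pattern described in the comment above, reduced to its core. This is a sketch, not the dm-table code itself: the real path also opens the device and upgrades the open mode, and dev_entry here is an invented stand-in for struct dm_dev_internal:

    #include <linux/types.h>
    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/atomic.h>

    struct dev_entry {
    	struct list_head list;
    	dev_t dev;
    	atomic_t count;
    };

    static struct dev_entry *get_entry(struct list_head *devices, dev_t dev)
    {
    	struct dev_entry *dd;

    	list_for_each_entry(dd, devices, list) {
    		if (dd->dev == dev) {
    			atomic_inc(&dd->count);	/* already present */
    			return dd;
    		}
    	}

    	dd = kmalloc(sizeof(*dd), GFP_KERNEL);
    	if (!dd)
    		return NULL;
    	dd->dev = dev;
    	atomic_set(&dd->count, 1);
    	list_add(&dd->list, devices);
    	return dd;
    }
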
*/ -int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, - struct dm_dev **result) +static int __table_get_device(struct dm_table *t, struct dm_target *ti, + const char *path, fmode_t mode, struct dm_dev **result) { int r; dev_t uninitialized_var(dev); struct dm_dev_internal *dd; unsigned int major, minor; - struct dm_table *t = ti->table; BUG_ON(!t); @@ -509,7 +509,6 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, *result = &dd->dm_dev; return 0; } -EXPORT_SYMBOL(dm_get_device); int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -540,15 +539,23 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, * If not we'll force DM to use PAGE_SIZE or * smaller I/O, just to be safe. */ - if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) + + if (q->merge_bvec_fn && !ti->type->merge) blk_limits_max_hw_sectors(limits, (unsigned int) (PAGE_SIZE >> 9)); return 0; } EXPORT_SYMBOL_GPL(dm_set_device_limits); +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result) +{ + return __table_get_device(ti->table, ti, path, mode, result); +} + + /* - * Decrement a device's use count and remove it if necessary. + * Decrement a devices use count and remove it if necessary. */ void dm_put_device(struct dm_target *ti, struct dm_dev *d) { @@ -561,7 +568,6 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) kfree(dd); } } -EXPORT_SYMBOL(dm_put_device); /* * Checks to see if the target joins onto the end of the table. @@ -785,9 +791,8 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests && tgt->discards_supported) - DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", - dm_device_name(t->md), type); + if (!tgt->num_discard_requests) + t->discards_supported = 0; return 0; @@ -797,63 +802,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, return r; } -/* - * Target argument parsing helpers. 
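For context, the dm_shift_arg()/dm_read_arg_group() helpers this hunk removes (their definitions follow just below) are consumed by target constructors roughly as in this sketch; the feature table and flag name are hypothetical:

    #include <linux/device-mapper.h>
    #include <linux/string.h>

    static int example_parse_features(struct dm_arg_set *as, char **error)
    {
    	static struct dm_arg _args[] = {
    		{0, 4, "invalid number of feature arguments"},
    	};
    	unsigned num_features;
    	int r;

    	/* bounded count, then that many grouped arguments */
    	r = dm_read_arg_group(_args, as, &num_features, error);
    	if (r)
    		return r;

    	while (num_features--) {
    		const char *arg = dm_shift_arg(as);

    		if (!strcasecmp(arg, "example_flag"))
    			continue;	/* would set a flag here */
    		*error = "unrecognised feature";
    		return -EINVAL;
    	}
    	return 0;
    }
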
- */ -static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error, unsigned grouped) -{ - const char *arg_str = dm_shift_arg(arg_set); - - if (!arg_str || - (sscanf(arg_str, "%u", value) != 1) || - (*value < arg->min) || - (*value > arg->max) || - (grouped && arg_set->argc < *value)) { - *error = arg->error; - return -EINVAL; - } - - return 0; -} - -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error) -{ - return validate_next_arg(arg, arg_set, value, error, 0); -} -EXPORT_SYMBOL(dm_read_arg); - -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error) -{ - return validate_next_arg(arg, arg_set, value, error, 1); -} -EXPORT_SYMBOL(dm_read_arg_group); - -const char *dm_shift_arg(struct dm_arg_set *as) -{ - char *r; - - if (as->argc) { - as->argc--; - r = *as->argv; - as->argv++; - return r; - } - - return NULL; -} -EXPORT_SYMBOL(dm_shift_arg); - -void dm_consume_args(struct dm_arg_set *as, unsigned num_args) -{ - BUG_ON(as->argc < num_args); - as->argc -= num_args; - as->argv += num_args; -} -EXPORT_SYMBOL(dm_consume_args); - static int dm_table_set_type(struct dm_table *t) { unsigned i; @@ -1129,13 +1077,11 @@ void dm_table_event(struct dm_table *t) t->event_fn(t->event_context); mutex_unlock(&_event_lock); } -EXPORT_SYMBOL(dm_table_event); sector_t dm_table_get_size(struct dm_table *t) { return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; } -EXPORT_SYMBOL(dm_table_get_size); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) { @@ -1248,45 +1194,9 @@ static void dm_table_set_integrity(struct dm_table *t) blk_get_integrity(template_disk)); } -static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - unsigned flush = (*(unsigned *)data); - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && (q->flush_flags & flush); -} - -static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) -{ - struct dm_target *ti; - unsigned i = 0; - - /* - * Require at least one underlying device to support flushes. - * t->devices includes internal dm devices such as mirror logs - * so we need to use iterate_devices here, which targets - * supporting flushes must provide. 
- */ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (!ti->num_flush_requests) - continue; - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_flush_capable, &flush)) - return 1; - } - - return 0; -} - void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { - unsigned flush = 0; - /* * Copy table's limits to the DM device's request_queue */ @@ -1297,13 +1207,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - if (dm_table_supports_flush(t, REQ_FLUSH)) { - flush |= REQ_FLUSH; - if (dm_table_supports_flush(t, REQ_FUA)) - flush |= REQ_FUA; - } - blk_queue_flush(q, flush); - dm_table_set_integrity(t); /* @@ -1334,7 +1237,6 @@ fmode_t dm_table_get_mode(struct dm_table *t) { return t->mode; } -EXPORT_SYMBOL(dm_table_get_mode); static void suspend_targets(struct dm_table *t, unsigned postsuspend) { @@ -1443,7 +1345,6 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) { return t->md; } -EXPORT_SYMBOL(dm_table_get_md); static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -1458,19 +1359,19 @@ bool dm_table_supports_discards(struct dm_table *t) struct dm_target *ti; unsigned i = 0; + if (!t->discards_supported) + return 0; + /* * Unless any target used by the table set discards_supported, * require at least one underlying device to support discards. * t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets - * supporting discard selectively must provide. + * supporting discard must provide. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_discard_requests) - continue; - if (ti->discards_supported) return 1; @@ -1481,3 +1382,13 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; } + +EXPORT_SYMBOL(dm_vcalloc); +EXPORT_SYMBOL(dm_get_device); +EXPORT_SYMBOL(dm_put_device); +EXPORT_SYMBOL(dm_table_event); +EXPORT_SYMBOL(dm_table_get_size); +EXPORT_SYMBOL(dm_table_get_mode); +EXPORT_SYMBOL(dm_table_get_md); +EXPORT_SYMBOL(dm_table_put); +EXPORT_SYMBOL(dm_table_get); diff --git a/trunk/drivers/md/dm.c b/trunk/drivers/md/dm.c index 52b39f335bb3..0cf68b478878 100644 --- a/trunk/drivers/md/dm.c +++ b/trunk/drivers/md/dm.c @@ -37,8 +37,6 @@ static const char *_name = DM_NAME; static unsigned int major = 0; static unsigned int _major = 0; -static DEFINE_IDR(_minor_idr); - static DEFINE_SPINLOCK(_minor_lock); /* * For bio-based dm. @@ -111,7 +109,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 -#define DMF_MERGE_IS_OPTIONAL 6 /* * Work processed by per-device workqueue. @@ -316,12 +313,6 @@ static void __exit dm_exit(void) while (i--) _exits[i](); - - /* - * Should be empty by this point. - */ - idr_remove_all(&_minor_idr); - idr_destroy(&_minor_idr); } /* @@ -1180,8 +1171,7 @@ static int __clone_and_map_discard(struct clone_info *ci) /* * Even though the device advertised discard support, - * that does not mean every target supports it, and - * reconfiguration might also have changed that since the + * reconfiguration might have changed that since the * check was performed. 
*/ if (!ti->num_discard_requests) @@ -1715,6 +1705,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits) /*----------------------------------------------------------------- * An IDR is used to keep track of allocated minor numbers. *---------------------------------------------------------------*/ +static DEFINE_IDR(_minor_idr); + static void free_minor(int minor) { spin_lock(&_minor_lock); @@ -1808,6 +1800,7 @@ static void dm_init_md_queue(struct mapped_device *md) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); blk_queue_merge_bvec(md->queue, dm_merge_bvec); + blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); } /* @@ -1992,59 +1985,6 @@ static void __set_size(struct mapped_device *md, sector_t size) i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); } -/* - * Return 1 if the queue has a compulsory merge_bvec_fn function. - * - * If this function returns 0, then the device is either a non-dm - * device without a merge_bvec_fn, or it is a dm device that is - * able to split any bios it receives that are too big. - */ -int dm_queue_merge_is_compulsory(struct request_queue *q) -{ - struct mapped_device *dev_md; - - if (!q->merge_bvec_fn) - return 0; - - if (q->make_request_fn == dm_request) { - dev_md = q->queuedata; - if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) - return 0; - } - - return 1; -} - -static int dm_device_merge_is_compulsory(struct dm_target *ti, - struct dm_dev *dev, sector_t start, - sector_t len, void *data) -{ - struct block_device *bdev = dev->bdev; - struct request_queue *q = bdev_get_queue(bdev); - - return dm_queue_merge_is_compulsory(q); -} - -/* - * Return 1 if it is acceptable to ignore merge_bvec_fn based - * on the properties of the underlying devices. - */ -static int dm_table_merge_is_optional(struct dm_table *table) -{ - unsigned i = 0; - struct dm_target *ti; - - while (i < dm_table_get_num_targets(table)) { - ti = dm_table_get_target(table, i++); - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) - return 0; - } - - return 1; -} - /* * Returns old map, which caller must destroy. 
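The map swap that __bind() performs in the hunk below follows a publish-under-writelock pattern. Here it is reduced to its core, with md_stub standing in for the relevant fields of struct mapped_device:

    #include <linux/spinlock.h>

    struct dm_table;

    struct md_stub {
    	rwlock_t map_lock;
    	struct dm_table *map;
    };

    static struct dm_table *swap_map(struct md_stub *md, struct dm_table *t)
    {
    	struct dm_table *old;
    	unsigned long flags;

    	write_lock_irqsave(&md->map_lock, flags);
    	old = md->map;
    	md->map = t;
    	write_unlock_irqrestore(&md->map_lock, flags);

    	return old;	/* caller destroys it after the lock is dropped */
    }
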
*/ @@ -2055,7 +1995,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct request_queue *q = md->queue; sector_t size; unsigned long flags; - int merge_is_optional; size = dm_table_get_size(t); @@ -2081,16 +2020,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, __bind_mempools(md, t); - merge_is_optional = dm_table_merge_is_optional(t); - write_lock_irqsave(&md->map_lock, flags); old_map = md->map; md->map = t; dm_table_set_restrictions(t, q, limits); - if (merge_is_optional) - set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - else - clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); write_unlock_irqrestore(&md->map_lock, flags); return old_map; diff --git a/trunk/drivers/md/dm.h b/trunk/drivers/md/dm.h index 6745dbd278a4..1aaf16746da8 100644 --- a/trunk/drivers/md/dm.h +++ b/trunk/drivers/md/dm.h @@ -66,8 +66,6 @@ int dm_table_alloc_md_mempools(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); -int dm_queue_merge_is_compulsory(struct request_queue *q); - void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); void dm_set_md_type(struct mapped_device *md, unsigned type); diff --git a/trunk/drivers/of/address.c b/trunk/drivers/of/address.c index 72c33fbe451d..da1f4b9605df 100644 --- a/trunk/drivers/of/address.c +++ b/trunk/drivers/of/address.c @@ -610,6 +610,6 @@ void __iomem *of_iomap(struct device_node *np, int index) if (of_address_to_resource(np, index, &res)) return NULL; - return ioremap(res.start, resource_size(&res)); + return ioremap(res.start, 1 + res.end - res.start); } EXPORT_SYMBOL(of_iomap); diff --git a/trunk/drivers/of/base.c b/trunk/drivers/of/base.c index 3ff22e32b602..02ed36719def 100644 --- a/trunk/drivers/of/base.c +++ b/trunk/drivers/of/base.c @@ -610,9 +610,8 @@ EXPORT_SYMBOL(of_find_node_by_phandle); * * The out_value is modified only if a valid u32 value can be decoded. */ -int of_property_read_u32_array(const struct device_node *np, - const char *propname, u32 *out_values, - size_t sz) +int of_property_read_u32_array(const struct device_node *np, char *propname, + u32 *out_values, size_t sz) { struct property *prop = of_find_property(np, propname, NULL); const __be32 *val; @@ -646,7 +645,7 @@ EXPORT_SYMBOL_GPL(of_property_read_u32_array); * * The out_string pointer is modified only if a valid string can be decoded. 
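The contract stated above (out parameters are written only on a successful decode) lets callers preload a default and ignore the return value. A sketch of that caller pattern for the OF helpers touched in this hunk; the property name is hypothetical:

    #include <linux/of.h>

    static u32 example_bus_speed(struct device_node *np)
    {
    	u32 speed = 100000;	/* kept as-is unless the property decodes */

    	of_property_read_u32_array(np, "example,clock-frequency", &speed, 1);
    	return speed;
    }
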
*/ -int of_property_read_string(struct device_node *np, const char *propname, +int of_property_read_string(struct device_node *np, char *propname, const char **out_string) { struct property *prop = of_find_property(np, propname, NULL); diff --git a/trunk/drivers/pci/hotplug/acpiphp_glue.c b/trunk/drivers/pci/hotplug/acpiphp_glue.c index 220285760b68..a70fa89f76fd 100644 --- a/trunk/drivers/pci/hotplug/acpiphp_glue.c +++ b/trunk/drivers/pci/hotplug/acpiphp_glue.c @@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val, } -static const struct acpi_dock_ops acpiphp_dock_ops = { +static struct acpi_dock_ops acpiphp_dock_ops = { .handler = handle_hotplug_event_func, }; diff --git a/trunk/drivers/rtc/rtc-omap.c b/trunk/drivers/rtc/rtc-omap.c index 7789002bdd5c..bcae8dd41496 100644 --- a/trunk/drivers/rtc/rtc-omap.c +++ b/trunk/drivers/rtc/rtc-omap.c @@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev) pr_info("%s: already running\n", pdev->name); /* force to 24 hour mode */ - new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); + new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); new_ctrl |= OMAP_RTC_CTRL_STOP; /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE: diff --git a/trunk/drivers/s390/block/dasd.c b/trunk/drivers/s390/block/dasd.c index a1d3ddba99cc..432444af7ee4 100644 --- a/trunk/drivers/s390/block/dasd.c +++ b/trunk/drivers/s390/block/dasd.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -889,11 +888,11 @@ char *dasd_get_user_string(const char __user *user_buf, size_t user_len) { char *buffer; - buffer = vmalloc(user_len + 1); + buffer = kmalloc(user_len + 1, GFP_KERNEL); if (buffer == NULL) return ERR_PTR(-ENOMEM); if (copy_from_user(buffer, user_buf, user_len) != 0) { - vfree(buffer); + kfree(buffer); return ERR_PTR(-EFAULT); } /* got the string, now strip linefeed. 
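The sclp_async and qdio_debug hunks further below toggle between two idioms for reading an unsigned long from a user buffer. A side-by-side sketch of the equivalence, assuming a small bounded input as those call sites do:

    #include <linux/kernel.h>
    #include <linux/uaccess.h>

    /* older idiom: bounce buffer, NUL-terminate, strict_strtoul() */
    static int parse_old(const char __user *ubuf, size_t count,
    		     unsigned long *val)
    {
    	char buf[8];

    	if (count >= sizeof(buf))
    		return -EINVAL;
    	if (copy_from_user(buf, ubuf, count))
    		return -EFAULT;
    	buf[count] = '\0';
    	return strict_strtoul(buf, 10, val);
    }

    /* newer idiom: one helper performs the same steps */
    static int parse_new(const char __user *ubuf, size_t count,
    		     unsigned long *val)
    {
    	return kstrtoul_from_user(ubuf, count, 10, val);
    }
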
*/ @@ -931,7 +930,7 @@ static ssize_t dasd_stats_write(struct file *file, dasd_profile_off(prof); } else rc = -EINVAL; - vfree(buffer); + kfree(buffer); return rc; } @@ -1043,7 +1042,7 @@ static ssize_t dasd_stats_global_write(struct file *file, dasd_global_profile_level = DASD_PROFILE_OFF; } else rc = -EINVAL; - vfree(buffer); + kfree(buffer); return rc; } diff --git a/trunk/drivers/s390/block/dasd_eckd.c b/trunk/drivers/s390/block/dasd_eckd.c index 6e835c9fdfcb..30fb979d684d 100644 --- a/trunk/drivers/s390/block/dasd_eckd.c +++ b/trunk/drivers/s390/block/dasd_eckd.c @@ -1461,15 +1461,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device) "Read device characteristic failed, rc=%d", rc); goto out_err3; } - - if ((device->features & DASD_FEATURE_USERAW) && - !(private->rdc_data.facilities.RT_in_LR)) { - dev_err(&device->cdev->dev, "The storage server does not " - "support raw-track access\n"); - rc = -EINVAL; - goto out_err3; - } - /* find the valid cylinder size */ if (private->rdc_data.no_cyl == LV_COMPAT_CYL && private->rdc_data.long_no_cyl) diff --git a/trunk/drivers/s390/block/dasd_proc.c b/trunk/drivers/s390/block/dasd_proc.c index e12989fff4ff..6c3c5364d082 100644 --- a/trunk/drivers/s390/block/dasd_proc.c +++ b/trunk/drivers/s390/block/dasd_proc.c @@ -312,14 +312,14 @@ static ssize_t dasd_stats_proc_write(struct file *file, pr_info("The statistics have been reset\n"); } else goto out_parse_error; - vfree(buffer); + kfree(buffer); return user_len; out_parse_error: rc = -EINVAL; pr_warning("%s is not a supported value for /proc/dasd/statistics\n", str); out_error: - vfree(buffer); + kfree(buffer); return rc; #else pr_warning("/proc/dasd/statistics: is not activated in this kernel\n"); diff --git a/trunk/drivers/s390/char/sclp_async.c b/trunk/drivers/s390/char/sclp_async.c index 5f9f929e891c..7ad30e72f868 100644 --- a/trunk/drivers/s390/char/sclp_async.c +++ b/trunk/drivers/s390/char/sclp_async.c @@ -82,9 +82,12 @@ static int proc_handler_callhome(struct ctl_table *ctl, int write, return -EFAULT; } else { len = *count; - rc = kstrtoul_from_user(buffer, len, 0, &val); - if (rc) - return rc; + rc = copy_from_user(buf, buffer, sizeof(buf)); + if (rc != 0) + return -EFAULT; + buf[sizeof(buf) - 1] = '\0'; + if (strict_strtoul(buf, 0, &val) != 0) + return -EINVAL; if (val != 0 && val != 1) return -EINVAL; callhome_enabled = val; diff --git a/trunk/drivers/s390/cio/qdio.h b/trunk/drivers/s390/cio/qdio.h index e5c966462c5a..7bc643f3f5ab 100644 --- a/trunk/drivers/s390/cio/qdio.h +++ b/trunk/drivers/s390/cio/qdio.h @@ -14,8 +14,6 @@ #include "chsc.h" #define QDIO_BUSY_BIT_PATIENCE (100 << 12) /* 100 microseconds */ -#define QDIO_BUSY_BIT_RETRY_DELAY 10 /* 10 milliseconds */ -#define QDIO_BUSY_BIT_RETRIES 1000 /* = 10s retry time */ #define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */ /* diff --git a/trunk/drivers/s390/cio/qdio_debug.c b/trunk/drivers/s390/cio/qdio_debug.c index 0e615cb912d0..f8b03a636e49 100644 --- a/trunk/drivers/s390/cio/qdio_debug.c +++ b/trunk/drivers/s390/cio/qdio_debug.c @@ -188,13 +188,19 @@ static ssize_t qperf_seq_write(struct file *file, const char __user *ubuf, struct qdio_irq *irq_ptr = seq->private; struct qdio_q *q; unsigned long val; + char buf[8]; int ret, i; if (!irq_ptr) return 0; - - ret = kstrtoul_from_user(ubuf, count, 10, &val); - if (ret) + if (count >= sizeof(buf)) + return -EINVAL; + if (copy_from_user(&buf, ubuf, count)) + return -EFAULT; + buf[count] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) return ret; switch 
(val) { diff --git a/trunk/drivers/s390/cio/qdio_main.c b/trunk/drivers/s390/cio/qdio_main.c index 288c9140290e..e58169c32474 100644 --- a/trunk/drivers/s390/cio/qdio_main.c +++ b/trunk/drivers/s390/cio/qdio_main.c @@ -313,7 +313,7 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit) unsigned long schid = *((u32 *) &q->irq_ptr->schid); unsigned int fc = QDIO_SIGA_WRITE; u64 start_time = 0; - int retries = 0, cc; + int cc; if (is_qebsm(q)) { schid = q->irq_ptr->sch_token; @@ -325,7 +325,6 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit) /* hipersocket busy condition */ if (unlikely(*busy_bit)) { WARN_ON(queue_type(q) != QDIO_IQDIO_QFMT || cc != 2); - retries++; if (!start_time) { start_time = get_clock(); @@ -334,11 +333,6 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit) if ((get_clock() - start_time) < QDIO_BUSY_BIT_PATIENCE) goto again; } - if (retries) { - DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, - "%4x cc2 BB1:%1d", SCH_NO(q), q->nr); - DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "count:%u", retries); - } return cc; } @@ -734,14 +728,13 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q) static int qdio_kick_outbound_q(struct qdio_q *q) { - int retries = 0, cc; unsigned int busy_bit; + int cc; if (!need_siga_out(q)) return 0; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr); -retry: qperf_inc(q, siga_write); cc = qdio_siga_output(q, &busy_bit); @@ -750,11 +743,7 @@ static int qdio_kick_outbound_q(struct qdio_q *q) break; case 2: if (busy_bit) { - while (++retries < QDIO_BUSY_BIT_RETRIES) { - mdelay(QDIO_BUSY_BIT_RETRY_DELAY); - goto retry; - } - DBF_ERROR("%4x cc2 BBC:%1d", SCH_NO(q), q->nr); + DBF_ERROR("%4x cc2 REP:%1d", SCH_NO(q), q->nr); cc |= QDIO_ERROR_SIGA_BUSY; } else DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w cc2:%1d", q->nr); @@ -764,10 +753,6 @@ static int qdio_kick_outbound_q(struct qdio_q *q) DBF_ERROR("%4x SIGA-W:%1d", SCH_NO(q), cc); break; } - if (retries) { - DBF_ERROR("%4x cc2 BB2:%1d", SCH_NO(q), q->nr); - DBF_ERROR("count:%u", retries); - } return cc; } diff --git a/trunk/drivers/spi/spi-pl022.c b/trunk/drivers/spi/spi-pl022.c index 730b4a37b823..eba88c749fb1 100644 --- a/trunk/drivers/spi/spi-pl022.c +++ b/trunk/drivers/spi/spi-pl022.c @@ -2267,13 +2267,17 @@ static int __devexit pl022_remove(struct amba_device *adev) { struct pl022 *pl022 = amba_get_drvdata(adev); - + int status = 0; if (!pl022) return 0; /* Remove the queue */ - if (destroy_queue(pl022) != 0) - dev_err(&adev->dev, "queue remove failed\n"); + status = destroy_queue(pl022); + if (status != 0) { + dev_err(&adev->dev, + "queue remove failed (%d)\n", status); + return status; + } load_ssp_default_config(pl022); pl022_dma_remove(pl022); free_irq(adev->irq[0], pl022); @@ -2285,6 +2289,7 @@ pl022_remove(struct amba_device *adev) spi_unregister_master(pl022->master); spi_master_put(pl022->master); amba_set_drvdata(adev, NULL); + dev_dbg(&adev->dev, "remove succeeded\n"); return 0; } diff --git a/trunk/drivers/target/iscsi/Kconfig b/trunk/drivers/target/iscsi/Kconfig index 8345fb457a40..564ff4e0dbc4 100644 --- a/trunk/drivers/target/iscsi/Kconfig +++ b/trunk/drivers/target/iscsi/Kconfig @@ -1,6 +1,5 @@ config ISCSI_TARGET tristate "Linux-iSCSI.org iSCSI Target Mode Stack" - depends on NET select CRYPTO select CRYPTO_CRC32C select CRYPTO_CRC32C_INTEL if X86 diff --git a/trunk/drivers/target/iscsi/iscsi_target.c b/trunk/drivers/target/iscsi/iscsi_target.c index c24fb10de60b..14c81c4265bd 100644 --- 
a/trunk/drivers/target/iscsi/iscsi_target.c +++ b/trunk/drivers/target/iscsi/iscsi_target.c @@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) struct iscsi_tiqn *tiqn = NULL; int ret; - if (strlen(buf) >= ISCSI_IQN_LEN) { + if (strlen(buf) > ISCSI_IQN_LEN) { pr_err("Target IQN exceeds %d bytes\n", ISCSI_IQN_LEN); return ERR_PTR(-EINVAL); @@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd( char *text_ptr, *text_in; int cmdsn_ret, niov = 0, rx_got, rx_size; u32 checksum = 0, data_crc = 0, payload_length; - u32 padding = 0, pad_bytes = 0, text_length = 0; + u32 padding = 0, text_length = 0; struct iscsi_cmd *cmd; struct kvec iov[3]; struct iscsi_text *hdr; @@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd( padding = ((-payload_length) & 3); if (padding != 0) { - iov[niov].iov_base = &pad_bytes; + iov[niov].iov_base = cmd->pad_bytes; iov[niov++].iov_len = padding; rx_size += padding; pr_debug("Receiving %u additional bytes" @@ -1917,7 +1917,7 @@ static int iscsit_handle_text_cmd( if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, text_in, text_length, - padding, (u8 *)&pad_bytes, + padding, cmd->pad_bytes, (u8 *)&data_crc); if (checksum != data_crc) { @@ -3468,12 +3468,7 @@ static inline void iscsit_thread_check_cpumask( } #else - -void iscsit_thread_get_cpumask(struct iscsi_conn *conn) -{ - return; -} - +#define iscsit_thread_get_cpumask(X) ({}) #define iscsit_thread_check_cpumask(X, Y, Z) ({}) #endif /* CONFIG_SMP */ diff --git a/trunk/drivers/target/iscsi/iscsi_target_configfs.c b/trunk/drivers/target/iscsi/iscsi_target_configfs.c index f095e65b1ccf..32bb92c44450 100644 --- a/trunk/drivers/target/iscsi/iscsi_target_configfs.c +++ b/trunk/drivers/target/iscsi/iscsi_target_configfs.c @@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg( return ERR_PTR(-EOVERFLOW); } memset(buf, 0, MAX_PORTAL_LEN + 1); - snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name); + snprintf(buf, MAX_PORTAL_LEN, "%s", name); memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage)); diff --git a/trunk/drivers/target/iscsi/iscsi_target_nego.c b/trunk/drivers/target/iscsi/iscsi_target_nego.c index 4d087ac11067..713a4d23557a 100644 --- a/trunk/drivers/target/iscsi/iscsi_target_nego.c +++ b/trunk/drivers/target/iscsi/iscsi_target_nego.c @@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation( pr_err("Unable to allocate memory for struct iscsi_login.\n"); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - return NULL; + goto out; } login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); diff --git a/trunk/drivers/target/target_core_transport.c b/trunk/drivers/target/target_core_transport.c index 89760329d5d0..c75a01a1c475 100644 --- a/trunk/drivers/target/target_core_transport.c +++ b/trunk/drivers/target/target_core_transport.c @@ -1747,8 +1747,6 @@ int transport_generic_handle_cdb( } EXPORT_SYMBOL(transport_generic_handle_cdb); -static void transport_generic_request_failure(struct se_cmd *, - struct se_device *, int, int); /* * Used by fabric module frontends to queue tasks directly. 
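On the iscsit_add_tiqn() hunk above: assuming the destination buffer holds ISCSI_IQN_LEN bytes, a name of exactly ISCSI_IQN_LEN characters leaves no room for the terminating NUL, so the '>=' form is the safe bound. A sketch with an invented length:

    #include <linux/string.h>

    #define EXAMPLE_IQN_LEN 224	/* stand-in for ISCSI_IQN_LEN */

    static int example_iqn_ok(const char *buf)
    {
    	/* with '>' a name of exactly 224 chars would wrongly pass */
    	return strlen(buf) >= EXAMPLE_IQN_LEN ? 0 : 1;
    }
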
* Many only be used from process context only @@ -1756,8 +1754,6 @@ static void transport_generic_request_failure(struct se_cmd *, int transport_handle_cdb_direct( struct se_cmd *cmd) { - int ret; - if (!cmd->se_lun) { dump_stack(); pr_err("cmd->se_lun is NULL\n"); @@ -1769,31 +1765,8 @@ int transport_handle_cdb_direct( " from interrupt context\n"); return -EINVAL; } - /* - * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following - * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue() - * in existing usage to ensure that outstanding descriptors are handled - * correctly during shutdown via transport_generic_wait_for_tasks() - * - * Also, we don't take cmd->t_state_lock here as we only expect - * this to be called for initial descriptor submission. - */ - cmd->t_state = TRANSPORT_NEW_CMD; - atomic_set(&cmd->t_transport_active, 1); - /* - * transport_generic_new_cmd() is already handling QUEUE_FULL, - * so follow TRANSPORT_NEW_CMD processing thread context usage - * and call transport_generic_request_failure() if necessary.. - */ - ret = transport_generic_new_cmd(cmd); - if (ret == -EAGAIN) - return 0; - else if (ret < 0) { - cmd->transport_error_status = ret; - transport_generic_request_failure(cmd, NULL, 0, - (cmd->data_direction != DMA_TO_DEVICE)); - } - return 0; + + return transport_generic_new_cmd(cmd); } EXPORT_SYMBOL(transport_handle_cdb_direct); @@ -3351,7 +3324,7 @@ static int transport_generic_cmd_sequencer( goto out_invalid_cdb_field; } - cmd->t_task_lba = get_unaligned_be64(&cdb[2]); + cmd->t_task_lba = get_unaligned_be16(&cdb[2]); passthrough = (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV); /* diff --git a/trunk/drivers/target/tcm_fc/tcm_fc.h b/trunk/drivers/target/tcm_fc/tcm_fc.h index bd4fe21a23b8..f7fff7ed63c3 100644 --- a/trunk/drivers/target/tcm_fc/tcm_fc.h +++ b/trunk/drivers/target/tcm_fc/tcm_fc.h @@ -187,9 +187,4 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller); ssize_t ft_format_wwn(char *, size_t, u64); -/* - * Underlying HW specific helper function - */ -void ft_invl_hw_context(struct ft_cmd *); - #endif /* __TCM_FC_H__ */ diff --git a/trunk/drivers/target/tcm_fc/tfc_cmd.c b/trunk/drivers/target/tcm_fc/tfc_cmd.c index 5654dc22f7ae..09df38b4610c 100644 --- a/trunk/drivers/target/tcm_fc/tfc_cmd.c +++ b/trunk/drivers/target/tcm_fc/tfc_cmd.c @@ -320,7 +320,6 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg) default: pr_debug("%s: unhandled frame r_ctl %x\n", __func__, fh->fh_r_ctl); - ft_invl_hw_context(cmd); fc_frame_free(fp); transport_generic_free_cmd(&cmd->se_cmd, 0, 0); break; diff --git a/trunk/drivers/target/tcm_fc/tfc_io.c b/trunk/drivers/target/tcm_fc/tfc_io.c index c37f4cd96452..8e2a46ddcccb 100644 --- a/trunk/drivers/target/tcm_fc/tfc_io.c +++ b/trunk/drivers/target/tcm_fc/tfc_io.c @@ -213,49 +213,62 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp) if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF)) goto drop; - f_ctl = ntoh24(fh->fh_f_ctl); - ep = fc_seq_exch(seq); - lport = ep->lp; - if (cmd->was_ddp_setup) { - BUG_ON(!ep); - BUG_ON(!lport); - } - /* - * Doesn't expect payload if DDP is setup. Payload + * Doesn't expect even single byte of payload. Payload * is expected to be copied directly to user buffers - * due to DDP (Large Rx offload), + * due to DDP (Large Rx offload) feature, hence + * BUG_ON if BUF is non-NULL */ buf = fc_frame_payload_get(fp, 1); - if (buf) - pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, " - "cmd->sg_cnt 0x%x. 
DDP was setup" - " hence not expected to receive frame with " - "payload, Frame will be dropped if " - "'Sequence Initiative' bit in f_ctl is " - "not set\n", __func__, ep->xid, f_ctl, - cmd->sg, cmd->sg_cnt); - /* - * Invalidate HW DDP context if it was setup for respective - * command. Invalidation of HW DDP context is requited in both - * situation (success and error). - */ - ft_invl_hw_context(cmd); + if (cmd->was_ddp_setup && buf) { + pr_debug("%s: When DDP was setup, not expected to" + "receive frame with payload, Payload shall be" + "copied directly to buffer instead of coming " + "via. legacy receive queues\n", __func__); + BUG_ON(buf); + } /* - * If "Sequence Initiative (TSI)" bit set in f_ctl, means last - * write data frame is received successfully where payload is - * posted directly to user buffer and only the last frame's - * header is posted in receive queue. - * - * If "Sequence Initiative (TSI)" bit is not set, means error - * condition w.r.t. DDP, hence drop the packet and let explict - * ABORTS from other end of exchange timer trigger the recovery. + * If ft_cmd indicated 'ddp_setup', in that case only the last frame + * should come with 'TSI bit being set'. If 'TSI bit is not set and if + * data frame appears here, means error condition. In both the cases + * release the DDP context (ddp_put) and in error case, as well + * initiate error recovery mechanism. */ - if (f_ctl & FC_FC_SEQ_INIT) - goto last_frame; - else - goto drop; + ep = fc_seq_exch(seq); + if (cmd->was_ddp_setup) { + BUG_ON(!ep); + lport = ep->lp; + BUG_ON(!lport); + } + if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) { + f_ctl = ntoh24(fh->fh_f_ctl); + /* + * If TSI bit set in f_ctl, means last write data frame is + * received successfully where payload is posted directly + * to user buffer and only the last frame's header is posted + * in legacy receive queue + */ + if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */ + cmd->write_data_len = lport->tt.ddp_done(lport, + ep->xid); + goto last_frame; + } else { + /* + * Updating the write_data_len may be meaningless at + * this point, but just in case if required in future + * for debugging or any other purpose + */ + pr_err("%s: Received frame with TSI bit not" + " being SET, dropping the frame, " + "cmd->sg <%p>, cmd->sg_cnt <0x%x>\n", + __func__, cmd->sg, cmd->sg_cnt); + cmd->write_data_len = lport->tt.ddp_done(lport, + ep->xid); + lport->tt.seq_exch_abort(cmd->seq, 0); + goto drop; + } + } rel_off = ntohl(fh->fh_parm_offset); frame_len = fr_len(fp); @@ -318,39 +331,3 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp) drop: fc_frame_free(fp); } - -/* - * Handle and cleanup any HW specific resources if - * received ABORTS, errors, timeouts. 
- */ -void ft_invl_hw_context(struct ft_cmd *cmd) -{ - struct fc_seq *seq = cmd->seq; - struct fc_exch *ep = NULL; - struct fc_lport *lport = NULL; - - BUG_ON(!cmd); - - /* Cleanup the DDP context in HW if DDP was setup */ - if (cmd->was_ddp_setup && seq) { - ep = fc_seq_exch(seq); - if (ep) { - lport = ep->lp; - if (lport && (ep->xid <= lport->lro_xid)) - /* - * "ddp_done" trigger invalidation of HW - * specific DDP context - */ - cmd->write_data_len = lport->tt.ddp_done(lport, - ep->xid); - - /* - * Resetting same variable to indicate HW's - * DDP context has been invalidated to avoid - * re_invalidation of same context (context is - * identified using ep->xid) - */ - cmd->was_ddp_setup = 0; - } - } -} diff --git a/trunk/drivers/thermal/Kconfig b/trunk/drivers/thermal/Kconfig index f7f71b2d3101..bf7c687519ef 100644 --- a/trunk/drivers/thermal/Kconfig +++ b/trunk/drivers/thermal/Kconfig @@ -14,7 +14,11 @@ menuconfig THERMAL If you want this support, you should say Y or M here. config THERMAL_HWMON - bool + bool "Hardware monitoring support" depends on THERMAL depends on HWMON=y || HWMON=THERMAL - default y + help + The generic thermal sysfs driver's hardware monitoring support + requires a 2.10.7/3.0.2 or later lm-sensors userspace. + + Say Y if your user-space is new enough. diff --git a/trunk/drivers/thermal/thermal_sys.c b/trunk/drivers/thermal/thermal_sys.c index 708f8e92771a..0b1c82ad6805 100644 --- a/trunk/drivers/thermal/thermal_sys.c +++ b/trunk/drivers/thermal/thermal_sys.c @@ -420,29 +420,6 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* hwmon sys I/F */ #include - -/* thermal zone devices with the same type share one hwmon device */ -struct thermal_hwmon_device { - char type[THERMAL_NAME_LENGTH]; - struct device *device; - int count; - struct list_head tz_list; - struct list_head node; -}; - -struct thermal_hwmon_attr { - struct device_attribute attr; - char name[16]; -}; - -/* one temperature input for each thermal zone */ -struct thermal_hwmon_temp { - struct list_head hwmon_node; - struct thermal_zone_device *tz; - struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ - struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ -}; - static LIST_HEAD(thermal_hwmon_list); static ssize_t @@ -460,10 +437,9 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_hwmon_temp *temp - = container_of(hwmon_attr, struct thermal_hwmon_temp, + struct thermal_zone_device *tz + = container_of(hwmon_attr, struct thermal_zone_device, temp_input); - struct thermal_zone_device *tz = temp->tz; ret = tz->ops->get_temp(tz, &temperature); @@ -479,10 +455,9 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, { struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_hwmon_temp *temp - = container_of(hwmon_attr, struct thermal_hwmon_temp, + struct thermal_zone_device *tz + = container_of(hwmon_attr, struct thermal_zone_device, temp_crit); - struct thermal_zone_device *tz = temp->tz; long temperature; int ret; @@ -494,54 +469,22 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, } -static struct thermal_hwmon_device * -thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; + int new_hwmon_device = 1; + int result; 
mutex_lock(&thermal_list_lock); list_for_each_entry(hwmon, &thermal_hwmon_list, node) if (!strcmp(hwmon->type, tz->type)) { + new_hwmon_device = 0; mutex_unlock(&thermal_list_lock); - return hwmon; - } - mutex_unlock(&thermal_list_lock); - - return NULL; -} - -/* Find the temperature input matching a given thermal zone */ -static struct thermal_hwmon_temp * -thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, - const struct thermal_zone_device *tz) -{ - struct thermal_hwmon_temp *temp; - - mutex_lock(&thermal_list_lock); - list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) - if (temp->tz == tz) { - mutex_unlock(&thermal_list_lock); - return temp; + goto register_sys_interface; } mutex_unlock(&thermal_list_lock); - return NULL; -} - -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) -{ - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_temp *temp; - int new_hwmon_device = 1; - int result; - - hwmon = thermal_hwmon_lookup_by_type(tz); - if (hwmon) { - new_hwmon_device = 0; - goto register_sys_interface; - } - hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); if (!hwmon) return -ENOMEM; @@ -559,36 +502,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) goto free_mem; register_sys_interface: - temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL); - if (!temp) { - result = -ENOMEM; - goto unregister_name; - } - - temp->tz = tz; + tz->hwmon = hwmon; hwmon->count++; - snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH, + snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH, "temp%d_input", hwmon->count); - temp->temp_input.attr.attr.name = temp->temp_input.name; - temp->temp_input.attr.attr.mode = 0444; - temp->temp_input.attr.show = temp_input_show; - sysfs_attr_init(&temp->temp_input.attr.attr); - result = device_create_file(hwmon->device, &temp->temp_input.attr); + tz->temp_input.attr.attr.name = tz->temp_input.name; + tz->temp_input.attr.attr.mode = 0444; + tz->temp_input.attr.show = temp_input_show; + sysfs_attr_init(&tz->temp_input.attr.attr); + result = device_create_file(hwmon->device, &tz->temp_input.attr); if (result) - goto free_temp_mem; + goto unregister_name; if (tz->ops->get_crit_temp) { unsigned long temperature; if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH, + snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH, "temp%d_crit", hwmon->count); - temp->temp_crit.attr.attr.name = temp->temp_crit.name; - temp->temp_crit.attr.attr.mode = 0444; - temp->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&temp->temp_crit.attr.attr); + tz->temp_crit.attr.attr.name = tz->temp_crit.name; + tz->temp_crit.attr.attr.mode = 0444; + tz->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&tz->temp_crit.attr.attr); result = device_create_file(hwmon->device, - &temp->temp_crit.attr); + &tz->temp_crit.attr); if (result) goto unregister_input; } @@ -597,15 +534,13 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) mutex_lock(&thermal_list_lock); if (new_hwmon_device) list_add_tail(&hwmon->node, &thermal_hwmon_list); - list_add_tail(&temp->hwmon_node, &hwmon->tz_list); + list_add_tail(&tz->hwmon_node, &hwmon->tz_list); mutex_unlock(&thermal_list_lock); return 0; unregister_input: - device_remove_file(hwmon->device, &temp->temp_input.attr); - free_temp_mem: - kfree(temp); + device_remove_file(hwmon->device, &tz->temp_input.attr); unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); @@ -621,30 +556,15 @@ 
thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) static void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_temp *temp; - - hwmon = thermal_hwmon_lookup_by_type(tz); - if (unlikely(!hwmon)) { - /* Should never happen... */ - dev_dbg(&tz->device, "hwmon device lookup failed!\n"); - return; - } - - temp = thermal_hwmon_lookup_temp(hwmon, tz); - if (unlikely(!temp)) { - /* Should never happen... */ - dev_dbg(&tz->device, "temperature input lookup failed!\n"); - return; - } + struct thermal_hwmon_device *hwmon = tz->hwmon; - device_remove_file(hwmon->device, &temp->temp_input.attr); + tz->hwmon = NULL; + device_remove_file(hwmon->device, &tz->temp_input.attr); if (tz->ops->get_crit_temp) - device_remove_file(hwmon->device, &temp->temp_crit.attr); + device_remove_file(hwmon->device, &tz->temp_crit.attr); mutex_lock(&thermal_list_lock); - list_del(&temp->hwmon_node); - kfree(temp); + list_del(&tz->hwmon_node); if (!list_empty(&hwmon->tz_list)) { mutex_unlock(&thermal_list_lock); return; diff --git a/trunk/drivers/tty/serial/imx.c b/trunk/drivers/tty/serial/imx.c index 7e91b3d368cd..827db7654594 100644 --- a/trunk/drivers/tty/serial/imx.c +++ b/trunk/drivers/tty/serial/imx.c @@ -1286,17 +1286,22 @@ static int serial_imx_resume(struct platform_device *dev) static int serial_imx_probe_dt(struct imx_port *sport, struct platform_device *pdev) { - static int portnum = 0; struct device_node *np = pdev->dev.of_node; const struct of_device_id *of_id = of_match_device(imx_uart_dt_ids, &pdev->dev); + int ret; if (!np) return -ENODEV; - sport->port.line = portnum++; - if (sport->port.line >= UART_NR) - return -EINVAL; + ret = of_alias_get_id(np, "serial"); + if (ret < 0) { + pr_err("%s: failed to get alias id, errno %d\n", + __func__, ret); + return -ENODEV; + } else { + sport->port.line = ret; + } if (of_get_property(np, "fsl,uart-has-rtscts", NULL)) sport->have_rtscts = 1; diff --git a/trunk/drivers/video/backlight/Kconfig b/trunk/drivers/video/backlight/Kconfig index 278aeaa92505..69407e72aac1 100644 --- a/trunk/drivers/video/backlight/Kconfig +++ b/trunk/drivers/video/backlight/Kconfig @@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633 enable its driver. 
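Worked example for the aat2870_bl.c brightness hunk below, assuming the current-setting register accepts indices 0..max_current-1, which is what the '- 1' on one side of that hunk implies; the numbers are illustrative only:

    /*
     * With max_brightness = 255, max_current = 16, brightness = 255:
     *
     *   255 * (16 - 1) / 255 = 15   (last valid index)
     *   255 *  16      / 255 = 16   (one past the valid range)
     */
    static int example_scale(int brightness, int max_brightness,
    			 int max_current)
    {
    	return brightness * (max_current - 1) / max_brightness;
    }
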
config BACKLIGHT_AAT2870 - tristate "AnalogicTech AAT2870 Backlight" + bool "AnalogicTech AAT2870 Backlight" depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE help If you have an AnalogicTech AAT2870, say Y to enable the diff --git a/trunk/drivers/video/backlight/aat2870_bl.c b/trunk/drivers/video/backlight/aat2870_bl.c index 331f1ef1dad5..4952a617563d 100644 --- a/trunk/drivers/video/backlight/aat2870_bl.c +++ b/trunk/drivers/video/backlight/aat2870_bl.c @@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl, struct backlight_device *bd = aat2870_bl->bd; int val; - val = brightness * (aat2870_bl->max_current - 1); + val = brightness * aat2870_bl->max_current; val /= bd->props.max_brightness; return val; @@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev) props.type = BACKLIGHT_RAW; bd = backlight_device_register("aat2870-backlight", &pdev->dev, aat2870_bl, &aat2870_bl_ops, &props); - if (IS_ERR(bd)) { + if (!bd) { dev_err(&pdev->dev, "Failed to allocate memory for backlight device\n"); - ret = PTR_ERR(bd); + ret = -ENOMEM; goto out_kfree; } @@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev) else aat2870_bl->channels = AAT2870_BL_CH_ALL; - if (pdata->max_current > 0) + if (pdata->max_brightness > 0) aat2870_bl->max_current = pdata->max_current; else aat2870_bl->max_current = AAT2870_CURRENT_27_9; diff --git a/trunk/drivers/watchdog/Kconfig b/trunk/drivers/watchdog/Kconfig index 86b0735e6aa0..f441726ddf2b 100644 --- a/trunk/drivers/watchdog/Kconfig +++ b/trunk/drivers/watchdog/Kconfig @@ -36,6 +36,9 @@ config WATCHDOG_CORE and gives them the /dev/watchdog interface (and later also the sysfs interface). + To compile this driver as a module, choose M here: the module will + be called watchdog. + config WATCHDOG_NOWAYOUT bool "Disable watchdog shutdown on close" help diff --git a/trunk/drivers/watchdog/nv_tco.c b/trunk/drivers/watchdog/nv_tco.c index 809f41c30c44..afa78a54711e 100644 --- a/trunk/drivers/watchdog/nv_tco.c +++ b/trunk/drivers/watchdog/nv_tco.c @@ -458,15 +458,7 @@ static int __devexit nv_tco_remove(struct platform_device *dev) static void nv_tco_shutdown(struct platform_device *dev) { - u32 val; - tco_timer_stop(); - - /* Some BIOSes fail the POST (once) if the NO_REBOOT flag is not - * unset during shutdown. */ - pci_read_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, &val); - val &= ~MCP51_SMBUS_SETUP_B_TCO_REBOOT; - pci_write_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, val); } static struct platform_driver nv_tco_driver = { diff --git a/trunk/drivers/watchdog/shwdt.c b/trunk/drivers/watchdog/shwdt.c index a267dc078daf..db84f2322d1a 100644 --- a/trunk/drivers/watchdog/shwdt.c +++ b/trunk/drivers/watchdog/shwdt.c @@ -64,7 +64,7 @@ * misses its deadline, the kernel timer will allow the WDT to overflow.
*/ static int clock_division_ratio = WTCSR_CKS_4096; -#define next_ping_period(cks) (jiffies + msecs_to_jiffies(cks - 4)) +#define next_ping_period(cks) msecs_to_jiffies(cks - 4) static const struct watchdog_info sh_wdt_info; static struct platform_device *sh_wdt_dev; diff --git a/trunk/fs/Kconfig b/trunk/fs/Kconfig index 9fe0b349f4cd..19891aab9c6e 100644 --- a/trunk/fs/Kconfig +++ b/trunk/fs/Kconfig @@ -127,21 +127,14 @@ config TMPFS_POSIX_ACL select TMPFS_XATTR select GENERIC_ACL help - POSIX Access Control Lists (ACLs) support additional access rights - for users and groups beyond the standard owner/group/world scheme, - and this option selects support for ACLs specifically for tmpfs - filesystems. - - If you've selected TMPFS, it's possible that you'll also need - this option as there are a number of Linux distros that require - POSIX ACL support under /dev for certain features to work properly. - For example, some distros need this feature for ALSA-related /dev - files for sound to work properly. In short, if you're not sure, - say Y. + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. To learn more about Access Control Lists, visit the POSIX ACLs for Linux website <http://acl.bestbits.at/>. + If you don't know what Access Control Lists are, say N. + config TMPFS_XATTR bool "Tmpfs extended attributes" depends on TMPFS diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c index f28680553288..ff77262e887c 100644 --- a/trunk/fs/block_dev.c +++ b/trunk/fs/block_dev.c @@ -387,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); int error; + + error = filemap_write_and_wait_range(filp->f_mapping, start, end); + if (error) + return error; /* * There is no need to serialise calls to blkdev_issue_flush with diff --git a/trunk/fs/btrfs/Makefile b/trunk/fs/btrfs/Makefile index 40e6ac08c21f..9b72dcf1cd25 100644 --- a/trunk/fs/btrfs/Makefile +++ b/trunk/fs/btrfs/Makefile @@ -6,7 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - export.o tree-log.o free-space-cache.o zlib.o lzo.o \ + export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o - -btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o diff --git a/trunk/fs/btrfs/acl.c b/trunk/fs/btrfs/acl.c index eb159aaa5a11..4cc5c0164ed6 100644 --- a/trunk/fs/btrfs/acl.c +++ b/trunk/fs/btrfs/acl.c @@ -28,6 +28,8 @@ #include "btrfs_inode.h" #include "xattr.h" +#ifdef CONFIG_BTRFS_FS_POSIX_ACL + struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { int size; @@ -274,3 +276,18 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = { .get = btrfs_xattr_acl_get, .set = btrfs_xattr_acl_set, }; + +#else /* CONFIG_BTRFS_FS_POSIX_ACL */ + +int btrfs_acl_chmod(struct inode *inode) +{ + return 0; +} + +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) +{ + return 0; +} + +#endif /* CONFIG_BTRFS_FS_POSIX_ACL */ diff --git a/trunk/fs/btrfs/compression.c b/trunk/fs/btrfs/compression.c index 8ec5d86f1734..bfe42b03eaf9 100644 --- a/trunk/fs/btrfs/compression.c +++ b/trunk/fs/btrfs/compression.c @@ -338,7 +338,6 @@ int btrfs_submit_compressed_write(struct inode *inode, u64
start, u64 first_byte = disk_start; struct block_device *bdev; int ret; - int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); @@ -393,11 +392,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - if (!skip_sum) { - ret = btrfs_csum_one_bio(root, inode, bio, - start, 1); - BUG_ON(ret); - } + ret = btrfs_csum_one_bio(root, inode, bio, start, 1); + BUG_ON(ret); ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); @@ -422,10 +418,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - if (!skip_sum) { - ret = btrfs_csum_one_bio(root, inode, bio, start, 1); - BUG_ON(ret); - } + ret = btrfs_csum_one_bio(root, inode, bio, start, 1); + BUG_ON(ret); ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); diff --git a/trunk/fs/btrfs/ctree.h b/trunk/fs/btrfs/ctree.h index 0469263e327e..365c4e1dde04 100644 --- a/trunk/fs/btrfs/ctree.h +++ b/trunk/fs/btrfs/ctree.h @@ -2406,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); int btrfs_find_orphan_roots(struct btrfs_root *tree_root); -void btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node); +int btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node); void btrfs_check_and_init_root_item(struct btrfs_root_item *item); /* dir-item.c */ @@ -2523,14 +2523,6 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag #define PageChecked PageFsMisc #endif -/* This forces readahead on a given range of bytes in an inode */ -static inline void btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, unsigned long req_size) -{ - page_cache_sync_readahead(mapping, ra, file, offset, req_size); -} - struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, @@ -2559,6 +2551,9 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); +unsigned long btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); @@ -2653,21 +2648,12 @@ do { \ /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL struct posix_acl *btrfs_get_acl(struct inode *inode, int type); -int btrfs_init_acl(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir); -int btrfs_acl_chmod(struct inode *inode); #else #define btrfs_get_acl NULL -static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir) -{ - return 0; -} -static inline int btrfs_acl_chmod(struct inode *inode) -{ - return 0; -} #endif +int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir); +int btrfs_acl_chmod(struct inode *inode); /* relocation.c */ int 
btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); diff --git a/trunk/fs/btrfs/dir-item.c b/trunk/fs/btrfs/dir-item.c index 31d84e78129b..c360a848d97f 100644 --- a/trunk/fs/btrfs/dir-item.c +++ b/trunk/fs/btrfs/dir-item.c @@ -198,6 +198,8 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); @@ -207,7 +209,18 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) + if (ret > 0) { + if (path->slots[0] == 0) + return NULL; + path->slots[0]--; + } + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || + found_key.offset != key.offset) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -302,6 +315,8 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); @@ -309,7 +324,18 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) + if (ret > 0) { + if (path->slots[0] == 0) + return NULL; + path->slots[0]--; + } + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || + found_key.offset != key.offset) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); diff --git a/trunk/fs/btrfs/extent-tree.c b/trunk/fs/btrfs/extent-tree.c index 66bac226944e..4d08ed79405d 100644 --- a/trunk/fs/btrfs/extent-tree.c +++ b/trunk/fs/btrfs/extent-tree.c @@ -663,9 +663,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) struct btrfs_path *path; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + BUG_ON(!path); key.objectid = start; key.offset = len; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -3274,9 +3272,6 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } ret = btrfs_alloc_chunk(trans, extent_root, flags); - if (ret < 0 && ret != -ENOSPC) - goto out; - spin_lock(&space_info->lock); if (ret) space_info->full = 1; @@ -3286,7 +3281,6 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); -out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } @@ -4462,9 +4456,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, printk(KERN_ERR "umm, got %d back from search" ", was looking for %llu\n", ret, (unsigned long long)bytenr); - if (ret > 0) - btrfs_print_leaf(extent_root, - path->nodes[0]); + btrfs_print_leaf(extent_root, path->nodes[0]); } BUG_ON(ret); extent_slot = path->slots[0]; @@ -5081,9 +5073,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, * group it does point to and try again */ if (!last_ptr_loop && last_ptr->block_group && - last_ptr->block_group != block_group && -
index <= - get_block_group_index(last_ptr->block_group)) { + last_ptr->block_group != block_group) { btrfs_put_block_group(block_group); block_group = last_ptr->block_group; @@ -5511,8 +5501,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, @@ -6283,14 +6272,10 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); wc = kzalloc(sizeof(*wc), GFP_NOFS); - if (!wc) { - btrfs_free_path(path); - return -ENOMEM; - } + BUG_ON(!wc); trans = btrfs_start_transaction(tree_root, 0); BUG_ON(IS_ERR(trans)); @@ -6553,6 +6538,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) u64 min_allocable_bytes; int ret = -ENOSPC; + if (cache->ro) + return 0; /* * We need some metadata space and system metadata space for @@ -6568,12 +6555,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) spin_lock(&sinfo->lock); spin_lock(&cache->lock); - - if (cache->ro) { - ret = 0; - goto out; - } - num_bytes = cache->key.offset - cache->reserved - cache->pinned - cache->bytes_super - btrfs_block_group_used(&cache->item); @@ -6587,7 +6568,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) cache->ro = 1; ret = 0; } -out: + spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); return ret; @@ -7202,15 +7183,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cluster->refill_lock); path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } + BUG_ON(!path); inode = lookup_free_space_inode(root, block_group, path); if (!IS_ERR(inode)) { - ret = btrfs_orphan_add(trans, inode); - BUG_ON(ret); + btrfs_orphan_add(trans, inode); clear_nlink(inode); /* One for the block groups ref */ spin_lock(&block_group->lock); diff --git a/trunk/fs/btrfs/extent_io.c b/trunk/fs/btrfs/extent_io.c index d418164a35f1..067b1747421b 100644 --- a/trunk/fs/btrfs/extent_io.c +++ b/trunk/fs/btrfs/extent_io.c @@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, * * This should be called with the tree lock held. 
*/ -static void merge_state(struct extent_io_tree *tree, - struct extent_state *state) +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) { struct extent_state *other; struct rb_node *other_node; if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) - return; + return 0; other_node = rb_prev(&state->rb_node); if (other_node) { @@ -287,13 +287,19 @@ static void merge_state(struct extent_io_tree *tree, free_extent_state(other); } } + + return 0; } -static void set_state_cb(struct extent_io_tree *tree, +static int set_state_cb(struct extent_io_tree *tree, struct extent_state *state, int *bits) { - if (tree->ops && tree->ops->set_bit_hook) - tree->ops->set_bit_hook(tree->mapping->host, state, bits); + if (tree->ops && tree->ops->set_bit_hook) { + return tree->ops->set_bit_hook(tree->mapping->host, + state, bits); + } + + return 0; } static void clear_state_cb(struct extent_io_tree *tree, @@ -303,9 +309,6 @@ static void clear_state_cb(struct extent_io_tree *tree, tree->ops->clear_bit_hook(tree->mapping->host, state, bits); } -static void set_state_bits(struct extent_io_tree *tree, - struct extent_state *state, int *bits); - /* * insert an extent_state struct into the tree. 'bits' are set on the * struct before it is inserted. @@ -321,6 +324,8 @@ static int insert_state(struct extent_io_tree *tree, int *bits) { struct rb_node *node; + int bits_to_set = *bits & ~EXTENT_CTLBITS; + int ret; if (end < start) { printk(KERN_ERR "btrfs end < start %llu %llu\n", @@ -330,9 +335,13 @@ static int insert_state(struct extent_io_tree *tree, } state->start = start; state->end = end; + ret = set_state_cb(tree, state, bits); + if (ret) + return ret; - set_state_bits(tree, state, bits); - + if (bits_to_set & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; + state->state |= bits_to_set; node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -348,11 +357,13 @@ static int insert_state(struct extent_io_tree *tree, return 0; } -static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, +static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, u64 split) { if (tree->ops && tree->ops->split_extent_hook) - tree->ops->split_extent_hook(tree->mapping->host, orig, split); + return tree->ops->split_extent_hook(tree->mapping->host, + orig, split); + return 0; } /* @@ -648,25 +659,34 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) if (start > end) break; - cond_resched_lock(&tree->lock); + if (need_resched()) { + spin_unlock(&tree->lock); + cond_resched(); + spin_lock(&tree->lock); + } } out: spin_unlock(&tree->lock); return 0; } -static void set_state_bits(struct extent_io_tree *tree, +static int set_state_bits(struct extent_io_tree *tree, struct extent_state *state, int *bits) { + int ret; int bits_to_set = *bits & ~EXTENT_CTLBITS; - set_state_cb(tree, state, bits); + ret = set_state_cb(tree, state, bits); + if (ret) + return ret; if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; tree->dirty_bytes += range; } state->state |= bits_to_set; + + return 0; } static void cache_state(struct extent_state *state, @@ -759,7 +779,9 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, goto out; } - set_state_bits(tree, state, &bits); + err = set_state_bits(tree, state, &bits); + if (err) + goto out; cache_state(state, cached_state); merge_state(tree, state); @@ -808,7 +830,9 @@ int set_extent_bit(struct 
extent_io_tree *tree, u64 start, u64 end, if (err) goto out; if (state->end <= end) { - set_state_bits(tree, state, &bits); + err = set_state_bits(tree, state, &bits); + if (err) + goto out; cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) @@ -869,7 +893,11 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - set_state_bits(tree, prealloc, &bits); + err = set_state_bits(tree, prealloc, &bits); + if (err) { + prealloc = NULL; + goto out; + } cache_state(prealloc, cached_state); merge_state(tree, prealloc); prealloc = NULL; @@ -1031,6 +1059,46 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) return 0; } +/* + * find the first offset in the io tree with 'bits' set. zero is + * returned if we find something, and *start_ret and *end_ret are + * set to reflect the state struct that was found. + * + * If nothing was found, 1 is returned, < 0 on error + */ +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + spin_lock(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(tree, start); + if (!node) + goto out; + + while (1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = rb_next(node); + if (!node) + break; + } +out: + spin_unlock(&tree->lock); + return ret; +} + /* find the first state struct with 'bits' set after 'start', and * return it. tree->lock must be held. NULL will be returned if * nothing was found after 'start' */ @@ -1062,30 +1130,6 @@ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, return NULL; } -/* - * find the first offset in the io tree with 'bits' set. zero is - * returned if we find something, and *start_ret and *end_ret are - * set to reflect the state struct that was found. - * - * If nothing was found, 1 is returned, < 0 on error - */ -int find_first_extent_bit(struct extent_io_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct extent_state *state; - int ret = 1; - - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, bits); - if (state) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - } - spin_unlock(&tree->lock); - return ret; -} - /* * find a contiguous range of bytes in the file marked as delalloc, not * more than 'max_bytes'.
start and end are used to return the range, @@ -2502,6 +2546,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, struct writeback_control *wbc) { int ret; + struct address_space *mapping = page->mapping; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -2509,9 +2554,17 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; + struct writeback_control wbc_writepages = { + .sync_mode = wbc->sync_mode, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; ret = __extent_writepage(page, wbc, &epd); + extent_write_cache_pages(tree, mapping, &wbc_writepages, + __extent_writepage, &epd, flush_write_bio); flush_epd_write_bio(&epd); return ret; } diff --git a/trunk/fs/btrfs/extent_io.h b/trunk/fs/btrfs/extent_io.h index 7b2f0c3e7929..21a7ca9e7282 100644 --- a/trunk/fs/btrfs/extent_io.h +++ b/trunk/fs/btrfs/extent_io.h @@ -76,15 +76,15 @@ struct extent_io_ops { struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); - void (*set_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - void (*merge_extent_hook)(struct inode *inode, - struct extent_state *new, - struct extent_state *other); - void (*split_extent_hook)(struct inode *inode, - struct extent_state *orig, u64 split); + int (*set_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + int (*merge_extent_hook)(struct inode *inode, + struct extent_state *new, + struct extent_state *other); + int (*split_extent_hook)(struct inode *inode, + struct extent_state *orig, u64 split); int (*write_cache_pages_lock_hook)(struct page *page); }; @@ -108,6 +108,8 @@ struct extent_state { wait_queue_head_t wq; atomic_t refs; unsigned long state; + u64 split_start; + u64 split_end; /* for use by the FS */ u64 private; diff --git a/trunk/fs/btrfs/extent_map.c b/trunk/fs/btrfs/extent_map.c index 7c97b3301459..2d0410344ea3 100644 --- a/trunk/fs/btrfs/extent_map.c +++ b/trunk/fs/btrfs/extent_map.c @@ -183,10 +183,22 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } -static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) { + int ret = 0; struct extent_map *merge = NULL; struct rb_node *rb; + struct extent_map *em; + + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, start, len); + + WARN_ON(!em || em->start != start); + + if (!em) + goto out; + + clear_bit(EXTENT_FLAG_PINNED, &em->flags); if (em->start != 0) { rb = rb_prev(&em->rb_node); @@ -213,24 +225,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) merge->in_tree = 0; free_extent_map(merge); } -} - -int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) -{ - int ret = 0; - struct extent_map *em; - - write_lock(&tree->lock); - em = lookup_extent_mapping(tree, start, len); - - WARN_ON(!em || em->start != start); - - if (!em) - goto out; - - clear_bit(EXTENT_FLAG_PINNED, &em->flags); - - try_merge_map(tree, em); free_extent_map(em); out: @@ -253,6 +247,7 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; 
+ struct extent_map *merge = NULL; struct rb_node *rb; struct extent_map *exist; @@ -268,8 +263,30 @@ int add_extent_mapping(struct extent_map_tree *tree, goto out; } atomic_inc(&em->refs); - - try_merge_map(tree, em); + if (em->start != 0) { + rb = rb_prev(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_len += merge->block_len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); + } + } + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + em->block_len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + merge->in_tree = 0; + free_extent_map(merge); + } out: return ret; } @@ -282,8 +299,19 @@ static u64 range_end(u64 start, u64 len) return start + len; } -struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 len, int strict) +/** + * lookup_extent_mapping - lookup extent_map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range. There may be additional objects in the tree that + * intersect, so check the object returned carefully to make sure that no + * additional lookups are needed. + */ +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len) { struct extent_map *em; struct rb_node *rb_node; @@ -292,41 +320,37 @@ struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, u64 end = range_end(start, len); rb_node = __tree_search(&tree->map, start, &prev, &next); + if (!rb_node && prev) { + em = rb_entry(prev, struct extent_map, rb_node); + if (end > em->start && start < extent_map_end(em)) + goto found; + } + if (!rb_node && next) { + em = rb_entry(next, struct extent_map, rb_node); + if (end > em->start && start < extent_map_end(em)) + goto found; + } if (!rb_node) { - if (prev) - rb_node = prev; - else if (next) - rb_node = next; - else - return NULL; + em = NULL; + goto out; + } + if (IS_ERR(rb_node)) { + em = ERR_CAST(rb_node); + goto out; } - em = rb_entry(rb_node, struct extent_map, rb_node); + if (end > em->start && start < extent_map_end(em)) + goto found; - if (strict && !(end > em->start && start < extent_map_end(em))) - return NULL; + em = NULL; + goto out; +found: atomic_inc(&em->refs); +out: return em; } -/** - * lookup_extent_mapping - lookup extent_map - * @tree: tree to lookup in - * @start: byte offset to start the search - * @len: length of the lookup range - * - * Find and return the first extent_map struct in @tree that intersects the - * [start, len] range. There may be additional objects in the tree that - * intersect, so check the object returned carefully to make sure that no - * additional lookups are needed. 
- */ -struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 len) -{ - return __lookup_extent_mapping(tree, start, len, 1); -} - /** * search_extent_mapping - find a nearby extent map * @tree: tree to lookup in @@ -341,7 +365,38 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct extent_map *search_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len) { - return __lookup_extent_mapping(tree, start, len, 0); + struct extent_map *em; + struct rb_node *rb_node; + struct rb_node *prev = NULL; + struct rb_node *next = NULL; + + rb_node = __tree_search(&tree->map, start, &prev, &next); + if (!rb_node && prev) { + em = rb_entry(prev, struct extent_map, rb_node); + goto found; + } + if (!rb_node && next) { + em = rb_entry(next, struct extent_map, rb_node); + goto found; + } + if (!rb_node) { + em = NULL; + goto out; + } + if (IS_ERR(rb_node)) { + em = ERR_CAST(rb_node); + goto out; + } + em = rb_entry(rb_node, struct extent_map, rb_node); + goto found; + + em = NULL; + goto out; + +found: + atomic_inc(&em->refs); +out: + return em; } /** diff --git a/trunk/fs/btrfs/file-item.c b/trunk/fs/btrfs/file-item.c index b910694f61ed..08bcfa92a222 100644 --- a/trunk/fs/btrfs/file-item.c +++ b/trunk/fs/btrfs/file-item.c @@ -291,8 +291,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); if (search_commit) { path->skip_locking = 1; @@ -678,9 +677,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, btrfs_super_csum_size(&root->fs_info->super_copy); path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - + BUG_ON(!path); sector_sum = sums->sums; again: next_offset = (u64)-1; diff --git a/trunk/fs/btrfs/file.c b/trunk/fs/btrfs/file.c index 658d66959abe..a35e51c9f235 100644 --- a/trunk/fs/btrfs/file.c +++ b/trunk/fs/btrfs/file.c @@ -74,7 +74,7 @@ struct inode_defrag { * If an existing record is found the defrag item you * pass in is freed */ -static void __btrfs_add_inode_defrag(struct inode *inode, +static int __btrfs_add_inode_defrag(struct inode *inode, struct inode_defrag *defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -106,11 +106,11 @@ static void __btrfs_add_inode_defrag(struct inode *inode, BTRFS_I(inode)->in_defrag = 1; rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); - return; + return 0; exists: kfree(defrag); - return; + return 0; } @@ -123,6 +123,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, { struct btrfs_root *root = BTRFS_I(inode)->root; struct inode_defrag *defrag; + int ret = 0; u64 transid; if (!btrfs_test_opt(root, AUTO_DEFRAG)) @@ -149,9 +150,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->defrag_inodes_lock); if (!BTRFS_I(inode)->in_defrag) - __btrfs_add_inode_defrag(inode, defrag); + ret = __btrfs_add_inode_defrag(inode, defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); - return 0; + return ret; } /* @@ -854,8 +855,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); again: recow = 0; split = start; @@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) static noinline int prepare_pages(struct btrfs_root *root, struct file 
*file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, - size_t write_bytes) + unsigned long last_index, size_t write_bytes) { struct extent_state *cached_state = NULL; int i; @@ -1159,6 +1159,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; unsigned long first_index; + unsigned long last_index; size_t num_written = 0; int nrptrs; int ret = 0; @@ -1171,6 +1172,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, return -ENOMEM; first_index = pos >> PAGE_CACHE_SHIFT; + last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; while (iov_iter_count(i) > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); @@ -1204,7 +1206,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * contents of pages from loop to loop */ ret = prepare_pages(root, file, pages, num_pages, - pos, first_index, write_bytes); + pos, first_index, last_index, + write_bytes); if (ret) { btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); diff --git a/trunk/fs/btrfs/inode.c b/trunk/fs/btrfs/inode.c index 15fceefbca0a..ae762dab37f8 100644 --- a/trunk/fs/btrfs/inode.c +++ b/trunk/fs/btrfs/inode.c @@ -1061,8 +1061,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); nolock = btrfs_is_free_space_inode(root, inode); @@ -1283,16 +1282,17 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } -static void btrfs_split_extent_hook(struct inode *inode, - struct extent_state *orig, u64 split) +static int btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) { /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) - return; + return 0; spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); + return 0; } /* @@ -1301,17 +1301,18 @@ static void btrfs_split_extent_hook(struct inode *inode, * extents, such as when we are doing sequential writes, so we can properly * account for the metadata space we'll need. */ -static void btrfs_merge_extent_hook(struct inode *inode, - struct extent_state *new, - struct extent_state *other) +static int btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) { /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) - return; + return 0; spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->lock); + return 0; } /* @@ -1319,8 +1320,8 @@ static void btrfs_merge_extent_hook(struct inode *inode, * bytes in this file, and to maintain the list of inodes that * have pending delalloc work to be done. 
*/ -static void btrfs_set_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static int btrfs_set_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* @@ -1350,13 +1351,14 @@ static void btrfs_set_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } + return 0; } /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static void btrfs_clear_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static int btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore @@ -1393,6 +1395,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } + return 0; } /* @@ -1642,8 +1645,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, int ret; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); path->leave_spinning = 1; @@ -2213,8 +2215,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (!root->orphan_block_rsv) { block_rsv = btrfs_alloc_block_rsv(root); - if (!block_rsv) - return -ENOMEM; + BUG_ON(!block_rsv); } spin_lock(&root->orphan_lock); @@ -2516,9 +2517,7 @@ static void btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - if (!path) - goto make_bad; - + BUG_ON(!path); path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); @@ -2999,16 +2998,13 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - if (ret) - goto out; + BUG_ON(ret); if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, inode); - if (ret) - goto out; + BUG_ON(ret); } -out: nr = trans->blocks_used; __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -3151,11 +3147,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->reada = -1; - if (root->ref_cows || root == root->fs_info->tree_root) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); @@ -3168,6 +3159,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (min_type == 0 && root == BTRFS_I(inode)->root) btrfs_kill_delayed_inode_items(inode); + path = btrfs_alloc_path(); + BUG_ON(!path); + path->reada = -1; + key.objectid = ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -3695,8 +3690,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, int ret = 0; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, namelen, 0); @@ -3952,7 +3946,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *new) { struct inode *inode; - int bad_inode = 0; inode = btrfs_iget_locked(s, location->objectid, root); if (!inode) @@ -3962,19 +3955,10 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, BTRFS_I(inode)->root = root; memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); btrfs_read_locked_inode(inode); - if (!is_bad_inode(inode)) { - inode_tree_add(inode); - unlock_new_inode(inode); - if (new) - *new = 1; - } else { - bad_inode = 1; - } - } - - if (bad_inode) { - iput(inode); - inode = ERR_PTR(-ESTALE); + 
inode_tree_add(inode); + unlock_new_inode(inode); + if (new) + *new = 1; } return inode; @@ -4467,8 +4451,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int owner; path = btrfs_alloc_path(); - if (!path) - return ERR_PTR(-ENOMEM); + BUG_ON(!path); inode = new_inode(root->fs_info->sb); if (!inode) { @@ -6728,6 +6711,19 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, return 0; } +/* helper function for file defrag and space balancing. This + * forces readahead on a given range of bytes in an inode + */ +unsigned long btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index) +{ + pgoff_t req_size = last_index - offset + 1; + + page_cache_sync_readahead(mapping, ra, file, offset, req_size); + return offset + req_size; +} + struct inode *btrfs_alloc_inode(struct super_block *sb) { struct btrfs_inode *ei; @@ -7210,11 +7206,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; path = btrfs_alloc_path(); - if (!path) { - err = -ENOMEM; - drop_inode = 1; - goto out_unlock; - } + BUG_ON(!path); key.objectid = btrfs_ino(inode); key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); diff --git a/trunk/fs/btrfs/ioctl.c b/trunk/fs/btrfs/ioctl.c index 7cf013349941..0b980afc5edd 100644 --- a/trunk/fs/btrfs/ioctl.c +++ b/trunk/fs/btrfs/ioctl.c @@ -1749,10 +1749,11 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.objectid = key.offset; key.offset = (u64)-1; dirid = key.objectid; + } if (ptr < name) goto out; - memmove(name, ptr, total_len); + memcpy(name, ptr, total_len); name[total_len]='\0'; ret = 0; out: diff --git a/trunk/fs/btrfs/ref-cache.c b/trunk/fs/btrfs/ref-cache.c new file mode 100644 index 000000000000..82d569cb6267 --- /dev/null +++ b/trunk/fs/btrfs/ref-cache.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sort.h> +#include "ctree.h" +#include "ref-cache.h" +#include "transaction.h" + +static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, + struct rb_node *node) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_leaf_ref *entry; + + while (*p) { + parent = *p; + entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); + + if (bytenr < entry->bytenr) + p = &(*p)->rb_left; + else if (bytenr > entry->bytenr) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) +{ + struct rb_node *n = root->rb_node; + struct btrfs_leaf_ref *entry; + + while (n) { + entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); + WARN_ON(!entry->in_tree); + + if (bytenr < entry->bytenr) + n = n->rb_left; + else if (bytenr > entry->bytenr) + n = n->rb_right; + else + return n; + } + return NULL; +} diff --git a/trunk/fs/btrfs/ref-cache.h b/trunk/fs/btrfs/ref-cache.h new file mode 100644 index 000000000000..24f7001f6387 --- /dev/null +++ b/trunk/fs/btrfs/ref-cache.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA.
+ */ +#ifndef __REFCACHE__ +#define __REFCACHE__ + +struct btrfs_extent_info { + /* bytenr and num_bytes find the extent in the extent allocation tree */ + u64 bytenr; + u64 num_bytes; + + /* objectid and offset find the back reference for the file */ + u64 objectid; + u64 offset; +}; + +struct btrfs_leaf_ref { + struct rb_node rb_node; + struct btrfs_leaf_ref_tree *tree; + int in_tree; + atomic_t usage; + + u64 root_gen; + u64 bytenr; + u64 owner; + u64 generation; + int nritems; + + struct list_head list; + struct btrfs_extent_info extents[]; +}; + +static inline size_t btrfs_leaf_ref_size(int nr_extents) +{ + return sizeof(struct btrfs_leaf_ref) + + sizeof(struct btrfs_extent_info) * nr_extents; +} +#endif diff --git a/trunk/fs/btrfs/root-tree.c b/trunk/fs/btrfs/root-tree.c index f4099904565a..ebe45443de06 100644 --- a/trunk/fs/btrfs/root-tree.c +++ b/trunk/fs/btrfs/root-tree.c @@ -71,12 +71,13 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, return ret; } -void btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node) +int btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node) { btrfs_set_root_bytenr(item, node->start); btrfs_set_root_level(item, btrfs_header_level(node)); btrfs_set_root_generation(item, btrfs_header_generation(node)); + return 0; } /* diff --git a/trunk/fs/btrfs/transaction.c b/trunk/fs/btrfs/transaction.c index 7dc36fab4afc..eb55863bb4ae 100644 --- a/trunk/fs/btrfs/transaction.c +++ b/trunk/fs/btrfs/transaction.c @@ -216,11 +216,17 @@ static void wait_current_trans(struct btrfs_root *root) spin_lock(&root->fs_info->trans_lock); cur_trans = root->fs_info->running_transaction; if (cur_trans && cur_trans->blocked) { + DEFINE_WAIT(wait); atomic_inc(&cur_trans->use_count); spin_unlock(&root->fs_info->trans_lock); - - wait_event(root->fs_info->transaction_wait, - !cur_trans->blocked); + while (1) { + prepare_to_wait(&root->fs_info->transaction_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (!cur_trans->blocked) + break; + schedule(); + } + finish_wait(&root->fs_info->transaction_wait, &wait); put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); @@ -351,10 +357,19 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root } /* wait for a transaction commit to be fully complete */ -static noinline void wait_for_commit(struct btrfs_root *root, +static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { - wait_event(commit->commit_wait, commit->commit_done); + DEFINE_WAIT(wait); + while (!commit->commit_done) { + prepare_to_wait(&commit->commit_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (commit->commit_done) + break; + schedule(); + } + finish_wait(&commit->commit_wait, &wait); + return 0; } int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) @@ -1070,7 +1085,22 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) static void wait_current_trans_commit_start(struct btrfs_root *root, struct btrfs_transaction *trans) { - wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); + DEFINE_WAIT(wait); + + if (trans->in_commit) + return; + + while (1) { + prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (trans->in_commit) { + finish_wait(&root->fs_info->transaction_blocked_wait, + &wait); + break; + } + schedule(); + finish_wait(&root->fs_info->transaction_blocked_wait, &wait); + } } /* @@ -1080,8 +1110,24 @@ static void wait_current_trans_commit_start(struct 
btrfs_root *root, static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, struct btrfs_transaction *trans) { - wait_event(root->fs_info->transaction_wait, - trans->commit_done || (trans->in_commit && !trans->blocked)); + DEFINE_WAIT(wait); + + if (trans->commit_done || (trans->in_commit && !trans->blocked)) + return; + + while (1) { + prepare_to_wait(&root->fs_info->transaction_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (trans->commit_done || + (trans->in_commit && !trans->blocked)) { + finish_wait(&root->fs_info->transaction_wait, + &wait); + break; + } + schedule(); + finish_wait(&root->fs_info->transaction_wait, + &wait); + } } /* @@ -1188,7 +1234,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, atomic_inc(&cur_trans->use_count); btrfs_end_transaction(trans, root); - wait_for_commit(root, cur_trans); + ret = wait_for_commit(root, cur_trans); + BUG_ON(ret); put_transaction(cur_trans); diff --git a/trunk/fs/btrfs/tree-log.c b/trunk/fs/btrfs/tree-log.c index babee65f8eda..ac278dd83175 100644 --- a/trunk/fs/btrfs/tree-log.c +++ b/trunk/fs/btrfs/tree-log.c @@ -1617,8 +1617,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, return 0; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { @@ -1724,9 +1723,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return -ENOMEM; if (*level == 1) { - ret = wc->process_func(root, next, wc, ptr_gen); - if (ret) - return ret; + wc->process_func(root, next, wc, ptr_gen); path->slots[*level]++; if (wc->free) { @@ -1791,11 +1788,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, parent = path->nodes[*level + 1]; root_owner = btrfs_header_owner(parent); - ret = wc->process_func(root, path->nodes[*level], wc, + wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); - if (ret) - return ret; - if (wc->free) { struct extent_buffer *next; diff --git a/trunk/fs/btrfs/volumes.c b/trunk/fs/btrfs/volumes.c index 53875ae73ad4..b89e372c7544 100644 --- a/trunk/fs/btrfs/volumes.c +++ b/trunk/fs/btrfs/volumes.c @@ -1037,8 +1037,7 @@ static noinline int find_next_chunk(struct btrfs_root *root, struct btrfs_key found_key; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + BUG_ON(!path); key.objectid = objectid; key.offset = (u64)-1; @@ -2062,10 +2061,8 @@ int btrfs_balance(struct btrfs_root *dev_root) /* step two, relocate all the chunks */ path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto error; - } + BUG_ON(!path); + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -2664,8 +2661,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, ret = find_next_chunk(fs_info->chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); - if (ret) - return ret; + BUG_ON(ret); alloc_profile = BTRFS_BLOCK_GROUP_METADATA | (fs_info->metadata_alloc_profile & diff --git a/trunk/fs/dcache.c b/trunk/fs/dcache.c index c83cae19161e..2347cdb15abb 100644 --- a/trunk/fs/dcache.c +++ b/trunk/fs/dcache.c @@ -795,7 +795,6 @@ static void __shrink_dcache_sb(struct super_block *sb, int count, int flags) /** * prune_dcache_sb - shrink the dcache - * @sb: superblock * @nr_to_scan: number of entries to try to free * * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. 
This is diff --git a/trunk/fs/ext4/super.c b/trunk/fs/ext4/super.c index 4687fea0c00f..e2d88baf91d3 100644 --- a/trunk/fs/ext4/super.c +++ b/trunk/fs/ext4/super.c @@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) { void *ret; - ret = kzalloc(size, flags); + ret = kmalloc(size, flags); if (!ret) ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); return ret; diff --git a/trunk/fs/stack.c b/trunk/fs/stack.c index b4f2ab48a61f..4a6f7f440658 100644 --- a/trunk/fs/stack.c +++ b/trunk/fs/stack.c @@ -29,7 +29,10 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) * * We don't actually know what locking is used at the lower level; * but if it's a filesystem that supports quotas, it will be using - * i_lock as in inode_add_bytes(). + * i_lock as in inode_add_bytes(). tmpfs uses other locking, and + * its 32-bit is (just) able to exceed 2TB i_size with the aid of + * holes; but its i_blocks cannot carry into the upper long without + * almost 2TB swap - let's ignore that case. */ if (sizeof(i_blocks) > sizeof(long)) spin_lock(&src->i_lock); diff --git a/trunk/include/acpi/acpi_drivers.h b/trunk/include/acpi/acpi_drivers.h index e49c36d38d7e..3090471b2a5e 100644 --- a/trunk/include/acpi/acpi_drivers.h +++ b/trunk/include/acpi/acpi_drivers.h @@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle); extern int register_dock_notifier(struct notifier_block *nb); extern void unregister_dock_notifier(struct notifier_block *nb); extern int register_hotplug_dock_device(acpi_handle handle, - const struct acpi_dock_ops *ops, + struct acpi_dock_ops *ops, void *context); extern void unregister_hotplug_dock_device(acpi_handle handle); #else diff --git a/trunk/include/acpi/acpixf.h b/trunk/include/acpi/acpixf.h index f554a9313b43..2ed0a8486c19 100644 --- a/trunk/include/acpi/acpixf.h +++ b/trunk/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110623 +#define ACPI_CA_VERSION 0x20110413 #include "actypes.h" #include "actbl.h" @@ -69,7 +69,6 @@ extern u32 acpi_gbl_trace_flags; extern u32 acpi_gbl_enable_aml_debug_object; extern u8 acpi_gbl_copy_dsdt_locally; extern u8 acpi_gbl_truncate_io_addresses; -extern u8 acpi_gbl_disable_auto_repair; extern u32 acpi_current_gpe_count; extern struct acpi_table_fadt acpi_gbl_FADT; diff --git a/trunk/include/acpi/apei.h b/trunk/include/acpi/apei.h index 51a527d24a8a..e67b523a50e1 100644 --- a/trunk/include/acpi/apei.h +++ b/trunk/include/acpi/apei.h @@ -18,11 +18,6 @@ extern int hest_disable; extern int erst_disable; -#ifdef CONFIG_ACPI_APEI_GHES -extern int ghes_disable; -#else -#define ghes_disable 1 -#endif #ifdef CONFIG_ACPI_APEI void __init acpi_hest_init(void); diff --git a/trunk/include/acpi/processor.h b/trunk/include/acpi/processor.h index 67055f180330..ba4928cae473 100644 --- a/trunk/include/acpi/processor.h +++ b/trunk/include/acpi/processor.h @@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver; /* in processor_thermal.c */ int acpi_processor_get_limit_info(struct acpi_processor *pr); -extern const struct thermal_cooling_device_ops processor_cooling_ops; +extern struct thermal_cooling_device_ops processor_cooling_ops; #ifdef CONFIG_CPU_FREQ void acpi_thermal_cpufreq_init(void); void acpi_thermal_cpufreq_exit(void); diff --git a/trunk/include/linux/acpi.h b/trunk/include/linux/acpi.h index 6001b4da39dd..1deb2a73c2da 100644 --- a/trunk/include/linux/acpi.h +++ b/trunk/include/linux/acpi.h @@ -238,6 +238,7 @@ extern int 
acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; #define PXM_INVAL (-1) +#define NID_INVAL (-1) int acpi_check_resource_conflict(const struct resource *res); @@ -279,8 +280,6 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_CPUHP_OST_SUPPORT 8 #define OSC_SB_APEI_SUPPORT 16 -extern bool osc_sb_apei_support_acked; - /* PCI defined _OSC bits */ /* _OSC DW1 Definition (OS Support Fields) */ #define OSC_EXT_PCI_CONFIG_SUPPORT 1 diff --git a/trunk/include/linux/bitmap.h b/trunk/include/linux/bitmap.h index 7ad634501e48..3bac44cce142 100644 --- a/trunk/include/linux/bitmap.h +++ b/trunk/include/linux/bitmap.h @@ -146,7 +146,6 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); -#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ ((nbits) % BITS_PER_LONG) ? \ diff --git a/trunk/include/linux/cpuidle.h b/trunk/include/linux/cpuidle.h index b51629e15cfc..36719ead50e8 100644 --- a/trunk/include/linux/cpuidle.h +++ b/trunk/include/linux/cpuidle.h @@ -122,8 +122,6 @@ struct cpuidle_driver { }; #ifdef CONFIG_CPU_IDLE -extern void disable_cpuidle(void); -extern int cpuidle_idle_call(void); extern int cpuidle_register_driver(struct cpuidle_driver *drv); struct cpuidle_driver *cpuidle_get_driver(void); @@ -137,8 +135,6 @@ extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); #else -static inline void disable_cpuidle(void) { } -static inline int cpuidle_idle_call(void) { return -ENODEV; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } diff --git a/trunk/include/linux/device-mapper.h b/trunk/include/linux/device-mapper.h index 3fa1f3d90ce0..4427e0454051 100644 --- a/trunk/include/linux/device-mapper.h +++ b/trunk/include/linux/device-mapper.h @@ -208,49 +208,6 @@ struct dm_target_callbacks { int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); -/* - * Target argument parsing. - */ -struct dm_arg_set { - unsigned argc; - char **argv; -}; - -/* - * The minimum and maximum value of a numeric argument, together with - * the error message to use if the number is found to be outside that range. - */ -struct dm_arg { - unsigned min; - unsigned max; - char *error; -}; - -/* - * Validate the next argument, either returning it as *value or, if invalid, - * returning -EINVAL and setting *error. - */ -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error); - -/* - * Process the next argument as the start of a group containing between - * arg->min and arg->max further arguments. Either return the size as - * *num_args or, if invalid, return -EINVAL and set *error. - */ -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *num_args, char **error); - -/* - * Return the current argument and shift to the next. - */ -const char *dm_shift_arg(struct dm_arg_set *as); - -/* - * Move through num_args arguments. - */ -void dm_consume_args(struct dm_arg_set *as, unsigned num_args); - /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. * Drop the reference with dm_put when you finish with the object. 
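The dm_arg declarations removed in the device-mapper.h hunk above form the generic parser that targets use for optional feature arguments. The fragment below is a minimal sketch of that interface only, assuming the semantics stated in the comments above; the demo_* names, the feature string and the {0, 1} bounds are invented for illustration and belong to no in-tree target.

#include <linux/device-mapper.h>
#include <linux/string.h>
#include <linux/types.h>

/*
 * Hypothetical example: consume a "<#feature_args> [<arg>...]" group
 * with the dm_arg helpers declared above.  Everything named "demo" is
 * an invented placeholder, not an existing target.
 */
static int demo_parse_features(struct dm_arg_set *as, bool *demo_flag,
			       char **error)
{
	/* Range and error text for the leading <#feature_args> count. */
	static struct dm_arg _args[] = {
		{0, 1, "Invalid number of feature arguments"},
	};
	unsigned num_features;
	int r;

	*demo_flag = false;

	/* An absent optional section is treated as zero features. */
	if (!as->argc)
		return 0;

	/* Read <#feature_args>, range-checked against _args[0]. */
	r = dm_read_arg_group(_args, as, &num_features, error);
	if (r)
		return r;

	/* Shift and match each feature argument in turn. */
	while (num_features--) {
		const char *arg_name = dm_shift_arg(as);

		if (!strcasecmp(arg_name, "demo_feature")) {
			*demo_flag = true;
			continue;
		}

		*error = "Unrecognised feature requested";
		return -EINVAL;
	}

	return 0;
}

With a table line ending in "1 demo_feature", dm_read_arg_group would hand back num_features == 1 and dm_shift_arg would return "demo_feature"; a count of 0, or no optional section at all, leaves the flag clear.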
diff --git a/trunk/include/linux/dm-ioctl.h b/trunk/include/linux/dm-ioctl.h index 0cb8eff76bd6..3708455ee6c3 100644 --- a/trunk/include/linux/dm-ioctl.h +++ b/trunk/include/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 21 +#define DM_VERSION_MINOR 20 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-07-06)" +#define DM_VERSION_EXTRA "-ioctl (2011-02-02)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/trunk/include/linux/dm-kcopyd.h b/trunk/include/linux/dm-kcopyd.h index 5e54458e920f..298d587e349b 100644 --- a/trunk/include/linux/dm-kcopyd.h +++ b/trunk/include/linux/dm-kcopyd.h @@ -42,20 +42,5 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned num_dests, struct dm_io_region *dests, unsigned flags, dm_kcopyd_notify_fn fn, void *context); -/* - * Prepare a callback and submit it via the kcopyd thread. - * - * dm_kcopyd_prepare_callback allocates a callback structure and returns it. - * It must not be called from interrupt context. - * The returned value should be passed into dm_kcopyd_do_callback. - * - * dm_kcopyd_do_callback submits the callback. - * It may be called from interrupt context. - * The callback is issued from the kcopyd thread. - */ -void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, - dm_kcopyd_notify_fn fn, void *context); -void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); - #endif /* __KERNEL__ */ #endif /* _LINUX_DM_KCOPYD_H */ diff --git a/trunk/include/linux/fault-inject.h b/trunk/include/linux/fault-inject.h index c6f996f2abb6..3ff060ac7810 100644 --- a/trunk/include/linux/fault-inject.h +++ b/trunk/include/linux/fault-inject.h @@ -25,6 +25,10 @@ struct fault_attr { unsigned long reject_end; unsigned long count; + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + struct dentry *dir; +#endif }; #define FAULT_ATTR_INITIALIZER { \ @@ -41,15 +45,19 @@ bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS -struct dentry *fault_create_debugfs_attr(const char *name, - struct dentry *parent, struct fault_attr *attr); +int init_fault_attr_dentries(struct fault_attr *attr, const char *name); +void cleanup_fault_attr_dentries(struct fault_attr *attr); #else /* CONFIG_FAULT_INJECTION_DEBUG_FS */ -static inline struct dentry *fault_create_debugfs_attr(const char *name, - struct dentry *parent, struct fault_attr *attr) +static inline int init_fault_attr_dentries(struct fault_attr *attr, + const char *name) +{ + return -ENODEV; +} + +static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) { - return ERR_PTR(-ENODEV); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/trunk/include/linux/genalloc.h b/trunk/include/linux/genalloc.h index 5e98eeb2af3b..5bbebda78b02 100644 --- a/trunk/include/linux/genalloc.h +++ b/trunk/include/linux/genalloc.h @@ -1,26 +1,8 @@ /* - * Basic general purpose allocator for managing special purpose - * memory, for example, memory that is not managed by the regular - * kmalloc/kfree interface. Uses for this includes on-device special - * memory, uncached memory etc. - * - * It is safe to use the allocator in NMI handlers and other special - * unblockable contexts that could otherwise deadlock on locks. This - * is implemented by using atomic operations and retries on any - * conflicts. 
The disadvantage is that there may be livelocks in - * extreme cases. For better scalability, one allocator can be used - * for each CPU. - * - * The lockless operation only works if there is enough memory - * available. If new memory is added to the pool a lock has to be - * still taken. So any user relying on locklessness has to ensure - * that sufficient memory is preallocated. - * - * The basic atomic operation of this allocator is cmpxchg on long. - * On architectures that don't have NMI-safe cmpxchg implementation, - * the allocator can NOT be used in NMI handler. So code uses the - * allocator in NMI handler should depend on - * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. @@ -33,7 +15,7 @@ * General purpose special memory pool descriptor. */ struct gen_pool { - spinlock_t lock; + rwlock_t lock; struct list_head chunks; /* list of chunks in this pool */ int min_alloc_order; /* minimum allocation order */ }; @@ -42,8 +24,8 @@ struct gen_pool { * General purpose special memory pool chunk descriptor. */ struct gen_pool_chunk { + spinlock_t lock; struct list_head next_chunk; /* next chunk in pool */ - atomic_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* starting address of memory chunk */ unsigned long end_addr; /* ending address of memory chunk */ @@ -74,8 +56,4 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, extern void gen_pool_destroy(struct gen_pool *); extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); -extern void gen_pool_for_each_chunk(struct gen_pool *, - void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); -extern size_t gen_pool_avail(struct gen_pool *); -extern size_t gen_pool_size(struct gen_pool *); #endif /* __GENALLOC_H__ */ diff --git a/trunk/include/linux/gfp.h b/trunk/include/linux/gfp.h index 3a76faf6a3ee..cb4089254f01 100644 --- a/trunk/include/linux/gfp.h +++ b/trunk/include/linux/gfp.h @@ -92,7 +92,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ diff --git a/trunk/include/linux/idr.h b/trunk/include/linux/idr.h index 255491cf522e..13a801f3d028 100644 --- a/trunk/include/linux/idr.h +++ b/trunk/include/linux/idr.h @@ -146,10 +146,6 @@ void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); -int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, - gfp_t gfp_mask); -void ida_simple_remove(struct ida *ida, unsigned int id); - void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/trunk/include/linux/llist.h b/trunk/include/linux/llist.h deleted file mode 100644 index aa0c8b5b3cd0..000000000000 --- a/trunk/include/linux/llist.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef LLIST_H -#define LLIST_H -/* - * Lock-less NULL terminated single linked list - * - * If there are multiple producers and 
multiple consumers, llist_add - * can be used in producers and llist_del_all can be used in - * consumers. They can work simultaneously without lock. But - * llist_del_first can not be used here. Because llist_del_first - * depends on list->first->next does not changed if list->first is not - * changed during its operation, but llist_del_first, llist_add, - * llist_add (or llist_del_all, llist_add, llist_add) sequence in - * another consumer may violate that. - * - * If there are multiple producers and one consumer, llist_add can be - * used in producers and llist_del_all or llist_del_first can be used - * in the consumer. - * - * This can be summarized as follow: - * - * | add | del_first | del_all - * add | - | - | - - * del_first | | L | L - * del_all | | | - - * - * Where "-" stands for no lock is needed, while "L" stands for lock - * is needed. - * - * The list entries deleted via llist_del_all can be traversed with - * traversing function such as llist_for_each etc. But the list - * entries can not be traversed safely before deleted from the list. - * The order of deleted entries is from the newest to the oldest added - * one. If you want to traverse from the oldest to the newest, you - * must reverse the order by yourself before traversing. - * - * The basic atomic operation of this list is cmpxchg on long. On - * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. - */ - -struct llist_head { - struct llist_node *first; -}; - -struct llist_node { - struct llist_node *next; -}; - -#define LLIST_HEAD_INIT(name) { NULL } -#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) - -/** - * init_llist_head - initialize lock-less list head - * @head: the head for your lock-less list - */ -static inline void init_llist_head(struct llist_head *list) -{ - list->first = NULL; -} - -/** - * llist_entry - get the struct of this entry - * @ptr: the &struct llist_node pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the llist_node within the struct. - */ -#define llist_entry(ptr, type, member) \ - container_of(ptr, type, member) - -/** - * llist_for_each - iterate over some deleted entries of a lock-less list - * @pos: the &struct llist_node to use as a loop cursor - * @node: the first entry of deleted list entries - * - * In general, some entries of the lock-less list can be traversed - * safely only after being deleted from list, so start with an entry - * instead of list head. - * - * If being used on entries deleted from lock-less list directly, the - * traverse order is from the newest to the oldest added entry. If - * you want to traverse from the oldest to the newest, you must - * reverse the order by yourself before traversing. - */ -#define llist_for_each(pos, node) \ - for ((pos) = (node); pos; (pos) = (pos)->next) - -/** - * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type - * @pos: the type * to use as a loop cursor. - * @node: the fist entry of deleted list entries. - * @member: the name of the llist_node with the struct. - * - * In general, some entries of the lock-less list can be traversed - * safely only after being removed from list, so start with an entry - * instead of list head. - * - * If being used on entries deleted from lock-less list directly, the - * traverse order is from the newest to the oldest added entry. 
If - * you want to traverse from the oldest to the newest, you must - * reverse the order by yourself before traversing. - */ -#define llist_for_each_entry(pos, node, member) \ - for ((pos) = llist_entry((node), typeof(*(pos)), member); \ - &(pos)->member != NULL; \ - (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) - -/** - * llist_empty - tests whether a lock-less list is empty - * @head: the list to test - * - * Not guaranteed to be accurate or up to date. Just a quick way to - * test whether the list is empty without deleting something from the - * list. - */ -static inline int llist_empty(const struct llist_head *head) -{ - return ACCESS_ONCE(head->first) == NULL; -} - -void llist_add(struct llist_node *new, struct llist_head *head); -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, - struct llist_head *head); -struct llist_node *llist_del_first(struct llist_head *head); -struct llist_node *llist_del_all(struct llist_head *head); -#endif /* LLIST_H */ diff --git a/trunk/include/linux/memcontrol.h b/trunk/include/linux/memcontrol.h index 3b535db00a94..b96600786913 100644 --- a/trunk/include/linux/memcontrol.h +++ b/trunk/include/linux/memcontrol.h @@ -86,6 +86,8 @@ extern void mem_cgroup_uncharge_end(void); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); +extern int mem_cgroup_shmem_charge_fallback(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); @@ -223,6 +225,12 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } +static inline int mem_cgroup_shmem_charge_fallback(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) +{ + return 0; +} + static inline void mem_cgroup_add_lru_list(struct page *page, int lru) { } diff --git a/trunk/include/linux/mfd/aat2870.h b/trunk/include/linux/mfd/aat2870.h index f7316c29bdec..89212df05622 100644 --- a/trunk/include/linux/mfd/aat2870.h +++ b/trunk/include/linux/mfd/aat2870.h @@ -89,7 +89,7 @@ enum aat2870_id { /* Backlight current magnitude (mA) */ enum aat2870_current { - AAT2870_CURRENT_0_45 = 1, + AAT2870_CURRENT_0_45, AAT2870_CURRENT_0_90, AAT2870_CURRENT_1_80, AAT2870_CURRENT_2_70, diff --git a/trunk/include/linux/mm.h b/trunk/include/linux/mm.h index f2690cf49827..3172a1c0f08e 100644 --- a/trunk/include/linux/mm.h +++ b/trunk/include/linux/mm.h @@ -1600,7 +1600,6 @@ enum mf_flags { }; extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int flags); -extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; diff --git a/trunk/include/linux/of.h b/trunk/include/linux/of.h index 0085bb01c041..bd716f8908de 100644 --- a/trunk/include/linux/of.h +++ b/trunk/include/linux/of.h @@ -196,13 +196,12 @@ extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); extern int of_property_read_u32_array(const struct device_node *np, - const char *propname, + char *propname, u32 *out_values, size_t sz); -extern int of_property_read_string(struct device_node *np, - const char *propname, - const char **out_string); +extern int of_property_read_string(struct device_node *np, char *propname, + const 
char **out_string); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_is_available(const struct device_node *device); @@ -243,15 +242,13 @@ static inline bool of_have_populated_dt(void) } static inline int of_property_read_u32_array(const struct device_node *np, - const char *propname, - u32 *out_values, size_t sz) + char *propname, u32 *out_values, size_t sz) { return -ENOSYS; } static inline int of_property_read_string(struct device_node *np, - const char *propname, - const char **out_string) + char *propname, const char **out_string) { return -ENOSYS; } @@ -259,7 +256,7 @@ static inline int of_property_read_string(struct device_node *np, #endif /* CONFIG_OF */ static inline int of_property_read_u32(const struct device_node *np, - const char *propname, + char *propname, u32 *out_value) { return of_property_read_u32_array(np, propname, out_value, 1); diff --git a/trunk/include/linux/pci_ids.h b/trunk/include/linux/pci_ids.h index ae96bbe54518..b00c4ec5056e 100644 --- a/trunk/include/linux/pci_ids.h +++ b/trunk/include/linux/pci_ids.h @@ -2709,16 +2709,6 @@ #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f -#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 -#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e -#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 diff --git a/trunk/include/linux/radix-tree.h b/trunk/include/linux/radix-tree.h index 9d4539c52e53..23241c2fecce 100644 --- a/trunk/include/linux/radix-tree.h +++ b/trunk/include/linux/radix-tree.h @@ -39,15 +39,7 @@ * when it is shrunk, before we rcu free the node. See shrink code for * details. */ -#define RADIX_TREE_INDIRECT_PTR 1 -/* - * A common use of the radix tree is to store pointers to struct pages; - * but shmem/tmpfs needs also to store swap entries in the same tree: - * those are marked as exceptional entries to distinguish them. - * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. - */ -#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 -#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 +#define RADIX_TREE_INDIRECT_PTR 1 #define radix_tree_indirect_to_ptr(ptr) \ radix_tree_indirect_to_ptr((void __force *)(ptr)) @@ -181,28 +173,6 @@ static inline int radix_tree_deref_retry(void *arg) return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR); } -/** - * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry? - * @arg: value returned by radix_tree_deref_slot - * Returns: 0 if well-aligned pointer, non-0 if exceptional entry. - */ -static inline int radix_tree_exceptional_entry(void *arg) -{ - /* Not unlikely because radix_tree_exception often tested first */ - return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY; -} - -/** - * radix_tree_exception - radix_tree_deref_slot returned either exception? - * @arg: value returned by radix_tree_deref_slot - * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. 
- */ -static inline int radix_tree_exception(void *arg) -{ - return unlikely((unsigned long)arg & - (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)); -} - /** * radix_tree_replace_slot - replace item in a slot * @pslot: pointer to slot, returned by radix_tree_lookup_slot @@ -224,8 +194,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, - void ***results, unsigned long *indices, +unsigned int +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, unsigned long first_index, unsigned int max_items); unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); @@ -252,7 +222,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int fromtag, unsigned int totag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); -unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); static inline void radix_tree_preload_end(void) { diff --git a/trunk/include/linux/shmem_fs.h b/trunk/include/linux/shmem_fs.h index 9291ac3cc627..aa08fa8fd79b 100644 --- a/trunk/include/linux/shmem_fs.h +++ b/trunk/include/linux/shmem_fs.h @@ -8,15 +8,22 @@ /* inode in-kernel data */ +#define SHMEM_NR_DIRECT 16 + +#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t)) + struct shmem_inode_info { spinlock_t lock; unsigned long flags; unsigned long alloced; /* data pages alloced to file */ + unsigned long swapped; /* subtotal assigned to swap */ + unsigned long next_index; /* highest alloced index + 1 */ + struct shared_policy policy; /* NUMA memory alloc policy */ + struct page *i_indirect; /* top indirect blocks page */ union { - unsigned long swapped; /* subtotal assigned to swap */ - char *symlink; /* unswappable short symlink */ + swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ + char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; }; - struct shared_policy policy; /* NUMA memory alloc policy */ struct list_head swaplist; /* chain of maybes on swap */ struct list_head xattr_list; /* list of shmem_xattr */ struct inode vfs_inode; @@ -42,7 +49,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) /* * Functions in mm/shmem.c called directly from elsewhere: */ -extern int shmem_init(void); +extern int init_tmpfs(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); @@ -52,6 +59,8 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); +extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, + struct page **pagep, swp_entry_t *ent); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) diff --git a/trunk/include/linux/swapops.h b/trunk/include/linux/swapops.h index 2189d3ffc85d..cd42e30b7c6e 100644 --- a/trunk/include/linux/swapops.h +++ b/trunk/include/linux/swapops.h @@ -1,8 +1,3 @@ -#ifndef _LINUX_SWAPOPS_H -#define _LINUX_SWAPOPS_H - -#include - /* * swapcache pages are stored in the swapper_space radix 
tree. We want to * get good packing density in that tree, so the index should be dense in @@ -81,22 +76,6 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) return __swp_entry_to_pte(arch_entry); } -static inline swp_entry_t radix_to_swp_entry(void *arg) -{ - swp_entry_t entry; - - entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; - return entry; -} - -static inline void *swp_to_radix_entry(swp_entry_t entry) -{ - unsigned long value; - - value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT; - return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); -} - #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { @@ -190,5 +169,3 @@ static inline int non_swap_entry(swp_entry_t entry) return 0; } #endif - -#endif /* _LINUX_SWAPOPS_H */ diff --git a/trunk/include/linux/thermal.h b/trunk/include/linux/thermal.h index 47b4a27e6e97..d3ec89fb4122 100644 --- a/trunk/include/linux/thermal.h +++ b/trunk/include/linux/thermal.h @@ -85,6 +85,22 @@ struct thermal_cooling_device { ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) +#if defined(CONFIG_THERMAL_HWMON) +/* thermal zone devices with the same type share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; +#endif + struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -104,6 +120,12 @@ struct thermal_zone_device { struct mutex lock; /* protect cooling devices list */ struct list_head node; struct delayed_work poll_queue; +#if defined(CONFIG_THERMAL_HWMON) + struct list_head hwmon_node; + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +#endif }; /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" diff --git a/trunk/init/main.c b/trunk/init/main.c index 9c51ee7adf3d..d7211faed2ad 100644 --- a/trunk/init/main.c +++ b/trunk/init/main.c @@ -369,12 +369,9 @@ static noinline void __init_refok rest_init(void) init_idle_bootup_task(current); preempt_enable_no_resched(); schedule(); - - /* At this point, we can enable user mode helper functionality */ - usermodehelper_enable(); + preempt_disable(); /* Call into cpu_idle with preempt disabled */ - preempt_disable(); cpu_idle(); } @@ -718,7 +715,7 @@ static void __init do_basic_setup(void) { cpuset_init_smp(); usermodehelper_init(); - shmem_init(); + init_tmpfs(); driver_init(); init_irq_proc(); do_ctors(); diff --git a/trunk/ipc/shm.c b/trunk/ipc/shm.c index b5bae9d945b6..9fb044f3b345 100644 --- a/trunk/ipc/shm.c +++ b/trunk/ipc/shm.c @@ -294,7 +294,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rw_mutex); - if (shm_ids(ns).in_use) + if (&shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -304,12 +304,9 @@ void exit_shm(struct task_struct *task) { struct ipc_namespace *ns = task->nsproxy->ipc_ns; - if (shm_ids(ns).in_use == 0) - return; - /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rw_mutex); - if (shm_ids(ns).in_use) + if (&shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); 
up_write(&shm_ids(ns).rw_mutex); } diff --git a/trunk/kernel/futex.c b/trunk/kernel/futex.c index 11cbe052b2e8..0a308970c24a 100644 --- a/trunk/kernel/futex.c +++ b/trunk/kernel/futex.c @@ -218,8 +218,6 @@ static void drop_futex_key_refs(union futex_key *key) * @uaddr: virtual address of the futex * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @key: address where result is stored. - * @rw: mapping needs to be read/write (values: VERIFY_READ, - * VERIFY_WRITE) * * Returns a negative error code or 0 * The key words are stored in *key on success. @@ -231,12 +229,12 @@ static void drop_futex_key_refs(union futex_key *key) * lock_page() might sleep, the caller should not hold a spinlock. */ static int -get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) +get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; - int err, ro = 0; + int err; /* * The futex address must be "naturally" aligned. @@ -264,18 +262,8 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) again: err = get_user_pages_fast(address, 1, 1, &page); - /* - * If write access is not required (eg. FUTEX_WAIT), try - * and get read-only access. - */ - if (err == -EFAULT && rw == VERIFY_READ) { - err = get_user_pages_fast(address, 1, 0, &page); - ro = 1; - } if (err < 0) return err; - else - err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE page_head = page; @@ -317,13 +305,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) if (!page_head->mapping) { unlock_page(page_head); put_page(page_head); - /* - * ZERO_PAGE pages don't have a mapping. Avoid a busy loop - * trying to find one. RW mapping would have COW'd (and thus - * have a mapping) so this page is RO and won't ever change. - */ - if ((page_head == ZERO_PAGE(address))) - return -EFAULT; goto again; } @@ -335,15 +316,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) * the object not the particular process. */ if (PageAnon(page_head)) { - /* - * A RO anonymous page will never change and thus doesn't make - * sense for futex operations. 
- */ - if (ro) { - err = -EFAULT; - goto out; - } - key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; @@ -355,10 +327,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) get_futex_key_refs(key); -out: unlock_page(page_head); put_page(page_head); - return err; + return 0; } static inline void put_futex_key(union futex_key *key) @@ -969,7 +940,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); if (unlikely(ret != 0)) goto out; @@ -1015,10 +986,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); if (unlikely(ret != 0)) goto out_put_key1; @@ -1272,11 +1243,10 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, pi_state = NULL; } - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, - requeue_pi ? VERIFY_WRITE : VERIFY_READ); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); if (unlikely(ret != 0)) goto out_put_key1; @@ -1820,7 +1790,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * while the syscall executes. */ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); if (unlikely(ret != 0)) return ret; @@ -1971,7 +1941,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, } retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); if (unlikely(ret != 0)) goto out; @@ -2090,7 +2060,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); if (unlikely(ret != 0)) goto out; @@ -2279,7 +2249,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); if (unlikely(ret != 0)) goto out; diff --git a/trunk/kernel/kmod.c b/trunk/kernel/kmod.c index ddc7644c1305..47613dfb7b28 100644 --- a/trunk/kernel/kmod.c +++ b/trunk/kernel/kmod.c @@ -274,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work) * (used for preventing user land processes from being created after the user * land has been frozen during a system-wide hibernation or suspend operation). 
*/ -static int usermodehelper_disabled = 1; +static int usermodehelper_disabled; /* Number of helpers running */ static atomic_t running_helpers = ATOMIC_INIT(0); diff --git a/trunk/kernel/lockdep.c b/trunk/kernel/lockdep.c index 8c24294e477f..3956f5149e25 100644 --- a/trunk/kernel/lockdep.c +++ b/trunk/kernel/lockdep.c @@ -2468,7 +2468,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) BUG_ON(usage_bit >= LOCK_USAGE_STATES); - if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) + if (hlock_class(hlock)->key == &__lockdep_no_validate__) continue; if (!mark_lock(curr, hlock, usage_bit)) @@ -2485,9 +2485,23 @@ static void __trace_hardirqs_on_caller(unsigned long ip) { struct task_struct *curr = current; + if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) + return; + + if (unlikely(curr->hardirqs_enabled)) { + /* + * Neither irq nor preemption are disabled here + * so this is racy by nature but losing one hit + * in a stat is not a big deal. + */ + __debug_atomic_inc(redundant_hardirqs_on); + return; + } /* we'll do an OFF -> ON transition: */ curr->hardirqs_enabled = 1; + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; /* * We are going to turn hardirqs on, so set the * usage bit for all held locks: @@ -2515,25 +2529,9 @@ void trace_hardirqs_on_caller(unsigned long ip) if (unlikely(!debug_locks || current->lockdep_recursion)) return; - if (unlikely(current->hardirqs_enabled)) { - /* - * Neither irq nor preemption are disabled here - * so this is racy by nature but losing one hit - * in a stat is not a big deal. - */ - __debug_atomic_inc(redundant_hardirqs_on); - return; - } - if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; - if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) - return; - - if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) - return; - current->lockdep_recursion = 1; __trace_hardirqs_on_caller(ip); current->lockdep_recursion = 0; @@ -2874,7 +2872,10 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { - memset(lock, 0, sizeof(*lock)); + int i; + + for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) + lock->class_cache[i] = NULL; #ifdef CONFIG_LOCK_STAT lock->cpu = raw_smp_processor_id(); diff --git a/trunk/kernel/taskstats.c b/trunk/kernel/taskstats.c index e19ce1454ee1..d1db2880d1cf 100644 --- a/trunk/kernel/taskstats.c +++ b/trunk/kernel/taskstats.c @@ -291,28 +291,30 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; + s = NULL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - s = kmalloc_node(sizeof(struct listener), - GFP_KERNEL, cpu_to_node(cpu)); + if (!s) + s = kmalloc_node(sizeof(struct listener), + GFP_KERNEL, cpu_to_node(cpu)); if (!s) goto cleanup; - s->pid = pid; + INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); - list_for_each_entry(s2, &listeners->list, list) { - if (s2->pid == pid && s2->valid) - goto exists; + list_for_each_entry_safe(s2, tmp, &listeners->list, list) { + if (s2->pid == pid) + goto next_cpu; } list_add(&s->list, &listeners->list); s = NULL; -exists: +next_cpu: up_write(&listeners->sem); - kfree(s); /* nop if NULL */ } + kfree(s); return 0; } diff --git a/trunk/lib/Kconfig b/trunk/lib/Kconfig index 6c695ff9caba..32f3e5ae2be5 100644 --- a/trunk/lib/Kconfig +++ b/trunk/lib/Kconfig @@ -276,7 +276,4 @@ 
config CORDIC so its calculations are in fixed point. Modules can select this when they require this function. Module will be called cordic. -config LLIST - bool - endmenu diff --git a/trunk/lib/Makefile b/trunk/lib/Makefile index 6457af4a7caf..892f4e282ea1 100644 --- a/trunk/lib/Makefile +++ b/trunk/lib/Makefile @@ -115,8 +115,6 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o obj-$(CONFIG_CORDIC) += cordic.o -obj-$(CONFIG_LLIST) += llist.o - hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/trunk/lib/bitmap.c b/trunk/lib/bitmap.c index 2f4412e4d071..37ef4b048795 100644 --- a/trunk/lib/bitmap.c +++ b/trunk/lib/bitmap.c @@ -271,6 +271,8 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_weight); +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) + void bitmap_set(unsigned long *map, int start, int nr) { unsigned long *p = map + BIT_WORD(start); diff --git a/trunk/lib/fault-inject.c b/trunk/lib/fault-inject.c index f193b7796449..2577b121c7c1 100644 --- a/trunk/lib/fault-inject.c +++ b/trunk/lib/fault-inject.c @@ -197,15 +197,21 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); } -struct dentry *fault_create_debugfs_attr(const char *name, - struct dentry *parent, struct fault_attr *attr) +void cleanup_fault_attr_dentries(struct fault_attr *attr) +{ + debugfs_remove_recursive(attr->dir); +} + +int init_fault_attr_dentries(struct fault_attr *attr, const char *name) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - dir = debugfs_create_dir(name, parent); + dir = debugfs_create_dir(name, NULL); if (!dir) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + + attr->dir = dir; if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) goto fail; @@ -237,11 +243,11 @@ struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - return dir; + return 0; fail: - debugfs_remove_recursive(dir); + debugfs_remove_recursive(attr->dir); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/trunk/lib/genalloc.c b/trunk/lib/genalloc.c index f352cc42f4f8..577ddf805975 100644 --- a/trunk/lib/genalloc.c +++ b/trunk/lib/genalloc.c @@ -1,26 +1,8 @@ /* - * Basic general purpose allocator for managing special purpose - * memory, for example, memory that is not managed by the regular - * kmalloc/kfree interface. Uses for this includes on-device special - * memory, uncached memory etc. - * - * It is safe to use the allocator in NMI handlers and other special - * unblockable contexts that could otherwise deadlock on locks. This - * is implemented by using atomic operations and retries on any - * conflicts. The disadvantage is that there may be livelocks in - * extreme cases. For better scalability, one allocator can be used - * for each CPU. - * - * The lockless operation only works if there is enough memory - * available. If new memory is added to the pool a lock has to be - * still taken. So any user relying on locklessness has to ensure - * that sufficient memory is preallocated. - * - * The basic atomic operation of this allocator is cmpxchg on long. - * On architectures that don't have NMI-safe cmpxchg implementation, - * the allocator can NOT be used in NMI handler. So code uses the - * allocator in NMI handler should depend on - * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. 
+ * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. * * Copyright 2005 (C) Jes Sorensen * @@ -31,109 +13,8 @@ #include #include #include -#include -#include #include -static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) -{ - unsigned long val, nval; - - nval = *addr; - do { - val = nval; - if (val & mask_to_set) - return -EBUSY; - cpu_relax(); - } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); - - return 0; -} - -static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) -{ - unsigned long val, nval; - - nval = *addr; - do { - val = nval; - if ((val & mask_to_clear) != mask_to_clear) - return -EBUSY; - cpu_relax(); - } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); - - return 0; -} - -/* - * bitmap_set_ll - set the specified number of bits at the specified position - * @map: pointer to a bitmap - * @start: a bit position in @map - * @nr: number of bits to set - * - * Set @nr bits start from @start in @map lock-lessly. Several users - * can set/clear the same bitmap simultaneously without lock. If two - * users set the same bit, one user will return remain bits, otherwise - * return 0. - */ -static int bitmap_set_ll(unsigned long *map, int start, int nr) -{ - unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; - int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); - unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); - - while (nr - bits_to_set >= 0) { - if (set_bits_ll(p, mask_to_set)) - return nr; - nr -= bits_to_set; - bits_to_set = BITS_PER_LONG; - mask_to_set = ~0UL; - p++; - } - if (nr) { - mask_to_set &= BITMAP_LAST_WORD_MASK(size); - if (set_bits_ll(p, mask_to_set)) - return nr; - } - - return 0; -} - -/* - * bitmap_clear_ll - clear the specified number of bits at the specified position - * @map: pointer to a bitmap - * @start: a bit position in @map - * @nr: number of bits to set - * - * Clear @nr bits start from @start in @map lock-lessly. Several users - * can set/clear the same bitmap simultaneously without lock. If two - * users clear the same bit, one user will return remain bits, - * otherwise return 0. 
- */ -static int bitmap_clear_ll(unsigned long *map, int start, int nr) -{ - unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; - int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); - unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - - while (nr - bits_to_clear >= 0) { - if (clear_bits_ll(p, mask_to_clear)) - return nr; - nr -= bits_to_clear; - bits_to_clear = BITS_PER_LONG; - mask_to_clear = ~0UL; - p++; - } - if (nr) { - mask_to_clear &= BITMAP_LAST_WORD_MASK(size); - if (clear_bits_ll(p, mask_to_clear)) - return nr; - } - - return 0; -} /** * gen_pool_create - create a new special memory pool @@ -149,7 +30,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); if (pool != NULL) { - spin_lock_init(&pool->lock); + rwlock_init(&pool->lock); INIT_LIST_HEAD(&pool->chunks); pool->min_alloc_order = min_alloc_order; } @@ -182,14 +63,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy if (unlikely(chunk == NULL)) return -ENOMEM; + spin_lock_init(&chunk->lock); chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size; - atomic_set(&chunk->avail, size); - spin_lock(&pool->lock); - list_add_rcu(&chunk->next_chunk, &pool->chunks); - spin_unlock(&pool->lock); + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); return 0; } @@ -204,19 +85,19 @@ EXPORT_SYMBOL(gen_pool_add_virt); */ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) { + struct list_head *_chunk; struct gen_pool_chunk *chunk; - phys_addr_t paddr = -1; - rcu_read_lock(); - list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (addr >= chunk->start_addr && addr < chunk->end_addr) { - paddr = chunk->phys_addr + (addr - chunk->start_addr); - break; - } + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) + return chunk->phys_addr + addr - chunk->start_addr; } - rcu_read_unlock(); + read_unlock(&pool->lock); - return paddr; + return -1; } EXPORT_SYMBOL(gen_pool_virt_to_phys); @@ -234,6 +115,7 @@ void gen_pool_destroy(struct gen_pool *pool) int order = pool->min_alloc_order; int bit, end_bit; + list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); list_del(&chunk->next_chunk); @@ -255,50 +137,44 @@ EXPORT_SYMBOL(gen_pool_destroy); * @size: number of bytes to allocate from the pool * * Allocate the requested number of bytes from the specified pool. - * Uses a first-fit algorithm. Can not be used in NMI handler on - * architectures without NMI-safe cmpxchg implementation. + * Uses a first-fit algorithm. 
*/ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) { + struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long addr = 0; + unsigned long addr, flags; int order = pool->min_alloc_order; - int nbits, start_bit = 0, end_bit, remain; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif + int nbits, start_bit, end_bit; if (size == 0) return 0; nbits = (size + (1UL << order) - 1) >> order; - rcu_read_lock(); - list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (size > atomic_read(&chunk->avail)) - continue; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); end_bit = (chunk->end_addr - chunk->start_addr) >> order; -retry: - start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, - start_bit, nbits, 0); - if (start_bit >= end_bit) + + spin_lock_irqsave(&chunk->lock, flags); + start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, + nbits, 0); + if (start_bit >= end_bit) { + spin_unlock_irqrestore(&chunk->lock, flags); continue; - remain = bitmap_set_ll(chunk->bits, start_bit, nbits); - if (remain) { - remain = bitmap_clear_ll(chunk->bits, start_bit, - nbits - remain); - BUG_ON(remain); - goto retry; } addr = chunk->start_addr + ((unsigned long)start_bit << order); - size = nbits << order; - atomic_sub(size, &chunk->avail); - break; + + bitmap_set(chunk->bits, start_bit, nbits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; } - rcu_read_unlock(); - return addr; + read_unlock(&pool->lock); + return 0; } EXPORT_SYMBOL(gen_pool_alloc); @@ -308,95 +184,33 @@ EXPORT_SYMBOL(gen_pool_alloc); * @addr: starting address of memory to free back to pool * @size: size in bytes of memory to free * - * Free previously allocated special memory back to the specified - * pool. Can not be used in NMI handler on architectures without - * NMI-safe cmpxchg implementation. + * Free previously allocated special memory back to the specified pool. */ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) { + struct list_head *_chunk; struct gen_pool_chunk *chunk; + unsigned long flags; int order = pool->min_alloc_order; - int start_bit, nbits, remain; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif + int bit, nbits; nbits = (size + (1UL << order) - 1) >> order; - rcu_read_lock(); - list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + if (addr >= chunk->start_addr && addr < chunk->end_addr) { BUG_ON(addr + size > chunk->end_addr); - start_bit = (addr - chunk->start_addr) >> order; - remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); - BUG_ON(remain); - size = nbits << order; - atomic_add(size, &chunk->avail); - rcu_read_unlock(); - return; + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + break; } } - rcu_read_unlock(); - BUG(); + BUG_ON(nbits > 0); + read_unlock(&pool->lock); } EXPORT_SYMBOL(gen_pool_free); - -/** - * gen_pool_for_each_chunk - call func for every chunk of generic memory pool - * @pool: the generic memory pool - * @func: func to call - * @data: additional data used by @func - * - * Call @func for every chunk of generic memory pool. 
The @func is - * called with rcu_read_lock held. - */ -void gen_pool_for_each_chunk(struct gen_pool *pool, - void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data), - void *data) -{ - struct gen_pool_chunk *chunk; - - rcu_read_lock(); - list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) - func(pool, chunk, data); - rcu_read_unlock(); -} -EXPORT_SYMBOL(gen_pool_for_each_chunk); - -/** - * gen_pool_avail - get available free space of the pool - * @pool: pool to get available free space - * - * Return available free space of the specified pool. - */ -size_t gen_pool_avail(struct gen_pool *pool) -{ - struct gen_pool_chunk *chunk; - size_t avail = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - avail += atomic_read(&chunk->avail); - rcu_read_unlock(); - return avail; -} -EXPORT_SYMBOL_GPL(gen_pool_avail); - -/** - * gen_pool_size - get size in bytes of memory managed by the pool - * @pool: pool to get size - * - * Return size in bytes of memory managed by the pool. - */ -size_t gen_pool_size(struct gen_pool *pool) -{ - struct gen_pool_chunk *chunk; - size_t size = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - size += chunk->end_addr - chunk->start_addr; - rcu_read_unlock(); - return size; -} -EXPORT_SYMBOL_GPL(gen_pool_size); diff --git a/trunk/lib/idr.c b/trunk/lib/idr.c index db040ce3fa73..e15502e8b21e 100644 --- a/trunk/lib/idr.c +++ b/trunk/lib/idr.c @@ -34,10 +34,8 @@ #include #include #include -#include static struct kmem_cache *idr_layer_cache; -static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) { @@ -927,71 +925,6 @@ void ida_destroy(struct ida *ida) } EXPORT_SYMBOL(ida_destroy); -/** - * ida_simple_get - get a new id. - * @ida: the (initialized) ida. - * @start: the minimum id (inclusive, < 0x8000000) - * @end: the maximum id (exclusive, < 0x8000000 or 0) - * @gfp_mask: memory allocation flags - * - * Allocates an id in the range start <= id < end, or returns -ENOSPC. - * On memory allocation failure, returns -ENOMEM. - * - * Use ida_simple_remove() to get rid of an id. - */ -int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, - gfp_t gfp_mask) -{ - int ret, id; - unsigned int max; - - BUG_ON((int)start < 0); - BUG_ON((int)end < 0); - - if (end == 0) - max = 0x80000000; - else { - BUG_ON(end < start); - max = end - 1; - } - -again: - if (!ida_pre_get(ida, gfp_mask)) - return -ENOMEM; - - spin_lock(&simple_ida_lock); - ret = ida_get_new_above(ida, start, &id); - if (!ret) { - if (id > max) { - ida_remove(ida, id); - ret = -ENOSPC; - } else { - ret = id; - } - } - spin_unlock(&simple_ida_lock); - - if (unlikely(ret == -EAGAIN)) - goto again; - - return ret; -} -EXPORT_SYMBOL(ida_simple_get); - -/** - * ida_simple_remove - remove an allocated id. - * @ida: the (initialized) ida. - * @id: the id returned by ida_simple_get. - */ -void ida_simple_remove(struct ida *ida, unsigned int id) -{ - BUG_ON((int)id < 0); - spin_lock(&simple_ida_lock); - ida_remove(ida, id); - spin_unlock(&simple_ida_lock); -} -EXPORT_SYMBOL(ida_simple_remove); - /** * ida_init - initialize ida handle * @ida: ida handle diff --git a/trunk/lib/llist.c b/trunk/lib/llist.c deleted file mode 100644 index da445724fa1f..000000000000 --- a/trunk/lib/llist.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Lock-less NULL terminated single linked list - * - * The basic atomic operation of this list is cmpxchg on long. 
On - * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. - * - * Copyright 2010,2011 Intel Corp. - * Author: Huang Ying - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation; - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include -#include -#include -#include - -#include - -/** - * llist_add - add a new entry - * @new: new entry to be added - * @head: the head for your lock-less list - */ -void llist_add(struct llist_node *new, struct llist_head *head) -{ - struct llist_node *entry, *old_entry; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - entry = head->first; - do { - old_entry = entry; - new->next = entry; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); -} -EXPORT_SYMBOL_GPL(llist_add); - -/** - * llist_add_batch - add several linked entries in batch - * @new_first: first entry in batch to be added - * @new_last: last entry in batch to be added - * @head: the head for your lock-less list - */ -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, - struct llist_head *head) -{ - struct llist_node *entry, *old_entry; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - entry = head->first; - do { - old_entry = entry; - new_last->next = entry; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); -} -EXPORT_SYMBOL_GPL(llist_add_batch); - -/** - * llist_del_first - delete the first entry of lock-less list - * @head: the head for your lock-less list - * - * If list is empty, return NULL, otherwise, return the first entry - * deleted, this is the newest added one. - * - * Only one llist_del_first user can be used simultaneously with - * multiple llist_add users without lock. Because otherwise - * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, - * llist_add) sequence in another user may change @head->first->next, - * but keep @head->first. If multiple consumers are needed, please - * use llist_del_all or use lock between consumers. - */ -struct llist_node *llist_del_first(struct llist_head *head) -{ - struct llist_node *entry, *old_entry, *next; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - entry = head->first; - do { - if (entry == NULL) - return NULL; - old_entry = entry; - next = entry->next; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); - - return entry; -} -EXPORT_SYMBOL_GPL(llist_del_first); - -/** - * llist_del_all - delete all entries from lock-less list - * @head: the head of lock-less list to delete all entries - * - * If list is empty, return NULL, otherwise, delete all entries and - * return the pointer to the first entry. The order of entries - * deleted is from the newest to the oldest added one. 
- */ -struct llist_node *llist_del_all(struct llist_head *head) -{ -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - return xchg(&head->first, NULL); -} -EXPORT_SYMBOL_GPL(llist_del_all); diff --git a/trunk/lib/radix-tree.c b/trunk/lib/radix-tree.c index a2f9da59c197..7ea2e033d715 100644 --- a/trunk/lib/radix-tree.c +++ b/trunk/lib/radix-tree.c @@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_prev_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, - unsigned long index, unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, + unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -857,16 +857,12 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + index++; if (slot->slots[i]) { - results[nr_found] = &(slot->slots[i]); - if (indices) - indices[nr_found] = index; - if (++nr_found == max_items) { - index++; + results[nr_found++] = &(slot->slots[i]); + if (nr_found == max_items) goto out; - } } - index++; } out: *next_index = index; @@ -922,8 +918,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, if (cur_index > max_index) break; - slots_found = __lookup(node, (void ***)results + ret, NULL, - cur_index, max_items - ret, &next_index); + slots_found = __lookup(node, (void ***)results + ret, cur_index, + max_items - ret, &next_index); nr_found = 0; for (i = 0; i < slots_found; i++) { struct radix_tree_node *slot; @@ -948,7 +944,6 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * @root: radix tree root * @results: where the results of the lookup are placed - * @indices: where their indices should be placed (but usually NULL) * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @@ -963,8 +958,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. */ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, - void ***results, unsigned long *indices, +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, unsigned long first_index, unsigned int max_items) { unsigned long max_index; @@ -980,8 +974,6 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, if (first_index > 0) return 0; results[0] = (void **)&root->rnode; - if (indices) - indices[0] = 0; return 1; } node = indirect_to_ptr(node); @@ -995,9 +987,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, if (cur_index > max_index) break; - slots_found = __lookup(node, results + ret, - indices ? indices + ret : NULL, - cur_index, max_items - ret, &next_index); + slots_found = __lookup(node, results + ret, cur_index, + max_items - ret, &next_index); ret += slots_found; if (next_index == 0) break; @@ -1203,98 +1194,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); -#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) -#include /* for cond_resched() */ - -/* - * This linear search is at present only useful to shmem_unuse_inode(). 
- */ -static unsigned long __locate(struct radix_tree_node *slot, void *item, - unsigned long index, unsigned long *found_index) -{ - unsigned int shift, height; - unsigned long i; - - height = slot->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - - for ( ; height > 1; height--) { - i = (index >> shift) & RADIX_TREE_MAP_MASK; - for (;;) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - i++; - if (i == RADIX_TREE_MAP_SIZE) - goto out; - } - - shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference_raw(slot->slots[i]); - if (slot == NULL) - goto out; - } - - /* Bottom level: check items */ - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] == item) { - *found_index = index + i; - index = 0; - goto out; - } - } - index += RADIX_TREE_MAP_SIZE; -out: - return index; -} - -/** - * radix_tree_locate_item - search through radix tree for item - * @root: radix tree root - * @item: item to be found - * - * Returns index where item was found, or -1 if not found. - * Caller must hold no lock (since this time-consuming function needs - * to be preemptible), and must check afterwards if item is still there. - */ -unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) -{ - struct radix_tree_node *node; - unsigned long max_index; - unsigned long cur_index = 0; - unsigned long found_index = -1; - - do { - rcu_read_lock(); - node = rcu_dereference_raw(root->rnode); - if (!radix_tree_is_indirect_ptr(node)) { - rcu_read_unlock(); - if (node == item) - found_index = 0; - break; - } - - node = indirect_to_ptr(node); - max_index = radix_tree_maxindex(node->height); - if (cur_index > max_index) - break; - - cur_index = __locate(node, item, cur_index, &found_index); - rcu_read_unlock(); - cond_resched(); - } while (cur_index != 0 && cur_index <= max_index); - - return found_index; -} -#else -unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) -{ - return -1; -} -#endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** * radix_tree_shrink - shrink height of a radix tree to minimal diff --git a/trunk/mm/failslab.c b/trunk/mm/failslab.c index 0dd7b8fec71c..1ce58c201dca 100644 --- a/trunk/mm/failslab.c +++ b/trunk/mm/failslab.c @@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init failslab_debugfs_init(void) { - struct dentry *dir; mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + int err; - dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); - if (IS_ERR(dir)) - return PTR_ERR(dir); + err = init_fault_attr_dentries(&failslab.attr, "failslab"); + if (err) + return err; - if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, + if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, &failslab.ignore_gfp_wait)) goto fail; - if (!debugfs_create_bool("cache-filter", mode, dir, + if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, &failslab.cache_filter)) goto fail; return 0; fail: - debugfs_remove_recursive(dir); + cleanup_fault_attr_dentries(&failslab.attr); return -ENOMEM; } diff --git a/trunk/mm/filemap.c b/trunk/mm/filemap.c index 645a080ba4df..867d40222ec7 100644 --- a/trunk/mm/filemap.c +++ b/trunk/mm/filemap.c @@ -33,6 +33,7 @@ #include #include /* for BUG_ON(!in_atomic()) only */ #include +#include /* for page_is_file_cache() */ #include #include "internal.h" @@ -461,7 +462,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space 
*mapping, int error; VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageSwapBacked(page)); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); @@ -479,6 +479,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); + if (PageSwapBacked(page)) + __inc_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; @@ -500,9 +502,22 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, { int ret; + /* + * Splice_read and readahead add shmem/tmpfs pages into the page cache + * before shmem_readpage has a chance to mark them as SwapBacked: they + * need to go on the anon lru below, and mem_cgroup_cache_charge + * (called in add_to_page_cache) needs to know where they're going too. + */ + if (mapping_cap_swap_backed(mapping)) + SetPageSwapBacked(page); + ret = add_to_page_cache(page, mapping, offset, gfp_mask); - if (ret == 0) - lru_cache_add_file(page); + if (ret == 0) { + if (page_is_file_cache(page)) + lru_cache_add_file(page); + else + lru_cache_add_anon(page); + } return ret; } EXPORT_SYMBOL_GPL(add_to_page_cache_lru); @@ -699,16 +714,9 @@ struct page *find_get_page(struct address_space *mapping, pgoff_t offset) page = radix_tree_deref_slot(pagep); if (unlikely(!page)) goto out; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) - goto repeat; - /* - * Otherwise, shmem/tmpfs must be storing a swap entry - * here as an exceptional entry: so return it without - * attempting to raise page count. - */ - goto out; - } + if (radix_tree_deref_retry(page)) + goto repeat; + if (!page_cache_get_speculative(page)) goto repeat; @@ -745,7 +753,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) repeat: page = find_get_page(mapping, offset); - if (page && !radix_tree_exception(page)) { + if (page) { lock_page(page); /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { @@ -832,7 +840,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, NULL, start, nr_pages); + (void ***)pages, start, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -841,22 +849,13 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, if (unlikely(!page)) continue; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. - */ - WARN_ON(start | i); - goto restart; - } - /* - * Otherwise, shmem/tmpfs must be storing a swap entry - * here as an exceptional entry: so skip over it - - * we only reach this from invalidate_mapping_pages(). - */ - continue; + /* + * This can only trigger when the entry at index 0 moves out + * of or back to the root: none yet gotten, safe to restart. 
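The retry logic these filemap hunks keep returning to is the standard lockless pagecache lookup. A minimal sketch of the pattern, assembled from the primitives visible above (sketch_rcu_lookup is a hypothetical name; every other identifier is the kernel's own, and this is an illustration rather than a drop-in function):

/*
 * Hedged sketch, not part of the patch: the lockless lookup pattern
 * that find_get_page() above implements.
 */
static struct page *sketch_rcu_lookup(struct address_space *mapping,
				      pgoff_t offset)
{
	void **pagep;
	struct page *page;

	rcu_read_lock();
repeat:
	page = NULL;
	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
	if (pagep) {
		page = radix_tree_deref_slot(pagep);
		if (unlikely(!page))
			goto out;
		if (radix_tree_deref_retry(page))
			goto repeat;		/* slot was moved: retry */
		if (!page_cache_get_speculative(page))
			goto repeat;		/* raced with a free: retry */
		/* Has the page moved since the reference was taken? */
		if (unlikely(page != *pagep)) {
			page_cache_release(page);
			goto repeat;
		}
	}
out:
	rcu_read_unlock();
	return page;
}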
+ */ + if (radix_tree_deref_retry(page)) { + WARN_ON(start | i); + goto restart; } if (!page_cache_get_speculative(page)) @@ -904,7 +903,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, NULL, index, nr_pages); + (void ***)pages, index, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -913,22 +912,12 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, if (unlikely(!page)) continue; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. - */ - goto restart; - } - /* - * Otherwise, shmem/tmpfs must be storing a swap entry - * here as an exceptional entry: so stop looking for - * contiguous pages. - */ - break; - } + /* + * This can only trigger when the entry at index 0 moves out + * of or back to the root: none yet gotten, safe to restart. + */ + if (radix_tree_deref_retry(page)) + goto restart; if (!page_cache_get_speculative(page)) goto repeat; @@ -988,21 +977,12 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, if (unlikely(!page)) continue; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. - */ - goto restart; - } - /* - * This function is never used on a shmem/tmpfs - * mapping, so a swap entry won't be found here. - */ - BUG(); - } + /* + * This can only trigger when the entry at index 0 moves out + * of or back to the root: none yet gotten, safe to restart. + */ + if (radix_tree_deref_retry(page)) + goto restart; if (!page_cache_get_speculative(page)) goto repeat; diff --git a/trunk/mm/memcontrol.c b/trunk/mm/memcontrol.c index f4ec4e7ca4cd..5f84d2351ddb 100644 --- a/trunk/mm/memcontrol.c +++ b/trunk/mm/memcontrol.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -2872,6 +2873,30 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, return 0; if (PageCompound(page)) return 0; + /* + * Corner case handling. This is usually called from add_to_page_cache(). + * But some filesystems (shmem) precharge the page before calling it + * and then call add_to_page_cache() with GFP_NOWAIT. + * + * For the GFP_NOWAIT case, the page may already have been charged before + * add_to_page_cache() was called. (See shmem.c.) Check that here to avoid + * charging twice. (It works, but at a slightly higher cost.) + * And when the page is SwapCache, swap information should be taken + * into account. This is under lock_page() now. + */ + if (!(gfp_mask & __GFP_WAIT)) { + struct page_cgroup *pc; + + pc = lookup_page_cgroup(page); + if (!pc) + return 0; + lock_page_cgroup(pc); + if (PageCgroupUsed(pc)) { + unlock_page_cgroup(pc); + return 0; + } + unlock_page_cgroup(pc); + } if (unlikely(!mm)) mm = &init_mm; @@ -3461,6 +3486,31 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, cgroup_release_and_wakeup_rmdir(&mem->css); } +/* + * Called to try to shrink memory usage on charge failure at shmem's swapin. + * Calling hierarchical_reclaim is not enough because we should update + * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. + * Moreover, considering the hierarchy, we should reclaim from mem_over_limit, + * not from the memcg which this page would be charged to. + * try_charge_swapin does all of this work properly. + */ +int mem_cgroup_shmem_charge_fallback(struct page *page, + struct mm_struct *mm, + gfp_t gfp_mask) +{ + struct mem_cgroup *mem; + int ret; + + if (mem_cgroup_disabled()) + return 0; + + ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); + if (!ret) + mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ + + return ret; +} + #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -5280,17 +5330,15 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, pgoff = pte_to_pgoff(ptent); /* page is moved even if it's not RSS of this task(page-faulted). */ - page = find_get_page(mapping, pgoff); - -#ifdef CONFIG_SWAP - /* shmem/tmpfs may report page out on swap: account for that too. */ - if (radix_tree_exceptional_entry(page)) { - swp_entry_t swap = radix_to_swp_entry(page); + if (!mapping_cap_swap_backed(mapping)) { /* normal file */ + page = find_get_page(mapping, pgoff); + } else { /* shmem/tmpfs file: we should take account of swap too. */ + swp_entry_t ent; + mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); if (do_swap_account) - *entry = swap; - page = find_get_page(&swapper_space, swap.val); + entry->val = ent.val; } -#endif + return page; } diff --git a/trunk/mm/memory-failure.c b/trunk/mm/memory-failure.c index 2b43ba051ac9..740c4f52059c 100644 --- a/trunk/mm/memory-failure.c +++ b/trunk/mm/memory-failure.c @@ -53,7 +53,6 @@ #include #include #include -#include #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -1179,97 +1178,6 @@ void memory_failure(unsigned long pfn, int trapno) __memory_failure(pfn, trapno, 0); } -#define MEMORY_FAILURE_FIFO_ORDER 4 -#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) - -struct memory_failure_entry { - unsigned long pfn; - int trapno; - int flags; -}; - -struct memory_failure_cpu { - DECLARE_KFIFO(fifo, struct memory_failure_entry, - MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; - struct work_struct work; -}; - -static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); - -/** - * memory_failure_queue - Schedule handling memory failure of a page. - * @pfn: Page Number of the corrupted page - * @trapno: Trap number reported in the signal to user space. - * @flags: Flags for memory failure handling - * - * This function is called by the low level hardware error handler - * when it detects hardware memory corruption of a page. It schedules - * the recovering of error page, including dropping pages, killing - * processes etc. - * - * The function is primarily of use for corruptions that - * happen outside the current execution context (e.g. when - * detected by a background scrubber) - * - * Can run in IRQ context.
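That kerneldoc is the whole contract: an atomic-context producer hands off the pfn and returns. A hedged caller-side sketch (the handler below is hypothetical; memory_failure_queue is the function documented above):

/* Hypothetical low-level hardware error handler: illustrative only. */
static void sketch_hw_error_handler(unsigned long bad_pfn, int trapno)
{
	/*
	 * memory_failure() may sleep, so it cannot be called from here.
	 * Queue the pfn instead; the per-CPU work item drains the kfifo
	 * later, in process context.
	 */
	memory_failure_queue(bad_pfn, trapno, 0);
}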
- */ -void memory_failure_queue(unsigned long pfn, int trapno, int flags) -{ - struct memory_failure_cpu *mf_cpu; - unsigned long proc_flags; - struct memory_failure_entry entry = { - .pfn = pfn, - .trapno = trapno, - .flags = flags, - }; - - mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, &entry)) - schedule_work_on(smp_processor_id(), &mf_cpu->work); - else - pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n", - pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); -} -EXPORT_SYMBOL_GPL(memory_failure_queue); - -static void memory_failure_work_func(struct work_struct *work) -{ - struct memory_failure_cpu *mf_cpu; - struct memory_failure_entry entry = { 0, }; - unsigned long proc_flags; - int gotten; - - mf_cpu = &__get_cpu_var(memory_failure_cpu); - for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - if (!gotten) - break; - __memory_failure(entry.pfn, entry.trapno, entry.flags); - } -} - -static int __init memory_failure_init(void) -{ - struct memory_failure_cpu *mf_cpu; - int cpu; - - for_each_possible_cpu(cpu) { - mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); - INIT_KFIFO(mf_cpu->fifo); - INIT_WORK(&mf_cpu->work, memory_failure_work_func); - } - - return 0; -} -core_initcall(memory_failure_init); - /** * unpoison_memory - Unpoison a previously poisoned page * @pfn: Page number of the to be unpoisoned page diff --git a/trunk/mm/mincore.c b/trunk/mm/mincore.c index 636a86876ff2..a4e6b9d75c76 100644 --- a/trunk/mm/mincore.c +++ b/trunk/mm/mincore.c @@ -69,15 +69,12 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) * file will not get a swp_entry_t in its pte, but rather it is like * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here. + * + * However when tmpfs moves the page from pagecache and into swapcache, + * it is still in core, but the find_get_page below won't find it. + * No big deal, but make a note of it. */ page = find_get_page(mapping, pgoff); -#ifdef CONFIG_SWAP - /* shmem/tmpfs may return swap: account for swapcache page too. 
*/ - if (radix_tree_exceptional_entry(page)) { - swp_entry_t swap = radix_to_swp_entry(page); - page = find_get_page(&swapper_space, swap.val); - } -#endif if (page) { present = PageUptodate(page); page_cache_release(page); diff --git a/trunk/mm/oom_kill.c b/trunk/mm/oom_kill.c index 626303b52f3c..eafff89b3dd6 100644 --- a/trunk/mm/oom_kill.c +++ b/trunk/mm/oom_kill.c @@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, do_each_thread(g, p) { unsigned int points; - if (p->exit_state) + if (!p->mm) continue; if (oom_unkillable_task(p, mem, nodemask)) continue; @@ -319,8 +319,6 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) return ERR_PTR(-1UL); - if (!p->mm) - continue; if (p->flags & PF_EXITING) { /* diff --git a/trunk/mm/page_alloc.c b/trunk/mm/page_alloc.c index 6e8ecb6e021c..1dbcf8888f14 100644 --- a/trunk/mm/page_alloc.c +++ b/trunk/mm/page_alloc.c @@ -1409,11 +1409,14 @@ static int __init fail_page_alloc_debugfs(void) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; + int err; - dir = fault_create_debugfs_attr("fail_page_alloc", NULL, - &fail_page_alloc.attr); - if (IS_ERR(dir)) - return PTR_ERR(dir); + err = init_fault_attr_dentries(&fail_page_alloc.attr, + "fail_page_alloc"); + if (err) + return err; + + dir = fail_page_alloc.attr.dir; if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &fail_page_alloc.ignore_gfp_wait)) @@ -1427,7 +1430,7 @@ static int __init fail_page_alloc_debugfs(void) return 0; fail: - debugfs_remove_recursive(dir); + cleanup_fault_attr_dentries(&fail_page_alloc.attr); return -ENOMEM; } diff --git a/trunk/mm/shmem.c b/trunk/mm/shmem.c index 32f6763f16fb..5cc21f8b4cd3 100644 --- a/trunk/mm/shmem.c +++ b/trunk/mm/shmem.c @@ -6,8 +6,7 @@ * 2000-2001 Christoph Rohland * 2000-2001 SAP AG * 2002 Red Hat Inc. - * Copyright (C) 2002-2011 Hugh Dickins. - * Copyright (C) 2011 Google Inc. + * Copyright (C) 2002-2005 Hugh Dickins. * Copyright (C) 2002-2005 VERITAS Software Corporation. * Copyright (C) 2004 Andi Kleen, SuSE Labs * @@ -29,6 +28,7 @@ #include #include #include +#include #include static struct vfsmount *shm_mnt; @@ -51,8 +51,6 @@ static struct vfsmount *shm_mnt; #include #include #include -#include -#include #include #include #include @@ -65,17 +63,43 @@ static struct vfsmount *shm_mnt; #include #include +#include #include +/* + * The maximum size of a shmem/tmpfs file is limited by the maximum size of + * its triple-indirect swap vector - see illustration at shmem_swp_entry(). + * + * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, + * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum + * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, + * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. + * + * We use / and * instead of shifts in the definitions below, so that the swap + * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. 
+ */ +#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) +#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) + +#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) +#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) + +#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) +#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) + #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) +/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ +#define SHMEM_PAGEIN VM_READ +#define SHMEM_TRUNCATE VM_WRITE + +/* Definition to limit shmem_truncate's steps between cond_rescheds */ +#define LATENCY_LIMIT 64 + /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 -/* Symlink up to this size is kmalloc'ed instead of using a swappable page */ -#define SHORT_SYMLINK_LEN 128 - struct shmem_xattr { struct list_head list; /* anchored by shmem_inode_info->xattr_list */ char *name; /* xattr name */ @@ -83,7 +107,7 @@ struct shmem_xattr { char value[0]; }; -/* Flag allocation requirements to shmem_getpage */ +/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ @@ -113,6 +137,56 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index, mapping_gfp_mask(inode->i_mapping), fault_type); } +static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) +{ + /* + * The above definition of ENTRIES_PER_PAGE, and the use of + * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: + * might be reconsidered if it ever diverges from PAGE_SIZE. + * + * Mobility flags are masked out as swap vectors cannot move + */ + return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, + PAGE_CACHE_SHIFT-PAGE_SHIFT); +} + +static inline void shmem_dir_free(struct page *page) +{ + __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); +} + +static struct page **shmem_dir_map(struct page *page) +{ + return (struct page **)kmap_atomic(page, KM_USER0); +} + +static inline void shmem_dir_unmap(struct page **dir) +{ + kunmap_atomic(dir, KM_USER0); +} + +static swp_entry_t *shmem_swp_map(struct page *page) +{ + return (swp_entry_t *)kmap_atomic(page, KM_USER1); +} + +static inline void shmem_swp_balance_unmap(void) +{ + /* + * When passing a pointer to an i_direct entry, to code which + * also handles indirect entries and so will shmem_swp_unmap, + * we must arrange for the preempt count to remain in balance. + * What kmap_atomic of a lowmem page does depends on config + * and architecture, so pretend to kmap_atomic some lowmem page. 
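The SHMSWP_MAX_INDEX limits defined above can be sanity-checked numerically. A small userspace sketch, assuming 4kB PAGE_CACHE_SIZE and SHMEM_NR_DIRECT == 16 (the value used in the layout comment further down), with 4-byte swap entries on a 32-bit kernel and 8-byte entries on 64-bit:

#include <stdio.h>

/* Assumptions: 4kB PAGE_CACHE_SIZE, SHMEM_NR_DIRECT == 16. */
#define PAGE_CACHE_SIZE	4096ULL
#define SHMEM_NR_DIRECT	16ULL

int main(void)
{
	unsigned long long entry_size[] = { 4, 8 };	/* 32-bit, 64-bit */
	int i;

	for (i = 0; i < 2; i++) {
		unsigned long long epp = PAGE_CACHE_SIZE / entry_size[i];
		unsigned long long eppp = epp * epp;
		unsigned long long max_index =
			SHMEM_NR_DIRECT + (eppp / 2) * (epp + 1);

		/* prints ~2049 GiB for 32-bit, ~256 GiB for 64-bit */
		printf("%d-bit: max %llu pages (~%llu GiB)\n",
		       (int)(entry_size[i] * 8), max_index,
		       (max_index * PAGE_CACHE_SIZE) >> 30);
	}
	return 0;
}

That reproduces the "just over 2TB on a 32-bit kernel, but one eighth of that on a 64-bit kernel" figures from the comment above.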
+ */ + (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); +} + +static inline void shmem_swp_unmap(swp_entry_t *entry) +{ + kunmap_atomic(entry, KM_USER1); +} + static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) { return sb->s_fs_info; } @@ -170,6 +244,15 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = { static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); +static void shmem_free_blocks(struct inode *inode, long pages) +{ + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + if (sbinfo->max_blocks) { + percpu_counter_add(&sbinfo->used_blocks, -pages); + inode->i_blocks -= pages*BLOCKS_PER_PAGE; + } +} + static int shmem_reserve_inode(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); @@ -196,7 +279,7 @@ static void shmem_free_inode(struct super_block *sb) } /** - * shmem_recalc_inode - recalculate the block usage of an inode + * shmem_recalc_inode - recalculate the size of an inode * @inode: inode to recalc * * We have to calculate the free blocks since the mm can drop @@ -214,297 +297,474 @@ static void shmem_recalc_inode(struct inode *inode) freed = info->alloced - info->swapped - inode->i_mapping->nrpages; if (freed > 0) { - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -freed); info->alloced -= freed; - inode->i_blocks -= freed * BLOCKS_PER_PAGE; shmem_unacct_blocks(info->flags, freed); + shmem_free_blocks(inode, freed); } } -/* - * Replace item expected in radix tree by a new item, while holding tree lock. +/** + * shmem_swp_entry - find the swap vector position in the info structure + * @info: info structure for the inode + * @index: index of the page to find + * @page: optional page to add to the structure. Has to be preset to + * all zeros + * + * If there is no space allocated yet it will return NULL when + * page is NULL, else it will use the page for the needed block, + * setting it to NULL on return to indicate that it has been used. + * + * The swap vector is organized in the following way: + * + * There are SHMEM_NR_DIRECT entries directly stored in the + * shmem_inode_info structure. So small files do not need an additional + * allocation. + * + * For pages with index > SHMEM_NR_DIRECT there is the pointer + * i_indirect which points to a page which holds in the first half + * doubly indirect blocks, in the second half triple indirect blocks: + * + * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the + * following layout (for SHMEM_NR_DIRECT == 16): + * + * i_indirect -> dir --> 16-19 + * | +-> 20-23 + * | + * +-->dir2 --> 24-27 + * | +-> 28-31 + * | +-> 32-35 + * | +-> 36-39 + * | + * +-->dir3 --> 40-43 + * +-> 44-47 + * +-> 48-51 + * +-> 52-55 */ -static int shmem_radix_tree_replace(struct address_space *mapping, - pgoff_t index, void *expected, void *replacement) -{ - void **pslot; - void *item = NULL; - - VM_BUG_ON(!expected); - pslot = radix_tree_lookup_slot(&mapping->page_tree, index); - if (pslot) - item = radix_tree_deref_slot_protected(pslot, - &mapping->tree_lock); - if (item != expected) - return -ENOENT; - if (replacement) - radix_tree_replace_slot(pslot, replacement); - else - radix_tree_delete(&mapping->page_tree, index); - return 0; -} +static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) +{ + unsigned long offset; + struct page **dir; + struct page *subdir; -/* - * Like add_to_page_cache_locked, but error if expected item has gone.
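To trace the walk shmem_swp_entry() performs over the layout drawn above, here is a hedged userspace re-creation of just the index arithmetic, reusing the comment's artificial ENTRIES_PER_PAGE == 4 and SHMEM_NR_DIRECT == 16 (decompose() is illustrative, not kernel code):

#include <stdio.h>

#define ENTRIES_PER_PAGE	4UL
#define SHMEM_NR_DIRECT		16UL

/* Mirrors only the arithmetic of shmem_swp_entry(), no page allocation. */
static void decompose(unsigned long index)
{
	unsigned long offset, slot;

	if (index < SHMEM_NR_DIRECT) {
		printf("index %lu: i_direct[%lu]\n", index, index);
		return;
	}
	index -= SHMEM_NR_DIRECT;
	offset = index % ENTRIES_PER_PAGE;
	index /= ENTRIES_PER_PAGE;
	if (index < ENTRIES_PER_PAGE / 2) {
		/* doubly indirect: first half of i_indirect */
		printf("i_indirect[%lu] -> leaf[%lu]\n", index, offset);
	} else {
		index -= ENTRIES_PER_PAGE / 2;
		slot = ENTRIES_PER_PAGE / 2 + index / ENTRIES_PER_PAGE;
		/* triply indirect: second half of i_indirect */
		printf("i_indirect[%lu] -> middir[%lu] -> leaf[%lu]\n",
		       slot, index % ENTRIES_PER_PAGE, offset);
	}
}

int main(void)
{
	decompose(21);	/* dir's second leaf, offset 1: page 21 above */
	decompose(34);	/* dir2's third leaf, offset 2: page 34 above */
	return 0;
}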
- */ -static int shmem_add_to_page_cache(struct page *page, - struct address_space *mapping, - pgoff_t index, gfp_t gfp, void *expected) -{ - int error = 0; - - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(!PageSwapBacked(page)); - - if (!expected) - error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); - if (!error) { - page_cache_get(page); - page->mapping = mapping; - page->index = index; - - spin_lock_irq(&mapping->tree_lock); - if (!expected) - error = radix_tree_insert(&mapping->page_tree, - index, page); - else - error = shmem_radix_tree_replace(mapping, index, - expected, page); - if (!error) { - mapping->nrpages++; - __inc_zone_page_state(page, NR_FILE_PAGES); - __inc_zone_page_state(page, NR_SHMEM); - spin_unlock_irq(&mapping->tree_lock); - } else { - page->mapping = NULL; - spin_unlock_irq(&mapping->tree_lock); - page_cache_release(page); + if (index < SHMEM_NR_DIRECT) { + shmem_swp_balance_unmap(); + return info->i_direct+index; + } + if (!info->i_indirect) { + if (page) { + info->i_indirect = *page; + *page = NULL; } - if (!expected) - radix_tree_preload_end(); + return NULL; /* need another page */ } - if (error) - mem_cgroup_uncharge_cache_page(page); - return error; + + index -= SHMEM_NR_DIRECT; + offset = index % ENTRIES_PER_PAGE; + index /= ENTRIES_PER_PAGE; + dir = shmem_dir_map(info->i_indirect); + + if (index >= ENTRIES_PER_PAGE/2) { + index -= ENTRIES_PER_PAGE/2; + dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; + index %= ENTRIES_PER_PAGE; + subdir = *dir; + if (!subdir) { + if (page) { + *dir = *page; + *page = NULL; + } + shmem_dir_unmap(dir); + return NULL; /* need another page */ + } + shmem_dir_unmap(dir); + dir = shmem_dir_map(subdir); + } + + dir += index; + subdir = *dir; + if (!subdir) { + if (!page || !(subdir = *page)) { + shmem_dir_unmap(dir); + return NULL; /* need a page */ + } + *dir = subdir; + *page = NULL; + } + shmem_dir_unmap(dir); + return shmem_swp_map(subdir) + offset; } -/* - * Like delete_from_page_cache, but substitutes swap for page. - */ -static void shmem_delete_from_page_cache(struct page *page, void *radswap) +static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) { - struct address_space *mapping = page->mapping; - int error; + long incdec = value? 1: -1; - spin_lock_irq(&mapping->tree_lock); - error = shmem_radix_tree_replace(mapping, page->index, page, radswap); - page->mapping = NULL; - mapping->nrpages--; - __dec_zone_page_state(page, NR_FILE_PAGES); - __dec_zone_page_state(page, NR_SHMEM); - spin_unlock_irq(&mapping->tree_lock); - page_cache_release(page); - BUG_ON(error); + entry->val = value; + info->swapped += incdec; + if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { + struct page *page = kmap_atomic_to_page(entry); + set_page_private(page, page_private(page) + incdec); + } } -/* - * Like find_get_pages, but collecting swap entries as well as pages. +/** + * shmem_swp_alloc - get the position of the swap entry for the page. + * @info: info structure for the inode + * @index: index of the page to find + * @sgp: check and recheck i_size? skip allocation? + * @gfp: gfp mask to use for any page allocation + * + * If the entry does not exist, allocate it. 
*/ -static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, - pgoff_t start, unsigned int nr_pages, - struct page **pages, pgoff_t *indices) -{ - unsigned int i; - unsigned int ret; - unsigned int nr_found; - - rcu_read_lock(); -restart: - nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, indices, start, nr_pages); - ret = 0; - for (i = 0; i < nr_found; i++) { - struct page *page; -repeat: - page = radix_tree_deref_slot((void **)pages[i]); - if (unlikely(!page)) - continue; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) - goto restart; - /* - * Otherwise, we must be storing a swap entry - * here as an exceptional entry: so return it - * without attempting to raise page count. - */ - goto export; +static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, + unsigned long index, enum sgp_type sgp, gfp_t gfp) +{ + struct inode *inode = &info->vfs_inode; + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct page *page = NULL; + swp_entry_t *entry; + + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + return ERR_PTR(-EINVAL); + + while (!(entry = shmem_swp_entry(info, index, &page))) { + if (sgp == SGP_READ) + return shmem_swp_map(ZERO_PAGE(0)); + /* + * Test used_blocks against 1 less than max_blocks, since we have 1 data + * page (and perhaps indirect index pages) yet to allocate: + * a waste to allocate index if we cannot allocate data. + */ + if (sbinfo->max_blocks) { + if (percpu_counter_compare(&sbinfo->used_blocks, + sbinfo->max_blocks - 1) >= 0) + return ERR_PTR(-ENOSPC); + percpu_counter_inc(&sbinfo->used_blocks); + inode->i_blocks += BLOCKS_PER_PAGE; } - if (!page_cache_get_speculative(page)) - goto repeat; - /* Has the page moved? */ - if (unlikely(page != *((void **)pages[i]))) { - page_cache_release(page); - goto repeat; + spin_unlock(&info->lock); + page = shmem_dir_alloc(gfp); + spin_lock(&info->lock); + + if (!page) { + shmem_free_blocks(inode, 1); + return ERR_PTR(-ENOMEM); } -export: - indices[ret] = indices[i]; - pages[ret] = page; - ret++; - } - if (unlikely(!ret && nr_found)) - goto restart; - rcu_read_unlock(); - return ret; + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + entry = ERR_PTR(-EINVAL); + break; + } + if (info->next_index <= index) + info->next_index = index + 1; + } + if (page) { + /* another task gave its page, or truncated the file */ + shmem_free_blocks(inode, 1); + shmem_dir_free(page); + } + if (info->next_index <= index && !IS_ERR(entry)) + info->next_index = index + 1; + return entry; } -/* - * Remove swap entry from radix tree, free the swap and its page cache.
+/** + * shmem_free_swp - free some swap entries in a directory + * @dir: pointer to the directory + * @edir: pointer after last entry of the directory + * @punch_lock: pointer to spinlock when needed for the holepunch case */ -static int shmem_free_swap(struct address_space *mapping, - pgoff_t index, void *radswap) -{ - int error; - - spin_lock_irq(&mapping->tree_lock); - error = shmem_radix_tree_replace(mapping, index, radswap, NULL); - spin_unlock_irq(&mapping->tree_lock); - if (!error) - free_swap_and_cache(radix_to_swp_entry(radswap)); - return error; +static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, + spinlock_t *punch_lock) +{ + spinlock_t *punch_unlock = NULL; + swp_entry_t *ptr; + int freed = 0; + + for (ptr = dir; ptr < edir; ptr++) { + if (ptr->val) { + if (unlikely(punch_lock)) { + punch_unlock = punch_lock; + punch_lock = NULL; + spin_lock(punch_unlock); + if (!ptr->val) + continue; + } + free_swap_and_cache(*ptr); + *ptr = (swp_entry_t){0}; + freed++; + } + } + if (punch_unlock) + spin_unlock(punch_unlock); + return freed; +} + +static int shmem_map_and_free_swp(struct page *subdir, int offset, + int limit, struct page ***dir, spinlock_t *punch_lock) +{ + swp_entry_t *ptr; + int freed = 0; + + ptr = shmem_swp_map(subdir); + for (; offset < limit; offset += LATENCY_LIMIT) { + int size = limit - offset; + if (size > LATENCY_LIMIT) + size = LATENCY_LIMIT; + freed += shmem_free_swp(ptr+offset, ptr+offset+size, + punch_lock); + if (need_resched()) { + shmem_swp_unmap(ptr); + if (*dir) { + shmem_dir_unmap(*dir); + *dir = NULL; + } + cond_resched(); + ptr = shmem_swp_map(subdir); + } + } + shmem_swp_unmap(ptr); + return freed; } -/* - * Pagevec may contain swap entries, so shuffle up pages before releasing. - */ -static void shmem_pagevec_release(struct pagevec *pvec) +static void shmem_free_pages(struct list_head *next) { - int i, j; - - for (i = 0, j = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - if (!radix_tree_exceptional_entry(page)) - pvec->pages[j++] = page; - } - pvec->nr = j; - pagevec_release(pvec); + struct page *page; + int freed = 0; + + do { + page = container_of(next, struct page, lru); + next = next->next; + shmem_dir_free(page); + freed++; + if (freed >= LATENCY_LIMIT) { + cond_resched(); + freed = 0; + } + } while (next); } -/* - * Remove range of pages and swap entries from radix tree, and free them. 
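The LATENCY_LIMIT chunking in shmem_map_and_free_swp() above is a general shape for long kernel loops: do a bounded batch of work, then offer to reschedule. A minimal sketch of that shape under stated assumptions (process_one() is hypothetical; 64 mirrors LATENCY_LIMIT):

extern void process_one(unsigned long idx);	/* hypothetical */

static void sketch_batched_walk(unsigned long nr)
{
	unsigned long i, end;

	for (i = 0; i < nr; i = end) {
		end = min(i + 64UL, nr);
		while (i < end)
			process_one(i++);
		cond_resched();		/* bound scheduling latency */
	}
}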
- */ -void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) { - struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); - pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); - pgoff_t end = (lend >> PAGE_CACHE_SHIFT); - struct pagevec pvec; - pgoff_t indices[PAGEVEC_SIZE]; + unsigned long idx; + unsigned long size; + unsigned long limit; + unsigned long stage; + unsigned long diroff; + struct page **dir; + struct page *topdir; + struct page *middir; + struct page *subdir; + swp_entry_t *ptr; + LIST_HEAD(pages_to_free); + long nr_pages_to_free = 0; long nr_swaps_freed = 0; - pgoff_t index; - int i; - - BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); - - pagevec_init(&pvec, 0); - index = start; - while (index <= end) { - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, - pvec.pages, indices); - if (!pvec.nr) - break; - mem_cgroup_uncharge_start(); - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; + int offset; + int freed; + int punch_hole; + spinlock_t *needs_lock; + spinlock_t *punch_lock; + unsigned long upper_limit; - index = indices[i]; - if (index > end) - break; + truncate_inode_pages_range(inode->i_mapping, start, end); - if (radix_tree_exceptional_entry(page)) { - nr_swaps_freed += !shmem_free_swap(mapping, - index, page); - continue; - } + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (idx >= info->next_index) + return; - if (!trylock_page(page)) - continue; - if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); - truncate_inode_page(mapping, page); - } - unlock_page(page); + spin_lock(&info->lock); + info->flags |= SHMEM_TRUNCATE; + if (likely(end == (loff_t) -1)) { + limit = info->next_index; + upper_limit = SHMEM_MAX_INDEX; + info->next_index = idx; + needs_lock = NULL; + punch_hole = 0; + } else { + if (end + 1 >= inode->i_size) { /* we may free a little more */ + limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + upper_limit = SHMEM_MAX_INDEX; + } else { + limit = (end + 1) >> PAGE_CACHE_SHIFT; + upper_limit = limit; } - shmem_pagevec_release(&pvec); - mem_cgroup_uncharge_end(); - cond_resched(); - index++; + needs_lock = &info->lock; + punch_hole = 1; } - if (partial) { - struct page *page = NULL; - shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); - if (page) { - zero_user_segment(page, partial, PAGE_CACHE_SIZE); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - } + topdir = info->i_indirect; + if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { + info->i_indirect = NULL; + nr_pages_to_free++; + list_add(&topdir->lru, &pages_to_free); } + spin_unlock(&info->lock); - index = start; - for ( ; ; ) { - cond_resched(); - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, - pvec.pages, indices); - if (!pvec.nr) { - if (index == start) - break; - index = start; - continue; - } - if (index == start && indices[0] > end) { - shmem_pagevec_release(&pvec); - break; - } - mem_cgroup_uncharge_start(); - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; + if (info->swapped && idx < SHMEM_NR_DIRECT) { + ptr = info->i_direct; + size = limit; + if (size > 
SHMEM_NR_DIRECT) + size = SHMEM_NR_DIRECT; + nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); + } - index = indices[i]; - if (index > end) - break; + /* + * If there are no indirect blocks or we are punching a hole + * below indirect blocks, nothing to be done. + */ + if (!topdir || limit <= SHMEM_NR_DIRECT) + goto done2; - if (radix_tree_exceptional_entry(page)) { - nr_swaps_freed += !shmem_free_swap(mapping, - index, page); - continue; + /* + * The truncation case has already dropped info->lock, and we're safe + * because i_size and next_index have already been lowered, preventing + * access beyond. But in the punch_hole case, we still need to take + * the lock when updating the swap directory, because there might be + * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or + * shmem_writepage. However, whenever we find we can remove a whole + * directory page (not at the misaligned start or end of the range), + * we first NULLify its pointer in the level above, and then have no + * need to take the lock when updating its contents: needs_lock and + * punch_lock (either pointing to info->lock or NULL) manage this. + */ + + upper_limit -= SHMEM_NR_DIRECT; + limit -= SHMEM_NR_DIRECT; + idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; + offset = idx % ENTRIES_PER_PAGE; + idx -= offset; + + dir = shmem_dir_map(topdir); + stage = ENTRIES_PER_PAGEPAGE/2; + if (idx < ENTRIES_PER_PAGEPAGE/2) { + middir = topdir; + diroff = idx/ENTRIES_PER_PAGE; + } else { + dir += ENTRIES_PER_PAGE/2; + dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; + while (stage <= idx) + stage += ENTRIES_PER_PAGEPAGE; + middir = *dir; + if (*dir) { + diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % + ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; + if (!diroff && !offset && upper_limit >= stage) { + if (needs_lock) { + spin_lock(needs_lock); + *dir = NULL; + spin_unlock(needs_lock); + needs_lock = NULL; + } else + *dir = NULL; + nr_pages_to_free++; + list_add(&middir->lru, &pages_to_free); } + shmem_dir_unmap(dir); + dir = shmem_dir_map(middir); + } else { + diroff = 0; + offset = 0; + idx = stage; + } + } - lock_page(page); - if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); - truncate_inode_page(mapping, page); + for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { + if (unlikely(idx == stage)) { + shmem_dir_unmap(dir); + dir = shmem_dir_map(topdir) + + ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; + while (!*dir) { + dir++; + idx += ENTRIES_PER_PAGEPAGE; + if (idx >= limit) + goto done1; } - unlock_page(page); + stage = idx + ENTRIES_PER_PAGEPAGE; + middir = *dir; + if (punch_hole) + needs_lock = &info->lock; + if (upper_limit >= stage) { + if (needs_lock) { + spin_lock(needs_lock); + *dir = NULL; + spin_unlock(needs_lock); + needs_lock = NULL; + } else + *dir = NULL; + nr_pages_to_free++; + list_add(&middir->lru, &pages_to_free); + } + shmem_dir_unmap(dir); + cond_resched(); + dir = shmem_dir_map(middir); + diroff = 0; } - shmem_pagevec_release(&pvec); - mem_cgroup_uncharge_end(); - index++; + punch_lock = needs_lock; + subdir = dir[diroff]; + if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { + if (needs_lock) { + spin_lock(needs_lock); + dir[diroff] = NULL; + spin_unlock(needs_lock); + punch_lock = NULL; + } else + dir[diroff] = NULL; + nr_pages_to_free++; + list_add(&subdir->lru, &pages_to_free); + } + if (subdir && page_private(subdir) /* has swap entries */) { + size = limit - idx; + if (size > ENTRIES_PER_PAGE) + size = ENTRIES_PER_PAGE; + freed 
= shmem_map_and_free_swp(subdir, + offset, size, &dir, punch_lock); + if (!dir) + dir = shmem_dir_map(middir); + nr_swaps_freed += freed; + if (offset || punch_lock) { + spin_lock(&info->lock); + set_page_private(subdir, + page_private(subdir) - freed); + spin_unlock(&info->lock); + } else + BUG_ON(page_private(subdir) != freed); + } + offset = 0; + } +done1: + shmem_dir_unmap(dir); +done2: + if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { + /* + * Call truncate_inode_pages again: racing shmem_unuse_inode + * may have swizzled a page in from swap since + * truncate_pagecache or generic_delete_inode did it, before we + * lowered next_index. Also, though shmem_getpage checks + * i_size before adding to cache, no recheck after: so fix the + * narrow window there too. + */ + truncate_inode_pages_range(inode->i_mapping, start, end); } spin_lock(&info->lock); + info->flags &= ~SHMEM_TRUNCATE; info->swapped -= nr_swaps_freed; + if (nr_pages_to_free) + shmem_free_blocks(inode, nr_pages_to_free); shmem_recalc_inode(inode); spin_unlock(&info->lock); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; + /* + * Empty swap vector directory pages to be freed? + */ + if (!list_empty(&pages_to_free)) { + pages_to_free.prev->next = NULL; + shmem_free_pages(pages_to_free.next); + } } EXPORT_SYMBOL_GPL(shmem_truncate_range); @@ -520,7 +780,37 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; + struct page *page = NULL; + if (newsize < oldsize) { + /* + * If truncating down to a partial page, then + * if that page is already allocated, hold it + * in memory until the truncation is over, so + * truncate_partial_page cannot miss it were + * it assigned to swap. + */ + if (newsize & (PAGE_CACHE_SIZE-1)) { + (void) shmem_getpage(inode, + newsize >> PAGE_CACHE_SHIFT, + &page, SGP_READ, NULL); + if (page) + unlock_page(page); + } + /* + * Reset SHMEM_PAGEIN flag so that shmem_truncate can + * detect if any pages might have been added to cache + * after truncate_inode_pages. But we needn't bother + * if it's being fully truncated to zero-length: the + * nrpages check is efficient enough in that case. + */ + if (newsize) { + struct shmem_inode_info *info = SHMEM_I(inode); + spin_lock(&info->lock); + info->flags &= ~SHMEM_PAGEIN; + spin_unlock(&info->lock); + } + } if (newsize != oldsize) { i_size_write(inode, newsize); inode->i_ctime = inode->i_mtime = CURRENT_TIME; @@ -532,6 +822,8 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) /* unmap again to remove racily COWed private pages */ unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); } + if (page) + page_cache_release(page); } setattr_copy(inode, attr); @@ -556,8 +848,7 @@ static void shmem_evict_inode(struct inode *inode) list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } - } else - kfree(info->symlink); + } list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { kfree(xattr->name); @@ -568,27 +859,106 @@ static void shmem_evict_inode(struct inode *inode) end_writeback(inode); } -/* - * If swap found in inode, free it and move page from swapcache to filecache. 
- */ -static int shmem_unuse_inode(struct shmem_inode_info *info, - swp_entry_t swap, struct page *page) +static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) { - struct address_space *mapping = info->vfs_inode.i_mapping; - void *radswap; - pgoff_t index; + swp_entry_t *ptr; + + for (ptr = dir; ptr < edir; ptr++) { + if (ptr->val == entry.val) + return ptr - dir; + } + return -1; +} + +static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) +{ + struct address_space *mapping; + unsigned long idx; + unsigned long size; + unsigned long limit; + unsigned long stage; + struct page **dir; + struct page *subdir; + swp_entry_t *ptr; + int offset; int error; - radswap = swp_to_radix_entry(swap); - index = radix_tree_locate_item(&mapping->page_tree, radswap); - if (index == -1) - return 0; + idx = 0; + ptr = info->i_direct; + spin_lock(&info->lock); + if (!info->swapped) { + list_del_init(&info->swaplist); + goto lost2; + } + limit = info->next_index; + size = limit; + if (size > SHMEM_NR_DIRECT) + size = SHMEM_NR_DIRECT; + offset = shmem_find_swp(entry, ptr, ptr+size); + if (offset >= 0) { + shmem_swp_balance_unmap(); + goto found; + } + if (!info->i_indirect) + goto lost2; + + dir = shmem_dir_map(info->i_indirect); + stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; + + for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { + if (unlikely(idx == stage)) { + shmem_dir_unmap(dir-1); + if (cond_resched_lock(&info->lock)) { + /* check it has not been truncated */ + if (limit > info->next_index) { + limit = info->next_index; + if (idx >= limit) + goto lost2; + } + } + dir = shmem_dir_map(info->i_indirect) + + ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; + while (!*dir) { + dir++; + idx += ENTRIES_PER_PAGEPAGE; + if (idx >= limit) + goto lost1; + } + stage = idx + ENTRIES_PER_PAGEPAGE; + subdir = *dir; + shmem_dir_unmap(dir); + dir = shmem_dir_map(subdir); + } + subdir = *dir; + if (subdir && page_private(subdir)) { + ptr = shmem_swp_map(subdir); + size = limit - idx; + if (size > ENTRIES_PER_PAGE) + size = ENTRIES_PER_PAGE; + offset = shmem_find_swp(entry, ptr, ptr+size); + shmem_swp_unmap(ptr); + if (offset >= 0) { + shmem_dir_unmap(dir); + ptr = shmem_swp_map(subdir); + goto found; + } + } + } +lost1: + shmem_dir_unmap(dir-1); +lost2: + spin_unlock(&info->lock); + return 0; +found: + idx += offset; + ptr += offset; /* * Move _head_ to start search for next from here. * But be careful: shmem_evict_inode checks list_empty without taking * mutex, and there's an instant in list_move_tail when info->swaplist - * would appear empty, if it were the only one on shmem_swaplist. + * would appear empty, if it were the only one on shmem_swaplist. We + * could avoid doing it if inode NULL; or use this minor optimization. */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); @@ -598,34 +968,29 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). 
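The list_move_tail(&shmem_swaplist, &info->swaplist) just above moves the list head itself, so the next scan of the swaplist begins at the inode where a match was found. A hedged standalone sketch of that rotation idiom (struct item and remember_hit() are hypothetical):

#include <linux/list.h>

/*
 * Sketch: rotate the head so iteration resumes at 'hit' next time.
 * The trick is that the moved node is the head, not an entry.
 */
struct item {
	struct list_head list;
};

static void remember_hit(struct list_head *head, struct item *hit)
{
	if (head->next != &hit->list)
		list_move_tail(head, &hit->list);
}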
*/ - error = shmem_add_to_page_cache(page, mapping, index, - GFP_NOWAIT, radswap); + mapping = info->vfs_inode.i_mapping; + error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); /* which does mem_cgroup_uncharge_cache_page on error */ if (error != -ENOMEM) { - /* - * Truncation and eviction use free_swap_and_cache(), which - * only does trylock page: if we raced, best clean up here. - */ delete_from_swap_cache(page); set_page_dirty(page); - if (!error) { - spin_lock(&info->lock); - info->swapped--; - spin_unlock(&info->lock); - swap_free(swap); - } + info->flags |= SHMEM_PAGEIN; + shmem_swp_set(info, ptr, 0); + swap_free(entry); error = 1; /* not an error, but entry was found */ } + shmem_swp_unmap(ptr); + spin_unlock(&info->lock); return error; } /* - * Search through swapped inodes to find and replace swap by page. + * shmem_unuse() searches for a possibly swapped-out shmem page. */ -int shmem_unuse(swp_entry_t swap, struct page *page) +int shmem_unuse(swp_entry_t entry, struct page *page) { - struct list_head *this, *next; + struct list_head *p, *next; struct shmem_inode_info *info; int found = 0; int error; @@ -634,25 +999,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page) * Charge page using GFP_KERNEL while we can wait, before taking * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. + * add_to_page_cache() will be called with GFP_NOWAIT. */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; - /* No radix_tree_preload: swap entry keeps a place for page in tree */ + /* + * Try to preload while we can wait, to not make a habit of + * draining atomic reserves; but don't latch on to this cpu, + * it's okay if sometimes we get rescheduled after this. + */ + error = radix_tree_preload(GFP_KERNEL); + if (error) + goto uncharge; + radix_tree_preload_end(); mutex_lock(&shmem_swaplist_mutex); - list_for_each_safe(this, next, &shmem_swaplist) { - info = list_entry(this, struct shmem_inode_info, swaplist); - if (info->swapped) - found = shmem_unuse_inode(info, swap, page); - else - list_del_init(&info->swaplist); + list_for_each_safe(p, next, &shmem_swaplist) { + info = list_entry(p, struct shmem_inode_info, swaplist); + found = shmem_unuse_inode(info, entry, page); cond_resched(); if (found) break; } mutex_unlock(&shmem_swaplist_mutex); +uncharge: if (!found) mem_cgroup_uncharge_cache_page(page); if (found < 0) @@ -669,10 +1041,10 @@ int shmem_unuse(swp_entry_t swap, struct page *page) static int shmem_writepage(struct page *page, struct writeback_control *wbc) { struct shmem_inode_info *info; + swp_entry_t *entry, swap; struct address_space *mapping; + unsigned long index; struct inode *inode; - swp_entry_t swap; - pgoff_t index; BUG_ON(!PageLocked(page)); mapping = page->mapping; @@ -701,32 +1073,50 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) /* * Add inode to shmem_unuse()'s list of swapped-out inodes, - * if it's not already there. Do it now before the page is - * moved to swap cache, when its pagelock no longer protects + * if it's not already there. Do it now because we cannot take + * mutex while holding spinlock, and must do so before the page + * is moved to swap cache, when its pagelock no longer protects * the inode from eviction. But don't unlock the mutex until - * we've incremented swapped, because shmem_unuse_inode() will - * prune a !swapped inode from the swaplist under this mutex.
+ * we've taken the spinlock, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under both locks. */ mutex_lock(&shmem_swaplist_mutex); if (list_empty(&info->swaplist)) list_add_tail(&info->swaplist, &shmem_swaplist); + spin_lock(&info->lock); + mutex_unlock(&shmem_swaplist_mutex); + + if (index >= info->next_index) { + BUG_ON(!(info->flags & SHMEM_TRUNCATE)); + goto unlock; + } + entry = shmem_swp_entry(info, index, NULL); + if (entry->val) { + WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ + free_swap_and_cache(*entry); + shmem_swp_set(info, entry, 0); + } + shmem_recalc_inode(inode); + if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { + delete_from_page_cache(page); + shmem_swp_set(info, entry, swap.val); + shmem_swp_unmap(entry); swap_shmem_alloc(swap); - shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); - - spin_lock(&info->lock); - info->swapped++; - shmem_recalc_inode(inode); spin_unlock(&info->lock); - - mutex_unlock(&shmem_swaplist_mutex); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); return 0; } - mutex_unlock(&shmem_swaplist_mutex); + shmem_swp_unmap(entry); +unlock: + spin_unlock(&info->lock); + /* + * add_to_swap_cache() doesn't return -EEXIST, so we can safely + * clear SWAP_HAS_CACHE flag. + */ swapcache_free(swap, NULL); redirty: set_page_dirty(page); @@ -763,33 +1153,35 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) } #endif /* CONFIG_TMPFS */ -static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, - struct shmem_inode_info *info, pgoff_t index) +static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, + struct shmem_inode_info *info, unsigned long idx) { struct mempolicy mpol, *spol; struct vm_area_struct pvma; + struct page *page; spol = mpol_cond_copy(&mpol, - mpol_shared_policy_lookup(&info->policy, index)); + mpol_shared_policy_lookup(&info->policy, idx)); /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = index; + pvma.vm_pgoff = idx; pvma.vm_ops = NULL; pvma.vm_policy = spol; - return swapin_readahead(swap, gfp, &pvma, 0); + page = swapin_readahead(entry, gfp, &pvma, 0); + return page; } static struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, pgoff_t index) + struct shmem_inode_info *info, unsigned long idx) { struct vm_area_struct pvma; /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = index; + pvma.vm_pgoff = idx; pvma.vm_ops = NULL; - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); /* * alloc_page_vma() will drop the shared policy reference @@ -798,19 +1190,19 @@ static struct page *shmem_alloc_page(gfp_t gfp, } #else /* !CONFIG_NUMA */ #ifdef CONFIG_TMPFS -static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) +static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) { } #endif /* CONFIG_TMPFS */ -static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, - struct shmem_inode_info *info, pgoff_t index) +static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, + struct shmem_inode_info *info, unsigned long idx) { - return swapin_readahead(swap, gfp, NULL, 0); + return swapin_readahead(entry, gfp, NULL, 0); } static inline struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, pgoff_t index) + struct shmem_inode_info *info, unsigned long idx) { return alloc_page(gfp); } @@ -830,190 
+1222,243 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) * vm. If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache */ -static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, +static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) { struct address_space *mapping = inode->i_mapping; - struct shmem_inode_info *info; + struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo; struct page *page; + struct page *prealloc_page = NULL; + swp_entry_t *entry; swp_entry_t swap; int error; - int once = 0; + int ret; - if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) + if (idx >= SHMEM_MAX_INDEX) return -EFBIG; repeat: - swap.val = 0; - page = find_lock_page(mapping, index); - if (radix_tree_exceptional_entry(page)) { - swap = radix_to_swp_entry(page); - page = NULL; - } - - if (sgp != SGP_WRITE && - ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { - error = -EINVAL; - goto failed; - } - - if (page || (sgp == SGP_READ && !swap.val)) { + page = find_lock_page(mapping, idx); + if (page) { /* * Once we can get the page lock, it must be uptodate: * if there were an error in reading back from swap, * the page would not be inserted into the filecache. */ - BUG_ON(page && !PageUptodate(page)); - *pagep = page; - return 0; + BUG_ON(!PageUptodate(page)); + goto done; } /* - * Fast cache lookup did not find it: - * bring it back from swap or allocate. + * Try to preload while we can wait, to not make a habit of + * draining atomic reserves; but don't latch on to this cpu. */ - info = SHMEM_I(inode); - sbinfo = SHMEM_SB(inode->i_sb); + error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); + if (error) + goto out; + radix_tree_preload_end(); + + if (sgp != SGP_READ && !prealloc_page) { + prealloc_page = shmem_alloc_page(gfp, info, idx); + if (prealloc_page) { + SetPageSwapBacked(prealloc_page); + if (mem_cgroup_cache_charge(prealloc_page, + current->mm, GFP_KERNEL)) { + page_cache_release(prealloc_page); + prealloc_page = NULL; + } + } + } + + spin_lock(&info->lock); + shmem_recalc_inode(inode); + entry = shmem_swp_alloc(info, idx, sgp, gfp); + if (IS_ERR(entry)) { + spin_unlock(&info->lock); + error = PTR_ERR(entry); + goto out; + } + swap = *entry; if (swap.val) { /* Look it up and read it in.. 
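The back-to-back radix_tree_preload()/radix_tree_preload_end() above (also used in shmem_unuse()) only warms the per-CPU node pool. The conventional pairing keeps preemption disabled across the insert; a hedged sketch for contrast (sketch_insert() and sketch_lock are hypothetical):

/*
 * Sketch of the usual radix_tree_preload() pairing: preload succeeds
 * with preemption disabled, keeping the preallocated nodes on this
 * CPU until radix_tree_preload_end() re-enables preemption.
 */
static DEFINE_SPINLOCK(sketch_lock);	/* hypothetical */

static int sketch_insert(struct radix_tree_root *tree,
			 unsigned long index, void *item)
{
	int error = radix_tree_preload(GFP_KERNEL);

	if (error)
		return error;
	spin_lock(&sketch_lock);
	error = radix_tree_insert(tree, index, item);
	spin_unlock(&sketch_lock);
	radix_tree_preload_end();	/* re-enables preemption */
	return error;
}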
*/ page = lookup_swap_cache(swap); if (!page) { + shmem_swp_unmap(entry); + spin_unlock(&info->lock); /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; - page = shmem_swapin(swap, gfp, info, index); + page = shmem_swapin(swap, gfp, info, idx); if (!page) { - error = -ENOMEM; - goto failed; + spin_lock(&info->lock); + entry = shmem_swp_alloc(info, idx, sgp, gfp); + if (IS_ERR(entry)) + error = PTR_ERR(entry); + else { + if (entry->val == swap.val) + error = -ENOMEM; + shmem_swp_unmap(entry); + } + spin_unlock(&info->lock); + if (error) + goto out; + goto repeat; } + wait_on_page_locked(page); + page_cache_release(page); + goto repeat; } /* We have to do this with page locked to prevent races */ - lock_page(page); + if (!trylock_page(page)) { + shmem_swp_unmap(entry); + spin_unlock(&info->lock); + wait_on_page_locked(page); + page_cache_release(page); + goto repeat; + } + if (PageWriteback(page)) { + shmem_swp_unmap(entry); + spin_unlock(&info->lock); + wait_on_page_writeback(page); + unlock_page(page); + page_cache_release(page); + goto repeat; + } if (!PageUptodate(page)) { + shmem_swp_unmap(entry); + spin_unlock(&info->lock); + unlock_page(page); + page_cache_release(page); error = -EIO; - goto failed; - } - wait_on_page_writeback(page); - - /* Someone may have already done it for us */ - if (page->mapping) { - if (page->mapping == mapping && - page->index == index) - goto done; - error = -EEXIST; - goto failed; + goto out; } - error = mem_cgroup_cache_charge(page, current->mm, - gfp & GFP_RECLAIM_MASK); - if (!error) - error = shmem_add_to_page_cache(page, mapping, index, - gfp, swp_to_radix_entry(swap)); - if (error) - goto failed; - - spin_lock(&info->lock); - info->swapped--; - shmem_recalc_inode(inode); - spin_unlock(&info->lock); + error = add_to_page_cache_locked(page, mapping, + idx, GFP_NOWAIT); + if (error) { + shmem_swp_unmap(entry); + spin_unlock(&info->lock); + if (error == -ENOMEM) { + /* + * reclaim from proper memory cgroup and + * call memcg's OOM if needed. 
+ */ + error = mem_cgroup_shmem_charge_fallback( + page, current->mm, gfp); + if (error) { + unlock_page(page); + page_cache_release(page); + goto out; + } + } + unlock_page(page); + page_cache_release(page); + goto repeat; + } + info->flags |= SHMEM_PAGEIN; + shmem_swp_set(info, entry, 0); + shmem_swp_unmap(entry); delete_from_swap_cache(page); + spin_unlock(&info->lock); set_page_dirty(page); swap_free(swap); - } else { - if (shmem_acct_block(info->flags)) { - error = -ENOSPC; - goto failed; + } else if (sgp == SGP_READ) { + shmem_swp_unmap(entry); + page = find_get_page(mapping, idx); + if (page && !trylock_page(page)) { + spin_unlock(&info->lock); + wait_on_page_locked(page); + page_cache_release(page); + goto repeat; } + spin_unlock(&info->lock); + + } else if (prealloc_page) { + shmem_swp_unmap(entry); + sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks) >= 0) { - error = -ENOSPC; - goto unacct; - } + sbinfo->max_blocks) >= 0 || + shmem_acct_block(info->flags)) + goto nospace; percpu_counter_inc(&sbinfo->used_blocks); + inode->i_blocks += BLOCKS_PER_PAGE; + } else if (shmem_acct_block(info->flags)) + goto nospace; + + page = prealloc_page; + prealloc_page = NULL; + + entry = shmem_swp_alloc(info, idx, sgp, gfp); + if (IS_ERR(entry)) + error = PTR_ERR(entry); + else { + swap = *entry; + shmem_swp_unmap(entry); } - - page = shmem_alloc_page(gfp, info, index); - if (!page) { - error = -ENOMEM; - goto decused; + ret = error || swap.val; + if (ret) + mem_cgroup_uncharge_cache_page(page); + else + ret = add_to_page_cache_lru(page, mapping, + idx, GFP_NOWAIT); + /* + * At add_to_page_cache_lru() failure, + * uncharge will be done automatically. + */ + if (ret) { + shmem_unacct_blocks(info->flags, 1); + shmem_free_blocks(inode, 1); + spin_unlock(&info->lock); + page_cache_release(page); + if (error) + goto out; + goto repeat; } - SetPageSwapBacked(page); - __set_page_locked(page); - error = mem_cgroup_cache_charge(page, current->mm, - gfp & GFP_RECLAIM_MASK); - if (!error) - error = shmem_add_to_page_cache(page, mapping, index, - gfp, NULL); - if (error) - goto decused; - lru_cache_add_anon(page); - - spin_lock(&info->lock); + info->flags |= SHMEM_PAGEIN; info->alloced++; - inode->i_blocks += BLOCKS_PER_PAGE; - shmem_recalc_inode(inode); spin_unlock(&info->lock); - clear_highpage(page); flush_dcache_page(page); SetPageUptodate(page); if (sgp == SGP_DIRTY) set_page_dirty(page); + + } else { + spin_unlock(&info->lock); + error = -ENOMEM; + goto out; } done: - /* Perhaps the file has been truncated since we checked */ - if (sgp != SGP_WRITE && - ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { - error = -EINVAL; - goto trunc; - } *pagep = page; - return 0; + error = 0; +out: + if (prealloc_page) { + mem_cgroup_uncharge_cache_page(prealloc_page); + page_cache_release(prealloc_page); + } + return error; +nospace: /* - * Error recovery. + * Perhaps the page was brought in from swap between find_lock_page + * and taking info->lock? We allow for that at add_to_page_cache_lru, + * but must also avoid reporting a spurious ENOSPC while working on a + * full tmpfs. 
*/ -trunc: - ClearPageDirty(page); - delete_from_page_cache(page); - spin_lock(&info->lock); - info->alloced--; - inode->i_blocks -= BLOCKS_PER_PAGE; + page = find_get_page(mapping, idx); spin_unlock(&info->lock); -decused: - if (sbinfo->max_blocks) - percpu_counter_add(&sbinfo->used_blocks, -1); -unacct: - shmem_unacct_blocks(info->flags, 1); -failed: - if (swap.val && error != -EINVAL) { - struct page *test = find_get_page(mapping, index); - if (test && !radix_tree_exceptional_entry(test)) - page_cache_release(test); - /* Have another try if the entry has changed */ - if (test != swp_to_radix_entry(swap)) - error = -EEXIST; - } if (page) { - unlock_page(page); page_cache_release(page); - } - if (error == -ENOSPC && !once++) { - info = SHMEM_I(inode); - spin_lock(&info->lock); - shmem_recalc_inode(inode); - spin_unlock(&info->lock); goto repeat; } - if (error == -EEXIST) - goto repeat; - return error; + error = -ENOSPC; + goto out; } static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -1022,6 +1467,9 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int error; int ret = VM_FAULT_LOCKED; + if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + return VM_FAULT_SIGBUS; + error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); @@ -1034,20 +1482,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } #ifdef CONFIG_NUMA -static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) +static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); + struct inode *i = vma->vm_file->f_path.dentry->d_inode; + return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); } static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - pgoff_t index; + struct inode *i = vma->vm_file->f_path.dentry->d_inode; + unsigned long idx; - index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); + idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); } #endif @@ -1145,7 +1593,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; -static const struct inode_operations shmem_short_symlink_operations; +static const struct inode_operations shmem_symlink_inline_operations; static int shmem_write_begin(struct file *file, struct address_space *mapping, @@ -1178,8 +1626,7 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ { struct inode *inode = filp->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; - pgoff_t index; - unsigned long offset; + unsigned long index, offset; enum sgp_type sgp = SGP_READ; /* @@ -1195,8 +1642,7 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ for (;;) { struct page *page = NULL; - pgoff_t end_index; - unsigned long nr, ret; + unsigned long end_index, nr, ret; loff_t i_size = i_size_read(inode); end_index = i_size >> PAGE_CACHE_SHIFT; @@ -1434,9 +1880,8 @@ static int shmem_statfs(struct dentry *dentry, struct 
kstatfs *buf) buf->f_namelen = NAME_MAX; if (sbinfo->max_blocks) { buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = - buf->f_bfree = sbinfo->max_blocks - - percpu_counter_sum(&sbinfo->used_blocks); + buf->f_bavail = buf->f_bfree = + sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); } if (sbinfo->max_inodes) { buf->f_files = sbinfo->max_inodes; @@ -1610,13 +2055,10 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; - if (len <= SHORT_SYMLINK_LEN) { - info->symlink = kmemdup(symname, len, GFP_KERNEL); - if (!info->symlink) { - iput(inode); - return -ENOMEM; - } - inode->i_op = &shmem_short_symlink_operations; + if (len <= SHMEM_SYMLINK_INLINE_LEN) { + /* do it inline */ + memcpy(info->inline_symlink, symname, len); + inode->i_op = &shmem_symlink_inline_operations; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); if (error) { @@ -1639,17 +2081,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return 0; } -static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) +static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) { - nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink); + nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); return NULL; } static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; - int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); - nd_set_link(nd, error ? ERR_PTR(error) : kmap(page)); + int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); + nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); if (page) unlock_page(page); return page; @@ -1760,6 +2202,7 @@ static int shmem_xattr_set(struct dentry *dentry, const char *name, return err; } + static const struct xattr_handler *shmem_xattr_handlers[] = { #ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, @@ -1889,9 +2332,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) } #endif /* CONFIG_TMPFS_XATTR */ -static const struct inode_operations shmem_short_symlink_operations = { +static const struct inode_operations shmem_symlink_inline_operations = { .readlink = generic_readlink, - .follow_link = shmem_follow_short_symlink, + .follow_link = shmem_follow_link_inline, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, .getxattr = shmem_getxattr, @@ -2091,7 +2534,8 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) if (config.max_inodes < inodes) goto out; /* - * Those tests disallow limited->unlimited while any are in use; + * Those tests also disallow limited->unlimited while any are in + * use, so i_blocks will always be zero when max_blocks is zero; * but we must separately disallow unlimited->limited, because * in that case we have no record of how much is already in use. 
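* (For example, going from an unlimited tmpfs to size=100M would require * the number of blocks already in use to validate the new limit, and an * unlimited tmpfs never counted them.)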
*/ @@ -2183,7 +2627,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; - sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = TMPFS_MAGIC; @@ -2218,14 +2662,14 @@ static struct kmem_cache *shmem_inode_cachep; static struct inode *shmem_alloc_inode(struct super_block *sb) { - struct shmem_inode_info *info; - info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); - if (!info) + struct shmem_inode_info *p; + p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); + if (!p) return NULL; - return &info->vfs_inode; + return &p->vfs_inode; } -static void shmem_destroy_callback(struct rcu_head *head) +static void shmem_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); INIT_LIST_HEAD(&inode->i_dentry); @@ -2234,26 +2678,29 @@ static void shmem_destroy_callback(struct rcu_head *head) static void shmem_destroy_inode(struct inode *inode) { - if ((inode->i_mode & S_IFMT) == S_IFREG) + if ((inode->i_mode & S_IFMT) == S_IFREG) { + /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); - call_rcu(&inode->i_rcu, shmem_destroy_callback); + } + call_rcu(&inode->i_rcu, shmem_i_callback); } -static void shmem_init_inode(void *foo) +static void init_once(void *foo) { - struct shmem_inode_info *info = foo; - inode_init_once(&info->vfs_inode); + struct shmem_inode_info *p = (struct shmem_inode_info *) foo; + + inode_init_once(&p->vfs_inode); } -static int shmem_init_inodecache(void) +static int init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), - 0, SLAB_PANIC, shmem_init_inode); + 0, SLAB_PANIC, init_once); return 0; } -static void shmem_destroy_inodecache(void) +static void destroy_inodecache(void) { kmem_cache_destroy(shmem_inode_cachep); } @@ -2350,20 +2797,21 @@ static const struct vm_operations_struct shmem_vm_ops = { #endif }; + static struct dentry *shmem_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_nodev(fs_type, flags, data, shmem_fill_super); } -static struct file_system_type shmem_fs_type = { +static struct file_system_type tmpfs_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", .mount = shmem_mount, .kill_sb = kill_litter_super, }; -int __init shmem_init(void) +int __init init_tmpfs(void) { int error; @@ -2371,18 +2819,18 @@ int __init shmem_init(void) if (error) goto out4; - error = shmem_init_inodecache(); + error = init_inodecache(); if (error) goto out3; - error = register_filesystem(&shmem_fs_type); + error = register_filesystem(&tmpfs_fs_type); if (error) { printk(KERN_ERR "Could not register tmpfs\n"); goto out2; } - shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER, - shmem_fs_type.name, NULL); + shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, + tmpfs_fs_type.name, NULL); if (IS_ERR(shm_mnt)) { error = PTR_ERR(shm_mnt); printk(KERN_ERR "Could not kern_mount tmpfs\n"); @@ -2391,9 +2839,9 @@ int __init shmem_init(void) return 0; out1: - unregister_filesystem(&shmem_fs_type); + unregister_filesystem(&tmpfs_fs_type); out2: - shmem_destroy_inodecache(); + destroy_inodecache(); out3: bdi_destroy(&shmem_backing_dev_info); out4: @@ -2401,6 +2849,45 @@ int __init shmem_init(void) return error; } +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +/** + * mem_cgroup_get_shmem_target 
- find a page or entry assigned to the shmem file + * @inode: the inode to be searched + * @pgoff: the offset to be searched + * @pagep: pointer in which the found page is returned + * @ent: pointer in which the found swap entry is returned + * + * If a page is found, its refcount is incremented; the caller is responsible + * for releasing that reference. + */ +void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, + struct page **pagep, swp_entry_t *ent) +{ + swp_entry_t entry = { .val = 0 }, *ptr; + struct page *page = NULL; + struct shmem_inode_info *info = SHMEM_I(inode); + + if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + goto out; + + spin_lock(&info->lock); + ptr = shmem_swp_entry(info, pgoff, NULL); +#ifdef CONFIG_SWAP + if (ptr && ptr->val) { + entry.val = ptr->val; + page = find_get_page(&swapper_space, entry.val); + } else +#endif + page = find_get_page(inode->i_mapping, pgoff); + if (ptr) + shmem_swp_unmap(ptr); + spin_unlock(&info->lock); +out: + *pagep = page; + *ent = entry; +} +#endif + #else /* !CONFIG_SHMEM */ /* @@ -2414,23 +2901,23 @@ int __init shmem_init(void) #include -static struct file_system_type shmem_fs_type = { +static struct file_system_type tmpfs_fs_type = { .name = "tmpfs", .mount = ramfs_mount, .kill_sb = kill_litter_super, }; -int __init shmem_init(void) +int __init init_tmpfs(void) { - BUG_ON(register_filesystem(&shmem_fs_type) != 0); + BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); - shm_mnt = kern_mount(&shmem_fs_type); + shm_mnt = kern_mount(&tmpfs_fs_type); BUG_ON(IS_ERR(shm_mnt)); return 0; } -int shmem_unuse(swp_entry_t swap, struct page *page) +int shmem_unuse(swp_entry_t entry, struct page *page) { return 0; } @@ -2440,17 +2927,43 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) return 0; } -void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) { - truncate_inode_pages_range(inode->i_mapping, lstart, lend); + truncate_inode_pages_range(inode->i_mapping, start, end); } EXPORT_SYMBOL_GPL(shmem_truncate_range); +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +/** + * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file + * @inode: the inode to be searched + * @pgoff: the offset to be searched + * @pagep: pointer in which the found page is returned + * @ent: pointer in which the found swap entry is returned + * + * If a page is found, its refcount is incremented; the caller is responsible + * for releasing that reference.
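+ * + * A minimal caller sketch (hypothetical, for illustration only): + * + *	struct page *page; + *	swp_entry_t ent; + * + *	mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); + *	if (page) + *		page_cache_release(page); + * + * where page_cache_release() drops the reference taken above.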
+ */ +void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, + struct page **pagep, swp_entry_t *ent) +{ + struct page *page = NULL; + + if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + goto out; + page = find_get_page(inode->i_mapping, pgoff); +out: + *pagep = page; + *ent = (swp_entry_t){ .val = 0 }; +} +#endif + #define shmem_vm_ops generic_file_vm_ops #define shmem_file_operations ramfs_file_operations #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) +#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE #endif /* CONFIG_SHMEM */ @@ -2474,7 +2987,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags if (IS_ERR(shm_mnt)) return (void *)shm_mnt; - if (size < 0 || size > MAX_LFS_FILESIZE) + if (size < 0 || size > SHMEM_MAX_BYTES) return ERR_PTR(-EINVAL); if (shmem_acct_size(flags, size)) diff --git a/trunk/mm/slab.c b/trunk/mm/slab.c index 6d90a091fdca..95947400702b 100644 --- a/trunk/mm/slab.c +++ b/trunk/mm/slab.c @@ -622,51 +622,6 @@ int slab_is_available(void) static struct lock_class_key on_slab_l3_key; static struct lock_class_key on_slab_alc_key; -static struct lock_class_key debugobj_l3_key; -static struct lock_class_key debugobj_alc_key; - -static void slab_set_lock_classes(struct kmem_cache *cachep, - struct lock_class_key *l3_key, struct lock_class_key *alc_key, - int q) -{ - struct array_cache **alc; - struct kmem_list3 *l3; - int r; - - l3 = cachep->nodelists[q]; - if (!l3) - return; - - lockdep_set_class(&l3->list_lock, l3_key); - alc = l3->alien; - /* - * FIXME: This check for BAD_ALIEN_MAGIC - * should go away when common slab code is taught to - * work even without alien caches. - * Currently, non NUMA code returns BAD_ALIEN_MAGIC - * for alloc_alien_cache, - */ - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) - return; - for_each_node(r) { - if (alc[r]) - lockdep_set_class(&alc[r]->lock, alc_key); - } -} - -static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) -{ - slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node); -} - -static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) -{ - int node; - - for_each_online_node(node) - slab_set_debugobj_lock_classes_node(cachep, node); -} - static void init_node_lock_keys(int q) { struct cache_sizes *s = malloc_sizes; @@ -675,14 +630,29 @@ static void init_node_lock_keys(int q) return; for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { + struct array_cache **alc; struct kmem_list3 *l3; + int r; l3 = s->cs_cachep->nodelists[q]; if (!l3 || OFF_SLAB(s->cs_cachep)) continue; - - slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, - &on_slab_alc_key, q); + lockdep_set_class(&l3->list_lock, &on_slab_l3_key); + alc = l3->alien; + /* + * FIXME: This check for BAD_ALIEN_MAGIC + * should go away when common slab code is taught to + * work even without alien caches. 
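+ * (Alien caches are the per-node array_caches that batch frees of objects + * belonging to remote NUMA nodes; each has its own spinlock, which is why + * a lockdep class is set on every alc[r]->lock below.)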
+ * Currently, non NUMA code returns BAD_ALIEN_MAGIC + * for alloc_alien_cache. + */ + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) + continue; + for_each_node(r) { + if (alc[r]) + lockdep_set_class(&alc[r]->lock, + &on_slab_alc_key); + } } } @@ -701,14 +671,6 @@ static void init_node_lock_keys(int q) static inline void init_lock_keys(void) { } - -static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) -{ -} - -static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) -{ -} #endif /* @@ -1302,8 +1264,6 @@ static int __cpuinit cpuup_prepare(long cpu) spin_unlock_irq(&l3->list_lock); kfree(shared); free_alien_cache(alien); - if (cachep->flags & SLAB_DEBUG_OBJECTS) - slab_set_debugobj_lock_classes_node(cachep, node); } init_node_lock_keys(node); @@ -1666,9 +1626,6 @@ void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; - /* Annotate slab for lockdep -- annotate the malloc caches */ - init_lock_keys(); - /* 6) resize the head arrays to their final sizes */ mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) @@ -1679,6 +1636,9 @@ void __init kmem_cache_init_late(void) /* Done! */ g_cpucache_up = FULL; + /* Annotate slab for lockdep -- annotate the malloc caches */ + init_lock_keys(); + /* * Register a cpu startup notifier callback that initializes * cpu_cache_get for all new cpus @@ -2466,16 +2426,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, goto oops; } - if (flags & SLAB_DEBUG_OBJECTS) { - /* - * Would deadlock through slab_destroy()->call_rcu()-> - * debug_object_activate()->kmem_cache_alloc(). - */ - WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU); - - slab_set_debugobj_lock_classes(cachep); - } - /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); oops: diff --git a/trunk/mm/swapfile.c b/trunk/mm/swapfile.c index 17bc224bce68..1b8c33907242 100644 --- a/trunk/mm/swapfile.c +++ b/trunk/mm/swapfile.c @@ -1924,24 +1924,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p, /* * Find out how many pages are allowed for a single swap - * device. There are three limiting factors: 1) the number - * of bits for the swap offset in the swp_entry_t type, and - * 2) the number of bits in the swap pte as defined by the - * the different architectures, and 3) the number of free bits - * in an exceptional radix_tree entry. In order to find the - * largest possible bit mask, a swap entry with swap type 0 + * device. There are two limiting factors: 1) the number of + * bits for the swap offset in the swp_entry_t type and + * 2) the number of bits in a swap pte as defined by + * the different architectures. In order to find the + * largest possible bit mask, a swap entry with swap type 0 * and swap offset ~0UL is created, encoded to a swap pte, - * decoded to a swp_entry_t again, and finally the swap + * decoded to a swp_entry_t again, and finally the swap * offset is extracted. This will mask all the bits from * the initial ~0UL mask that can't be encoded in either * the swp_entry_t or the architecture definition of a - * swap pte. Then the same is done for a radix_tree entry. + * swap pte.
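+ * + * For example, on an architecture whose swap pte can encode only a + * 32-bit offset, the round trip swp_entry(0, ~0UL) -> pte -> swp_entry + * yields offset 0xffffffff, so maxpages below becomes 0x100000000.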
*/ maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))); - maxpages = swp_offset(radix_to_swp_entry( - swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; - + swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; if (maxpages > swap_header->info.last_page) { maxpages = swap_header->info.last_page + 1; /* p->max is an unsigned int: don't overflow it */ diff --git a/trunk/mm/truncate.c b/trunk/mm/truncate.c index b40ac6d4e86e..232eb2736a79 100644 --- a/trunk/mm/truncate.c +++ b/trunk/mm/truncate.c @@ -336,14 +336,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, unsigned long count = 0; int i; - /* - * Note: this function may get called on a shmem/tmpfs mapping: - * pagevec_lookup() might then return 0 prematurely (because it - * got a gangful of swap entries); but it's hardly worth worrying - * about - it can rarely have anything to free from such a mapping - * (most pages are dirty), and already skips over any difficulties. - */ - pagevec_init(&pvec, 0); while (index <= end && pagevec_lookup(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { diff --git a/trunk/sound/core/pcm_compat.c b/trunk/sound/core/pcm_compat.c index 91cdf9435fec..5fb2e28e796f 100644 --- a/trunk/sound/core/pcm_compat.c +++ b/trunk/sound/core/pcm_compat.c @@ -342,7 +342,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream, kfree(bufs); return -EFAULT; } - bufs[i] = compat_ptr(ptr); + bufs[ch] = compat_ptr(ptr); bufptr++; } if (dir == SNDRV_PCM_STREAM_PLAYBACK) diff --git a/trunk/sound/core/rtctimer.c b/trunk/sound/core/rtctimer.c index e85e72baff9e..0851cd13e303 100644 --- a/trunk/sound/core/rtctimer.c +++ b/trunk/sound/core/rtctimer.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/trunk/sound/pci/asihpi/hpidspcd.c b/trunk/sound/pci/asihpi/hpidspcd.c index 71d32c868c92..3a7afa31c1d8 100644 --- a/trunk/sound/pci/asihpi/hpidspcd.c +++ b/trunk/sound/pci/asihpi/hpidspcd.c @@ -43,7 +43,6 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, struct pci_dev *dev = os_data; struct code_header header; char fw_name[20]; - short err_ret = HPI_ERROR_DSP_FILE_NOT_FOUND; int err; sprintf(fw_name, "asihpi/dsp%04x.bin", adapter); @@ -86,10 +85,8 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name); dsp_code->pvt = kmalloc(sizeof(*dsp_code->pvt), GFP_KERNEL); - if (!dsp_code->pvt) { - err_ret = HPI_ERROR_MEMORY_ALLOC; - goto error2; - } + if (!dsp_code->pvt) + return HPI_ERROR_MEMORY_ALLOC; dsp_code->pvt->dev = dev; dsp_code->pvt->firmware = firmware; @@ -102,7 +99,7 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, release_firmware(firmware); error1: dsp_code->block_length = 0; - return err_ret; + return HPI_ERROR_DSP_FILE_NOT_FOUND; } /*-------------------------------------------------------------------*/ diff --git a/trunk/sound/pci/asihpi/hpioctl.c b/trunk/sound/pci/asihpi/hpioctl.c index a32502e796de..9683f84ecdc8 100644 --- a/trunk/sound/pci/asihpi/hpioctl.c +++ b/trunk/sound/pci/asihpi/hpioctl.c @@ -177,21 +177,16 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else { u16 __user *ptr = NULL; u32 size = 0; - u32 adapter_present; + /* -1=no data, 0=read from user mem, 1=write to user mem */ int wrflag = -1; - struct hpi_adapter *pa; - - if (hm->h.adapter_index < HPI_MAX_ADAPTERS) { - pa = &adapters[hm->h.adapter_index];
- adapter_present = pa->type; - } else { - adapter_present = 0; - } + u32 adapter = hm->h.adapter_index; + struct hpi_adapter *pa = &adapters[adapter]; - if (!adapter_present) { - hpi_init_response(&hr->r0, hm->h.object, - hm->h.function, HPI_ERROR_BAD_ADAPTER_NUMBER); + if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) { + hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER, + HPI_ADAPTER_OPEN, + HPI_ERROR_BAD_ADAPTER_NUMBER); uncopied_bytes = copy_to_user(puhr, hr, sizeof(hr->h)); diff --git a/trunk/sound/pci/rme9652/hdspm.c b/trunk/sound/pci/rme9652/hdspm.c index 6edc67ced905..af130ee0c45d 100644 --- a/trunk/sound/pci/rme9652/hdspm.c +++ b/trunk/sound/pci/rme9652/hdspm.c @@ -521,7 +521,6 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}"); #define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024) /* revisions >= 230 indicate AES32 card */ -#define HDSPM_MADI_ANCIENT_REV 204 #define HDSPM_MADI_OLD_REV 207 #define HDSPM_MADI_REV 210 #define HDSPM_RAYDAT_REV 211 @@ -1218,22 +1217,6 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm) rate = 0; break; } - - /* QS and DS rates normally can not be detected - * automatically by the card. Only exception is MADI - * in 96k frame mode. - * - * So if we read SS values (32 .. 48k), check for - * user-provided DS/QS bits in the control register - * and multiply the base frequency accordingly. - */ - if (rate <= 48000) { - if (hdspm->control_register & HDSPM_QuadSpeed) - rate *= 4; - else if (hdspm->control_register & - HDSPM_DoubleSpeed) - rate *= 2; - } } break; } @@ -3432,91 +3415,6 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol, return change; } -#define HDSPM_MADI_SPEEDMODE(xname, xindex) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ - .name = xname, \ - .index = xindex, \ - .info = snd_hdspm_info_madi_speedmode, \ - .get = snd_hdspm_get_madi_speedmode, \ - .put = snd_hdspm_put_madi_speedmode \ -} - -static int hdspm_madi_speedmode(struct hdspm *hdspm) -{ - if (hdspm->control_register & HDSPM_QuadSpeed) - return 2; - if (hdspm->control_register & HDSPM_DoubleSpeed) - return 1; - return 0; -} - -static int hdspm_set_madi_speedmode(struct hdspm *hdspm, int mode) -{ - hdspm->control_register &= ~(HDSPM_DoubleSpeed | HDSPM_QuadSpeed); - switch (mode) { - case 0: - break; - case 1: - hdspm->control_register |= HDSPM_DoubleSpeed; - break; - case 2: - hdspm->control_register |= HDSPM_QuadSpeed; - break; - } - hdspm_write(hdspm, HDSPM_controlRegister, hdspm->control_register); - - return 0; -} - -static int snd_hdspm_info_madi_speedmode(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - static char *texts[] = { "Single", "Double", "Quad" }; - - uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; - uinfo->count = 1; - uinfo->value.enumerated.items = 3; - - if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) - uinfo->value.enumerated.item = - uinfo->value.enumerated.items - 1; - strcpy(uinfo->value.enumerated.name, - texts[uinfo->value.enumerated.item]); - - return 0; -} - -static int snd_hdspm_get_madi_speedmode(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - - spin_lock_irq(&hdspm->lock); - ucontrol->value.enumerated.item[0] = hdspm_madi_speedmode(hdspm); - spin_unlock_irq(&hdspm->lock); - return 0; -} - -static int snd_hdspm_put_madi_speedmode(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - int change; - int val; - - if 
(!snd_hdspm_use_is_exclusive(hdspm)) - return -EBUSY; - val = ucontrol->value.integer.value[0]; - if (val < 0) - val = 0; - if (val > 2) - val = 2; - spin_lock_irq(&hdspm->lock); - change = val != hdspm_madi_speedmode(hdspm); - hdspm_set_madi_speedmode(hdspm, val); - spin_unlock_irq(&hdspm->lock); - return change; -} #define HDSPM_MIXER(xname, xindex) \ { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ @@ -4391,8 +4289,7 @@ static struct snd_kcontrol_new snd_hdspm_controls_madi[] = { HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), HDSPM_SAFE_MODE("Safe Mode", 0), - HDSPM_INPUT_SELECT("Input Select", 0), - HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) + HDSPM_INPUT_SELECT("Input Select", 0) }; @@ -4405,8 +4302,7 @@ static struct snd_kcontrol_new snd_hdspm_controls_madiface[] = { HDSPM_SYNC_CHECK("MADI SyncCheck", 0), HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), - HDSPM_SAFE_MODE("Safe Mode", 0), - HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) + HDSPM_SAFE_MODE("Safe Mode", 0) }; static struct snd_kcontrol_new snd_hdspm_controls_aio[] = { @@ -6485,7 +6381,6 @@ static int __devinit snd_hdspm_create(struct snd_card *card, switch (hdspm->firmware_rev) { case HDSPM_MADI_REV: case HDSPM_MADI_OLD_REV: - case HDSPM_MADI_ANCIENT_REV: hdspm->io_type = MADI; hdspm->card_name = "RME MADI"; hdspm->midiPorts = 3; diff --git a/trunk/sound/soc/txx9/txx9aclc.c b/trunk/sound/soc/txx9/txx9aclc.c index 3de99af8cb82..34aa972669ed 100644 --- a/trunk/sound/soc/txx9/txx9aclc.c +++ b/trunk/sound/soc/txx9/txx9aclc.c @@ -290,7 +290,6 @@ static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm) static int txx9aclc_pcm_new(struct snd_soc_pcm_runtime *rtd) { - struct snd_card *card = rtd->card->snd_card; struct snd_soc_dai *dai = rtd->cpu_dai; struct snd_pcm *pcm = rtd->pcm; struct platform_device *pdev = to_platform_device(dai->platform->dev); diff --git a/trunk/tools/power/x86/turbostat/turbostat.c b/trunk/tools/power/x86/turbostat/turbostat.c index 8b2d37b59c9e..6d8ef4a3a9b5 100644 --- a/trunk/tools/power/x86/turbostat/turbostat.c +++ b/trunk/tools/power/x86/turbostat/turbostat.c @@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset) void print_header(void) { if (show_pkg) - fprintf(stderr, "pk"); + fprintf(stderr, "pkg "); if (show_core) - fprintf(stderr, " cr"); + fprintf(stderr, "core"); if (show_cpu) fprintf(stderr, " CPU"); if (do_nhm_cstates) - fprintf(stderr, " %%c0 "); + fprintf(stderr, " %%c0 "); if (has_aperf) - fprintf(stderr, " GHz"); + fprintf(stderr, " GHz"); fprintf(stderr, " TSC"); if (do_nhm_cstates) - fprintf(stderr, " %%c1"); + fprintf(stderr, " %%c1 "); if (do_nhm_cstates) - fprintf(stderr, " %%c3"); + fprintf(stderr, " %%c3 "); if (do_nhm_cstates) - fprintf(stderr, " %%c6"); + fprintf(stderr, " %%c6 "); if (do_snb_cstates) - fprintf(stderr, " %%c7"); + fprintf(stderr, " %%c7 "); if (do_snb_cstates) - fprintf(stderr, " %%pc2"); + fprintf(stderr, " %%pc2 "); if (do_nhm_cstates) - fprintf(stderr, " %%pc3"); + fprintf(stderr, " %%pc3 "); if (do_nhm_cstates) - fprintf(stderr, " %%pc6"); + fprintf(stderr, " %%pc6 "); if (do_snb_cstates) - fprintf(stderr, " %%pc7"); + fprintf(stderr, " %%pc7 "); if (extra_msr_offset) - fprintf(stderr, " MSR 0x%x ", extra_msr_offset); + fprintf(stderr, " MSR 0x%x ", extra_msr_offset); putc('\n', stderr); } @@ -194,14 +194,14 @@ void print_cnt(struct counters *p) /* topology columns, print blanks on 1st (average) line */ if (p == cnt_average) { if (show_pkg) - fprintf(stderr, " "); + fprintf(stderr, " "); 
if (show_core) fprintf(stderr, " "); if (show_cpu) fprintf(stderr, " "); } else { if (show_pkg) - fprintf(stderr, "%d", p->pkg); + fprintf(stderr, "%4d", p->pkg); if (show_core) fprintf(stderr, "%4d", p->core); if (show_cpu) @@ -241,22 +241,22 @@ void print_cnt(struct counters *p) if (!skip_c1) fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); else - fprintf(stderr, " ****"); + fprintf(stderr, " ****"); } if (do_nhm_cstates) - fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); + fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); + fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc); if (do_snb_cstates) - fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); + fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc); if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); + fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); + fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); + fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc); if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); + fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc); if (extra_msr_offset) fprintf(stderr, " 0x%016llx", p->extra_msr); putc('\n', stderr); diff --git a/trunk/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/trunk/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 33c5c7ee148f..2618ef2ba31f 100644 --- a/trunk/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/trunk/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -137,6 +137,7 @@ void cmdline(int argc, char **argv) void validate_cpuid(void) { unsigned int eax, ebx, ecx, edx, max_level; + char brand[16]; unsigned int fms, family, model, stepping; eax = ebx = ecx = edx = 0; @@ -159,8 +160,8 @@ void validate_cpuid(void) model += ((fms >> 16) & 0xf) << 4; if (verbose > 1) - printf("CPUID %d levels family:model:stepping " - "0x%x:%x:%x (%d:%d:%d)\n", max_level, + printf("CPUID %s %d levels family:model:stepping " + "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, family, model, stepping, family, model, stepping); if (!(edx & (1 << 5))) {
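/* CPUID.01H:EDX bit 5 is the MSR feature flag; without rdmsr/wrmsr support the energy-perf-bias MSR cannot be accessed. */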