From 40ce65d90b6394fd4156abc2e9214ffca183fd0c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 13 Apr 2009 11:04:19 +0900 Subject: [PATCH] --- yaml --- r: 143252 b: refs/heads/master c: 17a7b7b39056a82c5012539311850f202e6c3cd4 h: refs/heads/master v: v3 --- [refs] | 2 +- .../Documentation/ABI/testing/debugfs-pktcdvd | 6 +- trunk/Documentation/cgroups/memory.txt | 55 +- .../cgroups/resource_counter.txt | 27 +- trunk/Documentation/sysctl/net.txt | 2 +- trunk/Documentation/tomoyo.txt | 55 + trunk/Documentation/vm/00-INDEX | 2 - trunk/Documentation/vm/active_mm.txt | 83 -- trunk/Documentation/vm/unevictable-lru.txt | 1041 ++++++++--------- trunk/MAINTAINERS | 30 +- trunk/arch/arm/mach-omap2/usb-musb.c | 8 +- trunk/arch/ia64/kernel/pci-swiotlb.c | 2 +- .../arch/x86/include/asm/required-features.h | 2 +- trunk/arch/x86/include/asm/xen/page.h | 3 +- .../x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 +- trunk/arch/x86/xen/enlighten.c | 89 +- trunk/arch/x86/xen/mmu.c | 116 +- trunk/arch/x86/xen/mmu.h | 3 - trunk/arch/x86/xen/smp.c | 4 +- trunk/arch/x86/xen/xen-ops.h | 2 + trunk/drivers/atm/solos-pci.c | 2 +- trunk/drivers/block/cciss.c | 2 +- trunk/drivers/char/agp/intel-agp.c | 3 - trunk/drivers/char/sysrq.c | 1 + trunk/drivers/edac/edac_core.h | 12 +- trunk/drivers/edac/edac_device.c | 2 +- trunk/drivers/edac/edac_mc.c | 2 +- trunk/drivers/edac/edac_pci.c | 2 +- trunk/drivers/hwmon/Kconfig | 10 - trunk/drivers/hwmon/Makefile | 1 - trunk/drivers/hwmon/hp_accel.c | 1 + trunk/drivers/hwmon/sht15.c | 692 ----------- trunk/drivers/misc/eeprom/at24.c | 8 +- trunk/drivers/misc/eeprom/at25.c | 5 +- trunk/drivers/misc/sgi-xp/xpc.h | 254 ++-- trunk/drivers/misc/sgi-xp/xpc_channel.c | 138 +-- trunk/drivers/misc/sgi-xp/xpc_main.c | 128 +- trunk/drivers/misc/sgi-xp/xpc_partition.c | 20 +- trunk/drivers/misc/sgi-xp/xpc_sn2.c | 164 +-- trunk/drivers/misc/sgi-xp/xpc_uv.c | 257 ++-- trunk/drivers/net/atl1c/atl1c_main.c | 4 +- trunk/drivers/net/benet/be_main.c | 4 +- trunk/drivers/net/jme.c | 8 +- trunk/drivers/net/wireless/ath9k/pci.c | 4 +- trunk/drivers/net/wireless/p54/p54pci.c | 4 +- trunk/drivers/scsi/3w-9xxx.c | 8 +- trunk/drivers/scsi/aacraid/aachba.c | 2 +- trunk/drivers/scsi/mpt2sas/mpt2sas_base.c | 10 +- trunk/drivers/spi/spi.c | 22 +- trunk/drivers/staging/b3dfg/b3dfg.c | 2 +- trunk/drivers/usb/otg/nop-usb-xceiv.c | 4 +- trunk/drivers/video/aty/radeon_base.c | 4 +- trunk/drivers/video/backlight/backlight.c | 3 + trunk/drivers/video/backlight/lcd.c | 3 + trunk/drivers/video/cirrusfb.c | 4 +- trunk/drivers/video/console/fbcon.c | 55 +- trunk/drivers/video/efifb.c | 7 +- trunk/drivers/video/fbmem.c | 19 - trunk/drivers/video/intelfb/intelfb.h | 2 - trunk/drivers/video/intelfb/intelfb_i2c.c | 1 - trunk/drivers/video/intelfb/intelfbdrv.c | 1 - trunk/drivers/video/intelfb/intelfbhw.c | 5 - trunk/drivers/video/s3fb.c | 6 +- trunk/drivers/video/sa1100fb.c | 15 +- trunk/drivers/video/sa1100fb.h | 7 +- trunk/drivers/video/sis/sis_main.c | 2 +- trunk/drivers/video/skeletonfb.c | 8 +- trunk/drivers/video/uvesafb.c | 35 +- trunk/drivers/video/vfb.c | 11 +- trunk/drivers/xen/cpu_hotplug.c | 40 +- trunk/drivers/xen/manage.c | 5 +- trunk/fs/ext2/inode.c | 44 +- trunk/fs/hfs/inode.c | 4 - trunk/fs/hfs/mdb.c | 1 - trunk/fs/jbd/revoke.c | 24 +- trunk/fs/xfs/linux-2.6/xfs_aops.c | 38 +- trunk/fs/xfs/linux-2.6/xfs_aops.h | 1 - trunk/fs/xfs/linux-2.6/xfs_buf.c | 9 - trunk/fs/xfs/linux-2.6/xfs_fs_subr.c | 14 +- trunk/fs/xfs/linux-2.6/xfs_lrw.c | 18 +- trunk/fs/xfs/linux-2.6/xfs_sync.c | 78 +- 
trunk/fs/xfs/linux-2.6/xfs_sync.h | 9 +- trunk/fs/xfs/xfs_iget.c | 23 +- trunk/fs/xfs/xfs_iomap.c | 61 +- trunk/fs/xfs/xfs_iomap.h | 3 +- trunk/fs/xfs/xfs_log.c | 78 +- trunk/fs/xfs/xfs_mount.h | 2 +- trunk/fs/xfs/xfs_vnodeops.c | 7 - trunk/include/asm-generic/siginfo.h | 2 +- trunk/include/drm/drm_pciids.h | 2 - trunk/include/linux/fb.h | 8 +- trunk/include/linux/fiemap.h | 2 - trunk/include/linux/init_task.h | 13 + trunk/include/linux/pci_ids.h | 2 - trunk/include/linux/sht15.h | 24 - trunk/include/video/cyblafb.h | 175 +++ trunk/init/initramfs.c | 5 +- trunk/ipc/mq_sysctl.c | 2 +- trunk/kernel/ptrace.c | 7 +- trunk/kernel/sys.c | 24 +- trunk/kernel/sysctl.c | 20 +- trunk/mm/Kconfig | 2 - trunk/mm/filemap.c | 4 +- trunk/mm/memcontrol.c | 2 +- trunk/mm/shmem.c | 27 +- trunk/mm/util.c | 16 - trunk/sound/pci/hda/hda_intel.c | 8 +- 107 files changed, 1702 insertions(+), 2600 deletions(-) create mode 100644 trunk/Documentation/tomoyo.txt delete mode 100644 trunk/Documentation/vm/active_mm.txt delete mode 100644 trunk/drivers/hwmon/sht15.c delete mode 100644 trunk/include/linux/sht15.h create mode 100644 trunk/include/video/cyblafb.h diff --git a/[refs] b/[refs] index fab9f645f473..8f3d59627216 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 1c98aa7424ff163637d8321674ec58dee28152d4 +refs/heads/master: 17a7b7b39056a82c5012539311850f202e6c3cd4 diff --git a/trunk/Documentation/ABI/testing/debugfs-pktcdvd b/trunk/Documentation/ABI/testing/debugfs-pktcdvd index cf11736acb76..bf9c16b64c34 100644 --- a/trunk/Documentation/ABI/testing/debugfs-pktcdvd +++ b/trunk/Documentation/ABI/testing/debugfs-pktcdvd @@ -1,4 +1,4 @@ -What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7] +What: /debug/pktcdvd/pktcdvd[0-7] Date: Oct. 2006 KernelVersion: 2.6.20 Contact: Thomas Maier @@ -10,10 +10,10 @@ debugfs interface The pktcdvd module (packet writing driver) creates these files in debugfs: -/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/ +/debug/pktcdvd/pktcdvd[0-7]/ info (0444) Lots of driver statistics and infos. Example: ------- -cat /sys/kernel/debug/pktcdvd/pktcdvd0/info +cat /debug/pktcdvd/pktcdvd0/info diff --git a/trunk/Documentation/cgroups/memory.txt b/trunk/Documentation/cgroups/memory.txt index 1a608877b14e..a98a7fe7aabb 100644 --- a/trunk/Documentation/cgroups/memory.txt +++ b/trunk/Documentation/cgroups/memory.txt @@ -6,14 +6,15 @@ used here with the memory controller that is used in hardware. Salient features -a. Enable control of Anonymous, Page Cache (mapped and unmapped) and - Swap Cache memory pages. +a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages b. The infrastructure allows easy addition of other types of memory to control c. Provides *zero overhead* for non memory controller users d. Provides a double LRU: global memory pressure causes reclaim from the global LRU; a cgroup on hitting a limit, reclaims from the per cgroup LRU +NOTE: Swap Cache (unmapped) is not accounted now. + Benefits and Purpose of the memory controller The memory controller isolates the memory behaviour of a group of tasks @@ -289,44 +290,34 @@ will be charged as a new owner of it. moved to the parent. If you want to avoid that, force_empty will be useful. 5.2 stat file - -memory.stat file includes following statistics - -cache - # of bytes of page cache memory. -rss - # of bytes of anonymous and swap cache memory. -pgpgin - # of pages paged in (equivalent to # of charging events). -pgpgout - # of pages paged out (equivalent to # of uncharging events). 
-active_anon - # of bytes of anonymous and swap cache memory on active
- lru list.
-inactive_anon - # of bytes of anonymous memory and swap cache memory on
- inactive lru list.
-active_file - # of bytes of file-backed memory on active lru list.
-inactive_file - # of bytes of file-backed memory on inactive lru list.
-unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc).
-
-The following additional stats are dependent on CONFIG_DEBUG_VM.
-
-inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
-recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
-recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
-
-Memo:
+ The memory.stat file currently includes the following statistics:
+ cache - # of pages from page-cache and shmem.
+ rss - # of pages from anonymous memory.
+ pgpgin - # of charging events.
+ pgpgout - # of uncharging events.
+ active_anon - # of pages on the active lru of anon, shmem.
+ inactive_anon - # of pages on the inactive lru of anon, shmem.
+ active_file - # of pages on the active lru of file cache.
+ inactive_file - # of pages on the inactive lru of file cache.
+ unevictable - # of pages that cannot be reclaimed (mlocked etc.).
+
+ The following depend on CONFIG_DEBUG_VM.
+ inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
+ recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
+ recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
+ recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
+ recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
+
+ Memo:
recent_rotated means the recent frequency of lru rotation.
recent_scanned means the recent # of scans to the lru.
These are shown to aid debugging; please see the code for their precise meanings.
-Note:
- Only anonymous and swap cache memory is listed as part of 'rss' stat.
- This should not be confused with the true 'resident set size' or the
- amount of physical memory used by the cgroup. Per-cgroup rss
- accounting is not done yet.
5.3 swappiness
Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
- Following cgroups' swapiness can't be changed.
+ The following cgroups' swappiness can't be changed.
- root cgroup (uses /proc/sys/vm/swappiness).
- a cgroup which uses hierarchy and has a child cgroup.
- a cgroup which uses hierarchy and is not the root of the hierarchy.
diff --git a/trunk/Documentation/cgroups/resource_counter.txt b/trunk/Documentation/cgroups/resource_counter.txt index 95b24d766eab..f196ac1d7d25 100644 --- a/trunk/Documentation/cgroups/resource_counter.txt +++ b/trunk/Documentation/cgroups/resource_counter.txt @@ -47,18 +47,13 @@ to work with it.
2. Basic accounting routines
- a. void res_counter_init(struct res_counter *rc,
- struct res_counter *rc_parent)
+ a. void res_counter_init(struct res_counter *rc)
Initializes the resource counter. As usual, should be the first routine called for a new counter.
- The struct res_counter *parent can be used to define a hierarchical
- child -> parent relationship directly in the res_counter structure,
- NULL can be used to define no relationship.
-
- c. int res_counter_charge(struct res_counter *rc, unsigned long val,
- struct res_counter **limit_fail_at)
+ b. int res_counter_charge[_locked]
+ (struct res_counter *rc, unsigned long val)
When a resource is about to be allocated it has to be accounted with the appropriate resource counter (controller should determine
* if the charging is performed first, then it should be uncharged on the error path (if one is taken).
- If the charging fails and a hierarchical dependency exists, the
- limit_fail_at parameter is set to the particular res_counter element
- where the charging failed.
-
- d. int res_counter_charge_locked
- (struct res_counter *rc, unsigned long val)
-
- The same as res_counter_charge(), but it must not acquire/release the
- res_counter->lock internally (it must be called with res_counter->lock
- held).
-
- e. void res_counter_uncharge[_locked]
+ c. void res_counter_uncharge[_locked]
(struct res_counter *rc, unsigned long val)
When a resource is released (freed) it should be de-accounted from the resource counter it was accounted to. This is called "uncharging".
- The _locked routines imply that the res_counter->lock is taken.
+ The _locked routines imply that the res_counter->lock is taken.
+
2.1 Other accounting routines
diff --git a/trunk/Documentation/sysctl/net.txt b/trunk/Documentation/sysctl/net.txt index df38ef046f8d..a34d55b65441 100644 --- a/trunk/Documentation/sysctl/net.txt +++ b/trunk/Documentation/sysctl/net.txt @@ -95,7 +95,7 @@ of struct cmsghdr structures with appended data. There is only one file in this directory.
unix_dgram_qlen limits the max number of datagrams queued in Unix domain
-socket's buffer. It will not take effect unless PF_UNIX flag is specified.
+socket's buffer. It will not take effect unless the PF_UNIX flag is specified.
3. /proc/sys/net/ipv4 - IPV4 settings
diff --git a/trunk/Documentation/tomoyo.txt b/trunk/Documentation/tomoyo.txt new file mode 100644 index 000000000000..b3a232cae7f8 --- /dev/null +++ b/trunk/Documentation/tomoyo.txt @@ -0,0 +1,55 @@
+--- What is TOMOYO? ---
+
+TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
+
+LiveCD-based tutorials are available at
+http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/ubuntu8.04-live/
+http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/centos5-live/ .
+Though these tutorials use the non-LSM version of TOMOYO, they are useful for
+understanding what TOMOYO is.
+
+--- How to enable TOMOYO? ---
+
+Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on
+the kernel command line.
+
+Please see http://tomoyo.sourceforge.jp/en/2.2.x/ for details.
+
+--- Where is documentation? ---
+
+User <-> Kernel interface documentation is available at
+http://tomoyo.sourceforge.jp/en/2.2.x/policy-reference.html .
+
+Materials we prepared for seminars and symposiums are available at
+http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
+The lists below are grouped by three aspects.
+
+What is TOMOYO?
+ TOMOYO Linux Overview
+ http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf
+ TOMOYO Linux: pragmatic and manageable security for Linux
+ http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
+ TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
+ http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
+
+What can TOMOYO do?
+ Deep inside TOMOYO Linux
+ http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
+ The role of "pathname based access control" in security.
+ http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf
+
+History of TOMOYO?
+ Realities of Mainlining
+ http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf
+
+--- What is the future plan? ---
+
+We believe that inode based security and name based security are complementary
+and both should be used together.
But unfortunately, so far, we cannot enable
+multiple LSM modules at the same time. We feel sorry that you have to give up
+SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
+
+We hope that LSM becomes stackable in the future. Meanwhile, you can use the
+non-LSM version of TOMOYO, available at http://tomoyo.sourceforge.jp/en/1.6.x/ .
+The LSM version of TOMOYO is a subset of the non-LSM version. We are planning
+to port the non-LSM version's functionality to the LSM version.
diff --git a/trunk/Documentation/vm/00-INDEX b/trunk/Documentation/vm/00-INDEX index 2f77ced35df7..2131b00b63f6 100644 --- a/trunk/Documentation/vm/00-INDEX +++ b/trunk/Documentation/vm/00-INDEX @@ -1,7 +1,5 @@
00-INDEX
- this file.
-active_mm.txt
- - An explanation from Linus about tsk->active_mm vs tsk->mm.
balance
- various information on memory balancing.
hugetlbpage.txt
diff --git a/trunk/Documentation/vm/active_mm.txt b/trunk/Documentation/vm/active_mm.txt deleted file mode 100644 index 4ee1f643d897..000000000000 --- a/trunk/Documentation/vm/active_mm.txt +++ /dev/null @@ -1,83 +0,0 @@
-List: linux-kernel
-Subject: Re: active_mm
-From: Linus Torvalds
-Date: 1999-07-30 21:36:24
-
-Cc'd to linux-kernel, because I don't write explanations all that often,
-and when I do I feel better about more people reading them.
-
-On Fri, 30 Jul 1999, David Mosberger wrote:
->
-> Is there a brief description someplace on how "mm" vs. "active_mm" in
-> the task_struct are supposed to be used? (My apologies if this was
-> discussed on the mailing lists---I just returned from vacation and
-> wasn't able to follow linux-kernel for a while).
-
-Basically, the new setup is:
-
- - we have "real address spaces" and "anonymous address spaces". The
- difference is that an anonymous address space doesn't care about the
- user-level page tables at all, so when we do a context switch into an
- anonymous address space we just leave the previous address space
- active.
-
- The obvious use for a "anonymous address space" is any thread that
- doesn't need any user mappings - all kernel threads basically fall into
- this category, but even "real" threads can temporarily say that for
- some amount of time they are not going to be interested in user space,
- and that the scheduler might as well try to avoid wasting time on
- switching the VM state around. Currently only the old-style bdflush
- sync does that.
-
- - "tsk->mm" points to the "real address space". For an anonymous process,
- tsk->mm will be NULL, for the logical reason that an anonymous process
- really doesn't _have_ a real address space at all.
-
- - however, we obviously need to keep track of which address space we
- "stole" for such an anonymous user. For that, we have "tsk->active_mm",
- which shows what the currently active address space is.
-
- The rule is that for a process with a real address space (ie tsk->mm is
- non-NULL) the active_mm obviously always has to be the same as the real
- one.
-
- For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
- "borrowed" mm while the anonymous process is running. When the
- anonymous process gets scheduled away, the borrowed address space is
- returned and cleared.
-
-To support all that, the "struct mm_struct" now has two counters: a
-"mm_users" counter that is how many "real address space users" there are,
-and a "mm_count" counter that is the number of "lazy" users (ie anonymous
-users) plus one if there are any real users.
- -Usually there is at least one real user, but it could be that the real -user exited on another CPU while a lazy user was still active, so you do -actually get cases where you have a address space that is _only_ used by -lazy users. That is often a short-lived state, because once that thread -gets scheduled away in favour of a real thread, the "zombie" mm gets -released because "mm_users" becomes zero. - -Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any -more. "init_mm" should be considered just a "lazy context when no other -context is available", and in fact it is mainly used just at bootup when -no real VM has yet been created. So code that used to check - - if (current->mm == &init_mm) - -should generally just do - - if (!current->mm) - -instead (which makes more sense anyway - the test is basically one of "do -we have a user context", and is generally done by the page fault handler -and things like that). - -Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago, -because it slightly changes the interfaces to accomodate the alpha (who -would have thought it, but the alpha actually ends up having one of the -ugliest context switch codes - unlike the other architectures where the MM -and register state is separate, the alpha PALcode joins the two, and you -need to switch both together). - -(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2) diff --git a/trunk/Documentation/vm/unevictable-lru.txt b/trunk/Documentation/vm/unevictable-lru.txt index 2d70d0d95108..0706a7282a8c 100644 --- a/trunk/Documentation/vm/unevictable-lru.txt +++ b/trunk/Documentation/vm/unevictable-lru.txt @@ -1,691 +1,588 @@ - ============================== - UNEVICTABLE LRU INFRASTRUCTURE - ============================== - -======== -CONTENTS -======== - - (*) The Unevictable LRU - - - The unevictable page list. - - Memory control group interaction. - - Marking address spaces unevictable. - - Detecting Unevictable Pages. - - vmscan's handling of unevictable pages. - - (*) mlock()'d pages. - - - History. - - Basic management. - - mlock()/mlockall() system call handling. - - Filtering special vmas. - - munlock()/munlockall() system call handling. - - Migrating mlocked pages. - - mmap(MAP_LOCKED) system call handling. - - munmap()/exit()/exec() system call handling. - - try_to_unmap(). - - try_to_munlock() reverse map scan. - - Page reclaim in shrink_*_list(). - - -============ -INTRODUCTION -============ - -This document describes the Linux memory manager's "Unevictable LRU" -infrastructure and the use of this to manage several types of "unevictable" -pages. - -The document attempts to provide the overall rationale behind this mechanism -and the rationale for some of the design decisions that drove the -implementation. The latter design rationale is discussed in the context of an -implementation description. Admittedly, one can obtain the implementation -details - the "what does it do?" - by reading the code. One hopes that the -descriptions below add value by provide the answer to "why does it do that?". - - -=================== -THE UNEVICTABLE LRU -=================== - -The Unevictable LRU facility adds an additional LRU list to track unevictable -pages and to hide these pages from vmscan. This mechanism is based on a patch -by Larry Woodman of Red Hat to address several scalability problems with page -reclaim in Linux. The problems have been observed at customer sites on large -memory x86_64 systems. 
-
-To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
-main memory will have over 32 million 4k pages in a single zone. When a large
-fraction of these pages are not evictable for any reason [see below], vmscan
-will spend a lot of time scanning the LRU lists looking for the small fraction
-of pages that are evictable. This can result in a situation where all CPUs are
-spending 100% of their time in vmscan for hours or days on end, with the system
-completely unresponsive.
-
-The unevictable list addresses the following classes of unevictable pages:
-
- (*) Those owned by ramfs.
-
- (*) Those mapped into SHM_LOCK'd shared memory regions.
- (*) Those mapped into VM_LOCKED [mlock()ed] VMAs.
-
-The infrastructure may also be able to handle other conditions that make pages
+This document describes the Linux memory management "Unevictable LRU"
+infrastructure and the use of this infrastructure to manage several types
+of "unevictable" pages. The document attempts to provide the overall
+rationale behind this mechanism and the rationale for some of the design
+decisions that drove the implementation. The latter design rationale is
+discussed in the context of an implementation description. Admittedly, one
+can obtain the implementation details--the "what does it do?"--by reading the
+code. One hopes that the descriptions below add value by providing the answer
+to "why does it do that?".
+
+Unevictable LRU Infrastructure:
+
+The Unevictable LRU adds an additional LRU list to track unevictable pages
+and to hide these pages from vmscan. This mechanism is based on a patch by
+Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux. The problems have been observed at customer sites on large
+memory x86_64 systems. For example, a non-NUMA x86_64 platform with 128GB
+of main memory will have over 32 million 4k pages in a single zone. When a
+large fraction of these pages are not evictable for any reason [see below],
+vmscan will spend a lot of time scanning the LRU lists looking for the small
+fraction of pages that are evictable. This can result in a situation where
+all cpus are spending 100% of their time in vmscan for hours or days on end,
+with the system completely unresponsive.
+
+The Unevictable LRU infrastructure addresses the following classes of
+unevictable pages:
+
++ page owned by ramfs
++ page mapped into SHM_LOCKed shared memory regions
++ page mapped into VM_LOCKED [mlock()ed] vmas
+
+The infrastructure might be able to handle other conditions that make pages
unevictable, either by definition or by circumstance, in the future.
-THE UNEVICTABLE PAGE LIST
-------------------------
+The Unevictable LRU List
The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list called the "unevictable" list and an associated page flag, PG_unevictable, to
-indicate that the page is being managed on the unevictable list.
-
-The PG_unevictable flag is analogous to, and mutually exclusive with, the
-PG_active flag in that it indicates on which LRU list a page resides when
-PG_lru is set. The unevictable list is compile-time configurable based on the
-UNEVICTABLE_LRU Kconfig option.
+indicate that the page is being managed on the unevictable list. The
+PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
+flag in that it indicates on which LRU list a page resides when PG_lru is set.
+The unevictable LRU list is compile-time configurable via the UNEVICTABLE_LRU
+Kconfig option.
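To make the flag invariant concrete, here is a minimal sketch. It assumes the page-flag test wrappers PageLRU(), PageActive() and PageUnevictable() from <linux/page-flags.h>; which_lru_list() is a hypothetical helper written only for illustration, not a kernel function:

	#include <linux/mm_types.h>
	#include <linux/page-flags.h>

	/*
	 * Hypothetical sketch: PG_active and PG_unevictable are mutually
	 * exclusive and only meaningful while PG_lru is set, so together
	 * they identify which LRU list a page currently resides on.
	 */
	enum on_lru { ON_NO_LRU, ON_ACTIVE_LRU, ON_INACTIVE_LRU, ON_UNEVICTABLE_LRU };

	static enum on_lru which_lru_list(struct page *page)
	{
		if (!PageLRU(page))
			return ON_NO_LRU;	/* e.g. isolated for migration */
		if (PageUnevictable(page))
			return ON_UNEVICTABLE_LRU;
		return PageActive(page) ? ON_ACTIVE_LRU : ON_INACTIVE_LRU;
	}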
The Unevictable LRU infrastructure maintains unevictable pages on an additional LRU list for a few reasons:
- (1) We get to "treat unevictable pages just like we treat other pages in the
- system - which means we get to use the same code to manipulate them, the
- same code to isolate them (for migrate, etc.), the same code to keep track
- of the statistics, etc..." [Rik van Riel]
-
- (2) We want to be able to migrate unevictable pages between nodes for memory
- defragmentation, workload management and memory hotplug. The linux kernel
- can only migrate pages that it can successfully isolate from the LRU
- lists. If we were to maintain pages elsewhere than on an LRU-like list,
- where they can be found by isolate_lru_page(), we would prevent their
- migration, unless we reworked migration code to find the unevictable pages
- itself.
+1) We get to "treat unevictable pages just like we treat other pages in the
+ system, which means we get to use the same code to manipulate them, the
+ same code to isolate them (for migrate, etc.), the same code to keep track
+ of the statistics, etc..." [Rik van Riel]
+2) We want to be able to migrate unevictable pages between nodes--for memory
+ defragmentation, workload management and memory hotplug. The linux kernel
+ can only migrate pages that it can successfully isolate from the lru lists.
+ If we were to maintain pages elsewhere than on an lru-like list, where they
+ can be found by isolate_lru_page(), we would prevent their migration, unless
+ we reworked migration code to find the unevictable pages.
-The unevictable list does not differentiate between file-backed and anonymous,
-swap-backed pages. This differentiation is only important while the pages are,
-in fact, evictable.
-The unevictable list benefits from the "arrayification" of the per-zone LRU
-lists and statistics originally proposed and posted by Christoph Lameter.
+The unevictable LRU list does not differentiate between file backed and swap
+backed [anon] pages. This differentiation is only important while the pages
+are, in fact, evictable.
-The unevictable list does not use the LRU pagevec mechanism. Rather,
-unevictable pages are placed directly on the page's zone's unevictable list
-under the zone lru_lock. This allows us to prevent the stranding of pages on
-the unevictable list when one task has the page isolated from the LRU and other
-tasks are changing the "evictability" state of the page.
+The unevictable LRU list benefits from the "arrayification" of the per-zone
+LRU lists and statistics originally proposed and posted by Christoph Lameter.
+The unevictable list does not use the lru pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable
+list under the zone lru_lock. The reason for this is to prevent stranding
+of pages on the unevictable list when one task has the page isolated from the
+lru and other tasks are changing the "evictability" state of the page.
-MEMORY CONTROL GROUP INTERACTION
--------------------------------
-The unevictable LRU facility interacts with the memory control group [aka
-memory controller; see Documentation/cgroups/memory.txt] by extending the
-lru_list enum.
-
-The memory controller data structure automatically gets a per-zone unevictable
-list as a result of the "arrayification" of the per-zone LRU lists (one per
-lru_list enum element). The memory controller tracks the movement of pages to
-and from the unevictable list.
+Unevictable LRU and Memory Controller Interaction +The memory controller data structure automatically gets a per zone unevictable +lru list as a result of the "arrayification" of the per-zone LRU lists. The +memory controller tracks the movement of pages to and from the unevictable list. When a memory control group comes under memory pressure, the controller will not attempt to reclaim pages on the unevictable list. This has a couple of -effects: - - (1) Because the pages are "hidden" from reclaim on the unevictable list, the - reclaim process can be more efficient, dealing only with pages that have a - chance of being reclaimed. - - (2) On the other hand, if too many of the pages charged to the control group - are unevictable, the evictable portion of the working set of the tasks in - the control group may not fit into the available memory. This can cause - the control group to thrash or to OOM-kill tasks. - - -MARKING ADDRESS SPACES UNEVICTABLE ----------------------------------- - -For facilities such as ramfs none of the pages attached to the address space -may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE -address space flag is provided, and this can be manipulated by a filesystem -using a number of wrapper functions: - - (*) void mapping_set_unevictable(struct address_space *mapping); - - Mark the address space as being completely unevictable. - - (*) void mapping_clear_unevictable(struct address_space *mapping); - - Mark the address space as being evictable. - - (*) int mapping_unevictable(struct address_space *mapping); - - Query the address space, and return true if it is completely - unevictable. - -These are currently used in two places in the kernel: - - (1) By ramfs to mark the address spaces of its inodes when they are created, - and this mark remains for the life of the inode. - - (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called. - - Note that SHM_LOCK is not required to page in the locked pages if they're - swapped out; the application must touch the pages manually if it wants to - ensure they're in memory. - - -DETECTING UNEVICTABLE PAGES ---------------------------- - -The function page_evictable() in vmscan.c determines whether a page is -evictable or not using the query function outlined above [see section "Marking -address spaces unevictable"] to check the AS_UNEVICTABLE flag. - -For address spaces that are so marked after being populated (as SHM regions -might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate -the page tables for the region as does, for example, mlock(), nor need it make -any special effort to push any pages in the SHM_LOCK'd area to the unevictable -list. Instead, vmscan will do this if and when it encounters the pages during -a reclamation scan. - -On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan -the pages in the region and "rescue" them from the unevictable list if no other -condition is keeping them unevictable. If an unevictable region is destroyed, -the pages are also "rescued" from the unevictable list in the process of -freeing them. - -page_evictable() also checks for mlocked pages by testing an additional page -flag, PG_mlocked (as wrapped by PageMlocked()). If the page is NOT mlocked, -and a non-NULL VMA is supplied, page_evictable() will check whether the VMA is +effects. 
Because the pages are "hidden" from reclaim on the unevictable list,
+the reclaim process can be more efficient, dealing only with pages that have
+a chance of being reclaimed. On the other hand, if too many of the pages
+charged to the control group are unevictable, the evictable portion of the
+working set of the tasks in the control group may not fit into the available
+memory. This can cause the control group to thrash or to oom-kill tasks.
+
+
+Unevictable LRU: Detecting Unevictable Pages
+
+The function page_evictable(page, vma) in vmscan.c determines whether a
+page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions,
+page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
+address space using a wrapper function. Wrapper functions are used to set,
+clear and test the flag to reduce the requirement for #ifdef's throughout the
+source code. AS_UNEVICTABLE is set on the ramfs inode/mapping when it is created.
+This flag remains for the life of the inode.
+
+For shared memory regions, AS_UNEVICTABLE is set when an application
+successfully SHM_LOCKs the region and is removed when the region is
+SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page
+tables for the region as does, for example, mlock(). So, we make no special
+effort to push any pages in the SHM_LOCKed region to the unevictable list.
+Vmscan will do this when/if it encounters the pages during reclaim. On
+SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
+unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed
+region is destroyed, the pages are also "rescued" from the unevictable list in
+the process of freeing them.
+
+page_evictable() detects mlock()ed pages by testing an additional page flag,
+PG_mlocked, via the PageMlocked() wrapper. If the page is NOT mlocked, and a
+non-NULL vma is supplied, page_evictable() will check whether the vma is
VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and update the appropriate statistics if the vma is VM_LOCKED. This method allows efficient "culling" of pages in the fault path that are being faulted in to
-VM_LOCKED VMAs.
+VM_LOCKED vmas.
-VMSCAN'S HANDLING OF UNEVICTABLE PAGES
--------------------------------------
+Unevictable Pages and Vmscan [shrink_*_list()]
If unevictable pages are culled in the fault path, or moved to the unevictable
-list at mlock() or mmap() time, vmscan will not encounter the pages until they
-have become evictable again (via munlock() for example) and have been "rescued"
-from the unevictable list. However, there may be situations where we decide,
-for the sake of expediency, to leave a unevictable page on one of the regular
-active/inactive LRU lists for vmscan to deal with. vmscan checks for such
-pages in all of the shrink_{active|inactive|page}_list() functions and will
-"cull" such pages that it encounters: that is, it diverts those pages to the
-unevictable list for the zone being scanned.
-
-There may be situations where a page is mapped into a VM_LOCKED VMA, but the
-page is not marked as PG_mlocked. Such pages will make it all the way to
+list at mlock() or mmap() time, vmscan will never encounter the pages until
+they have become evictable again, for example, via munlock() and have been
+"rescued" from the unevictable list. However, there may be situations where we
+decide, for the sake of expediency, to leave an unevictable page on one of the
+regular active/inactive LRU lists for vmscan to deal with.
Vmscan checks for
+such pages in all of the shrink_{active|inactive|page}_list() functions and
+will "cull" such pages that it encounters--that is, it diverts those pages to
+the unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED vma, but the
+page is not marked as PageMlocked. Such pages will make it all the way to
shrink_page_list() where they will be detected when vmscan walks the reverse
-map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK,
-shrink_page_list() will cull the page at that point.
+map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
+will cull the page at that point.
-To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
-using putback_lru_page() - the inverse operation to isolate_lru_page() - after
-dropping the page lock. Because the condition which makes the page unevictable
-may change once the page is unlocked, putback_lru_page() will recheck the
-unevictable state of a page that it places on the unevictable list. If the
-page has become unevictable, putback_lru_page() removes it from the list and
-retries, including the page_unevictable() test. Because such a race is a rare
-event and movement of pages onto the unevictable list should be rare, these
-extra evictabilty checks should not occur in the majority of calls to
-putback_lru_page().
+To "cull" an unevictable page, vmscan simply puts the page back on the lru
+list using putback_lru_page()--the inverse operation to isolate_lru_page()--
+after dropping the page lock. Because the condition which makes the page
+unevictable may change once the page is unlocked, putback_lru_page() will
+recheck the unevictable state of a page that it places on the unevictable lru
+list. If the page has become unevictable, putback_lru_page() removes it from
+the list and retries, including the page_unevictable() test. Because such a
+race is a rare event and movement of pages onto the unevictable list should be
+rare, these extra evictability checks should not occur in the majority of calls
+to putback_lru_page().
-=============
-MLOCKED PAGES
-=============
+Mlocked Pages: Prior Work
-The unevictable page list is also useful for mlock(), in addition to ramfs and
-SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in
-NOMMU situations, all mappings are effectively mlocked.
-
-
-HISTORY
--------
-
-The "Unevictable mlocked Pages" infrastructure is based on work originally
+The "Unevictable Mlocked Pages" infrastructure is based on work originally
posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
-Nick posted his patch as an alternative to a patch posted by Christoph Lameter
-to achieve the same objective: hiding mlocked pages from vmscan.
-
-In Nick's patch, he used one of the struct page LRU list link fields as a count
-of VM_LOCKED VMAs that map the page. This use of the link field for a count
-prevented the management of the pages on an LRU list, and thus mlocked pages
-were not migratable as isolate_lru_page() could not find them, and the LRU list
-link field was not available to the migration subsystem.
-
-Nick resolved this by putting mlocked pages back on the lru list before
-attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. When
-Nick's patch was integrated with the Unevictable LRU work, the count was
-replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
-mapped the page. More on this below.
- - -BASIC MANAGEMENT ----------------- - -mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable -pages. When such a page has been "noticed" by the memory management subsystem, -the page is marked with the PG_mlocked flag. This can be manipulated using the -PageMlocked() functions. - -A PG_mlocked page will be placed on the unevictable list when it is added to -the LRU. Such pages can be "noticed" by memory management in several places: - - (1) in the mlock()/mlockall() system call handlers; - - (2) in the mmap() system call handler when mmapping a region with the - MAP_LOCKED flag; - - (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE - flag - - (4) in the fault path, if mlocked pages are "culled" in the fault path, - and when a VM_LOCKED stack segment is expanded; or - - (5) as mentioned above, in vmscan:shrink_page_list() when attempting to - reclaim a page in a VM_LOCKED VMA via try_to_unmap() - -all of which result in the VM_LOCKED flag being set for the VMA if it doesn't -already have it set. - -mlocked pages become unlocked and rescued from the unevictable list when: - - (1) mapped in a range unlocked via the munlock()/munlockall() system calls; - - (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including - unmapping at task exit; - - (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file; - or - - (4) before a page is COW'd in a VM_LOCKED VMA. - - -mlock()/mlockall() SYSTEM CALL HANDLING ---------------------------------------- +Nick posted his patch as an alternative to a patch posted by Christoph +Lameter to achieve the same objective--hiding mlocked pages from vmscan. +In Nick's patch, he used one of the struct page lru list link fields as a count +of VM_LOCKED vmas that map the page. This use of the link field for a count +prevented the management of the pages on an LRU list. Thus, mlocked pages were +not migratable as isolate_lru_page() could not find them and the lru list link +field was not available to the migration subsystem. Nick resolved this by +putting mlocked pages back on the lru list before attempting to isolate them, +thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated +with the Unevictable LRU work, the count was replaced by walking the reverse +map to determine whether any VM_LOCKED vmas mapped the page. More on this +below. + + +Mlocked Pages: Basic Management + +Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of +unevictable pages. When such a page has been "noticed" by the memory +management subsystem, the page is marked with the PG_mlocked [PageMlocked()] +flag. A PageMlocked() page will be placed on the unevictable LRU list when +it is added to the LRU. Pages can be "noticed" by memory management in +several places: + +1) in the mlock()/mlockall() system call handlers. +2) in the mmap() system call handler when mmap()ing a region with the + MAP_LOCKED flag, or mmap()ing a region in a task that has called + mlockall() with the MCL_FUTURE flag. Both of these conditions result + in the VM_LOCKED flag being set for the vma. +3) in the fault path, if mlocked pages are "culled" in the fault path, + and when a VM_LOCKED stack segment is expanded. +4) as mentioned above, in vmscan:shrink_page_list() when attempting to + reclaim a page in a VM_LOCKED vma via try_to_unmap(). + +Mlocked pages become unlocked and rescued from the unevictable list when: + +1) mapped in a range unlocked via the munlock()/munlockall() system calls. 
+2) munmap()ed out of the last VM_LOCKED vma that maps the page, including
+ unmapping at task exit.
+3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
+4) before a page is COWed in a VM_LOCKED vma.
+
+
+Mlocked Pages: mlock()/mlockall() System Call Handling
Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
-for each VMA in the range specified by the call. In the case of mlockall(),
+for each vma in the range specified by the call. In the case of mlockall(),
this is the entire active address space of the task. Note that mlock_fixup()
-is used for both mlocking and munlocking a range of memory. A call to mlock()
-an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
-treated as a no-op, and mlock_fixup() simply returns.
-
-If the VMA passes some filtering as described in "Filtering Special Vmas"
-below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
-off a subset of the VMA if the range does not cover the entire VMA. Once the
-VMA has been merged or split or neither, mlock_fixup() will call
-__mlock_vma_pages_range() to fault in the pages via get_user_pages() and to
-mark the pages as mlocked via mlock_vma_page().
-
-Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
-get_user_pages() will be unable to fault in the pages. That's okay. If pages
-do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
+is used for both mlock()ing and munlock()ing a range of memory. A call to
+mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
+is treated as a no-op--mlock_fixup() simply returns.
+
+If the vma passes some filtering as described in "Mlocked Pages: Filtering
+Special Vmas" below, mlock_fixup() will attempt to merge the vma with its
+neighbors or split off a subset of the vma if the range does not cover the
+entire vma. Once the vma has been merged or split or neither, mlock_fixup()
+will call __mlock_vma_pages_range() to fault in the pages via get_user_pages()
+and to mark the pages as mlocked via mlock_vma_page().
+
+Note that the vma being mlocked might be mapped with PROT_NONE. In this case,
+get_user_pages() will be unable to fault in the pages. That's OK. If pages
+do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
fault path or in vmscan.
Also note that a page returned by get_user_pages() could be truncated or
-migrated out from under us, while we're trying to mlock it. To detect this,
-__mlock_vma_pages_range() checks page_mapping() after acquiring the page lock.
-If the page is still associated with its mapping, we'll go ahead and call
-mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
-In the worst case, this will result in a page mapped in a VM_LOCKED VMA
-remaining on a normal LRU list without being PageMlocked(). Again, vmscan will
-detect and cull such pages.
-
-mlock_vma_page() will call TestSetPageMlocked() for each page returned by
-get_user_pages(). We use TestSetPageMlocked() because the page might already
-be mlocked by another task/VMA and we don't want to do extra work. We
-especially do not want to count an mlocked page more than once in the
-statistics. If the page was already mlocked, mlock_vma_page() need do nothing
-more.
+migrated out from under us while we're trying to mlock it. To detect
+this, __mlock_vma_pages_range() tests the page_mapping after acquiring
+the page lock.
If the page is still associated with its mapping, we'll
+go ahead and call mlock_vma_page(). If the mapping is gone, we just
+unlock the page and move on. Worst case, this results in a page mapped
+in a VM_LOCKED vma remaining on a normal LRU list without being
+PageMlocked(). Again, vmscan will detect and cull such pages.
+
+mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
+TestSetPageMlocked() for each page returned by get_user_pages(). We use
+TestSetPageMlocked() because the page might already be mlocked by another
+task/vma and we don't want to do extra work. We especially do not want to
+count an mlocked page more than once in the statistics. If the page was
+already mlocked, mlock_vma_page() is done.
If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the page from the LRU, as it is likely on the appropriate active or inactive list
-at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put
-back the page - by calling putback_lru_page() - which will notice that the page
-is now mlocked and divert the page to the zone's unevictable list. If
+at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will
+putback the page--putback_lru_page()--which will notice that the page is now
+mlocked and divert the page to the zone's unevictable LRU list. If
mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
-it later if and when it attempts to reclaim the page.
+it later if/when it attempts to reclaim the page.
-FILTERING SPECIAL VMAS
----------------------
+Mlocked Pages: Filtering Special Vmas
-mlock_fixup() filters several classes of "special" VMAs:
+mlock_fixup() filters several classes of "special" vmas:
-1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely. The pages behind
+1) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind
these mappings are inherently pinned, so we don't need to mark them as
- mlocked. In any case, most of the pages have no struct page in which to so
- mark the page. Because of this, get_user_pages() will fail for these VMAs,
- so there is no sense in attempting to visit them.
-
-2) VMAs mapping hugetlbfs page are already effectively pinned into memory. We
- neither need nor want to mlock() these pages. However, to preserve the
- prior behavior of mlock() - before the unevictable/mlock changes -
- mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
- allocate the huge pages and populate the ptes.
-
-3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of
- kernel pages, such as the VDSO page, relay channel pages, etc. These pages
+ mlocked. In any case, most of the pages have no struct page in which to
+ so mark the page. Because of this, get_user_pages() will fail for these
+ vmas, so there is no sense in attempting to visit them.
+
+2) vmas mapping hugetlbfs pages are already effectively pinned into memory.
+ We neither need nor want to mlock() these pages. However, to preserve the
+ prior behavior of mlock()--before the unevictable/mlock changes--
+ mlock_fixup() will call make_pages_present() in the hugetlbfs vma range
+ to allocate the huge pages and populate the ptes.
+
+3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
+ kernel pages, such as the vdso page, relay channel pages, etc. These pages
are inherently unevictable and are not managed on the LRU lists.
- mlock_fixup() treats these VMAs the same as hugetlbfs VMAs.
It calls
+ make_pages_present() to populate the ptes.
-Note that for all of these special VMAs, mlock_fixup() does not set the
+Note that for all of these special vmas, mlock_fixup() does not set the
VM_LOCKED flag. Therefore, we won't have to deal with them later during
-munlock(), munmap() or task exit. Neither does mlock_fixup() account these
-VMAs against the task's "locked_vm".
-
-
-munlock()/munlockall() SYSTEM CALL HANDLING
--------------------------------------------
-
-The munlock() and munlockall() system calls are handled by the same functions -
-do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
-lock operation indicated by an argument. So, these system calls are also
-handled by mlock_fixup(). Again, if called for an already munlocked VMA,
-mlock_fixup() simply returns. Because of the VMA filtering discussed above,
-VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be
+munlock() or munmap()--for example, at task exit. Neither does mlock_fixup()
+account these vmas against the task's "locked_vm".
+
+Mlocked Pages: Downgrading the Mmap Semaphore
+
+mlock_fixup() must be called with the mmap semaphore held for write, because
+it may have to merge or split vmas. However, mlocking a large region of
+memory can take a long time--especially if vmscan must reclaim pages to
+satisfy the region's requirements. Faulting in a large region with the mmap
+semaphore held for write can hold off other faults on the address space, in
+the case of a multi-threaded task. It can also hold off scans of the task's
+address space via /proc. While testing under heavy load, it was observed that
+the ps(1) command could be held off for many minutes while a large segment was
+mlock()ed down.
+
+To address this issue, and to make the system more responsive during mlock()ing
+of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
+during the call to __mlock_vma_pages_range(). This works fine. However, the
+callers of mlock_fixup() expect the semaphore to be returned in write mode.
+So, mlock_fixup() "upgrades" the semaphore to write mode. Linux does not
+support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
+and reacquire it in write mode. In a multi-threaded task, it is possible for
+the task memory map to change while the semaphore is dropped. Therefore,
+mlock_fixup() looks up the vma at the range start address after reacquiring
+the semaphore in write mode and verifies that it still covers the original
+range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of
+mlock_fixup() have been changed to deal with this new error condition.
+
+Note: when munlocking a region, all of the pages should already be resident--
+unless we have racing threads mlocking() and munlocking() regions. So,
+unlocking should not have to wait for page allocations or faults of any kind.
+Therefore mlock_fixup() does not downgrade the semaphore for munlock().
+
+
+Mlocked Pages: munlock()/munlockall() System Call Handling
+
+The munlock() and munlockall() system calls are handled by the same functions--
+do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
+vs lock operation indicated by an argument. So, these system calls are also
+handled by mlock_fixup(). Again, if called for an already munlock()ed vma,
+mlock_fixup() simply returns. Because of the vma filtering discussed above,
+VM_LOCKED will not be set in any "special" vmas.
So, these vmas will be
ignored for munlock.
-If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
-specified range. The range is then munlocked via the function
-__mlock_vma_pages_range() - the same function used to mlock a VMA range -
+If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
+the specified range. The range is then munlocked via the function
+__mlock_vma_pages_range()--the same function used to mlock a vma range--
passing a flag to indicate that munlock() is being performed.
-Because the VMA access protections could have been changed to PROT_NONE after
+Because the vma access protections could have been changed to PROT_NONE after
faulting in and mlocking pages, get_user_pages() was unreliable for visiting
-these pages for munlocking. Because we don't want to leave pages mlocked,
+these pages for munlocking. Because we don't want to leave pages mlocked,
get_user_pages() was enhanced to accept a flag to ignore the permissions when
-fetching the pages - all of which should be resident as a result of previous
-mlocking.
+fetching the pages--all of which should be resident as a result of previous
+mlock()ing.
For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
-flag using TestClearPageMlocked(). As with mlock_vma_page(),
-munlock_vma_page() use the Test*PageMlocked() function to handle the case where
-the page might have already been unlocked by another task. If the page was
-mlocked, munlock_vma_page() updates that zone statistics for the number of
-mlocked pages. Note, however, that at this point we haven't checked whether
-the page is mapped by other VM_LOCKED VMAs.
-
-We can't call try_to_munlock(), the function that walks the reverse map to
-check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
+flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page()
+uses the Test*PageMlocked() function to handle the case where the page might
+have already been unlocked by another task. If the page was mlocked,
+munlock_vma_page() updates the zone statistics for the number of mlocked
+pages. Note, however, that at this point we haven't checked whether the page
+is mapped by other VM_LOCKED vmas.
+
+We can't call try_to_munlock(), the function that walks the reverse map to check
+for other VM_LOCKED vmas, without first isolating the page from the LRU.
try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
-not be on an LRU list [more on these below]. However, the call to
-isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). So,
-we go ahead and clear PG_mlocked up front, as this might be the only chance we
-have. If we can successfully isolate the page, we go ahead and
+not be on an lru list. [More on these below.] However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
+So, we go ahead and clear PG_mlocked up front, as this might be the only chance
+we have. If we can successfully isolate the page, we go ahead and
try_to_munlock(), which will restore the PG_mlocked flag and update the zone
-page statistics if it finds another VMA holding the page mlocked. If we fail
+page statistics if it finds another vma holding the page mlocked. If we fail
to isolate the page, we'll have left a potentially mlocked page on the LRU.
-This is fine, because we'll catch it later if and if vmscan tries to reclaim
-the page.
This should be relatively rare.
-
-
-MIGRATING MLOCKED PAGES
------------------------
-
-A page that is being migrated has been isolated from the LRU lists and is held
-locked across unmapping of the page, updating the page's address space entry
-and copying the contents and state, until the page table entry has been
-replaced with an entry that refers to the new page. Linux supports migration
-of mlocked pages and other unevictable pages. This involves simply moving the
-PG_mlocked and PG_unevictable states from the old page to the new page.
-
-Note that page migration can race with mlocking or munlocking of the same page.
-This has been discussed from the mlock/munlock perspective in the respective
-sections above. Both processes (migration and m[un]locking) hold the page
-locked. This provides the first level of synchronization. Page migration
-zeros out the page_mapping of the old page before unlocking it, so m[un]lock
-can skip these pages by testing the page mapping under page lock.
-
-To complete page migration, we place the new and old pages back onto the LRU
-after dropping the page lock. The "unneeded" page - old page on success, new
-page on failure - will be freed when the reference count held by the migration
-process is released. To ensure that we don't strand pages on the unevictable
-list because of a race between munlock and migration, page migration uses the
-putback_lru_page() function to add migrated pages back to the LRU.
-
-
-mmap(MAP_LOCKED) SYSTEM CALL HANDLING
--------------------------------------
+This is fine, because we'll catch it later when/if vmscan tries to reclaim the
+page. This should be relatively rare.
+
+Mlocked Pages: Migrating Them...
+
+A page that is being migrated has been isolated from the lru lists and is
+held locked across unmapping of the page, updating the page's mapping
+[address_space] entry and copying the contents and state, until the
+page table entry has been replaced with an entry that refers to the new
+page. Linux supports migration of mlocked pages and other unevictable
+pages. This involves simply moving the PageMlocked and PageUnevictable states
+from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same
+page. This has been discussed from the mlock/munlock perspective in the
+respective sections above. Both processes [migration, m[un]locking] hold
+the page locked. This provides the first level of synchronization. Page
+migration zeros out the page_mapping of the old page before unlocking it,
+so m[un]lock can skip these pages by testing the page mapping under page
+lock.
+
+When completing page migration, we place the new and old pages back onto the
+lru after dropping the page lock. The "unneeded" page--old page on success,
+new page on failure--will be freed when the reference count held by the
+migration process is released. To ensure that we don't strand pages on the
+unevictable list because of a race between munlock and migration, page
+migration uses the putback_lru_page() function to add migrated pages back to
+the lru.
+
+
+Mlocked Pages: mmap(MAP_LOCKED) System Call Handling
In addition to the mlock()/mlockall() system calls, an application can request
-that a region of memory be mlocked supplying the MAP_LOCKED flag to the mmap()
+that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
call.
Furthermore, any mmap() call or brk() call that expands the heap by a task that has previously called mlockall() with the MCL_FUTURE flag will result -in the newly mapped memory being mlocked.  Before the unevictable/mlock -changes, the kernel simply called make_pages_present() to allocate pages and -populate the page table. +in the newly mapped memory being mlocked.  Before the unevictable/mlock changes, +the kernel simply called make_pages_present() to allocate pages and populate +the page table. To mlock a range of memory under the unevictable/mlock infrastructure, the mmap() handler and task address space expansion functions call mlock_vma_pages_range() specifying the vma and the address range to mlock. -mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in -"Filtering Special VMAs".  It will clear the VM_LOCKED flag, which will have -already been set by the caller, in filtered VMAs.  Thus these VMA's need not be -visited for munlock when the region is unmapped. +mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in +"Mlocked Pages:  Filtering Vmas".  It will clear the VM_LOCKED flag, which will +have already been set by the caller, in filtered vmas.  Thus these vmas need +not be visited for munlock when the region is unmapped. -For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to +For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to fault/allocate the pages and mlock them.  Again, like mlock_fixup(), mlock_vma_pages_range() downgrades the mmap semaphore to read mode before -attempting to fault/allocate and mlock the pages and "upgrades" the semaphore +attempting to fault/allocate and mlock the pages, and "upgrades" the semaphore back to write mode before returning. -The callers of mlock_vma_pages_range() will have already added the memory range -to be mlocked to the task's "locked_vm".  To account for filtered VMAs, +The callers of mlock_vma_pages_range() will have already added the memory +range to be mlocked to the task's "locked_vm".  To account for filtered vmas, mlock_vma_pages_range() returns the number of pages NOT mlocked.  All of the -callers then subtract a non-negative return value from the task's locked_vm.  A -negative return value represent an error - for example, from get_user_pages() -attempting to fault in a VMA with PROT_NONE access.  In this case, we leave the -memory range accounted as locked_vm, as the protections could be changed later -and pages allocated into that region. +callers then subtract a non-negative return value from the task's locked_vm. +A negative return value represents an error--for example, from get_user_pages() +attempting to fault in a vma with PROT_NONE access.  In this case, we leave +the memory range accounted as locked_vm, as the protections could be changed +later and pages allocated into that region. -munmap()/exit()/exec() SYSTEM CALL HANDLING ------------------------------------------- +Mlocked Pages:  munmap()/exit()/exec() System Call Handling When unmapping an mlocked region of memory, whether by an explicit call to munmap() or via an internal unmap from exit() or exec() processing, we must -munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages. +munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. Before the unevictable/mlock changes, mlocking did not mark the pages in any way, so unmapping them required no processing.
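For orientation, here is a minimal userspace sketch of the operations these two sections describe--an mmap(MAP_LOCKED) mapping, an mlockall(MCL_FUTURE) request, and the munmap() that triggers the munlock processing.  All calls are standard Linux/POSIX; error handling is omitted for brevity.

	#define _GNU_SOURCE	/* MAP_LOCKED/MAP_ANONYMOUS on older glibc */
	#include <stddef.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* Map and mlock a region in a single call. */
		void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED,
				 -1, 0);

		/* Every mapping created from here on--including brk()/heap
		   growth--will be mlocked as it is created. */
		mlockall(MCL_FUTURE);

		/* Unmapping the last VM_LOCKED vma covering these pages is
		   what invokes the munlock processing described below. */
		munmap(buf, 4096);
		return 0;
	}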
To munlock a range of memory under the unevictable/mlock infrastructure, the -munmap() handler and task address space call tear down function +munmap() handler and task address space tear down function call munlock_vma_pages_all().  The name reflects the observation that one always -specifies the entire VMA range when munlock()ing during unmap of a region. -Because of the VMA filtering when mlocking() regions, only "normal" VMAs that +specifies the entire vma range when munlock()ing during unmap of a region. +Because of the vma filtering when mlocking() regions, only "normal" vmas that actually contain mlocked pages will be passed to munlock_vma_pages_all(). -munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup() +munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup() for the munlock case, calls __munlock_vma_pages_range() to walk the page table -for the VMA's memory range and munlock_vma_page() each resident page mapped by -the VMA.  This effectively munlocks the page, only if this is the last -VM_LOCKED VMA that maps the page. +for the vma's memory range and munlock_vma_page() each resident page mapped by +the vma.  This effectively munlocks the page, only if this is the last +VM_LOCKED vma that maps the page. + +Mlocked Pages:  try_to_unmap() -try_to_unmap() -------------- +[Note: the code changes represented by this section are really quite small +compared to the text needed to describe what is happening and why, and to +discuss the implications.] -Pages can, of course, be mapped into multiple VMAs.  Some of these VMAs may +Pages can, of course, be mapped into multiple vmas.  Some of these vmas may have VM_LOCKED flag set.  It is possible for a page mapped into one or more -VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one -of the active or inactive LRU lists.  This could happen if, for example, a task -in the process of munlocking the page could not isolate the page from the LRU. -As a result, vmscan/shrink_page_list() might encounter such a page as described -in section "vmscan's handling of unevictable pages".  To handle this situation, -try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse -map. +VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one +of the active or inactive LRU lists.  This could happen if, for example, a +task in the process of munlock()ing the page could not isolate the page from +the LRU.  As a result, vmscan/shrink_page_list() might encounter such a page +as described in "Unevictable Pages and Vmscan [shrink_*_list()]".  To +handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED +vmas while it is walking a page's reverse map. try_to_unmap() is always called, by either vmscan for reclaim or for page -migration, with the argument page locked and isolated from the LRU.  Separate -functions handle anonymous and mapped file pages, as these types of pages have -different reverse map mechanisms. - - (*) try_to_unmap_anon() - - To unmap anonymous pages, each VMA in the list anchored in the anon_vma - must be visited - at least until a VM_LOCKED VMA is encountered.  If the - page is being unmapped for migration, VM_LOCKED VMAs do not stop the - process because mlocked pages are migratable.  However, for reclaim, if - the page is mapped into a VM_LOCKED VMA, the scan stops. - - try_to_unmap_anon() attempts to acquire in read mode the mmap semphore of - the mm_struct to which the VMA belongs.
If this is successful, it will - mlock the page via mlock_vma_page() - we wouldn't have gotten to - try_to_unmap_anon() if the page were already mlocked - and will return - SWAP_MLOCK, indicating that the page is unevictable. - - If the mmap semaphore cannot be acquired, we are not sure whether the page - is really unevictable or not. In this case, try_to_unmap_anon() will - return SWAP_AGAIN. - - (*) try_to_unmap_file() - linear mappings - - Unmapping of a mapped file page works the same as for anonymous mappings, - except that the scan visits all VMAs that map the page's index/page offset - in the page's mapping's reverse map priority search tree. It also visits - each VMA in the page's mapping's non-linear list, if the list is - non-empty. - - As for anonymous pages, on encountering a VM_LOCKED VMA for a mapped file - page, try_to_unmap_file() will attempt to acquire the associated - mm_struct's mmap semaphore to mlock the page, returning SWAP_MLOCK if this - is successful, and SWAP_AGAIN, if not. - - (*) try_to_unmap_file() - non-linear mappings - - If a page's mapping contains a non-empty non-linear mapping VMA list, then - try_to_un{map|lock}() must also visit each VMA in that list to determine - whether the page is mapped in a VM_LOCKED VMA. Again, the scan must visit - all VMAs in the non-linear list to ensure that the pages is not/should not - be mlocked. - - If a VM_LOCKED VMA is found in the list, the scan could terminate. - However, there is no easy way to determine whether the page is actually - mapped in a given VMA - either for unmapping or testing whether the - VM_LOCKED VMA actually pins the page. - - try_to_unmap_file() handles non-linear mappings by scanning a certain - number of pages - a "cluster" - in each non-linear VMA associated with the - page's mapping, for each file mapped page that vmscan tries to unmap. If - this happens to unmap the page we're trying to unmap, try_to_unmap() will - notice this on return (page_mapcount(page) will be 0) and return - SWAP_SUCCESS. Otherwise, it will return SWAP_AGAIN, causing vmscan to - recirculate this page. We take advantage of the cluster scan in - try_to_unmap_cluster() as follows: - - For each non-linear VMA, try_to_unmap_cluster() attempts to acquire the - mmap semaphore of the associated mm_struct for read without blocking. - - If this attempt is successful and the VMA is VM_LOCKED, - try_to_unmap_cluster() will retain the mmap semaphore for the scan; - otherwise it drops it here. - - Then, for each page in the cluster, if we're holding the mmap semaphore - for a locked VMA, try_to_unmap_cluster() calls mlock_vma_page() to - mlock the page. This call is a no-op if the page is already locked, - but will mlock any pages in the non-linear mapping that happen to be - unlocked. - - If one of the pages so mlocked is the page passed in to try_to_unmap(), - try_to_unmap_cluster() will return SWAP_MLOCK, rather than the default - SWAP_AGAIN. This will allow vmscan to cull the page, rather than - recirculating it on the inactive list. - - Again, if try_to_unmap_cluster() cannot acquire the VMA's mmap sem, it - returns SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED - VMA, but couldn't be mlocked. - - -try_to_munlock() REVERSE MAP SCAN ---------------------------------- - - [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the - page_referenced() reverse map walker. 
- -When munlock_vma_page() [see section "munlock()/munlockall() System Call -Handling" above] tries to munlock a page, it needs to determine whether or not -the page is mapped by any VM_LOCKED VMA without actually attempting to unmap -all PTEs from the page.  For this purpose, the unevictable/mlock infrastructure -introduced a variant of try_to_unmap() called try_to_munlock(). +migration, with the argument page locked and isolated from the LRU.  BUG_ON() +assertions enforce this requirement.  Separate functions handle anonymous and +mapped file pages, as these types of pages have different reverse map +mechanisms. + + try_to_unmap_anon() + +To unmap anonymous pages, each vma in the list anchored in the anon_vma must be +visited--at least until a VM_LOCKED vma is encountered.  If the page is being +unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked +pages are migratable.  However, for reclaim, if the page is mapped into a +VM_LOCKED vma, the scan stops.  try_to_unmap() attempts to acquire the mmap +semaphore of the mm_struct to which the vma belongs in read mode.  If this is +successful, try_to_unmap() will mlock the page via mlock_vma_page()--we +wouldn't have gotten to try_to_unmap() if the page were already mlocked--and +will return SWAP_MLOCK, indicating that the page is unevictable.  If the +mmap semaphore cannot be acquired, we are not sure whether the page is really +unevictable or not.  In this case, try_to_unmap() will return SWAP_AGAIN. + + try_to_unmap_file() -- linear mappings + +Unmapping of a mapped file page works the same, except that the scan visits +all vmas that map the page's index/page offset in the page's mapping's +reverse map priority search tree.  It must also visit each vma in the page's +mapping's non-linear list, if the list is non-empty.  As for anonymous pages, +on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will +attempt to acquire the associated mm_struct's mmap semaphore to mlock the page, +returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not. + + try_to_unmap_file() -- non-linear mappings + +If a page's mapping contains a non-empty non-linear mapping vma list, then +try_to_un{map|lock}() must also visit each vma in that list to determine +whether the page is mapped in a VM_LOCKED vma.  Again, the scan must visit +all vmas in the non-linear list to ensure that the page is not/should not be +mlocked.  If a VM_LOCKED vma is found in the list, the scan could terminate. +However, there is no easy way to determine whether the page is actually mapped +in a given vma--either for unmapping or testing whether the VM_LOCKED vma +actually pins the page. + +So, try_to_unmap_file() handles non-linear mappings by scanning a certain +number of pages--a "cluster"--in each non-linear vma associated with the page's +mapping, for each file mapped page that vmscan tries to unmap.  If this happens +to unmap the page we're trying to unmap, try_to_unmap() will notice this on +return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS.  Otherwise, it +will return SWAP_AGAIN, causing vmscan to recirculate this page.  We take +advantage of the cluster scan in try_to_unmap_cluster() as follows: + +For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap +semaphore of the associated mm_struct for read without blocking.  If this +attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will +retain the mmap semaphore for the scan; otherwise it drops it here.
Then, +for each page in the cluster, if we're holding the mmap semaphore for a locked +vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page.  This +call is a no-op if the page is already locked, but will mlock any pages in +the non-linear mapping that happen to be unlocked.  If one of the pages so +mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will +return SWAP_MLOCK, rather than the default SWAP_AGAIN.  This will allow vmscan +to cull the page, rather than recirculating it on the inactive list.  Again, +if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns +SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but +couldn't be mlocked. + + +Mlocked Pages:  try_to_munlock() Reverse Map Scan + +TODO/FIXME: a better name might be page_mlocked()--analogous to the +page_referenced() reverse map walker. + +When munlock_vma_page()--see "Mlocked Pages:  munlock()/munlockall() +System Call Handling" above--tries to munlock a page, it needs to +determine whether or not the page is mapped by any VM_LOCKED vma, without +actually attempting to unmap all ptes from the page.  For this purpose, the +unevictable/mlock infrastructure introduced a variant of try_to_unmap() called +try_to_munlock(). try_to_munlock() calls the same functions as try_to_unmap() for anonymous and mapped file pages with an additional argument specifying unlock versus unmap processing.  Again, these functions walk the respective reverse maps looking -for VM_LOCKED VMAs.  When such a VMA is found for anonymous pages and file +for VM_LOCKED vmas.  When such a vma is found for anonymous pages and file pages mapped in linear VMAs, as in the try_to_unmap() case, the functions attempt to acquire the associated mmap semaphore, mlock the page via mlock_vma_page() and return SWAP_MLOCK.  This effectively undoes the pre-clearing of the page's PG_mlocked done by munlock_vma_page().
+If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap +semaphore, it will return SWAP_AGAIN.  This will allow shrink_page_list() +to recycle the page on the inactive list and hope that it has better luck +with the page next time. + +For file pages mapped into non-linear vmas, the try_to_munlock() logic works +slightly differently.  On encountering a VM_LOCKED non-linear vma that might +map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking +the page.  munlock_vma_page() will just leave the page unlocked and let +vmscan deal with it--the usual fallback position. + +Note that try_to_munlock()'s reverse map walk must visit every vma in a page's +reverse map to determine that a page is NOT mapped into any VM_LOCKED vma. +However, the scan can terminate when it encounters a VM_LOCKED vma and can +successfully acquire the vma's mmap semaphore for read and mlock the page. +Although try_to_munlock() can be called many [very many!] times when +munlock()ing a large region or tearing down a large address space that has been +mlocked via mlockall(), overall this is a fairly rare event. + +Mlocked Pages:  Page Reclaim in shrink_*_list() + +shrink_active_list() culls any obviously unevictable pages--i.e., +!page_evictable(page, NULL)--diverting these to the unevictable lru +list.  However, shrink_active_list() only sees unevictable pages that +made it onto the active/inactive lru lists.  Note that these pages do not +have PageUnevictable set--otherwise, they would be on the unevictable list and +shrink_active_list would never see them. Some examples of these unevictable pages on the LRU lists are: - (1) ramfs pages that have been placed on the LRU lists when first allocated. - - (2) SHM_LOCK'd shared memory pages.  shmctl(SHM_LOCK) does not attempt to - allocate or fault in the pages in the shared memory region.  This happens - when an application accesses the page the first time after SHM_LOCK'ing - the segment. +1) ramfs pages that have been placed on the lru lists when first allocated. - (3) mlocked pages that could not be isolated from the LRU and moved to the - unevictable list in mlock_vma_page(). +2) SHM_LOCKed shared memory pages.  shmctl(SHM_LOCK) does not attempt to + allocate or fault in the pages in the shared memory region.  This happens + when an application accesses the page the first time after SHM_LOCKing + the segment. - (4) Pages mapped into multiple VM_LOCKED VMAs, but try_to_munlock() couldn't - acquire the VMA's mmap semaphore to test the flags and set PageMlocked. - munlock_vma_page() was forced to let the page back on to the normal LRU - list for vmscan to handle. +3) Mlocked pages that could not be isolated from the lru and moved to the + unevictable list in mlock_vma_page(). -shrink_inactive_list() also diverts any unevictable pages that it finds on the -inactive lists to the appropriate zone's unevictable list. +4) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't + acquire the vma's mmap semaphore to test the flags and set PageMlocked. + munlock_vma_page() was forced to let the page back on to the normal + LRU list for vmscan to handle. -shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd -after shrink_active_list() had moved them to the inactive list, or pages mapped -into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to -recheck via try_to_munlock().  shrink_inactive_list() won't notice the latter, -but will pass on to shrink_page_list().
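+As an illustrative sketch only--not the kernel's exact code--the culling
+test described above amounts to the following inside the lru scan loops:
+
+	if (!page_evictable(page, NULL)) {
+		/* putback_lru_page() recognizes an unevictable page and
+		 * places it on the zone's unevictable list rather than
+		 * back on an active/inactive list. */
+		putback_lru_page(page);
+		continue;	/* on to the next page in the scan */
+	}
+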
+shrink_inactive_list() also culls any unevictable pages that it finds on +the inactive lists, again diverting them to the appropriate zone's unevictable +lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became +SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or +pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from +the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice +the latter, but will pass on to shrink_page_list(). shrink_page_list() again culls obviously unevictable pages that it could encounter for similar reason to shrink_inactive_list(). Pages mapped into -VM_LOCKED VMAs but without PG_mlocked set will make it all the way to +VM_LOCKED vmas but without PG_mlocked set will make it all the way to try_to_unmap(). shrink_page_list() will divert them to the unevictable list when try_to_unmap() returns SWAP_MLOCK, as discussed above. diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 29d74f47ba86..ca2997a45766 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -461,7 +461,7 @@ F: arch/x86/include/asm/amd_iommu*.h AMD MICROCODE UPDATE SUPPORT P: Andreas Herrmann -M: andreas.herrmann3@amd.com +M: andeas.herrmann3@amd.com L: amd64-microcode@amd64.org S: Supported F: arch/x86/kernel/microcode_amd.c @@ -1894,7 +1894,7 @@ F: fs/ecryptfs/ EDAC-CORE P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Supported F: Documentation/edac.txt @@ -1906,7 +1906,7 @@ P: Mark Gross P: Doug Thompson M: mark.gross@intel.com M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e752x_edac.c @@ -1914,7 +1914,7 @@ F: drivers/edac/e752x_edac.c EDAC-E7XXX P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e7xxx_edac.c @@ -1922,7 +1922,7 @@ F: drivers/edac/e7xxx_edac.c EDAC-I82443BXGX P: Tim Small M: tim@buttersideup.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82443bxgx_edac.c @@ -1930,7 +1930,7 @@ F: drivers/edac/i82443bxgx_edac.c EDAC-I3000 P: Jason Uhlenkott M: juhlenko@akamai.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i3000_edac.c @@ -1938,7 +1938,7 @@ F: drivers/edac/i3000_edac.c EDAC-I5000 P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5000_edac.c @@ -1946,7 +1946,7 @@ F: drivers/edac/i5000_edac.c EDAC-I5400 P: Mauro Carvalho Chehab M: mchehab@redhat.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5400_edac.c @@ -1956,7 +1956,7 @@ P: Ranganathan Desikan P: Arvind R. 
M: rdesikan@jetzbroadband.com M: arvind@acarlab.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82975x_edac.c @@ -1964,7 +1964,7 @@ F: drivers/edac/i82975x_edac.c EDAC-PASEMI P: Egor Martovetsky M: egor@pasemi.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/pasemi_edac.c @@ -1972,7 +1972,7 @@ F: drivers/edac/pasemi_edac.c EDAC-R82600 P: Tim Small M: tim@buttersideup.com -L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) +L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/r82600_edac.c @@ -2592,8 +2592,8 @@ S: Maintained F: fs/hpfs/ HSO 3G MODEM DRIVER -P: Jan Dumon -M: j.dumon@option.com +P: Denis Joseph Barrow +M: d.barow@option.com W: http://www.pharscape.org S: Maintained F: drivers/net/usb/hso.c @@ -4978,8 +4978,8 @@ S: Maintained for 2.6. F: Documentation/sgi-visws.txt SGI XP/XPC/XPNET DRIVER -P: Robin Holt -M: holt@sgi.com +P: Dean Nelson +M: dcn@sgi.com S: Maintained F: drivers/misc/sgi-xp/ diff --git a/trunk/arch/arm/mach-omap2/usb-musb.c b/trunk/arch/arm/mach-omap2/usb-musb.c index 34a56a136efd..fc74e913c415 100644 --- a/trunk/arch/arm/mach-omap2/usb-musb.c +++ b/trunk/arch/arm/mach-omap2/usb-musb.c @@ -131,14 +131,14 @@ static struct musb_hdrc_platform_data musb_plat = { .power = 50, /* up to 100 mA */ }; -static u64 musb_dmamask = DMA_BIT_MASK(32); +static u64 musb_dmamask = DMA_32BIT_MASK; static struct platform_device musb_device = { .name = "musb_hdrc", .id = -1, .dev = { .dma_mask = &musb_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = DMA_32BIT_MASK, .platform_data = &musb_plat, }, .num_resources = ARRAY_SIZE(musb_resources), @@ -146,14 +146,14 @@ static struct platform_device musb_device = { }; #ifdef CONFIG_NOP_USB_XCEIV -static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32); +static u64 nop_xceiv_dmamask = DMA_32BIT_MASK; static struct platform_device nop_xceiv_device = { .name = "nop_usb_xceiv", .id = -1, .dev = { .dma_mask = &nop_xceiv_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = DMA_32BIT_MASK, .platform_data = NULL, }, }; diff --git a/trunk/arch/ia64/kernel/pci-swiotlb.c b/trunk/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c6..573f02c39a00 100644 --- a/trunk/arch/ia64/kernel/pci-swiotlb.c +++ b/trunk/arch/ia64/kernel/pci-swiotlb.c @@ -16,7 +16,7 @@ EXPORT_SYMBOL(swiotlb); static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - if (dev->coherent_dma_mask != DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask != DMA_64BIT_MASK) gfp |= GFP_DMA; return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); } diff --git a/trunk/arch/x86/include/asm/required-features.h b/trunk/arch/x86/include/asm/required-features.h index a4737dddfd58..d5cd6c586881 100644 --- a/trunk/arch/x86/include/asm/required-features.h +++ b/trunk/arch/x86/include/asm/required-features.h @@ -50,7 +50,7 @@ #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_PGE 0 +#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) diff --git a/trunk/arch/x86/include/asm/xen/page.h 
b/trunk/arch/x86/include/asm/xen/page.h index 018a0a400799..1a918dde46b5 100644 --- a/trunk/arch/x86/include/asm/xen/page.h +++ b/trunk/arch/x86/include/asm/xen/page.h @@ -124,8 +124,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) -#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) +#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) static inline unsigned long pte_mfn(pte_t pte) diff --git a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 837c2c4cc203..3e3cd3db7a0c 100644 --- a/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/trunk/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -277,7 +277,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int perf_percent; unsigned int retval; - if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) + if (smp_call_function_single(cpu, read_measured_perf_ctrs, &cur, 1)) return 0; cur.aperf.whole = readin.aperf.whole - diff --git a/trunk/arch/x86/xen/enlighten.c b/trunk/arch/x86/xen/enlighten.c index f09e8c36ee80..82cd39a6cbd3 100644 --- a/trunk/arch/x86/xen/enlighten.c +++ b/trunk/arch/x86/xen/enlighten.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -169,23 +168,21 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } -static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; -static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; - static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { - unsigned maskecx = ~0; unsigned maskedx = ~0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. 
*/ - if (*ax == 1) { - maskecx = cpuid_leaf1_ecx_mask; - maskedx = cpuid_leaf1_edx_mask; - } + if (*ax == 1) + maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ + (1 << X86_FEATURE_ACPI) | /* disable ACPI */ + (1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ asm(XEN_EMULATE_PREFIX "cpuid" : "=a" (*ax), @@ -193,43 +190,9 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=c" (*cx), "=d" (*dx) : "0" (*ax), "2" (*cx)); - - *cx &= maskecx; *dx &= maskedx; } -static __init void xen_init_cpuid_mask(void) -{ - unsigned int ax, bx, cx, dx; - - cpuid_leaf1_edx_mask = - ~((1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ - - if (!xen_initial_domain()) - cpuid_leaf1_edx_mask &= - ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ - (1 << X86_FEATURE_ACPI)); /* disable ACPI */ - - ax = 1; - xen_cpuid(&ax, &bx, &cx, &dx); - - /* cpuid claims we support xsave; try enabling it to see what happens */ - if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { - unsigned long cr4; - - set_in_cr4(X86_CR4_OSXSAVE); - - cr4 = read_cr4(); - - if ((cr4 & X86_CR4_OSXSAVE) == 0) - cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); - - clear_in_cr4(X86_CR4_OSXSAVE); - } -} - static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -321,11 +284,12 @@ static void xen_set_ldt(const void *addr, unsigned entries) static void xen_load_gdt(const struct desc_ptr *dtr) { + unsigned long *frames; unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - unsigned long frames[pages]; int f; + struct multicall_space mcs; /* A GDT can be up to 64k in size, which corresponds to 8192 8-byte entries, or 16 4k pages.. 
*/ @@ -333,26 +297,19 @@ static void xen_load_gdt(const struct desc_ptr *dtr) BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - int level; - pte_t *ptep = lookup_address(va, &level); - unsigned long pfn, mfn; - void *virt; - - BUG_ON(ptep == NULL); + mcs = xen_mc_entry(sizeof(*frames) * pages); + frames = mcs.args; - pfn = pte_pfn(*ptep); - mfn = pfn_to_mfn(pfn); - virt = __va(PFN_PHYS(pfn)); - - frames[f] = mfn; + for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { + frames[f] = arbitrary_virt_to_mfn((void *)va); make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(virt); + make_lowmem_page_readonly(mfn_to_virt(frames[f])); } - if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) - BUG(); + MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); + + xen_mc_issue(PARAVIRT_LAZY_CPU); } static void load_TLS_descriptor(struct thread_struct *t, @@ -428,7 +385,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) + if (val->type != 0xf && val->type != 0xe) return 0; info->vector = vector; @@ -436,8 +393,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ - if (val->type == GATE_INTERRUPT) - info->flags |= 1 << 2; + if (val->type == 0xe) + info->flags |= 4; return 1; } @@ -915,6 +872,7 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -939,8 +897,6 @@ asmlinkage void __init xen_start_kernel(void) xen_init_irq_ops(); - xen_init_cpuid_mask(); - #ifdef CONFIG_X86_LOCAL_APIC /* * set up the basic apic ops. @@ -982,11 +938,6 @@ asmlinkage void __init xen_start_kernel(void) if (!xen_initial_domain()) __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); -#ifdef CONFIG_X86_64 - /* Work out if we support NX */ - check_efer(); -#endif - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. 
*/ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; diff --git a/trunk/arch/x86/xen/mmu.c b/trunk/arch/x86/xen/mmu.c index 9842b1212407..2a81838a9ab7 100644 --- a/trunk/arch/x86/xen/mmu.c +++ b/trunk/arch/x86/xen/mmu.c @@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -static void __init xen_build_mfn_list_list(void) +void xen_setup_mfn_list_list(void) { unsigned pfn, idx; @@ -198,10 +198,7 @@ static void __init xen_build_mfn_list_list(void) unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); } -} -void xen_setup_mfn_list_list(void) -{ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = @@ -221,8 +218,6 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top[topidx] = &mfn_list[pfn]; } - - xen_build_mfn_list_list(); } unsigned long get_phys_to_machine(unsigned long pfn) @@ -238,74 +233,47 @@ unsigned long get_phys_to_machine(unsigned long pfn) } EXPORT_SYMBOL_GPL(get_phys_to_machine); -/* install a new p2m_top page */ -bool install_p2mtop_page(unsigned long pfn, unsigned long *p) +static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) { - unsigned topidx = p2m_top_index(pfn); - unsigned long **pfnp, *mfnp; + unsigned long *p; unsigned i; - pfnp = &p2m_top[topidx]; - mfnp = &p2m_top_mfn[topidx]; + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) p[i] = INVALID_P2M_ENTRY; - if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { - *mfnp = virt_to_mfn(p); - return true; - } - - return false; -} - -static void alloc_p2m(unsigned long pfn) -{ - unsigned long *p; - - p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); - BUG_ON(p == NULL); - - if (!install_p2mtop_page(pfn, p)) + if (cmpxchg(pp, p2m_missing, p) != p2m_missing) free_page((unsigned long)p); + else + *mfnp = virt_to_mfn(p); } -/* Try to install p2m mapping; fail if intermediate bits missing */ -bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, idx; + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; + return; } topidx = p2m_top_index(pfn); if (p2m_top[topidx] == p2m_missing) { + /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) - return true; - return false; + return; + alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); } idx = p2m_index(pfn); p2m_top[topidx][idx] = mfn; - - return true; -} - -void set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } - - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - alloc_p2m(pfn); - - if (!__set_phys_to_machine(pfn, mfn)) - BUG(); - } } unsigned long arbitrary_virt_to_mfn(void *vaddr) @@ -1019,7 +987,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -static void __init xen_mark_init_mm_pinned(void) +void __init xen_mark_init_mm_pinned(void) { xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1302,8 +1270,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, } *args; struct multicall_space mcs; - if 
(cpumask_empty(cpus)) - return; /* nothing to do */ + BUG_ON(cpumask_empty(cpus)); + BUG_ON(!mm); mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; @@ -1470,29 +1438,10 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) } #endif -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) { -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); -} - -/* Used for pmd and pud */ -static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) -{ #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ #endif @@ -1501,15 +1450,18 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static __init void xen_release_pte_init(unsigned long pfn) +static void xen_release_pte_init(unsigned long pfn) { - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static __init void xen_release_pmd_init(unsigned long pfn) +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); } /* This needs to make sure the new pte page is pinned iff its being @@ -1821,9 +1773,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif - case FIX_TEXT_POKE0: - case FIX_TEXT_POKE1: - /* All local page mappings */ pte = pfn_pte(phys, prot); break; @@ -1870,6 +1819,7 @@ __init void xen_post_allocator_init(void) xen_mark_init_mm_pinned(); } + const struct pv_mmu_ops xen_mmu_ops __initdata = { .pagetable_setup_start = xen_pagetable_setup_start, .pagetable_setup_done = xen_pagetable_setup_done, @@ -1893,9 +1843,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pmd_init, + .alloc_pmd = xen_alloc_pte_init, .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pmd_init, + .release_pmd = xen_release_pte_init, #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = xen_kmap_atomic_pte, @@ -1933,8 +1883,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, - .alloc_pud = xen_alloc_pmd_init, - .release_pud = xen_release_pmd_init, + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, #endif /* PAGETABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, diff --git a/trunk/arch/x86/xen/mmu.h b/trunk/arch/x86/xen/mmu.h index da7302624897..24d1b44a337d 100644 --- a/trunk/arch/x86/xen/mmu.h +++ b/trunk/arch/x86/xen/mmu.h @@ -11,9 +11,6 @@ enum pt_level { }; -bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); -bool install_p2mtop_page(unsigned long pfn, unsigned long *p); - void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); diff --git a/trunk/arch/x86/xen/smp.c 
b/trunk/arch/x86/xen/smp.c index 429834ec1687..585a6e330837 100644 --- a/trunk/arch/x86/xen/smp.c +++ b/trunk/arch/x86/xen/smp.c @@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) BUG_ON(rc); while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + HYPERVISOR_sched_op(SCHEDOP_yield, 0); barrier(); } @@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) /* Make sure other vcpus get a chance to run if they need to. */ for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; } } diff --git a/trunk/arch/x86/xen/xen-ops.h b/trunk/arch/x86/xen/xen-ops.h index 20139464943c..2f5ef2632ea2 100644 --- a/trunk/arch/x86/xen/xen-ops.h +++ b/trunk/arch/x86/xen/xen-ops.h @@ -57,6 +57,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); +void xen_mark_init_mm_pinned(void); + void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/trunk/drivers/atm/solos-pci.c b/trunk/drivers/atm/solos-pci.c index 9359613addc5..be204308cc1b 100644 --- a/trunk/drivers/atm/solos-pci.c +++ b/trunk/drivers/atm/solos-pci.c @@ -1059,7 +1059,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) goto out; } - err = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); + err = pci_set_dma_mask(dev, DMA_32BIT_MASK); if (err) { dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n"); goto out; diff --git a/trunk/drivers/block/cciss.c b/trunk/drivers/block/cciss.c index 4d4d5e0d3fa6..0ef6f08aa6ea 100644 --- a/trunk/drivers/block/cciss.c +++ b/trunk/drivers/block/cciss.c @@ -3505,7 +3505,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u /* The Inbound Post Queue only accepts 32-bit physical addresses for the CCISS commands, so they must be allocated from the lower 4GiB of memory. 
*/ - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); if (err) { iounmap(vaddr); return -ENOMEM; diff --git a/trunk/drivers/char/agp/intel-agp.c b/trunk/drivers/char/agp/intel-agp.c index 3686912427ba..9d9490e22e07 100644 --- a/trunk/drivers/char/agp/intel-agp.c +++ b/trunk/drivers/char/agp/intel-agp.c @@ -2131,8 +2131,6 @@ static const struct intel_driver_description { { PCI_DEVICE_ID_INTEL_82845G_HB, PCI_DEVICE_ID_INTEL_82845G_IG, 0, "830M", &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82850_HB, 0, 0, "i850", &intel_850_driver, NULL }, - { PCI_DEVICE_ID_INTEL_82854_HB, PCI_DEVICE_ID_INTEL_82854_IG, 0, "854", - &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82855PM_HB, 0, 0, "855PM", &intel_845_driver, NULL }, { PCI_DEVICE_ID_INTEL_82855GM_HB, PCI_DEVICE_ID_INTEL_82855GM_IG, 0, "855GM", &intel_845_driver, &intel_830_driver }, @@ -2357,7 +2355,6 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_82845_HB), ID(PCI_DEVICE_ID_INTEL_82845G_HB), ID(PCI_DEVICE_ID_INTEL_82850_HB), - ID(PCI_DEVICE_ID_INTEL_82854_HB), ID(PCI_DEVICE_ID_INTEL_82855PM_HB), ID(PCI_DEVICE_ID_INTEL_82855GM_HB), ID(PCI_DEVICE_ID_INTEL_82860_HB), diff --git a/trunk/drivers/char/sysrq.c b/trunk/drivers/char/sysrq.c index b0a6a3e51924..6de020d078e1 100644 --- a/trunk/drivers/char/sysrq.c +++ b/trunk/drivers/char/sysrq.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/trunk/drivers/edac/edac_core.h b/trunk/drivers/edac/edac_core.h index 6ad95c8d6363..28f2c3f959b5 100644 --- a/trunk/drivers/edac/edac_core.h +++ b/trunk/drivers/edac/edac_core.h @@ -767,19 +767,11 @@ static inline void pci_write_bits16(struct pci_dev *pdev, int offset, pci_write_config_word(pdev, offset, value); } -/* - * pci_write_bits32 - * - * edac local routine to do pci_write_config_dword, but adds - * a mask parameter. If mask is all ones, ignore the mask. 
- * Otherwise utilize the mask to isolate specified bits - * - * write all or some bits in a dword-register - */ +/* write all or some bits in a dword-register*/ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, u32 value, u32 mask) { - if (mask != 0xffffffff) { + if (mask != 0xffff) { u32 buf; pci_read_config_dword(pdev, offset, &buf); diff --git a/trunk/drivers/edac/edac_device.c b/trunk/drivers/edac/edac_device.c index a7d2c717d033..ca9113e1c106 100644 --- a/trunk/drivers/edac/edac_device.c +++ b/trunk/drivers/edac/edac_device.c @@ -389,7 +389,7 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info */ static void edac_device_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = to_delayed_work(work_req); + struct delayed_work *d_work = (struct delayed_work *)work_req; struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work); mutex_lock(&device_ctls_mutex); diff --git a/trunk/drivers/edac/edac_mc.c b/trunk/drivers/edac/edac_mc.c index 335b7ebdb11c..25d66940b4fa 100644 --- a/trunk/drivers/edac/edac_mc.c +++ b/trunk/drivers/edac/edac_mc.c @@ -260,7 +260,7 @@ static int edac_mc_assert_error_check_and_clear(void) */ static void edac_mc_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = to_delayed_work(work_req); + struct delayed_work *d_work = (struct delayed_work *)work_req; struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); mutex_lock(&mem_ctls_mutex); diff --git a/trunk/drivers/edac/edac_pci.c b/trunk/drivers/edac/edac_pci.c index 30b585b1d60b..5b150aea703a 100644 --- a/trunk/drivers/edac/edac_pci.c +++ b/trunk/drivers/edac/edac_pci.c @@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(edac_pci_find); */ static void edac_pci_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = to_delayed_work(work_req); + struct delayed_work *d_work = (struct delayed_work *)work_req; struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work); int msec; unsigned long delay; diff --git a/trunk/drivers/hwmon/Kconfig b/trunk/drivers/hwmon/Kconfig index d73f5f473e38..0e8a9185f676 100644 --- a/trunk/drivers/hwmon/Kconfig +++ b/trunk/drivers/hwmon/Kconfig @@ -692,16 +692,6 @@ config SENSORS_PCF8591 These devices are hard to detect and rarely found on mainstream hardware. If unsure, say N. -config SENSORS_SHT15 - tristate "Sensiron humidity and temperature sensors. SHT15 and compat." - depends on GENERIC_GPIO - help - If you say yes here you get support for the Sensiron SHT10, SHT11, - SHT15, SHT71, SHT75 humidity and temperature sensors. - - This driver can also be built as a module. If so, the module - will be called sht15. - config SENSORS_SIS5595 tristate "Silicon Integrated Systems Corp. 
SiS5595" depends on PCI diff --git a/trunk/drivers/hwmon/Makefile b/trunk/drivers/hwmon/Makefile index 0ae26984ba45..1d3757837b4f 100644 --- a/trunk/drivers/hwmon/Makefile +++ b/trunk/drivers/hwmon/Makefile @@ -76,7 +76,6 @@ obj-$(CONFIG_SENSORS_MAX6650) += max6650.o obj-$(CONFIG_SENSORS_PC87360) += pc87360.o obj-$(CONFIG_SENSORS_PC87427) += pc87427.o obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o -obj-$(CONFIG_SENSORS_SHT15) += sht15.o obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o diff --git a/trunk/drivers/hwmon/hp_accel.c b/trunk/drivers/hwmon/hp_accel.c index abca7e9f953b..55d3dc565be6 100644 --- a/trunk/drivers/hwmon/hp_accel.c +++ b/trunk/drivers/hwmon/hp_accel.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/drivers/hwmon/sht15.c b/trunk/drivers/hwmon/sht15.c deleted file mode 100644 index 6cbdc2fea734..000000000000 --- a/trunk/drivers/hwmon/sht15.c +++ /dev/null @@ -1,692 +0,0 @@ -/* - * sht15.c - support for the SHT15 Temperature and Humidity Sensor - * - * Copyright (c) 2009 Jonathan Cameron - * - * Copyright (c) 2007 Wouter Horre - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Currently ignoring checksum on readings. - * Default resolution only (14bit temp, 12bit humidity) - * Ignoring battery status. - * Heater not enabled. - * Timings are all conservative. - * - * Data sheet available (1/2009) at - * http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf - * - * Regulator supply name = vcc - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define SHT15_MEASURE_TEMP 3 -#define SHT15_MEASURE_RH 5 - -#define SHT15_READING_NOTHING 0 -#define SHT15_READING_TEMP 1 -#define SHT15_READING_HUMID 2 - -/* Min timings in nsecs */ -#define SHT15_TSCKL 100 /* clock low */ -#define SHT15_TSCKH 100 /* clock high */ -#define SHT15_TSU 150 /* data setup time */ - -/** - * struct sht15_temppair - elements of voltage dependant temp calc - * @vdd: supply voltage in microvolts - * @d1: see data sheet - */ -struct sht15_temppair { - int vdd; /* microvolts */ - int d1; -}; - -/* Table 9 from data sheet - relates temperature calculation - * to supply voltage. - */ -static const struct sht15_temppair temppoints[] = { - { 2500000, -39400 }, - { 3000000, -39600 }, - { 3500000, -39700 }, - { 4000000, -39800 }, - { 5000000, -40100 }, -}; - -/** - * struct sht15_data - device instance specific data - * @pdata: platform data (gpio's etc) - * @read_work: bh of interrupt handler - * @wait_queue: wait queue for getting values from device - * @val_temp: last temperature value read from device - * @val_humid: last humidity value read from device - * @flag: status flag used to identify what the last request was - * @valid: are the current stored values valid (start condition) - * @last_updat: time of last update - * @read_lock: mutex to ensure only one read in progress - * at a time. 
- * @dev: associate device structure - * @hwmon_dev: device associated with hwmon subsystem - * @reg: associated regulator (if specified) - * @nb: notifier block to handle notifications of voltage changes - * @supply_uV: local copy of supply voltage used to allow - * use of regulator consumer if available - * @supply_uV_valid: indicates that an updated value has not yet - * been obtained from the regulator and so any calculations - * based upon it will be invalid. - * @update_supply_work: work struct that is used to update the supply_uV - * @interrupt_handled: flag used to indicate a hander has been scheduled - */ -struct sht15_data { - struct sht15_platform_data *pdata; - struct work_struct read_work; - wait_queue_head_t wait_queue; - uint16_t val_temp; - uint16_t val_humid; - u8 flag; - u8 valid; - unsigned long last_updat; - struct mutex read_lock; - struct device *dev; - struct device *hwmon_dev; - struct regulator *reg; - struct notifier_block nb; - int supply_uV; - int supply_uV_valid; - struct work_struct update_supply_work; - atomic_t interrupt_handled; -}; - -/** - * sht15_connection_reset() - reset the comms interface - * @data: sht15 specific data - * - * This implements section 3.4 of the data sheet - */ -static void sht15_connection_reset(struct sht15_data *data) -{ - int i; - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSCKL); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - for (i = 0; i < 9; ++i) { - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - } -} -/** - * sht15_send_bit() - send an individual bit to the device - * @data: device state data - * @val: value of bit to be sent - **/ -static inline void sht15_send_bit(struct sht15_data *data, int val) -{ - - gpio_set_value(data->pdata->gpio_data, val); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); /* clock low time */ -} - -/** - * sht15_transmission_start() - specific sequence for new transmission - * - * @data: device state data - * Timings for this are not documented on the data sheet, so very - * conservative ones used in implementation. This implements - * figure 12 on the data sheet. 
- **/ -static void sht15_transmission_start(struct sht15_data *data) -{ - /* ensure data is high and output */ - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_data, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); -} -/** - * sht15_send_byte() - send a single byte to the device - * @data: device state - * @byte: value to be sent - **/ -static void sht15_send_byte(struct sht15_data *data, u8 byte) -{ - int i; - for (i = 0; i < 8; i++) { - sht15_send_bit(data, !!(byte & 0x80)); - byte <<= 1; - } -} -/** - * sht15_wait_for_response() - checks for ack from device - * @data: device state - **/ -static int sht15_wait_for_response(struct sht15_data *data) -{ - gpio_direction_input(data->pdata->gpio_data); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - if (gpio_get_value(data->pdata->gpio_data)) { - gpio_set_value(data->pdata->gpio_sck, 0); - dev_err(data->dev, "Command not acknowledged\n"); - sht15_connection_reset(data); - return -EIO; - } - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - return 0; -} - -/** - * sht15_send_cmd() - Sends a command to the device. - * @data: device state - * @cmd: command byte to be sent - * - * On entry, sck is output low, data is output pull high - * and the interrupt disabled. - **/ -static int sht15_send_cmd(struct sht15_data *data, u8 cmd) -{ - int ret = 0; - sht15_transmission_start(data); - sht15_send_byte(data, cmd); - ret = sht15_wait_for_response(data); - return ret; -} -/** - * sht15_update_single_val() - get a new value from device - * @data: device instance specific data - * @command: command sent to request value - * @timeout_msecs: timeout after which comms are assumed - * to have failed are reset. - **/ -static inline int sht15_update_single_val(struct sht15_data *data, - int command, - int timeout_msecs) -{ - int ret; - ret = sht15_send_cmd(data, command); - if (ret) - return ret; - - gpio_direction_input(data->pdata->gpio_data); - atomic_set(&data->interrupt_handled, 0); - - enable_irq(gpio_to_irq(data->pdata->gpio_data)); - if (gpio_get_value(data->pdata->gpio_data) == 0) { - disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); - /* Only relevant if the interrupt hasn't occured. 
*/ - if (!atomic_read(&data->interrupt_handled)) - schedule_work(&data->read_work); - } - ret = wait_event_timeout(data->wait_queue, - (data->flag == SHT15_READING_NOTHING), - msecs_to_jiffies(timeout_msecs)); - if (ret == 0) {/* timeout occurred */ - disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data));; - sht15_connection_reset(data); - return -ETIME; - } - return 0; -} - -/** - * sht15_update_vals() - get updated readings from device if too old - * @data: device state - **/ -static int sht15_update_vals(struct sht15_data *data) -{ - int ret = 0; - int timeout = HZ; - - mutex_lock(&data->read_lock); - if (time_after(jiffies, data->last_updat + timeout) - || !data->valid) { - data->flag = SHT15_READING_HUMID; - ret = sht15_update_single_val(data, SHT15_MEASURE_RH, 160); - if (ret) - goto error_ret; - data->flag = SHT15_READING_TEMP; - ret = sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400); - if (ret) - goto error_ret; - data->valid = 1; - data->last_updat = jiffies; - } -error_ret: - mutex_unlock(&data->read_lock); - - return ret; -} - -/** - * sht15_calc_temp() - convert the raw reading to a temperature - * @data: device state - * - * As per section 4.3 of the data sheet. - **/ -static inline int sht15_calc_temp(struct sht15_data *data) -{ - int d1 = 0; - int i; - - for (i = 1; i < ARRAY_SIZE(temppoints) - 1; i++) - /* Find pointer to interpolate */ - if (data->supply_uV > temppoints[i - 1].vdd) { - d1 = (data->supply_uV/1000 - temppoints[i - 1].vdd) - * (temppoints[i].d1 - temppoints[i - 1].d1) - / (temppoints[i].vdd - temppoints[i - 1].vdd) - + temppoints[i - 1].d1; - break; - } - - return data->val_temp*10 + d1; -} - -/** - * sht15_calc_humid() - using last temperature convert raw to humid - * @data: device state - * - * This is the temperature compensated version as per section 4.2 of - * the data sheet. - **/ -static inline int sht15_calc_humid(struct sht15_data *data) -{ - int RHlinear; /* milli percent */ - int temp = sht15_calc_temp(data); - - const int c1 = -4; - const int c2 = 40500; /* x 10 ^ -6 */ - const int c3 = 2800; /* x10 ^ -9 */ - - RHlinear = c1*1000 - + c2 * data->val_humid/1000 - + (data->val_humid * data->val_humid * c3)/1000000; - return (temp - 25000) * (10000 + 800 * data->val_humid) - / 1000000 + RHlinear; -} - -static ssize_t sht15_show_temp(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int ret; - struct sht15_data *data = dev_get_drvdata(dev); - - /* Technically no need to read humidity as well */ - ret = sht15_update_vals(data); - - return ret ? ret : sprintf(buf, "%d\n", - sht15_calc_temp(data)); -} - -static ssize_t sht15_show_humidity(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - int ret; - struct sht15_data *data = dev_get_drvdata(dev); - - ret = sht15_update_vals(data); - - return ret ? 
ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); - -} -static ssize_t show_name(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - return sprintf(buf, "%s\n", pdev->name); -} - -static SENSOR_DEVICE_ATTR(temp1_input, - S_IRUGO, sht15_show_temp, - NULL, 0); -static SENSOR_DEVICE_ATTR(humidity1_input, - S_IRUGO, sht15_show_humidity, - NULL, 0); -static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); -static struct attribute *sht15_attrs[] = { - &sensor_dev_attr_temp1_input.dev_attr.attr, - &sensor_dev_attr_humidity1_input.dev_attr.attr, - &dev_attr_name.attr, - NULL, -}; - -static const struct attribute_group sht15_attr_group = { - .attrs = sht15_attrs, -}; - -static irqreturn_t sht15_interrupt_fired(int irq, void *d) -{ - struct sht15_data *data = d; - /* First disable the interrupt */ - disable_irq_nosync(irq); - atomic_inc(&data->interrupt_handled); - /* Then schedule a reading work struct */ - if (data->flag != SHT15_READING_NOTHING) - schedule_work(&data->read_work); - return IRQ_HANDLED; -} - -/* Each byte of data is acknowledged by pulling the data line - * low for one clock pulse. - */ -static void sht15_ack(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_data, 1); - - gpio_direction_input(data->pdata->gpio_data); -} -/** - * sht15_end_transmission() - notify device of end of transmission - * @data: device state - * - * This is basically a NAK. (single clock pulse, data high) - **/ -static void sht15_end_transmission(struct sht15_data *data) -{ - gpio_direction_output(data->pdata->gpio_data, 1); - ndelay(SHT15_TSU); - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); -} - -static void sht15_bh_read_data(struct work_struct *work_s) -{ - int i; - uint16_t val = 0; - struct sht15_data *data - = container_of(work_s, struct sht15_data, - read_work); - /* Firstly, verify the line is low */ - if (gpio_get_value(data->pdata->gpio_data)) { - /* If not, then start the interrupt again - take care - here as it could have gone low in the meantime, so verify - it hasn't!
- */ - atomic_set(&data->interrupt_handled, 0); - enable_irq(gpio_to_irq(data->pdata->gpio_data)); - /* If still not occurred or another handler has been scheduled */ - if (gpio_get_value(data->pdata->gpio_data) - || atomic_read(&data->interrupt_handled)) - return; - } - /* Read the data back from the device */ - for (i = 0; i < 16; ++i) { - val <<= 1; - gpio_set_value(data->pdata->gpio_sck, 1); - ndelay(SHT15_TSCKH); - val |= !!gpio_get_value(data->pdata->gpio_data); - gpio_set_value(data->pdata->gpio_sck, 0); - ndelay(SHT15_TSCKL); - if (i == 7) - sht15_ack(data); - } - /* Tell the device we are done */ - sht15_end_transmission(data); - - switch (data->flag) { - case SHT15_READING_TEMP: - data->val_temp = val; - break; - case SHT15_READING_HUMID: - data->val_humid = val; - break; - } - - data->flag = SHT15_READING_NOTHING; - wake_up(&data->wait_queue); -} - -static void sht15_update_voltage(struct work_struct *work_s) -{ - struct sht15_data *data - = container_of(work_s, struct sht15_data, - update_supply_work); - data->supply_uV = regulator_get_voltage(data->reg); -} - -/** - * sht15_invalidate_voltage() - mark supply voltage invalid when notified by reg - * @nb: associated notification structure - * @event: voltage regulator state change event code - * @ignored: function parameter - ignored here - * - * Note that as the notification code holds the regulator lock, we have - * to schedule an update of the supply voltage rather than getting it directly. - **/ -static int sht15_invalidate_voltage(struct notifier_block *nb, - unsigned long event, - void *ignored) -{ - struct sht15_data *data = container_of(nb, struct sht15_data, nb); - - if (event == REGULATOR_EVENT_VOLTAGE_CHANGE) - data->supply_uV_valid = false; - schedule_work(&data->update_supply_work); - - return NOTIFY_OK; -} - -static int __devinit sht15_probe(struct platform_device *pdev) -{ - int ret = 0; - struct sht15_data *data = kzalloc(sizeof(*data), GFP_KERNEL); - - if (!data) { - ret = -ENOMEM; - dev_err(&pdev->dev, "kzalloc failed"); - goto error_ret; - } - - INIT_WORK(&data->read_work, sht15_bh_read_data); - INIT_WORK(&data->update_supply_work, sht15_update_voltage); - platform_set_drvdata(pdev, data); - mutex_init(&data->read_lock); - data->dev = &pdev->dev; - init_waitqueue_head(&data->wait_queue); - - if (pdev->dev.platform_data == NULL) { - dev_err(&pdev->dev, "no platform data supplied"); - goto err_free_data; - } - data->pdata = pdev->dev.platform_data; - data->supply_uV = data->pdata->supply_mv*1000; - -/* If a regulator is available, query what the supply voltage actually is! */ - data->reg = regulator_get(data->dev, "vcc"); - if (!IS_ERR(data->reg)) { - data->supply_uV = regulator_get_voltage(data->reg); - regulator_enable(data->reg); - /* setup a notifier block to update this if another device - * causes the voltage to change */ - data->nb.notifier_call = &sht15_invalidate_voltage; - ret = regulator_register_notifier(data->reg, &data->nb); - } -/* Try requesting the GPIOs */ - ret = gpio_request(data->pdata->gpio_sck, "SHT15 sck"); - if (ret) { - dev_err(&pdev->dev, "gpio request failed"); - goto err_free_data; - } - gpio_direction_output(data->pdata->gpio_sck, 0); - ret = gpio_request(data->pdata->gpio_data, "SHT15 data"); - if (ret) { - dev_err(&pdev->dev, "gpio request failed"); - goto err_release_gpio_sck; - } - ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); - if (ret) { - dev_err(&pdev->dev, "sysfs create failed"); - goto err_free_data; - } - - ret =
request_irq(gpio_to_irq(data->pdata->gpio_data), - sht15_interrupt_fired, - IRQF_TRIGGER_FALLING, - "sht15 data", - data); - if (ret) { - dev_err(&pdev->dev, "failed to get irq for data line"); - goto err_release_gpio_data; - } - disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); - sht15_connection_reset(data); - sht15_send_cmd(data, 0x1E); - - data->hwmon_dev = hwmon_device_register(data->dev); - if (IS_ERR(data->hwmon_dev)) { - ret = PTR_ERR(data->hwmon_dev); - goto err_release_gpio_data; - } - return 0; - -err_release_gpio_data: - gpio_free(data->pdata->gpio_data); -err_release_gpio_sck: - gpio_free(data->pdata->gpio_sck); -err_free_data: - kfree(data); -error_ret: - - return ret; -} - -static int __devexit sht15_remove(struct platform_device *pdev) -{ - struct sht15_data *data = platform_get_drvdata(pdev); - - /* Make sure any reads from the device are done and - * prevent new ones beginning */ - mutex_lock(&data->read_lock); - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); - if (!IS_ERR(data->reg)) { - regulator_unregister_notifier(data->reg, &data->nb); - regulator_disable(data->reg); - regulator_put(data->reg); - } - - free_irq(gpio_to_irq(data->pdata->gpio_data), data); - gpio_free(data->pdata->gpio_data); - gpio_free(data->pdata->gpio_sck); - mutex_unlock(&data->read_lock); - kfree(data); - return 0; -} - - -static struct platform_driver sht_drivers[] = { - { - .driver = { - .name = "sht10", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht11", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht15", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht71", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, { - .driver = { - .name = "sht75", - .owner = THIS_MODULE, - }, - .probe = sht15_probe, - .remove = sht15_remove, - }, -}; - - -static int __init sht15_init(void) -{ - int ret; - int i; - - for (i = 0; i < ARRAY_SIZE(sht_drivers); i++) { - ret = platform_driver_register(&sht_drivers[i]); - if (ret) - goto error_unreg; - } - - return 0; - -error_unreg: - while (--i >= 0) - platform_driver_unregister(&sht_drivers[i]); - - return ret; -} -module_init(sht15_init); - -static void __exit sht15_exit(void) -{ - int i; - for (i = ARRAY_SIZE(sht_drivers) - 1; i >= 0; i--) - platform_driver_unregister(&sht_drivers[i]); -} -module_exit(sht15_exit); - -MODULE_LICENSE("GPL"); diff --git a/trunk/drivers/misc/eeprom/at24.c b/trunk/drivers/misc/eeprom/at24.c index db39f4a52f53..d184dfab9631 100644 --- a/trunk/drivers/misc/eeprom/at24.c +++ b/trunk/drivers/misc/eeprom/at24.c @@ -278,7 +278,7 @@ static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, * We only use page mode writes; the alternative is sloooow. This routine * writes at most one page.
*/ -static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, +static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf, unsigned offset, size_t count) { struct i2c_client *client; @@ -347,8 +347,8 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, return -ETIMEDOUT; } -static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off, - size_t count) +static ssize_t at24_write(struct at24_data *at24, + char *buf, loff_t off, size_t count) { ssize_t retval = 0; @@ -406,7 +406,7 @@ static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf, return at24_read(at24, buf, offset, count); } -static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf, +static ssize_t at24_macc_write(struct memory_accessor *macc, char *buf, off_t offset, size_t count) { struct at24_data *at24 = container_of(macc, struct at24_data, macc); diff --git a/trunk/drivers/misc/eeprom/at25.c b/trunk/drivers/misc/eeprom/at25.c index b34cb5f79eea..6bc0dac5c1e8 100644 --- a/trunk/drivers/misc/eeprom/at25.c +++ b/trunk/drivers/misc/eeprom/at25.c @@ -140,8 +140,7 @@ at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, static ssize_t -at25_ee_write(struct at25_data *at25, const char *buf, loff_t off, - size_t count) +at25_ee_write(struct at25_data *at25, char *buf, loff_t off, size_t count) { ssize_t status = 0; unsigned written = 0; @@ -277,7 +276,7 @@ static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf, return at25_ee_read(at25, buf, offset, count); } -static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf, +static ssize_t at25_mem_write(struct memory_accessor *mem, char *buf, off_t offset, size_t count) { struct at25_data *at25 = container_of(mem, struct at25_data, mem); diff --git a/trunk/drivers/misc/sgi-xp/xpc.h b/trunk/drivers/misc/sgi-xp/xpc.h index b94d5f767703..114444cfd496 100644 --- a/trunk/drivers/misc/sgi-xp/xpc.h +++ b/trunk/drivers/misc/sgi-xp/xpc.h @@ -90,21 +90,18 @@ struct xpc_rsvd_page { short max_npartitions; /* value of XPC_MAX_PARTITIONS */ u8 version; u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ - unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ union { - struct { - unsigned long vars_pa; /* phys addr */ - } sn2; - struct { - unsigned long heartbeat_gpa; /* phys addr */ - unsigned long activate_gru_mq_desc_gpa; /* phys addr */ - } uv; + unsigned long vars_pa; /* phys address of struct xpc_vars */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr of */ + /* activate mq's */ + /* gru mq descriptor */ } sn; - u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ + u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */ u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ }; -#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ +#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */ /* * Define the structures by which XPC variables can be exported to other @@ -185,17 +182,6 @@ struct xpc_vars_part_sn2 { (XPC_RP_MACH_NASIDS(_rp) + \ xpc_nasid_mask_nlongs)) - -/* - * The following structure describes the partition's heartbeat info which - * will be periodically read by other partitions to determine whether this - * XPC is still 'alive'. 
- */ -struct xpc_heartbeat_uv { - unsigned long value; - unsigned long offline; /* if 0, heartbeat should be changing */ -}; - /* * Info pertinent to a GRU message queue using a watch list for irq generation. */ @@ -212,7 +198,7 @@ struct xpc_gru_mq_uv { /* * The activate_mq is used to send/receive GRU messages that affect XPC's - * partition active state and channel state. This is uv only. + * heartbeat, partition active state, and channel state. This is UV only. */ struct xpc_activate_mq_msghdr_uv { unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ @@ -224,27 +210,33 @@ struct xpc_activate_mq_msghdr_uv { /* activate_mq defined message types */ #define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 +#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1 +#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2 +#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3 -#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1 -#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2 +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9 -#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8 -#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9 +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11 struct xpc_activate_mq_msg_uv { struct xpc_activate_mq_msghdr_uv hdr; }; +struct xpc_activate_mq_msg_heartbeat_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + u64 heartbeat; +}; + struct xpc_activate_mq_msg_activate_req_uv { struct xpc_activate_mq_msghdr_uv hdr; unsigned long rp_gpa; - unsigned long heartbeat_gpa; unsigned long activate_gru_mq_desc_gpa; }; @@ -279,11 +271,6 @@ struct xpc_activate_mq_msg_chctl_openreply_uv { unsigned long notify_gru_mq_desc_gpa; }; -struct xpc_activate_mq_msg_chctl_opencomplete_uv { - struct xpc_activate_mq_msghdr_uv hdr; - short ch_number; -}; - /* * Functions registered by add_timer() or called by kernel_thread() only * allow for a single 64-bit argument. 
The following macros can be used to @@ -589,32 +576,30 @@ struct xpc_channel { #define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ -#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */ -#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */ -#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */ -#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */ -#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */ -#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */ +#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ -#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */ +#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */ #define XPC_C_CONNECTEDCALLOUT_MADE \ - 0x00000200 /* connected callout completed */ -#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */ + 0x00000080 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */ -#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */ +#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */ -#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */ #define XPC_C_DISCONNECTINGCALLOUT \ - 0x00040000 /* disconnecting callout initiated */ + 0x00010000 /* disconnecting callout initiated */ #define XPC_C_DISCONNECTINGCALLOUT_MADE \ - 0x00080000 /* disconnecting callout completed */ -#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */ + 0x00020000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ /* * The channel control flags (chctl) union consists of a 64-bit variable which @@ -633,13 +618,11 @@ union xpc_channel_ctl_flags { #define XPC_CHCTL_CLOSEREPLY 0x02 #define XPC_CHCTL_OPENREQUEST 0x04 #define XPC_CHCTL_OPENREPLY 0x08 -#define XPC_CHCTL_OPENCOMPLETE 0x10 -#define XPC_CHCTL_MSGREQUEST 0x20 +#define XPC_CHCTL_MSGREQUEST 0x10 #define XPC_OPENCLOSE_CHCTL_FLAGS \ (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ - XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \ - XPC_CHCTL_OPENCOMPLETE) + XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY) #define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST static inline int @@ -704,9 +687,6 @@ struct xpc_partition_sn2 { }; struct xpc_partition_uv { - 
unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ - struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ - /* partition's heartbeat */ unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */ /* activate mq's gru mq */ /* descriptor */ @@ -718,12 +698,14 @@ struct xpc_partition_uv { u8 remote_act_state; /* remote partition's act_state */ u8 act_state_req; /* act_state request from remote partition */ enum xp_retval reason; /* reason for deactivate act_state request */ + u64 heartbeat; /* incremented by remote partition */ }; /* struct xpc_partition_uv flags */ -#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001 +#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 #define XPC_P_ENGAGED_UV 0x00000002 +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004 /* struct xpc_partition_uv act_state change requests */ @@ -780,62 +762,6 @@ struct xpc_partition { } ____cacheline_aligned; -struct xpc_arch_operations { - int (*setup_partitions) (void); - void (*teardown_partitions) (void); - void (*process_activate_IRQ_rcvd) (void); - enum xp_retval (*get_partition_rsvd_page_pa) - (void *, u64 *, unsigned long *, size_t *); - int (*setup_rsvd_page) (struct xpc_rsvd_page *); - - void (*allow_hb) (short); - void (*disallow_hb) (short); - void (*disallow_all_hbs) (void); - void (*increment_heartbeat) (void); - void (*offline_heartbeat) (void); - void (*online_heartbeat) (void); - void (*heartbeat_init) (void); - void (*heartbeat_exit) (void); - enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *); - - void (*request_partition_activation) (struct xpc_rsvd_page *, - unsigned long, int); - void (*request_partition_reactivation) (struct xpc_partition *); - void (*request_partition_deactivation) (struct xpc_partition *); - void (*cancel_partition_deactivation_request) (struct xpc_partition *); - enum xp_retval (*setup_ch_structures) (struct xpc_partition *); - void (*teardown_ch_structures) (struct xpc_partition *); - - enum xp_retval (*make_first_contact) (struct xpc_partition *); - - u64 (*get_chctl_all_flags) (struct xpc_partition *); - void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *); - void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *); - void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *); - void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *); - void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *); - void (*process_msg_chctl_flags) (struct xpc_partition *, int); - - enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *, - unsigned long); - - enum xp_retval (*setup_msg_structures) (struct xpc_channel *); - void (*teardown_msg_structures) (struct xpc_channel *); - - void (*indicate_partition_engaged) (struct xpc_partition *); - void (*indicate_partition_disengaged) (struct xpc_partition *); - void (*assume_partition_disengaged) (short); - int (*partition_engaged) (short); - int (*any_partition_engaged) (void); - - int (*n_of_deliverable_payloads) (struct xpc_channel *); - enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *, - u16, u8, xpc_notify_func, void *); - void *(*get_deliverable_payload) (struct xpc_channel *); - void (*received_payload) (struct xpc_channel *, void *); - void (*notify_senders_of_disconnect) (struct xpc_channel *); -}; - /* struct xpc_partition act_state values (for XPC HB) */ #define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ @@ -876,17 +802,67 @@ extern struct xpc_registration xpc_registrations[]; /*
found in xpc_main.c */ extern struct device *xpc_part; extern struct device *xpc_chan; -extern struct xpc_arch_operations xpc_arch_ops; extern int xpc_disengage_timelimit; extern int xpc_disengage_timedout; extern int xpc_activate_IRQ_rcvd; extern spinlock_t xpc_activate_IRQ_rcvd_lock; extern wait_queue_head_t xpc_activate_IRQ_wq; +extern void *xpc_heartbeating_to_mask; extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int, int); extern void xpc_disconnect_wait(int); +extern int (*xpc_setup_partitions_sn) (void); +extern void (*xpc_teardown_partitions_sn) (void); +extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *, + unsigned long *, + size_t *); +extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *); +extern void (*xpc_heartbeat_init) (void); +extern void (*xpc_heartbeat_exit) (void); +extern void (*xpc_increment_heartbeat) (void); +extern void (*xpc_offline_heartbeat) (void); +extern void (*xpc_online_heartbeat) (void); +extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *); +extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *); +extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *); +extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *); +extern void (*xpc_teardown_msg_structures) (struct xpc_channel *); +extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *); +extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int); +extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *); +extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *); +extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *, + unsigned long, int); +extern void (*xpc_request_partition_reactivation) (struct xpc_partition *); +extern void (*xpc_request_partition_deactivation) (struct xpc_partition *); +extern void (*xpc_cancel_partition_deactivation_request) ( + struct xpc_partition *); +extern void (*xpc_process_activate_IRQ_rcvd) (void); +extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *); +extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *); + +extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *); +extern int (*xpc_partition_engaged) (short); +extern int (*xpc_any_partition_engaged) (void); +extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *); +extern void (*xpc_assume_partition_disengaged) (short); + +extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_closereply) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *, + unsigned long *); +extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *); + +extern enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); + +extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *, + u16, u8, xpc_notify_func, void *); +extern void (*xpc_received_payload) (struct xpc_channel *, void *); /* found in xpc_sn2.c */ extern int xpc_init_sn2(void); @@ -933,6 +909,40 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *, extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); extern void xpc_partition_going_down(struct xpc_partition 
*, enum xp_retval); +static inline int +xpc_hb_allowed(short partid, void *heartbeating_to_mask) +{ + return test_bit(partid, heartbeating_to_mask); +} + +static inline int +xpc_any_hbs_allowed(void) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions); +} + +static inline void +xpc_allow_hb(short partid) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + set_bit(partid, xpc_heartbeating_to_mask); +} + +static inline void +xpc_disallow_hb(short partid) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + clear_bit(partid, xpc_heartbeating_to_mask); +} + +static inline void +xpc_disallow_all_hbs(void) +{ + DBUG_ON(xpc_heartbeating_to_mask == NULL); + bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions); +} + static inline void xpc_wakeup_channel_mgr(struct xpc_partition *part) { diff --git a/trunk/drivers/misc/sgi-xp/xpc_channel.c b/trunk/drivers/misc/sgi-xp/xpc_channel.c index 652593fc486d..99a2534c38a1 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_channel.c +++ b/trunk/drivers/misc/sgi-xp/xpc_channel.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -39,38 +39,34 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_SETUP)) { spin_unlock_irqrestore(&ch->lock, *irq_flags); - ret = xpc_arch_ops.setup_msg_structures(ch); + ret = xpc_setup_msg_structures(ch); spin_lock_irqsave(&ch->lock, *irq_flags); if (ret != xpSuccess) XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); - else - ch->flags |= XPC_C_SETUP; - if (ch->flags & XPC_C_DISCONNECTING) + ch->flags |= XPC_C_SETUP; + + if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) return; } if (!(ch->flags & XPC_C_OPENREPLY)) { ch->flags |= XPC_C_OPENREPLY; - xpc_arch_ops.send_chctl_openreply(ch, irq_flags); + xpc_send_chctl_openreply(ch, irq_flags); } if (!(ch->flags & XPC_C_ROPENREPLY)) return; - if (!(ch->flags & XPC_C_OPENCOMPLETE)) { - ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED); - xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags); - } - - if (!(ch->flags & XPC_C_ROPENCOMPLETE)) - return; + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ dev_info(xpc_chan, "channel %d to partition %d connected\n", ch->number, ch->partid); - ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_create_kthreads(ch, 1, 0); + spin_lock_irqsave(&ch->lock, *irq_flags); } /* @@ -100,7 +96,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (part->act_state == XPC_P_AS_DEACTIVATING) { /* can't proceed until the other side disengages from us */ - if (xpc_arch_ops.partition_engaged(ch->partid)) + if (xpc_partition_engaged(ch->partid)) return; } else { @@ -112,7 +108,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_CLOSEREPLY)) { ch->flags |= XPC_C_CLOSEREPLY; - xpc_arch_ops.send_chctl_closereply(ch, irq_flags); + xpc_send_chctl_closereply(ch, irq_flags); } if (!(ch->flags & XPC_C_RCLOSEREPLY)) @@ -122,7 +118,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* wake those waiting for notify completion */ if (atomic_read(&ch->n_to_notify) > 0) { /* we do callout while holding ch->lock, callout can't block */ - xpc_arch_ops.notify_senders_of_disconnect(ch); 
+ xpc_notify_senders_of_disconnect(ch); } /* both sides are disconnected now */ @@ -136,7 +132,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) DBUG_ON(atomic_read(&ch->n_to_notify) != 0); /* it's now safe to free the channel's message queues */ - xpc_arch_ops.teardown_msg_structures(ch); + xpc_teardown_msg_structures(ch); ch->func = NULL; ch->key = NULL; @@ -148,9 +144,8 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* * Mark the channel disconnected and clear all other flags, including - * XPC_C_SETUP (because of call to - * xpc_arch_ops.teardown_msg_structures()) but not including - * XPC_C_WDISCONNECT (if it was set). + * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but + * not including XPC_C_WDISCONNECT (if it was set). */ ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); @@ -189,7 +184,6 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, struct xpc_channel *ch = &part->channels[ch_number]; enum xp_retval reason; enum xp_retval ret; - int create_kthread = 0; spin_lock_irqsave(&ch->lock, irq_flags); @@ -202,7 +196,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, * has had a chance to see that the channel is disconnected. */ ch->delayed_chctl_flags |= chctl_flags; - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { @@ -244,7 +239,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, XPC_CHCTL_CLOSEREQUEST; spin_unlock(&part->chctl_lock); } - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } XPC_SET_REASON(ch, 0, 0); @@ -254,8 +250,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); } - chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | - XPC_CHCTL_OPENCOMPLETE); + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY); /* * The meaningful CLOSEREQUEST connection state fields are: @@ -274,7 +269,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } xpc_process_disconnect(ch, &irq_flags); @@ -287,7 +283,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, if (ch->flags & XPC_C_DISCONNECTED) { DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); @@ -302,7 +299,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, XPC_CHCTL_CLOSEREPLY; spin_unlock(&part->chctl_lock); } - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } ch->flags |= XPC_C_RCLOSEREPLY; @@ -322,12 +320,14 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, if (part->act_state == XPC_P_AS_DEACTIVATING || (ch->flags & XPC_C_ROPENREQUEST)) { - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | XPC_C_OPENREQUEST))); @@ -341,7 +341,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, */ if 
(args->entry_size == 0 || args->local_nentries == 0) { /* assume OPENREQUEST was delayed by mistake */ - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); @@ -351,7 +352,8 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, if (args->entry_size != ch->entry_size) { XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, &irq_flags); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } } else { ch->entry_size = args->entry_size; @@ -373,13 +375,15 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, args->local_msgqueue_pa, args->local_nentries, args->remote_nentries, ch->partid, ch->number); - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) - goto out; - + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } if (!(ch->flags & XPC_C_OPENREQUEST)) { XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, &irq_flags); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); @@ -396,11 +400,11 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, DBUG_ON(args->local_nentries == 0); DBUG_ON(args->remote_nentries == 0); - ret = xpc_arch_ops.save_remote_msgqueue_pa(ch, - args->local_msgqueue_pa); + ret = xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa); if (ret != xpSuccess) { XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); - goto out; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; } ch->flags |= XPC_C_ROPENREPLY; @@ -426,36 +430,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, xpc_process_connect(ch, &irq_flags); } - if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) { - - dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from " - "partid=%d, channel=%d\n", ch->partid, ch->number); - - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) - goto out; - - if (!(ch->flags & XPC_C_OPENREQUEST) || - !(ch->flags & XPC_C_OPENREPLY)) { - XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, - &irq_flags); - goto out; - } - - DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); - DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY)); - DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); - - ch->flags |= XPC_C_ROPENCOMPLETE; - - xpc_process_connect(ch, &irq_flags); - create_kthread = 1; - } - -out: spin_unlock_irqrestore(&ch->lock, irq_flags); - - if (create_kthread) - xpc_create_kthreads(ch, 1, 0); } /* @@ -533,7 +508,7 @@ xpc_connect_channel(struct xpc_channel *ch) /* initiate the connection */ ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); - xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags); + xpc_send_chctl_openrequest(ch, &irq_flags); xpc_process_connect(ch, &irq_flags); @@ -551,7 +526,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) int ch_number; u32 ch_flags; - chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part); + chctl.all_flags = xpc_get_chctl_all_flags(part); /* * Initiate channel connections for registered channels. 
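[Editor's note] The chctl handling touched throughout the hunks above packs one 8-bit flag field per channel into a single 64-bit word, so the channel manager can fetch all pending work with one read and then dispatch per channel. The stand-alone C sketch below illustrates only that packing idea; the demo_* names are hypothetical and none of this is the driver's code.

#include <stdint.h>
#include <stdio.h>

#define DEMO_CHCTL_CLOSEREQUEST 0x01	/* mirrors XPC_CHCTL_CLOSEREQUEST */
#define DEMO_CHCTL_OPENREQUEST	0x04	/* mirrors XPC_CHCTL_OPENREQUEST */

/* a 64-bit word overlaid with one flag byte per channel */
union demo_chctl {
	uint64_t all_flags;
	uint8_t flags[8];
};

int main(void)
{
	union demo_chctl chctl = { .all_flags = 0 };
	int ch;

	chctl.flags[2] |= DEMO_CHCTL_OPENREQUEST;
	chctl.flags[5] |= DEMO_CHCTL_CLOSEREQUEST;

	if (chctl.all_flags == 0)	/* one test covers every channel */
		return 0;

	for (ch = 0; ch < 8; ch++) {
		if (chctl.flags[ch] & DEMO_CHCTL_OPENREQUEST)
			printf("channel %d: open requested\n", ch);
		if (chctl.flags[ch] & DEMO_CHCTL_CLOSEREQUEST)
			printf("channel %d: close requested\n", ch);
	}
	return 0;
}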
@@ -589,6 +564,10 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) if (!(ch_flags & XPC_C_OPENREQUEST)) { DBUG_ON(ch_flags & XPC_C_SETUP); (void)xpc_connect_channel(ch); + } else { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_connect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); } continue; } @@ -600,7 +579,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) */ if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) - xpc_arch_ops.process_msg_chctl_flags(part, ch_number); + xpc_process_msg_chctl_flags(part, ch_number); } } @@ -776,7 +755,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch, XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | XPC_C_CONNECTING | XPC_C_CONNECTED); - xpc_arch_ops.send_chctl_closerequest(ch, irq_flags); + xpc_send_chctl_closerequest(ch, irq_flags); if (channel_was_connected) ch->flags |= XPC_C_WASCONNECTED; @@ -883,8 +862,8 @@ xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, DBUG_ON(payload == NULL); if (xpc_part_ref(part)) { - ret = xpc_arch_ops.send_payload(&part->channels[ch_number], - flags, payload, payload_size, 0, NULL, NULL); + ret = xpc_send_payload(&part->channels[ch_number], flags, + payload, payload_size, 0, NULL, NULL); xpc_part_deref(part); } @@ -935,8 +914,9 @@ xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, DBUG_ON(func == NULL); if (xpc_part_ref(part)) { - ret = xpc_arch_ops.send_payload(&part->channels[ch_number], - flags, payload, payload_size, XPC_N_CALL, func, key); + ret = xpc_send_payload(&part->channels[ch_number], flags, + payload, payload_size, XPC_N_CALL, func, + key); xpc_part_deref(part); } return ret; @@ -950,7 +930,7 @@ xpc_deliver_payload(struct xpc_channel *ch) { void *payload; - payload = xpc_arch_ops.get_deliverable_payload(ch); + payload = xpc_get_deliverable_payload(ch); if (payload != NULL) { /* @@ -1004,7 +984,7 @@ xpc_initiate_received(short partid, int ch_number, void *payload) DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); ch = &part->channels[ch_number]; - xpc_arch_ops.received_payload(ch, payload); + xpc_received_payload(ch, payload); /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ xpc_msgqueue_deref(ch); diff --git a/trunk/drivers/misc/sgi-xp/xpc_main.c b/trunk/drivers/misc/sgi-xp/xpc_main.c index fd3688a3e23f..1ab9fda87fab 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_main.c +++ b/trunk/drivers/misc/sgi-xp/xpc_main.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 
*/ /* @@ -150,6 +150,7 @@ DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); static unsigned long xpc_hb_check_timeout; static struct timer_list xpc_hb_timer; +void *xpc_heartbeating_to_mask; /* notification that the xpc_hb_checker thread has exited */ static DECLARE_COMPLETION(xpc_hb_checker_exited); @@ -169,7 +170,62 @@ static struct notifier_block xpc_die_notifier = { .notifier_call = xpc_system_die, }; -struct xpc_arch_operations xpc_arch_ops; +int (*xpc_setup_partitions_sn) (void); +void (*xpc_teardown_partitions_sn) (void); +enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie, + unsigned long *rp_pa, + size_t *len); +int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp); +void (*xpc_heartbeat_init) (void); +void (*xpc_heartbeat_exit) (void); +void (*xpc_increment_heartbeat) (void); +void (*xpc_offline_heartbeat) (void); +void (*xpc_online_heartbeat) (void); +enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part); + +enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part); +void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch); +u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part); +enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch); +void (*xpc_teardown_msg_structures) (struct xpc_channel *ch); +void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number); +int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch); +void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch); + +void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, + int nasid); +void (*xpc_request_partition_reactivation) (struct xpc_partition *part); +void (*xpc_request_partition_deactivation) (struct xpc_partition *part); +void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part); + +void (*xpc_process_activate_IRQ_rcvd) (void); +enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part); +void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part); + +void (*xpc_indicate_partition_engaged) (struct xpc_partition *part); +int (*xpc_partition_engaged) (short partid); +int (*xpc_any_partition_engaged) (void); +void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part); +void (*xpc_assume_partition_disengaged) (short partid); + +void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_closereply) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch, + unsigned long *irq_flags); +void (*xpc_send_chctl_openreply) (struct xpc_channel *ch, + unsigned long *irq_flags); + +enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch, + unsigned long msgqueue_pa); + +enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags, + void *payload, u16 payload_size, + u8 notify_type, xpc_notify_func func, + void *key); +void (*xpc_received_payload) (struct xpc_channel *ch, void *payload); /* * Timer function to enforce the timelimit on the partition disengage. 
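[Editor's note] The hunks that follow replace calls through the xpc_arch_ops structure with the per-operation function pointers declared above, each assigned once at init time by xpc_init_sn2() (or its UV counterpart). A minimal sketch of that dispatch pattern, using hypothetical demo_* names rather than the driver's:

#include <stdio.h>

static void demo_increment_heartbeat_sn2(void) { puts("sn2: bump counter"); }
static void demo_increment_heartbeat_uv(void) { puts("uv: send heartbeat msg"); }

/* one global pointer per operation, as in the declarations above */
static void (*demo_increment_heartbeat)(void);

static void demo_init(int is_uv)
{
	/* the architecture-specific init routine wires up every pointer */
	demo_increment_heartbeat = is_uv ? demo_increment_heartbeat_uv
					 : demo_increment_heartbeat_sn2;
}

int main(void)
{
	demo_init(0);
	demo_increment_heartbeat();	/* dispatches to the sn2 variant */
	return 0;
}

Compared with routing every call through a single ops struct, this style needs one assignment per operation at init time but keeps each call site a plain indirect call with no extra structure dereference.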
@@ -184,7 +240,7 @@ xpc_timeout_partition_disengage(unsigned long data) (void)xpc_partition_disengaged(part); DBUG_ON(part->disengage_timeout != 0); - DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part))); + DBUG_ON(xpc_partition_engaged(XPC_PARTID(part))); } /* @@ -195,7 +251,7 @@ xpc_timeout_partition_disengage(unsigned long data) static void xpc_hb_beater(unsigned long dummy) { - xpc_arch_ops.increment_heartbeat(); + xpc_increment_heartbeat(); if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) wake_up_interruptible(&xpc_activate_IRQ_wq); @@ -207,7 +263,7 @@ xpc_hb_beater(unsigned long dummy) static void xpc_start_hb_beater(void) { - xpc_arch_ops.heartbeat_init(); + xpc_heartbeat_init(); init_timer(&xpc_hb_timer); xpc_hb_timer.function = xpc_hb_beater; xpc_hb_beater(0); @@ -217,7 +273,7 @@ static void xpc_stop_hb_beater(void) { del_timer_sync(&xpc_hb_timer); - xpc_arch_ops.heartbeat_exit(); + xpc_heartbeat_exit(); } /* @@ -246,7 +302,7 @@ xpc_check_remote_hb(void) continue; } - ret = xpc_arch_ops.get_remote_heartbeat(part); + ret = xpc_get_remote_heartbeat(part); if (ret != xpSuccess) XPC_DEACTIVATE_PARTITION(part, ret); } @@ -297,7 +353,7 @@ xpc_hb_checker(void *ignore) force_IRQ = 0; dev_dbg(xpc_part, "processing activate IRQs " "received\n"); - xpc_arch_ops.process_activate_IRQ_rcvd(); + xpc_process_activate_IRQ_rcvd(); } /* wait for IRQ or timeout */ @@ -472,7 +528,7 @@ xpc_setup_ch_structures(struct xpc_partition *part) init_waitqueue_head(&ch->idle_wq); } - ret = xpc_arch_ops.setup_ch_structures(part); + ret = xpc_setup_ch_structures_sn(part); if (ret != xpSuccess) goto out_2; @@ -516,7 +572,7 @@ xpc_teardown_ch_structures(struct xpc_partition *part) /* now we can begin tearing down the infrastructure */ - xpc_arch_ops.teardown_ch_structures(part); + xpc_teardown_ch_structures_sn(part); kfree(part->remote_openclose_args_base); part->remote_openclose_args = NULL; @@ -564,12 +620,12 @@ xpc_activating(void *__partid) dev_dbg(xpc_part, "activating partition %d\n", partid); - xpc_arch_ops.allow_hb(partid); + xpc_allow_hb(partid); if (xpc_setup_ch_structures(part) == xpSuccess) { (void)xpc_part_ref(part); /* this will always succeed */ - if (xpc_arch_ops.make_first_contact(part) == xpSuccess) { + if (xpc_make_first_contact(part) == xpSuccess) { xpc_mark_partition_active(part); xpc_channel_mgr(part); /* won't return until partition is deactivating */ @@ -579,12 +635,12 @@ xpc_activating(void *__partid) xpc_teardown_ch_structures(part); } - xpc_arch_ops.disallow_hb(partid); + xpc_disallow_hb(partid); xpc_mark_partition_inactive(part); if (part->reason == xpReactivating) { /* interrupting ourselves results in activating partition */ - xpc_arch_ops.request_partition_reactivation(part); + xpc_request_partition_reactivation(part); } return 0; @@ -657,13 +713,10 @@ xpc_activate_kthreads(struct xpc_channel *ch, int needed) static void xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) { - int (*n_of_deliverable_payloads) (struct xpc_channel *) = - xpc_arch_ops.n_of_deliverable_payloads; - do { /* deliver messages to their intended recipients */ - while (n_of_deliverable_payloads(ch) > 0 && + while (xpc_n_of_deliverable_payloads(ch) > 0 && !(ch->flags & XPC_C_DISCONNECTING)) { xpc_deliver_payload(ch); } @@ -679,7 +732,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) "wait_event_interruptible_exclusive()\n"); (void)wait_event_interruptible_exclusive(ch->idle_wq, - (n_of_deliverable_payloads(ch) > 0 || + (xpc_n_of_deliverable_payloads(ch) > 0 
|| (ch->flags & XPC_C_DISCONNECTING))); atomic_dec(&ch->kthreads_idle); @@ -696,8 +749,6 @@ xpc_kthread_start(void *args) struct xpc_channel *ch; int n_needed; unsigned long irq_flags; - int (*n_of_deliverable_payloads) (struct xpc_channel *) = - xpc_arch_ops.n_of_deliverable_payloads; dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", partid, ch_number); @@ -726,7 +777,7 @@ xpc_kthread_start(void *args) * additional kthreads to help deliver them. We only * need one less than total #of messages to deliver. */ - n_needed = n_of_deliverable_payloads(ch) - 1; + n_needed = xpc_n_of_deliverable_payloads(ch) - 1; if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) xpc_activate_kthreads(ch, n_needed); @@ -754,7 +805,7 @@ xpc_kthread_start(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_arch_ops.indicate_partition_disengaged(part); + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); @@ -786,8 +837,6 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, u64 args = XPC_PACK_ARGS(ch->partid, ch->number); struct xpc_partition *part = &xpc_partitions[ch->partid]; struct task_struct *kthread; - void (*indicate_partition_disengaged) (struct xpc_partition *) = - xpc_arch_ops.indicate_partition_disengaged; while (needed-- > 0) { @@ -809,7 +858,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && atomic_inc_return(&part->nchannels_engaged) == 1) { - xpc_arch_ops.indicate_partition_engaged(part); + xpc_indicate_partition_engaged(part); } (void)xpc_part_ref(part); xpc_msgqueue_ref(ch); @@ -831,7 +880,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - indicate_partition_disengaged(part); + xpc_indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); xpc_part_deref(part); @@ -944,13 +993,13 @@ xpc_setup_partitions(void) atomic_set(&part->references, 0); } - return xpc_arch_ops.setup_partitions(); + return xpc_setup_partitions_sn(); } static void xpc_teardown_partitions(void) { - xpc_arch_ops.teardown_partitions(); + xpc_teardown_partitions_sn(); kfree(xpc_partitions); } @@ -1006,7 +1055,7 @@ xpc_do_exit(enum xp_retval reason) disengage_timeout = part->disengage_timeout; } - if (xpc_arch_ops.any_partition_engaged()) { + if (xpc_any_partition_engaged()) { if (time_is_before_jiffies(printmsg_time)) { dev_info(xpc_part, "waiting for remote " "partitions to deactivate, timeout in " @@ -1037,7 +1086,8 @@ xpc_do_exit(enum xp_retval reason) } while (1); - DBUG_ON(xpc_arch_ops.any_partition_engaged()); + DBUG_ON(xpc_any_partition_engaged()); + DBUG_ON(xpc_any_hbs_allowed() != 0); xpc_teardown_rsvd_page(); @@ -1102,15 +1152,15 @@ xpc_die_deactivate(void) /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; - xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */ + xpc_disallow_all_hbs(); /*indicate we're deactivated */ for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; - if (xpc_arch_ops.partition_engaged(partid) || + if (xpc_partition_engaged(partid) || part->act_state != XPC_P_AS_INACTIVE) { - xpc_arch_ops.request_partition_deactivation(part); - xpc_arch_ops.indicate_partition_disengaged(part); + xpc_request_partition_deactivation(part); + xpc_indicate_partition_disengaged(part); } } @@ -1127,7 +1177,7 @@ xpc_die_deactivate(void) 
wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; while (1) { - any_engaged = xpc_arch_ops.any_partition_engaged(); + any_engaged = xpc_any_partition_engaged(); if (!any_engaged) { dev_info(xpc_part, "all partitions have deactivated\n"); break; @@ -1136,7 +1186,7 @@ xpc_die_deactivate(void) if (!keep_waiting--) { for (partid = 0; partid < xp_max_npartitions; partid++) { - if (xpc_arch_ops.partition_engaged(partid)) { + if (xpc_partition_engaged(partid)) { dev_info(xpc_part, "deactivate from " "remote partition %d timed " "out\n", partid); @@ -1183,7 +1233,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_ENTER: case DIE_INIT_MONARCH_ENTER: - xpc_arch_ops.offline_heartbeat(); + xpc_offline_heartbeat(); break; case DIE_KDEBUG_LEAVE: @@ -1194,7 +1244,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_LEAVE: case DIE_INIT_MONARCH_LEAVE: - xpc_arch_ops.online_heartbeat(); + xpc_online_heartbeat(); break; } #else diff --git a/trunk/drivers/misc/sgi-xp/xpc_partition.c b/trunk/drivers/misc/sgi-xp/xpc_partition.c index 65877bc5edaa..6722f6fe4dc7 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_partition.c +++ b/trunk/drivers/misc/sgi-xp/xpc_partition.c @@ -70,9 +70,6 @@ xpc_get_rsvd_page_pa(int nasid) size_t buf_len = 0; void *buf = buf; void *buf_base = NULL; - enum xp_retval (*get_partition_rsvd_page_pa) - (void *, u64 *, unsigned long *, size_t *) = - xpc_arch_ops.get_partition_rsvd_page_pa; while (1) { @@ -82,7 +79,8 @@ xpc_get_rsvd_page_pa(int nasid) * ??? function or have two versions? Rename rp_pa for UV to * ??? rp_gpa? */ - ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); + ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, + &len); dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " "address=0x%016lx, len=0x%016lx\n", ret, @@ -174,7 +172,7 @@ xpc_setup_rsvd_page(void) xpc_part_nasids = XPC_RP_PART_NASIDS(rp); xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); - ret = xpc_arch_ops.setup_rsvd_page(rp); + ret = xpc_setup_rsvd_page_sn(rp); if (ret != 0) return ret; @@ -266,7 +264,7 @@ xpc_partition_disengaged(struct xpc_partition *part) short partid = XPC_PARTID(part); int disengaged; - disengaged = !xpc_arch_ops.partition_engaged(partid); + disengaged = !xpc_partition_engaged(partid); if (part->disengage_timeout) { if (!disengaged) { if (time_is_after_jiffies(part->disengage_timeout)) { @@ -282,7 +280,7 @@ xpc_partition_disengaged(struct xpc_partition *part) dev_info(xpc_part, "deactivate request to remote " "partition %d timed out\n", partid); xpc_disengage_timedout = 1; - xpc_arch_ops.assume_partition_disengaged(partid); + xpc_assume_partition_disengaged(partid); disengaged = 1; } part->disengage_timeout = 0; @@ -296,7 +294,7 @@ xpc_partition_disengaged(struct xpc_partition *part) if (part->act_state != XPC_P_AS_INACTIVE) xpc_wakeup_channel_mgr(part); - xpc_arch_ops.cancel_partition_deactivation_request(part); + xpc_cancel_partition_deactivation_request(part); } return disengaged; } @@ -341,7 +339,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, spin_unlock_irqrestore(&part->act_lock, irq_flags); if (reason == xpReactivating) { /* we interrupt ourselves to reactivate partition */ - xpc_arch_ops.request_partition_reactivation(part); + xpc_request_partition_reactivation(part); } return; } @@ -360,7 +358,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, 
spin_unlock_irqrestore(&part->act_lock, irq_flags); /* ask remote partition to deactivate with regard to us */ - xpc_arch_ops.request_partition_deactivation(part); + xpc_request_partition_deactivation(part); /* set a timelimit on the disengage phase of the deactivation request */ part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); @@ -498,7 +496,7 @@ xpc_discovery(void) continue; } - xpc_arch_ops.request_partition_activation(remote_rp, + xpc_request_partition_activation(remote_rp, remote_rp_pa, nasid); } } diff --git a/trunk/drivers/misc/sgi-xp/xpc_sn2.c b/trunk/drivers/misc/sgi-xp/xpc_sn2.c index 915a3b495da5..eaaa964942de 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_sn2.c +++ b/trunk/drivers/misc/sgi-xp/xpc_sn2.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -60,14 +60,14 @@ static struct xpc_vars_sn2 *xpc_vars_sn2; static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; static int -xpc_setup_partitions_sn2(void) +xpc_setup_partitions_sn_sn2(void) { /* nothing needs to be done */ return 0; } static void -xpc_teardown_partitions_sn2(void) +xpc_teardown_partitions_sn_sn2(void) { /* nothing needs to be done */ } @@ -430,13 +430,6 @@ xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags); } -static void -xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch, - unsigned long *irq_flags) -{ - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags); -} - static void xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) { @@ -628,7 +621,7 @@ xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, static int -xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) +xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp) { struct amo *amos_page; int i; @@ -636,7 +629,7 @@ xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) xpc_vars_sn2 = XPC_RP_VARS(rp); - rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2); + rp->sn.vars_pa = xp_pa(xpc_vars_sn2); /* vars_part array follows immediately after vars */ xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + @@ -700,33 +693,6 @@ xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) return 0; } -static int -xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask) -{ - return test_bit(partid, heartbeating_to_mask); -} - -static void -xpc_allow_hb_sn2(short partid) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - set_bit(partid, xpc_vars_sn2->heartbeating_to_mask); -} - -static void -xpc_disallow_hb_sn2(short partid) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask); -} - -static void -xpc_disallow_all_hbs_sn2(void) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions); -} - static void xpc_increment_heartbeat_sn2(void) { @@ -753,6 +719,7 @@ xpc_heartbeat_init_sn2(void) DBUG_ON(xpc_vars_sn2 == NULL); bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0]; xpc_online_heartbeat_sn2(); } @@ -784,9 +751,9 @@ xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) remote_vars->heartbeating_to_mask[0]); if ((remote_vars->heartbeat == part->last_heartbeat && - !remote_vars->heartbeat_offline) || - !xpc_hb_allowed_sn2(sn_partition_id, - 
remote_vars->heartbeating_to_mask)) { + remote_vars->heartbeat_offline == 0) || + !xpc_hb_allowed(sn_partition_id, + &remote_vars->heartbeating_to_mask)) { ret = xpNoHeartbeat; } else { part->last_heartbeat = remote_vars->heartbeat; @@ -1005,7 +972,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid) return; } - remote_vars_pa = remote_rp->sn.sn2.vars_pa; + remote_vars_pa = remote_rp->sn.vars_pa; remote_rp_version = remote_rp->version; remote_rp_ts_jiffies = remote_rp->ts_jiffies; @@ -1162,7 +1129,7 @@ xpc_process_activate_IRQ_rcvd_sn2(void) * Setup the channel structures that are sn2 specific. */ static enum xp_retval -xpc_setup_ch_structures_sn2(struct xpc_partition *part) +xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part) { struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; struct xpc_channel_sn2 *ch_sn2; @@ -1284,7 +1251,7 @@ xpc_setup_ch_structures_sn2(struct xpc_partition *part) * Teardown the channel structures that are sn2 specific. */ static void -xpc_teardown_ch_structures_sn2(struct xpc_partition *part) +xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part) { struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; short partid = XPC_PARTID(part); @@ -2348,70 +2315,61 @@ xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) xpc_acknowledge_msgs_sn2(ch, get, msg->flags); } -static struct xpc_arch_operations xpc_arch_ops_sn2 = { - .setup_partitions = xpc_setup_partitions_sn2, - .teardown_partitions = xpc_teardown_partitions_sn2, - .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2, - .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2, - .setup_rsvd_page = xpc_setup_rsvd_page_sn2, - - .allow_hb = xpc_allow_hb_sn2, - .disallow_hb = xpc_disallow_hb_sn2, - .disallow_all_hbs = xpc_disallow_all_hbs_sn2, - .increment_heartbeat = xpc_increment_heartbeat_sn2, - .offline_heartbeat = xpc_offline_heartbeat_sn2, - .online_heartbeat = xpc_online_heartbeat_sn2, - .heartbeat_init = xpc_heartbeat_init_sn2, - .heartbeat_exit = xpc_heartbeat_exit_sn2, - .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2, - - .request_partition_activation = - xpc_request_partition_activation_sn2, - .request_partition_reactivation = - xpc_request_partition_reactivation_sn2, - .request_partition_deactivation = - xpc_request_partition_deactivation_sn2, - .cancel_partition_deactivation_request = - xpc_cancel_partition_deactivation_request_sn2, - - .setup_ch_structures = xpc_setup_ch_structures_sn2, - .teardown_ch_structures = xpc_teardown_ch_structures_sn2, - - .make_first_contact = xpc_make_first_contact_sn2, - - .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2, - .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2, - .send_chctl_closereply = xpc_send_chctl_closereply_sn2, - .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2, - .send_chctl_openreply = xpc_send_chctl_openreply_sn2, - .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2, - .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2, - - .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2, - - .setup_msg_structures = xpc_setup_msg_structures_sn2, - .teardown_msg_structures = xpc_teardown_msg_structures_sn2, - - .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2, - .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2, - .partition_engaged = xpc_partition_engaged_sn2, - .any_partition_engaged = xpc_any_partition_engaged_sn2, - .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2, - - .n_of_deliverable_payloads = 
xpc_n_of_deliverable_payloads_sn2, - .send_payload = xpc_send_payload_sn2, - .get_deliverable_payload = xpc_get_deliverable_payload_sn2, - .received_payload = xpc_received_payload_sn2, - .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2, -}; - int xpc_init_sn2(void) { int ret; size_t buf_size; - xpc_arch_ops = xpc_arch_ops_sn2; + xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2; + xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_sn2; + xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2; + xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2; + xpc_increment_heartbeat = xpc_increment_heartbeat_sn2; + xpc_offline_heartbeat = xpc_offline_heartbeat_sn2; + xpc_online_heartbeat = xpc_online_heartbeat_sn2; + xpc_heartbeat_init = xpc_heartbeat_init_sn2; + xpc_heartbeat_exit = xpc_heartbeat_exit_sn2; + xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2; + + xpc_request_partition_activation = xpc_request_partition_activation_sn2; + xpc_request_partition_reactivation = + xpc_request_partition_reactivation_sn2; + xpc_request_partition_deactivation = + xpc_request_partition_deactivation_sn2; + xpc_cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_sn2; + + xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2; + xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2; + xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2; + xpc_make_first_contact = xpc_make_first_contact_sn2; + + xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2; + xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2; + xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2; + xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2; + xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2; + + xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2; + + xpc_setup_msg_structures = xpc_setup_msg_structures_sn2; + xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2; + + xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2; + xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2; + xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2; + xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2; + + xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2; + xpc_indicate_partition_disengaged = + xpc_indicate_partition_disengaged_sn2; + xpc_partition_engaged = xpc_partition_engaged_sn2; + xpc_any_partition_engaged = xpc_any_partition_engaged_sn2; + xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2; + + xpc_send_payload = xpc_send_payload_sn2; + xpc_received_payload = xpc_received_payload_sn2; if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " diff --git a/trunk/drivers/misc/sgi-xp/xpc_uv.c b/trunk/drivers/misc/sgi-xp/xpc_uv.c index 9172fcdee4e2..f7fff4727edb 100644 --- a/trunk/drivers/misc/sgi-xp/xpc_uv.c +++ b/trunk/drivers/misc/sgi-xp/xpc_uv.c @@ -46,7 +46,8 @@ struct uv_IO_APIC_route_entry { }; #endif -static struct xpc_heartbeat_uv *xpc_heartbeat_uv; +static atomic64_t xpc_heartbeat_uv; +static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) #define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ @@ -62,7 +63,7 @@ static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv; static int -xpc_setup_partitions_uv(void) 
+xpc_setup_partitions_sn_uv(void) { short partid; struct xpc_partition_uv *part_uv; @@ -78,7 +79,7 @@ xpc_setup_partitions_uv(void) } static void -xpc_teardown_partitions_uv(void) +xpc_teardown_partitions_sn_uv(void) { short partid; struct xpc_partition_uv *part_uv; @@ -422,6 +423,41 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, /* syncing of remote_act_state was just done above */ break; + case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + break; + } + case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + } + case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: { + struct xpc_activate_mq_msg_heartbeat_req_uv *msg; + + msg = container_of(msg_hdr, + struct xpc_activate_mq_msg_heartbeat_req_uv, + hdr); + part_uv->heartbeat = msg->heartbeat; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + } case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { struct xpc_activate_mq_msg_activate_req_uv *msg; @@ -439,7 +475,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; - part_uv->heartbeat_gpa = msg->heartbeat_gpa; if (msg->activate_gru_mq_desc_gpa != part_uv->activate_gru_mq_desc_gpa) { @@ -534,17 +569,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, xpc_wakeup_channel_mgr(part); break; } - case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: { - struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg; - - msg = container_of(msg_hdr, struct - xpc_activate_mq_msg_chctl_opencomplete_uv, hdr); - spin_lock_irqsave(&part->chctl_lock, irq_flags); - part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE; - spin_unlock_irqrestore(&part->chctl_lock, irq_flags); - - xpc_wakeup_channel_mgr(part); - } case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV: spin_lock_irqsave(&part_uv->flags_lock, irq_flags); part_uv->flags |= XPC_P_ENGAGED_UV; @@ -735,7 +759,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) /* * !!! Make our side think that the remote partition sent an activate - * !!! mq message our way by doing what the activate IRQ handler would + * !!! message our way by doing what the activate IRQ handler would * !!! do had one really been sent. 
*/ @@ -782,82 +806,90 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, } static int -xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp) +xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) { - xpc_heartbeat_uv = - &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; - rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); - rp->sn.uv.activate_gru_mq_desc_gpa = + rp->sn.activate_gru_mq_desc_gpa = uv_gpa(xpc_activate_mq_uv->gru_mq_desc); return 0; } static void -xpc_allow_hb_uv(short partid) +xpc_send_heartbeat_uv(int msg_type) { -} + short partid; + struct xpc_partition *part; + struct xpc_activate_mq_msg_heartbeat_req_uv msg; -static void -xpc_disallow_hb_uv(short partid) -{ -} + /* + * !!! On uv we're broadcasting a heartbeat message every 5 seconds. + * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20 + * !!! seconds. This is an increase in numalink traffic. + * ??? Is this good? + */ -static void -xpc_disallow_all_hbs_uv(void) -{ + msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv); + + partid = find_first_bit(xpc_heartbeating_to_mask_uv, + XP_MAX_NPARTITIONS_UV); + + while (partid < XP_MAX_NPARTITIONS_UV) { + part = &xpc_partitions[partid]; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + msg_type); + + partid = find_next_bit(xpc_heartbeating_to_mask_uv, + XP_MAX_NPARTITIONS_UV, partid + 1); + } } static void xpc_increment_heartbeat_uv(void) { - xpc_heartbeat_uv->value++; + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV); } static void xpc_offline_heartbeat_uv(void) { - xpc_increment_heartbeat_uv(); - xpc_heartbeat_uv->offline = 1; + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); } static void xpc_online_heartbeat_uv(void) { - xpc_increment_heartbeat_uv(); - xpc_heartbeat_uv->offline = 0; + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV); } static void xpc_heartbeat_init_uv(void) { - xpc_heartbeat_uv->value = 1; - xpc_heartbeat_uv->offline = 0; + atomic64_set(&xpc_heartbeat_uv, 0); + bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); + xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0]; } static void xpc_heartbeat_exit_uv(void) { - xpc_offline_heartbeat_uv(); + xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); } static enum xp_retval xpc_get_remote_heartbeat_uv(struct xpc_partition *part) { struct xpc_partition_uv *part_uv = &part->sn.uv; - enum xp_retval ret; + enum xp_retval ret = xpNoHeartbeat; - ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), - part_uv->heartbeat_gpa, - sizeof(struct xpc_heartbeat_uv)); - if (ret != xpSuccess) - return ret; + if (part_uv->remote_act_state != XPC_P_AS_INACTIVE && + part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) { - if (part_uv->cached_heartbeat.value == part->last_heartbeat && - !part_uv->cached_heartbeat.offline) { + if (part_uv->heartbeat != part->last_heartbeat || + (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) { - ret = xpNoHeartbeat; - } else { - part->last_heartbeat = part_uv->cached_heartbeat.value; + part->last_heartbeat = part_uv->heartbeat; + ret = xpSuccess; + } } return ret; } @@ -872,9 +904,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; - part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; part->sn.uv.activate_gru_mq_desc_gpa = - remote_rp->sn.uv.activate_gru_mq_desc_gpa; + remote_rp->sn.activate_gru_mq_desc_gpa; /* * ??? 
Is it a good idea to make this conditional on what is @@ -882,9 +913,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, */ if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { msg.rp_gpa = uv_gpa(xpc_rsvd_page); - msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; msg.activate_gru_mq_desc_gpa = - xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; + xpc_rsvd_page->sn.activate_gru_mq_desc_gpa; xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); } @@ -980,7 +1010,7 @@ xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) * Setup the channel structures that are uv specific. */ static enum xp_retval -xpc_setup_ch_structures_uv(struct xpc_partition *part) +xpc_setup_ch_structures_sn_uv(struct xpc_partition *part) { struct xpc_channel_uv *ch_uv; int ch_number; @@ -999,7 +1029,7 @@ xpc_setup_ch_structures_uv(struct xpc_partition *part) * Teardown the channel structures that are uv specific. */ static void -xpc_teardown_ch_structures_uv(struct xpc_partition *part) +xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part) { /* nothing needs to be done */ return; @@ -1212,16 +1242,6 @@ xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); } -static void -xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_activate_mq_msg_chctl_opencomplete_uv msg; - - msg.ch_number = ch->number; - xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), - XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV); -} - static void xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) { @@ -1649,67 +1669,58 @@ xpc_received_payload_uv(struct xpc_channel *ch, void *payload) msg->hdr.msg_slot_number += ch->remote_nentries; } -static struct xpc_arch_operations xpc_arch_ops_uv = { - .setup_partitions = xpc_setup_partitions_uv, - .teardown_partitions = xpc_teardown_partitions_uv, - .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv, - .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv, - .setup_rsvd_page = xpc_setup_rsvd_page_uv, - - .allow_hb = xpc_allow_hb_uv, - .disallow_hb = xpc_disallow_hb_uv, - .disallow_all_hbs = xpc_disallow_all_hbs_uv, - .increment_heartbeat = xpc_increment_heartbeat_uv, - .offline_heartbeat = xpc_offline_heartbeat_uv, - .online_heartbeat = xpc_online_heartbeat_uv, - .heartbeat_init = xpc_heartbeat_init_uv, - .heartbeat_exit = xpc_heartbeat_exit_uv, - .get_remote_heartbeat = xpc_get_remote_heartbeat_uv, - - .request_partition_activation = - xpc_request_partition_activation_uv, - .request_partition_reactivation = - xpc_request_partition_reactivation_uv, - .request_partition_deactivation = - xpc_request_partition_deactivation_uv, - .cancel_partition_deactivation_request = - xpc_cancel_partition_deactivation_request_uv, - - .setup_ch_structures = xpc_setup_ch_structures_uv, - .teardown_ch_structures = xpc_teardown_ch_structures_uv, - - .make_first_contact = xpc_make_first_contact_uv, - - .get_chctl_all_flags = xpc_get_chctl_all_flags_uv, - .send_chctl_closerequest = xpc_send_chctl_closerequest_uv, - .send_chctl_closereply = xpc_send_chctl_closereply_uv, - .send_chctl_openrequest = xpc_send_chctl_openrequest_uv, - .send_chctl_openreply = xpc_send_chctl_openreply_uv, - .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv, - .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv, - - .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv, - - .setup_msg_structures = 
xpc_setup_msg_structures_uv, - .teardown_msg_structures = xpc_teardown_msg_structures_uv, - - .indicate_partition_engaged = xpc_indicate_partition_engaged_uv, - .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv, - .assume_partition_disengaged = xpc_assume_partition_disengaged_uv, - .partition_engaged = xpc_partition_engaged_uv, - .any_partition_engaged = xpc_any_partition_engaged_uv, - - .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv, - .send_payload = xpc_send_payload_uv, - .get_deliverable_payload = xpc_get_deliverable_payload_uv, - .received_payload = xpc_received_payload_uv, - .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, -}; - int xpc_init_uv(void) { - xpc_arch_ops = xpc_arch_ops_uv; + xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv; + xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_uv; + xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; + xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; + xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; + xpc_increment_heartbeat = xpc_increment_heartbeat_uv; + xpc_offline_heartbeat = xpc_offline_heartbeat_uv; + xpc_online_heartbeat = xpc_online_heartbeat_uv; + xpc_heartbeat_init = xpc_heartbeat_init_uv; + xpc_heartbeat_exit = xpc_heartbeat_exit_uv; + xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv; + + xpc_request_partition_activation = xpc_request_partition_activation_uv; + xpc_request_partition_reactivation = + xpc_request_partition_reactivation_uv; + xpc_request_partition_deactivation = + xpc_request_partition_deactivation_uv; + xpc_cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_uv; + + xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv; + xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv; + + xpc_make_first_contact = xpc_make_first_contact_uv; + + xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv; + xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv; + xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv; + xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv; + xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv; + + xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv; + + xpc_setup_msg_structures = xpc_setup_msg_structures_uv; + xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv; + + xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv; + xpc_indicate_partition_disengaged = + xpc_indicate_partition_disengaged_uv; + xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv; + xpc_partition_engaged = xpc_partition_engaged_uv; + xpc_any_partition_engaged = xpc_any_partition_engaged_uv; + + xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv; + xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv; + xpc_send_payload = xpc_send_payload_uv; + xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv; + xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv; + xpc_received_payload = xpc_received_payload_uv; if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n", diff --git a/trunk/drivers/net/atl1c/atl1c_main.c b/trunk/drivers/net/atl1c/atl1c_main.c index 83a12125b94e..deb7b53167ee 100644 --- a/trunk/drivers/net/atl1c/atl1c_main.c +++ b/trunk/drivers/net/atl1c/atl1c_main.c @@ -2532,8 +2532,8 @@ static int __devinit atl1c_probe(struct pci_dev *pdev, * various kernel subsystems to support the 
mechanics required by a * fixed-high-32-bit system. */ - if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) { + if ((pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) || + (pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK) != 0)) { dev_err(&pdev->dev, "No usable DMA configuration,aborting\n"); goto err_dma; } diff --git a/trunk/drivers/net/benet/be_main.c b/trunk/drivers/net/benet/be_main.c index 30d0c81c989e..9b75aa630062 100644 --- a/trunk/drivers/net/benet/be_main.c +++ b/trunk/drivers/net/benet/be_main.c @@ -1821,11 +1821,11 @@ static int __devinit be_probe(struct pci_dev *pdev, be_msix_enable(adapter); - status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + status = pci_set_dma_mask(pdev, DMA_64BIT_MASK); if (!status) { netdev->features |= NETIF_F_HIGHDMA; } else { - status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + status = pci_set_dma_mask(pdev, DMA_32BIT_MASK); if (status) { dev_err(&pdev->dev, "Could not set PCI DMA Mask\n"); goto free_netdev; diff --git a/trunk/drivers/net/jme.c b/trunk/drivers/net/jme.c index 621a7c0c46ba..ece35040288c 100644 --- a/trunk/drivers/net/jme.c +++ b/trunk/drivers/net/jme.c @@ -2591,13 +2591,13 @@ static int jme_pci_dma64(struct pci_dev *pdev) { if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) - if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) + !pci_set_dma_mask(pdev, DMA_64BIT_MASK)) + if (!pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) return 1; if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) - if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40))) + !pci_set_dma_mask(pdev, DMA_40BIT_MASK)) + if (!pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK)) return 1; if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) diff --git a/trunk/drivers/net/wireless/ath9k/pci.c b/trunk/drivers/net/wireless/ath9k/pci.c index 168411d322a2..6dbc58580abb 100644 --- a/trunk/drivers/net/wireless/ath9k/pci.c +++ b/trunk/drivers/net/wireless/ath9k/pci.c @@ -93,14 +93,14 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pci_enable_device(pdev)) return -EIO; - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); if (ret) { printk(KERN_ERR "ath9k: 32-bit DMA not available\n"); goto bad; } - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); if (ret) { printk(KERN_ERR "ath9k: 32-bit DMA consistent " diff --git a/trunk/drivers/net/wireless/p54/p54pci.c b/trunk/drivers/net/wireless/p54/p54pci.c index b1610ea4bb3d..e3569a0a952d 100644 --- a/trunk/drivers/net/wireless/p54/p54pci.c +++ b/trunk/drivers/net/wireless/p54/p54pci.c @@ -492,8 +492,8 @@ static int __devinit p54p_probe(struct pci_dev *pdev, goto err_disable_dev; } - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) || - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) || + pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { dev_err(&pdev->dev, "No suitable DMA available\n"); goto err_free_reg; } diff --git a/trunk/drivers/scsi/3w-9xxx.c b/trunk/drivers/scsi/3w-9xxx.c index 8b7983aba8f7..fdb14ec4fd47 100644 --- a/trunk/drivers/scsi/3w-9xxx.c +++ b/trunk/drivers/scsi/3w-9xxx.c @@ -2234,10 +2234,10 @@ static int twa_resume(struct pci_dev *pdev) pci_set_master(pdev); pci_try_set_mwi(pdev); - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) - || 
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) - || pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) + || pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) + || pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { TW_PRINTK(host, TW_DRIVER, 0x40, "Failed to set dma mask during resume"); retval = -ENODEV; goto out_disable_device; diff --git a/trunk/drivers/scsi/aacraid/aachba.c b/trunk/drivers/scsi/aacraid/aachba.c index 2a889853a106..280261c451d6 100644 --- a/trunk/drivers/scsi/aacraid/aachba.c +++ b/trunk/drivers/scsi/aacraid/aachba.c @@ -1378,7 +1378,7 @@ int aac_get_adapter_info(struct aac_dev* dev) if (dev->nondasd_support && !dev->in_reset) printk(KERN_INFO "%s%d: Non-DASD support enabled.\n",dev->name, dev->id); - if (dma_get_required_mask(&dev->pdev->dev) > DMA_BIT_MASK(32)) + if (dma_get_required_mask(&dev->pdev->dev) > DMA_32BIT_MASK) dev->needs_dac = 1; dev->dac_support = 0; if ((sizeof(dma_addr_t) > 4) && dev->needs_dac && diff --git a/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c b/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c index a91f5143ceac..52427a8324f5 100644 --- a/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/trunk/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -855,9 +855,9 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev) if (sizeof(dma_addr_t) > 4) { const uint64_t required_mask = dma_get_required_mask(&pdev->dev); - if ((required_mask > DMA_BIT_MASK(32)) && !pci_set_dma_mask(pdev, - DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(64))) { + if ((required_mask > DMA_32BIT_MASK) && !pci_set_dma_mask(pdev, + DMA_64BIT_MASK) && !pci_set_consistent_dma_mask(pdev, + DMA_64BIT_MASK)) { ioc->base_add_sg_single = &_base_add_sg_single_64; ioc->sge_size = sizeof(Mpi2SGESimple64_t); desc = "64"; @@ -865,8 +865,8 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev) } } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) - && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { + if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK) + && !pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { ioc->base_add_sg_single = &_base_add_sg_single_32; ioc->sge_size = sizeof(Mpi2SGESimple32_t); desc = "32"; diff --git a/trunk/drivers/spi/spi.c b/trunk/drivers/spi/spi.c index 8eba98c8ed1e..643908b74bc0 100644 --- a/trunk/drivers/spi/spi.c +++ b/trunk/drivers/spi/spi.c @@ -658,7 +658,7 @@ int spi_write_then_read(struct spi_device *spi, int status; struct spi_message message; - struct spi_transfer x[2]; + struct spi_transfer x; u8 *local_buf; /* Use preallocated DMA-safe buffer. We can't avoid copying here, @@ -669,15 +669,9 @@ int spi_write_then_read(struct spi_device *spi, return -EINVAL; spi_message_init(&message); - memset(x, 0, sizeof x); - if (n_tx) { - x[0].len = n_tx; - spi_message_add_tail(&x[0], &message); - } - if (n_rx) { - x[1].len = n_rx; - spi_message_add_tail(&x[1], &message); - } + memset(&x, 0, sizeof x); + x.len = n_tx + n_rx; + spi_message_add_tail(&x, &message); /* ... 
unless someone else is using the pre-allocated buffer */ if (!mutex_trylock(&lock)) { @@ -688,15 +682,15 @@ int spi_write_then_read(struct spi_device *spi, local_buf = buf; memcpy(local_buf, txbuf, n_tx); - x[0].tx_buf = local_buf; - x[1].rx_buf = local_buf + n_tx; + x.tx_buf = local_buf; + x.rx_buf = local_buf; /* do the i/o */ status = spi_sync(spi, &message); if (status == 0) - memcpy(rxbuf, x[1].rx_buf, n_rx); + memcpy(rxbuf, x.rx_buf + n_tx, n_rx); - if (x[0].tx_buf == buf) + if (x.tx_buf == buf) mutex_unlock(&lock); else kfree(local_buf); diff --git a/trunk/drivers/staging/b3dfg/b3dfg.c b/trunk/drivers/staging/b3dfg/b3dfg.c index 75ebe338c6f2..0348072b3ab5 100644 --- a/trunk/drivers/staging/b3dfg/b3dfg.c +++ b/trunk/drivers/staging/b3dfg/b3dfg.c @@ -1000,7 +1000,7 @@ static int __devinit b3dfg_probe(struct pci_dev *pdev, pci_set_master(pdev); - r = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + r = pci_set_dma_mask(pdev, DMA_32BIT_MASK); if (r) { dev_err(&pdev->dev, "no usable DMA configuration\n"); goto err_free_res; } diff --git a/trunk/drivers/usb/otg/nop-usb-xceiv.c b/trunk/drivers/usb/otg/nop-usb-xceiv.c index c567168f89af..4b933f646f2e 100644 --- a/trunk/drivers/usb/otg/nop-usb-xceiv.c +++ b/trunk/drivers/usb/otg/nop-usb-xceiv.c @@ -36,14 +36,14 @@ struct nop_usb_xceiv { struct device *dev; }; -static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32); +static u64 nop_xceiv_dmamask = DMA_32BIT_MASK; static struct platform_device nop_xceiv_device = { .name = "nop_usb_xceiv", .id = -1, .dev = { .dma_mask = &nop_xceiv_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = DMA_32BIT_MASK, .platform_data = NULL, }, }; diff --git a/trunk/drivers/video/aty/radeon_base.c b/trunk/drivers/video/aty/radeon_base.c index 6c37e8ee5efe..16bb7e3c0310 100644 --- a/trunk/drivers/video/aty/radeon_base.c +++ b/trunk/drivers/video/aty/radeon_base.c @@ -698,8 +698,8 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo) found: /* * Some methods fail to retrieve SCLK and MCLK values, we apply default - * settings in this case (200Mhz). If that really happens often, we - * could fetch from registers instead... + * settings in this case (200Mhz). If that really happens often, we could + * fetch from registers instead...
*/ if (rinfo->pll.mclk == 0) rinfo->pll.mclk = 20000; diff --git a/trunk/drivers/video/backlight/backlight.c b/trunk/drivers/video/backlight/backlight.c index 157057c79ca3..dd37cbcaf8ce 100644 --- a/trunk/drivers/video/backlight/backlight.c +++ b/trunk/drivers/video/backlight/backlight.c @@ -35,6 +35,8 @@ static int fb_notifier_callback(struct notifier_block *self, return 0; bd = container_of(self, struct backlight_device, fb_notif); + if (!lock_fb_info(evdata->info)) + return -ENODEV; mutex_lock(&bd->ops_lock); if (bd->ops) if (!bd->ops->check_fb || @@ -47,6 +49,7 @@ static int fb_notifier_callback(struct notifier_block *self, backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); + unlock_fb_info(evdata->info); return 0; } diff --git a/trunk/drivers/video/backlight/lcd.c b/trunk/drivers/video/backlight/lcd.c index b6449470106c..0bb13df0fa89 100644 --- a/trunk/drivers/video/backlight/lcd.c +++ b/trunk/drivers/video/backlight/lcd.c @@ -40,6 +40,8 @@ static int fb_notifier_callback(struct notifier_block *self, if (!ld->ops) return 0; + if (!lock_fb_info(evdata->info)) + return -ENODEV; mutex_lock(&ld->ops_lock); if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info)) { if (event == FB_EVENT_BLANK) { @@ -51,6 +53,7 @@ static int fb_notifier_callback(struct notifier_block *self, } } mutex_unlock(&ld->ops_lock); + unlock_fb_info(evdata->info); return 0; } diff --git a/trunk/drivers/video/cirrusfb.c b/trunk/drivers/video/cirrusfb.c index 4c2bf923418c..d42e385f091c 100644 --- a/trunk/drivers/video/cirrusfb.c +++ b/trunk/drivers/video/cirrusfb.c @@ -567,7 +567,9 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, default: dev_dbg(info->device, "Unsupported bpp size: %d\n", var->bits_per_pixel); - return -EINVAL; + assert(false); + /* should never occur */ + break; } if (var->xres_virtual < var->xres) diff --git a/trunk/drivers/video/console/fbcon.c b/trunk/drivers/video/console/fbcon.c index 471a9a60376a..2cd500a304f2 100644 --- a/trunk/drivers/video/console/fbcon.c +++ b/trunk/drivers/video/console/fbcon.c @@ -2263,12 +2263,9 @@ static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info, } - if (!lock_fb_info(info)) - return; event.info = info; event.data = &blank; fb_notifier_call_chain(FB_EVENT_CONBLANK, &event); - unlock_fb_info(info); } static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) @@ -2959,6 +2956,8 @@ static int fbcon_fb_unregistered(struct fb_info *info) { int i, idx; + if (!lock_fb_info(info)) + return -ENODEV; idx = info->node; for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] == idx) @@ -2986,6 +2985,8 @@ static int fbcon_fb_unregistered(struct fb_info *info) if (primary_device == idx) primary_device = -1; + unlock_fb_info(info); + if (!num_registered_fb) unregister_con_driver(&fb_con); @@ -3026,8 +3027,11 @@ static int fbcon_fb_registered(struct fb_info *info) { int ret = 0, i, idx; + if (!lock_fb_info(info)) + return -ENODEV; idx = info->node; fbcon_select_primary(info); + unlock_fb_info(info); if (info_idx == -1) { for (i = first_fb_vc; i <= last_fb_vc; i++) { @@ -3148,23 +3152,53 @@ static int fbcon_event_notify(struct notifier_block *self, switch(action) { case FB_EVENT_SUSPEND: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_suspended(info); + unlock_fb_info(info); break; case FB_EVENT_RESUME: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_resumed(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto
done; + } fbcon_modechanged(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE_ALL: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_set_all_vcs(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_DELETE: mode = event->data; + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } ret = fbcon_mode_deleted(info, mode); + unlock_fb_info(info); break; case FB_EVENT_FB_UNBIND: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } idx = info->node; + unlock_fb_info(info); ret = fbcon_fb_unbind(idx); break; case FB_EVENT_FB_REGISTERED: @@ -3183,14 +3217,29 @@ static int fbcon_event_notify(struct notifier_block *self, con2fb->framebuffer = con2fb_map[con2fb->console - 1]; break; case FB_EVENT_BLANK: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_fb_blanked(info, *(int *)event->data); + unlock_fb_info(info); break; case FB_EVENT_NEW_MODELIST: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_new_modelist(info); + unlock_fb_info(info); break; case FB_EVENT_GET_REQ: caps = event->data; + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_get_requirement(info, caps); + unlock_fb_info(info); break; } done: diff --git a/trunk/drivers/video/efifb.c b/trunk/drivers/video/efifb.c index 8dea2bc92705..0c5b9a9fd56f 100644 --- a/trunk/drivers/video/efifb.c +++ b/trunk/drivers/video/efifb.c @@ -210,15 +210,12 @@ static int __init efifb_probe(struct platform_device *dev) unsigned int size_total; int request_succeeded = 0; + printk(KERN_INFO "efifb: probing for efifb\n"); + if (!screen_info.lfb_depth) screen_info.lfb_depth = 32; if (!screen_info.pages) screen_info.pages = 1; - if (!screen_info.lfb_base) { - printk(KERN_DEBUG "efifb: invalid framebuffer address\n"); - return -ENODEV; - } - printk(KERN_INFO "efifb: probing for efifb\n"); /* just assume they're all unset if any are */ if (!screen_info.blue_size) { diff --git a/trunk/drivers/video/fbmem.c b/trunk/drivers/video/fbmem.c index d412a1ddc12f..2ac32e6b5953 100644 --- a/trunk/drivers/video/fbmem.c +++ b/trunk/drivers/video/fbmem.c @@ -1097,11 +1097,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EINVAL; con2fb.framebuffer = -1; event.data = &con2fb; - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); - unlock_fb_info(info); ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? 
-EFAULT : 0; break; case FBIOPUT_CON2FBMAP: @@ -1118,11 +1115,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, break; } event.data = &con2fb; - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); - unlock_fb_info(info); break; case FBIOBLANK: if (!lock_fb_info(info)) @@ -1527,10 +1521,7 @@ register_framebuffer(struct fb_info *fb_info) registered_fb[i] = fb_info; event.info = fb_info; - if (!lock_fb_info(fb_info)) - return -ENODEV; fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event); - unlock_fb_info(fb_info); return 0; } @@ -1564,12 +1555,8 @@ unregister_framebuffer(struct fb_info *fb_info) goto done; } - - if (!lock_fb_info(fb_info)) - return -ENODEV; event.info = fb_info; ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event); - unlock_fb_info(fb_info); if (ret) { ret = -EINVAL; @@ -1603,8 +1590,6 @@ void fb_set_suspend(struct fb_info *info, int state) { struct fb_event event; - if (!lock_fb_info(info)) - return; event.info = info; if (state) { fb_notifier_call_chain(FB_EVENT_SUSPEND, &event); @@ -1613,7 +1598,6 @@ void fb_set_suspend(struct fb_info *info, int state) info->state = FBINFO_STATE_RUNNING; fb_notifier_call_chain(FB_EVENT_RESUME, &event); } - unlock_fb_info(info); } /** @@ -1683,11 +1667,8 @@ int fb_new_modelist(struct fb_info *info) err = 1; if (!list_empty(&info->modelist)) { - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; err = fb_notifier_call_chain(FB_EVENT_NEW_MODELIST, &event); - unlock_fb_info(info); } return err; diff --git a/trunk/drivers/video/intelfb/intelfb.h b/trunk/drivers/video/intelfb/intelfb.h index 40984551c927..a50bea614804 100644 --- a/trunk/drivers/video/intelfb/intelfb.h +++ b/trunk/drivers/video/intelfb/intelfb.h @@ -53,7 +53,6 @@ #define PCI_DEVICE_ID_INTEL_830M 0x3577 #define PCI_DEVICE_ID_INTEL_845G 0x2562 #define PCI_DEVICE_ID_INTEL_85XGM 0x3582 -#define PCI_DEVICE_ID_INTEL_854 0x358E #define PCI_DEVICE_ID_INTEL_865G 0x2572 #define PCI_DEVICE_ID_INTEL_915G 0x2582 #define PCI_DEVICE_ID_INTEL_915GM 0x2592 @@ -155,7 +154,6 @@ enum intel_chips { INTEL_85XGM, INTEL_852GM, INTEL_852GME, - INTEL_854, INTEL_855GM, INTEL_855GME, INTEL_865G, diff --git a/trunk/drivers/video/intelfb/intelfb_i2c.c b/trunk/drivers/video/intelfb/intelfb_i2c.c index 487f2be47460..b3065492bb20 100644 --- a/trunk/drivers/video/intelfb/intelfb_i2c.c +++ b/trunk/drivers/video/intelfb/intelfb_i2c.c @@ -156,7 +156,6 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo) switch(dinfo->chipset) { case INTEL_830M: case INTEL_845G: - case INTEL_854: case INTEL_855GM: case INTEL_865G: dinfo->output[i].type = INTELFB_OUTPUT_DVO; diff --git a/trunk/drivers/video/intelfb/intelfbdrv.c b/trunk/drivers/video/intelfb/intelfbdrv.c index ace14fe02fc4..6d8e5415c809 100644 --- a/trunk/drivers/video/intelfb/intelfbdrv.c +++ b/trunk/drivers/video/intelfb/intelfbdrv.c @@ -182,7 +182,6 @@ static struct pci_device_id intelfb_pci_table[] __devinitdata = { { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_845G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_845G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_85XGM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_85XGM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_865G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_865G }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_854, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_854 }, 
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915GM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_945G }, diff --git a/trunk/drivers/video/intelfb/intelfbhw.c b/trunk/drivers/video/intelfb/intelfbhw.c index 0689f97c5238..8b26b27c2db6 100644 --- a/trunk/drivers/video/intelfb/intelfbhw.c +++ b/trunk/drivers/video/intelfb/intelfbhw.c @@ -84,11 +84,6 @@ int intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo) dinfo->mobile = 0; dinfo->pll_index = PLLS_I8xx; return 0; - case PCI_DEVICE_ID_INTEL_854: - dinfo->mobile = 1; - dinfo->name = "Intel(R) 854"; - dinfo->chipset = INTEL_854; - return 0; case PCI_DEVICE_ID_INTEL_85XGM: tmp = 0; dinfo->mobile = 1; diff --git a/trunk/drivers/video/s3fb.c b/trunk/drivers/video/s3fb.c index c3fad34309ed..4dcec48a1d78 100644 --- a/trunk/drivers/video/s3fb.c +++ b/trunk/drivers/video/s3fb.c @@ -45,11 +45,11 @@ struct s3fb_info { static const struct svga_fb_format s3fb_formats[] = { { 0, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP4, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 4, {0, 4, 0}, {0, 4, 0}, {0, 4, 0}, {0, 0, 0}, 0, + { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 4, {0, 4, 0}, {0, 4, 0}, {0, 4, 0}, {0, 0, 0}, 1, + { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 1, FB_TYPE_INTERLEAVED_PLANES, 1, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 8, {0, 8, 0}, {0, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0, + { 8, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 4, 8}, {16, {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 2, 4}, diff --git a/trunk/drivers/video/sa1100fb.c b/trunk/drivers/video/sa1100fb.c index 10ddad8e17d6..fad58cf9ef73 100644 --- a/trunk/drivers/video/sa1100fb.c +++ b/trunk/drivers/video/sa1100fb.c @@ -199,20 +199,16 @@ extern void (*sa1100fb_backlight_power)(int on); extern void (*sa1100fb_lcd_power)(int on); -static struct sa1100fb_rgb rgb_4 = { +/* + * IMHO this looks wrong. In 8BPP, length should be 8. 
+ */ +static struct sa1100fb_rgb rgb_8 = { .red = { .offset = 0, .length = 4, }, .green = { .offset = 0, .length = 4, }, .blue = { .offset = 0, .length = 4, }, .transp = { .offset = 0, .length = 0, }, }; -static struct sa1100fb_rgb rgb_8 = { - .red = { .offset = 0, .length = 8, }, - .green = { .offset = 0, .length = 8, }, - .blue = { .offset = 0, .length = 8, }, - .transp = { .offset = 0, .length = 0, }, -}; - static struct sa1100fb_rgb def_rgb_16 = { .red = { .offset = 11, .length = 5, }, .green = { .offset = 5, .length = 6, }, @@ -617,7 +613,7 @@ sa1100fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) DPRINTK("var->bits_per_pixel=%d\n", var->bits_per_pixel); switch (var->bits_per_pixel) { case 4: - rgbidx = RGB_4; + rgbidx = RGB_8; break; case 8: rgbidx = RGB_8; @@ -1386,7 +1382,6 @@ static struct sa1100fb_info * __init sa1100fb_init_fbinfo(struct device *dev) fbi->fb.monspecs = monspecs; fbi->fb.pseudo_palette = (fbi + 1); - fbi->rgb[RGB_4] = &rgb_4; fbi->rgb[RGB_8] = &rgb_8; fbi->rgb[RGB_16] = &def_rgb_16; diff --git a/trunk/drivers/video/sa1100fb.h b/trunk/drivers/video/sa1100fb.h index 1c3b459865d8..86831db9a042 100644 --- a/trunk/drivers/video/sa1100fb.h +++ b/trunk/drivers/video/sa1100fb.h @@ -57,10 +57,9 @@ struct sa1100fb_lcd_reg { unsigned long lccr3; }; -#define RGB_4 (0) -#define RGB_8 (1) -#define RGB_16 (2) -#define NR_RGB 3 +#define RGB_8 (0) +#define RGB_16 (1) +#define NR_RGB 2 struct sa1100fb_info { struct fb_info fb; diff --git a/trunk/drivers/video/sis/sis_main.c b/trunk/drivers/video/sis/sis_main.c index 7e17ee95a97a..346d6458cf76 100644 --- a/trunk/drivers/video/sis/sis_main.c +++ b/trunk/drivers/video/sis/sis_main.c @@ -1129,7 +1129,7 @@ sisfb_bpp_to_var(struct sis_video_info *ivideo, struct fb_var_screeninfo *var) switch(var->bits_per_pixel) { case 8: var->red.offset = var->green.offset = var->blue.offset = 0; - var->red.length = var->green.length = var->blue.length = 8; + var->red.length = var->green.length = var->blue.length = 6; break; case 16: var->red.offset = 11; diff --git a/trunk/drivers/video/skeletonfb.c b/trunk/drivers/video/skeletonfb.c index 89158bc71da2..a439159204a8 100644 --- a/trunk/drivers/video/skeletonfb.c +++ b/trunk/drivers/video/skeletonfb.c @@ -308,11 +308,9 @@ static int xxxfb_setcolreg(unsigned regno, unsigned red, unsigned green, * color depth = SUM(var->{color}.length) * * Pseudocolor: - * var->{color}.offset is 0 unless the palette index takes less than - * bits_per_pixel bits and is stored in the upper - * bits of the pixel value - * var->{color}.length is set so that 1 << length is the number of - * available palette entries + * var->{color}.offset is 0 + * var->{color}.length contains width of DAC or the number of unique + * colors available (color depth) * pseudo_palette is not used * RAMDAC[X] is programmed to (red, green, blue) * color depth = var->{color}.length diff --git a/trunk/drivers/video/uvesafb.c b/trunk/drivers/video/uvesafb.c index 421770b5e6ab..0b370aebdbfd 100644 --- a/trunk/drivers/video/uvesafb.c +++ b/trunk/drivers/video/uvesafb.c @@ -55,7 +55,6 @@ static u16 maxvf __devinitdata; /* maximum vertical frequency */ static u16 maxhf __devinitdata; /* maximum horizontal frequency */ static u16 vbemode __devinitdata; /* force use of a specific VBE mode */ static char *mode_option __devinitdata; -static u8 dac_width = 6; static struct uvesafb_ktask *uvfb_tasks[UVESAFB_TASKS_MAX]; static DEFINE_MUTEX(uvfb_lock); @@ -304,10 +303,22 @@ static void uvesafb_setup_var(struct fb_var_screeninfo *var, 
var->blue.offset = 0; var->transp.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; + /* + * We're assuming that we can switch the DAC to 8 bits. If + * this proves to be incorrect, we'll update the fields + * later in set_par(). + */ + if (par->vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC) { + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + } else { + var->red.length = 6; + var->green.length = 6; + var->blue.length = 6; + var->transp.length = 0; + } } } @@ -995,7 +1006,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green, struct fb_info *info) { struct uvesafb_pal_entry entry; - int shift = 16 - dac_width; + int shift = 16 - info->var.green.length; int err = 0; if (regno >= info->cmap.len) @@ -1044,7 +1055,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green, static int uvesafb_setcmap(struct fb_cmap *cmap, struct fb_info *info) { struct uvesafb_pal_entry *entries; - int shift = 16 - dac_width; + int shift = 16 - info->var.green.length; int i, err = 0; if (info->var.bits_per_pixel == 8) { @@ -1306,9 +1317,13 @@ static int uvesafb_set_par(struct fb_info *info) err = uvesafb_exec(task); if (err || (task->t.regs.eax & 0xffff) != 0x004f || ((task->t.regs.ebx & 0xff00) >> 8) != 8) { - dac_width = 6; - } else { - dac_width = 8; + /* + * We've failed to set the DAC palette format - + * time to correct var. + */ + info->var.red.length = 6; + info->var.green.length = 6; + info->var.blue.length = 6; } } diff --git a/trunk/drivers/video/vfb.c b/trunk/drivers/video/vfb.c index 050d432c7d95..cc919ae46571 100644 --- a/trunk/drivers/video/vfb.c +++ b/trunk/drivers/video/vfb.c @@ -318,16 +318,13 @@ static int vfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, * {hardwarespecific} contains width of RAMDAC * cmap[X] is programmed to (X << red.offset) | (X << green.offset) | (X << blue.offset) * RAMDAC[X] is programmed to (red, green, blue) - * + * Pseudocolor: - * var->{color}.offset is 0 unless the palette index takes less than - * bits_per_pixel bits and is stored in the upper - * bits of the pixel value - * var->{color}.length is set so that 1 << length is the number of available - * palette entries + * uses offset = 0 && length = RAMDAC register width. + * var->{color}.offset is 0 + * var->{color}.length contains width of DAC * cmap is not used * RAMDAC[X] is programmed to (red, green, blue) - * + * Truecolor: * does not use DAC. Usually 3 are present.
* var->{color}.offset contains start of bitfield diff --git a/trunk/drivers/xen/cpu_hotplug.c b/trunk/drivers/xen/cpu_hotplug.c index bdfd584ad853..5f54c01c1568 100644 --- a/trunk/drivers/xen/cpu_hotplug.c +++ b/trunk/drivers/xen/cpu_hotplug.c @@ -21,41 +21,29 @@ static void disable_hotplug_cpu(int cpu) set_cpu_present(cpu, false); } -static int vcpu_online(unsigned int cpu) +static void vcpu_hotplug(unsigned int cpu) { int err; char dir[32], state[32]; + if (!cpu_possible(cpu)) + return; + sprintf(dir, "cpu/%u", cpu); err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); if (err != 1) { printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); - return err; - } - - if (strcmp(state, "online") == 0) - return 1; - else if (strcmp(state, "offline") == 0) - return 0; - - printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu); - return -EINVAL; -} -static void vcpu_hotplug(unsigned int cpu) -{ - if (!cpu_possible(cpu)) return; + } - switch (vcpu_online(cpu)) { - case 1: + if (strcmp(state, "online") == 0) { enable_hotplug_cpu(cpu); - break; - case 0: + } else if (strcmp(state, "offline") == 0) { (void)cpu_down(cpu); disable_hotplug_cpu(cpu); - break; - default: - break; + } else { + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", + state, cpu); } } @@ -76,20 +64,12 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, static int setup_cpu_watcher(struct notifier_block *notifier, unsigned long event, void *data) { - int cpu; static struct xenbus_watch cpu_watch = { .node = "cpu", .callback = handle_vcpu_hotplug_event}; (void)register_xenbus_watch(&cpu_watch); - for_each_possible_cpu(cpu) { - if (vcpu_online(cpu) == 0) { - (void)cpu_down(cpu); - cpu_clear(cpu, cpu_present_map); - } - } - return NOTIFY_DONE; } diff --git a/trunk/drivers/xen/manage.c b/trunk/drivers/xen/manage.c index 4b5b84837ee1..0d61db1e7b49 100644 --- a/trunk/drivers/xen/manage.c +++ b/trunk/drivers/xen/manage.c @@ -62,15 +62,14 @@ static int xen_suspend(void *data) gnttab_resume(); xen_mm_unpin_all(); + sysdev_resume(); + if (!*cancelled) { xen_irq_resume(); xen_console_resume(); xen_timer_resume(); } - sysdev_resume(); - device_power_up(PMSG_RESUME); - return 0; } diff --git a/trunk/fs/ext2/inode.c b/trunk/fs/ext2/inode.c index acf678831103..b43b95563663 100644 --- a/trunk/fs/ext2/inode.c +++ b/trunk/fs/ext2/inode.c @@ -590,8 +590,9 @@ static int ext2_get_blocks(struct inode *inode, if (depth == 0) return (err); - +reread: partial = ext2_get_branch(inode, depth, offsets, chain, &err); + /* Simplest case - block found, no allocation needed */ if (!partial) { first_block = le32_to_cpu(chain[depth - 1].key); @@ -601,16 +602,15 @@ static int ext2_get_blocks(struct inode *inode, while (count < maxblocks && count <= blocks_to_boundary) { ext2_fsblk_t blk; - if (!verify_chain(chain, chain + depth - 1)) { + if (!verify_chain(chain, partial)) { /* * Indirect block might be removed by * truncate while we were reading it. * Handling of that case: forget what we've * got now, go to reread. 
*/ - err = -EAGAIN; count = 0; - break; + goto changed; } blk = le32_to_cpu(*(chain[depth-1].p + count)); if (blk == first_block + count) @@ -618,8 +618,7 @@ static int ext2_get_blocks(struct inode *inode, else break; } - if (err != -EAGAIN) - goto got_it; + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ @@ -627,33 +626,6 @@ static int ext2_get_blocks(struct inode *inode, goto cleanup; mutex_lock(&ei->truncate_mutex); - /* - * If the indirect block is missing while we are reading - * the chain(ext3_get_branch() returns -EAGAIN err), or - * if the chain has been changed after we grab the semaphore, - * (either because another process truncated this branch, or - * another get_block allocated this branch) re-grab the chain to see if - * the request block has been allocated or not. - * - * Since we already block the truncate/other get_block - * at this point, we will have the current copy of the chain when we - * splice the branch into the tree. - */ - if (err == -EAGAIN || !verify_chain(chain, partial)) { - while (partial > chain) { - brelse(partial->bh); - partial--; - } - partial = ext2_get_branch(inode, depth, offsets, chain, &err); - if (!partial) { - count++; - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - clear_buffer_new(bh_result); - goto got_it; - } - } /* * Okay, we need to do block allocation. Lazily initialize the block @@ -711,6 +683,12 @@ static int ext2_get_blocks(struct inode *inode, partial--; } return err; +changed: + while (partial > chain) { + brelse(partial->bh); + partial--; + } + goto reread; } int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) diff --git a/trunk/fs/hfs/inode.c b/trunk/fs/hfs/inode.c index a1cbff2b4d99..9435dda8f1e0 100644 --- a/trunk/fs/hfs/inode.c +++ b/trunk/fs/hfs/inode.c @@ -70,10 +70,6 @@ static int hfs_releasepage(struct page *page, gfp_t mask) BUG(); return 0; } - - if (!tree) - return 0; - if (tree->node_size >= PAGE_CACHE_SIZE) { nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); diff --git a/trunk/fs/hfs/mdb.c b/trunk/fs/hfs/mdb.c index 7b6165f25fbe..36ca2e1a4fa3 100644 --- a/trunk/fs/hfs/mdb.c +++ b/trunk/fs/hfs/mdb.c @@ -349,7 +349,6 @@ void hfs_mdb_put(struct super_block *sb) if (HFS_SB(sb)->nls_disk) unload_nls(HFS_SB(sb)->nls_disk); - free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); kfree(HFS_SB(sb)); sb->s_fs_info = NULL; } diff --git a/trunk/fs/jbd/revoke.c b/trunk/fs/jbd/revoke.c index 3e9afc2a91d2..c7bd649bbbdc 100644 --- a/trunk/fs/jbd/revoke.c +++ b/trunk/fs/jbd/revoke.c @@ -55,25 +55,6 @@ * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. - * - * Locking rules: - * We keep two hash tables of revoke records. One hashtable belongs to the - * running transaction (is pointed to by journal->j_revoke), the other one - * belongs to the committing transaction. Accesses to the second hash table - * happen only from the kjournald and no other thread touches this table. Also - * journal_switch_revoke_table() which switches which hashtable belongs to the - * running and which to the committing transaction is called only from - * kjournald. Therefore we need no locks when accessing the hashtable belonging - * to the committing transaction. - * - * All users operating on the hash table belonging to the running transaction - * have a handle to the transaction. Therefore they are safe from kjournald - * switching hash tables under them. 
For operations on the lists of entries in - * the hash table j_revoke_lock is used. - * - * Finally, also replay code uses the hash tables but at this moment noone else - * can touch them (filesystem isn't mounted yet) and hence no locking is - * needed. */ #ifndef __KERNEL__ @@ -421,6 +402,8 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. + * + * The caller must have the journal locked. */ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { @@ -498,7 +481,10 @@ void journal_switch_revoke_table(journal_t *journal) /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. + * + * Called with the journal lock held. */ + void journal_write_revoke_records(journal_t *journal, transaction_t *transaction) { diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.c b/trunk/fs/xfs/linux-2.6/xfs_aops.c index 7ec89fc05b2b..c13f67300fe7 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.c +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.c @@ -152,6 +152,23 @@ xfs_find_bdev_for_inode( return mp->m_ddev_targp->bt_bdev; } +/* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend, + int wait) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + queue_work(xfsdatad_workqueue, &ioend->io_work); + if (wait) + flush_workqueue(xfsdatad_workqueue); + } +} + /* * We're now finished for good with this ioend structure. * Update the page state via the associated buffer_heads, @@ -292,27 +309,6 @@ xfs_end_bio_read( xfs_destroy_ioend(ioend); } -/* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. - */ -STATIC void -xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - struct workqueue_struct *wq = xfsdatad_workqueue; - if (ioend->io_work.func == xfs_end_bio_unwritten) - wq = xfsconvertd_workqueue; - - queue_work(wq, &ioend->io_work); - if (wait) - flush_workqueue(wq); - } -} - /* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. 
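The xfs_aops.c hunk above restores the earlier placement of xfs_finish_ioend(): an ioend carries a count of outstanding holds in io_remaining, each completing bio drops one hold, and whichever caller drops the last hold queues the completion handler on the xfsdatad workqueue (flushing the queue when the caller asked to wait). A rough userspace sketch of that last-reference-schedules-the-work pattern follows; it is only an illustration under C11 atomics and pthreads, and every name in it (ioend_t, finish_ioend, worker, end_io_handler) is an invented stand-in, not the kernel API.

/*
 * Illustrative stand-in for the kernel pattern only: C11 atomics
 * replace atomic_dec_and_test(), a worker thread replaces
 * queue_work(), and pthread_join() replaces flush_workqueue().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

typedef struct ioend {
	atomic_int io_remaining;          /* outstanding holds on this I/O */
	void (*io_work)(struct ioend *);  /* completion handler */
} ioend_t;

static void end_io_handler(ioend_t *io)
{
	(void)io;
	printf("last hold dropped; running completion handler\n");
}

/* Worker thread standing in for the xfsdatad workqueue thread. */
static void *worker(void *arg)
{
	ioend_t *io = arg;

	io->io_work(io);
	return NULL;
}

/*
 * Drop one hold on the ioend.  Only the caller that drops the final
 * hold schedules the completion work, mirroring the
 * atomic_dec_and_test() check in xfs_finish_ioend().
 */
static void finish_ioend(ioend_t *io, pthread_t *thr)
{
	/* atomic_fetch_sub() returns the value before the decrement */
	if (atomic_fetch_sub(&io->io_remaining, 1) == 1)
		pthread_create(thr, NULL, worker, io);
}

int main(void)
{
	pthread_t thr;
	ioend_t io = { .io_work = end_io_handler };

	atomic_init(&io.io_remaining, 2); /* pretend two bios are in flight */

	finish_ioend(&io, &thr);  /* first bio completes: nothing happens */
	finish_ioend(&io, &thr);  /* last bio completes: work is "queued" */

	pthread_join(thr, NULL);  /* analogue of the wait/flush_workqueue case */
	return 0;
}

Here pthread_join() plays the role that flush_workqueue() plays in the hunk above when a synchronous caller passes wait; the essential point in both is that the decrement and the zero test are a single atomic operation, so exactly one completion path schedules the handler.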
diff --git a/trunk/fs/xfs/linux-2.6/xfs_aops.h b/trunk/fs/xfs/linux-2.6/xfs_aops.h index 221b3e66ceef..1dd528849755 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_aops.h +++ b/trunk/fs/xfs/linux-2.6/xfs_aops.h @@ -19,7 +19,6 @@ #define __XFS_AOPS_H__ extern struct workqueue_struct *xfsdatad_workqueue; -extern struct workqueue_struct *xfsconvertd_workqueue; extern mempool_t *xfs_ioend_pool; /* diff --git a/trunk/fs/xfs/linux-2.6/xfs_buf.c b/trunk/fs/xfs/linux-2.6/xfs_buf.c index e28800a9f2b5..aa1016bb9134 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_buf.c +++ b/trunk/fs/xfs/linux-2.6/xfs_buf.c @@ -51,7 +51,6 @@ static struct shrinker xfs_buf_shake = { static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; -struct workqueue_struct *xfsconvertd_workqueue; #ifdef XFS_BUF_TRACE void @@ -1776,7 +1775,6 @@ xfs_flush_buftarg( xfs_buf_t *bp, *n; int pincount = 0; - xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); @@ -1833,15 +1831,9 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - xfsconvertd_workqueue = create_workqueue("xfsconvertd"); - if (!xfsconvertd_workqueue) - goto out_destroy_xfsdatad_workqueue; - register_shrinker(&xfs_buf_shake); return 0; - out_destroy_xfsdatad_workqueue: - destroy_workqueue(xfsdatad_workqueue); out_destroy_xfslogd_workqueue: destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: @@ -1857,7 +1849,6 @@ void xfs_buf_terminate(void) { unregister_shrinker(&xfs_buf_shake); - destroy_workqueue(xfsconvertd_workqueue); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); diff --git a/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c b/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c index 08be36d7326c..5aeb77776961 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/trunk/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -74,14 +74,14 @@ xfs_flush_pages( if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = -filemap_fdatawrite(mapping); + ret = filemap_fdatawrite(mapping); + if (flags & XFS_B_ASYNC) + return -ret; + ret2 = filemap_fdatawait(mapping); + if (!ret) + ret = ret2; } - if (flags & XFS_B_ASYNC) - return ret; - ret2 = xfs_wait_on_pages(ip, first, last); - if (!ret) - ret = ret2; - return ret; + return -ret; } int diff --git a/trunk/fs/xfs/linux-2.6/xfs_lrw.c b/trunk/fs/xfs/linux-2.6/xfs_lrw.c index 9142192ccbe6..7e90daa0d1d1 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_lrw.c +++ b/trunk/fs/xfs/linux-2.6/xfs_lrw.c @@ -751,26 +751,10 @@ xfs_write( goto relock; } } else { - int enospc = 0; - ssize_t ret2 = 0; - -write_retry: xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); - ret2 = generic_file_buffered_write(iocb, iovp, segs, + ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); - /* - * if we just got an ENOSPC, flush the inode now we - * aren't holding any page locks and retry *once* - */ - if (ret2 == -ENOSPC && !enospc) { - error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); - if (error) - goto out_unlock_internal; - enospc = 1; - goto write_retry; - } - ret = ret2; } current->backing_dev_info = NULL; diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.c b/trunk/fs/xfs/linux-2.6/xfs_sync.c index f7ba76633c29..a608e72fa405 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.c +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.c @@ -62,6 +62,12 @@ xfs_sync_inodes_ag( uint32_t first_index = 0; int error = 0; int last_error = 0; + int fflag 
= XFS_B_ASYNC; + + if (flags & SYNC_DELWRI) + fflag = XFS_B_DELWRI; + if (flags & SYNC_WAIT) + fflag = 0; /* synchronous overrides all */ do { struct inode *inode; @@ -122,23 +128,11 @@ xfs_sync_inodes_ag( * If we have to flush data or wait for I/O completion * we need to hold the iolock. */ - if (flags & SYNC_DELWRI) { - if (VN_DIRTY(inode)) { - if (flags & SYNC_TRYLOCK) { - if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) - lock_flags |= XFS_IOLOCK_SHARED; - } else { - xfs_ilock(ip, XFS_IOLOCK_SHARED); - lock_flags |= XFS_IOLOCK_SHARED; - } - if (lock_flags & XFS_IOLOCK_SHARED) { - error = xfs_flush_pages(ip, 0, -1, - (flags & SYNC_WAIT) ? 0 - : XFS_B_ASYNC, - FI_NONE); - } - } - if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) + if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; + error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); + if (flags & SYNC_IOWAIT) xfs_ioend_wait(ip); } xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -404,17 +398,15 @@ STATIC void xfs_syncd_queue_work( struct xfs_mount *mp, void *data, - void (*syncer)(struct xfs_mount *, void *), - struct completion *completion) + void (*syncer)(struct xfs_mount *, void *)) { - struct xfs_sync_work *work; + struct bhv_vfs_sync_work *work; - work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); + work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); INIT_LIST_HEAD(&work->w_list); work->w_syncer = syncer; work->w_data = data; work->w_mount = mp; - work->w_completion = completion; spin_lock(&mp->m_sync_lock); list_add_tail(&work->w_list, &mp->m_sync_list); spin_unlock(&mp->m_sync_lock); @@ -428,26 +420,49 @@ xfs_syncd_queue_work( * heads, looking about for more room... */ STATIC void -xfs_flush_inodes_work( +xfs_flush_inode_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + filemap_flush(inode->i_mapping); + iput(inode); +} + +void +xfs_flush_inode( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); + delay(msecs_to_jiffies(500)); +} + +/* + * This is the "bigger hammer" version of xfs_flush_inode_work... + * (IOW, "If at first you don't succeed, use a Bigger Hammer"). 
+ */ +STATIC void +xfs_flush_device_work( struct xfs_mount *mp, void *arg) { struct inode *inode = arg; - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); - xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); + sync_blockdev(mp->m_super->s_bdev); iput(inode); } void -xfs_flush_inodes( +xfs_flush_device( xfs_inode_t *ip) { struct inode *inode = VFS_I(ip); - DECLARE_COMPLETION_ONSTACK(completion); igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); - wait_for_completion(&completion); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); + delay(msecs_to_jiffies(500)); xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } @@ -482,7 +497,7 @@ xfssyncd( { struct xfs_mount *mp = arg; long timeleft; - xfs_sync_work_t *work, *n; + bhv_vfs_sync_work_t *work, *n; LIST_HEAD (tmp); set_freezable(); @@ -517,8 +532,6 @@ xfssyncd( list_del(&work->w_list); if (work == &mp->m_sync_work) continue; - if (work->w_completion) - complete(work->w_completion); kmem_free(work); } } @@ -532,7 +545,6 @@ xfs_syncd_init( { mp->m_sync_work.w_syncer = xfs_sync_worker; mp->m_sync_work.w_mount = mp; - mp->m_sync_work.w_completion = NULL; mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); if (IS_ERR(mp->m_sync_task)) return -PTR_ERR(mp->m_sync_task); diff --git a/trunk/fs/xfs/linux-2.6/xfs_sync.h b/trunk/fs/xfs/linux-2.6/xfs_sync.h index 308d5bf6dfbd..04f058c848ae 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_sync.h +++ b/trunk/fs/xfs/linux-2.6/xfs_sync.h @@ -21,20 +21,18 @@ struct xfs_mount; struct xfs_perag; -typedef struct xfs_sync_work { +typedef struct bhv_vfs_sync_work { struct list_head w_list; struct xfs_mount *w_mount; void *w_data; /* syncer routine argument */ void (*w_syncer)(struct xfs_mount *, void *); - struct completion *w_completion; -} xfs_sync_work_t; +} bhv_vfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ #define SYNC_DELWRI 0x0002 /* look at delayed writes */ #define SYNC_WAIT 0x0004 /* wait for i/o to complete */ #define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ #define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ -#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); @@ -45,7 +43,8 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); -void xfs_flush_inodes(struct xfs_inode *ip); +void xfs_flush_inode(struct xfs_inode *ip); +void xfs_flush_device(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); diff --git a/trunk/fs/xfs/xfs_iget.c b/trunk/fs/xfs/xfs_iget.c index 89b81eedce6a..478e587087fe 100644 --- a/trunk/fs/xfs/xfs_iget.c +++ b/trunk/fs/xfs/xfs_iget.c @@ -69,6 +69,15 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); + /* + * initialise the VFS inode here to get failures + * out of the way early. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } + /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; @@ -104,20 +113,6 @@ xfs_inode_alloc( #ifdef XFS_DIR2_TRACE ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif - /* - * Now initialise the VFS inode. 
We do this after the xfs_inode - * initialisation as internal failures will result in ->destroy_inode - * being called and that will pass down through the reclaim path and - * free the XFS inode. This path requires the XFS inode to already be - * initialised. Hence if this call fails, the xfs_inode has already - * been freed and we should not reference it at all in the error - * handling. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) - return NULL; - - /* prevent anyone from using this yet */ - VFS_I(ip)->i_state = I_NEW|I_LOCK; return ip; } diff --git a/trunk/fs/xfs/xfs_iomap.c b/trunk/fs/xfs/xfs_iomap.c index 5aaa2d7ec155..08ce72316bfe 100644 --- a/trunk/fs/xfs/xfs_iomap.c +++ b/trunk/fs/xfs/xfs_iomap.c @@ -337,6 +337,38 @@ xfs_iomap_eof_align_last_fsb( return 0; } +STATIC int +xfs_flush_space( + xfs_inode_t *ip, + int *fsynced, + int *ioflags) +{ + switch (*fsynced) { + case 0: + if (ip->i_delayed_blks) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_inode(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + *fsynced = 1; + } else { + *ioflags |= BMAPI_SYNC; + *fsynced = 2; + } + return 0; + case 1: + *fsynced = 2; + *ioflags |= BMAPI_SYNC; + return 0; + case 2: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_device(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + *fsynced = 3; + return 0; + } + return 1; +} + STATIC int xfs_cmn_err_fsblock_zero( xfs_inode_t *ip, @@ -506,9 +538,15 @@ xfs_iomap_write_direct( } /* - * If the caller is doing a write at the end of the file, then extend the - * allocation out to the file system's write iosize. We clean up any extra - * space left over when the file is closed in xfs_inactive(). + * If the caller is doing a write at the end of the file, + * then extend the allocation out to the file system's write + * iosize. We clean up any extra space left over when the + * file is closed in xfs_inactive(). + * + * For sync writes, we are flushing delayed allocate space to + * try to make additional space available for allocation near + * the filesystem full boundary - preallocation hurts in that + * situation, of course. */ STATIC int xfs_iomap_eof_want_preallocate( @@ -527,7 +565,7 @@ xfs_iomap_eof_want_preallocate( int n, error, imaps; *prealloc = 0; - if ((offset + count) <= ip->i_size) + if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) return 0; /* @@ -573,7 +611,7 @@ xfs_iomap_write_delay( xfs_extlen_t extsz; int nimaps; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; - int prealloc, flushed = 0; + int prealloc, fsynced = 0; int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -589,12 +627,12 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); +retry: error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; -retry: if (prealloc) { aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); @@ -621,22 +659,15 @@ xfs_iomap_write_delay( /* * If bmapi returned us nothing, and if we didn't get back EDQUOT, - * then we must have run out of space - flush all other inodes with - * delalloc blocks and retry without EOF preallocation. + * then we must have run out of space - flush delalloc, and retry.. 
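The xfs_flush_space() helper reinstated above implements a staged escalation on ENOSPC: first flush only this inode's delayed allocations, then retry the write synchronously (BMAPI_SYNC), then flush the whole device, and only then report the failure. A simplified, compilable sketch of that state machine (the flush helpers are stand-ins, not the kernel functions):

#include <stdio.h>

/* stand-ins for the real flushes; illustrative only */
static void flush_this_inode(void)   { puts("flush one inode's delalloc"); }
static void flush_whole_device(void) { puts("flush the whole block device"); }

enum flush_stage { STAGE_INODE, STAGE_SYNC, STAGE_DEVICE, STAGE_DONE };

/* returns 0 if the caller should retry the allocation, 1 to give up */
static int flush_space(enum flush_stage *stage)
{
	switch (*stage) {
	case STAGE_INODE:		/* cheapest: just this file */
		flush_this_inode();
		*stage = STAGE_SYNC;
		return 0;
	case STAGE_SYNC:		/* retry as a sync write (BMAPI_SYNC) */
		*stage = STAGE_DEVICE;
		return 0;
	case STAGE_DEVICE:		/* the "bigger hammer" */
		flush_whole_device();
		*stage = STAGE_DONE;
		return 0;
	default:
		return 1;		/* nothing left: really out of space */
	}
}

A caller loops while the allocation fails and flush_space() returns 0, which is exactly what the goto retry in xfs_iomap_write_delay() does with its fsynced counter.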
*/ if (nimaps == 0) { xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, ip, offset, count); - if (flushed) + if (xfs_flush_space(ip, &fsynced, &ioflag)) return XFS_ERROR(ENOSPC); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inodes(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - - flushed = 1; error = 0; - prealloc = 0; goto retry; } diff --git a/trunk/fs/xfs/xfs_iomap.h b/trunk/fs/xfs/xfs_iomap.h index fdcf7b82747f..a1cc1322fc0f 100644 --- a/trunk/fs/xfs/xfs_iomap.h +++ b/trunk/fs/xfs/xfs_iomap.h @@ -40,7 +40,8 @@ typedef enum { BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ - BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ + BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ + BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */ } bmapi_flags_t; diff --git a/trunk/fs/xfs/xfs_log.c b/trunk/fs/xfs/xfs_log.c index 3750f04ede0b..f76c6d7cea21 100644 --- a/trunk/fs/xfs/xfs_log.c +++ b/trunk/fs/xfs/xfs_log.c @@ -562,8 +562,9 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); - if (IS_ERR(mp->m_log)) { - error = -PTR_ERR(mp->m_log); + if (!mp->m_log) { + cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); + error = ENOMEM; goto out; } @@ -1179,13 +1180,10 @@ xlog_alloc_log(xfs_mount_t *mp, xfs_buf_t *bp; int i; int iclogsize; - int error = ENOMEM; log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); - if (!log) { - xlog_warn("XFS: Log allocation failed: No memory!"); - goto out; - } + if (!log) + return NULL; log->l_mp = mp; log->l_targ = log_target; @@ -1203,35 +1201,19 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_grant_reserve_cycle = 1; log->l_grant_write_cycle = 1; - error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; - if (log->l_sectbb_log < 0 || - log->l_sectbb_log > mp->m_sectbb_log) { - xlog_warn("XFS: Log sector size (0x%x) out of range.", - log->l_sectbb_log); - goto out_free_log; - } - + ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); /* for larger sector sizes, must have v2 or external log */ - if (log->l_sectbb_log != 0 && - (log->l_logBBstart != 0 && - !xfs_sb_version_haslogv2(&mp->m_sb))) { - xlog_warn("XFS: log sector size (0x%x) invalid " - "for configuration.", log->l_sectbb_log); - goto out_free_log; - } - if (mp->m_sb.sb_logsectlog < BBSHIFT) { - xlog_warn("XFS: Log sector log (0x%x) too small.", - mp->m_sb.sb_logsectlog); - goto out_free_log; - } + ASSERT(log->l_sectbb_log == 0 || + log->l_logBBstart == 0 || + xfs_sb_version_haslogv2(&mp->m_sb)); + ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); } log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; xlog_get_iclog_buffer_size(mp, log); - error = ENOMEM; bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); if (!bp) goto out_free_log; @@ -1331,8 +1313,7 @@ xlog_alloc_log(xfs_mount_t *mp, xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); -out: - return ERR_PTR(-error); + return NULL; } /* xlog_alloc_log */ @@ -2560,19 +2541,18 @@ xlog_grant_log_space(xlog_t *log, xlog_ins_ticketq(&log->l_reserve_headq, tic); xlog_trace_loggrant(log, tic, "xlog_grant_log_space: sleep 2"); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); - XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (XLOG_FORCED_SHUTDOWN(log)) { + 
spin_lock(&log->l_grant_lock); goto error_return; + } xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 2"); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2651,7 +2631,7 @@ xlog_regrant_write_log_space(xlog_t *log, * for more free space, otherwise try to get some space for * this transaction. */ - need_bytes = tic->t_unit_res; + if ((ntic = log->l_write_headq)) { free_bytes = xlog_space_left(log, log->l_grant_write_cycle, log->l_grant_write_bytes); @@ -2671,25 +2651,26 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: sleep 1"); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); - XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already * off the queue */ - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (XLOG_FORCED_SHUTDOWN(log)) { + spin_lock(&log->l_grant_lock); goto error_return; + } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 1"); + xlog_grant_push_ail(log->l_mp, tic->t_unit_res); + spin_lock(&log->l_grant_lock); } } + need_bytes = tic->t_unit_res; + redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; @@ -2699,20 +2680,19 @@ xlog_regrant_write_log_space(xlog_t *log, if (free_bytes < need_bytes) { if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_write_headq, tic); - spin_unlock(&log->l_grant_lock); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); - XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already off the queue */ - spin_lock(&log->l_grant_lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (XLOG_FORCED_SHUTDOWN(log)) { + spin_lock(&log->l_grant_lock); goto error_return; + } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 2"); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); diff --git a/trunk/fs/xfs/xfs_mount.h b/trunk/fs/xfs/xfs_mount.h index d6a64392f983..7af44adffc8f 100644 --- a/trunk/fs/xfs/xfs_mount.h +++ b/trunk/fs/xfs/xfs_mount.h @@ -313,7 +313,7 @@ typedef struct xfs_mount { #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct task_struct *m_sync_task; /* generalised sync thread */ - xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ + bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ struct list_head m_sync_list; /* sync thread work item list */ spinlock_t m_sync_lock; /* work item list lock */ int m_sync_seq; /* sync thread generation no. 
*/ diff --git a/trunk/fs/xfs/xfs_vnodeops.c b/trunk/fs/xfs/xfs_vnodeops.c index 19cf90a9c762..7394c7af5de5 100644 --- a/trunk/fs/xfs/xfs_vnodeops.c +++ b/trunk/fs/xfs/xfs_vnodeops.c @@ -1457,13 +1457,6 @@ xfs_create( error = xfs_trans_reserve(tp, resblks, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); if (error == ENOSPC) { - /* flush outstanding delalloc blocks and retry */ - xfs_flush_inodes(dp); - error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); - } - if (error == ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ resblks = 0; error = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); diff --git a/trunk/include/asm-generic/siginfo.h b/trunk/include/asm-generic/siginfo.h index c840719a8c59..35752dadd6df 100644 --- a/trunk/include/asm-generic/siginfo.h +++ b/trunk/include/asm-generic/siginfo.h @@ -201,7 +201,7 @@ typedef struct siginfo { #define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ #define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */ #define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint/watchpoint */ -#define NSIGTRAP 4 +#define NSIGTRAP 2 /* * SIGCHLD si_codes diff --git a/trunk/include/drm/drm_pciids.h b/trunk/include/drm/drm_pciids.h index 9477af01a639..2df74eb09563 100644 --- a/trunk/include/drm/drm_pciids.h +++ b/trunk/include/drm/drm_pciids.h @@ -472,7 +472,6 @@ {0x8086, 0x2562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x3582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ - {0x8086, 0x358e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0, 0, 0} #define gamma_PCI_IDS \ @@ -534,5 +533,4 @@ {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ - {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} diff --git a/trunk/include/linux/fb.h b/trunk/include/linux/fb.h index 330c4b1bfcaa..f563c5013932 100644 --- a/trunk/include/linux/fb.h +++ b/trunk/include/linux/fb.h @@ -173,12 +173,8 @@ struct fb_fix_screeninfo { /* Interpretation of offset for color fields: All offsets are from the right, * inside a "pixel" value, which is exactly 'bits_per_pixel' wide (means: you * can use the offset as right argument to <<). A pixel afterwards is a bit - * stream and is written to video memory as that unmodified. - * - * For pseudocolor: offset and length should be the same for all color - * components. Offset specifies the position of the least significant bit - * of the pallette index in a pixel value. Length indicates the number - * of available palette entries (i.e. # of entries = 1 << length). + * stream and is written to video memory as that unmodified. This implies + * big-endian byte order if bits_per_pixel is greater than 8. 
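The fb_bitfield comment above says each color component is placed by shifting with its offset, after truncation to the field length. A worked example of that convention, using a local struct that mirrors the offset/length halves of struct fb_bitfield and assuming a hypothetical RGB565 layout:

#include <stdio.h>
#include <stdint.h>

/* mirrors the offset/length members of struct fb_bitfield */
struct bitfield { uint32_t offset, length; };

/* truncate an 8-bit component to the field width, then shift into place */
static uint32_t pack(uint32_t val8, struct bitfield f)
{
	return (val8 >> (8 - f.length)) << f.offset;
}

int main(void)
{
	/* RGB565: red at 11/5, green at 5/6, blue at 0/5 */
	struct bitfield r = {11, 5}, g = {5, 6}, b = {0, 5};
	uint32_t pixel = pack(0xff, r) | pack(0x80, g) | pack(0x40, b);

	printf("0x%04x\n", pixel);	/* prints 0xfc08 */
	return 0;
}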
*/ struct fb_bitfield { __u32 offset; /* beginning of bitfield */ diff --git a/trunk/include/linux/fiemap.h b/trunk/include/linux/fiemap.h index 934e22d65801..671decbd2aeb 100644 --- a/trunk/include/linux/fiemap.h +++ b/trunk/include/linux/fiemap.h @@ -11,8 +11,6 @@ #ifndef _LINUX_FIEMAP_H #define _LINUX_FIEMAP_H -#include - struct fiemap_extent { __u64 fe_logical; /* logical offset in bytes for the start of * the extent from the beginning of the file */ diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h index d87247d2641f..dcfb93337e9a 100644 --- a/trunk/include/linux/init_task.h +++ b/trunk/include/linux/init_task.h @@ -15,6 +15,19 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +#define INIT_KIOCTX(name, which_mm) \ +{ \ + .users = ATOMIC_INIT(1), \ + .dead = 0, \ + .mm = &which_mm, \ + .user_id = 0, \ + .next = NULL, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ + .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \ + .reqs_active = 0U, \ + .max_reqs = ~0U, \ +} + #define INIT_MM(name) \ { \ .mm_rb = RB_ROOT, \ diff --git a/trunk/include/linux/pci_ids.h b/trunk/include/linux/pci_ids.h index 06ba90c211a5..ee98cd570885 100644 --- a/trunk/include/linux/pci_ids.h +++ b/trunk/include/linux/pci_ids.h @@ -2514,8 +2514,6 @@ #define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 -#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c -#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 #define PCI_DEVICE_ID_INTEL_E7520_MCH 0x3590 diff --git a/trunk/include/linux/sht15.h b/trunk/include/linux/sht15.h deleted file mode 100644 index 046bce05ecab..000000000000 --- a/trunk/include/linux/sht15.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * sht15.h - support for the SHT15 Temperature and Humidity Sensor - * - * Copyright (c) 2009 Jonathan Cameron - * - * Copyright (c) 2007 Wouter Horre - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/** - * struct sht15_platform_data - sht15 connectivity info - * @gpio_data: no. of gpio to which bidirectional data line is connected - * @gpio_sck: no. of gpio to which the data clock is connected. - * @supply_mv: supply voltage in mv. Overridden by regulator if available. - **/ -struct sht15_platform_data { - int gpio_data; - int gpio_sck; - int supply_mv; -}; - diff --git a/trunk/include/video/cyblafb.h b/trunk/include/video/cyblafb.h new file mode 100644 index 000000000000..d3c1d4e2c8e3 --- /dev/null +++ b/trunk/include/video/cyblafb.h @@ -0,0 +1,175 @@ + +#ifndef CYBLAFB_DEBUG +#define CYBLAFB_DEBUG 0 +#endif + +#if CYBLAFB_DEBUG +#define debug(f,a...) printk("%s:" f, __func__ , ## a); +#else +#define debug(f,a...) +#endif + +#define output(f, a...) 
printk("cyblafb: " f, ## a) + +#define Kb (1024) +#define Mb (Kb*Kb) + +/* PCI IDS of supported cards temporarily here */ + +#define CYBERBLADEi1 0x8500 + +/* these defines are for 'lcd' variable */ +#define LCD_STRETCH 0 +#define LCD_CENTER 1 +#define LCD_BIOS 2 + +/* display types */ +#define DISPLAY_CRT 0 +#define DISPLAY_FP 1 + +#define ROP_S 0xCC + +#define point(x,y) ((y)<<16|(x)) + +// +// Attribute Regs, ARxx, 3c0/3c1 +// +#define AR00 0x00 +#define AR01 0x01 +#define AR02 0x02 +#define AR03 0x03 +#define AR04 0x04 +#define AR05 0x05 +#define AR06 0x06 +#define AR07 0x07 +#define AR08 0x08 +#define AR09 0x09 +#define AR0A 0x0A +#define AR0B 0x0B +#define AR0C 0x0C +#define AR0D 0x0D +#define AR0E 0x0E +#define AR0F 0x0F +#define AR10 0x10 +#define AR12 0x12 +#define AR13 0x13 + +// +// Sequencer Regs, SRxx, 3c4/3c5 +// +#define SR00 0x00 +#define SR01 0x01 +#define SR02 0x02 +#define SR03 0x03 +#define SR04 0x04 +#define SR0D 0x0D +#define SR0E 0x0E +#define SR11 0x11 +#define SR18 0x18 +#define SR19 0x19 + +// +// +// +#define CR00 0x00 +#define CR01 0x01 +#define CR02 0x02 +#define CR03 0x03 +#define CR04 0x04 +#define CR05 0x05 +#define CR06 0x06 +#define CR07 0x07 +#define CR08 0x08 +#define CR09 0x09 +#define CR0A 0x0A +#define CR0B 0x0B +#define CR0C 0x0C +#define CR0D 0x0D +#define CR0E 0x0E +#define CR0F 0x0F +#define CR10 0x10 +#define CR11 0x11 +#define CR12 0x12 +#define CR13 0x13 +#define CR14 0x14 +#define CR15 0x15 +#define CR16 0x16 +#define CR17 0x17 +#define CR18 0x18 +#define CR19 0x19 +#define CR1A 0x1A +#define CR1B 0x1B +#define CR1C 0x1C +#define CR1D 0x1D +#define CR1E 0x1E +#define CR1F 0x1F +#define CR20 0x20 +#define CR21 0x21 +#define CR27 0x27 +#define CR29 0x29 +#define CR2A 0x2A +#define CR2B 0x2B +#define CR2D 0x2D +#define CR2F 0x2F +#define CR36 0x36 +#define CR38 0x38 +#define CR39 0x39 +#define CR3A 0x3A +#define CR55 0x55 +#define CR56 0x56 +#define CR57 0x57 +#define CR58 0x58 + +// +// +// + +#define GR00 0x01 +#define GR01 0x01 +#define GR02 0x02 +#define GR03 0x03 +#define GR04 0x04 +#define GR05 0x05 +#define GR06 0x06 +#define GR07 0x07 +#define GR08 0x08 +#define GR0F 0x0F +#define GR20 0x20 +#define GR23 0x23 +#define GR2F 0x2F +#define GR30 0x30 +#define GR31 0x31 +#define GR33 0x33 +#define GR52 0x52 +#define GR53 0x53 +#define GR5D 0x5d + + +// +// Graphics Engine +// +#define GEBase 0x2100 // could be mapped elsewhere if we like it +#define GE00 (GEBase+0x00) // source 1, p 111 +#define GE04 (GEBase+0x04) // source 2, p 111 +#define GE08 (GEBase+0x08) // destination 1, p 111 +#define GE0C (GEBase+0x0C) // destination 2, p 112 +#define GE10 (GEBase+0x10) // right view base & enable, p 112 +#define GE13 (GEBase+0x13) // left view base & enable, p 112 +#define GE18 (GEBase+0x18) // block write start address, p 112 +#define GE1C (GEBase+0x1C) // block write end address, p 112 +#define GE20 (GEBase+0x20) // engine status, p 113 +#define GE24 (GEBase+0x24) // reset all GE pointers +#define GE44 (GEBase+0x44) // command register, p 126 +#define GE48 (GEBase+0x48) // raster operation, p 127 +#define GE60 (GEBase+0x60) // foreground color, p 128 +#define GE64 (GEBase+0x64) // background color, p 128 +#define GE6C (GEBase+0x6C) // Pattern and Style, p 129, ok +#define GE9C (GEBase+0x9C) // pixel engine data port, p 125 +#define GEB8 (GEBase+0xB8) // Destination Stride / Buffer Base 0, p 133 +#define GEBC (GEBase+0xBC) // Destination Stride / Buffer Base 1, p 133 +#define GEC0 (GEBase+0xC0) // Destination Stride / Buffer Base 2, p 133 +#define 
GEC4 (GEBase+0xC4) // Destination Stride / Buffer Base 3, p 133 +#define GEC8 (GEBase+0xC8) // Source Stride / Buffer Base 0, p 133 +#define GECC (GEBase+0xCC) // Source Stride / Buffer Base 1, p 133 +#define GED0 (GEBase+0xD0) // Source Stride / Buffer Base 2, p 133 +#define GED4 (GEBase+0xD4) // Source Stride / Buffer Base 3, p 133 diff --git a/trunk/init/initramfs.c b/trunk/init/initramfs.c index 9ee7b7810417..80cd713f6cc5 100644 --- a/trunk/init/initramfs.c +++ b/trunk/init/initramfs.c @@ -310,8 +310,7 @@ static int __init do_name(void) if (wfd >= 0) { sys_fchown(wfd, uid, gid); sys_fchmod(wfd, mode); - if (body_len) - sys_ftruncate(wfd, body_len); + sys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } @@ -516,7 +515,6 @@ static void __init free_initrd(void) initrd_end = 0; } -#ifdef CONFIG_BLK_DEV_RAM #define BUF_SIZE 1024 static void __init clean_rootfs(void) { @@ -563,7 +561,6 @@ static void __init clean_rootfs(void) sys_close(fd); kfree(buf); } -#endif static int __init populate_rootfs(void) { diff --git a/trunk/ipc/mq_sysctl.c b/trunk/ipc/mq_sysctl.c index 24ae46dfe45d..89f60ec8ee54 100644 --- a/trunk/ipc/mq_sysctl.c +++ b/trunk/ipc/mq_sysctl.c @@ -22,7 +22,6 @@ #define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */ #define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */ -#ifdef CONFIG_PROC_SYSCTL static void *get_mq(ctl_table *table) { char *which = table->data; @@ -31,6 +30,7 @@ static void *get_mq(ctl_table *table) return which; } +#ifdef CONFIG_PROC_SYSCTL static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/trunk/kernel/ptrace.c b/trunk/kernel/ptrace.c index dfcd83ceee3b..64191fa09b7e 100644 --- a/trunk/kernel/ptrace.c +++ b/trunk/kernel/ptrace.c @@ -604,11 +604,10 @@ int ptrace_traceme(void) ret = security_ptrace_traceme(current->parent); /* - * Check PF_EXITING to ensure ->real_parent has not passed - * exit_ptrace(). Otherwise we don't report the error but - * pretend ->real_parent untraces us right after return. + * Set the ptrace bit in the process ptrace flags. + * Then link us on our parent's ptraced list. */ - if (!ret && !(current->real_parent->flags & PF_EXITING)) { + if (!ret) { current->ptrace |= PT_PTRACED; __ptrace_link(current, current->real_parent); } diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c index e7998cf31498..51dbb55604e8 100644 --- a/trunk/kernel/sys.c +++ b/trunk/kernel/sys.c @@ -360,7 +360,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg) { char buffer[256]; - int ret = 0; /* We only trust the superuser with rebooting the system. 
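The sys_reboot() hunk here drops the single ret variable with one common unlock in favour of unlocking on each early-returning branch, so every exit path must release the kernel lock itself. A sketch of that multi-exit shape; the mutex and command names below are stand-ins for lock_kernel()/unlock_kernel() and the LINUX_REBOOT_CMD_* constants, not the kernel code itself:

#include <pthread.h>
#include <errno.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

enum reboot_cmd { CMD_RESTART, CMD_KEXEC, CMD_BAD };

static int do_reboot_cmd(enum reboot_cmd cmd)
{
	pthread_mutex_lock(&big_lock);
	switch (cmd) {
	case CMD_RESTART:
		/* normal path falls out to the common unlock below */
		break;
	case CMD_KEXEC: {
		int ret = -ENOSYS;		/* e.g. kexec not configured */

		pthread_mutex_unlock(&big_lock);/* early return: unlock here */
		return ret;
	}
	default:
		pthread_mutex_unlock(&big_lock);/* and here */
		return -EINVAL;
	}
	pthread_mutex_unlock(&big_lock);
	return 0;
}

Centralizing the unlock is harder to get wrong when new cases are added; the form restored here trades that safety for not carrying a ret variable through the switch.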
*/ if (!capable(CAP_SYS_BOOT)) @@ -398,7 +397,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, kernel_halt(); unlock_kernel(); do_exit(0); - panic("cannot halt"); + break; case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); @@ -418,22 +417,29 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, #ifdef CONFIG_KEXEC case LINUX_REBOOT_CMD_KEXEC: - ret = kernel_kexec(); - break; + { + int ret; + ret = kernel_kexec(); + unlock_kernel(); + return ret; + } #endif #ifdef CONFIG_HIBERNATION case LINUX_REBOOT_CMD_SW_SUSPEND: - ret = hibernate(); - break; + { + int ret = hibernate(); + unlock_kernel(); + return ret; + } #endif default: - ret = -EINVAL; - break; + unlock_kernel(); + return -EINVAL; } unlock_kernel(); - return ret; + return 0; } static void deferred_cad(struct work_struct *dummy) diff --git a/trunk/kernel/sysctl.c b/trunk/kernel/sysctl.c index e3d2c7dd59b9..4286b62b34a0 100644 --- a/trunk/kernel/sysctl.c +++ b/trunk/kernel/sysctl.c @@ -902,6 +902,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_UNEVICTABLE_LRU + { + .ctl_name = CTL_UNNUMBERED, + .procname = "scan_unevictable_pages", + .data = &scan_unevictable_pages, + .maxlen = sizeof(scan_unevictable_pages), + .mode = 0644, + .proc_handler = &scan_unevictable_handler, + }, +#endif #ifdef CONFIG_SLOW_WORK { .ctl_name = CTL_UNNUMBERED, @@ -1292,16 +1302,6 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif -#ifdef CONFIG_UNEVICTABLE_LRU - { - .ctl_name = CTL_UNNUMBERED, - .procname = "scan_unevictable_pages", - .data = &scan_unevictable_pages, - .maxlen = sizeof(scan_unevictable_pages), - .mode = 0644, - .proc_handler = &scan_unevictable_handler, - }, -#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/trunk/mm/Kconfig b/trunk/mm/Kconfig index 57971d2ab848..b53427ad30a3 100644 --- a/trunk/mm/Kconfig +++ b/trunk/mm/Kconfig @@ -213,8 +213,6 @@ config UNEVICTABLE_LRU will use one page flag and increase the code size a little, say Y unless you know what you are doing. - See Documentation/vm/unevictable-lru.txt for more information. - config HAVE_MLOCK bool default y if MMU=y diff --git a/trunk/mm/filemap.c b/trunk/mm/filemap.c index 8bd498040f32..2e2d38ebda4b 100644 --- a/trunk/mm/filemap.c +++ b/trunk/mm/filemap.c @@ -567,8 +567,8 @@ EXPORT_SYMBOL(wait_on_page_bit); /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue - * @page: Page defining the wait queue of interest - * @waiter: Waiter to add to the queue + * @page - Page defining the wait queue of interest + * @waiter - Waiter to add to the queue * * Add an arbitrary @waiter to the wait queue for the nominated @page. 
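For reference, scripts/kernel-doc only recognises parameter lines in the "@name:" form; the "@name -" spelling this filemap.c hunk restores predates that convention and is not picked up by the documentation build. The parseable form looks like:

/**
 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
 * @page: Page defining the wait queue of interest
 * @waiter: Waiter to add to the queue
 *
 * Add an arbitrary @waiter to the wait queue for the nominated @page.
 */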
*/ diff --git a/trunk/mm/memcontrol.c b/trunk/mm/memcontrol.c index e44fb0fbb80e..2fc6d6c48238 100644 --- a/trunk/mm/memcontrol.c +++ b/trunk/mm/memcontrol.c @@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (unlikely(!mem)) return 0; - VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem)); + VM_BUG_ON(mem_cgroup_is_obsolete(mem)); while (1) { int ret; diff --git a/trunk/mm/shmem.c b/trunk/mm/shmem.c index f9cb20ebb990..d94d2e9146bc 100644 --- a/trunk/mm/shmem.c +++ b/trunk/mm/shmem.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -44,6 +43,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include #include @@ -65,28 +65,13 @@ static struct vfsmount *shm_mnt; #include #include -/* - * The maximum size of a shmem/tmpfs file is limited by the maximum size of - * its triple-indirect swap vector - see illustration at shmem_swp_entry(). - * - * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, - * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum - * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, - * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. - * - * We use / and * instead of shifts in the definitions below, so that the swap - * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. - */ #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) - -#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) +#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) +#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) -#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) -#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) +#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) +#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) -#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) /* info->flags needs VM_flags to handle pagein/truncate races efficiently */ @@ -2596,7 +2581,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page) #define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev) #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) -#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE +#define SHMEM_MAX_BYTES LLONG_MAX #endif /* CONFIG_SHMEM */ diff --git a/trunk/mm/util.c b/trunk/mm/util.c index 55bef160b9f1..2599e83eea17 100644 --- a/trunk/mm/util.c +++ b/trunk/mm/util.c @@ -223,22 +223,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm) } #endif -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. 
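The kernel-doc being deleted here still describes how get_user_pages_fast() is meant to be driven: pin a run of user pages without taking mmap_sem, operate on them, and drop each reference with put_page(). A sketch of that calling pattern; the helper name is made up for illustration and is not part of this patch:

#include <linux/mm.h>
#include <linux/slab.h>

/* hypothetical helper: pin nr_pages of user memory, use them, release */
static int pin_and_touch(unsigned long start, int nr_pages)
{
	struct page **pages;
	int i, pinned;

	pages = kmalloc(nr_pages * sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	pinned = get_user_pages_fast(start, nr_pages, 1 /* write */, pages);
	for (i = 0; i < pinned; i++) {
		/* ... operate on pages[i] ... */
		put_page(pages[i]);	/* drop each pin when done */
	}
	kfree(pages);
	return pinned < 0 ? pinned : 0;
}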
If no pages - * were pinned, returns -errno. - */ int __attribute__((weak)) get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/trunk/sound/pci/hda/hda_intel.c b/trunk/sound/pci/hda/hda_intel.c index 7ba8db5d4c42..30829ee920c3 100644 --- a/trunk/sound/pci/hda/hda_intel.c +++ b/trunk/sound/pci/hda/hda_intel.c @@ -2260,11 +2260,11 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, gcap &= ~0x01; /* allow 64bit DMA address if supported by H/W */ - if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64))) - pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64)); + if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_64BIT_MASK)) + pci_set_consistent_dma_mask(pci, DMA_64BIT_MASK); else { - pci_set_dma_mask(pci, DMA_BIT_MASK(32)); - pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32)); + pci_set_dma_mask(pci, DMA_32BIT_MASK); + pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK); } /* read number of streams from GCAP register instead of using