diff --git a/[refs] b/[refs]
index 95766f7f5bf7..16267f61e2c4 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 938473b24636d77dc5e9c3f41090d071b6cf4389
+refs/heads/master: 5224e6cc3ab5ae03895bbb67f4a26ce72e62ce58
diff --git a/trunk/Documentation/memory-barriers.txt b/trunk/Documentation/memory-barriers.txt
index 4710845dbac4..c61d8b876fdb 100644
--- a/trunk/Documentation/memory-barriers.txt
+++ b/trunk/Documentation/memory-barriers.txt
@@ -19,7 +19,6 @@ Contents:
      - Control dependencies.
      - SMP barrier pairing.
      - Examples of memory barrier sequences.
-     - Read memory barriers vs load speculation.
 
 (*) Explicit kernel barriers.
 
@@ -249,7 +248,7 @@ And there are a number of things that _must_ or _must_not_ be assumed:
 	we may get either of:
 
 	STORE *A = X; Y = LOAD *A;
-	STORE *A = Y = X;
+	STORE *A = Y;
 
 
 =========================
@@ -345,12 +344,9 @@ Memory barriers come in four basic varieties:
 
 (4) General memory barriers.
 
-     A general memory barrier gives a guarantee that all the LOAD and STORE
-     operations specified before the barrier will appear to happen before all
-     the LOAD and STORE operations specified after the barrier with respect to
-     the other components of the system.
-
-     A general memory barrier is a partial ordering over both loads and stores.
+     A general memory barrier is a combination of both a read memory barrier
+     and a write memory barrier.  It is a partial ordering over both loads and
+     stores.
 
      General memory barriers imply both read and write memory barriers, and so
      can substitute for either.
@@ -550,9 +546,9 @@ write barrier, though, again, a general barrier is viable:
 	===============        ===============
 	a = 1;
 	<write barrier>
-	b = 2;                 x = b;
+	b = 2;                 x = a;
 	                       <read barrier>
-	                       y = a;
+	                       y = b;
 
 Or:
 
@@ -567,18 +563,6 @@ Or:
 Basically, the read barrier always has to be there, even though it can be of
 the "weaker" type.
 
-[!] Note that the stores before the write barrier would normally be expected to
-match the loads after the read barrier or data dependency barrier, and vice
-versa:
-
-	CPU 1                           CPU 2
-	===============                 ===============
-	a = 1;           }----   --->{  v = c
-	b = 2;           }    \ /    {  w = d
-	<write barrier>        \        <read barrier>
-	c = 3;           }    / \    {  x = a;
-	d = 4;           }----   --->{  y = b;
-
 
 EXAMPLES OF MEMORY BARRIER SEQUENCES
 ------------------------------------
 
@@ -616,8 +600,8 @@ STORE B, STORE C } all occuring before the unordered set of { STORE D, STORE E
 	|       |       +------+
 	+-------+       :      :
 	                   |
-	                   | Sequence in which stores are committed to the
-	                   | memory system by CPU 1
+	                   | Sequence in which stores committed to memory system
+	                   | by CPU 1
 	                   V
 
 
@@ -699,12 +683,14 @@ then the following will occur:
 	|       :       :       |       |
 	|       :       :       | CPU 2 |
 	|       +-------+       |       |
-	|       | X->9  |------>|       |
-	|       +-------+       |       |
-	  Makes sure all effects --->   \   ddddddddddddddddd   |       |
-	  prior to the store of C        \  +-------+           |       |
-	  are perceptible to         ----->| B->2  |------>|       |
-	  subsequent loads                  +-------+       |       |
+	 \      | X->9  |------>|       |
+	  \     +-------+       |       |
+	   ----->| B->2  |      |       |
+	         +-------+      |       |
+	  Makes sure all effects --->   ddddddddddddddddd   |       |
+	  prior to the store of C       +-------+           |       |
+	  are perceptible to            | B->2  |------>|       |
+	  successive loads              +-------+       |       |
 	                        :       :       +-------+
 
 
@@ -713,239 +699,73 @@ following sequence of events:
 
 	CPU 1                   CPU 2
 	=======================	=======================
-	{ A = 0, B = 9 }
 	STORE A=1
-	STORE B=2
-	                        LOAD B
-	                        LOAD A
-
-Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
-some effectively random order, despite the write barrier issued by CPU 1:
-
-	+-------+       :      :                :      :
-	|       |       +------+                +-------+
-	|       |------>| A=1  |------      --->| A->0  |
-	|       |       +------+      \         +-------+
-	| CPU 1 |       wwwwwwwwwwwwwwww \  --->| B->9  |
-	|       |       +------+          |     +-------+
-	|       |------>| B=2  |---       |     :      :
-	|       |       +------+   \      |     :      :       +-------+
-	+-------+       :      :    \     |     +-------+      |       |
-	                         ---------->| B->2  |------>|       |
-	                             |      +-------+       | CPU 2 |
-	                             |      | A->0  |------>|       |
-	                             |      +-------+       |       |
-	                             |      :      :        +-------+
-	                              \     :      :
-	                               \    +-------+
-	                                ---->| A->1  |
-	                                     +-------+
-	                                     :      :
-
-
-If, however, a read barrier were to be placed between the load of E and the
-load of A on CPU 2:
-
-	CPU 1                   CPU 2
-	=======================	=======================
-	{ A = 0, B = 9 }
-	STORE A=1
+	STORE C=3
-	STORE B=2
-	                        LOAD B
-
+	STORE D=4
+	STORE E=5
 	                        LOAD A
-
-then the partial ordering imposed by CPU 1 will be perceived correctly by CPU
-2:
-
-	+-------+       :      :                :      :
-	|       |       +------+                +-------+
-	|       |------>| A=1  |------      --->| A->0  |
-	|       |       +------+      \         +-------+
-	| CPU 1 |       wwwwwwwwwwwwwwww \  --->| B->9  |
-	|       |       +------+          |     +-------+
-	|       |------>| B=2  |---       |     :      :
-	|       |       +------+   \      |     :      :       +-------+
-	+-------+       :      :    \     |     +-------+      |       |
-	                         ---------->| B->2  |------>|       |
-	                             |      +-------+       | CPU 2 |
-	                             |      :      :        |       |
-	                             |      :      :        |       |
-	  At this point the read ---->   \  rrrrrrrrrrrrrrrrr   |       |
-	  barrier causes all effects      \ +-------+            |       |
-	  prior to the storage of B   ---->| A->1  |------>|       |
-	  to be perceptible to CPU 2       +-------+        |       |
-	                                   :      :         +-------+
-
-
-To illustrate this more completely, consider what could happen if the code
-contained a load of A either side of the read barrier:
-
-	CPU 1                   CPU 2
-	=======================	=======================
-	{ A = 0, B = 9 }
-	STORE A=1
-	<write barrier>
-	STORE B=2               LOAD B
-	                        LOAD A [first load of A]
-	                        <read barrier>
-	                        LOAD A [second load of A]
-
-Even though the two loads of A both occur after the load of B, they may both
-come up with different values:
-
-	+-------+       :      :                :      :
-	|       |       +------+                +-------+
-	|       |------>| A=1  |------      --->| A->0  |
-	|       |       +------+      \         +-------+
-	| CPU 1 |       wwwwwwwwwwwwwwww \  --->| B->9  |
-	|       |       +------+          |     +-------+
-	|       |------>| B=2  |---       |     :      :
-	|       |       +------+   \      |     :      :       +-------+
-	+-------+       :      :    \     |     +-------+      |       |
-	                         ---------->| B->2  |------>|       |
-	                             |      +-------+       | CPU 2 |
-	                             |      :      :        |       |
-	                             |      :      :        |       |
-	                             |      +-------+       |       |
-	                             |      | A->0  |------>| 1st   |
-	                             |      +-------+       |       |
-	  At this point the read ---->   \  rrrrrrrrrrrrrrrrr   |       |
-	  barrier causes all effects      \ +-------+            |       |
-	  prior to the storage of B   ---->| A->1  |------>| 2nd   |
-	  to be perceptible to CPU 2       +-------+        |       |
-	                                   :      :         +-------+
-
-
-But it may be that the update to A from CPU 1 becomes perceptible to CPU 2
-before the read barrier completes anyway:
-
-	+-------+       :      :                :      :
-	|       |       +------+                +-------+
-	|       |------>| A=1  |------      --->| A->0  |
-	|       |       +------+      \         +-------+
-	| CPU 1 |       wwwwwwwwwwwwwwww \  --->| B->9  |
-	|       |       +------+          |     +-------+
-	|       |------>| B=2  |---       |     :      :
-	|       |       +------+   \      |     :      :       +-------+
-	+-------+       :      :    \     |     +-------+      |       |
-	                         ---------->| B->2  |------>|       |
-	                             |      +-------+       | CPU 2 |
-	                             |      :      :        |       |
-	                              \     :      :        |       |
-	                               \    +-------+       |       |
-	                                ---->| A->1  |------>| 1st   |
-	                                     +-------+       |       |
-	                                     rrrrrrrrrrrrrrrrr       |       |
-	                                     +-------+       |       |
-	                                     | A->1  |------>| 2nd   |
-	                                     +-------+       |       |
-	                                     :      :        +-------+
-
-
-The guarantee is that the second load will always come up with A == 1 if the
-load of B came up with B == 2. No such guarantee exists for the first load of
-A; that may come up with either A == 0 or A == 1.
-
-
-READ MEMORY BARRIERS VS LOAD SPECULATION
-----------------------------------------
-
-Many CPUs speculate with loads: that is they see that they will need to load an
-item from memory, and they find a time where they're not using the bus for any
-other loads, and so do the load in advance - even though they haven't actually
-got to that point in the instruction execution flow yet. This permits the
-actual load instruction to potentially complete immediately because the CPU
-already has the value to hand.
-
-It may turn out that the CPU didn't actually need the value - perhaps because a
-branch circumvented the load - in which case it can discard the value or just
-cache it for later use.
-
-Consider:
-
-	CPU 1                   CPU 2
-	=======================	=======================
-	                        LOAD B
-	                        DIVIDE          } Divide instructions generally
-	                        DIVIDE          } take a long time to perform
-	                        LOAD A
-
-Which might appear as this:
-
-	                                    :      :        +-------+
-	                                    +-------+       |       |
-	                                --->| B->2  |------>|       |
-	                                    +-------+       | CPU 2 |
-	                                    :      :DIVIDE  |       |
-	                                    +-------+       |       |
-	The CPU being busy doing a --->  --->| A->0  |~~~~   |       |
-	division speculates on the          +-------+   ~   |       |
-	LOAD of A                           :      :    ~   |       |
-	                                    :      :DIVIDE  |       |
-	                                    :      :    ~   |       |
-	Once the divisions are complete --> :      :    ~-->|       |
-	the CPU can then perform the        :      :        |       |
-	LOAD with immediate effect          :      :        +-------+
-
-
-Placing a read barrier or a data dependency barrier just before the second
-load:
-
-	CPU 1                   CPU 2
-	=======================	=======================
-	                        LOAD B
-	                        DIVIDE
-	                        DIVIDE
-	                        <read barrier>
-	                        LOAD A
-
-will force any value speculatively obtained to be reconsidered to an extent
-dependent on the type of barrier used.  If there was no change made to the
-speculated memory location, then the speculated value will just be used:
-
-	                                    :      :        +-------+
-	                                    +-------+       |       |
-	                                --->| B->2  |------>|       |
-	                                    +-------+       | CPU 2 |
-	                                    :      :DIVIDE  |       |
-	                                    +-------+       |       |
-	The CPU being busy doing a --->  --->| A->0  |~~~~   |       |
-	division speculates on the          +-------+   ~   |       |
-	LOAD of A                           :      :    ~   |       |
-	                                    :      :DIVIDE  |       |
-	                                    :      :    ~   |       |
-	                                    :      :    ~   |       |
-	                                rrrrrrrrrrrrrrrr~   |       |
-	                                    :      :    ~   |       |
-	                                    :      :    ~-->|       |
-	                                    :      :        |       |
-	                                    :      :        +-------+
+	                        LOAD C
+	                        LOAD D
+	                        LOAD E
+
+Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
+some effectively random order, despite the write barrier issued by CPU 1:
 
-but if there was an update or an invalidation from another CPU pending, then
-the speculation will be cancelled and the value reloaded:
 
+	+-------+       :      :
+	|       |       +------+
+	|       |------>| C=3  | }
+	|       |  :    +------+ }
+	|       |  :    | A=1  | }
+	|       |  :    +------+ }
+	| CPU 1 |  :    | B=2  | }---
+	|       |       +------+ }   \
+	|       |       wwwwwwwwwwwww}   \
+	|       |       +------+ }        \     :      :       +-------+
+	|       |  :    | E=5  | }         \    +-------+      |       |
+	|       |  :    +------+ }          \   { | C->3 |------>|       |
+	|       |------>| D=4  | }           \  { +-------+  :  |       |
+	|       |       +------+              \ { | E->5 |   :  |       |
+	+-------+       :      :               \ { +-------+  : |       |
+	    Transfer                     -->{ | A->1 |   :      | CPU 2 |
+	   from CPU 1                     { +-------+    :      |       |
+	   to CPU 2                       { | D->4  |    :      |       |
+	                                  { +-------+    :      |       |
+	                                  { | B->2  |------>|       |
+	                                    +-------+        |       |
+	                                    :      :         +-------+
+
+
+If, however, a read barrier were to be placed between the load of C and the
+load of D on CPU 2, then the partial ordering imposed by CPU 1 will be
+perceived correctly by CPU 2.
 
-	                                    :      :        +-------+
-	                                    +-------+       |       |
-	                                --->| B->2  |------>|       |
-	                                    +-------+       | CPU 2 |
-	                                    :      :DIVIDE  |       |
-	                                    +-------+       |       |
-	The CPU being busy doing a --->  --->| A->0  |~~~~   |       |
-	division speculates on the          +-------+   ~   |       |
-	LOAD of A                           :      :    ~   |       |
-	                                    :      :DIVIDE  |       |
-	                                    :      :    ~   |       |
-	                                    :      :    ~   |       |
-	                                rrrrrrrrrrrrrrrrr   |       |
-	                                    +-------+       |       |
-	The speculation is discarded ---> --->| A->1  |------>|       |
-	and an updated value is              +-------+        |       |
-	retrieved                            :      :         +-------+
 
+	+-------+       :      :
+	|       |       +------+
+	|       |------>| C=3  | }
+	|       |  :    +------+ }
+	|       |  :    | A=1  | }---
+	|       |  :    +------+ }   \
+	| CPU 1 |  :    | B=2  | }    \
+	|       |       +------+       \
+	|       |       wwwwwwwwwwwwwwww \
+	|       |       +------+          \     :      :       +-------+
+	|       |  :    | E=5  | }         \    +-------+      |       |
+	|       |  :    +------+ }---       \   { | C->3 |------>|       |
+	|       |------>| D=4  | }   \       \  { +-------+  :  |       |
+	|       |       +------+      \   -->{ | B->2 |   :     |       |
+	+-------+       :      :       \     { +-------+   :    |       |
+	                                \    { | A->1 |    :    | CPU 2 |
+	                                 \     +-------+         |       |
+	At this point the read ---->      \   rrrrrrrrrrrrrrrrr  |       |
+	barrier causes all effects         \  +-------+          |       |
+	prior to the storage of C           \ { | E->5 |   :     |       |
+	to be perceptible to CPU 2       -->{ +-------+     :    |       |
+	                                    { | D->4 |------>|       |
+	                                      +-------+       |       |
+	                                      :      :        +-------+
 
 
 ========================
@@ -1081,7 +901,7 @@ IMPLICIT KERNEL MEMORY BARRIERS
 ===============================
 
 Some of the other functions in the linux kernel imply memory barriers, amongst
-which are locking and scheduling functions.
+which are locking, scheduling and memory allocation functions.
 
 This specification is a _minimum_ guarantee; any particular architecture may
 provide more substantial guarantees, but these may not be relied upon outside
@@ -1146,20 +966,6 @@ equivalent to a full barrier, but a LOCK followed by an UNLOCK is not.
 barriers is that the effects instructions outside of a critical section may
 seep into the inside of the critical section.
 
-A LOCK followed by an UNLOCK may not be assumed to be full memory barrier
-because it is possible for an access preceding the LOCK to happen after the
-LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the
-two accesses can themselves then cross:
-
-	*A = a;
-	LOCK
-	UNLOCK
-	*B = b;
-
-may occur as:
-
-	LOCK, STORE *B, STORE *A, UNLOCK
-
 Locks and semaphores may not provide any guarantee of ordering on UP compiled
 systems, and so cannot be counted on in such a situation to actually achieve
 anything at all - especially with respect to I/O accesses - unless combined
@@ -1210,6 +1016,8 @@ Other functions that imply barriers:
 
  (*) schedule() and similar imply full memory barriers.
 
+ (*) Memory allocation and release functions imply full memory barriers.
+
 
 =================================
 INTER-CPU LOCKING BARRIER EFFECTS
diff --git a/trunk/arch/powerpc/kernel/prom_init.c b/trunk/arch/powerpc/kernel/prom_init.c
index f70bd090dacd..41e9ab40cd54 100644
--- a/trunk/arch/powerpc/kernel/prom_init.c
+++ b/trunk/arch/powerpc/kernel/prom_init.c
@@ -822,7 +822,6 @@ static void __init prom_send_capabilities(void)
 	/* try calling the ibm,client-architecture-support method */
 	if (call_prom_ret("call-method", 3, 2, &ret,
 			  ADDR("ibm,client-architecture-support"),
-			  root,
 			  ADDR(ibm_architecture_vec)) == 0) {
 		/* the call exists... */
 		if (ret)
@@ -1623,15 +1622,6 @@ static int __init prom_find_machine_type(void)
 			if (strstr(p, RELOC("Power Macintosh")) ||
 			    strstr(p, RELOC("MacRISC")))
 				return PLATFORM_POWERMAC;
-#ifdef CONFIG_PPC64
-			/* We must make sure we don't detect the IBM Cell
-			 * blades as pSeries due to some firmware issues,
-			 * so we do it here.
-			 */
-			if (strstr(p, RELOC("IBM,CBEA")) ||
-			    strstr(p, RELOC("IBM,CPBW-1.0")))
-				return PLATFORM_GENERIC;
-#endif /* CONFIG_PPC64 */
 			i += sl + 1;
 		}
 	}
diff --git a/trunk/arch/powerpc/kernel/signal_32.c b/trunk/arch/powerpc/kernel/signal_32.c
index 8fdeca2d4597..01e3c08cb550 100644
--- a/trunk/arch/powerpc/kernel/signal_32.c
+++ b/trunk/arch/powerpc/kernel/signal_32.c
@@ -803,13 +803,10 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 		if (__get_user(cmcp, &ucp->uc_regs))
 			return -EFAULT;
 		mcp = (struct mcontext __user *)(u64)cmcp;
-		/* no need to check access_ok(mcp), since mcp < 4GB */
 	}
 #else
 	if (__get_user(mcp, &ucp->uc_regs))
 		return -EFAULT;
-	if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
-		return -EFAULT;
 #endif
 	restore_sigmask(&set);
 	if (restore_user_regs(regs, mcp, sig))
@@ -911,14 +908,13 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 {
 	struct sig_dbg_op op;
 	int i;
-	unsigned char tmp;
 	unsigned long new_msr = regs->msr;
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
 	unsigned long new_dbcr0 = current->thread.dbcr0;
 #endif
 
 	for (i=0; i<ndbg; i++) {
@@ ... @@
 	current->thread.dbcr0 = new_dbcr0;
 #endif
 
-	if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
-	    || __get_user(tmp, (u8 __user *) ctx)
-	    || __get_user(tmp, (u8 __user *) (ctx + 1) - 1))
-		return -EFAULT;
-
 	/*
 	 * If we get a fault copying the context into the kernel's
 	 * image of the user's registers, we can't just return -EFAULT
diff --git a/trunk/arch/powerpc/kernel/signal_64.c b/trunk/arch/powerpc/kernel/signal_64.c
index c2db642f4cdd..27f65b95184d 100644
--- a/trunk/arch/powerpc/kernel/signal_64.c
+++ b/trunk/arch/powerpc/kernel/signal_64.c
@@ -182,8 +182,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
 	if (err)
 		return err;
-	if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
-		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != 0 && (msr & MSR_VEC) != 0)
 		err |= __copy_from_user(current->thread.vr, v_regs,
diff --git a/trunk/arch/powerpc/platforms/cell/setup.c b/trunk/arch/powerpc/platforms/cell/setup.c
index fd3e5609e3e0..6574b22b3cf3 100644
--- a/trunk/arch/powerpc/platforms/cell/setup.c
+++ b/trunk/arch/powerpc/platforms/cell/setup.c
@@ -125,13 +125,14 @@ static void __init cell_init_early(void)
 
 static int __init cell_probe(void)
 {
+	/* XXX This is temporary, the Cell maintainer will come up with
+	 * more appropriate detection logic
+	 */
 	unsigned long root = of_get_flat_dt_root();
+	if (!of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+		return 0;
 
-	if (of_flat_dt_is_compatible(root, "IBM,CBEA") ||
-	    of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
-		return 1;
-
-	return 0;
+	return 1;
 }
 
 /*
diff --git a/trunk/arch/powerpc/platforms/pseries/setup.c b/trunk/arch/powerpc/platforms/pseries/setup.c
index 3ba87835757e..5f79f01c44f2 100644
--- a/trunk/arch/powerpc/platforms/pseries/setup.c
+++ b/trunk/arch/powerpc/platforms/pseries/setup.c
@@ -389,7 +389,6 @@ static int __init pSeries_probe_hypertas(unsigned long node,
 
 static int __init pSeries_probe(void)
 {
-	unsigned long root = of_get_flat_dt_root();
 	char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
 					  "device_type", NULL);
 	if (dtype == NULL)
@@ -397,13 +396,6 @@ static int __init pSeries_probe(void)
 	if (strcmp(dtype, "chrp"))
 		return 0;
 
-	/* Cell blades firmware claims to be chrp while it's not. Until this
-	 * is fixed, we need to avoid those here.
-	 */
-	if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0") ||
-	    of_flat_dt_is_compatible(root, "IBM,CBEA"))
-		return 0;
-
 	DBG("pSeries detected, looking for LPAR capability...\n");
 
 	/* Now try to figure out if we are running on LPAR */
diff --git a/trunk/arch/sparc64/kernel/traps.c b/trunk/arch/sparc64/kernel/traps.c
index 2793a5d82380..563db528e031 100644
--- a/trunk/arch/sparc64/kernel/traps.c
+++ b/trunk/arch/sparc64/kernel/traps.c
@@ -1797,7 +1797,9 @@ static const char *sun4v_err_type_to_str(u32 type)
 	};
 }
 
-static void sun4v_log_error(struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt)
+extern void __show_regs(struct pt_regs * regs);
+
+static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt)
 {
 	int cnt;
 
@@ -1830,6 +1832,8 @@ static void sun4v_log_error(struct sun4v_error_entry *ent, int cpu, const char *
 	       pfx,
 	       ent->err_raddr, ent->err_size, ent->err_cpu);
 
+	__show_regs(regs);
+
 	if ((cnt = atomic_read(ocnt)) != 0) {
 		atomic_set(ocnt, 0);
 		wmb();
@@ -1862,7 +1866,7 @@ void sun4v_resum_error(struct pt_regs *regs, unsigned long offset)
 
 	put_cpu();
 
-	sun4v_log_error(&local_copy, cpu,
+	sun4v_log_error(regs, &local_copy, cpu,
 			KERN_ERR "RESUMABLE ERROR",
 			&sun4v_resum_oflow_cnt);
 }
@@ -1910,7 +1914,7 @@ void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset)
 	}
 #endif
 
-	sun4v_log_error(&local_copy, cpu,
+	sun4v_log_error(regs, &local_copy, cpu,
 			KERN_EMERG "NON-RESUMABLE ERROR",
 			&sun4v_nonresum_oflow_cnt);
 
@@ -2200,7 +2204,6 @@ static inline struct reg_window *kernel_stack_up(struct reg_window *rw)
 void die_if_kernel(char *str, struct pt_regs *regs)
 {
 	static int die_counter;
-	extern void __show_regs(struct pt_regs * regs);
 	extern void smp_report_regs(void);
 	int count = 0;
diff --git a/trunk/drivers/acpi/processor_perflib.c b/trunk/drivers/acpi/processor_perflib.c
index f36db22ce1ae..abbdb37a7f5f 100644
--- a/trunk/drivers/acpi/processor_perflib.c
+++ b/trunk/drivers/acpi/processor_perflib.c
@@ -577,8 +577,6 @@ acpi_processor_register_performance(struct acpi_processor_performance
 		return_VALUE(-EBUSY);
 	}
 
-	WARN_ON(!performance);
-
 	pr->performance = performance;
 
 	if (acpi_processor_get_performance_info(pr)) {
@@ -611,8 +609,7 @@ acpi_processor_unregister_performance(struct acpi_processor_performance
 		return_VOID;
 	}
 
-	if (pr->performance)
-		kfree(pr->performance->states);
+	kfree(pr->performance->states);
 	pr->performance = NULL;
 
 	acpi_cpufreq_remove_file(pr);
diff --git a/trunk/drivers/char/Makefile b/trunk/drivers/char/Makefile
index fb919bfb2824..f5b01c6d498e 100644
--- a/trunk/drivers/char/Makefile
+++ b/trunk/drivers/char/Makefile
@@ -41,9 +41,9 @@ obj-$(CONFIG_N_HDLC)		+= n_hdlc.o
 obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
 obj-$(CONFIG_SX)		+= sx.o generic_serial.o
 obj-$(CONFIG_RIO)		+= rio/ generic_serial.o
+obj-$(CONFIG_HVC_DRIVER)	+= hvc_console.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvc_vio.o hvsi.o
 obj-$(CONFIG_HVC_RTAS)		+= hvc_rtas.o
-obj-$(CONFIG_HVC_DRIVER)	+= hvc_console.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
 obj-$(CONFIG_SGI_SNSC)		+= snsc.o snsc_event.o
 obj-$(CONFIG_MMTIMER)		+= mmtimer.o
diff --git a/trunk/drivers/message/i2o/exec-osm.c b/trunk/drivers/message/i2o/exec-osm.c
index 7bd4d85d0b42..5ea133c59afb 100644
--- a/trunk/drivers/message/i2o/exec-osm.c
+++ b/trunk/drivers/message/i2o/exec-osm.c
@@ -55,7 +55,6 @@ struct i2o_exec_wait {
 	u32 m;			/* message id */
 	struct i2o_message *msg;	/* pointer to the reply message */
 	struct list_head list;	/* node in global wait list */
-	spinlock_t lock;	/* lock before modifying */
 };
 
 /* Work struct needed to handle LCT NOTIFY replies */
@@ -88,7 +87,6 @@ static struct i2o_exec_wait *i2o_exec_wait_alloc(void)
 		return NULL;
 
 	INIT_LIST_HEAD(&wait->list);
-	spin_lock_init(&wait->lock);
 
 	return wait;
 };
@@ -127,7 +125,6 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
 	DECLARE_WAIT_QUEUE_HEAD(wq);
 	struct i2o_exec_wait *wait;
 	static u32 tcntxt = 0x80000000;
-	long flags;
 	int rc = 0;
 
 	wait = i2o_exec_wait_alloc();
@@ -149,28 +146,33 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
 	wait->tcntxt = tcntxt++;
 	msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt);
 
-	wait->wq = &wq;
-
-	/*
-	 * we add elements to the head, because if a entry in the list will
-	 * never be removed, we have to iterate over it every time
-	 */
-	list_add(&wait->list, &i2o_exec_wait_list);
-
 	/*
 	 * Post the message to the controller. At some point later it will
 	 * return. If we time out before it returns then complete will be zero.
 	 */
 	i2o_msg_post(c, msg);
 
-	wait_event_interruptible_timeout(wq, wait->complete, timeout * HZ);
+	if (!wait->complete) {
+		wait->wq = &wq;
+		/*
+		 * we add elements add the head, because if a entry in the list
+		 * will never be removed, we have to iterate over it every time
+		 */
+		list_add(&wait->list, &i2o_exec_wait_list);
+
+		wait_event_interruptible_timeout(wq, wait->complete,
+						 timeout * HZ);
 
-	spin_lock_irqsave(&wait->lock, flags);
+		wait->wq = NULL;
+	}
 
-	wait->wq = NULL;
+	barrier();
 
-	if (wait->complete)
+	if (wait->complete) {
 		rc = le32_to_cpu(wait->msg->body[0]) >> 24;
-	else {
+		i2o_flush_reply(c, wait->m);
+		i2o_exec_wait_free(wait);
+	} else {
 		/*
 		 * We cannot remove it now. This is important. When it does
 		 * terminate (which it must do if the controller has not
@@ -184,13 +186,6 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
 		rc = -ETIMEDOUT;
 	}
 
-	spin_unlock_irqrestore(&wait->lock, flags);
-
-	if (rc != -ETIMEDOUT) {
-		i2o_flush_reply(c, wait->m);
-		i2o_exec_wait_free(wait);
-	}
-
 	return rc;
 };
 
@@ -218,6 +213,7 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
 {
 	struct i2o_exec_wait *wait, *tmp;
 	unsigned long flags;
+	static spinlock_t lock = SPIN_LOCK_UNLOCKED;
 	int rc = 1;
 
 	/*
@@ -227,24 +223,23 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
 	 * already expired. Not much we can do about that except log it for
 	 * debug purposes, increase timeout, and recompile.
 	 */
+	spin_lock_irqsave(&lock, flags);
 	list_for_each_entry_safe(wait, tmp, &i2o_exec_wait_list, list) {
 		if (wait->tcntxt == context) {
-			spin_lock_irqsave(&wait->lock, flags);
-
 			list_del(&wait->list);
+			spin_unlock_irqrestore(&lock, flags);
+
 			wait->m = m;
 			wait->msg = msg;
 			wait->complete = 1;
 
-			if (wait->wq)
-				rc = 0;
-			else
-				rc = -1;
+			barrier();
 
-			spin_unlock_irqrestore(&wait->lock, flags);
-
-			if (rc) {
+			if (wait->wq) {
+				wake_up_interruptible(wait->wq);
+				rc = 0;
+			} else {
 				struct device *dev;
 
 				dev = &c->pdev->dev;
@@ -253,13 +248,15 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
 					 c->name);
 
 				i2o_dma_free(dev, &wait->dma);
 				i2o_exec_wait_free(wait);
-			} else
-				wake_up_interruptible(wait->wq);
+				rc = -1;
+			}
 
 			return rc;
 		}
 	}
 
+	spin_unlock_irqrestore(&lock, flags);
+
 	osm_warn("%s: Bogus reply in POST WAIT (tr-context: %08x)!\n", c->name,
 		 context);
 
@@ -325,9 +322,14 @@ static DEVICE_ATTR(product_id, S_IRUGO, i2o_exec_show_product_id, NULL);
 static int i2o_exec_probe(struct device *dev)
 {
 	struct i2o_device *i2o_dev = to_i2o_device(dev);
+	struct i2o_controller *c = i2o_dev->iop;
 
 	i2o_event_register(i2o_dev, &i2o_exec_driver, 0, 0xffffffff);
 
+	c->exec = i2o_dev;
+
+	i2o_exec_lct_notify(c, c->lct->change_ind + 1);
+
 	device_create_file(dev, &dev_attr_vendor_id);
 	device_create_file(dev, &dev_attr_product_id);
 
@@ -521,8 +523,6 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
 	struct device *dev;
 	struct i2o_message *msg;
 
-	down(&c->lct_lock);
-
 	dev = &c->pdev->dev;
 
 	if (i2o_dma_realloc
@@ -545,8 +545,6 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
 
 	i2o_msg_post(c, msg);
 
-	up(&c->lct_lock);
-
 	return 0;
 };
 
diff --git a/trunk/drivers/message/i2o/iop.c b/trunk/drivers/message/i2o/iop.c
index febbdd4e0605..492167446936 100644
--- a/trunk/drivers/message/i2o/iop.c
+++ b/trunk/drivers/message/i2o/iop.c
@@ -804,6 +804,8 @@ void i2o_iop_remove(struct i2o_controller *c)
 
 	/* Ask the IOP to switch to RESET state */
 	i2o_iop_reset(c);
+
+	put_device(&c->device);
 }
 
 /**
@@ -1057,7 +1059,7 @@ struct i2o_controller *i2o_iop_alloc(void)
 	snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name);
 	if (i2o_pool_alloc
-	    (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4 + sizeof(u32),
+	    (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4,
 	     I2O_MSG_INPOOL_MIN)) {
 		kfree(c);
 		return ERR_PTR(-ENOMEM);
diff --git a/trunk/include/linux/i2o.h b/trunk/include/linux/i2o.h
index c115e9e840b4..dd7d627bf66f 100644
--- a/trunk/include/linux/i2o.h
+++ b/trunk/include/linux/i2o.h
@@ -1114,11 +1114,8 @@ static inline struct i2o_message *i2o_msg_get(struct i2o_controller *c)
 	mmsg->mfa = readl(c->in_port);
 	if (unlikely(mmsg->mfa >= c->in_queue.len)) {
-		u32 mfa = mmsg->mfa;
-
 		mempool_free(mmsg, c->in_msg.mempool);
-
-		if (mfa == I2O_QUEUE_EMPTY)
+		if(mmsg->mfa == I2O_QUEUE_EMPTY)
 			return ERR_PTR(-EBUSY);
 		return ERR_PTR(-EFAULT);
 	}
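
---

Background notes on the patch above (editorial sketches, not part of the commit):

The memory-barriers.txt hunks keep returning to the pairing rule: a writer's
write barrier only orders anything usefully if the reader issues a matching
read barrier. A minimal userspace sketch of that pairing, using GCC's
__atomic builtins in place of the kernel's smp_wmb()/smp_rmb(); all names and
values here are illustrative, not taken from the patch:

#include <pthread.h>
#include <stdio.h>

static int data;   /* the payload ("a" in the document's examples) */
static int ready;  /* the flag    ("b" in the document's examples) */

static void *writer(void *unused)
{
        data = 42;                                     /* a = 1; */
        __atomic_thread_fence(__ATOMIC_RELEASE);       /* <write barrier> */
        __atomic_store_n(&ready, 1, __ATOMIC_RELAXED); /* b = 2; */
        return NULL;
}

static void *reader(void *unused)
{
        while (!__atomic_load_n(&ready, __ATOMIC_RELAXED))
                ;                                      /* x = b; spin until set */
        __atomic_thread_fence(__ATOMIC_ACQUIRE);       /* <read barrier> */
        printf("data = %d\n", data);                   /* y = a; now guaranteed 42 */
        return NULL;
}

int main(void)
{
        pthread_t w, r;
        pthread_create(&r, NULL, reader, NULL);
        pthread_create(&w, NULL, writer, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
}

Drop either fence and the document's point applies: the reader may observe
ready == 1 yet still see a stale value of data.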
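The exec-osm.c hunks replace the per-wait spinlock with barrier(), which
constrains only the compiler, not CPU ordering, so the reply handler and the
timed-out waiter no longer decide ownership of the wait block under a common
lock. A sketch of the handshake the lock-based variant enforces; the names
(wait_block, reply_arrived, timed_out) are hypothetical stand-ins, not the
driver's API:

#include <pthread.h>
#include <stdbool.h>

struct wait_block {
        pthread_mutex_t lock;   /* init with PTHREAD_MUTEX_INITIALIZER */
        bool complete;          /* set by the reply side */
        bool waiter_gone;       /* set by the waiter on timeout */
};

/* Reply side: decide, under the lock, whether a waiter still exists.
 * Exactly one party ends up owning (and freeing) the wait block. */
void reply_arrived(struct wait_block *w)
{
        pthread_mutex_lock(&w->lock);
        w->complete = true;
        if (w->waiter_gone) {
                /* timeout won the race: reply side must clean up here */
        }
        pthread_mutex_unlock(&w->lock);
}

/* Waiter side, after its timeout expires: returns true if the waiter
 * still owns the block (the reply has not landed yet). */
bool timed_out(struct wait_block *w)
{
        bool owner;

        pthread_mutex_lock(&w->lock);
        owner = !w->complete;   /* if the reply already landed, it owns cleanup */
        w->waiter_gone = owner;
        pthread_mutex_unlock(&w->lock);
        return owner;
}

Because both decisions happen inside the same critical section, the
"wake a waiter that already left" and "free a block the other side is still
reading" windows cannot open.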
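Finally, the i2o.h hunk restores a read of mmsg->mfa after the message has
already been handed back to the mempool; the deleted lines had copied the
field out first. As a general C pattern (free() standing in for
mempool_free(), error values illustrative):

#include <stdlib.h>

struct msg { unsigned int mfa; };

/* Error-path sketch: copy out any field you still need BEFORE returning
 * the object to its allocator; reading it afterwards is a use-after-free,
 * even when it happens to "work" because the memory is not yet reused. */
int error_path(struct msg *m)
{
        unsigned int mfa = m->mfa;      /* save first, as the removed lines did */

        free(m);                        /* object is no longer ours */

        return (mfa == 0xffffffffu) ? -16 /* -EBUSY */ : -14 /* -EFAULT */;
}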