From 9ea46abe22550e3366ff7cee2f8391b35b12f730 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 25 May 2016 12:51:20 -0700
Subject: [PATCH 1/3] sparc64: Take ctx_alloc_lock properly in hugetlb_setup().

On cheetahplus chips we take the ctx_alloc_lock in order to
modify the TLB lookup parameters for the indexed TLBs, which
are stored in the context register.

This is called with interrupts disabled, however ctx_alloc_lock
is an IRQ safe lock, therefore we must take acquire/release it
properly with spin_{lock,unlock}_irq().

Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/init_64.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 652683cb4b4bc..14bb0d5ed3c60 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2824,9 +2824,10 @@ void hugetlb_setup(struct pt_regs *regs)
 	 * the Data-TLB for huge pages.
 	 */
 	if (tlb_type == cheetah_plus) {
+		bool need_context_reload = false;
 		unsigned long ctx;
 
-		spin_lock(&ctx_alloc_lock);
+		spin_lock_irq(&ctx_alloc_lock);
 		ctx = mm->context.sparc64_ctx_val;
 		ctx &= ~CTX_PGSZ_MASK;
 		ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
@@ -2845,9 +2846,12 @@ void hugetlb_setup(struct pt_regs *regs)
 			 * also executing in this address space.
 			 */
 			mm->context.sparc64_ctx_val = ctx;
-			on_each_cpu(context_reload, mm, 0);
+			need_context_reload = true;
 		}
-		spin_unlock(&ctx_alloc_lock);
+		spin_unlock_irq(&ctx_alloc_lock);
+
+		if (need_context_reload)
+			on_each_cpu(context_reload, mm, 0);
 	}
 }
 #endif

From d11c2a0de2824395656cf8ed15811580c9dd38aa Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 28 May 2016 21:21:31 -0700
Subject: [PATCH 2/3] sparc: Harden signal return frame checks.

All signal frames must be at least 16-byte aligned, because that is
the alignment we explicitly create when we build signal return stack
frames.

All stack pointers must be at least 8-byte aligned.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/signal32.c   | 46 ++++++++++++++++++++++------------
 arch/sparc/kernel/signal_32.c  | 41 +++++++++++++++++++-----------
 arch/sparc/kernel/signal_64.c  | 31 +++++++++++++++--------
 arch/sparc/kernel/sigutil_32.c |  9 ++++++-
 arch/sparc/kernel/sigutil_64.c | 10 ++++++--
 5 files changed, 92 insertions(+), 45 deletions(-)

diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 3c25241fa5cbd..91cc2f4ae4d98 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -138,12 +138,24 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 	return 0;
 }
 
+/* Checks if the fp is valid.  We always build signal frames which are
+ * 16-byte aligned, therefore we can always enforce that the restore
+ * frame has that property as well.
+ */
+static bool invalid_frame_pointer(void __user *fp, int fplen)
+{
+	if ((((unsigned long) fp) & 15) ||
+	    ((unsigned long)fp) > 0x100000000ULL - fplen)
+		return true;
+	return false;
+}
+
 void do_sigreturn32(struct pt_regs *regs)
 {
 	struct signal_frame32 __user *sf;
 	compat_uptr_t fpu_save;
 	compat_uptr_t rwin_save;
-	unsigned int psr;
+	unsigned int psr, ufp;
 	unsigned int pc, npc;
 	sigset_t set;
 	compat_sigset_t seta;
@@ -158,11 +170,16 @@ void do_sigreturn32(struct pt_regs *regs)
 	sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
 
 	/* 1. Make sure we are not getting garbage from the user */
-	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
-	    (((unsigned long) sf) & 3))
+	if (invalid_frame_pointer(sf, sizeof(*sf)))
+		goto segv;
+
+	if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
+		goto segv;
+
+	if (ufp & 0x7)
 		goto segv;
 
-	if (get_user(pc, &sf->info.si_regs.pc) ||
+	if (__get_user(pc, &sf->info.si_regs.pc) ||
 	    __get_user(npc, &sf->info.si_regs.npc))
 		goto segv;
 
@@ -227,7 +244,7 @@ void do_sigreturn32(struct pt_regs *regs)
 asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 {
 	struct rt_signal_frame32 __user *sf;
-	unsigned int psr, pc, npc;
+	unsigned int psr, pc, npc, ufp;
 	compat_uptr_t fpu_save;
 	compat_uptr_t rwin_save;
 	sigset_t set;
@@ -242,11 +259,16 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 	sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP];
 
 	/* 1. Make sure we are not getting garbage from the user */
-	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
-	    (((unsigned long) sf) & 3))
+	if (invalid_frame_pointer(sf, sizeof(*sf)))
 		goto segv;
 
-	if (get_user(pc, &sf->regs.pc) || 
+	if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+		goto segv;
+
+	if (ufp & 0x7)
+		goto segv;
+
+	if (__get_user(pc, &sf->regs.pc) || 
 	    __get_user(npc, &sf->regs.npc))
 		goto segv;
 
@@ -307,14 +329,6 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 	force_sig(SIGSEGV, current);
 }
 
-/* Checks if the fp is valid */
-static int invalid_frame_pointer(void __user *fp, int fplen)
-{
-	if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen)
-		return 1;
-	return 0;
-}
-
 static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
 {
 	unsigned long sp;
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 52aa5e4ce5e77..c3c12efe0bc00 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -60,10 +60,22 @@ struct rt_signal_frame {
 #define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
 #define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame) + 7) & (~7)))
 
+/* Checks if the fp is valid.  We always build signal frames which are
+ * 16-byte aligned, therefore we can always enforce that the restore
+ * frame has that property as well.
+ */
+static inline bool invalid_frame_pointer(void __user *fp, int fplen)
+{
+	if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen))
+		return true;
+
+	return false;
+}
+
 asmlinkage void do_sigreturn(struct pt_regs *regs)
 {
+	unsigned long up_psr, pc, npc, ufp;
 	struct signal_frame __user *sf;
-	unsigned long up_psr, pc, npc;
 	sigset_t set;
 	__siginfo_fpu_t __user *fpu_save;
 	__siginfo_rwin_t __user *rwin_save;
@@ -77,10 +89,13 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
 	sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
 
 	/* 1. Make sure we are not getting garbage from the user */
-	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
+	if (!invalid_frame_pointer(sf, sizeof(*sf)))
+		goto segv_and_exit;
+
+	if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
 		goto segv_and_exit;
 
-	if (((unsigned long) sf) & 3)
+	if (ufp & 0x7)
 		goto segv_and_exit;
 
 	err = __get_user(pc,  &sf->info.si_regs.pc);
@@ -127,7 +142,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
 asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_signal_frame __user *sf;
-	unsigned int psr, pc, npc;
+	unsigned int psr, pc, npc, ufp;
 	__siginfo_fpu_t __user *fpu_save;
 	__siginfo_rwin_t __user *rwin_save;
 	sigset_t set;
@@ -135,8 +150,13 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 
 	synchronize_user_stack();
 	sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
-	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
-	    (((unsigned long) sf) & 0x03))
+	if (!invalid_frame_pointer(sf, sizeof(*sf)))
+		goto segv;
+
+	if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+		goto segv;
+
+	if (ufp & 0x7)
 		goto segv;
 
 	err = __get_user(pc, &sf->regs.pc);
@@ -178,15 +198,6 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	force_sig(SIGSEGV, current);
 }
 
-/* Checks if the fp is valid */
-static inline int invalid_frame_pointer(void __user *fp, int fplen)
-{
-	if ((((unsigned long) fp) & 7) || !__access_ok((unsigned long)fp, fplen))
-		return 1;
-
-	return 0;
-}
-
 static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
 {
 	unsigned long sp = regs->u_regs[UREG_FP];
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 39aaec173f66e..5ee930c48f4c9 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -234,6 +234,17 @@ asmlinkage void sparc64_get_context(struct pt_regs *regs)
 	goto out;
 }
 
+/* Checks if the fp is valid.  We always build rt signal frames which
+ * are 16-byte aligned, therefore we can always enforce that the
+ * restore frame has that property as well.
+ */
+static bool invalid_frame_pointer(void __user *fp)
+{
+	if (((unsigned long) fp) & 15)
+		return true;
+	return false;
+}
+
 struct rt_signal_frame {
 	struct sparc_stackf	ss;
 	siginfo_t		info;
@@ -246,8 +257,8 @@ struct rt_signal_frame {
 
 void do_rt_sigreturn(struct pt_regs *regs)
 {
+	unsigned long tpc, tnpc, tstate, ufp;
 	struct rt_signal_frame __user *sf;
-	unsigned long tpc, tnpc, tstate;
 	__siginfo_fpu_t __user *fpu_save;
 	__siginfo_rwin_t __user *rwin_save;
 	sigset_t set;
@@ -261,10 +272,16 @@ void do_rt_sigreturn(struct pt_regs *regs)
 		(regs->u_regs [UREG_FP] + STACK_BIAS);
 
 	/* 1. Make sure we are not getting garbage from the user */
-	if (((unsigned long) sf) & 3)
+	if (invalid_frame_pointer(sf))
+		goto segv;
+
+	if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
 		goto segv;
 
-	err = get_user(tpc, &sf->regs.tpc);
+	if ((ufp + STACK_BIAS) & 0x7)
+		goto segv;
+
+	err = __get_user(tpc, &sf->regs.tpc);
 	err |= __get_user(tnpc, &sf->regs.tnpc);
 	if (test_thread_flag(TIF_32BIT)) {
 		tpc &= 0xffffffff;
@@ -308,14 +325,6 @@ void do_rt_sigreturn(struct pt_regs *regs)
 	force_sig(SIGSEGV, current);
 }
 
-/* Checks if the fp is valid */
-static int invalid_frame_pointer(void __user *fp)
-{
-	if (((unsigned long) fp) & 15)
-		return 1;
-	return 0;
-}
-
 static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
 {
 	unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c
index 0f6eebe71e6c9..e5fe8cef9a699 100644
--- a/arch/sparc/kernel/sigutil_32.c
+++ b/arch/sparc/kernel/sigutil_32.c
@@ -48,6 +48,10 @@ int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 {
 	int err;
+
+	if (((unsigned long) fpu) & 3)
+		return -EFAULT;
+
 #ifdef CONFIG_SMP
 	if (test_tsk_thread_flag(current, TIF_USEDFPU))
 		regs->psr &= ~PSR_EF;
@@ -97,7 +101,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp)
 	struct thread_info *t = current_thread_info();
 	int i, wsaved, err;
 
-	__get_user(wsaved, &rp->wsaved);
+	if (((unsigned long) rp) & 3)
+		return -EFAULT;
+
+	get_user(wsaved, &rp->wsaved);
 	if (wsaved > NSWINS)
 		return -EFAULT;
 
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
index 387834a9c56a5..36aadcbeac694 100644
--- a/arch/sparc/kernel/sigutil_64.c
+++ b/arch/sparc/kernel/sigutil_64.c
@@ -37,7 +37,10 @@ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 	unsigned long fprs;
 	int err;
 
-	err = __get_user(fprs, &fpu->si_fprs);
+	if (((unsigned long) fpu) & 7)
+		return -EFAULT;
+
+	err = get_user(fprs, &fpu->si_fprs);
 	fprs_write(0);
 	regs->tstate &= ~TSTATE_PEF;
 	if (fprs & FPRS_DL)
@@ -72,7 +75,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp)
 	struct thread_info *t = current_thread_info();
 	int i, wsaved, err;
 
-	__get_user(wsaved, &rp->wsaved);
+	if (((unsigned long) rp) & 7)
+		return -EFAULT;
+
+	get_user(wsaved, &rp->wsaved);
 	if (wsaved > NSWINS)
 		return -EFAULT;
 

From 7cafc0b8bf130f038b0ec2dcdd6a9de6dc59b65a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 28 May 2016 20:41:12 -0700
Subject: [PATCH 3/3] sparc64: Fix return from trap window fill crashes.

We must handle data access exception as well as memory address unaligned
exceptions from return from trap window fill faults, not just normal
TLB misses.

Otherwise we can get an OOPS that looks like this:

ld-linux.so.2(36808): Kernel bad sw trap 5 [#1]
CPU: 1 PID: 36808 Comm: ld-linux.so.2 Not tainted 4.6.0 #34
task: fff8000303be5c60 ti: fff8000301344000 task.ti: fff8000301344000
TSTATE: 0000004410001601 TPC: 0000000000a1a784 TNPC: 0000000000a1a788 Y: 00000002    Not tainted
TPC: <do_sparc64_fault+0x5c4/0x700>
g0: fff8000024fc8248 g1: 0000000000db04dc g2: 0000000000000000 g3: 0000000000000001
g4: fff8000303be5c60 g5: fff800030e672000 g6: fff8000301344000 g7: 0000000000000001
o0: 0000000000b95ee8 o1: 000000000000012b o2: 0000000000000000 o3: 0000000200b9b358
o4: 0000000000000000 o5: fff8000301344040 sp: fff80003013475c1 ret_pc: 0000000000a1a77c
RPC: <do_sparc64_fault+0x5bc/0x700>
l0: 00000000000007ff l1: 0000000000000000 l2: 000000000000005f l3: 0000000000000000
l4: fff8000301347e98 l5: fff8000024ff3060 l6: 0000000000000000 l7: 0000000000000000
i0: fff8000301347f60 i1: 0000000000102400 i2: 0000000000000000 i3: 0000000000000000
i4: 0000000000000000 i5: 0000000000000000 i6: fff80003013476a1 i7: 0000000000404d4c
I7: <user_rtt_fill_fixup+0x6c/0x7c>
Call Trace:
 [0000000000404d4c] user_rtt_fill_fixup+0x6c/0x7c

The window trap handlers are slightly clever, the trap table entries for them are
composed of two pieces of code.  First comes the code that actually performs
the window fill or spill trap handling, and then there are three instructions at
the end which are for exception processing.

The userland register window fill handler is:

	add	%sp, STACK_BIAS + 0x00, %g1;		\
	ldxa	[%g1 + %g0] ASI, %l0;			\
	mov	0x08, %g2;				\
	mov	0x10, %g3;				\
	ldxa	[%g1 + %g2] ASI, %l1;			\
	mov	0x18, %g5;				\
	ldxa	[%g1 + %g3] ASI, %l2;			\
	ldxa	[%g1 + %g5] ASI, %l3;			\
	add	%g1, 0x20, %g1;				\
	ldxa	[%g1 + %g0] ASI, %l4;			\
	ldxa	[%g1 + %g2] ASI, %l5;			\
	ldxa	[%g1 + %g3] ASI, %l6;			\
	ldxa	[%g1 + %g5] ASI, %l7;			\
	add	%g1, 0x20, %g1;				\
	ldxa	[%g1 + %g0] ASI, %i0;			\
	ldxa	[%g1 + %g2] ASI, %i1;			\
	ldxa	[%g1 + %g3] ASI, %i2;			\
	ldxa	[%g1 + %g5] ASI, %i3;			\
	add	%g1, 0x20, %g1;				\
	ldxa	[%g1 + %g0] ASI, %i4;			\
	ldxa	[%g1 + %g2] ASI, %i5;			\
	ldxa	[%g1 + %g3] ASI, %i6;			\
	ldxa	[%g1 + %g5] ASI, %i7;			\
	restored;					\
	retry; nop; nop; nop; nop;			\
	b,a,pt	%xcc, fill_fixup_dax;			\
	b,a,pt	%xcc, fill_fixup_mna;			\
	b,a,pt	%xcc, fill_fixup;

And the way this works is that if any of those memory accesses
generate an exception, the exception handler can revector to one of
those final three branch instructions depending upon which kind of
exception the memory access took.  In this way, the fault handler
doesn't have to know if it was a spill or a fill that it's handling
the fault for.  It just always branches to the last instruction in
the parent trap's handler.

For example, for a regular fault, the code goes:

winfix_trampoline:
	rdpr	%tpc, %g3
	or	%g3, 0x7c, %g3
	wrpr	%g3, %tnpc
	done

All window trap handlers are 0x80 aligned, so if we "or" 0x7c into the
trap time program counter, we'll get that final instruction in the
trap handler.

On return from trap, we have to pull the register window in but we do
this by hand instead of just executing a "restore" instruction for
several reasons.  The largest being that from Niagara and onward we
simply don't have enough levels in the trap stack to fully resolve all
possible exception cases of a window fault when we are already at
trap level 1 (which we enter to get ready to return from the original
trap).

This is executed inline via the FILL_*_RTRAP handlers.  rtrap_64.S's
code branches directly to these to do the window fill by hand if
necessary.  Now if you look at them, we'll see at the end:

	    ba,a,pt    %xcc, user_rtt_fill_fixup;
	    ba,a,pt    %xcc, user_rtt_fill_fixup;
	    ba,a,pt    %xcc, user_rtt_fill_fixup;

And oops, all three cases are handled like a fault.

This doesn't work because each of these trap types (data access
exception, memory address unaligned, and faults) store their auxiliary
info in different registers to pass on to the C handler which does the
real work.

So in the case where the stack was unaligned, the unaligned trap
handler sets up the arg registers one way, and then we branched to
the fault handler which expects them setup another way.

So the FAULT_TYPE_* value ends up basically being garbage, and
randomly would generate the backtrace seen above.

Reported-by: Nick Alcock <nix@esperi.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/head_64.h |  4 ++
 arch/sparc/include/asm/ttable.h  |  8 +--
 arch/sparc/kernel/Makefile       |  1 +
 arch/sparc/kernel/rtrap_64.S     | 57 +++----------------
 arch/sparc/kernel/urtt_fill.S    | 98 ++++++++++++++++++++++++++++++++
 5 files changed, 116 insertions(+), 52 deletions(-)
 create mode 100644 arch/sparc/kernel/urtt_fill.S

diff --git a/arch/sparc/include/asm/head_64.h b/arch/sparc/include/asm/head_64.h
index 10e9dabc4c413..f0700cfeedd7b 100644
--- a/arch/sparc/include/asm/head_64.h
+++ b/arch/sparc/include/asm/head_64.h
@@ -15,6 +15,10 @@
 
 #define	PTREGS_OFF	(STACK_BIAS + STACKFRAME_SZ)
 
+#define	RTRAP_PSTATE		(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
+#define	RTRAP_PSTATE_IRQOFF	(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
+#define RTRAP_PSTATE_AG_IRQOFF	(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+
 #define __CHEETAH_ID	0x003e0014
 #define __JALAPENO_ID	0x003e0016
 #define __SERRANO_ID	0x003e0022
diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h
index 71b5a67522abb..781b9f1dbdc2d 100644
--- a/arch/sparc/include/asm/ttable.h
+++ b/arch/sparc/include/asm/ttable.h
@@ -589,8 +589,8 @@ user_rtt_fill_64bit:					\
 	 restored;					\
 	nop; nop; nop; nop; nop; nop;			\
 	nop; nop; nop; nop; nop;			\
-	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
-	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_dax;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_mna;		\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;
 
 
@@ -652,8 +652,8 @@ user_rtt_fill_32bit:					\
 	 restored;					\
 	nop; nop; nop; nop; nop;			\
 	nop; nop; nop;					\
-	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
-	ba,a,pt	%xcc, user_rtt_fill_fixup;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_dax;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_mna;		\
 	ba,a,pt	%xcc, user_rtt_fill_fixup;
 
 
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 7cf9c6ea3f1f2..fdb13327fded3 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -21,6 +21,7 @@ CFLAGS_REMOVE_perf_event.o := -pg
 CFLAGS_REMOVE_pcr.o := -pg
 endif
 
+obj-$(CONFIG_SPARC64)   += urtt_fill.o
 obj-$(CONFIG_SPARC32)   += entry.o wof.o wuf.o
 obj-$(CONFIG_SPARC32)   += etrap_32.o
 obj-$(CONFIG_SPARC32)   += rtrap_32.o
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index d08bdaffdbfcc..216948ca43829 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -14,10 +14,6 @@
 #include <asm/visasm.h>
 #include <asm/processor.h>
 
-#define		RTRAP_PSTATE		(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
-#define		RTRAP_PSTATE_IRQOFF	(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
-#define		RTRAP_PSTATE_AG_IRQOFF	(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
-
 #ifdef CONFIG_CONTEXT_TRACKING
 # define SCHEDULE_USER schedule_user
 #else
@@ -242,52 +238,17 @@ rt_continue:	ldx			[%sp + PTREGS_OFF + PT_V9_G1], %g1
 		 wrpr			%g1, %cwp
 		ba,a,pt			%xcc, user_rtt_fill_64bit
 
-user_rtt_fill_fixup:
-		rdpr	%cwp, %g1
-		add	%g1, 1, %g1
-		wrpr	%g1, 0x0, %cwp
-
-		rdpr	%wstate, %g2
-		sll	%g2, 3, %g2
-		wrpr	%g2, 0x0, %wstate
-
-		/* We know %canrestore and %otherwin are both zero.  */
-
-		sethi	%hi(sparc64_kern_pri_context), %g2
-		ldx	[%g2 + %lo(sparc64_kern_pri_context)], %g2
-		mov	PRIMARY_CONTEXT, %g1
-
-661:		stxa	%g2, [%g1] ASI_DMMU
-		.section .sun4v_1insn_patch, "ax"
-		.word	661b
-		stxa	%g2, [%g1] ASI_MMU
-		.previous
-
-		sethi	%hi(KERNBASE), %g1
-		flush	%g1
+user_rtt_fill_fixup_dax:
+		ba,pt	%xcc, user_rtt_fill_fixup_common
+		 mov	1, %g3
 
-		or	%g4, FAULT_CODE_WINFIXUP, %g4
-		stb	%g4, [%g6 + TI_FAULT_CODE]
-		stx	%g5, [%g6 + TI_FAULT_ADDR]
+user_rtt_fill_fixup_mna:
+		ba,pt	%xcc, user_rtt_fill_fixup_common
+		 mov	2, %g3
 
-		mov	%g6, %l1
-		wrpr	%g0, 0x0, %tl
-
-661:		nop
-		.section		.sun4v_1insn_patch, "ax"
-		.word			661b
-		SET_GL(0)
-		.previous
-
-		wrpr	%g0, RTRAP_PSTATE, %pstate
-
-		mov	%l1, %g6
-		ldx	[%g6 + TI_TASK], %g4
-		LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
-		call	do_sparc64_fault
-		 add	%sp, PTREGS_OFF, %o0
-		ba,pt	%xcc, rtrap
-		 nop
+user_rtt_fill_fixup:
+		ba,pt	%xcc, user_rtt_fill_fixup_common
+		 clr	%g3
 
 user_rtt_pre_restore:
 		add			%g1, 1, %g1
diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S
new file mode 100644
index 0000000000000..5604a2b051d46
--- /dev/null
+++ b/arch/sparc/kernel/urtt_fill.S
@@ -0,0 +1,98 @@
+#include <asm/thread_info.h>
+#include <asm/trap_block.h>
+#include <asm/spitfire.h>
+#include <asm/ptrace.h>
+#include <asm/head.h>
+
+		.text
+		.align	8
+		.globl	user_rtt_fill_fixup_common
+user_rtt_fill_fixup_common:
+		rdpr	%cwp, %g1
+		add	%g1, 1, %g1
+		wrpr	%g1, 0x0, %cwp
+
+		rdpr	%wstate, %g2
+		sll	%g2, 3, %g2
+		wrpr	%g2, 0x0, %wstate
+
+		/* We know %canrestore and %otherwin are both zero.  */
+
+		sethi	%hi(sparc64_kern_pri_context), %g2
+		ldx	[%g2 + %lo(sparc64_kern_pri_context)], %g2
+		mov	PRIMARY_CONTEXT, %g1
+
+661:		stxa	%g2, [%g1] ASI_DMMU
+		.section .sun4v_1insn_patch, "ax"
+		.word	661b
+		stxa	%g2, [%g1] ASI_MMU
+		.previous
+
+		sethi	%hi(KERNBASE), %g1
+		flush	%g1
+
+		mov	%g4, %l4
+		mov	%g5, %l5
+		brnz,pn	%g3, 1f
+		 mov	%g3, %l3
+
+		or	%g4, FAULT_CODE_WINFIXUP, %g4
+		stb	%g4, [%g6 + TI_FAULT_CODE]
+		stx	%g5, [%g6 + TI_FAULT_ADDR]
+1:
+		mov	%g6, %l1
+		wrpr	%g0, 0x0, %tl
+
+661:		nop
+		.section		.sun4v_1insn_patch, "ax"
+		.word			661b
+		SET_GL(0)
+		.previous
+
+		wrpr	%g0, RTRAP_PSTATE, %pstate
+
+		mov	%l1, %g6
+		ldx	[%g6 + TI_TASK], %g4
+		LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+
+		brnz,pn	%l3, 1f
+		 nop
+
+		call	do_sparc64_fault
+		 add	%sp, PTREGS_OFF, %o0
+		ba,pt	%xcc, rtrap
+		 nop
+
+1:		cmp	%g3, 2
+		bne,pn	%xcc, 2f
+		 nop
+
+		sethi	%hi(tlb_type), %g1
+		lduw	[%g1 + %lo(tlb_type)], %g1
+		cmp	%g1, 3
+		bne,pt	%icc, 1f
+		 add	%sp, PTREGS_OFF, %o0
+		mov	%l4, %o2
+		call	sun4v_do_mna
+		 mov	%l5, %o1
+		ba,a,pt	%xcc, rtrap
+1:		mov	%l4, %o1
+		mov	%l5, %o2
+		call	mem_address_unaligned
+		 nop
+		ba,a,pt	%xcc, rtrap
+
+2:		sethi	%hi(tlb_type), %g1
+		mov	%l4, %o1
+		lduw	[%g1 + %lo(tlb_type)], %g1
+		mov	%l5, %o2
+		cmp	%g1, 3
+		bne,pt	%icc, 1f
+		 add	%sp, PTREGS_OFF, %o0
+		call	sun4v_data_access_exception
+		 nop
+		ba,a,pt	%xcc, rtrap
+
+1:		call	spitfire_data_access_exception
+		 nop
+		ba,a,pt	%xcc, rtrap