Skip to content

Commit

Permalink
xtensa: new fast_alloca handler
Browse files Browse the repository at this point in the history
Instead of emulating movsp instruction in the kernel use window
underflow handler to load missing register window and retry failed
movsp.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Chris Zankel <chris@zankel.net>
  • Loading branch information
Max Filippov authored and Chris Zankel committed Sep 6, 2013
1 parent 99d5040 commit fff96d6
Showing 1 changed file with 40 additions and 152 deletions.
192 changes: 40 additions & 152 deletions arch/xtensa/kernel/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
/* Unimplemented features. */

#undef KERNEL_STACK_OVERFLOW_CHECK
#undef ALLOCA_EXCEPTION_IN_IRAM

/* Not well tested.
*
Expand Down Expand Up @@ -819,11 +818,27 @@ ENDPROC(unrecoverable_exception)
*
* The ALLOCA handler is entered when user code executes the MOVSP
* instruction and the caller's frame is not in the register file.
* In this case, the caller frame's a0..a3 are on the stack just
* below sp (a1), and this handler moves them.
*
* For "MOVSP <ar>,<as>" without destination register a1, this routine
* simply moves the value from <as> to <ar> without moving the save area.
* This algorithm was taken from the Ross Morley's RTOS Porting Layer:
*
* /home/ross/rtos/porting/XtensaRTOS-PortingLayer-20090507/xtensa_vectors.S
*
* It leverages the existing window spill/fill routines and their support for
* double exceptions. The 'movsp' instruction will only cause an exception if
* the next window needs to be loaded. In fact this ALLOCA exception may be
* replaced at some point by changing the hardware to do a underflow exception
* of the proper size instead.
*
* This algorithm simply backs out the register changes started by the user
* excpetion handler, makes it appear that we have started a window underflow
* by rotating the window back and then setting the old window base (OWB) in
* the 'ps' register with the rolled back window base. The 'movsp' instruction
* will be re-executed and this time since the next window frames is in the
* active AR registers it won't cause an exception.
*
* If the WindowUnderflow code gets a TLB miss the page will get mapped
* the the partial windeowUnderflow will be handeled in the double exception
* handler.
*
* Entry condition:
*
Expand All @@ -838,155 +853,28 @@ ENDPROC(unrecoverable_exception)
* < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
*/

#if XCHAL_HAVE_BE
#define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 4, 4
#define _EXTUI_MOVSP_DST(ar) extui ar, ar, 0, 4
#else
#define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 0, 4
#define _EXTUI_MOVSP_DST(ar) extui ar, ar, 4, 4
#endif

ENTRY(fast_alloca)
rsr a0, windowbase
rotw -1
rsr a2, ps
extui a3, a2, PS_OWB_SHIFT, PS_OWB_WIDTH
xor a3, a3, a4
l32i a4, a6, PT_AREG0
l32i a1, a6, PT_DEPC
rsr a6, depc
wsr a1, depc
slli a3, a3, PS_OWB_SHIFT
xor a2, a2, a3
wsr a2, ps
rsync

/* We shouldn't be in a double exception. */

l32i a0, a2, PT_DEPC
_bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lunhandled_double

rsr a0, depc # get a2
s32i a4, a2, PT_AREG4 # save a4 and
s32i a3, a2, PT_AREG3
s32i a0, a2, PT_AREG2 # a2 to stack

/* Exit critical section. */

movi a0, 0
rsr a3, excsave1
s32i a0, a3, EXC_TABLE_FIXUP

rsr a4, epc1 # get exception address

#ifdef ALLOCA_EXCEPTION_IN_IRAM
#error iram not supported
#else
/* Note: l8ui not allowed in IRAM/IROM!! */
l8ui a0, a4, 1 # read as(src) from MOVSP instruction
#endif
movi a3, .Lmovsp_src
_EXTUI_MOVSP_SRC(a0) # extract source register number
addx8 a3, a0, a3
jx a3

.Lunhandled_double:
wsr a0, excsave1
movi a0, unrecoverable_exception
callx0 a0

.align 8
.Lmovsp_src:
l32i a3, a2, PT_AREG0; _j 1f; .align 8
mov a3, a1; _j 1f; .align 8
l32i a3, a2, PT_AREG2; _j 1f; .align 8
l32i a3, a2, PT_AREG3; _j 1f; .align 8
l32i a3, a2, PT_AREG4; _j 1f; .align 8
mov a3, a5; _j 1f; .align 8
mov a3, a6; _j 1f; .align 8
mov a3, a7; _j 1f; .align 8
mov a3, a8; _j 1f; .align 8
mov a3, a9; _j 1f; .align 8
mov a3, a10; _j 1f; .align 8
mov a3, a11; _j 1f; .align 8
mov a3, a12; _j 1f; .align 8
mov a3, a13; _j 1f; .align 8
mov a3, a14; _j 1f; .align 8
mov a3, a15; _j 1f; .align 8

1:

#ifdef ALLOCA_EXCEPTION_IN_IRAM
#error iram not supported
#else
l8ui a0, a4, 0 # read ar(dst) from MOVSP instruction
#endif
addi a4, a4, 3 # step over movsp
_EXTUI_MOVSP_DST(a0) # extract destination register
wsr a4, epc1 # save new epc_1

_bnei a0, 1, 1f # no 'movsp a1, ax': jump

/* Move the save area. This implies the use of the L32E
* and S32E instructions, because this move must be done with
* the user's PS.RING privilege levels, not with ring 0
* (kernel's) privileges currently active with PS.EXCM
* set. Note that we have stil registered a fixup routine with the
* double exception vector in case a double exception occurs.
*/

/* a0,a4:avail a1:old user stack a2:exc. stack a3:new user stack. */

l32e a0, a1, -16
l32e a4, a1, -12
s32e a0, a3, -16
s32e a4, a3, -12
l32e a0, a1, -8
l32e a4, a1, -4
s32e a0, a3, -8
s32e a4, a3, -4

/* Restore stack-pointer and all the other saved registers. */

mov a1, a3

l32i a4, a2, PT_AREG4
l32i a3, a2, PT_AREG3
l32i a0, a2, PT_AREG0
l32i a2, a2, PT_AREG2
rfe

/* MOVSP <at>,<as> was invoked with <at> != a1.
* Because the stack pointer is not being modified,
* we should be able to just modify the pointer
* without moving any save area.
* The processor only traps these occurrences if the
* caller window isn't live, so unfortunately we can't
* use this as an alternate trap mechanism.
* So we just do the move. This requires that we
* resolve the destination register, not just the source,
* so there's some extra work.
* (PERHAPS NOT REALLY NEEDED, BUT CLEANER...)
*/

/* a0 dst-reg, a1 user-stack, a2 stack, a3 value of src reg. */

1: movi a4, .Lmovsp_dst
addx8 a4, a0, a4
jx a4

.align 8
.Lmovsp_dst:
s32i a3, a2, PT_AREG0; _j 1f; .align 8
mov a1, a3; _j 1f; .align 8
s32i a3, a2, PT_AREG2; _j 1f; .align 8
s32i a3, a2, PT_AREG3; _j 1f; .align 8
s32i a3, a2, PT_AREG4; _j 1f; .align 8
mov a5, a3; _j 1f; .align 8
mov a6, a3; _j 1f; .align 8
mov a7, a3; _j 1f; .align 8
mov a8, a3; _j 1f; .align 8
mov a9, a3; _j 1f; .align 8
mov a10, a3; _j 1f; .align 8
mov a11, a3; _j 1f; .align 8
mov a12, a3; _j 1f; .align 8
mov a13, a3; _j 1f; .align 8
mov a14, a3; _j 1f; .align 8
mov a15, a3; _j 1f; .align 8

1: l32i a4, a2, PT_AREG4
l32i a3, a2, PT_AREG3
l32i a0, a2, PT_AREG0
l32i a2, a2, PT_AREG2
rfe

_bbci.l a4, 31, 4f
rotw -1
_bbci.l a8, 30, 8f
rotw -1
j _WindowUnderflow12
8: j _WindowUnderflow8
4: j _WindowUnderflow4
ENDPROC(fast_alloca)

/*
Expand Down

0 comments on commit fff96d6

Please sign in to comment.