Skip to content

Commit

Permalink
[SPARC64]: Revamp Spitfire error trap handling.
Browse files Browse the repository at this point in the history
Current uncorrectable error handling was poor enough
that the processor could just loop taking the same
trap over and over again.  Fix things up so that we
at least get a log message and perhaps even some register
state.

In the process, much consolidation became possible,
particularly with the correctable error handler.

Prefix assembler and C function names with "spitfire"
to indicate that these are for Ultra-I/II/IIi/IIe only.

More work is needed to make these routines robust and
featureful to the level of the Ultra-III error handlers.

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 29, 2005
1 parent bde4e4e commit 6c52a96
Show file tree
Hide file tree
Showing 6 changed files with 418 additions and 245 deletions.
266 changes: 163 additions & 103 deletions arch/sparc64/kernel/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <asm/visasm.h>
#include <asm/estate.h>
#include <asm/auxio.h>
#include <asm/sfafsr.h>

#define curptr g6

Expand Down Expand Up @@ -690,9 +691,159 @@ netbsd_syscall:
retl
nop

.globl __do_data_access_exception
.globl __do_data_access_exception_tl1
__do_data_access_exception_tl1:
/* We need to carefully read the error status, ACK
* the errors, prevent recursive traps, and pass the
* information on to C code for logging.
*
* We pass the AFAR in as-is, and we encode the status
* information as described in asm-sparc64/sfafsr.h
*/
.globl __spitfire_access_error
__spitfire_access_error:
/* Disable ESTATE error reporting so that we do not
* take recursive traps and RED state the processor.
*/
stxa %g0, [%g0] ASI_ESTATE_ERROR_EN
membar #Sync

mov UDBE_UE, %g1
ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR

/* __spitfire_cee_trap branches here with AFSR in %g4 and
* UDBE_CE in %g1. It only clears ESTATE_ERR_CE in the
* ESTATE Error Enable register.
*/
__spitfire_cee_trap_continue:
ldxa [%g0] ASI_AFAR, %g5 ! Get AFAR

rdpr %tt, %g3
and %g3, 0x1ff, %g3 ! Paranoia
sllx %g3, SFSTAT_TRAP_TYPE_SHIFT, %g3
or %g4, %g3, %g4
rdpr %tl, %g3
cmp %g3, 1
mov 1, %g3
bleu %xcc, 1f
sllx %g3, SFSTAT_TL_GT_ONE_SHIFT, %g3

or %g4, %g3, %g4

/* Read in the UDB error register state, clearing the
* sticky error bits as-needed. We only clear them if
* the UE bit is set. Likewise, __spitfire_cee_trap
* below will only do so if the CE bit is set.
*
* NOTE: UltraSparc-I/II have high and low UDB error
* registers, corresponding to the two UDB units
* present on those chips. UltraSparc-IIi only
* has a single UDB, called "SDB" in the manual.
* For IIi the upper UDB register always reads
* as zero so for our purposes things will just
* work with the checks below.
*/
1: ldxa [%g0] ASI_UDBH_ERROR_R, %g3
and %g3, 0x3ff, %g7 ! Paranoia
sllx %g7, SFSTAT_UDBH_SHIFT, %g7
or %g4, %g7, %g4
andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE
be,pn %xcc, 1f
nop
stxa %g3, [%g0] ASI_UDB_ERROR_W
membar #Sync

1: mov 0x18, %g3
ldxa [%g3] ASI_UDBL_ERROR_R, %g3
and %g3, 0x3ff, %g7 ! Paranoia
sllx %g7, SFSTAT_UDBL_SHIFT, %g7
or %g4, %g7, %g4
andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE
be,pn %xcc, 1f
nop
mov 0x18, %g7
stxa %g3, [%g7] ASI_UDB_ERROR_W
membar #Sync

1: /* Ok, now that we've latched the error state,
* clear the sticky bits in the AFSR.
*/
stxa %g4, [%g0] ASI_AFSR
membar #Sync

rdpr %tl, %g2
cmp %g2, 1
rdpr %pil, %g2
bleu,pt %xcc, 1f
wrpr %g0, 15, %pil

ba,pt %xcc, etraptl1
rd %pc, %g7

ba,pt %xcc, 2f
nop

1: ba,pt %xcc, etrap_irq
rd %pc, %g7

2: mov %l4, %o1
mov %l5, %o2
call spitfire_access_error
add %sp, PTREGS_OFF, %o0
ba,pt %xcc, rtrap
clr %l6

/* This is the trap handler entry point for ECC correctable
* errors. They are corrected, but we listen for the trap
* so that the event can be logged.
*
* Disrupting errors are either:
* 1) single-bit ECC errors during UDB reads to system
* memory
* 2) data parity errors during write-back events
*
* As far as I can make out from the manual, the CEE trap
* is only for correctable errors during memory read
* accesses by the front-end of the processor.
*
* The code below is only for trap level 1 CEE events,
* as it is the only situation where we can safely record
* and log. For trap level >1 we just clear the CE bit
* in the AFSR and return.
*
* This is just like __spiftire_access_error above, but it
* specifically handles correctable errors. If an
* uncorrectable error is indicated in the AFSR we
* will branch directly above to __spitfire_access_error
* to handle it instead. Uncorrectable therefore takes
* priority over correctable, and the error logging
* C code will notice this case by inspecting the
* trap type.
*/
.globl __spitfire_cee_trap
__spitfire_cee_trap:
ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR
mov 1, %g3
sllx %g3, SFAFSR_UE_SHIFT, %g3
andcc %g4, %g3, %g0 ! Check for UE
bne,pn %xcc, __spitfire_access_error
nop

/* Ok, in this case we only have a correctable error.
* Indicate we only wish to capture that state in register
* %g1, and we only disable CE error reporting unlike UE
* handling which disables all errors.
*/
ldxa [%g0] ASI_ESTATE_ERROR_EN, %g3
andn %g3, ESTATE_ERR_CE, %g3
stxa %g3, [%g0] ASI_ESTATE_ERROR_EN
membar #Sync

/* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */
ba,pt %xcc, __spitfire_cee_trap_continue
mov UDBE_CE, %g1

.globl __spitfire_data_access_exception
.globl __spitfire_data_access_exception_tl1
__spitfire_data_access_exception_tl1:
rdpr %pstate, %g4
wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
mov TLB_SFSR, %g3
Expand All @@ -714,12 +865,12 @@ __do_data_access_exception_tl1:
109: or %g7, %lo(109b), %g7
mov %l4, %o1
mov %l5, %o2
call data_access_exception_tl1
call spitfire_data_access_exception_tl1
add %sp, PTREGS_OFF, %o0
ba,pt %xcc, rtrap
clr %l6

__do_data_access_exception:
__spitfire_data_access_exception:
rdpr %pstate, %g4
wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
mov TLB_SFSR, %g3
Expand All @@ -733,14 +884,14 @@ __do_data_access_exception:
109: or %g7, %lo(109b), %g7
mov %l4, %o1
mov %l5, %o2
call data_access_exception
call spitfire_data_access_exception
add %sp, PTREGS_OFF, %o0
ba,pt %xcc, rtrap
clr %l6

.globl __do_instruction_access_exception
.globl __do_instruction_access_exception_tl1
__do_instruction_access_exception_tl1:
.globl __spitfire_insn_access_exception
.globl __spitfire_insn_access_exception_tl1
__spitfire_insn_access_exception_tl1:
rdpr %pstate, %g4
wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
mov TLB_SFSR, %g3
Expand All @@ -753,12 +904,12 @@ __do_instruction_access_exception_tl1:
109: or %g7, %lo(109b), %g7
mov %l4, %o1
mov %l5, %o2
call instruction_access_exception_tl1
call spitfire_insn_access_exception_tl1
add %sp, PTREGS_OFF, %o0
ba,pt %xcc, rtrap
clr %l6

__do_instruction_access_exception:
__spitfire_insn_access_exception:
rdpr %pstate, %g4
wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate
mov TLB_SFSR, %g3
Expand All @@ -771,102 +922,11 @@ __do_instruction_access_exception:
109: or %g7, %lo(109b), %g7
mov %l4, %o1
mov %l5, %o2
call instruction_access_exception
call spitfire_insn_access_exception
add %sp, PTREGS_OFF, %o0
ba,pt %xcc, rtrap
clr %l6

/* This is the trap handler entry point for ECC correctable
* errors. They are corrected, but we listen for the trap
* so that the event can be logged.
*
* Disrupting errors are either:
* 1) single-bit ECC errors during UDB reads to system
* memory
* 2) data parity errors during write-back events
*
* As far as I can make out from the manual, the CEE trap
* is only for correctable errors during memory read
* accesses by the front-end of the processor.
*
* The code below is only for trap level 1 CEE events,
* as it is the only situation where we can safely record
* and log. For trap level >1 we just clear the CE bit
* in the AFSR and return.
*/

/* Our trap handling infrastructure allows us to preserve
* two 64-bit values during etrap for arguments to
* subsequent C code. Therefore we encode the information
* as follows:
*
* value 1) Full 64-bits of AFAR
* value 2) Low 33-bits of AFSR, then bits 33-->42
* are UDBL error status and bits 43-->52
* are UDBH error status
*/
.align 64
.globl cee_trap
cee_trap:
ldxa [%g0] ASI_AFSR, %g1 ! Read AFSR
ldxa [%g0] ASI_AFAR, %g2 ! Read AFAR
sllx %g1, 31, %g1 ! Clear reserved bits
srlx %g1, 31, %g1 ! in AFSR

/* NOTE: UltraSparc-I/II have high and low UDB error
* registers, corresponding to the two UDB units
* present on those chips. UltraSparc-IIi only
* has a single UDB, called "SDB" in the manual.
* For IIi the upper UDB register always reads
* as zero so for our purposes things will just
* work with the checks below.
*/
ldxa [%g0] ASI_UDBL_ERROR_R, %g3 ! Read UDB-Low error status
andcc %g3, (1 << 8), %g4 ! Check CE bit
sllx %g3, (64 - 10), %g3 ! Clear reserved bits
srlx %g3, (64 - 10), %g3 ! in UDB-Low error status

sllx %g3, (33 + 0), %g3 ! Shift up to encoding area
or %g1, %g3, %g1 ! Or it in
be,pn %xcc, 1f ! Branch if CE bit was clear
nop
stxa %g4, [%g0] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBL
membar #Sync ! Synchronize ASI stores
1: mov 0x18, %g5 ! Addr of UDB-High error status
ldxa [%g5] ASI_UDBH_ERROR_R, %g3 ! Read it

andcc %g3, (1 << 8), %g4 ! Check CE bit
sllx %g3, (64 - 10), %g3 ! Clear reserved bits
srlx %g3, (64 - 10), %g3 ! in UDB-High error status
sllx %g3, (33 + 10), %g3 ! Shift up to encoding area
or %g1, %g3, %g1 ! Or it in
be,pn %xcc, 1f ! Branch if CE bit was clear
nop
nop

stxa %g4, [%g5] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBH
membar #Sync ! Synchronize ASI stores
1: mov 1, %g5 ! AFSR CE bit is
sllx %g5, 20, %g5 ! bit 20
stxa %g5, [%g0] ASI_AFSR ! Clear CE sticky bit in AFSR
membar #Sync ! Synchronize ASI stores
sllx %g2, (64 - 41), %g2 ! Clear reserved bits
srlx %g2, (64 - 41), %g2 ! in latched AFAR

andn %g2, 0x0f, %g2 ! Finish resv bit clearing
mov %g1, %g4 ! Move AFSR+UDB* into save reg
mov %g2, %g5 ! Move AFAR into save reg
rdpr %pil, %g2
wrpr %g0, 15, %pil
ba,pt %xcc, etrap_irq
rd %pc, %g7
mov %l4, %o0

mov %l5, %o1
call cee_log
add %sp, PTREGS_OFF, %o2
ba,a,pt %xcc, rtrap_irq

/* Capture I/D/E-cache state into per-cpu error scoreboard.
*
* %g1: (TL>=0) ? 1 : 0
Expand Down
Loading

0 comments on commit 6c52a96

Please sign in to comment.