[SPARC64]: Eliminate all usage of hard-coded trap globals.

UltraSPARC has special sets of global registers which are switched to
for certain trap types.  There is one set for MMU-related traps, one
set for Interrupt Vector processing, and another set (called the
Alternate Globals) for all other trap types.

For what seems like forever we've hard-coded the values in some of
these trap registers.  Some examples include:

1) Interrupt Vector global %g6 holds the current processor's interrupt
   work struct, where received interrupts are managed for IRQ handler
   dispatch.

2) MMU global %g7 holds the base of the page tables of the currently
   active address space.

3) Alternate global %g6 holds the current_thread_info() value.

Such hardcoding has resulted in some serious issues in many areas.
There are some code sequences where having another register available
would help clean up the implementation.  Taking traps such as
cross-calls from the OBP firmware requires some tricky code sequences
wherein we have to save away and restore all of the special sets of
global registers when we enter/exit OBP.

We were also using the IMMU TSB register on SMP to hold the per-cpu
area base address, which doesn't work any longer now that we actually
use the TSB facility of the cpu.

The implementation is pretty straightforward.  One tricky bit is
getting the current processor ID, as that differs between cpu
variants.  We use a stub with a fancy calling convention which we
patch at boot time.  The calling convention is that the stub is
branched to and the (PC - 4) to return to is in register %g1.  The cpu
number is left in %g6.  This stub can be invoked by using the
__GET_CPUID macro.
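
For illustration, a call site looks roughly like this (a minimal
sketch of the convention just described; the exact expansion of
__GET_CPUID is an assumption, not quoted from the patch):

	ba,pt	%xcc, __get_cpu_id	! enter the boot-patched stub
	 rd	%pc, %g1		! delay slot: %g1 = this insn's PC,
					! i.e. the (PC - 4) return point
	! the stub jumps back to %g1 + 4 with the cpu number in %g6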

We use an array of per-cpu trap state to store the current thread and
the physical address of the current address space's page tables.  The
TRAP_LOAD_THREAD_REG macro loads %g6 with the current thread from this
table; it uses __GET_CPUID and also clobbers %g1.

TRAP_LOAD_IRQ_WORK is used by the interrupt vector processing code to
load the current processor's IRQ software state into %g6.  It also
uses __GET_CPUID and clobbers %g1.

Finally, TRAP_LOAD_PGD_PHYS loads the physical address base of the
current address space's page tables into %g7; it clobbers %g1 and uses
__GET_CPUID.
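
The per-cpu lookup behind TRAP_LOAD_THREAD_REG is along these lines
(a hedged sketch; the trap_block symbol, entry layout, and
TRAP_BLOCK_SZ_SHIFT name are assumptions based on the description
above, not the literal macro body):

	__GET_CPUID				! cpu number -> %g6, clobbers %g1
	sethi	%hi(trap_block), %g1		! per-cpu trap state table
	or	%g1, %lo(trap_block), %g1
	sllx	%g6, TRAP_BLOCK_SZ_SHIFT, %g6	! scale cpu id by entry size
	ldx	[%g1 + %g6], %g6		! current thread, first field

TRAP_LOAD_IRQ_WORK and TRAP_LOAD_PGD_PHYS would do similar indexed
loads, fetching the IRQ work pointer into %g6 and the page-table
physical address into %g7 respectively.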

Many refinements are possible, as well as some tuning, with this stuff
in place.

Signed-off-by: David S. Miller <davem@davemloft.net>
Authored and committed by David S. Miller on Mar 20, 2006
Parent: 3c93646 · Commit: 56fb4df
Showing 16 changed files with 287 additions and 168 deletions.
122 changes: 105 additions & 17 deletions arch/sparc64/kernel/entry.S
@@ -50,7 +50,8 @@ do_fpdis:
add %g0, %g0, %g0
ba,a,pt %xcc, rtrap_clr_l6

1: ldub [%g6 + TI_FPSAVED], %g5
1: TRAP_LOAD_THREAD_REG
ldub [%g6 + TI_FPSAVED], %g5
wr %g0, FPRS_FEF, %fprs
andcc %g5, FPRS_FEF, %g0
be,a,pt %icc, 1f
@@ -189,6 +190,7 @@ fp_other_bounce:
.globl do_fpother_check_fitos
.align 32
do_fpother_check_fitos:
TRAP_LOAD_THREAD_REG
sethi %hi(fp_other_bounce - 4), %g7
or %g7, %lo(fp_other_bounce - 4), %g7

@@ -353,8 +355,6 @@ do_fptrap_after_fsr:
*
* With this method we can do most of the cross-call tlb/cache
* flushing very quickly.
*
* Current CPU's IRQ worklist table is locked into %g6, don't touch.
*/
.text
.align 32
@@ -378,6 +378,8 @@ do_ivec:
sllx %g2, %g4, %g2
sllx %g4, 2, %g4

TRAP_LOAD_IRQ_WORK

lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */
stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */
stw %g3, [%g6 + %g4] /* irq_work(cpu, pil) = bucket */
@@ -488,9 +490,24 @@ setcc:
retl
stx %o1, [%o0 + PT_V9_TSTATE]

.globl utrap, utrap_ill
utrap: brz,pn %g1, etrap
.globl utrap_trap
utrap_trap: /* %g3=handler,%g4=level */
TRAP_LOAD_THREAD_REG
ldx [%g6 + TI_UTRAPS], %g1
brnz,pt %g1, invoke_utrap
nop

ba,pt %xcc, etrap
rd %pc, %g7
mov %l4, %o1
call bad_trap
add %sp, PTREGS_OFF, %o0
ba,pt %xcc, rtrap
clr %l6

invoke_utrap:
sllx %g3, 3, %g3
ldx [%g1 + %g3], %g1
save %sp, -128, %sp
rdpr %tstate, %l6
rdpr %cwp, %l7
@@ -500,17 +517,6 @@ utrap: brz,pn %g1, etrap
rdpr %tnpc, %l7
wrpr %g1, 0, %tnpc
done
utrap_ill:
call bad_trap
add %sp, PTREGS_OFF, %o0
ba,pt %xcc, rtrap
clr %l6

/* XXX Here is stuff we still need to write... -DaveM XXX */
.globl netbsd_syscall
netbsd_syscall:
retl
nop

/* We need to carefully read the error status, ACK
* the errors, prevent recursive traps, and pass the
@@ -1001,7 +1007,7 @@ dcpe_icpe_tl1_common:
* %g3: scratch
* %g4: AFSR
* %g5: AFAR
* %g6: current thread ptr
* %g6: unused, will have current thread ptr after etrap
* %g7: scratch
*/
__cheetah_log_error:
@@ -1690,3 +1696,85 @@ __flushw_user:
restore %g0, %g0, %g0
2: retl
nop

/* Read cpu ID from hardware, return in %g6.
* (callers_pc - 4) is in %g1. Patched at boot time.
*
* Default is spitfire implementation.
*
* The instruction sequence needs to be 5 instructions
* in order to fit the longest implementation, which is
* currently starfire.
*/
.align 32
.globl __get_cpu_id
__get_cpu_id:
ldxa [%g0] ASI_UPA_CONFIG, %g6
srlx %g6, 17, %g6
jmpl %g1 + 0x4, %g0
and %g6, 0x1f, %g6
nop

__get_cpu_id_cheetah_safari:
ldxa [%g0] ASI_SAFARI_CONFIG, %g6
srlx %g6, 17, %g6
jmpl %g1 + 0x4, %g0
and %g6, 0x3ff, %g6
nop

__get_cpu_id_cheetah_jbus:
ldxa [%g0] ASI_JBUS_CONFIG, %g6
srlx %g6, 17, %g6
jmpl %g1 + 0x4, %g0
and %g6, 0x1f, %g6
nop

__get_cpu_id_starfire:
sethi %hi(0x1fff40000d0 >> 9), %g6
sllx %g6, 9, %g6
or %g6, 0xd0, %g6
jmpl %g1 + 0x4, %g0
lduwa [%g6] ASI_PHYS_BYPASS_EC_E, %g6

.globl per_cpu_patch
per_cpu_patch:
sethi %hi(this_is_starfire), %o0
lduw [%o0 + %lo(this_is_starfire)], %o1
sethi %hi(__get_cpu_id_starfire), %o0
brnz,pn %o1, 10f
or %o0, %lo(__get_cpu_id_starfire), %o0
sethi %hi(tlb_type), %o0
lduw [%o0 + %lo(tlb_type)], %o1
brz,pt %o1, 11f
nop
rdpr %ver, %o0
srlx %o0, 32, %o0
sethi %hi(0x003e0016), %o1
or %o1, %lo(0x003e0016), %o1
cmp %o0, %o1
sethi %hi(__get_cpu_id_cheetah_jbus), %o0
be,pn %icc, 10f
or %o0, %lo(__get_cpu_id_cheetah_jbus), %o0
sethi %hi(__get_cpu_id_cheetah_safari), %o0
or %o0, %lo(__get_cpu_id_cheetah_safari), %o0
10:
sethi %hi(__get_cpu_id), %o1
or %o1, %lo(__get_cpu_id), %o1
lduw [%o0 + 0x00], %o2
stw %o2, [%o1 + 0x00]
flush %o1 + 0x00
lduw [%o0 + 0x04], %o2
stw %o2, [%o1 + 0x04]
flush %o1 + 0x04
lduw [%o0 + 0x08], %o2
stw %o2, [%o1 + 0x08]
flush %o1 + 0x08
lduw [%o0 + 0x0c], %o2
stw %o2, [%o1 + 0x0c]
flush %o1 + 0x0c
lduw [%o0 + 0x10], %o2
stw %o2, [%o1 + 0x10]
flush %o1 + 0x10
11:
retl
nop
18 changes: 7 additions & 11 deletions arch/sparc64/kernel/etrap.S
@@ -31,6 +31,7 @@
.globl etrap, etrap_irq, etraptl1
etrap: rdpr %pil, %g2
etrap_irq:
TRAP_LOAD_THREAD_REG
rdpr %tstate, %g1
sllx %g2, 20, %g3
andcc %g1, TSTATE_PRIV, %g0
@@ -98,11 +99,7 @@ etrap_irq:
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
wrpr %g0, ETRAP_PSTATE2, %pstate
mov %l6, %g6
#ifdef CONFIG_SMP
#error IMMU TSB usage must be fixed
mov TSB_REG, %g3
ldxa [%g3] ASI_IMMU, %g5
#endif
LOAD_PER_CPU_BASE(%g4, %g3)
jmpl %l2 + 0x4, %g0
ldx [%g6 + TI_TASK], %g4

@@ -126,6 +123,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
* 0x58 TL4's TT
* 0x60 TL
*/
TRAP_LOAD_THREAD_REG
sub %sp, ((4 * 8) * 4) + 8, %g2
rdpr %tl, %g1

@@ -179,7 +177,9 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.

.align 64
.globl scetrap
scetrap: rdpr %pil, %g2
scetrap:
TRAP_LOAD_THREAD_REG
rdpr %pil, %g2
rdpr %tstate, %g1
sllx %g2, 20, %g3
andcc %g1, TSTATE_PRIV, %g0
@@ -248,11 +248,7 @@ scetrap: rdpr %pil, %g2
stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
mov %l6, %g6
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
#ifdef CONFIG_SMP
#error IMMU TSB usage must be fixed
mov TSB_REG, %g3
ldxa [%g3] ASI_IMMU, %g5
#endif
LOAD_PER_CPU_BASE(%g4, %g3)
ldx [%g6 + TI_TASK], %g4
done

20 changes: 1 addition & 19 deletions arch/sparc64/kernel/head.S
@@ -26,6 +26,7 @@
#include <asm/head.h>
#include <asm/ttable.h>
#include <asm/mmu.h>
#include <asm/cpudata.h>

/* This section from _start to sparc64_boot_end should fit into
* 0x0000000000404000 to 0x0000000000408000.
@@ -421,24 +422,6 @@ setup_trap_table:
stxa %g2, [%g1] ASI_DMMU
membar #Sync

/* The Linux trap handlers expect various trap global registers
* to be setup with some fixed values. So here we set these
* up very carefully. These globals are:
*
* Alternate Globals (PSTATE_AG):
*
* %g6 --> current_thread_info()
*
* Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()):
*
* %g6 --> __irq_work[smp_processor_id()]
*/

rdpr %pstate, %o1
mov %g6, %o2
wrpr %o1, PSTATE_AG, %pstate
mov %o2, %g6

/* Kill PROM timer */
sethi %hi(0x80000000), %o2
sllx %o2, 32, %o2
@@ -457,7 +440,6 @@

2:
wrpr %g0, %g0, %wstate
wrpr %o1, 0x0, %pstate

call init_irqwork_curcpu
nop
26 changes: 1 addition & 25 deletions arch/sparc64/kernel/irq.c
@@ -848,33 +848,9 @@ static void kill_prom_timer(void)

void init_irqwork_curcpu(void)
{
register struct irq_work_struct *workp asm("o2");
register unsigned long tmp asm("o3");
int cpu = hard_smp_processor_id();

memset(__irq_work + cpu, 0, sizeof(*workp));

/* Make sure we are called with PSTATE_IE disabled. */
__asm__ __volatile__("rdpr %%pstate, %0\n\t"
: "=r" (tmp));
if (tmp & PSTATE_IE) {
prom_printf("BUG: init_irqwork_curcpu() called with "
"PSTATE_IE enabled, bailing.\n");
__asm__ __volatile__("mov %%i7, %0\n\t"
: "=r" (tmp));
prom_printf("BUG: Called from %lx\n", tmp);
prom_halt();
}

/* Set interrupt globals. */
workp = &__irq_work[cpu];
__asm__ __volatile__(
"rdpr %%pstate, %0\n\t"
"wrpr %0, %1, %%pstate\n\t"
"mov %2, %%g6\n\t"
"wrpr %0, 0x0, %%pstate\n\t"
: "=&r" (tmp)
: "i" (PSTATE_IG), "r" (workp));
memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct));
}

/* Only invoked on boot processor. */
10 changes: 4 additions & 6 deletions arch/sparc64/kernel/rtrap.S
@@ -223,12 +223,10 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
#ifdef CONFIG_SMP
#error IMMU TSB usage must be fixed
mov TSB_REG, %g6
brnz,a,pn %l3, 1f
ldxa [%g6] ASI_IMMU, %g5
#endif
brz,pt %l3, 1f
nop
/* Must do this before thread reg is clobbered below. */
LOAD_PER_CPU_BASE(%g6, %g7)
1:
ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
8 changes: 8 additions & 0 deletions arch/sparc64/kernel/setup.c
@@ -507,6 +507,11 @@ void __init setup_arch(char **cmdline_p)
/* Work out if we are starfire early on */
check_if_starfire();

/* Now we know enough to patch the __get_cpu_id()
* trampoline used by trap code.
*/
per_cpu_patch();

boot_flags_init(*cmdline_p);

idprom_init();
@@ -545,6 +550,9 @@ void __init setup_arch(char **cmdline_p)
smp_setup_cpu_possible_map();

paging_init();

/* Get boot processor trap_block[] setup. */
init_cur_cpu_trap();
}

static int __init set_preferred_console(void)