Skip to content

Commit

Permalink
[SPARC64]: Avoid membar instructions in delay slots.
Browse files Browse the repository at this point in the history
In particular, avoid membar instructions in the delay
slot of a jmpl instruction.

UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51

The long and short of it is that if the IMU unit misses
on a branch or jmpl, and there is a store buffer synchronizing
membar in the delay slot, the chip can stop fetching instructions.

If interrupts are enabled or some other trap is enabled, the
chip will unwedge itself, but performance will suffer.

We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jun 27, 2005
1 parent 020f46a commit b445e26
Show file tree
Hide file tree
Showing 15 changed files with 171 additions and 110 deletions.
6 changes: 4 additions & 2 deletions arch/sparc64/kernel/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
fmuld %f0, %f2, %f26
faddd %f0, %f2, %f28
fmuld %f0, %f2, %f30
membar #Sync
b,pt %xcc, fpdis_exit
membar #Sync
nop
2: andcc %g5, FPRS_DU, %g0
bne,pt %icc, 3f
fzero %f32
Expand Down Expand Up @@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
fmuld %f32, %f34, %f58
faddd %f32, %f34, %f60
fmuld %f32, %f34, %f62
membar #Sync
ba,pt %xcc, fpdis_exit
membar #Sync
nop
3: mov SECONDARY_CONTEXT, %g3
add %g6, TI_FPREGS, %g1
ldxa [%g3] ASI_DMMU, %g5
Expand Down
12 changes: 8 additions & 4 deletions arch/sparc64/kernel/semaphore.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
" add %1, %4, %1\n"
" cas [%3], %0, %1\n"
" cmp %0, %1\n"
" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n"
" membar #StoreLoad | #StoreStore\n"
" nop\n"
: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
: "r" (&sem->count), "r" (incr), "m" (sem->count)
: "cc");
Expand Down Expand Up @@ -71,8 +72,9 @@ void up(struct semaphore *sem)
" cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n"
" addcc %%g7, 1, %%g0\n"
" membar #StoreLoad | #StoreStore\n"
" ble,pn %%icc, 3f\n"
" membar #StoreLoad | #StoreStore\n"
" nop\n"
"2:\n"
" .subsection 2\n"
"3: mov %0, %%g1\n"
Expand Down Expand Up @@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
" cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n"
" cmp %%g7, 1\n"
" membar #StoreLoad | #StoreStore\n"
" bl,pn %%icc, 3f\n"
" membar #StoreLoad | #StoreStore\n"
" nop\n"
"2:\n"
" .subsection 2\n"
"3: mov %0, %%g1\n"
Expand Down Expand Up @@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
" cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n"
" cmp %%g7, 1\n"
" membar #StoreLoad | #StoreStore\n"
" bl,pn %%icc, 3f\n"
" membar #StoreLoad | #StoreStore\n"
" nop\n"
"2:\n"
" .subsection 2\n"
"3: mov %2, %%g1\n"
Expand Down
3 changes: 2 additions & 1 deletion arch/sparc64/kernel/trampoline.S
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ startup_continue:

sethi %hi(prom_entry_lock), %g2
1: ldstub [%g2 + %lo(prom_entry_lock)], %g1
membar #StoreLoad | #StoreStore
brnz,pn %g1, 1b
membar #StoreLoad | #StoreStore
nop

sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
Expand Down
103 changes: 53 additions & 50 deletions arch/sparc64/lib/U1memcpy.S
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,17 @@
#define LOOP_CHUNK3(src, dest, len, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

#define DO_SYNC membar #Sync;
#define STORE_SYNC(dest, fsrc) \
EX_ST(STORE_BLK(%fsrc, %dest)); \
add %dest, 0x40, %dest;
add %dest, 0x40, %dest; \
DO_SYNC

#define STORE_JUMP(dest, fsrc, target) \
EX_ST(STORE_BLK(%fsrc, %dest)); \
add %dest, 0x40, %dest; \
ba,pt %xcc, target;
ba,pt %xcc, target; \
nop;

#define FINISH_VISCHUNK(dest, f0, f1, left) \
subcc %left, 8, %left;\
Expand Down Expand Up @@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f0, %f2, %f48
1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
STORE_JUMP(o0, f48, 40f) membar #Sync
STORE_JUMP(o0, f48, 40f)
2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
STORE_JUMP(o0, f48, 48f) membar #Sync
STORE_JUMP(o0, f48, 48f)
3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
STORE_JUMP(o0, f48, 56f) membar #Sync
STORE_JUMP(o0, f48, 56f)

1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
Expand All @@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f2, %f4, %f48
1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
STORE_JUMP(o0, f48, 41f) membar #Sync
STORE_JUMP(o0, f48, 41f)
2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
STORE_JUMP(o0, f48, 49f) membar #Sync
STORE_JUMP(o0, f48, 49f)
3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
STORE_JUMP(o0, f48, 57f) membar #Sync
STORE_JUMP(o0, f48, 57f)

1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
Expand All @@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f4, %f6, %f48
1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
STORE_JUMP(o0, f48, 42f) membar #Sync
STORE_JUMP(o0, f48, 42f)
2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
STORE_JUMP(o0, f48, 50f) membar #Sync
STORE_JUMP(o0, f48, 50f)
3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
STORE_JUMP(o0, f48, 58f) membar #Sync
STORE_JUMP(o0, f48, 58f)

1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
Expand All @@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f6, %f8, %f48
1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
STORE_JUMP(o0, f48, 43f) membar #Sync
STORE_JUMP(o0, f48, 43f)
2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
STORE_JUMP(o0, f48, 51f) membar #Sync
STORE_JUMP(o0, f48, 51f)
3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
STORE_JUMP(o0, f48, 59f) membar #Sync
STORE_JUMP(o0, f48, 59f)

1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
Expand All @@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f8, %f10, %f48
1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
STORE_JUMP(o0, f48, 44f) membar #Sync
STORE_JUMP(o0, f48, 44f)
2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
STORE_JUMP(o0, f48, 52f) membar #Sync
STORE_JUMP(o0, f48, 52f)
3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
STORE_JUMP(o0, f48, 60f) membar #Sync
STORE_JUMP(o0, f48, 60f)

1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
Expand All @@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f10, %f12, %f48
1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
STORE_JUMP(o0, f48, 45f) membar #Sync
STORE_JUMP(o0, f48, 45f)
2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
STORE_JUMP(o0, f48, 53f) membar #Sync
STORE_JUMP(o0, f48, 53f)
3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
STORE_JUMP(o0, f48, 61f) membar #Sync
STORE_JUMP(o0, f48, 61f)

1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
Expand All @@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f12, %f14, %f48
1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
STORE_JUMP(o0, f48, 46f) membar #Sync
STORE_JUMP(o0, f48, 46f)
2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
STORE_JUMP(o0, f48, 54f) membar #Sync
STORE_JUMP(o0, f48, 54f)
3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
STORE_JUMP(o0, f48, 62f) membar #Sync
STORE_JUMP(o0, f48, 62f)

1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
Expand All @@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f14, %f16, %f48
1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
STORE_JUMP(o0, f48, 47f) membar #Sync
STORE_JUMP(o0, f48, 47f)
2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
STORE_JUMP(o0, f48, 55f) membar #Sync
STORE_JUMP(o0, f48, 55f)
3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
STORE_SYNC(o0, f48) membar #Sync
STORE_SYNC(o0, f48)
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
STORE_JUMP(o0, f48, 63f) membar #Sync
STORE_JUMP(o0, f48, 63f)

40: FINISH_VISCHUNK(o0, f0, f2, g3)
41: FINISH_VISCHUNK(o0, f2, f4, g3)
Expand Down
15 changes: 13 additions & 2 deletions arch/sparc64/lib/VISsave.S
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3

stda %f48, [%g3 + %g1] ASI_BLK_P
5: membar #Sync
jmpl %g7 + %g0, %g0
ba,pt %xcc, 80f
nop

.align 32
80: jmpl %g7 + %g0, %g0
nop

6: ldub [%g3 + TI_FPSAVED], %o5
Expand All @@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
stda %f32, [%g2 + %g1] ASI_BLK_P
stda %f48, [%g3 + %g1] ASI_BLK_P
membar #Sync
jmpl %g7 + %g0, %g0
ba,pt %xcc, 80f
nop

.align 32
80: jmpl %g7 + %g0, %g0
nop

.align 32
Expand Down Expand Up @@ -126,6 +133,10 @@ VISenterhalf:
stda %f0, [%g2 + %g1] ASI_BLK_P
stda %f16, [%g3 + %g1] ASI_BLK_P
membar #Sync
ba,pt %xcc, 4f
nop

.align 32
4: and %o5, FPRS_DU, %o5
jmpl %g7 + %g0, %g0
wr %o5, FPRS_FEF, %fprs
Loading

0 comments on commit b445e26

Please sign in to comment.