Skip to content

Commit

Permalink
[MIPS] R4000/R4400 daddiu erratum workaround
Browse files Browse the repository at this point in the history
 This complements the generic R4000/R4400 errata workaround code and adds 
bits for the daddiu problem.  In most places it just modifies handwritten 
assembly code so that the assembler is allowed to use a temporary register 
as daddiu may now be treated as a macro that expands to a sequence of li 
and daddu.  The temporary is normally the AT register; where AT is unavailable 
or used explicitly for another purpose, an explicitly named register is selected 
using the .set at=<reg> feature recently added to gas.  This feature is 
only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the 
workaround remains disabled, the required version of binutils stays 
unchanged.

 Similarly, daddiu instructions placed in branch delay slots in noreorder 
fragments are now taken out of them, and the assembler is allowed to 
reorder them itself where possible (which it does, making the whole idea of 
scheduling them into delay slots manually questionable).

 Also, in the very few places where such a simple conversion was not 
possible, a longer hand-coded sequence is implemented.

 Other than that there are changes to code responsible for building the 
TLB fault and page clear/copy handlers to avoid daddiu as appropriate.  
These are only effective if the erratum is verified to be present at 
run time.

 Finally there is a trivial update to __delay(), because it uses daddiu in 
a branch delay slot.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
  • Loading branch information
Maciej W. Rozycki authored and Ralf Baechle committed Jan 29, 2008
1 parent 20d60d9 commit 619b6e1
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 83 deletions.
8 changes: 7 additions & 1 deletion arch/mips/kernel/genex.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Copyright (C) 1994 - 2000, 2001, 2003 Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2001 MIPS Technologies, Inc.
* Copyright (C) 2002 Maciej W. Rozycki
* Copyright (C) 2002, 2007 Maciej W. Rozycki
*/
#include <linux/init.h>

Expand Down Expand Up @@ -471,7 +471,13 @@ NESTED(nmi_handler, PT_SIZE, sp)
jr k0
rfe
#else
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
LONG_ADDIU k0, 4 /* stall on $k0 */
#else
.set at=v1
LONG_ADDIU k0, 4
.set noat
#endif
MTC0 k0, CP0_EPC
/* I hope three instructions between MTC0 and ERET are enough... */
ori k1, _THREAD_MASK
Expand Down
61 changes: 51 additions & 10 deletions arch/mips/lib/csum_partial.S
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*
* Copyright (C) 1998, 1999 Ralf Baechle
* Copyright (C) 1999 Silicon Graphics, Inc.
* Copyright (C) 2007 Maciej W. Rozycki
*/
#include <linux/errno.h>
#include <asm/asm.h>
Expand Down Expand Up @@ -52,9 +53,12 @@
#define UNIT(unit) ((unit)*NBYTES)

#define ADDC(sum,reg) \
.set push; \
.set noat; \
ADD sum, reg; \
sltu v1, sum, reg; \
ADD sum, v1
ADD sum, v1; \
.set pop

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
LOAD _t0, (offset + UNIT(0))(src); \
Expand Down Expand Up @@ -178,8 +182,10 @@ move_128bytes:
CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
LONG_SUBU t8, t8, 0x01
.set reorder /* DADDI_WAR */
PTR_ADDU src, src, 0x80
bnez t8, move_128bytes
PTR_ADDU src, src, 0x80
.set noreorder

1:
beqz t2, 1f
Expand Down Expand Up @@ -208,8 +214,10 @@ end_words:
lw t0, (src)
LONG_SUBU t8, t8, 0x1
ADDC(sum, t0)
.set reorder /* DADDI_WAR */
PTR_ADDU src, src, 0x4
bnez t8, end_words
PTR_ADDU src, src, 0x4
.set noreorder

/* unknown src alignment and < 8 bytes to go */
small_csumcpy:
Expand Down Expand Up @@ -246,6 +254,8 @@ small_csumcpy:
1: ADDC(sum, t1)

/* fold checksum */
.set push
.set noat
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
Expand All @@ -266,6 +276,7 @@ small_csumcpy:
srl sum, sum, 8
or sum, v1
andi sum, 0xffff
.set pop
1:
.set reorder
/* Add the passed partial csum. */
Expand Down Expand Up @@ -373,7 +384,11 @@ small_csumcpy:

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
.set noat
#else
.set at=v1
#endif

LEAF(__csum_partial_copy_user)
PTR_ADDU AT, src, len /* See (1) above. */
Expand Down Expand Up @@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc)
ADDC(sum, t6)
EXC( STORE t7, UNIT(7)(dst), s_exc)
ADDC(sum, t7)
.set reorder /* DADDI_WAR */
ADD dst, dst, 8*NBYTES
bgez len, 1b
ADD dst, dst, 8*NBYTES
.set noreorder
ADD len, 8*NBYTES # revert len (see above)

/*
Expand Down Expand Up @@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
ADDC(sum, t2)
EXC( STORE t3, UNIT(3)(dst), s_exc)
ADDC(sum, t3)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
beqz len, done
ADD dst, dst, 4*NBYTES
.set noreorder
less_than_4units:
/*
* rem = len % NBYTES
Expand All @@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc)
SUB len, len, NBYTES
EXC( STORE t0, 0(dst), s_exc)
ADDC(sum, t0)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne rem, len, 1b
ADD dst, dst, NBYTES
.set noreorder

/*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES)
Expand Down Expand Up @@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
ADDC(sum, t2)
EXC( STORE t3, UNIT(3)(dst), s_exc)
ADDC(sum, t3)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
bne len, rem, 1b
ADD dst, dst, 4*NBYTES
.set noreorder

cleanup_src_unaligned:
beqz len, done
Expand All @@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
SUB len, len, NBYTES
EXC( STORE t0, 0(dst), s_exc)
ADDC(sum, t0)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne len, rem, 1b
ADD dst, dst, NBYTES
.set noreorder

copy_bytes_checklen:
beqz len, done
Expand Down Expand Up @@ -631,6 +656,8 @@ copy_bytes_done:
ADDC(sum, t2)
done:
/* fold checksum */
.set push
.set noat
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
Expand All @@ -651,6 +678,7 @@ done:
srl sum, sum, 8
or sum, v1
andi sum, 0xffff
.set pop
1:
.set reorder
ADDC(sum, psum)
Expand Down Expand Up @@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc)
SLLV t1, t1, t2
addu t2, SHIFT_INC
ADDC(sum, t1)
.set reorder /* DADDI_WAR */
ADD dst, dst, 1
bne src, t0, 1b
ADD dst, dst, 1
.set noreorder
l_exc:
LOAD t0, TI_TASK($28)
nop
Expand All @@ -697,12 +727,22 @@ l_exc:
* Clear len bytes starting at dst. Can't call __bzero because it
* might modify len. An inefficient loop for these rare times...
*/
.set reorder /* DADDI_WAR */
SUB src, len, 1
beqz len, done
SUB src, len, 1
.set noreorder
1: sb zero, 0(dst)
ADD dst, dst, 1
.set push
.set noat
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
bnez src, 1b
SUB src, src, 1
#else
li v1, 1
bnez src, 1b
SUB src, src, v1
#endif
li v1, -EFAULT
b done
sw v1, (errptr)
Expand All @@ -712,4 +752,5 @@ s_exc:
li v1, -EFAULT
jr ra
sw v1, (errptr)
.set pop
END(__csum_partial_copy_user)
25 changes: 20 additions & 5 deletions arch/mips/lib/memcpy-inatomic.S
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
* Copyright (C) 2002 Broadcom, Inc.
* memcpy/copy_user author: Mark Vandevoorde
* Copyright (C) 2007 Maciej W. Rozycki
*
* Mnemonic names for arguments to memcpy/__copy_user
*/
Expand Down Expand Up @@ -175,7 +176,11 @@

.text
.set noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
.set noat
#else
.set at=v1
#endif

/*
* A combined memcpy/__copy_user
Expand Down Expand Up @@ -268,8 +273,10 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
STORE t1, UNIT(1)(dst)
STORE t2, UNIT(2)(dst)
STORE t3, UNIT(3)(dst)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
beqz len, done
ADD dst, dst, 4*NBYTES
.set noreorder
less_than_4units:
/*
* rem = len % NBYTES
Expand All @@ -281,8 +288,10 @@ EXC( LOAD t0, 0(src), l_exc)
ADD src, src, NBYTES
SUB len, len, NBYTES
STORE t0, 0(dst)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne rem, len, 1b
ADD dst, dst, NBYTES
.set noreorder

/*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES)
Expand Down Expand Up @@ -361,8 +370,10 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy)
STORE t2, UNIT(2)(dst)
STORE t3, UNIT(3)(dst)
PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
bne len, rem, 1b
ADD dst, dst, 4*NBYTES
.set noreorder

cleanup_src_unaligned:
beqz len, done
Expand All @@ -375,8 +386,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
ADD src, src, NBYTES
SUB len, len, NBYTES
STORE t0, 0(dst)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne len, rem, 1b
ADD dst, dst, NBYTES
.set noreorder

copy_bytes_checklen:
beqz len, done
Expand Down Expand Up @@ -424,8 +437,10 @@ l_exc_copy:
EXC( lb t1, 0(src), l_exc)
ADD src, src, 1
sb t1, 0(dst) # can't fault -- we're copy_from_user
.set reorder /* DADDI_WAR */
ADD dst, dst, 1
bne src, t0, 1b
ADD dst, dst, 1
.set noreorder
l_exc:
LOAD t0, TI_TASK($28)
nop
Expand Down
Loading

0 comments on commit 619b6e1

Please sign in to comment.