-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sh: Provide sdivsi3/udivsi3/udivdi3 for sh64, kill off libgcc linking.
This moves in the necessary libgcc bits and kills off the libgcc linking for sh64 kernels as well. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
- Loading branch information
Paul Mundt
committed
Dec 22, 2008
1 parent
209aa4f
commit 180ae20
Showing
6 changed files
with
314 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
/*
 * __sdivsi3: signed 32-bit division using a table-seeded reciprocal.
 *
 * r5 is the value that gets normalized and inverted, and r4 is multiplied
 * by that inverse, so r4 acts as the dividend and r5 as the divisor.
 * The return target is taken from r18 (ptabs below) before r18 is reused
 * as scratch; presumably r18 is the link register here -- confirm against
 * the SHmedia calling convention.
 * No check for r5 == 0 is made in this routine.
 *
 * NOTE(review): the trailing "| |" columns and the bare "|" and "||" lines
 * look like diff-table residue from a web export, not assembler input.
 */
.global __sdivsi3 | |
.section .text..SHmedia32,"ax" | |
.align 2 | |
|
||
/* inputs: r4,r5 */ | |
/* clobbered: r1,r18,r19,r20,r21,r25,tr0 */ | |
/* result in r0 */ | |
__sdivsi3: | |
/* tr0 temporarily carries the table address; gettr copies it to r20 below. */
ptb __div_table,tr0 | |
|
||
/* r1 = normalization shift for the divisor, r25 = normalized divisor. */
nsb r5, r1 | |
shlld r5, r1, r25 /* normalize; [-2 ..1, 1..2) in s2.62 */ | |
shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */ | |
/* bubble */ | |
gettr tr0,r20 | |
/* Byte factor at table + index, word constant at table + 2*index. */
ldx.ub r20, r21, r19 /* u0.8 */ | |
shari r25, 32, r25 /* normalize to s2.30 */ | |
shlli r21, 1, r21 | |
muls.l r25, r19, r19 /* s2.38 */ | |
ldx.w r20, r21, r21 /* s2.14 */ | |
/* Capture the return target now: r18 is overwritten as scratch further down. */
ptabs r18, tr0 | |
shari r19, 24, r19 /* truncate to s2.14 */ | |
sub r21, r19, r19 /* some 11 bit inverse in s1.14 */ | |
muls.l r19, r19, r21 /* u0.28 */ | |
/* r1 = 92 - (normalization shift): the right-shift count used by shard below. */
sub r63, r1, r1 | |
addi r1, 92, r1 | |
muls.l r25, r21, r18 /* s2.58 */ | |
shlli r19, 45, r19 /* multiply by two and convert to s2.58 */ | |
/* bubble */ | |
sub r19, r18, r18 | |
shari r18, 28, r18 /* some 22 bit inverse in s1.30 */ | |
muls.l r18, r25, r0 /* s2.60 */ | |
muls.l r18, r4, r25 /* s32.30 */ | |
/* bubble */ | |
shari r0, 16, r19 /* s-16.44 */ | |
muls.l r19, r18, r19 /* s-16.74 */ | |
/* r0 = sign mask of (inverse * r4): all ones if negative, otherwise zero. */
shari r25, 63, r0 | |
shari r4, 14, r18 /* s19.-14 */ | |
shari r19, 30, r19 /* s-16.44 */ | |
muls.l r19, r18, r19 /* s15.30 */ | |
xor r21, r0, r21 /* You could also use the constant 1 << 27. */ | |
add r21, r25, r21 | |
sub r21, r19, r21 | |
shard r21, r1, r21 | |
/* Final result: the shifted value minus the sign mask, left in r0. */
sub r21, r0, r0 | |
blink tr0, r63 | |
|
||
/*
 * __div_table: lookup data for __sdivsi3, addressed relative to the
 * __div_table label, which sits in the middle of the data so that a
 * signed index works in both directions.  With 2-byte .word entries the
 * 128 bytes declared by .size are laid out as:
 *   label-64 .. label-33   16 words  negative division constants
 *   label-32 .. label-17   16 bytes  negative division factors
 *   label-16 .. label+15   32 bytes  unused gap (.skip 16 on each side)
 *   label+16 .. label+31   16 bytes  positive division factors
 *   label+32 .. label+63   16 words  positive division constants
 * __sdivsi3 reads the byte factor at (label + index) with ldx.ub and the
 * word constant at (label + 2*index) with ldx.w.
 */
/* This table has been generated by divtab.c . | |
Defects for bias -330: | |
Max defect: 6.081536e-07 at -1.000000e+00 | |
Min defect: 2.849516e-08 at 1.030651e+00 | |
Max 2nd step defect: 9.606539e-12 at -1.000000e+00 | |
Min 2nd step defect: 0.000000e+00 at 0.000000e+00 | |
Defect at 1: 1.238659e-07 | |
Defect at -2: 1.061708e-07 */ | |
|
||
.balign 2 | |
.type __div_table,@object | |
.size __div_table,128 | |
/* negative division constants */ | |
.word -16638 | |
.word -17135 | |
.word -17737 | |
.word -18433 | |
.word -19103 | |
.word -19751 | |
.word -20583 | |
.word -21383 | |
.word -22343 | |
.word -23353 | |
.word -24407 | |
.word -25582 | |
.word -26863 | |
.word -28382 | |
.word -29965 | |
.word -31800 | |
/* negative division factors */ | |
.byte 66 | |
.byte 70 | |
.byte 75 | |
.byte 81 | |
.byte 87 | |
.byte 93 | |
.byte 101 | |
.byte 109 | |
.byte 119 | |
.byte 130 | |
.byte 142 | |
.byte 156 | |
.byte 172 | |
.byte 192 | |
.byte 214 | |
.byte 241 | |
/* Gap below the label: covers the index range the lookup never produces. */
.skip 16 | |
.global __div_table | |
__div_table: | |
/* Gap above the label, mirroring the one below it. */
.skip 16 | |
/* positive division factors */ | |
.byte 241 | |
.byte 214 | |
.byte 192 | |
.byte 172 | |
.byte 156 | |
.byte 142 | |
.byte 130 | |
.byte 119 | |
.byte 109 | |
.byte 101 | |
.byte 93 | |
.byte 87 | |
.byte 81 | |
.byte 75 | |
.byte 70 | |
.byte 66 | |
/* positive division constants */ | |
.word 31801 | |
.word 29966 | |
.word 28383 | |
.word 26864 | |
.word 25583 | |
.word 24408 | |
.word 23354 | |
.word 22344 | |
.word 21384 | |
.word 20584 | |
.word 19752 | |
.word 19104 | |
.word 18434 | |
.word 17738 | |
.word 17136 | |
.word 16639 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/*
 * __udivdi3: unsigned 64-bit division (entry and small-divisor path).
 *
 * r3 is the value that is normalized and inverted, and the running
 * remainder is kept in r2, so r2 acts as the dividend and r3 as the
 * divisor.  The quotient is left in r2.
 * Registers written on this path: r0,r1,r2,r4,r5,r6,r7,r8,r9,r19,r20,
 * r21,r22,tr0 (the large_divisor path additionally writes r25).
 * The return target is taken from r18 via ptabs; presumably r18 is the
 * link register -- confirm against the SHmedia calling convention.
 * A zero divisor is not checked (see the remark before the final blink).
 *
 * NOTE(review): the trailing "| |" columns and the bare "|" and "||" lines
 * look like diff-table residue from a web export, not assembler input.
 */
.section .text..SHmedia32,"ax" | |
.align 2 | |
.global __udivdi3 | |
__udivdi3: | |
/* r22 = normalization shift (computed from r3 >> 1), r6 = normalized divisor. */
shlri r3,1,r4 | |
nsb r4,r22 | |
shlld r3,r22,r6 | |
shlri r6,49,r5 | |
movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ | |
sub r21,r5,r1 | |
mmulfx.w r1,r1,r4 | |
mshflo.w r1,r63,r1 | |
sub r63,r22,r20 // r63 == 64 % 64 | |
mmulfx.w r5,r4,r4 | |
/* tr0 is first used for the large_divisor branch; it is reloaded with the return target later. */
pta large_divisor,tr0 | |
addi r20,32,r9 | |
msub.w r1,r4,r1 | |
madd.w r1,r1,r1 | |
mmulfx.w r1,r1,r4 | |
/* r7 = upper 32 bits of the normalized divisor. */
shlri r6,32,r7 | |
/* r9 = 32 - r22; branch to large_divisor when it is greater than zero (r63). */
bgt/u r9,r63,tr0 // large_divisor | |
mmulfx.w r5,r4,r4 | |
shlri r2,32+14,r19 | |
addi r22,-31,r0 | |
msub.w r1,r4,r1 | |
|
||
mulu.l r1,r7,r4 | |
addi r1,-3,r5 | |
mulu.l r5,r19,r5 | |
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | |
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | |
the case may be, %0000000000000000 000.11111111111, still */ | |
muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
mulu.l r5,r3,r8 | |
mshalds.l r1,r21,r1 | |
shari r4,26,r4 | |
shlld r8,r0,r8 | |
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | |
sub r2,r8,r2 | |
/* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ | |
|
||
shlri r2,22,r21 | |
mulu.l r21,r1,r21 | |
/* r8 starts accumulating the quotient: first partial result, shifted into place. */
shlld r5,r0,r8 | |
addi r20,30-22,r0 | |
shlrd r21,r0,r21 | |
mulu.l r21,r3,r5 | |
add r8,r21,r8 | |
mcmpgt.l r21,r63,r21 // See Note 1 | |
addi r20,30,r0 | |
mshfhi.l r63,r21,r21 | |
sub r2,r5,r2 | |
andc r2,r21,r2 | |
|
||
/* small divisor: need a third divide step */ | |
mulu.l r2,r1,r7 | |
/* tr0 now becomes the return target. */
ptabs r18,tr0 | |
addi r2,1,r2 | |
shlrd r7,r0,r7 | |
mulu.l r7,r3,r5 | |
add r8,r7,r8 | |
sub r2,r3,r2 | |
/* r5 = 0/1 compare result, added to the accumulated quotient to give r2. */
cmpgt r2,r5,r5 | |
add r8,r5,r2 | |
/* could test r3 here to check for divide by zero. */ | |
blink tr0,r63 | |
|
||
/*
 * large_divisor: second exit path of __udivdi3, reached through the
 * bgt/u on r9 in the entry sequence.  It relies on values set up there:
 * r1, r4 and r5 (reciprocal iteration state), r6 and r7 (normalized
 * divisor and its upper 32 bits), r9 and r22 (shift counts), r21
 * (shift-count constant), r2 (dividend) and r18 (return target source).
 * Registers written here: r0,r1,r2,r4,r5,r8,r21,r25,tr0.
 * The quotient is left in r2.
 */
large_divisor: | |
mmulfx.w r5,r4,r4 | |
/* r25 = dividend shifted right by r9; it carries the running remainder on this path. */
shlrd r2,r9,r25 | |
shlri r25,32,r8 | |
msub.w r1,r4,r1 | |
|
||
mulu.l r1,r7,r4 | |
addi r1,-3,r5 | |
mulu.l r5,r8,r5 | |
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | |
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | |
the case may be, %0000000000000000 000.11111111111, still */ | |
muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
shlri r5,14-1,r8 | |
mulu.l r8,r7,r5 | |
mshalds.l r1,r21,r1 | |
shari r4,26,r4 | |
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | |
sub r25,r5,r25 | |
/* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ | |
|
||
shlri r25,22,r21 | |
mulu.l r21,r1,r21 | |
pta no_lo_adj,tr0 | |
addi r22,32,r0 | |
shlri r21,40,r21 | |
mulu.l r21,r7,r5 | |
add r8,r21,r8 | |
shlld r2,r0,r2 | |
sub r25,r5,r25 | |
/* Skip the +1 quotient adjustment when r7 is (unsigned) greater than the remainder r25. */
bgtu/u r7,r25,tr0 // no_lo_adj | |
addi r8,1,r8 | |
sub r25,r7,r25 | |
no_lo_adj: | |
mextr4 r2,r25,r2 | |
|
||
/* large_divisor: only needs a few adjustments. */ | |
mulu.l r8,r6,r5 | |
/* tr0 now becomes the return target. */
ptabs r18,tr0 | |
/* bubble */ | |
/* r5 = 0/1 unsigned compare result, subtracted from the quotient estimate r8 to give r2. */
cmpgtu r5,r2,r5 | |
sub r8,r5,r2 | |
blink tr0,r63 | |
|
||
/* Note 1: To shift the result of the second divide stage so that the result | ||
always fits into 32 bits, yet we still reduce the rest sufficiently | ||
would require a lot of instructions to do the shifts just right. Using | ||
the full 64 bit shift result to multiply with the divisor would require | ||
four extra instructions for the upper 32 bits (shift / mulu / shift / sub). | ||
Fortunately, if the upper 32 bits of the shift result are nonzero, we | ||
know that the rest after taking this partial result into account will | ||
fit into 32 bits. So we just clear the upper 32 bits of the rest if the | ||
upper 32 bits of the partial result are nonzero. */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/*
 * __udivsi3: unsigned 32-bit division using an iterated reciprocal
 * estimate followed by three quotient refinement steps.
 *
 * r5 is zero-extended into r22 and inverted, and r4 is multiplied by the
 * inverse, so r4 acts as the dividend and r5 as the divisor.
 * The return target is taken from r18 (ptabs below) before r18 is reused
 * as the quotient accumulator; presumably r18 is the link register --
 * confirm against the SHmedia calling convention.
 * No check for r5 == 0 is made in this routine.
 *
 * NOTE(review): the trailing "| |" columns and the bare "|" and "||" lines
 * look like diff-table residue from a web export, not assembler input.
 */
.global __udivsi3 | |
.section .text..SHmedia32,"ax" | |
.align 2 | |
|
||
/* | |
inputs: r4,r5 | |
clobbered: r18,r19,r20,r21,r22,r25,tr0 | |
result in r0. | |
*/ | |
__udivsi3: | |
/* r22 = divisor (r5 zero-extended), r0 = its normalization shift, r25 = normalized top bits. */
addz.l r5,r63,r22 | |
nsb r22,r0 | |
shlld r22,r0,r25 | |
shlri r25,48,r25 | |
movi 0xffffffffffffbb0c,r20 /* shift count equiv 76 */ | |
sub r20,r25,r21 | |
mmulfx.w r21,r21,r19 | |
mshflo.w r21,r63,r21 | |
/* Capture the return target before r18 becomes the quotient accumulator. */
ptabs r18,tr0 | |
mmulfx.w r25,r19,r19 | |
/* r0 = constant in r20 minus the normalization shift: shift count for shlrd below. */
sub r20,r0,r0 | |
/* bubble */ | |
msub.w r21,r19,r19 | |
|
||
/* | |
* It would be nice for scheduling to do this add to r21 before | |
* the msub.w, but we need a different value for r19 to keep | |
* errors under control. | |
*/ | |
addi r19,-2,r21 | |
/* First quotient estimate: r18 = (r4 * inverse) >> r0. */
mulu.l r4,r21,r18 | |
mmulfx.w r19,r19,r19 | |
shlli r21,15,r21 | |
shlrd r18,r0,r18 | |
mulu.l r18,r22,r20 | |
mmacnfx.wl r25,r19,r21 | |
/* bubble */ | |
/* r25 = remainder after the first estimate (r4 - r18 * r22). */
sub r4,r20,r25 | |
|
||
/* Second step: estimate from the remainder, add it to r18, reduce the remainder. */
mulu.l r25,r21,r19 | |
addi r0,14,r0 | |
/* bubble */ | |
shlrd r19,r0,r19 | |
mulu.l r19,r22,r20 | |
add r18,r19,r18 | |
/* bubble */ | |
sub.l r25,r20,r25 | |
|
||
/* Third step: same pattern, with r25 prepared as (remainder - divisor + 1) for the final compare. */
mulu.l r25,r21,r19 | |
addz.l r25,r63,r25 | |
sub r25,r22,r25 | |
shlrd r19,r0,r19 | |
mulu.l r19,r22,r20 | |
addi r25,1,r25 | |
add r18,r19,r18 | |
|
||
/* Final adjustment: add the 0/1 compare result to the accumulated quotient. */
cmpgt r25,r20,r25 | |
add.l r18,r25,r0 | |
blink tr0,r63 |