Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 71948
b: refs/heads/master
c: b0a68dc
h: refs/heads/master
v: v3
  • Loading branch information
Mike Frysinger authored and Bryan Wu committed Oct 21, 2007
1 parent 10abb28 commit 62e5012
Show file tree
Hide file tree
Showing 3 changed files with 377 additions and 2 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 1c668d82465cd5c17030c0f69561841374380ac8
refs/heads/master: b0a68dc07ec395d44849ce98eb417713ca333410
2 changes: 1 addition & 1 deletion trunk/arch/blackfin/lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

lib-y := \
ashldi3.o ashrdi3.o lshrdi3.o \
muldi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
muldi3.o divsi3.o udivsi3.o udivdi3.o modsi3.o umodsi3.o \
checksum.o memcpy.o memset.o memcmp.o memchr.o memmove.o \
strcmp.o strcpy.o strncmp.o strncpy.o \
umulsi3_highpart.o smulsi3_highpart.o \
Expand Down
375 changes: 375 additions & 0 deletions trunk/arch/blackfin/lib/udivdi3.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,375 @@
/*
* udivdi3.S - unsigned long long division
*
* Copyright 2003-2007 Analog Devices Inc.
* Enter bugs at http://blackfin.uclinux.org/
*
* Licensed under the GPLv2 or later.
*/

#include <linux/linkage.h>

#define CARRY AC0

#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif


ENTRY(___udivdi3)
R3 = [SP + 12];
[--SP] = (R7:4, P5:3);

/* Attempt to use divide primitive first; these will handle
** most cases, and they're quick - avoids stalls incurred by
** testing for identities.
*/

R4 = R2 | R3;
CC = R4 == 0;
IF CC JUMP .LDIV_BY_ZERO;

R4.H = 0x8000;
R4 >>>= 16; // R4 now 0xFFFF8000
R5 = R0 | R2; // If either dividend or
R4 = R5 & R4; // divisor have bits in
CC = R4; // top half or low half's sign
IF CC JUMP .LIDENTS; // bit, skip builtins.
R4 = R1 | R3; // Also check top halves
CC = R4;
IF CC JUMP .LIDENTS;

/* Can use the builtins. */

AQ = CC; // Clear AQ (CC==0)
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
DIVQ(R0, R2);
R0 = R0.L (Z);
R1 = 0;
(R7:4, P5:3) = [SP++];
RTS;

.LIDENTS:
/* Test for common identities. Value to be returned is
** placed in R6,R7.
*/
// Check for 0/y, return 0
R4 = R0 | R1;
CC = R4 == 0;
IF CC JUMP .LRETURN_R0;

// Check for x/x, return 1
R6 = R0 - R2; // If x == y, then both R6 and R7 will be zero
R7 = R1 - R3;
R4 = R6 | R7; // making R4 zero.
R6 += 1; // which would now make R6:R7==1.
CC = R4 == 0;
IF CC JUMP .LRETURN_IDENT;

// Check for x/1, return x
R6 = R0;
R7 = R1;
CC = R3 == 0;
IF !CC JUMP .Lnexttest;
CC = R2 == 1;
IF CC JUMP .LRETURN_IDENT;

.Lnexttest:
R4.L = ONES R2; // check for div by power of two which
R5.L = ONES R3; // can be done using a shift
R6 = PACK (R5.L, R4.L);
CC = R6 == 1;
IF CC JUMP .Lpower_of_two_upper_zero;
R6 = PACK (R4.L, R5.L);
CC = R6 == 1;
IF CC JUMP .Lpower_of_two_lower_zero;

// Check for x < y, return 0
R6 = 0;
R7 = R6;
CC = R1 < R3 (IU);
IF CC JUMP .LRETURN_IDENT;
CC = R1 == R3;
IF !CC JUMP .Lno_idents;
CC = R0 < R2 (IU);
IF CC JUMP .LRETURN_IDENT;

.Lno_idents: // Idents don't match. Go for the full operation


// If X, or X and Y have high bit set, it'll affect the
// results, so shift right one to stop this. Note: we've already
// checked that X >= Y, so Y's msb won't be set unless X's
// is.

R4 = 0;
CC = R1 < 0;
IF !CC JUMP .Lx_msb_clear;
CC = !CC; // 1 -> 0;
R1 = ROT R1 BY -1; // Shift X >> 1
R0 = ROT R0 BY -1; // lsb -> CC
BITSET(R4,31); // to record only x msb was set
CC = R3 < 0;
IF !CC JUMP .Ly_msb_clear;
CC = !CC;
R3 = ROT R3 BY -1; // Shift Y >> 1
R2 = ROT R2 BY -1;
BITCLR(R4,31); // clear bit to record only x msb was set

.Ly_msb_clear:
.Lx_msb_clear:
// Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
// were lost, so we should shift result left by one.

[--SP] = R4; // save for later

// In the loop that follows, each iteration we add
// either Y' or -Y' to the Remainder. We compute the
// negated Y', and store, for convenience. Y' goes
// into P0:P1, while -Y' goes into P2:P3.

P0 = R2;
P1 = R3;
R2 = -R2;
CC = CARRY;
CC = !CC;
R4 = CC;
R3 = -R3;
R3 = R3 - R4;

R6 = 0; // remainder = 0
R7 = R6;

[--SP] = R2; P2 = SP;
[--SP] = R3; P3 = SP;
[--SP] = R6; P5 = SP; // AQ = 0
[--SP] = P1;

/* In the loop that follows, we use the following
** register assignments:
** R0,R1 X, workspace
** R2,R3 Y, workspace
** R4,R5 partial Div
** R6,R7 partial remainder
** P5 AQ
** The remainder and div form a 128-bit number, with
** the remainder in the high 64-bits.
*/
R4 = R0; // Div = X'
R5 = R1;
R3 = 0;

P4 = 64; // Iterate once per bit
LSETUP(.LULST,.LULEND) LC0 = P4;
.LULST:
/* Shift Div and remainder up by one. The bit shifted
** out of the top of the quotient is shifted into the bottom
** of the remainder.
*/
CC = R3;
R4 = ROT R4 BY 1;
R5 = ROT R5 BY 1 || // low q to high q
R2 = [P5]; // load saved AQ
R6 = ROT R6 BY 1 || // high q to low r
R0 = [P2]; // load -Y'
R7 = ROT R7 BY 1 || // low r to high r
R1 = [P3];

// Assume add -Y'
CC = R2 < 0; // But if AQ is set...
IF CC R0 = P0; // then add Y' instead
IF CC R1 = P1;

R6 = R6 + R0; // Rem += (Y' or -Y')
CC = CARRY;
R0 = CC;
R7 = R7 + R1;
R7 = R7 + R0 (NS) ||
R1 = [SP];
// Set the next AQ bit
R1 = R7 ^ R1; // from Remainder and Y'
R1 = R1 >> 31 || // Negate AQ's value, and
[P5] = R1; // save next AQ
BITTGL(R1, 0); // add neg AQ to the Div
.LULEND: R4 = R4 + R1;

R6 = [SP + 16];

R0 = R4;
R1 = R5;
CC = BITTST(R6,30); // Just set CC=0
R4 = ROT R0 BY 1; // but if we had to shift X,
R5 = ROT R1 BY 1; // and didn't shift any bits out,
CC = BITTST(R6,31); // then the result will be half as
IF CC R0 = R4; // much as required, so shift left
IF CC R1 = R5; // one space.

SP += 20;
(R7:4, P5:3) = [SP++];
RTS;

.Lpower_of_two:
/* Y has a single bit set, which means it's a power of two.
** That means we can perform the division just by shifting
** X to the right the appropriate number of bits
*/

/* signbits returns the number of sign bits, minus one.
** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
** to shift right n-signbits spaces. It also means 0x80000000
** is a special case, because that *also* gives a signbits of 0
*/
.Lpower_of_two_lower_zero:
R7 = 0;
R6 = R1 >> 31;
CC = R3 < 0;
IF CC JUMP .LRETURN_IDENT;

R2.L = SIGNBITS R3;
R2 = R2.L (Z);
R2 += -62;
(R7:4, P5:3) = [SP++];
JUMP ___lshftli;

.Lpower_of_two_upper_zero:
CC = R2 < 0;
IF CC JUMP .Lmaxint_shift;

R2.L = SIGNBITS R2;
R2 = R2.L (Z);
R2 += -30;
(R7:4, P5:3) = [SP++];
JUMP ___lshftli;

.Lmaxint_shift:
R2 = -31;
(R7:4, P5:3) = [SP++];
JUMP ___lshftli;

.LRETURN_IDENT:
R0 = R6;
R1 = R7;
.LRETURN_R0:
(R7:4, P5:3) = [SP++];
RTS;
.LDIV_BY_ZERO:
R0 = ~R2;
R1 = R0;
(R7:4, P5:3) = [SP++];
RTS;

ENDPROC(___udivdi3)


ENTRY(___lshftli)
CC = R2 == 0;
IF CC JUMP .Lfinished; // nothing to do
CC = R2 < 0;
IF CC JUMP .Lrshift;
R3 = 64;
CC = R2 < R3;
IF !CC JUMP .Lretzero;

// We're shifting left, and it's less than 64 bits, so
// a valid result will be returned.

R3 >>= 1; // R3 now 32
CC = R2 < R3;

IF !CC JUMP .Lzerohalf;

// We're shifting left, between 1 and 31 bits, which means
// some of the low half will be shifted into the high half.
// Work out how much.

R3 = R3 - R2;

// Save that much data from the bottom half.

P1 = R7;
R7 = R0;
R7 >>= R3;

// Adjust both parts of the parameter.

R0 <<= R2;
R1 <<= R2;

// And include the bits moved across.

R1 = R1 | R7;
R7 = P1;
RTS;

.Lzerohalf:
// We're shifting left, between 32 and 63 bits, so the
// bottom half will become zero, and the top half will
// lose some bits. How many?

R2 = R2 - R3; // N - 32
R1 = LSHIFT R0 BY R2.L;
R0 = R0 - R0;
RTS;

.Lretzero:
R0 = R0 - R0;
R1 = R0;
.Lfinished:
RTS;

.Lrshift:
// We're shifting right, but by how much?
R2 = -R2;
R3 = 64;
CC = R2 < R3;
IF !CC JUMP .Lretzero;

// Shifting right less than 64 bits, so some result bits will
// be retained.

R3 >>= 1; // R3 now 32
CC = R2 < R3;
IF !CC JUMP .Lsignhalf;

// Shifting right between 1 and 31 bits, so need to copy
// data across words.

P1 = R7;
R3 = R3 - R2;
R7 = R1;
R7 <<= R3;
R1 >>= R2;
R0 >>= R2;
R0 = R7 | R0;
R7 = P1;
RTS;

.Lsignhalf:
// Shifting right between 32 and 63 bits, so the top half
// will become all zero-bits, and the bottom half is some
// of the top half. But how much?

R2 = R2 - R3;
R0 = R1;
R0 >>= R2;
R1 = 0;
RTS;

ENDPROC(___lshftli)

0 comments on commit 62e5012

Please sign in to comment.