Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 127117
b: refs/heads/master
c: 71ae92f
h: refs/heads/master
i:
  127115: fd12a59
v: v3
  • Loading branch information
Bernd Schmidt authored and Bryan Wu committed Jan 7, 2009
1 parent 2646307 commit aa3d9b3
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 100 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 36478585d994f82654cf8435b34c1a8df3c6ae69
refs/heads/master: 71ae92f51a5f2d824972cf60b25cc40def62ba29
68 changes: 68 additions & 0 deletions trunk/arch/blackfin/lib/muldi3.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/* Symbol setup for ___muldi3: request alignment, export the symbol and
   mark it as a function.
   NOTE(review): .align is issued before the section is selected below, so
   it applies to whichever section is current at this point - confirm that
   this is intended when the code is placed in .l1.text. */
.align 2
.global ___muldi3;
.type ___muldi3, STT_FUNC;

/* Place the code in .l1.text when CONFIG_ARITHMETIC_OPS_L1 is defined,
   otherwise in the regular .text section. */
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif

/*
R1:R0 * R3:R2
= R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
[X] = (R1.h * R3.h) * 2^96
[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80
[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
[T4] + (R0.l * R2.l)

We can discard the first three lines, marked "X", since they only
contribute to bits 64 and above and we produce only a 64-bit result.
So, we need the ten 16-bit multiplies that appear in lines T1..T4.

Individual mul-acc results (E[n] is the parenthesized sum of line T[n]
above, without its power-of-two scale factor):
[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
[E3] = R0.l * R2.h + R2.l * R0.h
[E4] = R0.l * R2.l

We also need to add high parts from lower-level results to higher ones:
E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4

One interesting property is that all parts of the result that depend
on the sign of the multiplication are discarded. Those would be the
multiplications involving R1.h and R3.h, but only the top 16 bits of
each 32-bit product depend on the sign, and since R1.h and R3.h only
occur in E1, which is scaled by 2^48, the top half of these products
is cut off.
So, we can just use FU mode for all of the 16-bit multiplies, and
ignore questions of when to use mixed mode. */

/* ___muldi3: multiply two 64-bit values and return the low 64 bits of
   the product, built from ten 16-bit x 16-bit multiply-accumulates
   (E1..E4 in the derivation above).

   Inputs, as read by the code below:
     R1:R0      first operand (R1 = high word, R0 = low word)
     R2         low word of the second operand
     [SP + 12]  high word of the second operand (loaded into R3)
   Output:
     R1:R0      low 64 bits of the product
   Scratch:
     A0, A1 and R3 are overwritten; R4 is saved to [SP] on entry and
     restored from [SP] before returning. */
___muldi3:
/* [SP] technically is part of the caller's frame, but we can
use it as scratch space. */
/* E1: four cross products, split over A0 and A1 and then summed.
   The parallel slots load the second operand's high word into R3 and
   save R4 to [SP] so that R4 can hold E1 until the end. */
A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */
A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */
A0 += A1; /* E1 */
R4 = A0.w;
/* E2: start with the two products that need R3 (second operand, high
   word), because R3 is reused as a temporary right after this. */
A0 = R0.l * R3.l (FU); /* E2 */
A0 += R2.l * R1.l (FU); /* E2 */

/* E4 goes into A1; keep a copy in R3 (its low half becomes the lowest
   16 bits of the result), then shift A1 down by 16 so the upper part of
   E4 is carried into E3c. */
A1 = R2.L * R0.L (FU); /* E4 */
R3 = A1.w;
A1 = A1 >> 16; /* E3c */
/* Finish E2 in A0 with its third product while A1 accumulates the two
   E3 products on top of the carry from E4. */
A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */
A1 += R0.L * R2.H (FU); /* E3c */
/* From here on R0 no longer holds the operand: it takes E3c, whose low
   half becomes bits 16..31 of the result.  The upper part of E3c is then
   carried into E2 to form E2c. */
R0 = A1.w;
A1 = A1 >> 16; /* E2c */
A0 += A1; /* E2c */
R1 = A0.w;

/* low(result) = low(E3c):low(E4) */
R0 = PACK (R0.l, R3.l);
/* high(result) = E2c + (E1 << 16) */
/* E1 << 16 only affects the upper half of the high word, so only R1.h is
   updated, using the low half of E1 held in R4.  (NS) is presumably the
   non-saturating form so the sum wraps - confirm against the ISA
   reference.  The parallel slot restores the R4 value saved on entry. */
R1.h = R1.h + R4.l (NS) || R4 = [SP];
RTS;

.size ___muldi3, .-___muldi3
99 changes: 0 additions & 99 deletions trunk/arch/blackfin/lib/muldi3.c

This file was deleted.

0 comments on commit aa3d9b3

Please sign in to comment.