Skip to content

Commit

Permalink
Update x86-64 mpn routines from GMP 5.0.1.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ulrich Drepper committed Sep 3, 2010
1 parent ece2984 commit 0959ffc
Show file tree
Hide file tree
Showing 8 changed files with 482 additions and 187 deletions.
20 changes: 15 additions & 5 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
2010-09-02 Ulrich Drepper <drepper@redhat.com>

* sysdeps/x86_64/add_n.S: Update from GMP 5.0.1.
* sysdeps/x86_64/addmul_1.S: Likewise.
* sysdeps/x86_64/lshift.S: Likewise.
* sysdeps/x86_64/mul_1.S: Likewise.
* sysdeps/x86_64/rshift.S: Likewise.
* sysdeps/x86_64/sub_n.S: Likewise.
* sysdeps/x86_64/submul_1.S: Likewise.

2010-09-01 Samuel Thibault <samuel.thibault@ens-lyon.org>

This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
Define __sched_param instead of SCHED_* and sched_param when
This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
Define __sched_param instead of SCHED_* and sched_param when
<bits/sched.h> is included with __need_schedparam defined.
* bits/sched.h [__need_schedparam]
* bits/sched.h [__need_schedparam]
(SCHED_OTHER, SCHED_FIFO, SCHED_RR, sched_param): Do not define.
[!__defined_schedparam && (__need_schedparam || _SCHED_H)]
(__defined_schedparam): Define to 1.
[!__defined_schedparam && (__need_schedparam || _SCHED_H)]
(__defined_schedparam): Define to 1.
(__sched_param): New structure, identical to sched_param.
(__need_schedparam): Undefine.

Expand Down
99 changes: 79 additions & 20 deletions sysdeps/x86_64/add_n.S
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* Add two limb vectors of the same length > 0 and store sum in a third
limb vector.
Copyright (C) 2004 Free Software Foundation, Inc.
/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
sum in a third limb vector.
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
Expand All @@ -21,22 +21,81 @@
#include "sysdep.h"
#include "asm-syntax.h"

/* Register aliases used by the code below: rp is the pointer that is stored
   through, up and vp are the two pointers that are loaded from, n is the
   limb count, and cy is a register that is zeroed at entry and then shifted
   into the carry flag.  */
#define rp %rdi
#define up %rsi
#define vp %rdx
#define n %rcx
#define cy %r8

/* Defaults that apply only when `func' has not been defined already.
   NOTE(review): presumably another file (e.g. the subtract variant)
   predefines func and ADCSBB and then includes this one -- confirm.  */
#ifndef func
# define func __mpn_add_n
# define ADCSBB adc
#endif

/* NOTE(review): this span looks like a rendered diff whose +/- markers were
   stripped.  The old one-limb-per-iteration loop (ENTRY (__mpn_add_n) up to
   `adcq %rax, %rax', plus END (__mpn_add_n)) is interleaved with the new
   4-way unrolled routine (ENTRY (func) ... END (func)); both share the
   single `ret'.  Confirm against the real file before assembling.  */
.text
ENTRY (__mpn_add_n)
/* Old routine: move all three pointers past the end of their vectors and
   let rcx count from -n up to 0.  */
leaq (%rsi,%rcx,8), %rsi
leaq (%rdi,%rcx,8), %rdi
leaq (%rdx,%rcx,8), %rdx
negq %rcx
xorl %eax, %eax # clear cy
.p2align 2
L(loop):
movq (%rsi,%rcx,8), %rax
movq (%rdx,%rcx,8), %r10
adcq %r10, %rax
movq %rax, (%rdi,%rcx,8)
/* incq does not modify CF, so the carry produced by adcq is still live on
   the next iteration.  */
incq %rcx
jne L(loop)
movq %rcx, %rax # zero %rax
/* rax = 0 + 0 + CF, i.e. the carry out of the last limb.  */
adcq %rax, %rax
ENTRY (func)
/* New routine.  cy (r8) starts at 0.  */
xor %r8, %r8
/* Preload the first limb of each source vector.  */
mov (up), %r10
mov (vp), %r11

/* Rebase the pointers for negative indexing: up and vp point at their last
   limb, rp one limb before its last.  */
lea -8(up,n,8), up
lea -8(vp,n,8), vp
lea -16(rp,n,8), rp
/* eax = low two bits of the count; it selects where to enter the unrolled
   loop.  n itself is negated so it can count upwards towards zero.  */
mov %ecx, %eax
neg n
and $3, %eax
je L(b00)
add %rax, n /* clear low rcx bits for jrcxz */
cmp $2, %eax
jl L(b01)
je L(b10)

/* Entry stubs, one per residue of the count modulo 4.  Each shifts bit 0 of
   cy into CF (cy was zeroed at entry) and, where the entry point consumes
   its pending limbs from r8/r9 rather than r10/r11, moves the preloaded
   limbs across.  */
L(b11): shr %r8 /* set cy */
jmp L(e11)

L(b00): shr %r8 /* set cy */
mov %r10, %r8
mov %r11, %r9
lea 4(n), n
jmp L(e00)

L(b01): shr %r8 /* set cy */
jmp L(e01)

L(b10): shr %r8 /* set cy */
mov %r10, %r8
mov %r11, %r9
jmp L(e10)

/* Exit path: combine the last pending limb pair, store it, and return the
   final carry flag as 0 or 1 in eax.  */
L(end): ADCSBB %r11, %r10
mov %r10, 8(rp)
mov %ecx, %eax /* clear eax, ecx contains 0 */
adc %eax, %eax
ret
END (__mpn_add_n)

/* Main loop, four limbs per iteration.  Loads for the next limb pair are
   issued before the ADCSBB of the current pair, alternating between the
   r8/r9 and r10/r11 register pairs.  The counter is advanced with lea and
   tested with jrcxz instead of add/cmp, so CF is carried unchanged from one
   ADCSBB to the next.  */
.p2align 4
L(top):
mov -24(up,n,8), %r8
mov -24(vp,n,8), %r9
ADCSBB %r11, %r10
mov %r10, -24(rp,n,8)
L(e00):
mov -16(up,n,8), %r10
mov -16(vp,n,8), %r11
ADCSBB %r9, %r8
mov %r8, -16(rp,n,8)
L(e11):
mov -8(up,n,8), %r8
mov -8(vp,n,8), %r9
ADCSBB %r11, %r10
mov %r10, -8(rp,n,8)
L(e10):
mov (up,n,8), %r10
mov (vp,n,8), %r11
ADCSBB %r9, %r8
mov %r8, (rp,n,8)
L(e01):
/* Leave the loop once the (negative) counter has reached zero.  */
jrcxz L(end)
lea 4(n), n
jmp L(top)
END (func)
115 changes: 92 additions & 23 deletions sysdeps/x86_64/addmul_1.S
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* AMD64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
the result to a second limb vector.
Copyright (C) 2004 Free Software Foundation, Inc.
Copyright (C) 2003,2004,2005,2007,2008,2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
Expand All @@ -21,26 +21,95 @@
#include "sysdep.h"
#include "asm-syntax.h"

/* Register aliases for the arguments: rp is the vector that is read,
   updated and written back, up is the vector that is multiplied, n is the
   limb count and v0 is the multiplier limb.
   NOTE(review): the code below mostly spells n and v0 as %rdx and %rcx
   directly rather than through these aliases.  */
#define rp %rdi
#define up %rsi
#define n %rdx
#define v0 %rcx

/* Defaults that apply only when `func' has not been defined already.
   NOTE(review): presumably the subtract-multiply variant predefines func
   and ADDSUB and then includes this file -- confirm.  */
#ifndef func
# define func __mpn_addmul_1
# define ADDSUB add
#endif

/* NOTE(review): this span looks like a rendered diff whose +/- markers were
   stripped.  The old scalar loop (ENTRY (__mpn_addmul_1) up to
   `movq %r8, %rax', plus END (__mpn_addmul_1)) is interleaved with the new
   two-limbs-per-iteration routine (ENTRY (func) ... END (func)); both share
   the final `ret'.  Confirm against the real file before assembling.  */
.text
ENTRY (__mpn_addmul_1)
/* Old routine: r11 counts from -n up to 0 while rsi/rdi point past the end
   of their vectors.  r8 holds the carry limb between iterations and r10 is
   kept at zero.  */
movq %rdx, %r11
leaq (%rsi,%rdx,8), %rsi
leaq (%rdi,%rdx,8), %rdi
negq %r11
xorl %r8d, %r8d
xorl %r10d, %r10d
.p2align 2
L(loop):
movq (%rsi,%r11,8), %rax
/* rdx:rax = source limb * multiplier.  */
mulq %rcx
/* Add the destination limb, folding the carry into the high half.  */
addq (%rdi,%r11,8), %rax
adcq %r10, %rdx
/* Add the carry limb from the previous iteration.  */
addq %r8, %rax
movq %r10, %r8
movq %rax, (%rdi,%r11,8)
/* Next carry limb = high half + carry from the add above.  */
adcq %rdx, %r8
incq %r11
jne L(loop)
/* Return the final carry limb.  */
movq %r8, %rax
ENTRY (func)
/* New routine.  rbx and rbp are used as working registers below, so they
   are saved here and restored before returning.  */
push %rbx
push %rbp
/* rbx = -n.  The count is moved out of rdx because mul writes rdx.  */
lea (%rdx), %rbx
neg %rbx

/* Preload the first source limb and the first destination limb.  */
mov (up), %rax
mov (rp), %r10

/* Rebase the pointers for indexing with the negative counter in rbx.  */
lea -16(rp,%rdx,8), rp
lea (up,%rdx,8), up
/* rdx:rax = first source limb * multiplier.  */
mul %rcx

/* Bit 0 of the (negated) count decides between the odd and even set-up.  */
bt $0, %ebx
jc L(odd)

/* Even count: keep the first product in r11 (low) / rbp (high) and start
   the second multiplication.  */
lea (%rax), %r11
mov 8(up,%rbx,8), %rax
lea (%rdx), %rbp
mul %rcx
add $2, %rbx
/* Counter no longer negative: only two limbs in total, go to the tail.  */
jns L(n2)

lea (%rax), %r8
mov (up,%rbx,8), %rax
lea (%rdx), %r9
jmp L(mid)

L(odd): add $1, %rbx
/* Counter no longer negative: a single limb, go to the last tail step.  */
jns L(n1)

/* Odd count: keep the first product in r8 (low) / r9 (high), compute the
   second into r11/rbp, and join the loop at its bottom.  */
lea (%rax), %r8
mov (up,%rbx,8), %rax
lea (%rdx), %r9
mul %rcx
lea (%rax), %r11
mov 8(up,%rbx,8), %rax
lea (%rdx), %rbp
jmp L(e)

/* Main loop, two limbs per iteration.  Each half issues the next mul,
   applies the previous low product to the destination limb held in r10
   with ADDSUB, and propagates carries through the saved high halves.
   Register copies use `lea (reg), reg'.  */
.p2align 4
L(top): mul %rcx
ADDSUB %r8, %r10
lea (%rax), %r8
mov (up,%rbx,8), %rax
adc %r9, %r11
mov %r10, -8(rp,%rbx,8)
mov (rp,%rbx,8), %r10
lea (%rdx), %r9
adc $0, %rbp
L(mid): mul %rcx
ADDSUB %r11, %r10
lea (%rax), %r11
mov 8(up,%rbx,8), %rax
adc %rbp, %r8
mov %r10, (rp,%rbx,8)
mov 8(rp,%rbx,8), %r10
lea (%rdx), %rbp
adc $0, %r9
L(e): add $2, %rbx
js L(top)

/* Tail: drain the products still held in registers into the last
   destination limbs.  */
mul %rcx
ADDSUB %r8, %r10
adc %r9, %r11
mov %r10, -8(rp)
adc $0, %rbp
L(n2): mov (rp), %r10
ADDSUB %r11, %r10
adc %rbp, %rax
mov %r10, (rp)
adc $0, %rdx
L(n1): mov 8(rp), %r10
ADDSUB %rax, %r10
mov %r10, 8(rp)
mov %ebx, %eax /* zero rax */
/* Return value = high half of the last product + outstanding carry.  */
adc %rdx, %rax
pop %rbp
pop %rbx
ret
END (__mpn_addmul_1)
END (func)
Loading

0 comments on commit 0959ffc

Please sign in to comment.