
Commit 9083bcc
Small optimizations to cancellable x86-64 code.
Ulrich Drepper committed Aug 8, 2009
1 parent 57b378a
Showing 5 changed files with 49 additions and 61 deletions.
9 changes: 9 additions & 0 deletions nptl/ChangeLog
@@ -1,3 +1,12 @@
2009-08-07 Ulrich Drepper <drepper@redhat.com>

* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Little optimizations
enabled by the special *_asynccancel functions.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.

* sysdeps/unix/sysv/linux/x86_64/cancellation.S: Include lowlevellock.h.

2009-08-04 Ulrich Drepper <drepper@redhat.com>

* sysdeps/unix/sysv/linux/x86_64/cancellation.S: New file.
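
The gist of these "little optimizations": the __pthread_enable_asynccancel and
__pthread_disable_asynccancel helpers introduced in cancellation.S are written
to preserve the registers their callers care about, so the hand-written callers
no longer need to spill live values into callee-saved registers such as %r12 or
reload %rdi from the stack around the calls (the hunks below drop exactly those
instructions). The C fragment below is only a rough, illustrative analogue of
the fast path these assembly files implement; it uses the public
pthread_setcanceltype API in place of the internal helpers, and the name
futex_wait_cancelable is invented for this sketch.

#define _GNU_SOURCE
#include <errno.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

/* Rough analogue of the "enable async cancel, block in futex, disable
   async cancel" sequence that sem_wait and the condvar code open-code.  */
static long
futex_wait_cancelable (int *futex_word, int expected)
{
  int oldtype;

  /* Corresponds to "callq __pthread_enable_asynccancel" in the .S files.  */
  pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, &oldtype);

  /* The blocking futex syscall is the actual cancellation point.  */
  long err = syscall (SYS_futex, futex_word, FUTEX_WAIT, expected, NULL);

  /* Corresponds to "callq __pthread_disable_asynccancel".  Because the
     assembly helpers preserve nearly all registers, the callers can keep
     the futex address and expected value live across these calls.  */
  pthread_setcanceltype (oldtype, NULL);

  return err == -1 ? -errno : 0;
}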
1 change: 1 addition & 0 deletions nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
@@ -20,6 +20,7 @@
#include <sysdep.h>
#include <tcb-offsets.h>
#include <kernel-features.h>
#include "lowlevellock.h"

#ifdef IS_IN_libpthread
# ifdef SHARED
2 changes: 0 additions & 2 deletions nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -157,7 +157,6 @@ __pthread_cond_timedwait:
.LcleanupSTART1:
34: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
movq 8(%rsp), %rdi

movq %r13, %r10
movl $FUTEX_WAIT_BITSET, %esi
@@ -511,7 +510,6 @@ __pthread_cond_timedwait:
.LcleanupSTART2:
4: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
movq 8(%rsp), %rdi

leaq 32(%rsp), %r10
cmpq $-1, dep_mutex(%rdi)
40 changes: 15 additions & 25 deletions nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -45,14 +45,11 @@ __pthread_cond_wait:
cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
#endif

pushq %r12
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r12, 0)
pushq %r13
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r13, 0)
#define FRAME_SIZE 32
subq $FRAME_SIZE, %rsp
leaq -FRAME_SIZE(%rsp), %rsp
cfi_adjust_cfa_offset(FRAME_SIZE)

/* Stack frame:
@@ -112,7 +109,7 @@ __pthread_cond_wait:
movl %edx, 4(%rsp)

/* Unlock. */
8: movl cond_futex(%rdi), %r12d
8: movl cond_futex(%rdi), %edx
LOCK
#if cond_lock == 0
decl (%rdi)
@@ -125,9 +122,7 @@
4: callq __pthread_enable_asynccancel
movl %eax, (%rsp)

movq 8(%rsp), %rdi
xorq %r10, %r10
movq %r12, %rdx
cmpq $-1, dep_mutex(%rdi)
leaq cond_futex(%rdi), %rdi
movl $FUTEX_WAIT, %esi
@@ -243,21 +238,14 @@ __pthread_cond_wait:

callq __pthread_mutex_cond_lock

14: addq $FRAME_SIZE, %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)

popq %r13
cfi_adjust_cfa_offset(-8)
cfi_restore(%r13)
popq %r12
cfi_adjust_cfa_offset(-8)
cfi_restore(%r12)
14: movq FRAME_SIZE(%rsp), %r13
leaq FRAME_SIZE+8(%rsp), %rsp
cfi_adjust_cfa_offset(-(FRAME_SIZE + 8))

/* We return the result of the mutex_lock operation. */
retq

cfi_adjust_cfa_offset(16 + FRAME_SIZE)
cfi_rel_offset(%r12, FRAME_SIZE + 8)
cfi_adjust_cfa_offset(8 + FRAME_SIZE)
cfi_rel_offset(%r13, FRAME_SIZE)

18: callq __pthread_mutex_cond_lock_adjust
@@ -285,7 +273,11 @@ __pthread_cond_wait:
movl $LLL_PRIVATE, %eax
movl $LLL_SHARED, %esi
cmovne %eax, %esi
/* The call preserves %rdx. */
callq __lll_unlock_wake
#if cond_lock != 0
subq $cond_lock, %rdi
#endif
jmp 4b

/* Locking in loop failed. */
@@ -349,9 +341,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
__condvar_cleanup1:
/* Stack frame:
rsp + 48
+--------------------------+
rsp + 40 | %r12 |
rsp + 40
+--------------------------+
rsp + 32 | %r13 |
+--------------------------+
@@ -410,7 +400,7 @@ __condvar_cleanup1:
3: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi)

/* Wake up a thread which wants to destroy the condvar object. */
xorq %r12, %r12
xorl %ecx, %ecx
cmpq $0xffffffffffffffff, total_seq(%rdi)
jne 4f
movl cond_nwaiters(%rdi), %eax
@@ -433,7 +423,7 @@
movl $SYS_futex, %eax
syscall
subq $cond_nwaiters, %rdi
movl $1, %r12d
movl $1, %ecx

4: LOCK
#if cond_lock == 0
@@ -449,10 +439,11 @@
movl $LLL_PRIVATE, %eax
movl $LLL_SHARED, %esi
cmovne %eax, %esi
/* The call preserves %rcx. */
callq __lll_unlock_wake

/* Wake up all waiters to make sure no signal gets lost. */
2: testq %r12, %r12
2: testl %ecx, %ecx
jnz 5f
addq $cond_futex, %rdi
cmpq $-1, dep_mutex-cond_futex(%rdi)
@@ -474,7 +465,6 @@
callq __pthread_mutex_cond_lock

movq 24(%rsp), %rdi
movq 40(%rsp), %r12
movq 32(%rsp), %r13
.LcallUR:
call _Unwind_Resume@PLT
58 changes: 24 additions & 34 deletions nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
@@ -61,24 +61,20 @@ sem_wait:
xorl %eax, %eax
retq

1: pushq %r12
/* This push is only needed to store the sem_t pointer for the
exception handler. */
1: pushq %rdi
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r12, 0)
pushq %r13
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r13, 0)
movq %rdi, %r13

LOCK
addq $1, NWAITERS(%r13)
addq $1, NWAITERS(%rdi)

.LcleanupSTART:
6: call __pthread_enable_asynccancel
movl %eax, %r8d

xorq %r10, %r10
movl $SYS_futex, %eax
movq %r13, %rdi
#if FUTEX_WAIT == 0
movl PRIVATE(%rdi), %esi
#else
@@ -87,73 +83,67 @@ sem_wait:
#endif
xorl %edx, %edx
syscall
movq %rax, %r12
movq %rax, %rcx

movl %r8d, %edi
xchgq %r8, %rdi
call __pthread_disable_asynccancel
.LcleanupEND:
movq %r8, %rdi

testq %r12, %r12
testq %rcx, %rcx
je 3f
cmpq $-EWOULDBLOCK, %r12
cmpq $-EWOULDBLOCK, %rcx
jne 4f

3:
#if VALUE == 0
movl (%r13), %eax
movl (%rdi), %eax
#else
movl VALUE(%r13), %eax
movl VALUE(%rdi), %eax
#endif
5: testl %eax, %eax
je 6b

leal -1(%rax), %edx
LOCK
#if VALUE == 0
cmpxchgl %edx, (%r13)
cmpxchgl %edx, (%rdi)
#else
cmpxchgl %edx, VALUE(%r13)
cmpxchgl %edx, VALUE(%rdi)
#endif
jne 5b

LOCK
subq $1, NWAITERS(%r13)

xorl %eax, %eax

9: popq %r13
cfi_adjust_cfa_offset(-8)
cfi_restore(%r13)
popq %r12
9: LOCK
subq $1, NWAITERS(%rdi)

leaq 8(%rsp), %rsp
cfi_adjust_cfa_offset(-8)
cfi_restore(%r12)

retq

cfi_adjust_cfa_offset(2 * 8)
cfi_rel_offset(%r12, 8)
cfi_rel_offset(%r13, 0)
4: negq %r12
cfi_adjust_cfa_offset(8)
4: negq %rcx
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
movl %r12d, %fs:(%rdx)
movl %ecx, %fs:(%rdx)
#else
# error "not supported. %rcx and %rdi must be preserved"
callq __errno_location@plt
movl %r12d, (%rax)
movl %ecx, (%rax)
#endif
orl $-1, %eax

LOCK
subq $1, NWAITERS(%r13)

jmp 9b
.size sem_wait,.-sem_wait


.type sem_wait_cleanup,@function
sem_wait_cleanup:
movq (%rsp), %rdi
LOCK
subq $1, NWAITERS(%r13)
subq $1, NWAITERS(%rdi)
movq %rax, %rdi
.LcallUR:
call _Unwind_Resume@PLT
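
The "pushq %rdi" near the top of the rewritten sem_wait exists only so the
cleanup handler can find the sem_t pointer if the thread is cancelled while
blocked in the futex wait: sem_wait_cleanup loads it from the stack and drops
the waiter count during unwinding. Below is a hedged C sketch of that cleanup
logic; the struct layout and names are illustrative only and do not claim to
match the internal new_sem definition exactly.

#include <stdatomic.h>

/* Illustrative stand-in for the internal semaphore layout.  */
struct new_sem_sketch
{
  unsigned int value;
  int private;
  atomic_ulong nwaiters;
};

/* What sem_wait_cleanup does in C terms.  The argument is the sem_t
   pointer that the assembly stashed with "pushq %rdi".  */
static void
sem_wait_cleanup_sketch (void *arg)
{
  struct new_sem_sketch *isem = arg;

  /* Matches "LOCK; subq $1, NWAITERS(%rdi)" in the cleanup code.  */
  atomic_fetch_sub (&isem->nwaiters, 1);
}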
