Skip to content

Commit

Permalink
Unroll the loop in the x86-64 SSE4.2 strlen.
Browse files: browse the repository at this point in the history
  • Loading branch information
H.J. Lu authored and Ulrich Drepper committed Jan 13, 2010
1 parent 52e96a8 commit 5a7af22
Showing 2 changed files with 49 additions and 15 deletions.
4 changes: 4 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
2010-01-12 H.J. Lu <hongjiu.lu@intel.com>

* sysdeps/x86_64/multiarch/strlen.S: Unroll the loop.

2010-01-13 Ulrich Drepper <drepper@redhat.com>

* stdlib/stdlib.h: Be a bit more relaxed about obsoleted mktemp symbol.
60 changes: 45 additions & 15 deletions sysdeps/x86_64/multiarch/strlen.S
Original file line number Diff line number Diff line change
@@ -46,28 +46,58 @@ END(strlen)
__strlen_sse42:
cfi_startproc
CALL_MCOUNT
pxor %xmm2, %xmm2
movq %rdi, %rcx
pxor %xmm1, %xmm1
movl %edi, %ecx
movq %rdi, %r8
andq $~15, %rdi
movdqa %xmm2, %xmm1
pcmpeqb (%rdi), %xmm2
orl $0xffffffff, %esi
subq %rdi, %rcx
shll %cl, %esi
pmovmskb %xmm2, %edx
andl %esi, %edx
jnz 1f

2: pcmpistri $0x08, 16(%rdi), %xmm1
leaq 16(%rdi), %rdi
jnz 2b
xor %edi, %ecx
pcmpeqb (%rdi), %xmm1
pmovmskb %xmm1, %edx
shrl %cl, %edx
shll %cl, %edx
andl %edx, %edx
jnz L(less16bytes)
pxor %xmm1, %xmm1

.p2align 4
L(more64bytes_loop):
pcmpistri $0x08, 16(%rdi), %xmm1
jz L(more32bytes)

pcmpistri $0x08, 32(%rdi), %xmm1
jz L(more48bytes)

pcmpistri $0x08, 48(%rdi), %xmm1
jz L(more64bytes)

add $64, %rdi
pcmpistri $0x08, (%rdi), %xmm1
jnz L(more64bytes_loop)
leaq (%rdi,%rcx), %rax
subq %r8, %rax
ret

1: subq %r8, %rdi
.p2align 4
L(more32bytes):
leaq 16(%rdi,%rcx, 1), %rax
subq %r8, %rax
ret

.p2align 4
L(more48bytes):
leaq 32(%rdi,%rcx, 1), %rax
subq %r8, %rax
ret

.p2align 4
L(more64bytes):
leaq 48(%rdi,%rcx, 1), %rax
subq %r8, %rax
ret

.p2align 4
L(less16bytes):
subq %r8, %rdi
bsfl %edx, %eax
addq %rdi, %rax
ret

0 comments on commit 5a7af22

Please sign in to comment.