Fix memmove-vec-unaligned-erms.S
__mempcpy_erms and __memmove_erms can't be placed between __memmove_chk
and __memmove; placing them there breaks __memmove_chk.

Don't check source == destination first since it is less common.

	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:
	(__mempcpy_erms, __memmove_erms): Moved before __mempcpy_chk
	with unaligned_erms.
	(__memmove_erms): Skip if source == destination.
	(__memmove_unaligned_erms): Don't check source == destination
	first.
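[Editorial note, not part of the commit: the C sketch below is illustrative code written for this page, not glibc source; the name sketch_memmove and the byte-at-a-time loops are stand-ins for REP MOVSB and the vector copy paths. The placement fix matters because, as the diff shows, MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms) has no jump after its __chk_fail check and simply falls through into MEMMOVE_SYMBOL (__memmove, unaligned_erms), so no other entry points may sit between them. The branch-order fix is what the sketch shows: test the common destination-below-source case first, the rare source == destination case second, and only then decide between a forward and a backward copy.]

#include <stddef.h>
#include <stdint.h>

/* Editorial sketch of the branch order used by the patched code:
   common case (dst below src) first, rare case (dst == src) second,
   then the overlap test that selects a backward copy.  Byte loops
   stand in for REP MOVSB and the vector moves.  */
static void *
sketch_memmove (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if ((uintptr_t) d < (uintptr_t) s)
    goto forward;		/* Most common: forward copy cannot clobber src.  */
  if (d == s)
    return dst;			/* Rare: nothing to do.  */
  if ((uintptr_t) d < (uintptr_t) s + n)
    {
      /* dst overlaps the tail of src: copy backward.  */
      while (n--)
	d[n] = s[n];
      return dst;
    }

forward:
  for (size_t i = 0; i < n; i++)
    d[i] = s[i];
  return dst;
}

[Before the patch, __memmove_unaligned_erms tested je L(nop) before the jb branch for the common case, and __memmove_erms used a single jbe that sent the equal case through rep movsb; the patch reorders those tests as sketched above.]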
H.J. Lu committed Apr 3, 2016
1 parent 27d3ce1 commit ea2785e
Showing 2 changed files with 39 additions and 24 deletions.
9 changes: 9 additions & 0 deletions ChangeLog
@@ -1,3 +1,12 @@
+2016-04-03  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:
+	(__mempcpy_erms, __memmove_erms): Moved before __mempcpy_chk
+	with unaligned_erms.
+	(__memmove_erms): Skip if source == destination.
+	(__memmove_unaligned_erms): Don't check source == destination
+	first.
+
 2016-04-01  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86/cpu-features.c (init_cpu_features): Don't set
54 changes: 30 additions & 24 deletions sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -95,46 +95,30 @@ L(start):
 	ret
 END (MEMMOVE_SYMBOL (__memmove, unaligned_2))
 
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
-# endif
-
-ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
-	movq	%rdi, %rax
-	addq	%rdx, %rax
-	jmp	L(start_erms)
-END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
-
-# ifdef SHARED
-ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
-# endif
-
 # if VEC_SIZE == 16
 /* Only used to measure performance of REP MOVSB.  */
 # ifdef SHARED
 ENTRY (__mempcpy_erms)
 	movq	%rdi, %rax
 	addq	%rdx, %rax
-	jmp	L(movsb)
+	jmp	L(start_movsb)
 END (__mempcpy_erms)
 # endif
 
 ENTRY (__memmove_erms)
 	movq	%rdi, %rax
+L(start_movsb):
 	movq	%rdx, %rcx
 	cmpq	%rsi, %rdi
-	jbe	1f
+	jb	1f
+	/* Source == destination is less common.  */
+	je	2f
 	leaq	(%rsi,%rcx), %rdx
 	cmpq	%rdx, %rdi
 	jb	L(movsb_backward)
 1:
 	rep movsb
+2:
 	ret
 L(movsb_backward):
 	leaq	-1(%rdi,%rcx), %rdi
@@ -147,6 +131,26 @@ END (__memmove_erms)
 strong_alias (__memmove_erms, __memcpy_erms)
 # endif
 
+# ifdef SHARED
+ENTRY (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_SYMBOL (__mempcpy_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+	movq	%rdi, %rax
+	addq	%rdx, %rax
+	jmp	L(start_erms)
+END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+
+# ifdef SHARED
+ENTRY (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms))
+# endif
+
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
 	movq	%rdi, %rax
 L(start_erms):
@@ -166,8 +170,9 @@ L(return):
 
 L(movsb):
 	cmpq	%rsi, %rdi
-	je	L(nop)
 	jb	1f
+	/* Source == destination is less common.  */
+	je	L(nop)
 	leaq	(%rsi,%rdx), %r9
 	cmpq	%r9, %rdi
 	/* Avoid slow backward REP MOVSB.  */
@@ -191,8 +196,9 @@ L(movsb_more_2x_vec):
 L(more_2x_vec):
 	/* More than 2 * VEC.  */
 	cmpq	%rsi, %rdi
-	je	L(nop)
 	jb	L(copy_forward)
+	/* Source == destination is less common.  */
+	je	L(nop)
 	leaq	(%rsi,%rdx), %rcx
 	cmpq	%rcx, %rdi
 	jb	L(more_2x_vec_overlap)