x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load instead of Slow_BSF, and also check
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing selection
logic is replaced with the following order (a C sketch of the equivalent
logic follows the list):

1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3 otherwise.
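
A minimal C sketch of the new selection order (not glibc source): the
cpu_has_* predicates and select_memcpy below are hypothetical stand-ins for
the HAS_ARCH_FEATURE/HAS_CPU_FEATURE checks performed by the assembly IFUNC
resolver in the diff below.

#include <stddef.h>

typedef void *(*memcpy_fn) (void *, const void *, size_t);

/* Hypothetical feature predicates standing in for the CPU feature bits.  */
extern int cpu_has_avx_fast_unaligned_load (void);
extern int cpu_has_fast_unaligned_load (void);
extern int cpu_has_ssse3 (void);
extern int cpu_has_fast_copy_backward (void);

/* The candidate implementations; in glibc these are assembly routines.  */
extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2 (void *, const void *, size_t);
extern void *__memcpy_ssse3_back (void *, const void *, size_t);
extern void *__memcpy_ssse3 (void *, const void *, size_t);

/* Mirror of selection steps 1-5 above.  */
static memcpy_fn
select_memcpy (void)
{
  if (cpu_has_avx_fast_unaligned_load ())   /* 1 */
    return __memcpy_avx_unaligned;
  if (cpu_has_fast_unaligned_load ())       /* 2 */
    return __memcpy_sse2_unaligned;
  if (!cpu_has_ssse3 ())                    /* 3 */
    return __memcpy_sse2;
  if (cpu_has_fast_copy_backward ())        /* 4 */
    return __memcpy_ssse3_back;
  return __memcpy_ssse3;                    /* 5 */
}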

	[BZ #18880]
	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
	instead of Slow_BSF, and also check for Fast_Copy_Backward to
	enable __memcpy_ssse3_back.
H.J. Lu committed Mar 4, 2016
1 parent 4b230f6 commit 14a1d7c
Showing 2 changed files with 22 additions and 13 deletions.
8 changes: 8 additions & 0 deletions ChangeLog
@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #18880]
+	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+	instead of Slow_BSF, and also check for Fast_Copy_Backward to
+	enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #19758]
27 changes: 14 additions & 13 deletions sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq	__memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz	4f
-	leaq	__memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea	__memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 
 # undef ENTRY
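
__new_memcpy above is the IFUNC resolver: the dynamic linker runs it once
while resolving memcpy and binds the symbol to whichever implementation
address it returns in RAX. As a rough, self-contained illustration of the
same mechanism from C (not glibc code; my_memcpy, both variants, and the
feature check are made up), an indirect function can be declared with GCC's
ifunc attribute:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Two made-up variants; a real resolver would choose between optimized
   routines based on CPU feature bits, as the assembly above does.  */
static void *
memcpy_generic (void *dst, const void *src, size_t n)
{
  return memcpy (dst, src, n);
}

static void *
memcpy_fast (void *dst, const void *src, size_t n)
{
  return memcpy (dst, src, n);
}

/* The resolver runs at relocation time and returns the implementation
   that every later call to my_memcpy will use.  */
static void *(*resolve_my_memcpy (void)) (void *, const void *, size_t)
{
  int cpu_is_fast = 1;          /* placeholder for a real feature check */
  return cpu_is_fast ? memcpy_fast : memcpy_generic;
}

/* GNU indirect function: my_memcpy's address comes from the resolver.  */
void *my_memcpy (void *dst, const void *src, size_t n)
     __attribute__ ((ifunc ("resolve_my_memcpy")));

int
main (void)
{
  char buf[sizeof "hello"];
  my_memcpy (buf, "hello", sizeof "hello");
  puts (buf);
  return 0;
}

Built with GCC on an ELF target, readelf -s should list my_memcpy with
symbol type IFUNC, and the dynamic linker calls resolve_my_memcpy when it
resolves that symbol.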
