[x86] Add a feature bit: Fast_Unaligned_Copy
On AMD processors, memcpy optimized with unaligned SSE load is
slower than memcpy optimized with aligned SSSE3, while other string
functions are faster with unaligned SSE load.  A feature bit,
Fast_Unaligned_Copy, is added to select memcpy optimized with
unaligned SSE load.

	[BZ #19583]
	* sysdeps/x86/cpu-features.c (init_cpu_features): Set
	Fast_Unaligned_Copy with Fast_Unaligned_Load for Intel
	processors.  Set Fast_Copy_Backward for AMD Excavator
	processors.
	* sysdeps/x86/cpu-features.h (bit_arch_Fast_Unaligned_Copy):
	New.
	(index_arch_Fast_Unaligned_Copy): Likewise.
	* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Check
	Fast_Unaligned_Copy instead of Fast_Unaligned_Load.
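
For readers unfamiliar with glibc's arch feature bits, the minimal C sketch below shows the scheme this commit extends: a bit mask (bit_arch_Fast_Unaligned_Copy) and a word index (index_arch_Fast_Unaligned_Copy) select one bit in the cpu_features->feature[] array, which init_cpu_features sets during CPU identification and the memcpy selector later tests.  The FEATURE_INDEX_* enum and the set/has helpers here are simplified stand-ins for glibc's real plumbing, not its actual definitions.

/* Minimal sketch of the feature-bit scheme; not glibc's real code.  */
#include <stdio.h>

enum { FEATURE_INDEX_1 = 0, FEATURE_INDEX_MAX };

struct cpu_features
{
  unsigned int feature[FEATURE_INDEX_MAX];
};

/* Bit and word index for the new feature, mirroring cpu-features.h.  */
#define bit_arch_Fast_Unaligned_Copy   (1 << 18)
#define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1

/* Set during CPU identification (init_cpu_features in cpu-features.c).  */
static void
set_fast_unaligned_copy (struct cpu_features *cpu_features)
{
  cpu_features->feature[index_arch_Fast_Unaligned_Copy]
    |= bit_arch_Fast_Unaligned_Copy;
}

/* Tested by the memcpy selector (HAS_ARCH_FEATURE in the real code).  */
static int
has_fast_unaligned_copy (const struct cpu_features *cpu_features)
{
  return (cpu_features->feature[index_arch_Fast_Unaligned_Copy]
          & bit_arch_Fast_Unaligned_Copy) != 0;
}

int
main (void)
{
  struct cpu_features cf = { { 0 } };
  set_fast_unaligned_copy (&cf);
  printf ("Fast_Unaligned_Copy: %d\n", has_fast_unaligned_copy (&cf));
  return 0;
}
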
H.J. Lu committed Mar 28, 2016
1 parent b66d837 commit e41b395
Showing 4 changed files with 31 additions and 2 deletions.
14 changes: 14 additions & 0 deletions ChangeLog
@@ -1,3 +1,17 @@
2016-03-28 H.J. Lu <hongjiu.lu@intel.com>
Amit Pawar <Amit.Pawar@amd.com>

[BZ #19583]
* sysdeps/x86/cpu-features.c (init_cpu_features): Set
Fast_Unaligned_Copy with Fast_Unaligned_Load for Intel
processors. Set Fast_Copy_Backward for AMD Excavator
processors.
* sysdeps/x86/cpu-features.h (bit_arch_Fast_Unaligned_Copy):
New.
(index_arch_Fast_Unaligned_Copy): Likewise.
* sysdeps/x86_64/multiarch/memcpy.S (__new_memcpy): Check
Fast_Unaligned_Copy instead of Fast_Unaligned_Load.

2016-03-25 Florian Weimer <fweimer@redhat.com>

[BZ #19791]
14 changes: 13 additions & 1 deletion sysdeps/x86/cpu-features.c
@@ -152,9 +152,13 @@ init_cpu_features (struct cpu_features *cpu_features)
#endif
#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
#endif
#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
#endif
cpu_features->feature[index_arch_Fast_Unaligned_Load]
|= (bit_arch_Fast_Unaligned_Load
| bit_arch_Fast_Unaligned_Copy
| bit_arch_Prefer_PMINUB_for_stringop
| bit_arch_Slow_SSE4_2);
break;
@@ -182,11 +186,15 @@ init_cpu_features (struct cpu_features *cpu_features)
#endif
#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
#endif
#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
#endif
cpu_features->feature[index_arch_Fast_Rep_String]
|= (bit_arch_Fast_Rep_String
| bit_arch_Fast_Copy_Backward
| bit_arch_Fast_Unaligned_Load
| bit_arch_Fast_Unaligned_Copy
| bit_arch_Prefer_PMINUB_for_stringop);
break;
}
@@ -220,10 +228,14 @@ init_cpu_features (struct cpu_features *cpu_features)

if (family == 0x15)
{
#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
#endif
/* "Excavator" */
if (model >= 0x60 && model <= 0x7f)
cpu_features->feature[index_arch_Fast_Unaligned_Load]
|= bit_arch_Fast_Unaligned_Load;
|= (bit_arch_Fast_Unaligned_Load
| bit_arch_Fast_Copy_Backward);
}
}
else
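
The AMD branch above enables the new bits only for family 0x15, models 0x60-0x7f ("Excavator").  As an illustration of where those numbers come from, here is a standard decoding of CPUID leaf 1 EAX into display family and model; this is generic x86 practice, not necessarily the exact code glibc uses, and the sample EAX value is hypothetical.

/* Illustrative decoding of CPUID leaf 1 EAX into the family/model values
   checked above (family == 0x15, models 0x60-0x7f for "Excavator").  */
#include <stdio.h>

static void
decode_family_model (unsigned int eax, unsigned int *family,
		     unsigned int *model)
{
  unsigned int base_family = (eax >> 8) & 0x0f;
  unsigned int base_model = (eax >> 4) & 0x0f;
  unsigned int ext_family = (eax >> 20) & 0xff;
  unsigned int ext_model = (eax >> 16) & 0x0f;

  *family = base_family;
  *model = base_model;
  if (base_family == 0x0f)
    {
      /* AMD family 0x15 is encoded as base 0xf + extended 0x06.  */
      *family += ext_family;
      *model += ext_model << 4;
    }
}

int
main (void)
{
  /* Hypothetical sample EAX: ext family 0x06, ext model 0x6, base family
     0xf, base model 0x0 -> display family 0x15, model 0x60.  */
  unsigned int family, model;
  decode_family_model (0x00660f00, &family, &model);
  printf ("family 0x%x, model 0x%x\n", family, model);
  return 0;
}
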
3 changes: 3 additions & 0 deletions sysdeps/x86/cpu-features.h
@@ -35,6 +35,7 @@
#define bit_arch_I686 (1 << 15)
#define bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16)
#define bit_arch_Prefer_No_VZEROUPPER (1 << 17)
#define bit_arch_Fast_Unaligned_Copy (1 << 18)

/* CPUID Feature flags. */

@@ -101,6 +102,7 @@
# define index_arch_I686 FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1*FEATURE_SIZE


# if defined (_LIBC) && !IS_IN (nonlib)
@@ -265,6 +267,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_arch_I686 FEATURE_INDEX_1
# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1

#endif /* !__ASSEMBLER__ */

2 changes: 1 addition & 1 deletion sysdeps/x86_64/multiarch/memcpy.S
@@ -42,7 +42,7 @@ ENTRY(__new_memcpy)
HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
jnz 2f
lea __memcpy_sse2_unaligned(%rip), %RAX_LP
HAS_ARCH_FEATURE (Fast_Unaligned_Load)
HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
jnz 2f
lea __memcpy_sse2(%rip), %RAX_LP
HAS_CPU_FEATURE (SSSE3)
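
In C terms, the selection order implemented by __new_memcpy after this change looks roughly like the sketch below.  The __memcpy_* variant names mirror the assembly above; the has_*() helpers are hypothetical stand-ins for HAS_ARCH_FEATURE / HAS_CPU_FEATURE, and the SSSE3 fallback past the visible hunk is assumed.

#include <stddef.h>

typedef void *(*memcpy_t) (void *, const void *, size_t);

/* IFUNC implementations dispatched by __new_memcpy.  */
extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
extern void *__memcpy_ssse3 (void *, const void *, size_t);
extern void *__memcpy_sse2 (void *, const void *, size_t);

/* Hypothetical stand-ins for HAS_ARCH_FEATURE / HAS_CPU_FEATURE.  */
extern int has_arch_feature_avx_fast_unaligned_load (void);
extern int has_arch_feature_fast_unaligned_copy (void);
extern int has_cpu_feature_ssse3 (void);

static memcpy_t
select_memcpy (void)
{
  if (has_arch_feature_avx_fast_unaligned_load ())
    return __memcpy_avx_unaligned;
  /* Before this commit the test here was Fast_Unaligned_Load, which is
     also set on AMD Excavator; Fast_Unaligned_Copy is Intel-only, so AMD
     now falls through to the aligned SSSE3 variant.  */
  if (has_arch_feature_fast_unaligned_copy ())
    return __memcpy_sse2_unaligned;
  if (has_cpu_feature_ssse3 ())
    return __memcpy_ssse3;
  return __memcpy_sse2;
}
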
