Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add _arch_/_cpu_ to index_*/bit_* in x86 cpu-features.h
index_* and bit_* macros are used to access cpuid and feature arrays of
struct cpu_features.  It is very easy to use bits and indices of cpuid
array on feature array, especially in assembly code.  For example,
sysdeps/i386/i686/multiarch/bcopy.S has

	HAS_CPU_FEATURE (Fast_Rep_String)

which should be

	HAS_ARCH_FEATURE (Fast_Rep_String)

We change index_* and bit_* to index_cpu_*/index_arch_* and
bit_cpu_*/bit_arch_* so that we can catch such error at build time.

	[BZ #19762]
	* sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h
	(EXTRA_LD_ENVVARS): Add _arch_ to index_*/bit_*.
	* sysdeps/x86/cpu-features.c (init_cpu_features): Likewise.
	* sysdeps/x86/cpu-features.h (bit_*): Renamed to ...
	(bit_arch_*): This for feature array.
	(bit_*): Renamed to ...
	(bit_cpu_*): This for cpu array.
	(index_*): Renamed to ...
	(index_arch_*): This for feature array.
	(index_*): Renamed to ...
	(index_cpu_*): This for cpu array.
	[__ASSEMBLER__] (HAS_FEATURE): Add and use field.
	[__ASSEMBLER__] (HAS_CPU_FEATURE): Pass cpu to HAS_FEATURE.
	[__ASSEMBLER__] (HAS_ARCH_FEATURE): Pass arch to HAS_FEATURE.
	[!__ASSEMBLER__] (HAS_CPU_FEATURE): Replace index_##name and
	bit_##name with index_cpu_##name and bit_cpu_##name.
	[!__ASSEMBLER__] (HAS_ARCH_FEATURE): Replace index_##name and
	bit_##name with index_arch_##name and bit_arch_##name.
  • Loading branch information
H.J. Lu committed Mar 10, 2016
1 parent f8e9c4d commit 6aa3e97
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 151 deletions.
22 changes: 22 additions & 0 deletions ChangeLog
@@ -1,3 +1,25 @@
2016-03-10 H.J. Lu <hongjiu.lu@intel.com>

[BZ #19762]
* sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h
(EXTRA_LD_ENVVARS): Add _arch_ to index_*/bit_*.
* sysdeps/x86/cpu-features.c (init_cpu_features): Likewise.
* sysdeps/x86/cpu-features.h (bit_*): Renamed to ...
(bit_arch_*): This for feature array.
(bit_*): Renamed to ...
(bit_cpu_*): This for cpu array.
(index_*): Renamed to ...
(index_arch_*): This for feature array.
(index_*): Renamed to ...
(index_cpu_*): This for cpu array.
[__ASSEMBLER__] (HAS_FEATURE): Add and use field.
[__ASSEMBLER__] (HAS_CPU_FEATURE): Pass cpu to HAS_FEATURE.
[__ASSEMBLER__] (HAS_ARCH_FEATURE): Pass arch to HAS_FEATURE.
[!__ASSEMBLER__] (HAS_CPU_FEATURE): Replace index_##name and
bit_##name with index_cpu_##name and bit_cpu_##name.
[!__ASSEMBLER__] (HAS_ARCH_FEATURE): Replace index_##name and
bit_##name with index_arch_##name and bit_arch_##name.

2016-03-09 Aurelien Jarno <aurelien@aurel32.net>

[BZ #19792]
Expand Down
8 changes: 4 additions & 4 deletions sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h
Expand Up @@ -30,10 +30,10 @@
is always disabled for SUID programs and can be enabled by setting
environment variable, LD_PREFER_MAP_32BIT_EXEC. */
#define EXTRA_LD_ENVVARS \
case 21: \
if (memcmp (envline, "PREFER_MAP_32BIT_EXEC", 21) == 0) \
GLRO(dl_x86_cpu_features).feature[index_Prefer_MAP_32BIT_EXEC] \
|= bit_Prefer_MAP_32BIT_EXEC; \
case 21: \
if (memcmp (envline, "PREFER_MAP_32BIT_EXEC", 21) == 0) \
GLRO(dl_x86_cpu_features).feature[index_arch_Prefer_MAP_32BIT_EXEC] \
|= bit_arch_Prefer_MAP_32BIT_EXEC; \
break;

/* Extra unsecure variables. The names are all stuffed in a single
Expand Down
80 changes: 42 additions & 38 deletions sysdeps/x86/cpu-features.c
Expand Up @@ -75,13 +75,14 @@ init_cpu_features (struct cpu_features *cpu_features)
case 0x1c:
case 0x26:
/* BSF is slow on Atom. */
cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
cpu_features->feature[index_arch_Slow_BSF]
|= bit_arch_Slow_BSF;
break;

case 0x57:
/* Knights Landing. Enable Silvermont optimizations. */
cpu_features->feature[index_Prefer_No_VZEROUPPER]
|= bit_Prefer_No_VZEROUPPER;
cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
|= bit_arch_Prefer_No_VZEROUPPER;

case 0x37:
case 0x4a:
Expand All @@ -90,22 +91,22 @@ init_cpu_features (struct cpu_features *cpu_features)
case 0x5d:
/* Unaligned load versions are faster than SSSE3
on Silvermont. */
#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
#endif
#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
#endif
cpu_features->feature[index_Fast_Unaligned_Load]
|= (bit_Fast_Unaligned_Load
| bit_Prefer_PMINUB_for_stringop
| bit_Slow_SSE4_2);
cpu_features->feature[index_arch_Fast_Unaligned_Load]
|= (bit_arch_Fast_Unaligned_Load
| bit_arch_Prefer_PMINUB_for_stringop
| bit_arch_Slow_SSE4_2);
break;

default:
/* Unknown family 0x06 processors. Assuming this is one
of Core i3/i5/i7 processors if AVX is available. */
if ((ecx & bit_AVX) == 0)
if ((ecx & bit_cpu_AVX) == 0)
break;

case 0x1a:
Expand All @@ -117,20 +118,20 @@ init_cpu_features (struct cpu_features *cpu_features)
case 0x2f:
/* Rep string instructions, copy backward, unaligned loads
and pminub are fast on Intel Core i3, i5 and i7. */
#if index_Fast_Rep_String != index_Fast_Copy_Backward
# error index_Fast_Rep_String != index_Fast_Copy_Backward
#if index_arch_Fast_Rep_String != index_arch_Fast_Copy_Backward
# error index_arch_Fast_Rep_String != index_arch_Fast_Copy_Backward
#endif
#if index_Fast_Rep_String != index_Fast_Unaligned_Load
# error index_Fast_Rep_String != index_Fast_Unaligned_Load
#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
#endif
#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
#endif
cpu_features->feature[index_Fast_Rep_String]
|= (bit_Fast_Rep_String
| bit_Fast_Copy_Backward
| bit_Fast_Unaligned_Load
| bit_Prefer_PMINUB_for_stringop);
cpu_features->feature[index_arch_Fast_Rep_String]
|= (bit_arch_Fast_Rep_String
| bit_arch_Fast_Copy_Backward
| bit_arch_Fast_Unaligned_Load
| bit_arch_Prefer_PMINUB_for_stringop);
break;
}
}
Expand Down Expand Up @@ -159,20 +160,20 @@ init_cpu_features (struct cpu_features *cpu_features)
{
/* "Excavator" */
if (model >= 0x60 && model <= 0x7f)
cpu_features->feature[index_Fast_Unaligned_Load]
|= bit_Fast_Unaligned_Load;
cpu_features->feature[index_arch_Fast_Unaligned_Load]
|= bit_arch_Fast_Unaligned_Load;
}
}
else
kind = arch_kind_other;

/* Support i586 if CX8 is available. */
if (HAS_CPU_FEATURE (CX8))
cpu_features->feature[index_I586] |= bit_I586;
cpu_features->feature[index_arch_I586] |= bit_arch_I586;

/* Support i686 if CMOV is available. */
if (HAS_CPU_FEATURE (CMOV))
cpu_features->feature[index_I686] |= bit_I686;
cpu_features->feature[index_arch_I686] |= bit_arch_I686;

if (cpu_features->max_cpuid >= 7)
__cpuid_count (7, 0,
Expand All @@ -193,15 +194,16 @@ init_cpu_features (struct cpu_features *cpu_features)
{
/* Determine if AVX is usable. */
if (HAS_CPU_FEATURE (AVX))
cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable;
#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load
cpu_features->feature[index_arch_AVX_Usable]
|= bit_arch_AVX_Usable;
#if index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
# error index_arch_AVX2_Usable != index_arch_AVX_Fast_Unaligned_Load
#endif
/* Determine if AVX2 is usable. Unaligned load with 256-bit
AVX registers are faster on processors with AVX2. */
if (HAS_CPU_FEATURE (AVX2))
cpu_features->feature[index_AVX2_Usable]
|= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
cpu_features->feature[index_arch_AVX2_Usable]
|= bit_arch_AVX2_Usable | bit_arch_AVX_Fast_Unaligned_Load;
/* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
ZMM16-ZMM31 state are enabled. */
if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
Expand All @@ -211,20 +213,22 @@ init_cpu_features (struct cpu_features *cpu_features)
/* Determine if AVX512F is usable. */
if (HAS_CPU_FEATURE (AVX512F))
{
cpu_features->feature[index_AVX512F_Usable]
|= bit_AVX512F_Usable;
cpu_features->feature[index_arch_AVX512F_Usable]
|= bit_arch_AVX512F_Usable;
/* Determine if AVX512DQ is usable. */
if (HAS_CPU_FEATURE (AVX512DQ))
cpu_features->feature[index_AVX512DQ_Usable]
|= bit_AVX512DQ_Usable;
cpu_features->feature[index_arch_AVX512DQ_Usable]
|= bit_arch_AVX512DQ_Usable;
}
}
/* Determine if FMA is usable. */
if (HAS_CPU_FEATURE (FMA))
cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable;
cpu_features->feature[index_arch_FMA_Usable]
|= bit_arch_FMA_Usable;
/* Determine if FMA4 is usable. */
if (HAS_CPU_FEATURE (FMA4))
cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable;
cpu_features->feature[index_arch_FMA4_Usable]
|= bit_arch_FMA4_Usable;
}
}

Expand Down

0 comments on commit 6aa3e97

Please sign in to comment.