Skip to content

Commit

Permalink
Really fix AVX tests
Browse files Browse the repository at this point in the history
There is no problem with strcmp, it doesn't use the YMM registers.
The math routines might, since gcc may generate code that uses them.
Introduce bit_YMM_Usable and use it in the math routines.
  • Loading branch information
Ulrich Drepper committed Jan 26, 2012
1 parent afc5ed0 commit 08cf777
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 28 deletions.
11 changes: 10 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,17 @@

[BZ #13583]
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE.
Clean up HAS_* macros.
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If
bit_AVX is set also check OSXSAVE/XCR0 and reset bit_AVX if necessary.
bit_AVX is set also check OSXSAVE/XCR0 and set bit_YMM_Usable if
possible.
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_YMM_USABLE, not
HAS_AVX.
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.

2012-01-25 Joseph Myers <joseph@codesourcery.com>

Expand Down
2 changes: 1 addition & 1 deletion sysdeps/x86_64/fpu/multiarch/e_atan2.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);

libm_ifunc (__ieee754_atan2,
HAS_FMA4 ? __ieee754_atan2_fma4
: (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
: (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
strong_alias (__ieee754_atan2, __atan2_finite)

# define __ieee754_atan2 __ieee754_atan2_sse2
Expand Down
2 changes: 1 addition & 1 deletion sysdeps/x86_64/fpu/multiarch/e_exp.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);

libm_ifunc (__ieee754_exp,
HAS_FMA4 ? __ieee754_exp_fma4
: (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
: (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
strong_alias (__ieee754_exp, __exp_finite)

# define __ieee754_exp __ieee754_exp_sse2
Expand Down
2 changes: 1 addition & 1 deletion sysdeps/x86_64/fpu/multiarch/e_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double);

libm_ifunc (__ieee754_log,
HAS_FMA4 ? __ieee754_log_fma4
: (HAS_AVX ? __ieee754_log_avx
: (HAS_YMM_USABLE ? __ieee754_log_avx
: __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite)

Expand Down
3 changes: 2 additions & 1 deletion sysdeps/x86_64/fpu/multiarch/s_atan.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
# define __atan_fma4 ((void *) 0)
# endif

libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
HAS_YMM_USABLE ? __atan_avx : __atan_sse2));

# define atan __atan_sse2
#endif
Expand Down
6 changes: 4 additions & 2 deletions sysdeps/x86_64/fpu/multiarch/s_sin.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
# define __sin_fma4 ((void *) 0)
# endif

libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos)

libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin)

# define __cos __cos_sse2
Expand Down
3 changes: 2 additions & 1 deletion sysdeps/x86_64/fpu/multiarch/s_tan.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
# define __tan_fma4 ((void *) 0)
# endif

libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
HAS_YMM_USABLE ? __tan_avx : __tan_sse2));

# define tan __tan_sse2
#endif
Expand Down
14 changes: 7 additions & 7 deletions sysdeps/x86_64/multiarch/init-arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,13 @@ __init_cpu_features (void)
if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
{
/* Reset the AVX bit in case OSXSAVE is disabled. */
if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
|| ({ unsigned int xcrlow;
unsigned int xcrhigh;
asm ("xgetbv"
: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
(xcrlow & 6) != 6; }))
__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
&& ({ unsigned int xcrlow;
unsigned int xcrhigh;
asm ("xgetbv"
: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
(xcrlow & 6) == 6; }))
__cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
}

__cpu_features.family = family;
Expand Down
26 changes: 13 additions & 13 deletions sysdeps/x86_64/multiarch/init-arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
#define bit_YMM_Usable (1 << 6)

#define bit_SSE2 (1 << 26)
#define bit_SSSE3 (1 << 9)
Expand Down Expand Up @@ -49,6 +50,7 @@
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE

#else /* __ASSEMBLER__ */

Expand Down Expand Up @@ -93,7 +95,7 @@ extern struct cpu_features


extern void __init_cpu_features (void) attribute_hidden;
#define INIT_ARCH()\
# define INIT_ARCH() \
do \
if (__cpu_features.kind == arch_kind_unknown) \
__init_cpu_features (); \
Expand Down Expand Up @@ -126,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
# define index_YMM_Usable FEATURE_INDEX_1

#define HAS_ARCH_FEATURE(idx, bit) \
((__get_cpu_features ()->feature[idx] & (bit)) != 0)
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)

#define HAS_FAST_REP_STRING \
HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)

#define HAS_FAST_COPY_BACKWARD \
HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)

#define HAS_SLOW_BSF \
HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)

#define HAS_PREFER_SSE_FOR_MEMOP \
HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)

#define HAS_FAST_UNALIGNED_LOAD \
HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)

# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)

#endif /* __ASSEMBLER__ */

0 comments on commit 08cf777

Please sign in to comment.