# Patch summary (commentary only; text before the first "diff --git" header
# is not part of any hunk).
#
# 1. [refs]: moves refs/heads/master from 1edfbb4153bd... to 5870661c091e...
#
# 2. trunk/arch/x86/include/asm/bitops.h:
#    - Introduces a file-local macro BSF_PREFIX. It expands to "rep;" when
#      CONFIG_X86_GENERIC or CONFIG_GENERIC_CPU is defined and
#      CONFIG_CC_OPTIMIZE_FOR_SIZE is not; otherwise it expands to nothing.
#    - Prepends BSF_PREFIX to the "bsf %1,%0" asm template in __ffs() and
#      ffz(), so the emitted instruction is either "rep;bsf" or plain "bsf".
#      The rationale (REP-prefixed BSF decodes as TZCNT on BMI-capable
#      hardware and as BSF elsewhere) is stated in the comment the patch adds.
#    - #undef's BSF_PREFIX right after ffz() so the macro does not leak to
#      the rest of the header or its includers.
#
# NOTE(review): this patch was received collapsed onto a single line. Line
# breaks below were reconstructed from the hunk headers (line counts match);
# indentation inside the hunks is a best-effort reconstruction -- verify
# whitespace against the target tree before applying.
diff --git a/[refs] b/[refs]
index ab707dac1ba7..4eea1916958c 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 1edfbb4153bd29bcf8d2236676238d5237972be1
+refs/heads/master: 5870661c091e827973674cc3469b50c959008c2b
diff --git a/trunk/arch/x86/include/asm/bitops.h b/trunk/arch/x86/include/asm/bitops.h
index ebaee695394e..b2af6645ea7e 100644
--- a/trunk/arch/x86/include/asm/bitops.h
+++ b/trunk/arch/x86/include/asm/bitops.h
@@ -347,6 +347,19 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 	 ? constant_test_bit((nr), (addr))	\
 	 : variable_test_bit((nr), (addr)))
 
+#if (defined(CONFIG_X86_GENERIC) || defined(CONFIG_GENERIC_CPU)) \
+    && !defined(CONFIG_CC_OPTIMIZE_FOR_SIZE)
+/*
+ * Since BSF and TZCNT have sufficiently similar semantics for the purposes
+ * for which we use them here, BMI-capable hardware will decode the prefixed
+ * variant as 'tzcnt ...' and may execute that faster than 'bsf ...', while
+ * older hardware will ignore the REP prefix and decode it as 'bsf ...'.
+ */
+# define BSF_PREFIX "rep;"
+#else
+# define BSF_PREFIX
+#endif
+
 /**
  * __ffs - find first set bit in word
  * @word: The word to search
@@ -355,7 +368,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
  */
 static inline unsigned long __ffs(unsigned long word)
 {
-	asm("bsf %1,%0"
+	asm(BSF_PREFIX "bsf %1,%0"
 		: "=r" (word)
 		: "rm" (word));
 	return word;
@@ -369,12 +382,14 @@ static inline unsigned long __ffs(unsigned long word)
  */
 static inline unsigned long ffz(unsigned long word)
 {
-	asm("bsf %1,%0"
+	asm(BSF_PREFIX "bsf %1,%0"
 		: "=r" (word)
 		: "r" (~word));
 	return word;
 }
 
+#undef BSF_PREFIX
+
 /*
  * __fls: find last set bit in word
  * @word: The word to search