Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/asm changes from Ingo Molnar:
 "Main changes:

   - Apply low level mutex optimization on x86-64, by Wedson Almeida
     Filho.

   - Change bitops to be naturally 'long', by H Peter Anvin.

   - Add TSX-NI opcodes support to the x86 (instrumentation) decoder, by
     Masami Hiramatsu.

   - Add clang compatibility adjustments/workarounds, by Jan-Simon
     Möller"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, doc: Update uaccess.h comment to reflect clang changes
  x86, asm: Fix a compilation issue with clang
  x86, asm: Extend definitions of _ASM_* with a raw format
  x86, insn: Add new opcodes as of June, 2013
  x86/ia32/asm: Remove unused argument in macro
  x86, bitops: Change bitops to be native operand size
  x86: Use asm-goto to implement mutex fast path on x86-64
Linus Torvalds committed Sep 4, 2013
2 parents 6924a46 + f69fa9a commit 3d7e5fc
Showing 8 changed files with 107 additions and 54 deletions.
2 changes: 1 addition & 1 deletion arch/x86/ia32/ia32entry.S
@@ -452,7 +452,7 @@ ia32_badsys:

CFI_ENDPROC

-.macro PTREGSCALL label, func, arg
+.macro PTREGSCALL label, func
ALIGN
GLOBAL(\label)
leaq \func(%rip),%rax
6 changes: 5 additions & 1 deletion arch/x86/include/asm/asm.h
@@ -3,21 +3,25 @@

#ifdef __ASSEMBLY__
# define __ASM_FORM(x) x
+# define __ASM_FORM_RAW(x) x
# define __ASM_FORM_COMMA(x) x,
#else
# define __ASM_FORM(x) " " #x " "
+# define __ASM_FORM_RAW(x) #x
# define __ASM_FORM_COMMA(x) " " #x ","
#endif

#ifdef CONFIG_X86_32
# define __ASM_SEL(a,b) __ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
# define __ASM_SEL(a,b) __ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif

#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \
inst##q##__VA_ARGS__)
-#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg)
+#define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg)

#define _ASM_PTR __ASM_SEL(.long, .quad)
#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)
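For readers following the uaccess.h hunk further down: the quoted, space-padded form is meant for splicing a register name into an instruction string, while the new raw form yields a bare token string that can be pasted after a "%". The sketch below is illustrative only; the DEMO_* macros are invented stand-ins that hard-code the 64-bit selection, and the expansions are inferred from this hunk rather than taken from build output.

#include <assert.h>
#include <string.h>

/* Invented stand-ins for the macros above, with the 64-bit branch chosen. */
#define DEMO_ASM_FORM(x)	" " #x " "
#define DEMO_ASM_FORM_RAW(x)	#x
#define DEMO_ASM_REG_OLD(reg)	DEMO_ASM_FORM(r##reg)		/* pre-change __ASM_REG  */
#define DEMO_ASM_REG_NEW(reg)	DEMO_ASM_FORM_RAW(r##reg)	/* post-change __ASM_REG */

int main(void)
{
	assert(!strcmp(DEMO_ASM_REG_OLD(dx), " rdx "));		/* padded, unusable as a register name */
	assert(!strcmp(DEMO_ASM_REG_NEW(dx), "rdx"));		/* bare token string                   */
	assert(!strcmp("%" DEMO_ASM_REG_NEW(dx), "%rdx"));	/* what get_user() can now pin to      */
	return 0;
}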
46 changes: 27 additions & 19 deletions arch/x86/include/asm/bitops.h
@@ -15,6 +15,14 @@
#include <linux/compiler.h>
#include <asm/alternative.h>

+#if BITS_PER_LONG == 32
+# define _BITOPS_LONG_SHIFT 5
+#elif BITS_PER_LONG == 64
+# define _BITOPS_LONG_SHIFT 6
+#else
+# error "Unexpected BITS_PER_LONG"
+#endif

#define BIT_64(n) (U64_C(1) << (n))

/*
@@ -59,7 +67,7 @@
* restricted to acting on a single-word quantity.
*/
static __always_inline void
-set_bit(unsigned int nr, volatile unsigned long *addr)
+set_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -81,7 +89,7 @@ set_bit(unsigned int nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __set_bit(int nr, volatile unsigned long *addr)
+static inline void __set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@@ -97,7 +105,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
* in order to ensure changes are visible on other processors.
*/
static __always_inline void
-clear_bit(int nr, volatile unsigned long *addr)
+clear_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -118,13 +126,13 @@ clear_bit(int nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
-static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}

-static inline void __clear_bit(int nr, volatile unsigned long *addr)
+static inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@@ -141,7 +149,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
* No memory barrier is required here, because x86 cannot reorder stores past
* older loads. Same principle as spin_unlock.
*/
-static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
__clear_bit(nr, addr);
@@ -159,7 +167,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __change_bit(int nr, volatile unsigned long *addr)
+static inline void __change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@@ -173,7 +181,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void change_bit(int nr, volatile unsigned long *addr)
+static inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -194,7 +202,7 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

@@ -212,7 +220,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
* This is the same as test_and_set_bit on x86.
*/
static __always_inline int
-test_and_set_bit_lock(int nr, volatile unsigned long *addr)
+test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
return test_and_set_bit(nr, addr);
}
@@ -226,7 +234,7 @@ test_and_set_bit_lock(int nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

@@ -245,7 +253,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

@@ -272,7 +280,7 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

@@ -284,7 +292,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
}

/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

@@ -304,7 +312,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

@@ -315,13 +323,13 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
return oldbit;
}

-static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
{
-return ((1UL << (nr % BITS_PER_LONG)) &
-(addr[nr / BITS_PER_LONG])) != 0;
+return ((1UL << (nr & (BITS_PER_LONG-1))) &
+(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

-static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;

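A note on the new constant_test_bit() body: for a non-negative index the shift-and-mask form is equivalent to the old divide-and-modulo form, and taking nr as a signed long keeps bit numbers beyond 2^31 representable on 64-bit, which is what the operand-size change is after. A tiny stand-alone check, with invented DEMO_* names and 64-bit values assumed:

#include <assert.h>

#define DEMO_BITS_PER_LONG	64	/* x86-64 */
#define DEMO_LONG_SHIFT		6	/* matches _BITOPS_LONG_SHIFT above */

int main(void)
{
	long nr = (1L << 33) + 5;	/* a bit index that does not fit in 32 bits */

	assert((nr >> DEMO_LONG_SHIFT) == nr / DEMO_BITS_PER_LONG);		/* word index      */
	assert((nr & (DEMO_BITS_PER_LONG - 1)) == nr % DEMO_BITS_PER_LONG);	/* bit within word */
	return 0;
}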
30 changes: 30 additions & 0 deletions arch/x86/include/asm/mutex_64.h
@@ -16,6 +16,20 @@
*
* Atomically decrements @v and calls <fail_fn> if the result is negative.
*/
+#ifdef CC_HAVE_ASM_GOTO
+static inline void __mutex_fastpath_lock(atomic_t *v,
+void (*fail_fn)(atomic_t *))
+{
+asm volatile goto(LOCK_PREFIX " decl %0\n"
+" jns %l[exit]\n"
+: : "m" (v->counter)
+: "memory", "cc"
+: exit);
+fail_fn(v);
+exit:
+return;
+}
+#else
#define __mutex_fastpath_lock(v, fail_fn) \
do { \
unsigned long dummy; \
@@ -32,6 +46,7 @@ do { \
: "rax", "rsi", "rdx", "rcx", \
"r8", "r9", "r10", "r11", "memory"); \
} while (0)
+#endif

/**
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
@@ -56,6 +71,20 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
*
* Atomically increments @v and calls <fail_fn> if the result is nonpositive.
*/
+#ifdef CC_HAVE_ASM_GOTO
+static inline void __mutex_fastpath_unlock(atomic_t *v,
+void (*fail_fn)(atomic_t *))
+{
+asm volatile goto(LOCK_PREFIX " incl %0\n"
+" jg %l[exit]\n"
+: : "m" (v->counter)
+: "memory", "cc"
+: exit);
+fail_fn(v);
+exit:
+return;
+}
+#else
#define __mutex_fastpath_unlock(v, fail_fn) \
do { \
unsigned long dummy; \
@@ -72,6 +101,7 @@ do { \
: "rax", "rsi", "rdx", "rcx", \
"r8", "r9", "r10", "r11", "memory"); \
} while (0)
+#endif

#define __mutex_slowpath_needs_to_unlock() 1

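The asm-goto fast paths above let the success case fall straight through and branch to a C label only on failure, without the long clobber list of the old macros. A minimal user-space sketch of the same pattern, assuming a GCC or Clang version with asm goto support (the kernel gates the new code on CC_HAVE_ASM_GOTO); the demo_* names and the plain int counter are stand-ins, not kernel API:

#include <stdio.h>

static void demo_slowpath(int *v)
{
	printf("slow path taken, counter=%d\n", *v);
}

static inline void demo_fastpath_lock(int *v)
{
	asm volatile goto("lock; decl %0\n\t"
			  "jns %l[exit]"
			  : /* no outputs: asm goto does not allow them here */
			  : "m" (*v)
			  : "memory", "cc"
			  : exit);
	demo_slowpath(v);
exit:
	return;
}

int main(void)
{
	int counter = 1;

	demo_fastpath_lock(&counter);	/* 1 -> 0: not negative, fast path falls through */
	demo_fastpath_lock(&counter);	/* 0 -> -1: negative, the slow path is called    */
	printf("counter is now %d\n", counter);
	return 0;
}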
24 changes: 12 additions & 12 deletions arch/x86/include/asm/sync_bitops.h
@@ -26,9 +26,9 @@
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void sync_set_bit(int nr, volatile unsigned long *addr)
+static inline void sync_set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("lock; btsl %1,%0"
asm volatile("lock; bts %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -44,9 +44,9 @@ static inline void sync_set_bit(int nr, volatile unsigned long *addr)
* you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
* in order to ensure changes are visible on other processors.
*/
-static inline void sync_clear_bit(int nr, volatile unsigned long *addr)
+static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("lock; btrl %1,%0"
asm volatile("lock; btr %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -61,9 +61,9 @@ static inline void sync_clear_bit(int nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void sync_change_bit(int nr, volatile unsigned long *addr)
+static inline void sync_change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("lock; btcl %1,%0"
asm volatile("lock; btc %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -77,11 +77,11 @@ static inline void sync_change_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

asm volatile("lock; btsl %2,%1\n\tsbbl %0,%0"
asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "+m" (ADDR)
: "Ir" (nr) : "memory");
return oldbit;
@@ -95,11 +95,11 @@ static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

asm volatile("lock; btrl %2,%1\n\tsbbl %0,%0"
asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "+m" (ADDR)
: "Ir" (nr) : "memory");
return oldbit;
@@ -113,11 +113,11 @@ static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;

asm volatile("lock; btcl %2,%1\n\tsbbl %0,%0"
asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
: "=r" (oldbit), "+m" (ADDR)
: "Ir" (nr) : "memory");
return oldbit;
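Dropping the explicit 'l' suffix here lets the operand size follow the register: with the parameter widened to long, the bit-offset operand is a 64-bit register on x86-64 and the assembler emits the 64-bit form of bts/btr/btc, so bit numbers beyond 2^31 land in the right word. A hedged sketch of the post-change shape, with an invented name, the kernel's ADDR wrapper replaced by a plain memory operand, and an "r"-only constraint as a simplification of the kernel's "Ir":

/* Not kernel code: mirrors sync_set_bit() after this change. */
static inline void demo_sync_set_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("lock; bts %1,%0"
		     : "+m" (*addr)
		     : "r" (nr)
		     : "memory");
}

int main(void)
{
	static unsigned long bitmap[2];

	demo_sync_set_bit(64 + 3, bitmap);	/* sets bit 3 of the second word */
	return bitmap[1] == (1UL << 3) ? 0 : 1;
}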
7 changes: 5 additions & 2 deletions arch/x86/include/asm/uaccess.h
@@ -153,16 +153,19 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
* Careful: we have to cast the result to the type of the pointer
* for sign reasons.
*
-* The use of %edx as the register specifier is a bit of a
+* The use of _ASM_DX as the register specifier is a bit of a
* simplification, as gcc only cares about it as the starting point
* and not size: for a 64-bit value it will use %ecx:%edx on 32 bits
* (%ecx being the next register in gcc's x86 register sequence), and
* %rdx on 64 bits.
+*
+* Clang/LLVM cares about the size of the register, but still wants
+* the base register for something that ends up being a pair.
*/
#define get_user(x, ptr) \
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%edx"); \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
might_fault(); \
asm volatile("call __get_user_%P3" \
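A user-space sketch of the register pinning the comment above describes, assuming an x86-64 build; the demo name is invented. Per the comment, gcc would still place a 64-bit __val_gu in %rdx even with the old hard-coded "%edx" (it treats the name only as a starting point), but clang rejects the size mismatch, which is why the name is now built from _ASM_DX:

static inline unsigned long demo_pin_to_dx(unsigned long x)
{
	/* "%rdx" is what "%"_ASM_DX pastes to on a 64-bit build; a 32-bit
	 * build would get "%edx".  Both gcc and clang accept the matching
	 * width. */
	register unsigned long val asm("%rdx");

	asm("mov %1, %0" : "=r" (val) : "r" (x));	/* %0 is guaranteed to be %rdx */
	return val;
}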