x86: Add memory modify constraints to xchg() and cmpxchg()
commit 113fc5a upstream.

[ Backport to .32 by Tomáš Janoušek <tomi@nomi.cz> ]

xchg() and cmpxchg() modify their memory operands, not merely read
them.  For some versions of gcc the "memory" clobber has apparently
dealt with the situation, but not for all.

Originally-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Glauber Costa <glommer@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Palfrader <peter@palfrader.org>
Cc: Greg KH <gregkh@suse.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Zachary Amsden <zamsden@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
LKML-Reference: <4C4F7277.8050306@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
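
For readers who want to see the constraint in isolation, below is a minimal user-space sketch of the pattern the subject line refers to. It is not the kernel's actual __xchg()/__cmpxchg() code; xchg_demo and its variables are illustrative names. The point is that the memory operand is declared "+m" (read/write), so gcc knows the instruction modifies *ptr instead of relying only on the "memory" clobber.

#include <stdio.h>

/* Sketch only: an xchg whose memory operand is declared "+m",
 * telling gcc that the asm both reads and writes *ptr. */
static inline unsigned long xchg_demo(volatile unsigned long *ptr,
                                      unsigned long val)
{
        /* xchg with a memory operand is implicitly locked on x86. */
        asm volatile("xchg %0, %1"
                     : "+r" (val), "+m" (*ptr)
                     : /* no separate inputs */
                     : "memory");
        return val;             /* previous contents of *ptr */
}

int main(void)
{
        unsigned long x = 1;
        unsigned long old = xchg_demo(&x, 42);

        printf("old=%lu new=%lu\n", old, x);    /* old=1 new=42 */
        return 0;
}

With the operand passed only as an "m" input, some gcc versions apparently still assumed the operand itself was left untouched despite the "memory" clobber; marking it "+m" removes that ambiguity.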
H. Peter Anvin authored and Greg Kroah-Hartman committed Sep 27, 2010
1 parent 5d88118 commit a1a34b6
Showing 2 changed files with 20 additions and 49 deletions.
arch/x86/include/asm/cmpxchg_32.h: 19 additions & 46 deletions
@@ -17,60 +17,33 @@ struct __xchg_dummy {
 #define __xg(x) ((struct __xchg_dummy *)(x))
 
 /*
- * The semantics of XCHGCMP8B are a bit strange, this is why
- * there is a loop and the loading of %%eax and %%edx has to
- * be inside. This inlines well in most cases, the cached
- * cost is around ~38 cycles. (in the future we might want
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
- * might have an implicit FPU-save as a cost, so it's not
- * clear which path to go.)
+ * CMPXCHG8B only writes to the target if we had the previous
+ * value in registers, otherwise it acts as a read and gives us the
+ * "new previous" value. That is why there is a loop. Preloading
+ * EDX:EAX is a performance optimization: in the common case it means
+ * we need only one locked operation.
  *
- * cmpxchg8b must be used with the lock prefix here to allow
- * the instruction to be executed atomically, see page 3-102
- * of the instruction set reference 24319102.pdf. We need
- * the reader side to see the coherent 64bit value.
+ * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
+ * least an FPU save and/or %cr0.ts manipulation.
+ *
+ * cmpxchg8b must be used with the lock prefix here to allow the
+ * instruction to be executed atomically. We need to have the reader
+ * side to see the coherent 64bit value.
  */
-static inline void __set_64bit(unsigned long long *ptr,
-                               unsigned int low, unsigned int high)
+static inline void set_64bit(volatile u64 *ptr, u64 value)
 {
+        u32 low = value;
+        u32 high = value >> 32;
+        u64 prev = *ptr;
+
         asm volatile("\n1:\t"
-                     "movl (%1), %%eax\n\t"
-                     "movl 4(%1), %%edx\n\t"
                      LOCK_PREFIX "cmpxchg8b %0\n\t"
                      "jnz 1b"
-                     : "=m"(*ptr)
-                     : "D" (ptr),
-                       "b"(low),
-                       "c"(high)
-                     : "ax", "dx", "memory");
-}
-
-static inline void __set_64bit_constant(unsigned long long *ptr,
-                                        unsigned long long value)
-{
-        __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32));
-}
-
-#define ll_low(x) *(((unsigned int *)&(x)) + 0)
-#define ll_high(x) *(((unsigned int *)&(x)) + 1)
-
-static inline void __set_64bit_var(unsigned long long *ptr,
-                                   unsigned long long value)
-{
-        __set_64bit(ptr, ll_low(value), ll_high(value));
+                     : "=m" (*ptr), "+A" (prev)
+                     : "b" (low), "c" (high)
+                     : "memory");
 }
 
-#define set_64bit(ptr, value) \
-        (__builtin_constant_p((value)) \
-         ? __set_64bit_constant((ptr), (value)) \
-         : __set_64bit_var((ptr), (value)))
-
-#define _set_64bit(ptr, value) \
-        (__builtin_constant_p(value) \
-         ? __set_64bit(ptr, (unsigned int)(value), \
-                       (unsigned int)((value) >> 32)) \
-         : __set_64bit(ptr, ll_low((value)), ll_high((value))))
-
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
  * Note 2: xchg has side effect, so that attribute volatile is necessary,
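As an aside on the cmpxchg_32.h hunk above: the following is a rough user-space analogue of the new set_64bit() loop, written with gcc's __atomic_compare_exchange_n builtin rather than the kernel's inline assembly; set_64bit_demo and main() are illustrative only. The preloaded expected value plays the role of EDX:EAX, so the uncontended case needs just one locked operation, and a failed attempt hands back the "new previous" value for the retry, exactly as the new comment describes.

#include <stdint.h>
#include <stdio.h>

/* Rough analogue of set_64bit(): loop on a 64-bit compare-exchange
 * until our value lands in *ptr. On 32-bit x86 (with cmpxchg8b
 * available) gcc can lower this to a lock cmpxchg8b loop. */
static void set_64bit_demo(uint64_t *ptr, uint64_t value)
{
        uint64_t prev = *ptr;   /* preload, like EDX:EAX in the asm */

        /* On failure, prev is refreshed with the current *ptr
         * (the "new previous" value) and we simply try again. */
        while (!__atomic_compare_exchange_n(ptr, &prev, value, 0,
                                            __ATOMIC_SEQ_CST,
                                            __ATOMIC_SEQ_CST))
                ;
}

int main(void)
{
        uint64_t v = 0;

        set_64bit_demo(&v, 0x0123456789abcdefULL);
        printf("%llx\n", (unsigned long long)v);        /* 123456789abcdef */
        return 0;
}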
arch/x86/include/asm/cmpxchg_64.h: 1 addition & 3 deletions
@@ -8,13 +8,11 @@
 
 #define __xg(x) ((volatile long *)(x))
 
-static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
+static inline void set_64bit(volatile u64 *ptr, u64 val)
 {
         *ptr = val;
 }
 
-#define _set_64bit set_64bit
-
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
  * Note 2: xchg has side effect, so that attribute volatile is necessary,
