Skip to content

Commit

Permalink
percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support
Browse files Browse the repository at this point in the history
Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b
instructions.

-tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and
     other cosmetic changes.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
  • Loading branch information
Christoph Lameter authored and Tejun Heo committed Feb 28, 2011
1 parent 7c33433 commit b9ec40a
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 0 deletions.
48 changes: 48 additions & 0 deletions arch/x86/include/asm/percpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,26 @@ do { \
#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#endif /* !CONFIG_M386 */

#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \
({ \
char __ret; \
typeof(o1) __o1 = o1; \
typeof(o1) __n1 = n1; \
typeof(o2) __o2 = o2; \
typeof(o2) __n2 = n2; \
typeof(o2) __dummy = n2; \
asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
: "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \
: "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \
__ret; \
})

#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#endif /* CONFIG_X86_CMPXCHG64 */

/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
Expand Down Expand Up @@ -480,6 +500,34 @@ do { \
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)

/*
* Pretty complex macro to generate cmpxchg16 instruction. The instruction
* is not supported on early AMD64 processors so we must be able to emulate
* it in software. The address used in the cmpxchg16 instruction must be
* aligned to a 16 byte boundary.
*/
#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
({ \
char __ret; \
typeof(o1) __o1 = o1; \
typeof(o1) __n1 = n1; \
typeof(o2) __o2 = o2; \
typeof(o2) __n2 = n2; \
typeof(o2) __dummy; \
alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \
"cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \
X86_FEATURE_CX16, \
ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
"S" (&pcp1), "b"(__n1), "c"(__n2), \
"a"(__o1), "d"(__o2)); \
__ret; \
})

#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)

#endif

/* This is not atomic against other CPUs -- CPU preemption needs to be off */
Expand Down
1 change: 1 addition & 0 deletions arch/x86/lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ else
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
lib-y += cmpxchg16b_emu.o
endif
59 changes: 59 additions & 0 deletions arch/x86/lib/cmpxchg16b_emu.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*
*/
#include <linux/linkage.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#include <asm/dwarf2.h>

.text

/*
* Inputs:
* %rsi : memory location to compare
* %rax : low 64 bits of old value
* %rdx : high 64 bits of old value
* %rbx : low 64 bits of new value
* %rcx : high 64 bits of new value
* %al : Operation successful
*/
ENTRY(this_cpu_cmpxchg16b_emu)
CFI_STARTPROC

#
# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
# via the ZF. Caller will access %al to get result.
#
# Note that this is only useful for a cpuops operation. Meaning that we
# do *not* have a fully atomic operation but just an operation that is
# *atomic* on a single cpu (as provided by the this_cpu_xx class of
# macros).
#
this_cpu_cmpxchg16b_emu:
pushf
cli

cmpq %gs:(%rsi), %rax
jne not_same
cmpq %gs:8(%rsi), %rdx
jne not_same

movq %rbx, %gs:(%rsi)
movq %rcx, %gs:8(%rsi)

popf
mov $1, %al
ret

not_same:
popf
xor %al,%al
ret

CFI_ENDPROC

ENDPROC(this_cpu_cmpxchg16b_emu)

0 comments on commit b9ec40a

Please sign in to comment.