Skip to content

Commit

Permalink
Merge branch 'this_cpu_ops' into for-2.6.38
Browse files Browse the repository at this point in the history
  • Loading branch information
Tejun Heo committed Dec 18, 2010
2 parents 3ea9f68 + 8270137 commit 05c2d08
Show file tree
Hide file tree
Showing 3 changed files with 316 additions and 68 deletions.
3 changes: 3 additions & 0 deletions arch/x86/Kconfig.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)

config CMPXCHG_LOCAL
def_bool X86_64 || (X86_32 && !M386)

config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
Expand Down
187 changes: 150 additions & 37 deletions arch/x86/include/asm/percpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,39 +177,6 @@ do { \
} \
} while (0)

/*
* Add return operation
*/
#define percpu_add_return_op(var, val) \
({ \
typeof(var) paro_ret__ = val; \
switch (sizeof(var)) { \
case 1: \
asm("xaddb %0, "__percpu_arg(1) \
: "+q" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 2: \
asm("xaddw %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 4: \
asm("xaddl %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 8: \
asm("xaddq %0, "__percpu_arg(1) \
: "+re" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
default: __bad_percpu_size(); \
} \
paro_ret__ += val; \
paro_ret__; \
})

#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) pfo_ret__; \
Expand Down Expand Up @@ -262,6 +229,125 @@ do { \
} \
})

/*
* Add return operation
*/
#define percpu_add_return_op(var, val) \
({ \
typeof(var) paro_ret__ = val; \
switch (sizeof(var)) { \
case 1: \
asm("xaddb %0, "__percpu_arg(1) \
: "+q" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 2: \
asm("xaddw %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 4: \
asm("xaddl %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 8: \
asm("xaddq %0, "__percpu_arg(1) \
: "+re" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
default: __bad_percpu_size(); \
} \
paro_ret__ += val; \
paro_ret__; \
})

/*
* xchg is implemented using cmpxchg without a lock prefix. xchg is
* expensive due to the implied lock prefix. The processor cannot prefetch
* cachelines if xchg is used.
*/
#define percpu_xchg_op(var, nval) \
({ \
typeof(var) pxo_ret__; \
typeof(var) pxo_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
asm("\n1:mov "__percpu_arg(1)",%%al" \
"\n\tcmpxchgb %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=a" (pxo_ret__), "+m" (var) \
: "q" (pxo_new__) \
: "memory"); \
break; \
case 2: \
asm("\n1:mov "__percpu_arg(1)",%%ax" \
"\n\tcmpxchgw %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=a" (pxo_ret__), "+m" (var) \
: "r" (pxo_new__) \
: "memory"); \
break; \
case 4: \
asm("\n1:mov "__percpu_arg(1)",%%eax" \
"\n\tcmpxchgl %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=a" (pxo_ret__), "+m" (var) \
: "r" (pxo_new__) \
: "memory"); \
break; \
case 8: \
asm("\n1:mov "__percpu_arg(1)",%%rax" \
"\n\tcmpxchgq %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=a" (pxo_ret__), "+m" (var) \
: "r" (pxo_new__) \
: "memory"); \
break; \
default: __bad_percpu_size(); \
} \
pxo_ret__; \
})

/*
* cmpxchg has no such implied lock semantics as a result it is much
* more efficient for cpu local operations.
*/
#define percpu_cmpxchg_op(var, oval, nval) \
({ \
typeof(var) pco_ret__; \
typeof(var) pco_old__ = (oval); \
typeof(var) pco_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
asm("cmpxchgb %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "q" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 2: \
asm("cmpxchgw %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 4: \
asm("cmpxchgl %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 8: \
asm("cmpxchgq %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
default: __bad_percpu_size(); \
} \
pco_ret__; \
})

/*
* percpu_read() makes gcc load the percpu variable every time it is
* accessed while percpu_read_stable() allows the value to be cached.
Expand Down Expand Up @@ -300,6 +386,12 @@ do { \
#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
/*
* Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
* faster than an xchg with forced lock semantics.
*/
#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)

#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
Expand All @@ -319,6 +411,11 @@ do { \
#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)

#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
Expand All @@ -332,15 +429,32 @@ do { \
#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)

#ifndef CONFIG_M386
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)

#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
#endif
#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)

#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#endif /* !CONFIG_M386 */

/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
Expand All @@ -352,21 +466,20 @@ do { \
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)

#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)

#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)

#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#endif

/* This is not atomic against other CPUs -- CPU preemption needs to be off */
Expand Down
Loading

0 comments on commit 05c2d08

Please sign in to comment.