x86-64: support native xadd rwsem implementation
This one is much faster than the spinlock-based fallback rwsem code,
with certain artificial benchmarks having shown 300%+ improvement on
threaded page faults etc.

Again, note the 32767-thread limit here. So this really does need that
whole "make rwsem_count_t be 64-bit and fix the BIAS values to match"
extension on top of it, but that is conceptually a totally independent
issue.
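
For context, a rough sketch of the count layout that produces that
limit. The constant names follow the kernel's x86 rwsem header of the
era, but treat the exact values here as illustrative rather than a
quote of the source:

	/*
	 * 32-bit rwsem count: the low 16 bits hold the active
	 * (reader/writer) count.  Since it is effectively signed,
	 * ~32767 concurrent lockers is the ceiling.  Making the
	 * count 64-bit and scaling the BIAS values to match is
	 * what lifts that limit.
	 */
	#define RWSEM_UNLOCKED_VALUE	0x00000000
	#define RWSEM_ACTIVE_BIAS	0x00000001
	#define RWSEM_ACTIVE_MASK	0x0000ffff
	#define RWSEM_WAITING_BIAS	(-0x00010000)
	#define RWSEM_ACTIVE_READ_BIAS	RWSEM_ACTIVE_BIAS
	#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)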

NOT TESTED! The original patch that this all was based on was tested by
KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the
cleaned-up series, so caveat emptor..

Also note that it _may_ be a good idea to mark some more registers
clobbered on x86-64 in the inline asms instead of saving/restoring them.
They are inline functions, but they are only used in places where there
are not a lot of live registers _anyway_, so clobbering, for example,
%r8-%r11 in the asm wouldn't make the fast-path code any worse, and
would make the slow-path code smaller.

(Not that the slow-path really matters to that degree. Saving a few
unnecessary registers is the _least_ of our problems when we hit the slow
path. The instruction/cycle counting really only matters in the fast
path).
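
A sketch of the fast-path shape in question, loosely following the
kernel's arch/x86/include/asm/rwsem.h of the time (simplified, not the
verbatim source). The semaphore pointer is passed in %rax, which is why
the wrappers below shuffle it into %rdi before calling the C slow path;
adding "r8"-"r11" to this clobber list is the alternative to the
push/pop in the wrapper:

	static inline void __down_read(struct rw_semaphore *sem)
	{
		asm volatile("# beginning down_read\n\t"
			     LOCK_PREFIX "incl (%1)\n\t"
			     /* adds 0x00000001; sign set -> contended */
			     "jns 1f\n\t"
			     "call call_rwsem_down_read_failed\n"
			     "1:\n\t"
			     "# ending down_read\n\t"
			     : "+m" (sem->count)
			     : "a" (sem)
			     : "memory", "cc");
	}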

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121810410.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Linus Torvalds authored and H. Peter Anvin committed Jan 14, 2010
1 parent 5d0b723 commit bafaecd
Showing 3 changed files with 83 additions and 1 deletion.
2 changes: 1 addition & 1 deletion arch/x86/Kconfig.cpu
@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT
 
 config X86_XADD
 	def_bool y
-	depends on X86_32 && !M386
+	depends on X86_64 || !M386
 
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
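
(For reference: the Makefile below keys off CONFIG_RWSEM_XCHGADD_ALGORITHM
rather than X86_XADD directly. The link between the two lives elsewhere in
arch/x86/Kconfig, not in this diff; at the time it read roughly like this:

	config RWSEM_GENERIC_SPINLOCK
		def_bool !X86_XADD

	config RWSEM_XCHGADD_ALGORITHM
		def_bool X86_XADD

so enabling X86_XADD on 64-bit is what flips the build over to the xadd
implementation.)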
1 change: 1 addition & 0 deletions arch/x86/lib/Makefile
@@ -39,4 +39,5 @@ else
 	lib-y += thunk_64.o clear_page_64.o copy_page_64.o
 	lib-y += memmove_64.o memset_64.o
 	lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
+	lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
 endif
81 changes: 81 additions & 0 deletions arch/x86/lib/rwsem_64.S
@@ -0,0 +1,81 @@
/*
 * x86-64 rwsem wrappers
 *
 * This interfaces the inline asm code to the slow-path
 * C routines. We need to save the call-clobbered regs
 * that the asm does not mark as clobbered, and move the
 * argument from %rax to %rdi.
 *
 * NOTE! We don't need to save %rax, because the functions
 * will always return the semaphore pointer in %rax (which
 * is also the input argument to these helpers)
 *
 * The following can clobber %rdx because the asm clobbers it:
 *   call_rwsem_down_write_failed
 *   call_rwsem_wake
 * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
 */

#include <linux/linkage.h>
#include <asm/rwlock.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#include <asm/dwarf2.h>

#define save_common_regs \
	pushq %rdi; \
	pushq %rsi; \
	pushq %rcx; \
	pushq %r8; \
	pushq %r9; \
	pushq %r10; \
	pushq %r11

#define restore_common_regs \
	popq %r11; \
	popq %r10; \
	popq %r9; \
	popq %r8; \
	popq %rcx; \
	popq %rsi; \
	popq %rdi

/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
	save_common_regs
	pushq %rdx
	movq %rax,%rdi
	call rwsem_down_read_failed
	popq %rdx
	restore_common_regs
	ret
ENDPROC(call_rwsem_down_read_failed)

ENTRY(call_rwsem_down_write_failed)
	save_common_regs
	movq %rax,%rdi
	call rwsem_down_write_failed
	restore_common_regs
	ret
ENDPROC(call_rwsem_down_write_failed)

ENTRY(call_rwsem_wake)
	decw %dx	/* do nothing if still outstanding active readers */
	jnz 1f
	save_common_regs
	movq %rax,%rdi
	call rwsem_wake
	restore_common_regs
1:	ret
ENDPROC(call_rwsem_wake)

/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
	save_common_regs
	pushq %rdx
	movq %rax,%rdi
	call rwsem_downgrade_wake
	popq %rdx
	restore_common_regs
	ret
ENDPROC(call_rwsem_downgrade_wake)
