[SPARC64]: Inline membar()'s again.
Since GCC has to emit a call and a delay slot to the
out-of-line "membar" routines in arch/sparc64/lib/mb.S,
it is much better to do the necessary predicted
branch inline as:

	ba,pt	%xcc, 1f
	 membar	#whatever
1:

instead of the current:

	call	membar_foo
	 dslot

because this way GCC is not required to allocate a stack
frame if the function can be a leaf function.

This also makes this bug fix easier to backport to 2.4.x.
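
To make the leaf-function point concrete, here is a minimal,
hypothetical sketch (the function and variable names are illustrative,
not from this commit), written against the membar_safe() macro this
patch adds to include/asm-sparc64/system.h below:

	/* With the old out-of-line barrier routines this function would
	 * contain a call instruction, forcing GCC to allocate a stack
	 * frame for it.  With the inline branch-over form it can be
	 * compiled as a leaf function. */
	static void set_flag(volatile int *flag)
	{
		*flag = 1;			/* store the flag */
		membar_safe("#StoreLoad");	/* inline, no call emitted */
	}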

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Sep 8, 2005
1 parent: 1b11d78, commit: 4d803fc
Showing 4 changed files with 43 additions and 90 deletions.
arch/sparc64/kernel/sparc64_ksyms.c (0 additions, 9 deletions)
@@ -403,12 +403,3 @@ EXPORT_SYMBOL(xor_vis_4);
 EXPORT_SYMBOL(xor_vis_5);
 
 EXPORT_SYMBOL(prom_palette);
-
-/* memory barriers */
-EXPORT_SYMBOL(mb);
-EXPORT_SYMBOL(rmb);
-EXPORT_SYMBOL(wmb);
-EXPORT_SYMBOL(membar_storeload);
-EXPORT_SYMBOL(membar_storeload_storestore);
-EXPORT_SYMBOL(membar_storeload_loadload);
-EXPORT_SYMBOL(membar_storestore_loadstore);
arch/sparc64/lib/Makefile (1 addition, 1 deletion)
@@ -12,7 +12,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
 	 U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
 	 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
 	 copy_in_user.o user_fixup.o memmove.o \
-	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o
+	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
 
 lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
 lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
arch/sparc64/lib/mb.S (0 additions, 73 deletions)

This file was deleted.

include/asm-sparc64/system.h (42 additions, 7 deletions)
@@ -28,13 +28,48 @@ enum sparc_cpu {
 #define ARCH_SUN4C_SUN4 0
 #define ARCH_SUN4 0
 
-extern void mb(void);
-extern void rmb(void);
-extern void wmb(void);
-extern void membar_storeload(void);
-extern void membar_storeload_storestore(void);
-extern void membar_storeload_loadload(void);
-extern void membar_storestore_loadstore(void);
+/* These are here in an effort to more fully work around Spitfire Errata
+ * #51.  Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ *	call	sym_ccb_from_dsa, 0
+ *	 movge	%icc, 0, %l0
+ *	brz,pn	%o0, .LL1303
+ *	 mov	%o0, %l2
+ *	membar	#LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur.  Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) \
+do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
+			     " membar	" type "\n" \
+			     "1:\n" \
+			     : : : "memory"); \
+} while (0)
+
+#define mb()	\
+	membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+#define rmb()	\
+	membar_safe("#LoadLoad")
+#define wmb()	\
+	membar_safe("#StoreStore")
+#define membar_storeload() \
+	membar_safe("#StoreLoad")
+#define membar_storeload_storestore() \
+	membar_safe("#StoreLoad | #StoreStore")
+#define membar_storeload_loadload() \
+	membar_safe("#StoreLoad | #LoadLoad")
+#define membar_storestore_loadstore() \
+	membar_safe("#StoreStore | #LoadStore")
 
 #endif
 
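As a usage illustration (hypothetical, not part of this commit), a
caller now picks up the branch-protected barrier straight from the
header, with no out-of-line call:

	/* Hypothetical producer: publish a payload, then raise a flag.
	 * wmb() now expands to membar_safe("#StoreStore"), i.e. an
	 * inline "ba,pt" over a membar, so no call is emitted and the
	 * Spitfire Errata #51 hang window is avoided. */
	static void publish(int *data, volatile int *flag)
	{
		*data = 42;	/* payload store */
		wmb();		/* order payload store before flag store */
		*flag = 1;	/* make payload visible to readers */
	}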

