Skip to content

Commit

Permalink
powerpc: memcpy optimization for 64bit LE
Browse files Browse the repository at this point in the history
Unaligned stores take alignment exceptions on POWER7 running in little-endian mode.
This is a dumb little-endian base memcpy that avoids unaligned stores.
Once booted, the feature fixup code switches over to the VMX copy loops
(which are already endian safe).

The question is what we do before that switchover. The base 64bit
memcpy takes alignment exceptions on POWER7, so we can't use it as is.
Fixing the causes of the alignment exceptions would slow it down, because
we'd need to ensure all loads and stores are aligned, either through
rotate tricks or bytewise loads and stores. Either would be bad for
all other 64bit platforms.

[ I simplified the loop a bit - Anton ]

Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
  • Loading branch information
Philippe Bergheaud authored and Benjamin Herrenschmidt committed Apr 30, 2014
1 parent 48ce3b7 commit 00f554f
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 8 deletions.
4 changes: 0 additions & 4 deletions arch/powerpc/include/asm/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
#define __HAVE_ARCH_STRNCMP
#define __HAVE_ARCH_STRCAT
#define __HAVE_ARCH_MEMSET
#ifdef __BIG_ENDIAN__
#define __HAVE_ARCH_MEMCPY
#endif
#define __HAVE_ARCH_MEMMOVE
#define __HAVE_ARCH_MEMCMP
#define __HAVE_ARCH_MEMCHR
Expand All @@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
extern int strncmp(const char *, const char *, __kernel_size_t);
extern char * strcat(char *, const char *);
extern void * memset(void *,int,__kernel_size_t);
#ifdef __BIG_ENDIAN__
extern void * memcpy(void *,const void *,__kernel_size_t);
#endif
extern void * memmove(void *,const void *,__kernel_size_t);
extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
Expand Down
2 changes: 0 additions & 2 deletions arch/powerpc/kernel/ppc_ksyms.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,7 @@ EXPORT_SYMBOL(__cmpdi2);
#endif
long long __bswapdi2(long long);
EXPORT_SYMBOL(__bswapdi2);
#ifdef __BIG_ENDIAN__
EXPORT_SYMBOL(memcpy);
#endif
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcmp);
Expand Down
2 changes: 0 additions & 2 deletions arch/powerpc/lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
endif

ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
endif

obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o

Expand Down
16 changes: 16 additions & 0 deletions arch/powerpc/lib/memcpy_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,27 @@
/*
 * memcpy entry point and little-endian fallback path.
 *
 * Register use visible in this code:
 *   r3 = destination pointer (never modified on the little-endian path,
 *        so it is still the destination when that path returns)
 *   r4 = source pointer
 *   r5 = byte count
 *
 * The two alternatives between BEGIN_FTR_SECTION and
 * ALT_FTR_SECTION_END_IFCLR are selected on CPU_FTR_VMX_COPY.
 * NOTE(review): presumably the first alternative is kept when the feature
 * bit is clear and the branch to memcpy_power7 is patched in when it is
 * set -- confirm against the feature-fixup macro definitions.
 */
.align 7
_GLOBAL(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
/* compare the byte count with zero; cr7 is consumed by the beqlr below */
cmpdi cr7,r5,0
#else
std r3,48(r1) /* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifndef SELFTEST
/* alternative entry: hand the whole copy over to memcpy_power7 */
b memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
/* dumb little-endian memcpy that will get replaced at runtime */
/*
 * Bias both pointers by -1 because lbzu/stbu below use a displacement of 1
 * and write the updated address back, i.e. they pre-increment.
 * r9 is the working destination pointer; r3 is left untouched.
 */
addi r9,r3,-1
addi r4,r4,-1
/* byte count == 0: return now, before the count is loaded into CTR */
beqlr cr7
/* CTR = number of bytes to copy */
mtctr r5
/* copy one byte per iteration, so only byte-sized stores are issued */
1: lbzu r10,1(r4)
stbu r10,1(r9)
bdnz 1b
blr
#else
PPC_MTOCRF(0x01,r5)
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
Expand Down Expand Up @@ -203,3 +218,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
stb r0,0(r3)
4: ld r3,48(r1) /* return dest pointer */
blr
#endif

0 comments on commit 00f554f

Please sign in to comment.