Skip to content

Commit

Permalink
powerpc/lib: Adjust .balign inside string functions for PPC32
Browse files Browse the repository at this point in the history
commit 87a156f ("Align hot loops of some string functions")
degraded the performance of string functions by adding useless
nops

A simple benchmark on an 8xx calling 100000x a memchr() that
matches the first byte runs in 41668 TB ticks before this patch
and in 35986 TB ticks after this patch. So this gives an
improvement of approx 10%

Another benchmark doing the same with a memchr() matching the 128th
byte runs in 1011365 TB ticks before this patch and 1005682 TB ticks
after this patch, so regardless on the number of loops, removing
those useless nops improves the test by 5683 TB ticks.

Fixes: 87a156f ("Align hot loops of some string functions")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Christophe Leroy authored and Michael Ellerman committed Jun 3, 2018
1 parent 56b04d5 commit 1128bb7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
3 changes: 3 additions & 0 deletions arch/powerpc/include/asm/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@
#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX)
#define L1_CACHE_SHIFT 4
#define MAX_COPY_PREFETCH 1
#define IFETCH_ALIGN_SHIFT 2
#elif defined(CONFIG_PPC_E500MC)
#define L1_CACHE_SHIFT 6
#define MAX_COPY_PREFETCH 4
#define IFETCH_ALIGN_SHIFT 3
#elif defined(CONFIG_PPC32)
#define MAX_COPY_PREFETCH 4
#define IFETCH_ALIGN_SHIFT 3 /* 603 fetches 2 insn at a time */
#if defined(CONFIG_PPC_47x)
#define L1_CACHE_SHIFT 7
#else
Expand Down
7 changes: 4 additions & 3 deletions arch/powerpc/lib/string.S
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/cache.h>

.text

Expand All @@ -23,7 +24,7 @@ _GLOBAL(strncpy)
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
.balign 16
.balign IFETCH_ALIGN_BYTES
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r6)
Expand All @@ -43,7 +44,7 @@ _GLOBAL(strncmp)
mtctr r5
addi r5,r3,-1
addi r4,r4,-1
.balign 16
.balign IFETCH_ALIGN_BYTES
1: lbzu r3,1(r5)
cmpwi 1,r3,0
lbzu r0,1(r4)
Expand Down Expand Up @@ -77,7 +78,7 @@ _GLOBAL(memchr)
beq- 2f
mtctr r5
addi r3,r3,-1
.balign 16
.balign IFETCH_ALIGN_BYTES
1: lbzu r0,1(r3)
cmpw 0,r0,r4
bdnzf 2,1b
Expand Down

0 comments on commit 1128bb7

Please sign in to comment.