Skip to content

Commit

Permalink
powerpc/lib: optimise PPC32 memcmp
Browse files Browse the repository at this point in the history
Currently, memcmp() compares two chunks of memory
byte by byte.

This patch optimises the comparison by comparing word by word.

In the same way as commit 15c2d45 ("powerpc: Add 64bit
optimised memcmp"), this patch moves memcmp() into a dedicated
file named memcmp_32.S

A small benchmark run on an 8xx, comparing two chunks
of 512 bytes 100000 times, gives:

Before : 5852274 TB ticks
After:   1488638 TB ticks

This is almost 4 times faster

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Christophe Leroy authored and Michael Ellerman committed Jun 3, 2018
1 parent f36bbf2 commit 2676b89
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 19 deletions.
4 changes: 2 additions & 2 deletions arch/powerpc/lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o

obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o memcmp_64.o pmem.o
memcpy_64.o pmem.o

obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o

obj-y += checksum_$(BITS).o checksum_wrappers.o \
string_$(BITS).o
string_$(BITS).o memcmp_$(BITS).o

obj-y += sstep.o ldstfp.o quad.o
obj64-y += quad.o
Expand Down
45 changes: 45 additions & 0 deletions arch/powerpc/lib/memcmp_32.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/* SPDX-License-Identifier: GPL-2.0 */

/*
* memcmp for PowerPC32
*
* Copyright (C) 1996 Paul Mackerras.
*
*/

#include <asm/ppc_asm.h>
#include <asm/export.h>

.text

/*
 * int memcmp(const void *s1, const void *s2, size_t len)
 *
 * In:   r3 = s1, r4 = s2, r5 = len
 * Out:  r3 = 0 when the buffers match.  Otherwise:
 *         - difference found in the word loop: 1 or -1
 *         - difference found in the 1..3 byte tail: (s1 value - s2 value)
 *           of the zero-extended halfword or byte that differs
 * Uses: r0, r6, r7, ctr, cr0, cr1
 *
 * The buffers are compared one word at a time, then a halfword and/or a
 * byte for the remaining len % 4 bytes.  The unsigned word and halfword
 * comparisons only give memcmp() ordering when loads are big-endian.
 */
_GLOBAL(memcmp)
	srwi.	r7, r5, 2		/* r7 = len / 4; logical shift, len is unsigned */
	mr	r6, r3			/* r6 = s1, r3 becomes scratch/result */
	beq-	3f			/* no whole word: tail only, offset r7 == 0 */
	mtctr	r7			/* ctr = number of whole words */
	li	r7, 0			/* r7 = byte offset into both buffers */
1:	lwzx	r3, r6, r7
	lwzx	r0, r4, r7
	addi	r7, r7, 4
	cmplw	cr0, r3, r0		/* unsigned word compare */
	bdnzt	eq, 1b			/* loop while words remain and they match */
	bne	5f			/* last compared words differ */
3:	andi.	r3, r5, 3		/* r3 = bytes left (0..3) */
	beqlr				/* nothing left: return 0 */
	cmplwi	cr1, r3, 2
	blt-	cr1, 4f			/* exactly one byte left */
	lhzx	r3, r6, r7
	lhzx	r0, r4, r7
	addi	r7, r7, 2
	subf.	r3, r0, r3		/* r3 = s1 halfword - s2 halfword */
	beqlr	cr1			/* exactly two bytes left: r3 is the result */
	bnelr				/* three left, but the halfwords already differ */
4:	lbzx	r3, r6, r7
	lbzx	r0, r4, r7
	subf.	r3, r0, r3		/* r3 = s1 byte - s2 byte */
	blr
5:	li	r3, 1			/* li leaves cr0 from the cmplw above intact */
	bgtlr				/* s1 word > s2 word: return 1 */
	li	r3, -1
	blr
EXPORT_SYMBOL(memcmp)
17 changes: 0 additions & 17 deletions arch/powerpc/lib/string.S
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,6 @@ _GLOBAL(strncmp)
blr
EXPORT_SYMBOL(strncmp)

#ifdef CONFIG_PPC32
/*
 * Byte-at-a-time memcmp.
 *
 * In:   r3 = first buffer, r4 = second buffer, r5 = length in bytes
 * Out:  r3 = 0 if the length is 0 or all bytes match, otherwise the
 *       difference (first - second) of the first pair of bytes that differ
 * Uses: r0, r4 (modified), r6, ctr, cr0
 */
_GLOBAL(memcmp)
/* NOTE(review): PPC_LCMPI is defined elsewhere; presumably compares r5 with 0 into cr0 */
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
/* Bias both pointers by -1 so the update-form loads below pre-increment them */
addi r6,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r6)
lbzu r0,1(r4)
subf. r3,r0,r3
/* CR bit 2 is cr0[eq]: loop while bytes remain and the last pair matched */
bdnzt 2,1b
blr
2: li r3,0
blr
EXPORT_SYMBOL(memcmp)
#endif

_GLOBAL(memchr)
PPC_LCMPI 0,r5,0
beq- 2f
Expand Down

0 comments on commit 2676b89

Please sign in to comment.