-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Currently, memcmp() compares two chunks of memory byte by byte. This patch optimises the comparison by comparing word by word. In the same way as commit 15c2d45 ("powerpc: Add 64bit optimised memcmp"), this patch moves memcmp() into a dedicated file named memcmp_32.S. A small benchmark performed on an 8xx, comparing two chunks of 512 bytes 100000 times, gives: Before: 5852274 TB ticks; After: 1488638 TB ticks. This is almost 4 times faster. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
- Loading branch information
Christophe Leroy
authored and
Michael Ellerman
committed
Jun 3, 2018
1 parent
f36bbf2
commit 2676b89
Showing
3 changed files
with
47 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
|
||
/* | ||
* memcmp for PowerPC32 | ||
* | ||
* Copyright (C) 1996 Paul Mackerras. | ||
* | ||
*/ | ||
|
||
#include <asm/ppc_asm.h> | ||
#include <asm/export.h> | ||
|
||
.text | ||
|
||
/*
 * int memcmp(const void *s1, const void *s2, size_t len)
 *
 * In:   r3 = s1, r4 = s2, r5 = len in bytes
 * Out:  r3 = 0 when both regions are equal. Otherwise the sign of r3 gives
 *       the ordering of the first differing unit, compared as unsigned:
 *         - word loop mismatch:  r3 = 1 (s1 > s2) or -1 (s1 < s2)
 *         - halfword/byte tail:  r3 = s1 unit - s2 unit
 * Uses: r0, r6, r7, ctr, cr0, cr1
 *
 * The bulk is compared one word at a time; the remaining 0-3 bytes are
 * handled with at most one halfword and one byte compare.
 *
 * NOTE(review): ordering by whole words/halfwords matches byte-wise
 * memcmp() ordering only for big-endian loads - presumably always the
 * case for PowerPC32 here; confirm before reusing on a little-endian target.
 */
_GLOBAL(memcmp)
	srwi.	r7, r5, 2		/* r7 = len / 4; len is unsigned, so shift logically (an arithmetic shift turns len >= 2^31 into a huge count) */
	mr	r6, r3			/* r6 = s1; r3 is reused for loaded data and the result */
	beq-	3f			/* no full word: r7 is already 0, the offset used by the tail */
	mtctr	r7			/* ctr = number of words to compare */
	li	r7, 0			/* r7 = running byte offset into both buffers */
1:	lwzx	r3, r6, r7
	lwzx	r0, r4, r7
	addi	r7, r7, 4
	cmplw	cr0, r3, r0		/* unsigned compare of the two words */
	bdnzt	eq, 1b			/* loop while words remain and the last pair matched */
	bne	5f			/* loop ended on a mismatch */
3:	andi.	r3, r5, 3		/* r3 = len % 4 = tail bytes left */
	beqlr				/* no tail: return 0 (already in r3) */
	cmplwi	cr1, r3, 2
	blt-	cr1, 4f			/* exactly one byte left */
	lhzx	r3, r6, r7
	lhzx	r0, r4, r7
	addi	r7, r7, 2
	subf.	r3, r0, r3		/* r3 = s1 halfword - s2 halfword */
	beqlr	cr1			/* tail was 2 bytes: the difference is the result */
	bnelr				/* tail is 3 bytes, but the halfwords already differ */
4:	lbzx	r3, r6, r7
	lbzx	r0, r4, r7
	subf.	r3, r0, r3		/* r3 = s1 byte - s2 byte */
	blr
5:	li	r3, 1			/* cr0 still holds the failing word compare */
	bgtlr				/* s1 word > s2 word: return 1 */
	li	r3, -1
	blr
EXPORT_SYMBOL(memcmp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters