-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge patch series "Zbb string optimizations"
Heiko Stuebner <heiko@sntech.de> says: From: Heiko Stuebner <heiko.stuebner@vrull.eu> This series still tries to allow optimized string functions for specific extensions. The previous approach of using an inline base function to hold the alternative calls caused issues in a number of places. So instead, we now use an alternative `j` at the beginning of the generic function to jump to a separate place inside the function itself. * b4-shazam-merge: RISC-V: add zbb support to string functions RISC-V: add infrastructure to allow different str* implementations Link: https://lore.kernel.org/r/20230113212301.3534711-1-heiko@sntech.de Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
- Loading branch information
Showing
12 changed files
with
468 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
/* SPDX-License-Identifier: GPL-2.0-only */ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/asm.h> | ||
#include <asm-generic/export.h> | ||
#include <asm/alternative-macros.h> | ||
#include <asm/errata_list.h> | ||
|
||
/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)

	/*
	 * The first instruction is a patchable slot: it is a nop by default
	 * and is replaced by a jump to strcmp_zbb when the Zbb alternative
	 * is applied, so both variants share the single strcmp entry point.
	 */
	ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)

	/*
	 * Generic variant: compare one byte at a time.
	 *
	 * Returns
	 *   a0 - comparison result, value like strcmp
	 *
	 * Parameters
	 *   a0 - string1
	 *   a1 - string2
	 *
	 * Clobbers
	 *   t0, t1
	 */
1:
	lbu	t0, 0(a0)
	lbu	t1, 0(a1)
	addi	a0, a0, 1
	addi	a1, a1, 1
	/* Leave the loop on the first differing byte ... */
	bne	t0, t1, 2f
	/* ... otherwise keep going until the common byte is the NUL. */
	bnez	t0, 1b
	/* Both strings ended at the same position: they are equal. */
	li	a0, 0
	ret
2:
	/*
	 * strcmp only needs to return (< 0, 0, > 0) values
	 * not necessarily -1, 0, +1. Both bytes were loaded
	 * zero-extended (lbu), so the difference has the right sign.
	 */
	sub	a0, t0, t1
	ret

/*
 * Variant of strcmp using the ZBB extension if available
 */
#ifdef CONFIG_RISCV_ISA_ZBB
strcmp_zbb:

.option push
.option arch,+zbb

	/*
	 * Returns
	 *   a0 - comparison result, value like strcmp
	 *
	 * Parameters
	 *   a0 - string1
	 *   a1 - string2
	 *
	 * Clobbers
	 *   t0, t1, t2, t3, t4
	 */

	/*
	 * The word-at-a-time loop is only used when both pointers are
	 * SZREG-aligned: OR the two addresses together and test the low bits.
	 * t4 is preloaded with all-ones, the orc.b result of a word that
	 * contains no NUL byte.
	 */
	or	t2, a0, a1
	li	t4, -1
	andi	t2, t2, SZREG-1
	bnez	t2, 3f

	/* Main loop for aligned string. */
	.p2align 3
1:
	REG_L	t0, 0(a0)
	REG_L	t1, 0(a1)
	/* orc.b: each byte becomes 0xff if non-zero, 0x00 if it was NUL. */
	orc.b	t3, t0
	/* A NUL in string1's word ends the fast path; a0/a1 not yet advanced. */
	bne	t3, t4, 2f
	addi	a0, a0, SZREG
	addi	a1, a1, SZREG
	beq	t0, t1, 1b

	/*
	 * Words don't match, and no null byte in the first
	 * word. Get bytes in big-endian order and compare, so that the
	 * earliest byte in memory is the most significant one.
	 */
#ifndef CONFIG_CPU_BIG_ENDIAN
	rev8	t0, t0
	rev8	t1, t1
#endif

	/*
	 * Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence:
	 * sltu yields 1 or 0, neg turns that into -1 or 0, and setting
	 * bit 0 gives -1 or +1.
	 */
	sltu	a0, t0, t1
	neg	a0, a0
	ori	a0, a0, 1
	ret

2:
	/*
	 * Found a null byte.
	 * If words don't match, fall back to simple loop. a0 and a1 still
	 * point at the current words, so the byte loop re-examines them.
	 */
	bne	t0, t1, 3f

	/* Otherwise, strings are equal. */
	li	a0, 0
	ret

	/* Simple loop for misaligned strings. */
	.p2align 3
3:
	lbu	t0, 0(a0)
	lbu	t1, 0(a1)
	addi	a0, a0, 1
	addi	a1, a1, 1
	bne	t0, t1, 4f
	bnez	t0, 3b

	/*
	 * Reached either on a mismatch or by falling through when both
	 * bytes are NUL; in the latter case the difference is 0.
	 */
4:
	sub	a0, t0, t1
	ret

.option pop
#endif
SYM_FUNC_END(strcmp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* SPDX-License-Identifier: GPL-2.0-only */ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/asm.h> | ||
#include <asm-generic/export.h> | ||
#include <asm/alternative-macros.h> | ||
#include <asm/errata_list.h> | ||
|
||
/* size_t strlen(const char *s) */
SYM_FUNC_START(strlen)

	/*
	 * The first instruction is a patchable slot: it is a nop by default
	 * and is replaced by a jump to strlen_zbb when the Zbb alternative
	 * is applied, so both variants share the single strlen entry point.
	 */
	ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)

	/*
	 * Generic variant: scan one byte at a time.
	 *
	 * Returns
	 *   a0 - string length
	 *
	 * Parameters
	 *   a0 - String to measure
	 *
	 * Clobbers:
	 *   t0, t1
	 */
	/* t1 walks the string; a0 keeps the start for the final subtraction. */
	mv	t1, a0
1:
	lbu	t0, 0(t1)
	beqz	t0, 2f
	addi	t1, t1, 1
	j	1b
2:
	/* Length = address of the NUL byte - start address. */
	sub	a0, t1, a0
	ret

/*
 * Variant of strlen using the ZBB extension if available
 */
#ifdef CONFIG_RISCV_ISA_ZBB
strlen_zbb:

/*
 * CZ counts zero bits from the end of the register that holds the
 * lowest-addressed byte; SHIFT moves bytes towards that same end.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
# define CZ	clz
# define SHIFT	sll
#else
# define CZ	ctz
# define SHIFT	srl
#endif

.option push
.option arch,+zbb

	/*
	 * Returns
	 *   a0 - string length
	 *
	 * Parameters
	 *   a0 - String to measure
	 *
	 * Clobbers
	 *   t0, t1, t2, t3
	 */

	/* Number of irrelevant bytes in the first word. */
	andi	t2, a0, SZREG-1

	/* Align pointer. */
	andi	t0, a0, -SZREG

	/* t3 = number of valid string bytes in the first word. */
	li	t3, SZREG
	sub	t3, t3, t2
	/* Convert the byte offset into a bit count for the shift below. */
	slli	t2, t2, 3

	/* Get the first word (the aligned word containing the first byte). */
	REG_L	t1, 0(t0)

	/*
	 * Shift away the partial data we loaded to remove the irrelevant bytes
	 * preceding the string with the effect of adding NUL bytes at the
	 * end of the string's first word.
	 */
	SHIFT	t1, t1, t2

	/* Convert non-NUL into 0xff and NUL into 0x00. */
	orc.b	t1, t1

	/* Convert non-NUL into 0x00 and NUL into 0xff. */
	not	t1, t1

	/*
	 * Search for the first set bit (corresponding to a NUL byte in the
	 * original chunk).
	 */
	CZ	t1, t1

	/*
	 * The first chunk is special: compare against the number
	 * of valid bytes in this chunk. If the NUL lies within the valid
	 * bytes, a0 (bit index / 8) is already the final length.
	 */
	srli	a0, t1, 3
	bgtu	t3, a0, 3f

	/*
	 * Prepare for the word comparison loop: t2 remembers the address of
	 * the second word, t3 is the orc.b result of a word without a NUL.
	 */
	addi	t2, t0, SZREG
	li	t3, -1

	/*
	 * Our critical loop is 4 instructions and processes data in
	 * 4 byte or 8 byte chunks.
	 */
	.p2align 3
1:
	REG_L	t1, SZREG(t0)
	addi	t0, t0, SZREG
	orc.b	t1, t1
	beq	t1, t3, 1b

	/* t0 now points at the word containing the NUL; locate it. */
	not	t1, t1
	CZ	t1, t1

	/* Get number of bytes in the full words processed by the loop. */
	sub	t2, t0, t2

	/* Add them to the number of characters in the first word (a0). */
	add	a0, a0, t2
	/* Convert the bit index of the NUL into a byte count. */
	srli	t1, t1, 3

	/* Add number of characters in the last word. */
	add	a0, a0, t1
3:
	ret

.option pop
#endif
SYM_FUNC_END(strlen)
Oops, something went wrong.