-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
riscv: Add vector extension XOR implementation
This patch adds support for vector optimized XOR and it is tested in qemu.

Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20240115055929.4736-4-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
- Loading branch information
Greentime Hu
authored and
Palmer Dabbelt
committed
Jan 16, 2024
1 parent
956895b
commit c5674d0
Showing
4 changed files
with
168 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
/* | ||
* Copyright (C) 2021 SiFive | ||
*/ | ||
|
||
#include <linux/hardirq.h> | ||
#include <asm-generic/xor.h> | ||
#ifdef CONFIG_RISCV_ISA_V | ||
#include <asm/vector.h> | ||
#include <asm/switch_to.h> | ||
#include <asm/asm-prototypes.h> | ||
|
||
/*
 * xor_vector_2 - XOR one source buffer into @p1 using the vector unit.
 * @bytes: number of bytes to process
 * @p1:    destination buffer; also the first XOR operand
 * @p2:    second XOR operand (read-only)
 *
 * Thin wrapper: the work is done by the assembly routine xor_regs_2_(),
 * which stores p1 ^ p2 back to @p1 over @bytes bytes. The call is
 * bracketed by kernel_vector_begin()/kernel_vector_end(), which are
 * declared outside this file (presumably they make the vector registers
 * usable from kernel context -- confirm against <asm/vector.h>).
 */
static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2)
{
	kernel_vector_begin();
	xor_regs_2_(bytes, p1, p2);
	kernel_vector_end();
}
|
||
/*
 * xor_vector_3 - XOR two source buffers into @p1 using the vector unit.
 * @bytes: number of bytes to process
 * @p1:    destination buffer; also the first XOR operand
 * @p2:    second XOR operand (read-only)
 * @p3:    third XOR operand (read-only)
 *
 * Thin wrapper: the work is done by the assembly routine xor_regs_3_(),
 * which stores p1 ^ p2 ^ p3 back to @p1 over @bytes bytes. The call is
 * bracketed by kernel_vector_begin()/kernel_vector_end(), which are
 * declared outside this file (presumably they make the vector registers
 * usable from kernel context -- confirm against <asm/vector.h>).
 */
static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3)
{
	kernel_vector_begin();
	xor_regs_3_(bytes, p1, p2, p3);
	kernel_vector_end();
}
|
||
/*
 * xor_vector_4 - XOR three source buffers into @p1 using the vector unit.
 * @bytes: number of bytes to process
 * @p1:    destination buffer; also the first XOR operand
 * @p2:    second XOR operand (read-only)
 * @p3:    third XOR operand (read-only)
 * @p4:    fourth XOR operand (read-only)
 *
 * Thin wrapper: the work is done by the assembly routine xor_regs_4_(),
 * which stores p1 ^ p2 ^ p3 ^ p4 back to @p1 over @bytes bytes. The call
 * is bracketed by kernel_vector_begin()/kernel_vector_end(), which are
 * declared outside this file (presumably they make the vector registers
 * usable from kernel context -- confirm against <asm/vector.h>).
 */
static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3,
			 const unsigned long *__restrict p4)
{
	kernel_vector_begin();
	xor_regs_4_(bytes, p1, p2, p3, p4);
	kernel_vector_end();
}
|
||
/*
 * xor_vector_5 - XOR four source buffers into @p1 using the vector unit.
 * @bytes: number of bytes to process
 * @p1:    destination buffer; also the first XOR operand
 * @p2:    second XOR operand (read-only)
 * @p3:    third XOR operand (read-only)
 * @p4:    fourth XOR operand (read-only)
 * @p5:    fifth XOR operand (read-only)
 *
 * Thin wrapper: the work is done by the assembly routine xor_regs_5_(),
 * which stores p1 ^ p2 ^ p3 ^ p4 ^ p5 back to @p1 over @bytes bytes. The
 * call is bracketed by kernel_vector_begin()/kernel_vector_end(), which
 * are declared outside this file (presumably they make the vector
 * registers usable from kernel context -- confirm against <asm/vector.h>).
 */
static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3,
			 const unsigned long *__restrict p4,
			 const unsigned long *__restrict p5)
{
	kernel_vector_begin();
	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
	kernel_vector_end();
}
|
||
static struct xor_block_template xor_block_rvv = { | ||
.name = "rvv", | ||
.do_2 = xor_vector_2, | ||
.do_3 = xor_vector_3, | ||
.do_4 = xor_vector_4, | ||
.do_5 = xor_vector_5 | ||
}; | ||
|
||
/*
 * Override any earlier XOR_TRY_TEMPLATES definition (presumably the one
 * from <asm-generic/xor.h>): pass the 8regs and 32regs templates to
 * xor_speed(), and additionally the RVV template when has_vector()
 * returns true.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_32regs);		\
		if (has_vector()) {			\
			xor_speed(&xor_block_rvv);	\
		}					\
	} while (0)
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/* SPDX-License-Identifier: GPL-2.0-or-later */ | ||
/* | ||
* Copyright (C) 2021 SiFive | ||
*/ | ||
#include <linux/linkage.h> | ||
#include <linux/export.h> | ||
#include <asm/asm.h> | ||
|
||
/*
 * xor_regs_2_(bytes, p1, p2): store p1 ^ p2 back to p1, `bytes` bytes.
 *
 * Register use (arguments as passed by the C caller xor_vector_2()):
 *   a0 - bytes still to process
 *   a1 - p1 cursor (loaded from, then stored to)
 *   a2 - p2 cursor
 *   a3 - bytes handled in this pass (vl returned by vsetvli)
 *
 * Scalar pointer/counter updates are interleaved with the vector
 * instructions; keep the ordering when editing.
 */
SYM_FUNC_START(xor_regs_2_)
	/* a3 = vl for byte elements (e8) in 8-register groups (m8) */
	vsetvli a3, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a3			/* bytes left after this pass */
	vxor.vv v16, v0, v8
	add a2, a2, a3
	vse8.v v16, (a1)
	add a1, a1, a3
	/* The loop re-enters at the function's entry label. */
	bnez a0, xor_regs_2_
	ret
SYM_FUNC_END(xor_regs_2_)
EXPORT_SYMBOL(xor_regs_2_)
|
||
/*
 * xor_regs_3_(bytes, p1, p2, p3): store p1 ^ p2 ^ p3 back to p1,
 * `bytes` bytes.
 *
 * Register use (arguments as passed by the C caller xor_vector_3()):
 *   a0 - bytes still to process
 *   a1 - p1 cursor (loaded from, then stored to)
 *   a2 - p2 cursor
 *   a3 - p3 cursor
 *   a4 - bytes handled in this pass (vl returned by vsetvli)
 *
 * Scalar pointer/counter updates are interleaved with the vector
 * instructions; keep the ordering when editing.
 */
SYM_FUNC_START(xor_regs_3_)
	/* a4 = vl for byte elements (e8) in 8-register groups (m8) */
	vsetvli a4, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a4			/* bytes left after this pass */
	vxor.vv v0, v0, v8		/* v0 = p1 ^ p2 */
	vle8.v v16, (a3)
	add a2, a2, a4
	vxor.vv v16, v0, v16		/* v16 = p1 ^ p2 ^ p3 */
	add a3, a3, a4
	vse8.v v16, (a1)
	add a1, a1, a4
	/* The loop re-enters at the function's entry label. */
	bnez a0, xor_regs_3_
	ret
SYM_FUNC_END(xor_regs_3_)
EXPORT_SYMBOL(xor_regs_3_)
|
||
/*
 * xor_regs_4_(bytes, p1, p2, p3, p4): store p1 ^ p2 ^ p3 ^ p4 back to p1,
 * `bytes` bytes.
 *
 * Register use (arguments as passed by the C caller xor_vector_4()):
 *   a0 - bytes still to process
 *   a1 - p1 cursor (loaded from, then stored to)
 *   a2 - p2 cursor
 *   a3 - p3 cursor
 *   a4 - p4 cursor
 *   a5 - bytes handled in this pass (vl returned by vsetvli)
 *
 * Scalar pointer/counter updates are interleaved with the vector
 * instructions; keep the ordering when editing.
 */
SYM_FUNC_START(xor_regs_4_)
	/* a5 = vl for byte elements (e8) in 8-register groups (m8) */
	vsetvli a5, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a5			/* bytes left after this pass */
	vxor.vv v0, v0, v8		/* v0 = p1 ^ p2 */
	vle8.v v16, (a3)
	add a2, a2, a5
	vxor.vv v0, v0, v16		/* v0 ^= p3 */
	vle8.v v24, (a4)
	add a3, a3, a5
	vxor.vv v16, v0, v24		/* v16 = p1 ^ p2 ^ p3 ^ p4 */
	add a4, a4, a5
	vse8.v v16, (a1)
	add a1, a1, a5
	/* The loop re-enters at the function's entry label. */
	bnez a0, xor_regs_4_
	ret
SYM_FUNC_END(xor_regs_4_)
EXPORT_SYMBOL(xor_regs_4_)
|
||
/*
 * xor_regs_5_(bytes, p1, p2, p3, p4, p5): store p1 ^ p2 ^ p3 ^ p4 ^ p5
 * back to p1, `bytes` bytes.
 *
 * Register use (arguments as passed by the C caller xor_vector_5()):
 *   a0 - bytes still to process
 *   a1 - p1 cursor (loaded from, then stored to)
 *   a2 - p2 cursor
 *   a3 - p3 cursor
 *   a4 - p4 cursor
 *   a5 - p5 cursor
 *   a6 - bytes handled in this pass (vl returned by vsetvli)
 *
 * With m8 only four register groups exist (v0, v8, v16, v24), so the v8
 * group is reused for the p5 data once the p2 data has been folded in.
 *
 * Scalar pointer/counter updates are interleaved with the vector
 * instructions; keep the ordering when editing.
 */
SYM_FUNC_START(xor_regs_5_)
	/* a6 = vl for byte elements (e8) in 8-register groups (m8) */
	vsetvli a6, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a6			/* bytes left after this pass */
	vxor.vv v0, v0, v8		/* v0 = p1 ^ p2; v8 is free again */
	vle8.v v16, (a3)
	add a2, a2, a6
	vxor.vv v0, v0, v16		/* v0 ^= p3 */
	vle8.v v24, (a4)
	add a3, a3, a6
	vxor.vv v0, v0, v24		/* v0 ^= p4 */
	vle8.v v8, (a5)			/* v8 group now holds p5 data */
	add a4, a4, a6
	vxor.vv v16, v0, v8		/* v16 = p1 ^ p2 ^ p3 ^ p4 ^ p5 */
	add a5, a5, a6
	vse8.v v16, (a1)
	add a1, a1, a6
	/* The loop re-enters at the function's entry label. */
	bnez a0, xor_regs_5_
	ret
SYM_FUNC_END(xor_regs_5_)
EXPORT_SYMBOL(xor_regs_5_)