-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
RISC-V: Report vector unaligned access speed hwprobe
Detect if vector misaligned accesses are faster or slower than equivalent vector byte accesses. This is useful for usermode to know whether vector byte accesses or vector misaligned accesses have a better bandwidth for operations like memcpy. Signed-off-by: Jesse Taube <jesse@rivosinc.com> Reviewed-by: Charlie Jenkins <charlie@rivosinc.com> Link: https://lore.kernel.org/r/20241017-jesse_unaligned_vector-v10-5-5b33500160f8@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
- Loading branch information
Jesse Taube
authored and
Palmer Dabbelt
committed
Oct 18, 2024
1 parent
d1703dc
commit e7c9d66
Showing
6 changed files
with
228 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
/* Copyright (C) 2024 Rivos Inc. */ | ||
|
||
#include <linux/args.h> | ||
#include <linux/linkage.h> | ||
#include <asm/asm.h> | ||
|
||
.text | ||
|
||
/* Element width, in bits, used by the "word" flavour of the copy routine. */
#define WORD_EEW 32

/*
 * Mnemonics derived from WORD_EEW:
 *   WORD_SEW - SEW operand for vsetivli (e<EEW>)
 *   VEC_L    - unit-stride vector load  (vle<EEW>.v)
 *   VEC_S    - unit-stride vector store (vse<EEW>.v)
 * VEC_S has to be built from "vse": building it from "vle" yields a second
 * load, so a VEC_L/VEC_S pair would never write to memory.
 */
#define WORD_SEW CONCATENATE(e, WORD_EEW)
#define VEC_L CONCATENATE(vle, WORD_EEW).v
#define VEC_S CONCATENATE(vse, WORD_EEW).v
|
||
/*
 * void __riscv_copy_vec_words_unaligned(void *, const void *, size_t)
 *
 * memcpy-style routine that makes no attempt to align either buffer and
 * moves the data with WORD_EEW-bit vector element accesses (VEC_L / VEC_S).
 *
 * In:       a0 = first argument (destination pointer)
 *           a1 = second argument (source pointer)
 *           a2 = third argument (size in bytes)
 * Clobbers: a0, a1, a3, a4, t0, the v0 register group (LMUL=8), vl/vtype
 *
 * Note: the size is truncated to a multiple of WORD_EEW. Each pass handles
 * 8 elements of WORD_EEW bits, i.e. 8 * WORD_EEW / 8 = WORD_EEW bytes, which
 * is why WORD_EEW also serves as the byte stride below. If the truncated
 * size is zero the routine returns without touching memory.
 */
SYM_FUNC_START(__riscv_copy_vec_words_unaligned)
	andi	a4, a2, ~(WORD_EEW - 1)		/* a4 = size rounded down */
	beqz	a4, .Lwords_done		/* nothing left after rounding */
	add	a3, a1, a4			/* a3 = end of the source range */
	.option push
	.option arch, +zve32x
.Lwords_loop:
	vsetivli t0, 8, WORD_SEW, m8, ta, ma	/* request 8 elements per pass */
	VEC_L	v0, (a1)			/* read one chunk from the source */
	VEC_S	v0, (a0)			/* VEC_S: intended as the matching store to the destination */
	addi	a0, a0, WORD_EEW		/* advance both pointers by one chunk */
	addi	a1, a1, WORD_EEW
	bltu	a1, a3, .Lwords_loop		/* repeat until the source end is reached */

.Lwords_done:
	.option pop
	ret
SYM_FUNC_END(__riscv_copy_vec_words_unaligned)
|
||
/*
 * void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t)
 *
 * memcpy-style routine that makes no attempt to align either buffer and
 * issues only byte-wide (e8) vector element accesses.
 *
 * In:       a0 = first argument (destination pointer)
 *           a1 = second argument (source pointer)
 *           a2 = third argument (size in bytes)
 * Clobbers: a0, a1, a3, a4, t0, the v0 register group (LMUL=8), vl/vtype
 *
 * Note: the size is truncated to a multiple of 8, the number of bytes moved
 * per pass. If the truncated size is zero the routine returns without
 * touching memory.
 */
SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned)
	andi	a4, a2, ~(8 - 1)		/* a4 = size rounded down to 8 */
	beqz	a4, .Lbytes_done		/* nothing left after rounding */
	add	a3, a1, a4			/* a3 = end of the source range */
	.option push
	.option arch, +zve32x
.Lbytes_loop:
	vsetivli t0, 8, e8, m8, ta, ma		/* request 8 byte elements per pass */
	vle8.v	v0, (a1)			/* read 8 bytes from the source */
	vse8.v	v0, (a0)			/* write them to the destination */
	addi	a0, a0, 8			/* advance both pointers by 8 bytes */
	addi	a1, a1, 8
	bltu	a1, a3, .Lbytes_loop		/* repeat until the source end is reached */

.Lbytes_done:
	.option pop
	ret
SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned)