RISC-V: Detect unaligned vector accesses supported
Run an unaligned vector access to test whether the system supports
vector unaligned accesses, and report the result under a new hwprobe
key. This lets userspace discover whether vector misaligned accesses
are supported and whether they are faster or slower than equivalent
byte accesses.

Signed-off-by: Jesse Taube <jesse@rivosinc.com>
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20241017-jesse_unaligned_vector-v10-4-5b33500160f8@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Jesse Taube authored and Palmer Dabbelt committed Oct 18, 2024
1 parent c05a62c commit d1703dc
Showing 11 changed files with 222 additions and 30 deletions.
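
As a usage sketch (not part of this commit), userspace can query the new key
through the hwprobe syscall. This is a minimal example assuming a riscv64
toolchain and kernel uapi headers that contain this patch; note that the
vector values mirror the scalar ones, with the EMULATED slot (1) left unused
because the kernel does not emulate vector misaligned accesses.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>	/* struct riscv_hwprobe and the macros added below */

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
	};

	/* cpusetsize == 0 with cpus == NULL asks about all online CPUs */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
		return 1;

	switch (pair.value) {
	case RISCV_HWPROBE_MISALIGNED_VECTOR_FAST:
		puts("vector misaligned accesses: fast");
		break;
	case RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW:
		puts("vector misaligned accesses: slow");
		break;
	case RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED:
		puts("vector misaligned accesses: unsupported");
		break;
	default:
		puts("vector misaligned accesses: unknown");
		break;
	}
	return 0;
}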
36 changes: 36 additions & 0 deletions arch/riscv/Kconfig
@@ -784,12 +784,26 @@ config THREAD_SIZE_ORDER
Specify the Pages of thread stack size (from 4KB to 64KB), which also
affects irq stack size, which is equal to thread stack size.

config RISCV_MISALIGNED
bool
help
Embed support for detecting and emulating misaligned
scalar or vector loads and stores.

config RISCV_SCALAR_MISALIGNED
bool
select RISCV_MISALIGNED
select SYSCTL_ARCH_UNALIGN_ALLOW
help
Embed support for emulating misaligned loads and stores.

config RISCV_VECTOR_MISALIGNED
bool
select RISCV_MISALIGNED
depends on RISCV_ISA_V
help
Enable detecting support for vector misaligned loads and stores.

choice
prompt "Unaligned Accesses Support"
default RISCV_PROBE_UNALIGNED_ACCESS
@@ -841,6 +855,28 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS

endchoice

choice
prompt "Vector unaligned Accesses Support"
depends on RISCV_ISA_V
default RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
help
This determines the level of support for vector unaligned accesses. This
information is used by the kernel to perform optimizations. It is also
exposed to user space via the hwprobe syscall. The hardware will be
probed at boot by default.

config RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
bool "Probe speed of vector unaligned accesses"
select RISCV_VECTOR_MISALIGNED
depends on RISCV_ISA_V
help
During boot, the kernel will run a series of tests to determine
whether vector unaligned accesses are supported and, if so, how
fast they are on the underlying system.

endchoice
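
# Illustrative fragment (an assumption about a typical .config, not part
# of this diff): enabling the new probing option pulls in the hidden
# helper symbols through "select":
#
#   CONFIG_RISCV_ISA_V=y
#   CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS=y
#   CONFIG_RISCV_VECTOR_MISALIGNED=y	# selected automatically
#   CONFIG_RISCV_MISALIGNED=y		# selected automatically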

source "arch/riscv/Kconfig.vendor"

endmenu # "Platform type"
8 changes: 7 additions & 1 deletion arch/riscv/include/asm/cpufeature.h
@@ -59,8 +59,8 @@ void riscv_user_isa_enable(void);
#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
_RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)

#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
bool check_unaligned_access_emulated_all_cpus(void);
#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
void check_unaligned_access_emulated(struct work_struct *work __always_unused);
void unaligned_emulation_finish(void);
bool unaligned_ctl_available(void);
@@ -72,6 +72,12 @@ static inline bool unaligned_ctl_available(void)
}
#endif

bool check_vector_unaligned_access_emulated_all_cpus(void);
#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
DECLARE_PER_CPU(long, vector_misaligned_access);
#endif

#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);

11 changes: 0 additions & 11 deletions arch/riscv/include/asm/entry-common.h
@@ -25,18 +25,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
void handle_page_fault(struct pt_regs *regs);
void handle_break(struct pt_regs *regs);

#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
int handle_misaligned_load(struct pt_regs *regs);
int handle_misaligned_store(struct pt_regs *regs);
#else
static inline int handle_misaligned_load(struct pt_regs *regs)
{
return -1;
}
static inline int handle_misaligned_store(struct pt_regs *regs)
{
return -1;
}
#endif

#endif /* _ASM_RISCV_ENTRY_COMMON_H */
2 changes: 1 addition & 1 deletion arch/riscv/include/asm/hwprobe.h
@@ -8,7 +8,7 @@

#include <uapi/asm/hwprobe.h>

#define RISCV_HWPROBE_MAX_KEY 9
#define RISCV_HWPROBE_MAX_KEY 10

static inline bool riscv_hwprobe_key_is_valid(__s64 key)
{
2 changes: 2 additions & 0 deletions arch/riscv/include/asm/vector.h
@@ -21,6 +21,7 @@

extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
bool insn_is_vector(u32 insn_buf);
bool riscv_v_first_use_handler(struct pt_regs *regs);
void kernel_vector_begin(void);
void kernel_vector_end(void);
@@ -268,6 +269,7 @@ struct pt_regs;

static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
static __always_inline bool has_vector(void) { return false; }
static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
5 changes: 5 additions & 0 deletions arch/riscv/include/uapi/asm/hwprobe.h
@@ -88,6 +88,11 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2
#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3
#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4
#define RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF 10
#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN 0
#define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2
#define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3
#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */

/* Flags */
4 changes: 2 additions & 2 deletions arch/riscv/kernel/Makefile
@@ -68,8 +68,8 @@ obj-y += probes/
obj-y += tests/
obj-$(CONFIG_MMU) += vdso.o vdso/

obj-$(CONFIG_RISCV_SCALAR_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_RISCV_SCALAR_MISALIGNED) += unaligned_access_speed.o
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o

obj-$(CONFIG_FPU) += fpu.o
35 changes: 35 additions & 0 deletions arch/riscv/kernel/sys_hwprobe.c
@@ -201,6 +201,37 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
}
#endif

#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
{
int cpu;
u64 perf = -1ULL;

/* Return whether accesses are supported, even if their speed was never probed */
for_each_cpu(cpu, cpus) {
int this_perf = per_cpu(vector_misaligned_access, cpu);

if (perf == -1ULL)
perf = this_perf;

if (perf != this_perf) {
perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
break;
}
}

if (perf == -1ULL)
return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;

return perf;
}
#else
static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
{
return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
}
#endif

static void hwprobe_one_pair(struct riscv_hwprobe *pair,
const struct cpumask *cpus)
{
@@ -229,6 +260,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
pair->value = hwprobe_misaligned(cpus);
break;

case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:
pair->value = hwprobe_vec_misaligned(cpus);
break;

case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
pair->value = 0;
if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
125 changes: 121 additions & 4 deletions arch/riscv/kernel/traps_misaligned.c
@@ -16,6 +16,7 @@
#include <asm/entry-common.h>
#include <asm/hwprobe.h>
#include <asm/cpufeature.h>
#include <asm/vector.h>

#define INSN_MATCH_LB 0x3
#define INSN_MASK_LB 0x707f
@@ -322,12 +323,37 @@ union reg_data {
u64 data_u64;
};

static bool unaligned_ctl __read_mostly;

/* sysctl hooks */
int unaligned_enabled __read_mostly = 1; /* Enabled by default */

int handle_misaligned_load(struct pt_regs *regs)
#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
static int handle_vector_misaligned_load(struct pt_regs *regs)
{
unsigned long epc = regs->epc;
unsigned long insn;

if (get_insn(regs, epc, &insn))
return -1;

/* Only return 0 when in check_vector_unaligned_access_emulated */
if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
*this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
regs->epc = epc + INSN_LEN(insn);
return 0;
}

/* We do not emulate vector instructions yet */
regs->epc = epc;
return -1;
}
#else
static int handle_vector_misaligned_load(struct pt_regs *regs)
{
return -1;
}
#endif

static int handle_scalar_misaligned_load(struct pt_regs *regs)
{
union reg_data val;
unsigned long epc = regs->epc;
@@ -435,7 +461,7 @@ int handle_misaligned_load(struct pt_regs *regs)
return 0;
}

int handle_misaligned_store(struct pt_regs *regs)
static int handle_scalar_misaligned_store(struct pt_regs *regs)
{
union reg_data val;
unsigned long epc = regs->epc;
@@ -526,6 +552,91 @@ int handle_misaligned_store(struct pt_regs *regs)
return 0;
}

int handle_misaligned_load(struct pt_regs *regs)
{
unsigned long epc = regs->epc;
unsigned long insn;

if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) {
if (get_insn(regs, epc, &insn))
return -1;

if (insn_is_vector(insn))
return handle_vector_misaligned_load(regs);
}

if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
return handle_scalar_misaligned_load(regs);

return -1;
}

int handle_misaligned_store(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
return handle_scalar_misaligned_store(regs);

return -1;
}

#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused)
{
long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
unsigned long tmp_var;

*mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;

kernel_vector_begin();
/*
* In pre-13.0.0 versions of GCC, vector registers cannot appear in
* the clobber list. This inline asm clobbers v0, but since we do not
* currently build the kernel with V enabled, the v0 clobber arg is not
* needed (as the compiler will not emit vector code itself). If the kernel
* is changed to build with V enabled, the clobber arg will need to be
* added here.
*/
__asm__ __volatile__ (
".balign 4\n\t"
".option push\n\t"
".option arch, +zve32x\n\t"
" vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b
" vle16.v v0, (%[ptr])\n\t" // Load bytes
".option pop\n\t"
: : [ptr] "r" ((u8 *)&tmp_var + 1));
kernel_vector_end();
}
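
/*
 * Illustrative sketch (not part of this commit): if the kernel were ever
 * built with V enabled, the clobber described in the comment above would
 * be added on the asm's final line, which requires GCC 13 or newer:
 *
 *	: : [ptr] "r" ((u8 *)&tmp_var + 1) : "v0");
 */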

bool check_vector_unaligned_access_emulated_all_cpus(void)
{
int cpu;

if (!has_vector()) {
for_each_online_cpu(cpu)
per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
return false;
}

schedule_on_each_cpu(check_vector_unaligned_access_emulated);

for_each_online_cpu(cpu)
if (per_cpu(vector_misaligned_access, cpu)
== RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
return false;

return true;
}
#else
bool check_vector_unaligned_access_emulated_all_cpus(void)
{
return false;
}
#endif

#ifdef CONFIG_RISCV_SCALAR_MISALIGNED

static bool unaligned_ctl __read_mostly;

void check_unaligned_access_emulated(struct work_struct *work __always_unused)
{
int cpu = smp_processor_id();
@@ -574,3 +685,9 @@ bool unaligned_ctl_available(void)
{
return unaligned_ctl;
}
#else
bool check_unaligned_access_emulated_all_cpus(void)
{
return false;
}
#endif
22 changes: 12 additions & 10 deletions arch/riscv/kernel/unaligned_access_speed.c
@@ -19,7 +19,8 @@
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)

DEFINE_PER_CPU(long, misaligned_access_speed);
DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;

#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
static cpumask_t fast_misaligned_access;
Expand Down Expand Up @@ -260,23 +261,24 @@ static int check_unaligned_access_speed_all_cpus(void)
kfree(bufs);
return 0;
}
#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
static int check_unaligned_access_speed_all_cpus(void)
{
return 0;
}
#endif

static int check_unaligned_access_all_cpus(void)
{
bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
bool all_cpus_emulated;

all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
check_vector_unaligned_access_emulated_all_cpus();

if (!all_cpus_emulated)
return check_unaligned_access_speed_all_cpus();

return 0;
}
#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
static int check_unaligned_access_all_cpus(void)
{
check_unaligned_access_emulated_all_cpus();

return 0;
}
#endif

arch_initcall(check_unaligned_access_all_cpus);