Skip to content

Commit

Permalink
riscv: vector: Support xtheadvector save/restore
Browse files Browse the repository at this point in the history
Use alternatives to add support for xtheadvector vector save/restore
routines.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Yangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-9-236c22791ef9@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
  • Loading branch information
Charlie Jenkins authored and Palmer Dabbelt committed Jan 18, 2025
1 parent 01e3313 commit d863910
Show file tree
Hide file tree
Showing 8 changed files with 198 additions and 68 deletions.
6 changes: 6 additions & 0 deletions arch/riscv/include/asm/csr.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
#define SR_VS_CLEAN _AC(0x00000400, UL)
#define SR_VS_DIRTY _AC(0x00000600, UL)

#define SR_VS_THEAD _AC(0x01800000, UL) /* xtheadvector Status */
#define SR_VS_OFF_THEAD _AC(0x00000000, UL)
#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL)
#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL)
#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL)

#define SR_XS _AC(0x00018000, UL) /* Extension Status */
#define SR_XS_OFF _AC(0x00000000, UL)
#define SR_XS_INITIAL _AC(0x00008000, UL)
Expand Down
2 changes: 1 addition & 1 deletion arch/riscv/include/asm/switch_to.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ do { \
__set_prev_cpu(__prev->thread); \
if (has_fpu()) \
__switch_to_fpu(__prev, __next); \
if (has_vector()) \
if (has_vector() || has_xtheadvector()) \
__switch_to_vector(__prev, __next); \
if (switch_to_should_flush_icache(__next)) \
local_flush_icache_all(); \
Expand Down
222 changes: 173 additions & 49 deletions arch/riscv/include/asm/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,27 @@
#include <asm/cpufeature.h>
#include <asm/csr.h>
#include <asm/asm.h>
#include <asm/vendorid_list.h>
#include <asm/vendor_extensions.h>
#include <asm/vendor_extensions/thead.h>

#define __riscv_v_vstate_or(_val, TYPE) ({ \
typeof(_val) _res = _val; \
if (has_xtheadvector()) \
_res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD; \
else \
_res = (_res & ~SR_VS) | SR_VS_##TYPE; \
_res; \
})

#define __riscv_v_vstate_check(_val, TYPE) ({ \
bool _res; \
if (has_xtheadvector()) \
_res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD; \
else \
_res = ((_val) & SR_VS) == SR_VS_##TYPE; \
_res; \
})

extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
Expand All @@ -41,39 +62,62 @@ static __always_inline bool has_vector(void)
return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X);
}

static __always_inline bool has_xtheadvector_no_alternatives(void)
{
if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR);
else
return false;
}

static __always_inline bool has_xtheadvector(void)
{
if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID,
RISCV_ISA_VENDOR_EXT_XTHEADVECTOR);
else
return false;
}

static inline void __riscv_v_vstate_clean(struct pt_regs *regs)
{
regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN;
regs->status = __riscv_v_vstate_or(regs->status, CLEAN);
}

static inline void __riscv_v_vstate_dirty(struct pt_regs *regs)
{
regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY;
regs->status = __riscv_v_vstate_or(regs->status, DIRTY);
}

static inline void riscv_v_vstate_off(struct pt_regs *regs)
{
regs->status = (regs->status & ~SR_VS) | SR_VS_OFF;
regs->status = __riscv_v_vstate_or(regs->status, OFF);
}

static inline void riscv_v_vstate_on(struct pt_regs *regs)
{
regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL;
regs->status = __riscv_v_vstate_or(regs->status, INITIAL);
}

static inline bool riscv_v_vstate_query(struct pt_regs *regs)
{
return (regs->status & SR_VS) != 0;
return !__riscv_v_vstate_check(regs->status, OFF);
}

static __always_inline void riscv_v_enable(void)
{
csr_set(CSR_SSTATUS, SR_VS);
if (has_xtheadvector())
csr_set(CSR_SSTATUS, SR_VS_THEAD);
else
csr_set(CSR_SSTATUS, SR_VS);
}

static __always_inline void riscv_v_disable(void)
{
csr_clear(CSR_SSTATUS, SR_VS);
if (has_xtheadvector())
csr_clear(CSR_SSTATUS, SR_VS_THEAD);
else
csr_clear(CSR_SSTATUS, SR_VS);
}

static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
Expand All @@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
"csrr %0, " __stringify(CSR_VSTART) "\n\t"
"csrr %1, " __stringify(CSR_VTYPE) "\n\t"
"csrr %2, " __stringify(CSR_VL) "\n\t"
"csrr %3, " __stringify(CSR_VCSR) "\n\t"
"csrr %4, " __stringify(CSR_VLENB) "\n\t"
: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
"=r" (dest->vcsr), "=r" (dest->vlenb) : :);
"=r" (dest->vcsr) : :);

if (has_xtheadvector()) {
unsigned long status;

/*
* CSR_VCSR is defined as
* [2:1] - vxrm[1:0]
* [0] - vxsat
* The earlier vector spec implemented by T-Head uses separate
* registers for the same bit-elements, so just combine those
* into the existing output field.
*
* Additionally T-Head cores need FS to be enabled when accessing
* the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
* Though the cores do not implement the VXRM and VXSAT fields in the
* FCSR CSR that vector-0.7.1 specifies.
*/
status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);
dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT;

dest->vlenb = riscv_v_vsize / 32;

if ((status & SR_FS) != SR_FS_DIRTY)
csr_write(CSR_STATUS, status);
} else {
dest->vcsr = csr_read(CSR_VCSR);
dest->vlenb = csr_read(CSR_VLENB);
}
}

static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
Expand All @@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src
"vsetvl x0, %2, %1\n\t"
".option pop\n\t"
"csrw " __stringify(CSR_VSTART) ", %0\n\t"
"csrw " __stringify(CSR_VCSR) ", %3\n\t"
: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
"r" (src->vcsr) :);
: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl));

if (has_xtheadvector()) {
unsigned long status = csr_read(CSR_SSTATUS);

/*
* Similar to __vstate_csr_save above, restore values for the
* separate VXRM and VXSAT CSRs from the vcsr variable.
*/
status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);

csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK);
csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK);

if ((status & SR_FS) != SR_FS_DIRTY)
csr_write(CSR_STATUS, status);
} else {
csr_write(CSR_VCSR, src->vcsr);
}
}

static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
Expand All @@ -108,19 +194,33 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,

riscv_v_enable();
__vstate_csr_save(save_to);
asm volatile (
".option push\n\t"
".option arch, +zve32x\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
"vse8.v v0, (%1)\n\t"
"add %1, %1, %0\n\t"
"vse8.v v8, (%1)\n\t"
"add %1, %1, %0\n\t"
"vse8.v v16, (%1)\n\t"
"add %1, %1, %0\n\t"
"vse8.v v24, (%1)\n\t"
".option pop\n\t"
: "=&r" (vl) : "r" (datap) : "memory");
if (has_xtheadvector()) {
asm volatile (
"mv t0, %0\n\t"
THEAD_VSETVLI_T4X0E8M8D1
THEAD_VSB_V_V0T0
"add t0, t0, t4\n\t"
THEAD_VSB_V_V0T0
"add t0, t0, t4\n\t"
THEAD_VSB_V_V0T0
"add t0, t0, t4\n\t"
THEAD_VSB_V_V0T0
: : "r" (datap) : "memory", "t0", "t4");
} else {
asm volatile (
".option push\n\t"
".option arch, +zve32x\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
"vse8.v v0, (%1)\n\t"
"add %1, %1, %0\n\t"
"vse8.v v8, (%1)\n\t"
"add %1, %1, %0\n\t"
"vse8.v v16, (%1)\n\t"
"add %1, %1, %0\n\t"
"vse8.v v24, (%1)\n\t"
".option pop\n\t"
: "=&r" (vl) : "r" (datap) : "memory");
}
riscv_v_disable();
}

Expand All @@ -130,19 +230,33 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
unsigned long vl;

riscv_v_enable();
asm volatile (
".option push\n\t"
".option arch, +zve32x\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
"vle8.v v0, (%1)\n\t"
"add %1, %1, %0\n\t"
"vle8.v v8, (%1)\n\t"
"add %1, %1, %0\n\t"
"vle8.v v16, (%1)\n\t"
"add %1, %1, %0\n\t"
"vle8.v v24, (%1)\n\t"
".option pop\n\t"
: "=&r" (vl) : "r" (datap) : "memory");
if (has_xtheadvector()) {
asm volatile (
"mv t0, %0\n\t"
THEAD_VSETVLI_T4X0E8M8D1
THEAD_VLB_V_V0T0
"add t0, t0, t4\n\t"
THEAD_VLB_V_V0T0
"add t0, t0, t4\n\t"
THEAD_VLB_V_V0T0
"add t0, t0, t4\n\t"
THEAD_VLB_V_V0T0
: : "r" (datap) : "memory", "t0", "t4");
} else {
asm volatile (
".option push\n\t"
".option arch, +zve32x\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
"vle8.v v0, (%1)\n\t"
"add %1, %1, %0\n\t"
"vle8.v v8, (%1)\n\t"
"add %1, %1, %0\n\t"
"vle8.v v16, (%1)\n\t"
"add %1, %1, %0\n\t"
"vle8.v v24, (%1)\n\t"
".option pop\n\t"
: "=&r" (vl) : "r" (datap) : "memory");
}
__vstate_csr_restore(restore_from);
riscv_v_disable();
}
Expand All @@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void)
unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1);

riscv_v_enable();
if (has_xtheadvector())
asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4");
else
asm volatile (
".option push\n\t"
".option arch, +zve32x\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
".option pop\n\t": "=&r" (vl));

asm volatile (
".option push\n\t"
".option arch, +zve32x\n\t"
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
"vmv.v.i v0, -1\n\t"
"vmv.v.i v8, -1\n\t"
"vmv.v.i v16, -1\n\t"
"vmv.v.i v24, -1\n\t"
"vsetvl %0, x0, %1\n\t"
".option pop\n\t"
: "=&r" (vl) : "r" (vtype_inval) : "memory");
: "=&r" (vl) : "r" (vtype_inval));

riscv_v_disable();
}

static inline void riscv_v_vstate_discard(struct pt_regs *regs)
{
if ((regs->status & SR_VS) == SR_VS_OFF)
return;

__riscv_v_vstate_discard();
__riscv_v_vstate_dirty(regs);
if (riscv_v_vstate_query(regs)) {
__riscv_v_vstate_discard();
__riscv_v_vstate_dirty(regs);
}
}

static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) == SR_VS_DIRTY) {
if (__riscv_v_vstate_check(regs->status, DIRTY)) {
__riscv_v_vstate_save(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
Expand All @@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) != SR_VS_OFF) {
if (riscv_v_vstate_query(regs)) {
__riscv_v_vstate_restore(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
Expand All @@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
static inline void riscv_v_vstate_set_restore(struct task_struct *task,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) != SR_VS_OFF) {
if (riscv_v_vstate_query(regs)) {
set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
riscv_v_vstate_on(regs);
}
Expand Down Expand Up @@ -270,6 +392,8 @@ struct pt_regs;
static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
static __always_inline bool has_vector(void) { return false; }
static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; }
static __always_inline bool has_xtheadvector(void) { return false; }
static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
Expand Down
6 changes: 3 additions & 3 deletions arch/riscv/kernel/cpufeature.c
Original file line number Diff line number Diff line change
Expand Up @@ -874,8 +874,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
riscv_fill_vendor_ext_list(cpu);
}

if (riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR) &&
has_thead_homogeneous_vlenb() < 0) {
if (has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) {
pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n");
disable_xtheadvector();
}
Expand Down Expand Up @@ -932,7 +931,8 @@ void __init riscv_fill_hwcap(void)
elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
}

if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) {
if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) ||
has_xtheadvector_no_alternatives()) {
/*
* This cannot fail when called on the boot hart
*/
Expand Down
Loading

0 comments on commit d863910

Please sign in to comment.