Skip to content

Commit

Permalink
arm64/sme: Implement traps and syscall handling for SME
Browse files Browse the repository at this point in the history
By default all SME operations in userspace will trap.  When this happens
we allocate storage space for the SME register state, set up the SVE
registers and disable traps.  We do not need to initialize ZA since the
architecture guarantees that it will be zeroed when enabled and when we
trap ZA is disabled.

On syscall we exit streaming mode if we were previously in it and ensure
that all but the lower 128 bits of the registers are zeroed while
preserving the state of ZA. This follows the aarch64 PCS for SME, ZA
state is preserved over a function call and streaming mode is exited.
Since the traps for SME do not distinguish between streaming mode SVE
and ZA usage if ZA is in use rather than reenabling traps we instead
zero the parts of the SVE registers not shared with FPSIMD and leave SME
enabled, this simplifies handling SME traps. If ZA is not in use then we
reenable SME traps and fall through to normal handling of SVE.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-17-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
  • Loading branch information
Mark Brown authored and Catalin Marinas committed Apr 22, 2022
1 parent 0033cd9 commit 8bd7f91
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 23 deletions.
1 change: 1 addition & 0 deletions arch/arm64/include/asm/esr.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
#define ESR_ELx_IL_SHIFT (25)
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)

/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR_SHIFT (6)
Expand Down
1 change: 1 addition & 0 deletions arch/arm64/include/asm/exception.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
struct pt_regs *regs);
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
void do_sve_acc(unsigned int esr, struct pt_regs *regs);
void do_sme_acc(unsigned int esr, struct pt_regs *regs);
void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
void do_sysinstr(unsigned int esr, struct pt_regs *regs);
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
Expand Down
39 changes: 39 additions & 0 deletions arch/arm64/include/asm/fpsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ static inline bool sve_vq_available(unsigned int vq)
return vq_available(ARM64_VEC_SVE, vq);
}

size_t sve_state_size(struct task_struct const *task);

#else /* ! CONFIG_ARM64_SVE */

static inline void sve_alloc(struct task_struct *task) { }
Expand Down Expand Up @@ -278,10 +280,25 @@ static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }

static inline size_t sve_state_size(struct task_struct const *task)
{
return 0;
}

#endif /* ! CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

static inline void sme_user_disable(void)
{
sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
}

static inline void sme_user_enable(void)
{
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
}

static inline void sme_smstart_sm(void)
{
asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
Expand Down Expand Up @@ -309,23 +326,45 @@ static inline int sme_max_virtualisable_vl(void)
return vec_max_virtualisable_vl(ARM64_VEC_SME);
}

extern void sme_alloc(struct task_struct *task);
extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);

/*
* Return how many bytes of memory are required to store the full SME
* specific state (currently just ZA) for task, given task's currently
* configured vector length.
*/
static inline size_t za_state_size(struct task_struct const *task)
{
unsigned int vl = task_get_sme_vl(task);

return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}

#else

static inline void sme_user_disable(void) { BUILD_BUG(); }
static inline void sme_user_enable(void) { BUILD_BUG(); }

static inline void sme_smstart_sm(void) { }
static inline void sme_smstop_sm(void) { }
static inline void sme_smstop(void) { }

static inline void sme_alloc(struct task_struct *task) { }
static inline void sme_setup(void) { }
static inline unsigned int sme_get_vl(void) { return 0; }
static inline int sme_max_vl(void) { return 0; }
static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }

static inline size_t za_state_size(struct task_struct const *task)
{
return 0;
}

#endif /* ! CONFIG_ARM64_SME */

/* For use by EFI runtime services calls only */
Expand Down
11 changes: 11 additions & 0 deletions arch/arm64/kernel/entry-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,14 @@ static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
exit_to_user_mode(regs);
}

static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
{
enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sme_acc(esr, regs);
exit_to_user_mode(regs);
}

static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
{
enter_from_user_mode(regs);
Expand Down Expand Up @@ -645,6 +653,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_SVE:
el0_sve_acc(regs, esr);
break;
case ESR_ELx_EC_SME:
el0_sme_acc(regs, esr);
break;
case ESR_ELx_EC_FP_EXC64:
el0_fpsimd_exc(regs, esr);
break;
Expand Down
167 changes: 149 additions & 18 deletions arch/arm64/kernel/fpsimd.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,12 @@ static void set_sme_default_vl(int val)
set_default_vl(ARM64_VEC_SME, val);
}

static void sme_free(struct task_struct *);

#else

static inline void sme_free(struct task_struct *t) { }

#endif

DEFINE_PER_CPU(bool, fpsimd_context_busy);
Expand Down Expand Up @@ -676,7 +682,7 @@ static void sve_to_fpsimd(struct task_struct *task)
* Return how many bytes of memory are required to store the full SVE
* state for task, given task's currently configured vector length.
*/
static size_t sve_state_size(struct task_struct const *task)
size_t sve_state_size(struct task_struct const *task)
{
unsigned int vl = 0;

Expand Down Expand Up @@ -818,18 +824,22 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
thread_sm_enabled(&task->thread))
sve_to_fpsimd(task);

if (system_supports_sme() && type == ARM64_VEC_SME)
if (system_supports_sme() && type == ARM64_VEC_SME) {
task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK |
SYS_SVCR_EL0_ZA_MASK);
clear_thread_flag(TIF_SME);
}

if (task == current)
put_cpu_fpsimd_context();

/*
* Force reallocation of task SVE state to the correct size
* on next use:
* Force reallocation of task SVE and SME state to the correct
* size on next use:
*/
sve_free(task);
if (system_supports_sme() && type == ARM64_VEC_SME)
sme_free(task);

task_set_vl(task, type, vl);

Expand Down Expand Up @@ -1164,12 +1174,43 @@ void __init sve_setup(void)
void fpsimd_release_task(struct task_struct *dead_task)
{
__sve_free(dead_task);
sme_free(dead_task);
}

#endif /* CONFIG_ARM64_SVE */

#ifdef CONFIG_ARM64_SME

/* This will move to uapi/asm/sigcontext.h when signals are implemented */
#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))

/*
* Ensure that task->thread.za_state is allocated and sufficiently large.
*
* This function should be used only in preparation for replacing
* task->thread.za_state with new data. The memory is always zeroed
* here to prevent stale data from showing through: this is done in
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
void sme_alloc(struct task_struct *task)
{
if (task->thread.za_state) {
memset(task->thread.za_state, 0, za_state_size(task));
return;
}

/* This could potentially be up to 64K. */
task->thread.za_state =
kzalloc(za_state_size(task), GFP_KERNEL);
}

static void sme_free(struct task_struct *task)
{
kfree(task->thread.za_state);
task->thread.za_state = NULL;
}

void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
/* Set priority for all PEs to architecturally defined minimum */
Expand Down Expand Up @@ -1279,6 +1320,29 @@ void __init sme_setup(void)

#endif /* CONFIG_ARM64_SME */

static void sve_init_regs(void)
{
/*
* Convert the FPSIMD state to SVE, zeroing all the state that
* is not shared with FPSIMD. If (as is likely) the current
* state is live in the registers then do this there and
* update our metadata for the current task including
* disabling the trap, otherwise update our in-memory copy.
* We are guaranteed to not be in streaming mode, we can only
* take a SVE trap when not in streaming mode and we can't be
* in streaming mode when taking a SME trap.
*/
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
unsigned long vq_minus_one =
sve_vq_from_vl(task_get_sve_vl(current)) - 1;
sve_set_vq(vq_minus_one);
sve_flush_live(true, vq_minus_one);
fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);
}
}

/*
* Trapped SVE access
*
Expand Down Expand Up @@ -1310,22 +1374,77 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
WARN_ON(1); /* SVE access shouldn't have trapped */

/*
* Convert the FPSIMD state to SVE, zeroing all the state that
* is not shared with FPSIMD. If (as is likely) the current
* state is live in the registers then do this there and
* update our metadata for the current task including
* disabling the trap, otherwise update our in-memory copy.
* Even if the task can have used streaming mode we can only
* generate SVE access traps in normal SVE mode and
* transitioning out of streaming mode may discard any
* streaming mode state. Always clear the high bits to avoid
* any potential errors tracking what is properly initialised.
*/
sve_init_regs();

put_cpu_fpsimd_context();
}

/*
* Trapped SME access
*
* Storage is allocated for the full SVE and SME state, the current
* FPSIMD register contents are migrated to SVE if SVE is not already
* active, and the access trap is disabled.
*
* TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
* would have disabled the SME access trap for userspace during
* ret_to_user, making an SVE access trap impossible in that case.
*/
void do_sme_acc(unsigned int esr, struct pt_regs *regs)
{
/* Even if we chose not to use SME, the hardware could still trap: */
if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}

/*
* If this not a trap due to SME being disabled then something
* is being used in the wrong mode, report as SIGILL.
*/
if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}

sve_alloc(current);
sme_alloc(current);
if (!current->thread.sve_state || !current->thread.za_state) {
force_sig(SIGKILL);
return;
}

get_cpu_fpsimd_context();

/* With TIF_SME userspace shouldn't generate any traps */
if (test_and_set_thread_flag(TIF_SME))
WARN_ON(1);

if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
unsigned long vq_minus_one =
sve_vq_from_vl(task_get_sve_vl(current)) - 1;
sve_set_vq(vq_minus_one);
sve_flush_live(true, vq_minus_one);
sve_vq_from_vl(task_get_sme_vl(current)) - 1;
sme_set_vq(vq_minus_one);

fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);
}

/*
* If SVE was not already active initialise the SVE registers,
* any non-shared state between the streaming and regular SVE
* registers is architecturally guaranteed to be zeroed when
* we enter streaming mode. We do not need to initialize ZA
* since ZA must be disabled at this point and enabling ZA is
* architecturally defined to zero ZA.
*/
if (system_supports_sve() && !test_thread_flag(TIF_SVE))
sve_init_regs();

put_cpu_fpsimd_context();
}

Expand Down Expand Up @@ -1442,8 +1561,12 @@ void fpsimd_flush_thread(void)
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
}

if (system_supports_sme())
if (system_supports_sme()) {
clear_thread_flag(TIF_SME);
sme_free(current);
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
}

put_cpu_fpsimd_context();
}
Expand Down Expand Up @@ -1493,14 +1616,22 @@ static void fpsimd_bind_task_to_cpu(void)
last->svcr = &current->thread.svcr;
current->thread.fpsimd_cpu = smp_processor_id();

/*
* Toggle SVE and SME trapping for userspace if needed, these
* are serialsied by ret_to_user().
*/
if (system_supports_sme()) {
if (test_thread_flag(TIF_SME))
sme_user_enable();
else
sme_user_disable();
}

if (system_supports_sve()) {
/* Toggle SVE trapping for userspace if needed */
if (test_thread_flag(TIF_SVE))
sve_user_enable();
else
sve_user_disable();

/* Serialised by exception return to user */
}
}

Expand Down
Loading

0 comments on commit 8bd7f91

Please sign in to comment.