Skip to content

Commit

Permalink
MIPS: Basic MSA context switching support
Browse files Browse the repository at this point in the history
This patch adds support for context switching the MSA vector registers.
These 128 bit vector registers are aliased with the FP registers - an
FP register accesses the least significant bits of the vector register
with which it is aliased (ie. the register with the same index). Due to
both this & the requirement that the scalar FPU must be 64-bit (FR=1) if
enabled at the same time as MSA the kernel will enable MSA & scalar FP
at the same time for tasks which use MSA. If we restore the MSA vector
context then we might as well enable the scalar FPU since the reason it
was left disabled was to allow for lazy FP context restoring - but we
just restored the FP context as it's a subset of the vector context. If
we restore the FP context and have previously used MSA then we have to
restore the whole vector context anyway (see comment in
enable_restore_fp_context for details) so similarly we might as well
enable MSA.

Thus if a task does not use MSA then it will continue to behave as
without this patch - the scalar FP context will be saved & restored as
usual. But if a task executes an MSA instruction then it will save &
restore the vector context forever more.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/6431/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
  • Loading branch information
Paul Burton authored and Ralf Baechle committed Mar 26, 2014
1 parent a5e9a69 commit 1db1af8
Show file tree
Hide file tree
Showing 10 changed files with 275 additions and 34 deletions.
9 changes: 5 additions & 4 deletions arch/mips/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -2090,10 +2090,11 @@ config CPU_HAS_MSA
help
MIPS SIMD Architecture (MSA) introduces 128 bit wide vector registers
and a set of SIMD instructions to operate on them. When this option
is enabled the kernel will support detection of the MSA ASE. If you
know that your kernel will only be running on CPUs which do not
support MSA then you may wish to say N here to reduce the size of
your kernel.
is enabled the kernel will support allocating & switching MSA
vector register contexts. If you know that your kernel will only be
running on CPUs which do not support MSA or that your userland will
not be making use of it then you may wish to say N here to reduce
the size & complexity of your kernel.

If unsure, say Y.

Expand Down
70 changes: 70 additions & 0 deletions arch/mips/include/asm/asmmacro.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,4 +328,74 @@
.endm
#endif

.macro msa_save_all thread
st_d 0, THREAD_FPR0, \thread
st_d 1, THREAD_FPR1, \thread
st_d 2, THREAD_FPR2, \thread
st_d 3, THREAD_FPR3, \thread
st_d 4, THREAD_FPR4, \thread
st_d 5, THREAD_FPR5, \thread
st_d 6, THREAD_FPR6, \thread
st_d 7, THREAD_FPR7, \thread
st_d 8, THREAD_FPR8, \thread
st_d 9, THREAD_FPR9, \thread
st_d 10, THREAD_FPR10, \thread
st_d 11, THREAD_FPR11, \thread
st_d 12, THREAD_FPR12, \thread
st_d 13, THREAD_FPR13, \thread
st_d 14, THREAD_FPR14, \thread
st_d 15, THREAD_FPR15, \thread
st_d 16, THREAD_FPR16, \thread
st_d 17, THREAD_FPR17, \thread
st_d 18, THREAD_FPR18, \thread
st_d 19, THREAD_FPR19, \thread
st_d 20, THREAD_FPR20, \thread
st_d 21, THREAD_FPR21, \thread
st_d 22, THREAD_FPR22, \thread
st_d 23, THREAD_FPR23, \thread
st_d 24, THREAD_FPR24, \thread
st_d 25, THREAD_FPR25, \thread
st_d 26, THREAD_FPR26, \thread
st_d 27, THREAD_FPR27, \thread
st_d 28, THREAD_FPR28, \thread
st_d 29, THREAD_FPR29, \thread
st_d 30, THREAD_FPR30, \thread
st_d 31, THREAD_FPR31, \thread
.endm

.macro msa_restore_all thread
ld_d 0, THREAD_FPR0, \thread
ld_d 1, THREAD_FPR1, \thread
ld_d 2, THREAD_FPR2, \thread
ld_d 3, THREAD_FPR3, \thread
ld_d 4, THREAD_FPR4, \thread
ld_d 5, THREAD_FPR5, \thread
ld_d 6, THREAD_FPR6, \thread
ld_d 7, THREAD_FPR7, \thread
ld_d 8, THREAD_FPR8, \thread
ld_d 9, THREAD_FPR9, \thread
ld_d 10, THREAD_FPR10, \thread
ld_d 11, THREAD_FPR11, \thread
ld_d 12, THREAD_FPR12, \thread
ld_d 13, THREAD_FPR13, \thread
ld_d 14, THREAD_FPR14, \thread
ld_d 15, THREAD_FPR15, \thread
ld_d 16, THREAD_FPR16, \thread
ld_d 17, THREAD_FPR17, \thread
ld_d 18, THREAD_FPR18, \thread
ld_d 19, THREAD_FPR19, \thread
ld_d 20, THREAD_FPR20, \thread
ld_d 21, THREAD_FPR21, \thread
ld_d 22, THREAD_FPR22, \thread
ld_d 23, THREAD_FPR23, \thread
ld_d 24, THREAD_FPR24, \thread
ld_d 25, THREAD_FPR25, \thread
ld_d 26, THREAD_FPR26, \thread
ld_d 27, THREAD_FPR27, \thread
ld_d 28, THREAD_FPR28, \thread
ld_d 29, THREAD_FPR29, \thread
ld_d 30, THREAD_FPR30, \thread
ld_d 31, THREAD_FPR31, \thread
.endm

#endif /* _ASM_ASMMACRO_H */
28 changes: 28 additions & 0 deletions arch/mips/include/asm/msa.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@

#include <asm/mipsregs.h>

extern void _save_msa(struct task_struct *);
extern void _restore_msa(struct task_struct *);

static inline void enable_msa(void)
{
if (cpu_has_msa) {
Expand All @@ -36,6 +39,31 @@ static inline int is_msa_enabled(void)
return read_c0_config5() & MIPS_CONF5_MSAEN;
}

static inline int thread_msa_context_live(void)
{
/*
* Check cpu_has_msa only if it's a constant. This will allow the
* compiler to optimise out code for CPUs without MSA without adding
* an extra redundant check for CPUs with MSA.
*/
if (__builtin_constant_p(cpu_has_msa) && !cpu_has_msa)
return 0;

return test_thread_flag(TIF_MSA_CTX_LIVE);
}

static inline void save_msa(struct task_struct *t)
{
if (cpu_has_msa)
_save_msa(t);
}

static inline void restore_msa(struct task_struct *t)
{
if (cpu_has_msa)
_restore_msa(t);
}

#ifdef TOOLCHAIN_SUPPORTS_MSA

#define __BUILD_MSA_CTL_REG(name, cs) \
Expand Down
9 changes: 8 additions & 1 deletion arch/mips/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,12 @@ extern unsigned int vced_count, vcei_count;


#define NUM_FPU_REGS 32
#define FPU_REG_WIDTH 64

#ifdef CONFIG_CPU_HAS_MSA
# define FPU_REG_WIDTH 128
#else
# define FPU_REG_WIDTH 64
#endif

union fpureg {
__u32 val32[FPU_REG_WIDTH / 32];
Expand Down Expand Up @@ -133,6 +138,7 @@ BUILD_FPR_ACCESS(64)
struct mips_fpu_struct {
union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
unsigned int msacsr;
};

#define NUM_DSP_REGS 6
Expand Down Expand Up @@ -310,6 +316,7 @@ struct thread_struct {
.fpu = { \
.fpr = {{{0,},},}, \
.fcr31 = 0, \
.msacsr = 0, \
}, \
/* \
* FPU affinity state (null if not FPAFF) \
Expand Down
22 changes: 17 additions & 5 deletions arch/mips/include/asm/switch_to.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,29 @@
#include <asm/watch.h>
#include <asm/dsp.h>
#include <asm/cop2.h>
#include <asm/msa.h>

struct task_struct;

enum {
FP_SAVE_NONE = 0,
FP_SAVE_VECTOR = -1,
FP_SAVE_SCALAR = 1,
};

/**
* resume - resume execution of a task
* @prev: The task previously executed.
* @next: The task to begin executing.
* @next_ti: task_thread_info(next).
* @usedfpu: Non-zero if prev's FP context should be saved.
* @fp_save: Which, if any, FP context to save for prev.
*
* This function is used whilst scheduling to save the context of prev & load
* the context of next. Returns prev.
*/
extern asmlinkage struct task_struct *resume(struct task_struct *prev,
struct task_struct *next, struct thread_info *next_ti,
u32 usedfpu);
s32 fp_save);

extern unsigned int ll_bit;
extern struct task_struct *ll_task;
Expand Down Expand Up @@ -75,7 +82,8 @@ do { \

#define switch_to(prev, next, last) \
do { \
u32 __usedfpu, __c0_stat; \
u32 __c0_stat; \
s32 __fpsave = FP_SAVE_NONE; \
__mips_mt_fpaff_switch_to(prev); \
if (cpu_has_dsp) \
__save_dsp(prev); \
Expand All @@ -88,8 +96,12 @@ do { \
write_c0_status(__c0_stat & ~ST0_CU2); \
} \
__clear_software_ll_bit(); \
__usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU); \
(last) = resume(prev, next, task_thread_info(next), __usedfpu); \
if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \
__fpsave = FP_SAVE_SCALAR; \
if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \
__fpsave = FP_SAVE_VECTOR; \
(last) = resume(prev, next, task_thread_info(next), __fpsave); \
disable_msa(); \
} while (0)

#define finish_arch_switch(prev) \
Expand Down
4 changes: 4 additions & 0 deletions arch/mips/include/asm/thread_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_LOAD_WATCH 25 /* If set, load watch registers */
#define TIF_SYSCALL_TRACEPOINT 26 /* syscall tracepoint instrumentation */
#define TIF_32BIT_FPREGS 27 /* 32-bit floating point registers */
#define TIF_USEDMSA 29 /* MSA has been used this quantum */
#define TIF_MSA_CTX_LIVE 30 /* MSA context must be preserved */
#define TIF_SYSCALL_TRACE 31 /* syscall trace active */

#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
Expand All @@ -133,6 +135,8 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
#define _TIF_32BIT_FPREGS (1<<TIF_32BIT_FPREGS)
#define _TIF_USEDMSA (1<<TIF_USEDMSA)
#define _TIF_MSA_CTX_LIVE (1<<TIF_MSA_CTX_LIVE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)

#define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \
Expand Down
1 change: 1 addition & 0 deletions arch/mips/kernel/genex.S
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER tr tr sti silent /* #13 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
BUILD_HANDLER msa msa sti silent /* #21 */
BUILD_HANDLER mdmx mdmx sti silent /* #22 */
#ifdef CONFIG_HARDWARE_WATCHPOINTS
/*
Expand Down
7 changes: 6 additions & 1 deletion arch/mips/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <asm/cpu.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
#include <asm/processor.h>
Expand Down Expand Up @@ -65,6 +66,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
clear_used_math();
clear_fpu_owner();
init_dsp();
clear_thread_flag(TIF_MSA_CTX_LIVE);
disable_msa();
regs->cp0_epc = pc;
regs->regs[29] = sp;
}
Expand All @@ -89,7 +92,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,

preempt_disable();

if (is_fpu_owner())
if (is_msa_enabled())
save_msa(p);
else if (is_fpu_owner())
save_fp(p);

if (cpu_has_dsp)
Expand Down
58 changes: 41 additions & 17 deletions arch/mips/kernel/r4k_switch.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,9 @@
*/
#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)

/*
* FPU context is saved iff the process has used it's FPU in the current
* time slice as indicated by _TIF_USEDFPU. In any case, the CU1 bit for user
* space STATUS register should be 0, so that a process *always* starts its
* userland with FPU disabled after each context switch.
*
* FPU will be enabled as soon as the process accesses FPU again, through
* do_cpu() trap.
*/

/*
* task_struct *resume(task_struct *prev, task_struct *next,
* struct thread_info *next_ti, int usedfpu)
* struct thread_info *next_ti, s32 fp_save)
*/
.align 5
LEAF(resume)
Expand All @@ -50,23 +40,37 @@
LONG_S ra, THREAD_REG31(a0)

/*
* check if we need to save FPU registers
* Check whether we need to save any FP context. FP context is saved
* iff the process has used the context with the scalar FPU or the MSA
* ASE in the current time slice, as indicated by _TIF_USEDFPU and
* _TIF_USEDMSA respectively. switch_to will have set fp_save
* accordingly to an FP_SAVE_ enum value.
*/
beqz a3, 2f

beqz a3, 1f

PTR_L t3, TASK_THREAD_INFO(a0)
/*
* clear saved user stack CU1 bit
* We do. Clear the saved CU1 bit for prev, such that next time it is
* scheduled it will start in userland with the FPU disabled. If the
* task uses the FPU then it will be enabled again via the do_cpu trap.
* This allows us to lazily restore the FP context.
*/
PTR_L t3, TASK_THREAD_INFO(a0)
LONG_L t0, ST_OFF(t3)
li t1, ~ST0_CU1
and t0, t0, t1
LONG_S t0, ST_OFF(t3)

/* Check whether we're saving scalar or vector context. */
bgtz a3, 1f

/* Save 128b MSA vector context. */
msa_save_all a0
b 2f

1: /* Save 32b/64b scalar FP context. */
fpu_save_double a0 t0 t1 # c0_status passed in t0
# clobbers t1
1:
2:

#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
PTR_LA t8, __stack_chk_guard
Expand Down Expand Up @@ -141,6 +145,26 @@ LEAF(_restore_fp)
jr ra
END(_restore_fp)

#ifdef CONFIG_CPU_HAS_MSA

/*
* Save a thread's MSA vector context.
*/
LEAF(_save_msa)
msa_save_all a0
jr ra
END(_save_msa)

/*
* Restore a thread's MSA vector context.
*/
LEAF(_restore_msa)
msa_restore_all a0
jr ra
END(_restore_msa)

#endif

/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
Expand Down
Loading

0 comments on commit 1db1af8

Please sign in to comment.