diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 659e07d7f31a8..7f5004065e8aa 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
 
 static __always_inline void arch_exit_to_user_mode(void)
 {
-	if (test_thread_flag(TIF_FPU))
-		__load_user_fpu_regs();
+	load_user_fpu_regs();
 
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 		debug_user_asce(1);
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
index c1b3920092a1c..c84cb33913e29 100644
--- a/arch/s390/include/asm/fpu.h
+++ b/arch/s390/include/asm/fpu.h
@@ -58,10 +58,6 @@ static inline bool cpu_has_vx(void)
 	return likely(test_facility(129));
 }
 
-void save_user_fpu_regs(void);
-void load_user_fpu_regs(void);
-void __load_user_fpu_regs(void);
-
 enum {
 	KERNEL_FPC_BIT = 0,
 	KERNEL_VXR_V0V7_BIT,
@@ -83,6 +79,8 @@ enum {
 #define KERNEL_VXR		(KERNEL_VXR_LOW | KERNEL_VXR_HIGH)
 #define KERNEL_FPR		(KERNEL_FPC | KERNEL_VXR_LOW)
 
+void load_fpu_state(struct fpu *state, int flags);
+void save_fpu_state(struct fpu *state, int flags);
 void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
 void __kernel_fpu_end(struct kernel_fpu *state, int flags);
 
@@ -162,26 +160,57 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
 	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
 }
 
-static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+static inline void load_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	if (!thread->ufpu_flags)
+		return;
+	load_fpu_state(&thread->ufpu, thread->ufpu_flags);
+	thread->ufpu_flags = 0;
+}
+
+static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
 {
-	state->hdr.mask = READ_ONCE(current->thread.kfpu_flags);
-	if (!test_thread_flag(TIF_FPU)) {
-		/* Save user space FPU state and register contents */
-		save_user_fpu_regs();
-	} else if (state->hdr.mask & flags) {
-		/* Save FPU/vector register in-use by the kernel */
+	save_fpu_state(&thread->ufpu, flags);
+	__atomic_or(flags, &thread->ufpu_flags);
+}
+
+static inline void save_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, flags;
+
+	mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
+	flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
+	if (flags)
+		__save_user_fpu_regs(thread, flags);
+	barrier();
+	WRITE_ONCE(thread->kfpu_flags, mask);
+}
+
+static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, uflags;
+
+	mask = __atomic_or(flags, &thread->kfpu_flags);
+	state->hdr.mask = mask;
+	uflags = READ_ONCE(thread->ufpu_flags);
+	if ((uflags & flags) != flags)
+		__save_user_fpu_regs(thread, ~uflags & flags);
+	if (mask & flags)
 		__kernel_fpu_begin(state, flags);
-	}
-	__atomic_or(flags, &current->thread.kfpu_flags);
 }
 
-static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
+static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
 {
-	WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask);
-	if (state->hdr.mask & flags) {
-		/* Restore FPU/vector register in-use by the kernel */
+	int mask = state->hdr.mask;
+
+	if (mask & flags)
 		__kernel_fpu_end(state, flags);
-	}
+	barrier();
+	WRITE_ONCE(current->thread.kfpu_flags, mask);
 }
 
 void __kernel_fpu_invalid_size(void);
@@ -222,28 +251,16 @@ static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
 
 static inline void save_kernel_fpu_regs(struct thread_struct *thread)
 {
-	struct fpu *state = &thread->kfpu;
-
 	if (!thread->kfpu_flags)
 		return;
-	fpu_stfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		save_vx_regs(state->vxrs);
-	else
-		save_fp_regs_vx(state->vxrs);
+	save_fpu_state(&thread->kfpu, thread->kfpu_flags);
 }
 
 static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
 {
-	struct fpu *state = &thread->kfpu;
-
 	if (!thread->kfpu_flags)
 		return;
-	fpu_lfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		load_vx_regs(state->vxrs);
-	else
-		load_fp_regs_vx(state->vxrs);
+	load_fpu_state(&thread->kfpu, thread->kfpu_flags);
 }
 
 static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index ecce58abf3dba..7cf00cf8fb0bc 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -166,6 +166,7 @@ struct thread_struct {
 	unsigned int gmap_write_flag;		/* gmap fault write indication */
 	unsigned int gmap_int_code;		/* int code of last gmap fault */
 	unsigned int gmap_pfault;		/* signal of a pending guest pfault */
+	int ufpu_flags;				/* user fpu flags */
 	int kfpu_flags;				/* kernel fpu flags */
 
 	/* Per-thread information related to debugging */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 62e9befe7890a..fa90bbdc5ef94 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -107,45 +107,87 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
 
-void __load_user_fpu_regs(void)
+void load_fpu_state(struct fpu *state, int flags)
 {
-	struct fpu *state = &current->thread.ufpu;
-
-	fpu_lfpc_safe(&state->fpc);
-	if (likely(cpu_has_vx()))
-		load_vx_regs(state->vxrs);
-	else
-		load_fp_regs_vx(state->vxrs);
-	clear_thread_flag(TIF_FPU);
-}
+	__vector128 *vxrs = &state->vxrs[0];
+	int mask;
 
-void load_user_fpu_regs(void)
-{
-	raw_local_irq_disable();
-	__load_user_fpu_regs();
-	raw_local_irq_enable();
+	if (flags & KERNEL_FPC)
+		fpu_lfpc(&state->fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_V0V7)
+			load_fp_regs_vx(state->vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		fpu_vlm(0, 15, &vxrs[0]);
+		fpu_vlm(16, 31, &vxrs[16]);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		fpu_vlm(8, 23, &vxrs[8]);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			fpu_vlm(0, 15, &vxrs[0]);
+		else if (mask == KERNEL_VXR_V0V7)
+			fpu_vlm(0, 7, &vxrs[0]);
+		else
+			fpu_vlm(8, 15, &vxrs[8]);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			fpu_vlm(16, 31, &vxrs[16]);
+		else if (mask == KERNEL_VXR_V16V23)
+			fpu_vlm(16, 23, &vxrs[16]);
+		else
+			fpu_vlm(24, 31, &vxrs[24]);
+	}
 }
-EXPORT_SYMBOL(load_user_fpu_regs);
 
-void save_user_fpu_regs(void)
+void save_fpu_state(struct fpu *state, int flags)
 {
-	unsigned long flags;
-	struct fpu *state;
-
-	local_irq_save(flags);
-
-	if (test_thread_flag(TIF_FPU))
-		goto out;
-
-	state = &current->thread.ufpu;
+	__vector128 *vxrs = &state->vxrs[0];
+	int mask;
 
-	fpu_stfpc(&state->fpc);
-	if (likely(cpu_has_vx()))
-		save_vx_regs(state->vxrs);
-	else
-		save_fp_regs_vx(state->vxrs);
-	set_thread_flag(TIF_FPU);
-out:
-	local_irq_restore(flags);
+	if (flags & KERNEL_FPC)
+		fpu_stfpc(&state->fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_LOW)
+			save_fp_regs_vx(state->vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		fpu_vstm(0, 15, &vxrs[0]);
+		fpu_vstm(16, 31, &vxrs[16]);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		fpu_vstm(8, 23, &vxrs[8]);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			fpu_vstm(0, 15, &vxrs[0]);
+		else if (mask == KERNEL_VXR_V0V7)
+			fpu_vstm(0, 7, &vxrs[0]);
+		else
+			fpu_vstm(8, 15, &vxrs[8]);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			fpu_vstm(16, 31, &vxrs[16]);
+		else if (mask == KERNEL_VXR_V16V23)
+			fpu_vstm(16, 23, &vxrs[16]);
+		else
+			fpu_vstm(24, 31, &vxrs[24]);
+	}
}
-EXPORT_SYMBOL(save_user_fpu_regs);
+EXPORT_SYMBOL(save_fpu_state);
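
A note for review, on how the flag-based API is used: a caller declares up front which register ranges it will clobber, and with this patch only those ranges are ever saved and restored. Below is a minimal sketch of a kernel-mode vector user, assuming the sized on-stack state helpers and the kernel_fpu_begin()/kernel_fpu_end() wrappers from this series; the function itself and its body are hypothetical:

	/*
	 * Hypothetical caller: claims fpc + v0-v15 (KERNEL_FPR), so at
	 * most that range is saved on entry and restored on exit, while
	 * v16-v31 stay live for user space throughout.
	 */
	static void vx_example(void)
	{
		DECLARE_KERNEL_FPU_ONSTACK16(vxstate);

		kernel_fpu_begin(&vxstate, KERNEL_FPR);
		/* ... vector instructions using fpc and v0-v15 only ... */
		kernel_fpu_end(&vxstate, KERNEL_FPR);
	}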
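Why the begin/end protocol still works when sections nest: __atomic_or() on s390 returns the previous mask, so state->hdr.mask records which ranges were already in kernel use, and a set bit in the new ufpu_flags means that user register range is already parked in thread->ufpu. A hypothetical trace of two nested sections, both claiming KERNEL_FPR:

	/*
	 * outer _kernel_fpu_begin(): old kfpu_flags == 0, so no kernel
	 *	registers are live yet; the user's fpc/v0-v15 are saved to
	 *	thread->ufpu, KERNEL_FPR is set in ufpu_flags, and
	 *	state->hdr.mask records 0.
	 * inner _kernel_fpu_begin(): ufpu_flags already covers KERNEL_FPR,
	 *	so no user save; old kfpu_flags & flags is non-zero, so the
	 *	outer section's live fpc/v0-v15 are saved into the inner
	 *	kernel_fpu area instead.
	 * inner _kernel_fpu_end():   restores the outer section's registers
	 *	and resets kfpu_flags to the saved mask (KERNEL_FPR).
	 * outer _kernel_fpu_end():   saved mask is 0, nothing to restore;
	 *	kfpu_flags drops back to 0 and the user's registers are
	 *	reloaded lazily by load_user_fpu_regs() on the next return
	 *	to user mode.
	 */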
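The payoff of the patch is the range selection in save_fpu_state()/load_fpu_state(): a full KERNEL_VXR mask takes the two-vstm fast path, the contiguous KERNEL_VXR_MID case collapses into a single vstm v8-v23, and any other combination decays to at most one store in the low bank and one in the high bank. The following stand-alone user-space model of that dispatch may help in checking the case analysis; the bit values mirror the KERNEL_* enum in fpu.h, and the harness itself is hypothetical:

	#include <stdio.h>

	/* Flag bits, mirroring the enum layout in arch/s390/include/asm/fpu.h. */
	#define KERNEL_FPC		0x01
	#define KERNEL_VXR_V0V7		0x02
	#define KERNEL_VXR_V8V15	0x04
	#define KERNEL_VXR_V16V23	0x08
	#define KERNEL_VXR_V24V31	0x10

	#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15)
	#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23)
	#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)
	#define KERNEL_VXR		(KERNEL_VXR_LOW | KERNEL_VXR_HIGH)

	/* Same case analysis as save_fpu_state(); prints the vstm ranges issued. */
	static void show_ranges(int flags)
	{
		int mask;

		mask = flags & KERNEL_VXR;
		if (mask == KERNEL_VXR) {
			puts("vstm v0-v15; vstm v16-v31");
			return;
		}
		if (mask == KERNEL_VXR_MID) {
			puts("vstm v8-v23");
			return;
		}
		mask = flags & KERNEL_VXR_LOW;
		if (mask) {
			if (mask == KERNEL_VXR_LOW)
				puts("vstm v0-v15");
			else if (mask == KERNEL_VXR_V0V7)
				puts("vstm v0-v7");
			else
				puts("vstm v8-v15");
		}
		mask = flags & KERNEL_VXR_HIGH;
		if (mask) {
			if (mask == KERNEL_VXR_HIGH)
				puts("vstm v16-v31");
			else if (mask == KERNEL_VXR_V16V23)
				puts("vstm v16-v23");
			else
				puts("vstm v24-v31");
		}
	}

	int main(void)
	{
		show_ranges(KERNEL_VXR);		/* vstm v0-v15; vstm v16-v31 */
		show_ranges(KERNEL_VXR_MID);		/* vstm v8-v23 */
		show_ranges(KERNEL_VXR_V0V7 | KERNEL_VXR_V24V31); /* v0-v7, then v24-v31 */
		return 0;
	}

The KERNEL_VXR_MID special case exists because vstm/vlm operate on consecutive register ranges: without it, the v8-v23 pattern would split across the low/high banks and cost two instructions instead of one.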