Skip to content

Commit

Permalink
s390/fpu: remove anonymous union from struct fpu
Browse files Browse the repository at this point in the history
The anonymous union within struct fpu contains a floating point register
array and a vector register array. Given that the vector register array is
always present, remove the floating point register array. For configurations
without vector registers, save the floating point register contents within
their corresponding vector register locations.

This allows removing the union, and also simplifies the ptrace and perf code.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
  • Loading branch information
Heiko Carstens committed Feb 16, 2024
1 parent 9cbff7f commit bdbd3ac
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 109 deletions.
14 changes: 3 additions & 11 deletions arch/s390/include/asm/fpu-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,15 @@
#include <asm/sigcontext.h>

struct fpu {
__u32 fpc; /* Floating-point control */
union {
/* Floating-point register save area */
freg_t fprs[__NUM_FPRS];
/* Vector register save area */
__vector128 vxrs[__NUM_VXRS];
};
u32 fpc;
__vector128 vxrs[__NUM_VXRS] __aligned(8);
};

/* In-kernel FPU state structure */
struct kernel_fpu {
int mask;
u32 fpc;
union {
freg_t fprs[__NUM_FPRS];
__vector128 vxrs[__NUM_VXRS];
};
__vector128 vxrs[__NUM_VXRS] __aligned(8);
};

#define DECLARE_KERNEL_FPU_ONSTACK(name) \
Expand Down
102 changes: 60 additions & 42 deletions arch/s390/include/asm/fpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,44 +98,68 @@ static __always_inline void load_vx_regs(__vector128 *vxrs)
fpu_vlm(16, 31, &vxrs[16]);
}

/*
 * Save floating point registers 0-15 into a save area by calling fpu_std()
 * once per register number.
 *
 * @fprs:   start of the save area, indexed in units of freg_t
 * @offset: stride, in freg_t elements, between the slots of two consecutive
 *          registers; register N is written to fprs[N * offset]
 *
 * save_fp_regs() passes sizeof(freg_t) / sizeof(freg_t), i.e. a stride of 1
 * (densely packed freg_t array). save_fp_regs_vx() passes
 * sizeof(__vector128) / sizeof(freg_t), so each register gets a slot in its
 * own __vector128 element.
 *
 * NOTE(review): the register numbers are written out as literals instead of
 * being generated by a loop; presumably fpu_std() needs a compile-time
 * constant register number - confirm before restructuring.
 */
static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
{
fpu_std(0, &fprs[0 * offset]);
fpu_std(1, &fprs[1 * offset]);
fpu_std(2, &fprs[2 * offset]);
fpu_std(3, &fprs[3 * offset]);
fpu_std(4, &fprs[4 * offset]);
fpu_std(5, &fprs[5 * offset]);
fpu_std(6, &fprs[6 * offset]);
fpu_std(7, &fprs[7 * offset]);
fpu_std(8, &fprs[8 * offset]);
fpu_std(9, &fprs[9 * offset]);
fpu_std(10, &fprs[10 * offset]);
fpu_std(11, &fprs[11 * offset]);
fpu_std(12, &fprs[12 * offset]);
fpu_std(13, &fprs[13 * offset]);
fpu_std(14, &fprs[14 * offset]);
fpu_std(15, &fprs[15 * offset]);
}

/*
 * Load floating point registers 0-15 from a save area by calling fpu_ld()
 * once per register number.
 *
 * @fprs:   start of the save area, indexed in units of freg_t
 * @offset: stride, in freg_t elements, between the slots of two consecutive
 *          registers; register N is read from fprs[N * offset]
 *
 * load_fp_regs() passes sizeof(freg_t) / sizeof(freg_t), i.e. a stride of 1
 * (densely packed freg_t array). load_fp_regs_vx() passes
 * sizeof(__vector128) / sizeof(freg_t), so each register is read from a slot
 * in its own __vector128 element.
 *
 * NOTE(review): the register numbers are written out as literals instead of
 * being generated by a loop; presumably fpu_ld() needs a compile-time
 * constant register number - confirm before restructuring.
 */
static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
{
fpu_ld(0, &fprs[0 * offset]);
fpu_ld(1, &fprs[1 * offset]);
fpu_ld(2, &fprs[2 * offset]);
fpu_ld(3, &fprs[3 * offset]);
fpu_ld(4, &fprs[4 * offset]);
fpu_ld(5, &fprs[5 * offset]);
fpu_ld(6, &fprs[6 * offset]);
fpu_ld(7, &fprs[7 * offset]);
fpu_ld(8, &fprs[8 * offset]);
fpu_ld(9, &fprs[9 * offset]);
fpu_ld(10, &fprs[10 * offset]);
fpu_ld(11, &fprs[11 * offset]);
fpu_ld(12, &fprs[12 * offset]);
fpu_ld(13, &fprs[13 * offset]);
fpu_ld(14, &fprs[14 * offset]);
fpu_ld(15, &fprs[15 * offset]);
}

static __always_inline void save_fp_regs(freg_t *fprs)
{
fpu_std(0, &fprs[0]);
fpu_std(1, &fprs[1]);
fpu_std(2, &fprs[2]);
fpu_std(3, &fprs[3]);
fpu_std(4, &fprs[4]);
fpu_std(5, &fprs[5]);
fpu_std(6, &fprs[6]);
fpu_std(7, &fprs[7]);
fpu_std(8, &fprs[8]);
fpu_std(9, &fprs[9]);
fpu_std(10, &fprs[10]);
fpu_std(11, &fprs[11]);
fpu_std(12, &fprs[12]);
fpu_std(13, &fprs[13]);
fpu_std(14, &fprs[14]);
fpu_std(15, &fprs[15]);
__save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
}

static __always_inline void load_fp_regs(freg_t *fprs)
{
fpu_ld(0, &fprs[0]);
fpu_ld(1, &fprs[1]);
fpu_ld(2, &fprs[2]);
fpu_ld(3, &fprs[3]);
fpu_ld(4, &fprs[4]);
fpu_ld(5, &fprs[5]);
fpu_ld(6, &fprs[6]);
fpu_ld(7, &fprs[7]);
fpu_ld(8, &fprs[8]);
fpu_ld(9, &fprs[9]);
fpu_ld(10, &fprs[10]);
fpu_ld(11, &fprs[11]);
fpu_ld(12, &fprs[12]);
fpu_ld(13, &fprs[13]);
fpu_ld(14, &fprs[14]);
fpu_ld(15, &fprs[15]);
__load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
}

/*
 * Save the floating point registers into a vector register save area.
 *
 * The destination starts at the 'high' member of the first array element
 * and advances by one whole __vector128 (expressed in freg_t units) per
 * register, so every value is stored inside its own vxrs[] element.
 */
static __always_inline void save_fp_regs_vx(__vector128 *vxrs)
{
	__save_fp_regs((freg_t *)&vxrs[0].high,
		       sizeof(__vector128) / sizeof(freg_t));
}

/*
 * Load the floating point registers from a vector register save area.
 *
 * The source starts at the 'high' member of the first array element and
 * advances by one whole __vector128 (expressed in freg_t units) per
 * register, so every value is read from inside its own vxrs[] element.
 */
static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
{
	__load_fp_regs((freg_t *)&vxrs[0].high,
		       sizeof(__vector128) / sizeof(freg_t));
}

static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags)
Expand Down Expand Up @@ -170,7 +194,7 @@ static inline void save_kernel_fpu_regs(struct thread_struct *thread)
if (likely(cpu_has_vx()))
save_vx_regs(state->vxrs);
else
save_fp_regs(state->fprs);
save_fp_regs_vx(state->vxrs);
}

static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
Expand All @@ -183,7 +207,7 @@ static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
if (likely(cpu_has_vx()))
load_vx_regs(state->vxrs);
else
load_fp_regs(state->fprs);
load_fp_regs_vx(state->vxrs);
}

static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
Expand All @@ -206,19 +230,13 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
{
fpregs->pad = 0;
fpregs->fpc = fpu->fpc;
if (cpu_has_vx())
convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
else
memcpy((freg_t *)&fpregs->fprs, fpu->fprs, sizeof(fpregs->fprs));
convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
}

static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
{
fpu->fpc = fpregs->fpc;
if (cpu_has_vx())
convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
else
memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, sizeof(fpregs->fprs));
convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
}

#endif /* _ASM_S390_FPU_H */
8 changes: 4 additions & 4 deletions arch/s390/kernel/fpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
fpu_stfpc(&state->fpc);
if (!cpu_has_vx()) {
if (flags & KERNEL_VXR_LOW)
save_fp_regs(state->fprs);
save_fp_regs_vx(state->vxrs);
return;
}
mask = flags & KERNEL_VXR;
Expand Down Expand Up @@ -73,7 +73,7 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
fpu_lfpc(&state->fpc);
if (!cpu_has_vx()) {
if (flags & KERNEL_VXR_LOW)
load_fp_regs(state->fprs);
load_fp_regs_vx(state->vxrs);
return;
}
mask = flags & KERNEL_VXR;
Expand Down Expand Up @@ -115,7 +115,7 @@ void __load_user_fpu_regs(void)
if (likely(cpu_has_vx()))
load_vx_regs(state->vxrs);
else
load_fp_regs(state->fprs);
load_fp_regs_vx(state->vxrs);
clear_thread_flag(TIF_FPU);
}

Expand Down Expand Up @@ -143,7 +143,7 @@ void save_user_fpu_regs(void)
if (likely(cpu_has_vx()))
save_vx_regs(state->vxrs);
else
save_fp_regs(state->fprs);
save_fp_regs_vx(state->vxrs);
set_thread_flag(TIF_FPU);
out:
local_irq_restore(flags);
Expand Down
5 changes: 1 addition & 4 deletions arch/s390/kernel/perf_regs.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
return 0;

idx -= PERF_REG_S390_FP0;
if (cpu_has_vx())
fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
else
fp = current->thread.ufpu.fprs[idx];
fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
return fp.ui;
}

Expand Down
58 changes: 10 additions & 48 deletions arch/s390/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,17 +252,10 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)

} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.ufpu
* or the child->thread.ufpu.vxrs array
* floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
if (cpu_has_vx())
tmp = *(addr_t *)
((addr_t)child->thread.ufpu.vxrs + 2 * offset);
else
tmp = *(addr_t *)
((addr_t)child->thread.ufpu.fprs + offset);

tmp = *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
Expand Down Expand Up @@ -400,17 +393,10 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)

} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.ufpu
* or the child->thread.ufpu.vxrs array
* floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
if (cpu_has_vx())
*(addr_t *)((addr_t)
child->thread.ufpu.vxrs + 2 * offset) = data;
else
*(addr_t *)((addr_t)
child->thread.ufpu.fprs + offset) = data;

*(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = data;
} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
Expand Down Expand Up @@ -627,17 +613,10 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)

} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.ufpu
* or the child->thread.ufpu.vxrs array
* floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
if (cpu_has_vx())
tmp = *(__u32 *)
((addr_t)child->thread.ufpu.vxrs + 2 * offset);
else
tmp = *(__u32 *)
((addr_t)child->thread.ufpu.fprs + offset);

tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
Expand Down Expand Up @@ -753,17 +732,10 @@ static int __poke_user_compat(struct task_struct *child,

} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.ufpu
* or the child->thread.ufpu.vxrs array
* floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
if (cpu_has_vx())
*(__u32 *)((addr_t)
child->thread.ufpu.vxrs + 2 * offset) = tmp;
else
*(__u32 *)((addr_t)
child->thread.ufpu.fprs + offset) = tmp;

*(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp;
} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
Expand Down Expand Up @@ -912,12 +884,7 @@ static int s390_fpregs_set(struct task_struct *target,

if (target == current)
save_user_fpu_regs();

if (cpu_has_vx())
convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
else
memcpy(&fprs, target->thread.ufpu.fprs, sizeof(fprs));

convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
u32 ufpc[2] = { target->thread.ufpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
Expand All @@ -934,12 +901,7 @@ static int s390_fpregs_set(struct task_struct *target,
fprs, offsetof(s390_fp_regs, fprs), -1);
if (rc)
return rc;

if (cpu_has_vx())
convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
else
memcpy(target->thread.ufpu.fprs, &fprs, sizeof(fprs));

convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
return rc;
}

Expand Down

0 comments on commit bdbd3ac

Please sign in to comment.