s390/mm: Reimplement lazy ASCE handling

Reduce system call overhead (the round-trip time for invoking a
non-existent system call) by 25%.

With the removal of set_fs() [1], lazy control register handling was removed
in order to keep kernel entry and exit simple. However, this made system
calls slower.

With the conversion to generic entry [2] and numerous follow-up changes
which simplified the entry code significantly, adding support for lazy asce
handling no longer adds much complexity to the entry code.

In particular this means:

- On kernel entry the primary asce is not modified and contains the user
  asce.

- Kernel accesses which require secondary-space mode (for example futex
  operations) are bracketed by enable_sacf_uaccess() and
  disable_sacf_uaccess() calls (see the sketch below). enable_sacf_uaccess()
  sets the primary asce to the kernel asce so that the sacf instruction can
  be used to switch to secondary-space mode; in secondary-space mode
  instructions are still fetched through the primary address space, which
  therefore has to map the kernel. The primary asce is changed back to the
  user asce with disable_sacf_uaccess().

The state of the control register which contains the primary asce is
reflected by a new TIF_ASCE_PRIMARY bit. This is required on context
switch so that the correct asce is restored for the scheduled-in process.
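
To illustrate, a minimal sketch (not verbatim kernel code) of the
bracketing pattern:

        bool sacf_flag;

        sacf_flag = enable_sacf_uaccess();      /* primary asce := kernel asce */
        /* ... sacf-256-based access to user memory ... */
        disable_sacf_uaccess(sacf_flag);        /* primary asce := user asce */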

As a result, address spaces are now set up like this:

CPU running in               | %cr1 ASCE | %cr7 ASCE | %cr13 ASCE
-----------------------------|-----------|-----------|-----------
user space                   |  user     |  user     |  kernel
kernel (no sacf)             |  user     |  user     |  kernel
kernel (during sacf uaccess) |  kernel   |  user     |  kernel
kernel (kvm guest execution) |  guest    |  user     |  kernel

As a result, the content of the cr1 control register is not changed except for:
- futex system calls
- legacy s390 PCI system calls
- the kvm-specific cmpxchg_user_key() uaccess helper

This leads to faster system call execution.

[1] 87d5986 ("s390/mm: remove set_fs / rework address space handling")
[2] 56e62a7 ("s390: convert to generic entry")

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Heiko Carstens committed Apr 14, 2025
1 parent 8ffd015 commit 8b72f5a
Showing 12 changed files with 97 additions and 25 deletions.
36 changes: 36 additions & 0 deletions arch/s390/include/asm/asce.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ASCE_H
+#define _ASM_S390_ASCE_H
+
+#include <linux/thread_info.h>
+#include <linux/irqflags.h>
+#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+
+static inline bool enable_sacf_uaccess(void)
+{
+        unsigned long flags;
+
+        if (test_thread_flag(TIF_ASCE_PRIMARY))
+                return true;
+        local_irq_save(flags);
+        local_ctl_load(1, &get_lowcore()->kernel_asce);
+        set_thread_flag(TIF_ASCE_PRIMARY);
+        local_irq_restore(flags);
+        return false;
+}
+
+static inline void disable_sacf_uaccess(bool previous)
+{
+        unsigned long flags;
+
+        if (previous)
+                return;
+        local_irq_save(flags);
+        local_ctl_load(1, &get_lowcore()->user_asce);
+        clear_thread_flag(TIF_ASCE_PRIMARY);
+        local_irq_restore(flags);
+}
+
+#endif /* _ASM_S390_ASCE_H */
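
Note (illustrative, not part of the diff above): enable_sacf_uaccess()
returns whether the primary asce already was the kernel asce, so the
brackets may nest; only the outermost bracket restores the user asce:

        bool outer, inner;

        outer = enable_sacf_uaccess();  /* false: cr1 switched to kernel asce */
        inner = enable_sacf_uaccess();  /* true: cr1 left as is */
        /* ... nested sacf-based uaccess ... */
        disable_sacf_uaccess(inner);    /* no-op, bracket still open */
        disable_sacf_uaccess(outer);    /* cr1 switched back to user asce */
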
6 changes: 6 additions & 0 deletions arch/s390/include/asm/futex.h
@@ -13,9 +13,11 @@
 static uaccess_kmsan_or_inline int \
 __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
 { \
+        bool sacf_flag; \
         int rc, new; \
 \
         instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \
+        sacf_flag = enable_sacf_uaccess(); \
         asm_inline volatile( \
                 " sacf 256\n" \
                 "0: l %[old],%[uaddr]\n" \
@@ -32,6 +34,7 @@ __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
                   [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \
                 : [oparg] "d" (oparg) \
                 : "cc"); \
+        disable_sacf_uaccess(sacf_flag); \
         if (!rc) \
                 instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \
         return rc; \
@@ -75,9 +78,11 @@ int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 static uaccess_kmsan_or_inline
 int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
 {
+        bool sacf_flag;
         int rc;
 
         instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
+        sacf_flag = enable_sacf_uaccess();
         asm_inline volatile(
                 " sacf 256\n"
                 "0: cs %[old],%[new],%[uaddr]\n"
@@ -88,6 +93,7 @@ int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32
                 : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
                 : [new] "d" (newval)
                 : "cc", "memory");
+        disable_sacf_uaccess(sacf_flag);
         *uval = oldval;
         instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
         return rc;
19 changes: 18 additions & 1 deletion arch/s390/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
 #include <linux/mm_types.h>
 #include <asm/tlbflush.h>
 #include <asm/ctlreg.h>
+#include <asm/asce.h>
 #include <asm-generic/mm_hooks.h>
 
 #define init_new_context init_new_context
@@ -77,7 +78,8 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
         else
                 get_lowcore()->user_asce.val = next->context.asce;
         cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-        /* Clear previous user-ASCE from CR7 */
+        /* Clear previous user-ASCE from CR1 and CR7 */
+        local_ctl_load(1, &s390_invalid_asce);
         local_ctl_load(7, &s390_invalid_asce);
         if (prev != next)
                 cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
@@ -99,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void)
 {
         struct task_struct *tsk = current;
         struct mm_struct *mm = tsk->mm;
+        unsigned long flags;
 
         if (mm) {
                 preempt_disable();
@@ -108,16 +111,30 @@ static inline void finish_arch_post_lock_switch(void)
                 __tlb_flush_mm_lazy(mm);
                 preempt_enable();
         }
+        local_irq_save(flags);
+        if (test_thread_flag(TIF_ASCE_PRIMARY))
+                local_ctl_load(1, &get_lowcore()->kernel_asce);
+        else
+                local_ctl_load(1, &get_lowcore()->user_asce);
         local_ctl_load(7, &get_lowcore()->user_asce);
+        local_irq_restore(flags);
 }
 
 #define activate_mm activate_mm
 static inline void activate_mm(struct mm_struct *prev,
                                struct mm_struct *next)
 {
+        unsigned long flags;
+
         switch_mm(prev, next, current);
         cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+        local_irq_save(flags);
+        if (test_thread_flag(TIF_ASCE_PRIMARY))
+                local_ctl_load(1, &get_lowcore()->kernel_asce);
+        else
+                local_ctl_load(1, &get_lowcore()->user_asce);
         local_ctl_load(7, &get_lowcore()->user_asce);
+        local_irq_restore(flags);
 }
 
 #include <asm-generic/mmu_context.h>
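
Both hunks above repeat the same conditional cr1 reload followed by the
cr7 reload. For illustration only, a hypothetical helper (not part of
this commit) that captures the shared logic could look like this:

        /* Hypothetical helper: reload cr1/cr7 asces for the current task */
        static inline void reload_current_asce(void)
        {
                if (test_thread_flag(TIF_ASCE_PRIMARY))
                        local_ctl_load(1, &get_lowcore()->kernel_asce);
                else
                        local_ctl_load(1, &get_lowcore()->user_asce);
                local_ctl_load(7, &get_lowcore()->user_asce);
        }

Both callers run this sequence with interrupts disabled so that the
TIF_ASCE_PRIMARY test and the control register loads stay consistent.
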
1 change: 0 additions & 1 deletion arch/s390/include/asm/ptrace.h
@@ -126,7 +126,6 @@ struct pt_regs {
                 struct tpi_info tpi_info;
         };
         unsigned long flags;
-        unsigned long cr1;
         unsigned long last_break;
 };

2 changes: 2 additions & 0 deletions arch/s390/include/asm/thread_info.h
@@ -64,6 +64,7 @@ void arch_setup_new_exec(void);
 #define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
 #define TIF_UPROBE 4 /* breakpointed or single-stepping */
 #define TIF_PATCH_PENDING 5 /* pending live patching update */
+#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */
 #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
 #define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
 #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
@@ -85,6 +86,7 @@ void arch_setup_new_exec(void);
 #define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
 #define _TIF_UPROBE BIT(TIF_UPROBE)
 #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
+#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
 #define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
 #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
 #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
12 changes: 12 additions & 0 deletions arch/s390/include/asm/uaccess.h
@@ -19,6 +19,7 @@
 #include <asm/extable.h>
 #include <asm/facility.h>
 #include <asm-generic/access_ok.h>
+#include <asm/asce.h>
 #include <linux/instrumented.h>
 
 void debug_user_asce(int exit);
@@ -478,6 +479,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                                               __uint128_t old, __uint128_t new,
                                               unsigned long key, int size)
 {
+        bool sacf_flag;
         int rc = 0;
 
         switch (size) {
@@ -490,6 +492,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                 _old = ((unsigned int)old & 0xff) << shift;
                 _new = ((unsigned int)new & 0xff) << shift;
                 mask = ~(0xff << shift);
+                sacf_flag = enable_sacf_uaccess();
                 asm_inline volatile(
                         " spka 0(%[key])\n"
                         " sacf 256\n"
@@ -524,6 +527,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                           [default_key] "J" (PAGE_DEFAULT_KEY),
                           [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
                         : "memory", "cc");
+                disable_sacf_uaccess(sacf_flag);
                 *(unsigned char *)uval = prev >> shift;
                 if (!count)
                         rc = -EAGAIN;
@@ -538,6 +542,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                 _old = ((unsigned int)old & 0xffff) << shift;
                 _new = ((unsigned int)new & 0xffff) << shift;
                 mask = ~(0xffff << shift);
+                sacf_flag = enable_sacf_uaccess();
                 asm_inline volatile(
                         " spka 0(%[key])\n"
                         " sacf 256\n"
@@ -572,6 +577,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                           [default_key] "J" (PAGE_DEFAULT_KEY),
                           [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
                         : "memory", "cc");
+                disable_sacf_uaccess(sacf_flag);
                 *(unsigned short *)uval = prev >> shift;
                 if (!count)
                         rc = -EAGAIN;
@@ -580,6 +586,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
         case 4: {
                 unsigned int prev = old;
 
+                sacf_flag = enable_sacf_uaccess();
                 asm_inline volatile(
                         " spka 0(%[key])\n"
                         " sacf 256\n"
@@ -595,12 +602,14 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                           [key] "a" (key << 4),
                           [default_key] "J" (PAGE_DEFAULT_KEY)
                         : "memory", "cc");
+                disable_sacf_uaccess(sacf_flag);
                 *(unsigned int *)uval = prev;
                 return rc;
         }
         case 8: {
                 unsigned long prev = old;
 
+                sacf_flag = enable_sacf_uaccess();
                 asm_inline volatile(
                         " spka 0(%[key])\n"
                         " sacf 256\n"
@@ -616,12 +625,14 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                           [key] "a" (key << 4),
                           [default_key] "J" (PAGE_DEFAULT_KEY)
                         : "memory", "cc");
+                disable_sacf_uaccess(sacf_flag);
                 *(unsigned long *)uval = prev;
                 return rc;
         }
         case 16: {
                 __uint128_t prev = old;
 
+                sacf_flag = enable_sacf_uaccess();
                 asm_inline volatile(
                         " spka 0(%[key])\n"
                         " sacf 256\n"
@@ -637,6 +648,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
                           [key] "a" (key << 4),
                           [default_key] "J" (PAGE_DEFAULT_KEY)
                         : "memory", "cc");
+                disable_sacf_uaccess(sacf_flag);
                 *(__uint128_t *)uval = prev;
                 return rc;
         }
1 change: 0 additions & 1 deletion arch/s390/kernel/asm-offsets.c
@@ -50,7 +50,6 @@ int main(void)
         OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
         OFFSET(__PT_INT_CODE, pt_regs, int_code);
         OFFSET(__PT_FLAGS, pt_regs, flags);
-        OFFSET(__PT_CR1, pt_regs, cr1);
         OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
         DEFINE(__PT_SIZE, sizeof(struct pt_regs));
         BLANK();
20 changes: 4 additions & 16 deletions arch/s390/kernel/entry.S
@@ -116,7 +116,7 @@ _LPP_OFFSET = __LC_LPP
 .macro SIEEXIT sie_control,lowcore
         lg %r9,\sie_control # get control block pointer
         ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
-        lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
+        lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce
         lg %r9,__LC_CURRENT(\lowcore)
         mvi __TI_sie(%r9),0
         larl %r9,sie_exit # skip forward to sie_exit
@@ -208,7 +208,7 @@ SYM_FUNC_START(__sie64a)
         lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
         ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
         GET_LC %r14
-        lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
+        lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce
         lg %r14,__LC_CURRENT(%r14)
         mvi __TI_sie(%r14),0
 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
@@ -240,7 +240,6 @@ SYM_CODE_START(system_call)
         lghi %r14,0
 .Lsysc_per:
         STBEAR __LC_LAST_BREAK(%r13)
-        lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
         lg %r15,__LC_KERNEL_STACK(%r13)
         xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
         stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
@@ -261,7 +260,6 @@ SYM_CODE_START(system_call)
         lgr %r3,%r14
         brasl %r14,__do_syscall
         STACKLEAK_ERASE
-        lctlg %c1,%c1,__LC_USER_ASCE(%r13)
         mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
         BPON
         LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
Expand All @@ -278,7 +276,6 @@ SYM_CODE_START(ret_from_fork)
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
GET_LC %r13
lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
@@ -299,10 +296,7 @@ SYM_CODE_START(pgm_check_handler)
         lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
         xgr %r10,%r10
         tmhh %r8,0x0001 # coming from user space?
-        jno .Lpgm_skip_asce
-        lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
-        j 3f # -> fault in user space
-.Lpgm_skip_asce:
+        jo 3f # -> fault in user space
 #if IS_ENABLED(CONFIG_KVM)
         lg %r11,__LC_CURRENT(%r13)
         tm __TI_sie(%r11),0xff
@@ -340,7 +334,6 @@ SYM_CODE_START(pgm_check_handler)
         tmhh %r8,0x0001 # returning to user space?
         jno .Lpgm_exit_kernel
         STACKLEAK_ERASE
-        lctlg %c1,%c1,__LC_USER_ASCE(%r13)
         BPON
         stpt __LC_EXIT_TIMER(%r13)
 .Lpgm_exit_kernel:
@@ -384,8 +377,7 @@ SYM_CODE_START(\name)
 #endif
 0:      aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
         j 2f
-1:      lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
-        lg %r15,__LC_KERNEL_STACK(%r13)
+1:      lg %r15,__LC_KERNEL_STACK(%r13)
 2:      xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
         la %r11,STACK_FRAME_OVERHEAD(%r15)
         stmg %r0,%r7,__PT_R0(%r11)
@@ -408,7 +400,6 @@ SYM_CODE_START(\name)
         tmhh %r8,0x0001 # returning to user ?
         jno 2f
         STACKLEAK_ERASE
-        lctlg %c1,%c1,__LC_USER_ASCE(%r13)
         BPON
         stpt __LC_EXIT_TIMER(%r13)
 2:      LBEAR __PT_LAST_BREAK(%r11)
@@ -476,8 +467,6 @@ SYM_CODE_START(mcck_int_handler)
 .Lmcck_user:
         lg %r15,__LC_MCCK_STACK(%r13)
         la %r11,STACK_FRAME_OVERHEAD(%r15)
-        stctg %c1,%c1,__PT_CR1(%r11)
-        lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
         xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
         lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
         mvc __PT_R0(128,%r11),0(%r14)
@@ -495,7 +484,6 @@ SYM_CODE_START(mcck_int_handler)
         xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
         lgr %r2,%r11 # pass pointer to pt_regs
         brasl %r14,s390_do_machine_check
-        lctlg %c1,%c1,__PT_CR1(%r11)
         lmg %r0,%r10,__PT_R0(%r11)
         mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
         tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
2 changes: 1 addition & 1 deletion arch/s390/kernel/smp.c
@@ -263,7 +263,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
         abs_lc = get_abs_lowcore();
         memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
         put_abs_lowcore(abs_lc);
-        lc->cregs_save_area[1] = lc->kernel_asce;
+        lc->cregs_save_area[1] = lc->user_asce;
         lc->cregs_save_area[7] = lc->user_asce;
         save_access_regs((unsigned int *) lc->access_regs_save_area);
         arch_spin_lock_setup(cpu);
5 changes: 3 additions & 2 deletions arch/s390/lib/uaccess.c
@@ -17,17 +17,18 @@
 #ifdef CONFIG_DEBUG_ENTRY
 void debug_user_asce(int exit)
 {
+        struct lowcore *lc = get_lowcore();
         struct ctlreg cr1, cr7;
 
         local_ctl_store(1, &cr1);
         local_ctl_store(7, &cr7);
-        if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val)
+        if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)
                 return;
         panic("incorrect ASCE on kernel %s\n"
               "cr1: %016lx cr7: %016lx\n"
               "kernel: %016lx user: %016lx\n",
               exit ? "exit" : "entry", cr1.val, cr7.val,
-              get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val);
+              lc->kernel_asce.val, lc->user_asce.val);
 }
 #endif /*CONFIG_DEBUG_ENTRY */

[diffs for the remaining 2 of the 12 changed files were not loaded]
