x86/entry: Use generic syscall entry function
Replace the syscall entry work handling with the generic version. Provide
the necessary helper inlines to handle the real architecture-specific
parts, e.g. ptrace.

Use a temporary define for idtentry_enter_user which will be cleaned up
separately.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lkml.kernel.org/r/20200722220520.376213694@linutronix.de
Thomas Gleixner committed Jul 24, 2020
1 parent 0bf019e commit 27d6b4d
Showing 5 changed files with 45 additions and 179 deletions.
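
The net effect of the conversion: do_syscall_64() and the 32-bit entry paths now call the generic syscall_enter_from_user_mode() instead of the local syscall_enter()/syscall_trace_enter() pair removed below. For orientation, a condensed, compilable user-space sketch of that flow; all types and helpers here are stubs, and SYSCALL_ENTER_WORK is an assumed placeholder for the generic work mask, not kernel source:

/* Sketch of the generic entry flow (stubs only, not kernel code). */
#include <stdio.h>

struct pt_regs { unsigned long orig_ax; };

/* Stubs for lockdep/RCU context tracking and interrupt state. */
static void enter_from_user_mode(struct pt_regs *regs) { (void)regs; }
static void local_irq_enable(void) { }
static unsigned long read_thread_flags(void) { return 0; }

/* ptrace, seccomp, tracepoints, audit; returns -1 to skip the syscall. */
static long syscall_trace_enter(struct pt_regs *regs)
{
	return (long)regs->orig_ax;
}

#define SYSCALL_ENTER_WORK 0x1fUL	/* placeholder for the TIF work bits */

static long syscall_enter_from_user_mode(struct pt_regs *regs, long nr)
{
	enter_from_user_mode(regs);	/* lockdep, context tracking, tracing */
	local_irq_enable();
	if (read_thread_flags() & SYSCALL_ENTER_WORK)
		nr = syscall_trace_enter(regs);
	return nr;
}

int main(void)
{
	struct pt_regs regs = { .orig_ax = 39 };
	printf("nr = %ld\n", syscall_enter_from_user_mode(&regs, 39));
	return 0;
}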
1 change: 1 addition & 0 deletions arch/x86/Kconfig
@@ -115,6 +115,7 @@ config X86
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
+select GENERIC_ENTRY
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
181 changes: 8 additions & 173 deletions arch/x86/entry/common.c
@@ -10,13 +10,13 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/entry-common.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/audit.h>
-#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/export.h>
#include <linux/context_tracking.h>
@@ -42,70 +42,8 @@
#include <asm/syscall.h>
#include <asm/irq_stack.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/* Check that the stack and regs on entry from user mode are sane. */
static noinstr void check_user_regs(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
/*
* Make sure that the entry code gave us a sensible EFLAGS
* register. Native because we want to check the actual CPU
* state, not the interrupt state as imagined by Xen.
*/
unsigned long flags = native_save_fl();
WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
X86_EFLAGS_NT));

/* We think we came from user mode. Make sure pt_regs agrees. */
WARN_ON_ONCE(!user_mode(regs));

/*
* All entries from user mode (except #DF) should be on the
* normal thread stack and should have user pt_regs in the
* correct location.
*/
WARN_ON_ONCE(!on_thread_stack());
WARN_ON_ONCE(regs != task_pt_regs(current));
}
}

#ifdef CONFIG_CONTEXT_TRACKING
/**
* enter_from_user_mode - Establish state when coming from user mode
*
* Syscall entry disables interrupts, but user mode is traced as interrupts
* enabled. Also with NO_HZ_FULL RCU might be idle.
*
* 1) Tell lockdep that interrupts are disabled
* 2) Invoke context tracking if enabled to reactivate RCU
* 3) Trace interrupts off state
*/
static noinstr void enter_from_user_mode(struct pt_regs *regs)
{
enum ctx_state state = ct_state();

check_user_regs(regs);
lockdep_hardirqs_off(CALLER_ADDR0);
user_exit_irqoff();

instrumentation_begin();
CT_WARN_ON(state != CONTEXT_USER);
trace_hardirqs_off_finish();
instrumentation_end();
}
#else
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
{
check_user_regs(regs);
lockdep_hardirqs_off(CALLER_ADDR0);
instrumentation_begin();
trace_hardirqs_off_finish();
instrumentation_end();
}
#endif

/**
* exit_to_user_mode - Fixup state when exiting to user mode
*
@@ -129,83 +67,6 @@ static __always_inline void exit_to_user_mode(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}

static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) {
audit_syscall_entry(regs->orig_ax, regs->di,
regs->si, regs->dx, regs->r10);
} else
#endif
{
audit_syscall_entry(regs->orig_ax, regs->bx,
regs->cx, regs->dx, regs->si);
}
}

/*
* Returns the syscall nr to run (which should match regs->orig_ax) or -1
* to skip the syscall.
*/
static long syscall_trace_enter(struct pt_regs *regs)
{
u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;

struct thread_info *ti = current_thread_info();
unsigned long ret = 0;
u32 work;

work = READ_ONCE(ti->flags);

if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
ret = tracehook_report_syscall_entry(regs);
if (ret || (work & _TIF_SYSCALL_EMU))
return -1L;
}

#ifdef CONFIG_SECCOMP
/*
* Do seccomp after ptrace, to catch any tracer changes.
*/
if (work & _TIF_SECCOMP) {
struct seccomp_data sd;

sd.arch = arch;
sd.nr = regs->orig_ax;
sd.instruction_pointer = regs->ip;
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) {
sd.args[0] = regs->di;
sd.args[1] = regs->si;
sd.args[2] = regs->dx;
sd.args[3] = regs->r10;
sd.args[4] = regs->r8;
sd.args[5] = regs->r9;
} else
#endif
{
sd.args[0] = regs->bx;
sd.args[1] = regs->cx;
sd.args[2] = regs->dx;
sd.args[3] = regs->si;
sd.args[4] = regs->di;
sd.args[5] = regs->bp;
}

ret = __secure_computing(&sd);
if (ret == -1)
return ret;
}
#endif

if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);

do_audit_syscall_entry(regs, arch);

return ret ?: regs->orig_ax;
}

#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_PATCH_PENDING)
@@ -366,26 +227,10 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
exit_to_user_mode();
}

static noinstr long syscall_enter(struct pt_regs *regs, unsigned long nr)
{
struct thread_info *ti;

enter_from_user_mode(regs);
instrumentation_begin();

local_irq_enable();
ti = current_thread_info();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);

instrumentation_end();
return nr;
}

#ifdef CONFIG_X86_64
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
-nr = syscall_enter(regs, nr);
+nr = syscall_enter_from_user_mode(regs, nr);

instrumentation_begin();
if (likely(nr < NR_syscalls)) {
@@ -407,14 +252,16 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
{
+unsigned int nr = (unsigned int)regs->orig_ax;
+
if (IS_ENABLED(CONFIG_IA32_EMULATION))
current_thread_info()->status |= TS_COMPAT;
/*
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
* orig_ax, the unsigned int return value truncates it. This may
* or may not be necessary, but it matches the old asm behavior.
*/
-return syscall_enter(regs, (unsigned int)regs->orig_ax);
+return (unsigned int)syscall_enter_from_user_mode(regs, nr);
}

/*
@@ -568,7 +415,7 @@ SYSCALL_DEFINE0(ni_syscall)
* solves the problem of kernel mode pagefaults which can schedule, which
* is not possible after invoking rcu_irq_enter() without undoing it.
*
- * For user mode entries enter_from_user_mode() must be invoked to
+ * For user mode entries irqentry_enter_from_user_mode() must be invoked to
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
* would not be possible.
*
@@ -584,7 +431,7 @@ idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs)
};

if (user_mode(regs)) {
-enter_from_user_mode(regs);
+irqentry_enter_from_user_mode(regs);
return ret;
}

@@ -615,7 +462,7 @@ idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs)
/*
* If RCU is not watching then the same careful
* sequence vs. lockdep and tracing is required
- * as in enter_from_user_mode().
+ * as in irqentry_enter_from_user_mode().
*/
lockdep_hardirqs_off(CALLER_ADDR0);
rcu_irq_enter();
@@ -708,18 +555,6 @@ void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
}
}

/**
* idtentry_enter_user - Handle state tracking on idtentry from user mode
* @regs: Pointer to pt_regs of interrupted context
*
* Invokes enter_from_user_mode() to establish the proper context for
* NOHZ_FULL. Otherwise scheduling on exit would not be possible.
*/
void noinstr idtentry_enter_user(struct pt_regs *regs)
{
enter_from_user_mode(regs);
}

/**
* idtentry_exit_user - Handle return from exception to user mode
* @regs: Pointer to pt_regs (exception entry regs)
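
The truncation subtlety documented in syscall_32_enter() above is easy to gloss over. A minimal user-space demonstration of the cast behavior, with a made-up poked value:

#include <stdio.h>

int main(void)
{
	/* A tracer pokes a value above 2^32 - 1 into orig_ax ... */
	unsigned long long poked = 0x100000002ULL;
	/* ... and the unsigned int cast keeps only the low 32 bits. */
	unsigned int nr = (unsigned int)poked;

	printf("orig_ax = %#llx -> nr = %u\n", poked, nr);	/* nr = 2 */
	return 0;
}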
32 changes: 32 additions & 0 deletions arch/x86/include/asm/entry-common.h
@@ -0,0 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_ENTRY_COMMON_H
#define _ASM_X86_ENTRY_COMMON_H

/* Check that the stack and regs on entry from user mode are sane. */
static __always_inline void arch_check_user_regs(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
/*
* Make sure that the entry code gave us a sensible EFLAGS
* register. Native because we want to check the actual CPU
* state, not the interrupt state as imagined by Xen.
*/
unsigned long flags = native_save_fl();
WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
X86_EFLAGS_NT));

/* We think we came from user mode. Make sure pt_regs agrees. */
WARN_ON_ONCE(!user_mode(regs));

/*
* All entries from user mode (except #DF) should be on the
* normal thread stack and should have user pt_regs in the
* correct location.
*/
WARN_ON_ONCE(!on_thread_stack());
WARN_ON_ONCE(regs != task_pt_regs(current));
}
}
#define arch_check_user_regs arch_check_user_regs

#endif
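
The trailing #define arch_check_user_regs arch_check_user_regs uses the kernel's usual macro-guard override convention: the generic header installs an empty fallback only when the architecture has not claimed the name. A self-contained sketch of the idiom, with a hypothetical hook name:

#include <stdio.h>

/* "Arch header": real implementation plus the guard define. */
static inline void arch_hook(void) { puts("arch-specific checks"); }
#define arch_hook arch_hook

/* "Generic header": empty stub only if no arch override exists. */
#ifndef arch_hook
static inline void arch_hook(void) { }
#endif

int main(void)
{
	arch_hook();	/* runs the arch-specific version */
	return 0;
}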
5 changes: 4 additions & 1 deletion arch/x86/include/asm/idtentry.h
@@ -6,11 +6,14 @@
#include <asm/trapnr.h>

#ifndef __ASSEMBLY__
+#include <linux/entry-common.h>
#include <linux/hardirq.h>

#include <asm/irq_stack.h>

-void idtentry_enter_user(struct pt_regs *regs);
+/* Temporary define */
+#define idtentry_enter_user irqentry_enter_from_user_mode

void idtentry_exit_user(struct pt_regs *regs);

typedef struct idtentry_state {
5 changes: 0 additions & 5 deletions arch/x86/include/asm/thread_info.h
@@ -133,11 +133,6 @@ struct thread_info {
#define _TIF_X32 (1 << TIF_X32)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)

/* Work to do before invoking the actual syscall. */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)

/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
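
_TIF_WORK_SYSCALL_ENTRY can go because the generic entry code now owns an equivalent mask and checks it before running the entry work. A compilable sketch of what that mask covers; the flag values are placeholders and SYSCALL_ENTER_WORK is an assumed name, not the kernel header:

#include <stdio.h>

/* Placeholder bit values; the real ones live in thread_info.h. */
#define _TIF_SYSCALL_TRACE	(1 << 0)
#define _TIF_SYSCALL_EMU	(1 << 1)
#define _TIF_SYSCALL_AUDIT	(1 << 2)
#define _TIF_SECCOMP		(1 << 3)
#define _TIF_SYSCALL_TRACEPOINT	(1 << 4)

/* Generic equivalent of the removed per-arch entry-work mask. */
#define SYSCALL_ENTER_WORK						\
	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
	 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)

int main(void)
{
	printf("entry work mask: %#x\n", (unsigned int)SYSCALL_ENTER_WORK);
	return 0;
}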
