Skip to content

Commit

Permalink
x86/entry/compat: Implement opportunistic SYSRETL for compat syscalls
Browse files Browse the repository at this point in the history
If CS, SS and IP are as expected and FLAGS is compatible with
SYSRETL, then return from fast compat syscalls (both SYSCALL and
SYSENTER) using SYSRETL.

Unlike native 64-bit opportunistic SYSRET, this is not invisible
to user code: RCX and R8-R15 end up in a different state than
the one saved in pt_regs.  To compensate, we only do this when
returning to the vDSO fast syscall return path.  This won't
interfere with syscall restart, as we won't use SYSRETL when
returning to the INT80 restart instruction.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/aa15e49db33773eb10b73d73466b6d5466d7856a.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Andy Lutomirski authored and Ingo Molnar committed Oct 9, 2015
1 parent a474e67 commit 7841b40
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 5 deletions.
23 changes: 20 additions & 3 deletions arch/x86/entry/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,8 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
syscall_return_slowpath(regs);
}

__visible void do_fast_syscall_32(struct pt_regs *regs)
/* Returns 0 to return using IRET or 1 to return using SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
{
/*
* Called using the internal vDSO SYSENTER/SYSCALL32 calling
Expand Down Expand Up @@ -395,12 +396,28 @@ __visible void do_fast_syscall_32(struct pt_regs *regs)
enter_from_user_mode();
#endif
prepare_exit_to_usermode(regs);
return;
return 0; /* Keep it simple: use IRET. */
}
local_irq_disable();

/* Now this is just like a normal syscall. */
do_int80_syscall_32(regs);
return;

#ifdef CONFIG_X86_64
/*
* Opportunistic SYSRETL: if possible, try to return using SYSRETL.
* SYSRETL is available on all 64-bit CPUs, so we don't need to
* bother with SYSEXIT.
*
* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
* because the ECX fixup above will ensure that this is essentially
* never the case.
*/
return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
regs->ip == landing_pad &&
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
#else
return 0;
#endif
}
#endif
42 changes: 40 additions & 2 deletions arch/x86/entry/entry_64_compat.S
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ sysenter_flags_fixed:

movq %rsp, %rdi
call do_fast_syscall_32
jmp .Lsyscall_32_done
testl %eax, %eax
jz .Lsyscall_32_done
jmp sysret32_from_system_call

sysenter_fix_flags:
pushq $X86_EFLAGS_FIXED
Expand Down Expand Up @@ -192,7 +194,43 @@ ENTRY(entry_SYSCALL_compat)

movq %rsp, %rdi
call do_fast_syscall_32
jmp .Lsyscall_32_done
testl %eax, %eax
jz .Lsyscall_32_done

/* Opportunistic SYSRET */
sysret32_from_system_call:
TRACE_IRQS_ON /* User mode traces as IRQs on. */
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
movq RIP(%rsp), %rcx /* pt_regs->ip (in rcx) */
addq $RAX, %rsp /* Skip r8-r15 */
popq %rax /* pt_regs->rax */
popq %rdx /* Skip pt_regs->cx */
popq %rdx /* pt_regs->dx */
popq %rsi /* pt_regs->si */
popq %rdi /* pt_regs->di */

/*
* USERGS_SYSRET32 does:
* GSBASE = user's GS base
* EIP = ECX
* RFLAGS = R11
* CS = __USER32_CS
* SS = __USER_DS
*
* ECX will not match pt_regs->cx, but we're returning to a vDSO
* trampoline that will fix up RCX, so this is okay.
*
* R12-R15 are callee-saved, so they contain whatever was in them
* when the system call started, which is already known to user
* code. We zero R8-R10 to avoid info leaks.
*/
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET32
END(entry_SYSCALL_compat)

/*
Expand Down

0 comments on commit 7841b40

Please sign in to comment.