Merge branch 'WIP.x86-pti.entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 syscall entry code changes for PTI from Ingo Molnar:
 "The main changes here are Andy Lutomirski's changes to switch the
  x86-64 entry code to use the 'per CPU entry trampoline stack'. This,
  besides helping fix KASLR leaks (the pending Page Table Isolation
  (PTI) work), also robustifies the x86 entry code"
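
For orientation: the centerpiece of the series is the per-CPU "entry area", a fixmap-mapped region that can stay mapped even under a minimal PTI page table. A rough C sketch of its layout, simplified from the series (field set and ordering are approximate, not the verbatim kernel definition):

struct cpu_entry_area {
        char gdt[PAGE_SIZE];                    /* read-only GDT remap */
        struct SYSENTER_stack_page SYSENTER_stack_page; /* small entry stack */
        struct tss_struct tss;                  /* hardware TSS, mapped read-only */
        char entry_trampoline[PAGE_SIZE];       /* remapped SYSCALL entry text */
#ifdef CONFIG_X86_64
        char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
                              DEBUG_STKSZ];     /* IST stacks */
#endif
};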

* 'WIP.x86-pti.entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  x86/cpufeatures: Make CPU bugs sticky
  x86/paravirt: Provide a way to check for hypervisors
  x86/paravirt: Dont patch flush_tlb_single
  x86/entry/64: Make cpu_entry_area.tss read-only
  x86/entry: Clean up the SYSENTER_stack code
  x86/entry/64: Remove the SYSENTER stack canary
  x86/entry/64: Move the IST stacks into struct cpu_entry_area
  x86/entry/64: Create a per-CPU SYSCALL entry trampoline
  x86/entry/64: Return to userspace from the trampoline stack
  x86/entry/64: Use a per-CPU trampoline stack for IDT entries
  x86/espfix/64: Stop assuming that pt_regs is on the entry stack
  x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0
  x86/entry: Remap the TSS into the CPU entry area
  x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct
  x86/dumpstack: Handle stack overflow on all stacks
  x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss
  x86/kasan/64: Teach KASAN about the cpu_entry_area
  x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce struct cpu_entry_area
  x86/entry/gdt: Put per-CPU GDT remaps in ascending order
  x86/dumpstack: Add get_stack_info() support for the SYSENTER stack
  ...
Linus Torvalds committed Dec 18, 2017
2 parents 1291a0d + 6cbd217 commit 64a4809
Showing 40 changed files with 691 additions and 286 deletions.
arch/x86/entry/entry_32.S (6 changes: 4 additions & 2 deletions)
@@ -941,7 +941,8 @@ ENTRY(debug)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
 	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
 	cmpl	$SIZEOF_SYSENTER_stack, %ecx
 	jb	.Ldebug_from_sysenter_stack
@@ -984,7 +985,8 @@ ENTRY(nmi)
 	movl	%esp, %eax			# pt_regs pointer
 
 	/* Are we currently on the SYSENTER stack? */
-	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+	movl	PER_CPU_VAR(cpu_entry_area), %ecx
+	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
 	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
 	cmpl	$SIZEOF_SYSENTER_stack, %ecx
 	jb	.Lnmi_from_sysenter_stack
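
Both hunks above implement the same range check, and the trick is worth a note: subtracting the current stack pointer from the stack's end address and doing a single unsigned compare (jb) tests both bounds at once. A C restatement (illustrative only; the helper name is made up):

/* Illustrative restatement of the subl/cmpl/jb sequence above; 'end'
 * is the address one past the highest byte of the SYSENTER stack.
 * If sp lies in (end - size, end], then end - sp is in [0, size), so
 * a single unsigned comparison checks both bounds at once.
 */
static inline int on_sysenter_stack(unsigned long sp, unsigned long end,
                                    unsigned long size)
{
        return (end - sp) < size;
}
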
arch/x86/entry/entry_64.S (189 changes: 164 additions & 25 deletions)
@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+.pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline. This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address). So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped entry
+ * trampoline. We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH	CPU_ENTRY_AREA_SYSENTER_stack + \
+			SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+	UNWIND_HINT_EMPTY
+	swapgs
+
+	/* Stash the user RSP. */
+	movq	%rsp, RSP_SCRATCH
+
+	/* Load the top of the task stack into RSP */
+	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+	/* Start building the simulated IRET frame. */
+	pushq	$__USER_DS			/* pt_regs->ss */
+	pushq	RSP_SCRATCH			/* pt_regs->sp */
+	pushq	%r11				/* pt_regs->flags */
+	pushq	$__USER_CS			/* pt_regs->cs */
+	pushq	%rcx				/* pt_regs->ip */
+
+	/*
+	 * x86 lacks a near absolute jump, and we can't jump to the real
+	 * entry text with a relative jump. We could push the target
+	 * address and then use retq, but this destroys the pipeline on
+	 * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
+	 * spill RDI and restore it in a second-stage trampoline.
+	 */
+	pushq	%rdi
+	movq	$entry_SYSCALL_64_stage2, %rdi
+	jmp	*%rdi
+END(entry_SYSCALL_64_trampoline)
+
+.popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+	UNWIND_HINT_EMPTY
+	popq	%rdi
+	jmp	entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	/*
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
 	popq	%rsi	/* skip rcx */
 	popq	%rdx
 	popq	%rsi
+
+	/*
+	 * Now all regs are restored except RSP and RDI.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	pushq	RSP-RDI(%rdi)	/* RSP */
+	pushq	(%rdi)		/* RDI */
+
+	/*
+	 * We are on the trampoline stack. All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+
+	popq	%rdi
-	movq	RSP-ORIG_RAX(%rsp), %rsp
+	popq	%rsp
 	USERGS_SYSRET64
 END(entry_SYSCALL_64)

@@ -466,12 +540,13 @@ END(irq_entries_start)

 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-	pushfq
-	testl	$X86_EFLAGS_IF, (%rsp)
+	pushq	%rax
+	SAVE_FLAGS(CLBR_RAX)
+	testl	$X86_EFLAGS_IF, %eax
 	jz	.Lokay_\@
 	ud2
 .Lokay_\@:
-	addq	$8, %rsp
+	popq	%rax
 #endif
 .endm

@@ -563,6 +638,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
 .macro interrupt func
 	cld
+
+	testb	$3, CS-ORIG_RAX(%rsp)
+	jz	1f
+	SWAPGS
+	call	switch_to_thread_stack
+1:
+
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS
 	SAVE_EXTRA_REGS
@@ -572,12 +654,8 @@ END(irq_entries_start)
 	jz	1f
 
 	/*
-	 * IRQ from user mode. Switch to kernel gsbase and inform context
-	 * tracking that we're in kernel mode.
-	 */
-	SWAPGS
-
-	/*
+	 * IRQ from user mode.
+	 *
 	 * We need to tell lockdep that IRQs are off. We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
 	 * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
@@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	ud2
 1:
 #endif
-	SWAPGS
 	POP_EXTRA_REGS
-	POP_C_REGS
-	addq	$8, %rsp	/* skip regs->orig_ax */
+	popq	%r11
+	popq	%r10
+	popq	%r9
+	popq	%r8
+	popq	%rax
+	popq	%rcx
+	popq	%rdx
+	popq	%rsi
+
+	/*
+	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+	 * Save old stack pointer and switch to trampoline stack.
+	 */
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+	/* Copy the IRET frame to the trampoline stack. */
+	pushq	6*8(%rdi)	/* SS */
+	pushq	5*8(%rdi)	/* RSP */
+	pushq	4*8(%rdi)	/* EFLAGS */
+	pushq	3*8(%rdi)	/* CS */
+	pushq	2*8(%rdi)	/* RIP */
+
+	/* Push user RDI on the trampoline stack. */
+	pushq	(%rdi)
+
+	/*
+	 * We are on the trampoline stack. All regs except RDI are live.
+	 * We can do future final exit work right here.
+	 */
+
+	/* Restore RDI. */
+	popq	%rdi
+	SWAPGS
 	INTERRUPT_RETURN


@@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack. This is called with the IRET frame and
+ * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+	UNWIND_HINT_FUNC
+
+	pushq	%rdi
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+	pushq	7*8(%rdi)		/* regs->ss */
+	pushq	6*8(%rdi)		/* regs->rsp */
+	pushq	5*8(%rdi)		/* regs->eflags */
+	pushq	4*8(%rdi)		/* regs->cs */
+	pushq	3*8(%rdi)		/* regs->ip */
+	pushq	2*8(%rdi)		/* regs->orig_ax */
+	pushq	8(%rdi)			/* return address */
+	UNWIND_HINT_FUNC
+
+	movq	(%rdi), %rdi
+	ret
+END(switch_to_thread_stack)
+
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +983,12 @@ ENTRY(\sym)

 	ALLOC_PT_GPREGS_ON_STACK
 
-	.if \paranoid
-	.if \paranoid == 1
+	.if \paranoid < 2
 	testb	$3, CS(%rsp)		/* If coming from userspace, switch stacks */
-	jnz	1f
+	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
+
+	.if \paranoid
 	call	paranoid_entry
 	.else
 	call	error_entry
@@ -894,20 +1030,15 @@ ENTRY(\sym)
 	jmp	error_exit
 	.endif
 
-	.if \paranoid == 1
+	.if \paranoid < 2
 	/*
-	 * Paranoid entry from userspace. Switch stacks and treat it
+	 * Entry from userspace. Switch stacks and treat it
 	 * as a normal entry. This means that paranoid handlers
 	 * run in real process context if user_mode(regs).
 	 */
-1:
+.Lfrom_usermode_switch_stack_\@:
 	call	error_entry
 
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-	call	sync_regs
-	movq	%rax, %rsp			/* switch stack */
-
 	movq	%rsp, %rdi			/* pt_regs pointer */
 
 	.if \has_error_code
@@ -1170,6 +1301,14 @@ ENTRY(error_entry)
 	SWAPGS
 
 .Lerror_entry_from_usermode_after_swapgs:
+	/* Put us onto the real thread stack. */
+	popq	%r12				/* save return addr in %r12 */
+	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
+	call	sync_regs
+	movq	%rax, %rsp			/* switch stack */
+	ENCODE_FRAME_POINTER
+	pushq	%r12
+
 	/*
 	 * We need to tell lockdep that IRQs are off. We can't do this until
 	 * we fix gsbase, and we should do it before enter_from_user_mode
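
Two details in the entry_64.S changes above are worth restating. First, the CPU_ENTRY_AREA macro in the trampoline: the trampoline text is an alias of _entry_trampoline placed at a fixed offset inside each CPU's entry area, so subtracting that offset from the trampoline's own %rip-derived address recovers the entry area's base. In C terms (a sketch; the helper and its argument are illustrative):

/* Sketch of the CPU_ENTRY_AREA address math. 'trampoline_va' stands
 * for the %rip-derived address of _entry_trampoline within the current
 * mapping; the struct offset is what the asm encodes as
 * CPU_ENTRY_AREA_entry_trampoline.
 */
static inline struct cpu_entry_area *entry_area_base(unsigned long trampoline_va)
{
        return (struct cpu_entry_area *)(trampoline_va -
                        offsetof(struct cpu_entry_area, entry_trampoline));
}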
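
Second, the error_entry hunk leans on sync_regs() to move the register frame onto the real thread stack before C code runs. A sketch of that helper (close to the arch/x86/kernel/traps.c code of this era, but simplified):

/* Copy a pt_regs frame from the entry stack to the top of the thread
 * stack and return the new pointer, so the handler runs on a stack
 * that remains valid for the duration of the exception.
 */
struct pt_regs *sync_regs(struct pt_regs *eregs)
{
        struct pt_regs *regs =
                (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;

        if (regs != eregs)
                *regs = *eregs;
        return regs;
}
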
arch/x86/entry/entry_64_compat.S (7 changes: 5 additions & 2 deletions)
@@ -48,7 +48,7 @@
  */
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	SWAPGS
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/*
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
  */
 	movl	%eax, %eax
 
-	/* Construct struct pt_regs on stack (iret frame is already on stack) */
 	pushq	%rax			/* pt_regs->orig_ax */
+
+	/* switch to thread stack expects orig_ax to be pushed */
+	call	switch_to_thread_stack
+
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
arch/x86/include/asm/cpufeature.h (2 changes: 2 additions & 0 deletions)
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features. Used the same as boot_cpu_has().
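
The new alias lets call sites say what they mean: forcing a bug bit rather than a capability bit. A usage sketch (the specific X86_BUG_* name is illustrative, modeled on how this series marks all x86 CPUs as affected during early setup):

        /* Early setup; the bit stays set ("sticky") across later re-probes. */
        setup_force_cpu_bug(X86_BUG_CPU_INSECURE);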
arch/x86/include/asm/desc.h (11 changes: 2 additions & 9 deletions)
@@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
 	return this_cpu_ptr(&gdt_page)->gdt;
 }
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-	return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
-	unsigned int idx = get_cpu_gdt_ro_index(cpu);
-	return (struct desc_struct *)__fix_to_virt(idx);
+	return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 
 /* Provide the current read-only GDT */
@@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 }
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
 	struct desc_struct *d = get_cpu_gdt_rw(cpu);
 	tss_desc tss;
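
get_cpu_gdt_ro() now derives the read-only GDT address from the per-CPU entry area instead of a raw per-CPU fixmap slot. A sketch of the accessor it relies on (simplified; the index helper shown is an illustrative stand-in for the series' real fixmap-index calculation):

/* Sketch: resolve a CPU's entry area out of the fixmap. The
 * cpu_entry_area_index() helper here is illustrative, not the
 * kernel's actual name for the index calculation.
 */
static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
        return (struct cpu_entry_area *)
                __fix_to_virt(cpu_entry_area_index(cpu, 0));
}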