x86/decompressor: Avoid the need for a stack in the 32-bit trampoline
The 32-bit trampoline no longer uses the stack for anything except
performing a far return back to long mode, and preserving the caller's
stack pointer value. Currently, the trampoline stack is placed in the
same page that carries the trampoline code, which means this page must
be mapped writable and executable, and the stack is therefore executable
as well.

Replace the far return with a far jump, so that the return address can
be pre-calculated and patched into the code before it is called. This
removes the need for a 32-bit addressable stack entirely, and in a later
patch, this will be taken advantage of by removing writable permissions
from (and adding executable permissions to) the trampoline code page
when booting via the EFI stub.

Note that the value of RSP still needs to be preserved explicitly across
the switch into 32-bit mode, as the register may get truncated to 32
bits.
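
In C terms, the preservation amounts to the following round trip (a
hedged sketch with an illustrative stack-pointer value; the patch itself
does this with SHRQ/SHLQ/ORQ on %rbx, as the head_64.S diff below shows).
The point is that the high half is parked in the low 32 bits of a
register the 32-bit code never writes:

/* Sketch of the preservation arithmetic around the mode switch. */
#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint64_t rsp = 0x0000007fffffe010ULL;	/* illustrative 64-bit RSP */

	/* movq %rsp, %rbx; shrq $32, %rbx: park the high half in the
	 * low 32 bits of a register that survives 32-bit execution.
	 */
	uint64_t rbx = rsp >> 32;

	/* In 32-bit mode only the low half of RSP remains visible. */
	uint32_t esp = (uint32_t)rsp;

	/* shlq $32, %rbx; orq %rbx, %rsp: splice the halves back
	 * together after the far jump returns to long mode.
	 */
	uint64_t restored = ((uint64_t)rbx << 32) | esp;

	assert(restored == rsp);
	return 0;
}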

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-12-ardb@kernel.org
Ard Biesheuvel authored and Borislav Petkov (AMD) committed Aug 7, 2023
1 parent 918a7a0 commit bd328aa
Showing 3 changed files with 40 additions and 21 deletions.
45 changes: 27 additions & 18 deletions arch/x86/boot/compressed/head_64.S
@@ -540,6 +540,7 @@ SYM_FUNC_END(.Lrelocated)
  * trampoline memory. A non-zero second argument (ESI) means that the
  * trampoline needs to enable 5-level paging.
  */
+	.section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
 	/*
 	 * Preserve live 64-bit registers on the stack: this is necessary
@@ -550,35 +551,39 @@ SYM_CODE_START(trampoline_32bit_src)
 	pushq	%rbp
 	pushq	%rbx
 
-	/* Set up 32-bit addressable stack and push the old RSP value */
-	leaq	(TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx
-	movq	%rsp, (%rbx)
-	movq	%rbx, %rsp
-
-	/* Take the address of the trampoline exit code */
-	leaq	.Lret(%rip), %rbx
+	/* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+	movq	%rsp, %rbx
+	shrq	$32, %rbx
 
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
 	leaq	0f(%rip), %rax
 	pushq	%rax
 	lretq
 
+	/*
+	 * The 32-bit code below will do a far jump back to long mode and end
+	 * up here after reconfiguring the number of paging levels. First, the
+	 * stack pointer needs to be restored to its full 64-bit value before
+	 * the callee save register contents can be popped from the stack.
+	 */
 .Lret:
+	shlq	$32, %rbx
+	orq	%rbx, %rsp
+
 	/* Restore the preserved 64-bit registers */
-	movq	(%rsp), %rsp
 	popq	%rbx
 	popq	%rbp
 	popq	%r15
 	retq
 
 	.code32
 0:
 	/* Set up data and stack segments */
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
 	movl	%eax, %ss
 
 	/* Disable paging */
 	movl	%cr0, %eax
 	btrl	$X86_CR0_PG_BIT, %eax
@@ -633,25 +633,34 @@ SYM_CODE_START(trampoline_32bit_src)
 1:
 	movl	%eax, %cr4
 
-	/* Prepare the stack for far return to Long Mode */
-	pushl	$__KERNEL_CS
-	pushl	%ebx
-
 	/* Enable paging again. */
 	movl	%cr0, %eax
 	btsl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 
-	lret
+	/*
+	 * Return to the 64-bit calling code using LJMP rather than LRET, to
+	 * avoid the need for a 32-bit addressable stack. The destination
+	 * address will be adjusted after the template code is copied into a
+	 * 32-bit addressable buffer.
+	 */
+.Ljmp:	ljmpl	$__KERNEL_CS, $(.Lret - trampoline_32bit_src)
 SYM_CODE_END(trampoline_32bit_src)
 
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
+
 /*
  * The trampoline code has a size limit.
  * Make sure we fail to compile if the trampoline code grows
  * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
  */
 	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
 
+	.text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
 	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
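The `.word .Ljmp + 1 - trampoline_32bit_src` above relies on the encoding
of a direct far jump: an 0xEA opcode byte followed by a 32-bit offset and
a 16-bit selector, so the immediate to patch sits one byte past the start
of the instruction. A hedged C sketch of that layout (the instruction
bytes and selector value are illustrative, not taken from a real build):

/* Decode a direct far jump (ljmpl $sel, $off) the way the fixup code
 * assumes it is laid out: 0xEA, imm32 offset, imm16 selector.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative encoding of ljmpl $0x10, $0x26 in a 32-bit section */
	const uint8_t insn[] = { 0xea, 0x26, 0x00, 0x00, 0x00, 0x10, 0x00 };
	uint32_t off;
	uint16_t sel;

	memcpy(&off, &insn[1], sizeof(off));	/* imm32 at opcode + 1 */
	memcpy(&sel, &insn[5], sizeof(sel));	/* selector after imm32 */
	printf("offset %#x, selector %#x\n", (unsigned)off, (unsigned)sel);
	return 0;
}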
4 changes: 2 additions & 2 deletions arch/x86/boot/compressed/pgtable.h
@@ -8,13 +8,13 @@
 #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
 #define TRAMPOLINE_32BIT_CODE_SIZE	0xA0
 
-#define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
-
 #ifndef __ASSEMBLER__
 
 extern unsigned long *trampoline_32bit;
 
 extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
 
+extern const u16 trampoline_ljmp_imm_offset;
+
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */
12 changes: 11 additions & 1 deletion arch/x86/boot/compressed/pgtable_64.c
@@ -109,6 +109,7 @@ static unsigned long find_trampoline_placement(void)
 struct paging_config paging_prepare(void *rmode)
 {
 	struct paging_config paging_config = {};
+	void *tramp_code;
 
 	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
 	boot_params = rmode;
@@ -148,9 +149,18 @@ struct paging_config paging_prepare(void *rmode)
 	memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
 
 	/* Copy trampoline code in place */
-	memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+	tramp_code = memcpy(trampoline_32bit +
+			TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
 		&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
 
+	/*
+	 * Avoid the need for a stack in the 32-bit trampoline code, by using
+	 * LJMP rather than LRET to return back to long mode. LJMP takes an
+	 * immediate absolute address, which needs to be adjusted based on the
+	 * placement of the trampoline.
+	 */
+	*(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code;
+
 	/*
 	 * The code below prepares page table in trampoline memory.
 	 *
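A worked example of the `+=` fixup above, with illustrative numbers only
(0x26 stands in for the assembled `.Lret - trampoline_32bit_src` offset
and 0x9d000 for the trampoline placement): the immediate starts out as an
offset within the template, and adding the copy's runtime address turns
it into the absolute 32-bit address the LJMP needs.

/* Hedged sketch of the immediate fixup; values are made up. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t imm = 0x26;		/* assembled: offset of .Lret in the template */
	uint32_t tramp = 0x9d000;	/* address the template was copied to */

	imm += tramp;			/* the in-place += fixup performed above */
	printf("ljmpl now targets %#x\n", (unsigned)imm);
	return 0;
}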
