x86, kexec: x86_64: add kexec jump support for x86_64
Impact: New major feature

This patch adds kexec jump support for x86_64. More information about
kexec jump can be found in the corresponding x86_32 support patch.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Huang Ying authored and H. Peter Anvin committed Mar 11, 2009
1 parent 5359454 commit fee7b0d
Showing 5 changed files with 197 additions and 44 deletions.
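
For orientation before the per-file diffs: a minimal sketch, in C, of the round trip that the machine_kexec() changes below implement when the image was loaded with preserve_context set. This is an illustration only (the function name is invented, and the real code guards the save/restore calls with CONFIG_KEXEC_JUMP), but every call it names appears in the hunks that follow.

/*
 * Sketch only, not kernel code: the shape of a kexec jump on x86_64.
 * save_processor_state()/restore_processor_state() come from the
 * hibernation code, which is why KEXEC_JUMP depends on HIBERNATION.
 */
static void kexec_jump_round_trip(struct kimage *image,
				  unsigned long page_list[PAGES_NR])
{
	if (image->preserve_context)
		save_processor_state();		/* hibernation save path */

	local_irq_disable();			/* no interrupts across the jump */

	/*
	 * relocate_kernel() now returns: when the kexeced kernel later
	 * jumps back, its return value is the re-entry point of the peer
	 * system and becomes the start address of the next jump.
	 */
	image->start = relocate_kernel((unsigned long)image->head,
				       (unsigned long)page_list,
				       image->start,
				       image->preserve_context);

	if (image->preserve_context)
		restore_processor_state();	/* back in the original kernel */
}
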
2 changes: 1 addition & 1 deletion arch/x86/Kconfig
@@ -1431,7 +1431,7 @@ config CRASH_DUMP
config KEXEC_JUMP
bool "kexec jump (EXPERIMENTAL)"
depends on EXPERIMENTAL
depends on KEXEC && HIBERNATION && X86_32
depends on KEXEC && HIBERNATION
---help---
Jump between original kernel and kexeced kernel and invoke
code in physical address mode via KEXEC
13 changes: 7 additions & 6 deletions arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
# define PA_TABLE_PAGE 1
# define PAGES_NR 2
# define VA_CONTROL_PAGE 1
# define PA_TABLE_PAGE 2
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
#endif

#ifdef CONFIG_X86_32
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
#endif

#ifndef __ASSEMBLY__

@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
unsigned int has_pae,
unsigned int preserve_context);
#else
NORET_TYPE void
unsigned long
relocate_kernel(unsigned long indirection_page,
unsigned long page_list,
unsigned long start_address) ATTRIB_NORET;
unsigned long start_address,
unsigned int preserve_context);
#endif

#define ARCH_HAS_KIMAGE_ARCH
42 changes: 38 additions & 4 deletions arch/x86/kernel/machine_kexec_64.c
@@ -13,6 +13,7 @@
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <linux/io.h>
#include <linux/suspend.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
int save_ftrace_enabled;

tracer_disable();
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context)
save_processor_state();
#endif

save_ftrace_enabled = __ftrace_enabled_save();

/* Interrupts aren't acceptable while we reboot */
local_irq_disable();

if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
/*
* We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
* paths already have calls to disable_IO_APIC() in
* one form or other. kexec jump path also need
* one.
*/
disable_IO_APIC();
#endif
}

control_page = page_address(image->control_code_page) + PAGE_SIZE;
memcpy(control_page, relocate_kernel, PAGE_SIZE);
memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);

page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));

if (image->type == KEXEC_TYPE_DEFAULT)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
<< PAGE_SHIFT);

/*
* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
@@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image)
set_idt(phys_to_virt(0), 0);

/* now call it */
relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
image->start);
image->start = relocate_kernel((unsigned long)image->head,
(unsigned long)page_list,
image->start,
image->preserve_context);

#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context)
restore_processor_state();
#endif

__ftrace_enabled_restore(save_ftrace_enabled);
}

void arch_crash_save_vmcoreinfo(void)
177 changes: 144 additions & 33 deletions arch/x86/kernel/relocate_kernel_64.S
@@ -19,6 +19,24 @@
#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
* ~ control_page + PAGE_SIZE are used as data storage and stack for
* jumping back
*/
#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

/* Minimal CPU state */
#define RSP DATA(0x0)
#define CR0 DATA(0x8)
#define CR3 DATA(0x10)
#define CR4 DATA(0x18)

/* other data */
#define CP_PA_TABLE_PAGE DATA(0x20)
#define CP_PA_SWAP_PAGE DATA(0x28)
#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)

.text
.align PAGE_SIZE
.code64
@@ -28,8 +46,27 @@ relocate_kernel:
* %rdi indirection_page
* %rsi page_list
* %rdx start address
* %rcx preserve_context
*/

/* Save the CPU context, used for jumping back */
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushf

movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
movq %rsp, RSP(%r11)
movq %cr0, %rax
movq %rax, CR0(%r11)
movq %cr3, %rax
movq %rax, CR3(%r11)
movq %cr4, %rax
movq %rax, CR4(%r11)

/* zero out flags, and disable interrupts */
pushq $0
popfq
@@ -41,10 +78,18 @@ relocate_kernel:
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8

/* get physical address of page table now too */
movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
movq PTR(PA_TABLE_PAGE)(%rsi), %r9

/* get physical address of swap page now */
movq PTR(PA_SWAP_PAGE)(%rsi), %r10

/* save some information for jumping back */
movq %r9, CP_PA_TABLE_PAGE(%r11)
movq %r10, CP_PA_SWAP_PAGE(%r11)
movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)

/* Switch to the identity mapped page tables */
movq %rcx, %cr3
movq %r9, %cr3

/* setup a new stack at the end of the physical control page */
lea PAGE_SIZE(%r8), %rsp
@@ -83,9 +128,87 @@ identity_mapped:
1:

/* Flush the TLB (needed?) */
movq %rcx, %cr3
movq %r9, %cr3

movq %rcx, %r11
call swap_pages

/*
* To be certain of avoiding problems with self-modifying code
* I need to execute a serializing instruction here.
* So I flush the TLB by reloading %cr3 here, it's handy,
* and not processor dependent.
*/
movq %cr3, %rax
movq %rax, %cr3

/*
* set all of the registers to known values
* leave %rsp alone
*/

testq %r11, %r11
jnz 1f
xorq %rax, %rax
xorq %rbx, %rbx
xorq %rcx, %rcx
xorq %rdx, %rdx
xorq %rsi, %rsi
xorq %rdi, %rdi
xorq %rbp, %rbp
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r9
xorq %r11, %r11
xorq %r12, %r12
xorq %r13, %r13
xorq %r14, %r14
xorq %r15, %r15

ret

1:
popq %rdx
leaq PAGE_SIZE(%r10), %rsp
call *%rdx

/* get the re-entry point of the peer system */
movq 0(%rsp), %rbp
call 1f
1:
popq %r8
subq $(1b - relocate_kernel), %r8
movq CP_PA_SWAP_PAGE(%r8), %r10
movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
movq CP_PA_TABLE_PAGE(%r8), %rax
movq %rax, %cr3
lea PAGE_SIZE(%r8), %rsp
call swap_pages
movq $virtual_mapped, %rax
pushq %rax
ret

virtual_mapped:
movq RSP(%r8), %rsp
movq CR4(%r8), %rax
movq %rax, %cr4
movq CR3(%r8), %rax
movq CR0(%r8), %r8
movq %rax, %cr3
movq %r8, %cr0
movq %rbp, %rax

popf
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
ret

/* Do the copies */
swap_pages:
movq %rdi, %rcx /* Put the page_list in %rcx */
xorq %rdi, %rdi
xorq %rsi, %rsi
@@ -117,39 +240,27 @@ identity_mapped:
movq %rcx, %rsi /* For ever source page do a copy */
andq $0xfffffffffffff000, %rsi

movq %rdi, %rdx
movq %rsi, %rax

movq %r10, %rdi
movq $512, %rcx
rep ; movsq
jmp 0b
3:

/*
* To be certain of avoiding problems with self-modifying code
* I need to execute a serializing instruction here.
* So I flush the TLB by reloading %cr3 here, it's handy,
* and not processor dependent.
*/
movq %cr3, %rax
movq %rax, %cr3

/*
* set all of the registers to known values
* leave %rsp alone
*/
movq %rax, %rdi
movq %rdx, %rsi
movq $512, %rcx
rep ; movsq

xorq %rax, %rax
xorq %rbx, %rbx
xorq %rcx, %rcx
xorq %rdx, %rdx
xorq %rsi, %rsi
xorq %rdi, %rdi
xorq %rbp, %rbp
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r9
xorq %r11, %r11
xorq %r12, %r12
xorq %r13, %r13
xorq %r14, %r14
xorq %r15, %r15
movq %rdx, %rdi
movq %r10, %rsi
movq $512, %rcx
rep ; movsq

lea PAGE_SIZE(%rax), %rsi
jmp 0b
3:
ret

.globl kexec_control_code_size
.set kexec_control_code_size, . - relocate_kernel
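
For reference, the 4 KiB control page layout implied by the DATA() offsets and the stack setup above, written out as a hypothetical C struct. The struct name and form are illustrative only; the patch itself works purely with byte offsets.

/*
 * Illustrative only: offsets mirror DATA(0x0)..DATA(0x30) above, with
 * KEXEC_CONTROL_CODE_MAX_SIZE = 2048 (see the kexec.h hunk).
 */
struct control_page_sketch {
	unsigned char  code[2048];		/* relocate_kernel text; size checked by
						 * the vmlinux_64.lds.S assert added below */
	unsigned long  rsp;			/* DATA(0x00): saved stack pointer */
	unsigned long  cr0;			/* DATA(0x08) */
	unsigned long  cr3;			/* DATA(0x10) */
	unsigned long  cr4;			/* DATA(0x18) */
	unsigned long  pa_table_page;		/* DATA(0x20): identity-mapped page table */
	unsigned long  pa_swap_page;		/* DATA(0x28): page swap_pages uses to
						 * exchange source and destination pages */
	unsigned long  pa_backup_pages_map;	/* DATA(0x30): indirection page list
						 * (%rdi on entry) */
	/* the rest of the page, up to PAGE_SIZE, is used as the temporary stack */
};
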
7 changes: 7 additions & 0 deletions arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif

#ifdef CONFIG_KEXEC
#include <asm/kexec.h>

ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
"kexec control code size is too big")
#endif
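
Not part of this patch, but useful context on how the new path gets exercised end to end: userspace loads the target kernel with the KEXEC_PRESERVE_CONTEXT flag (kexec-tools exposes this as --load-preserve-context) and triggers the jump with the reboot syscall's LINUX_REBOOT_CMD_KEXEC command; in the kernel, kernel_kexec() then takes the preserve_context branch and reaches the machine_kexec() code changed above. A rough sketch of the raw syscalls (illustrative; segment setup is omitted):

#include <linux/kexec.h>	/* KEXEC_PRESERVE_CONTEXT, struct kexec_segment */
#include <linux/reboot.h>	/* LINUX_REBOOT_MAGIC*, LINUX_REBOOT_CMD_KEXEC */
#include <sys/syscall.h>
#include <unistd.h>

/* 'segments' must already describe the kernel image to jump to */
static long load_and_jump(unsigned long entry, unsigned long nr_segments,
			  struct kexec_segment *segments)
{
	long ret = syscall(SYS_kexec_load, entry, nr_segments, segments,
			   KEXEC_ARCH_DEFAULT | KEXEC_PRESERVE_CONTEXT);
	if (ret)
		return ret;

	/* jump to the loaded kernel; this returns after it jumps back */
	return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
		       LINUX_REBOOT_CMD_KEXEC, NULL);
}
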
