Skip to content

Commit

Permalink
x86-32, mm: Add an initial page table for core bootstrapping
Browse files Browse the repository at this point in the history
This patch adds an initial page table with low mappings used exclusively
for booting APs/resuming after ACPI suspend/machine restart. After this,
there's no need to add low mappings to swapper_pg_dir and zap them later
or create own swsusp PGD page solely for ACPI sleep needs - we have
initial_page_table for that.

Signed-off-by: Borislav Petkov <bp@alien8.de>
LKML-Reference: <20101020070526.GA9588@liondog.tnic>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
  • Loading branch information
Borislav Petkov authored and H. Peter Anvin committed Oct 20, 2010
1 parent d25e6b0 commit b40827f
Show file tree
Hide file tree
Showing 11 changed files with 56 additions and 119 deletions.
2 changes: 1 addition & 1 deletion arch/x86/include/asm/pgtable_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct mm_struct;
struct vm_area_struct;

extern pgd_t swapper_pg_dir[1024];
extern pgd_t trampoline_pg_dir[1024];
extern pgd_t initial_page_table[1024];

static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
Expand Down
2 changes: 0 additions & 2 deletions arch/x86/include/asm/tlbflush.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
flush_tlb_all();
}

extern void zap_low_mappings(bool early);

#endif /* _ASM_X86_TLBFLUSH_H */
3 changes: 0 additions & 3 deletions arch/x86/include/asm/trampoline.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,13 @@ extern unsigned char *trampoline_base;

extern unsigned long init_rsp;
extern unsigned long initial_code;
extern unsigned long initial_page_table;
extern unsigned long initial_gs;

#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)

extern unsigned long setup_trampoline(void);
extern void __init setup_trampoline_page_table(void);
extern void __init reserve_trampoline_memory(void);
#else
static inline void setup_trampoline_page_table(void) {}
static inline void reserve_trampoline_memory(void) {}
#endif /* CONFIG_X86_TRAMPOLINE */

Expand Down
7 changes: 6 additions & 1 deletion arch/x86/kernel/acpi/sleep.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
#include <asm/segment.h>
#include <asm/desc.h>

#ifdef CONFIG_X86_32
#include <asm/pgtable.h>
#include <asm/pgtable_32.h>
#endif

#include "realmode/wakeup.h"
#include "sleep.h"

Expand Down Expand Up @@ -90,7 +95,7 @@ int acpi_save_state_mem(void)

#ifndef CONFIG_64BIT
header->pmode_entry = (u32)&wakeup_pmode_return;
header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
header->pmode_cr3 = (u32)__pa(&initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
header->trampoline_segment = setup_trampoline() >> 4;
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/head32.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
#include <asm/tlbflush.h>

static void __init i386_default_early_setup(void)
{
Expand Down
55 changes: 25 additions & 30 deletions arch/x86/kernel/head_32.S
Original file line number Diff line number Diff line change
Expand Up @@ -183,21 +183,20 @@ default_entry:
#ifdef CONFIG_X86_PAE

/*
* In PAE mode swapper_pg_dir is statically defined to contain enough
* entries to cover the VMSPLIT option (that is the top 1, 2 or 3
* entries). The identity mapping is handled by pointing two PGD
* entries to the first kernel PMD.
* In PAE mode initial_page_table is statically defined to contain
* enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
* entries). The identity mapping is handled by pointing two PGD entries
* to the first kernel PMD.
*
* Note the upper half of each PMD or PTE are always zero at
* this stage.
* Note the upper half of each PMD or PTE are always zero at this stage.
*/

#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */

xorl %ebx,%ebx /* %ebx is kept at zero */

movl $pa(__brk_base), %edi
movl $pa(swapper_pg_pmd), %edx
movl $pa(initial_pg_pmd), %edx
movl $PTE_IDENT_ATTR, %eax
10:
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
Expand Down Expand Up @@ -226,14 +225,14 @@ default_entry:
movl %eax, pa(max_pfn_mapped)

/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
#else /* Not PAE */

page_pde_offset = (__PAGE_OFFSET >> 20);

movl $pa(__brk_base), %edi
movl $pa(swapper_pg_dir), %edx
movl $pa(initial_page_table), %edx
movl $PTE_IDENT_ATTR, %eax
10:
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
Expand All @@ -257,8 +256,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
movl %eax, pa(max_pfn_mapped)

/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_dir+0xffc)
movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(initial_page_table+0xffc)
#endif
jmp 3f
/*
Expand Down Expand Up @@ -334,7 +333,7 @@ ENTRY(startup_32_smp)
/*
* Enable paging
*/
movl pa(initial_page_table), %eax
movl $pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
orl $X86_CR0_PG,%eax
Expand Down Expand Up @@ -614,29 +613,25 @@ ignore_int:
.align 4
ENTRY(initial_code)
.long i386_start_kernel
ENTRY(initial_page_table)
.long pa(swapper_pg_dir)

/*
* BSS section
*/
__PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
swapper_pg_pmd:
initial_pg_pmd:
.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
ENTRY(initial_page_table)
.fill 1024,4,0
#endif
swapper_pg_fixmap:
initial_pg_fixmap:
.fill 1024,4,0
#ifdef CONFIG_X86_TRAMPOLINE
ENTRY(trampoline_pg_dir)
.fill 1024,4,0
#endif
ENTRY(empty_zero_page)
.fill 4096,1,0
ENTRY(swapper_pg_dir)
.fill 1024,4,0

/*
* This starts the data section.
Expand All @@ -645,20 +640,20 @@ ENTRY(empty_zero_page)
__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
ENTRY(initial_page_table)
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
.long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
.long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
# elif KPMDS == 2
.long 0,0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
.long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
# elif KPMDS == 1
.long 0,0
.long 0,0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
# else
# error "Kernel PMDs should be 1, 2 or 3"
# endif
Expand Down
10 changes: 2 additions & 8 deletions arch/x86/kernel/reboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,16 +371,10 @@ void machine_real_restart(const unsigned char *code, int length)
CMOS_WRITE(0x00, 0x8f);
spin_unlock(&rtc_lock);

/* Remap the kernel at virtual address zero, as well as offset zero
from the kernel segment. This assumes the kernel segment starts at
virtual address PAGE_OFFSET. */
memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);

/*
* Use `swapper_pg_dir' as our page directory.
* Switch back to the initial page table.
*/
load_cr3(swapper_pg_dir);
load_cr3(initial_page_table);

/* Write 0x1234 to absolute memory location 0x472. The BIOS reads
this on booting to tell it to "Bypass memory test (also warm
Expand Down
18 changes: 17 additions & 1 deletion arch/x86/kernel/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,17 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();

/*
* copy kernel address range established so far and switch
* to the proper swapper page table
*/
clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
initial_page_table + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);

load_cr3(swapper_pg_dir);
__flush_tlb_all();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
Expand Down Expand Up @@ -1009,7 +1020,12 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);

setup_trampoline_page_table();
#ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
#endif

tboot_probe();

Expand Down
16 changes: 4 additions & 12 deletions arch/x86/kernel/smpboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,22 +298,16 @@ notrace static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
cpu_init();
preempt_disable();
smp_callin();

#ifdef CONFIG_X86_32
/*
* Switch away from the trampoline page-table
*
* Do this before cpu_init() because it needs to access per-cpu
* data which may not be mapped in the trampoline page-table.
*/
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif

cpu_init();
preempt_disable();
smp_callin();

/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
Expand Down Expand Up @@ -772,7 +766,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
initial_page_table = __pa(&trampoline_pg_dir);
#else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
Expand Down Expand Up @@ -921,7 +914,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

err = do_boot_cpu(apicid, cpu);

if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
Expand Down
16 changes: 0 additions & 16 deletions arch/x86/kernel/trampoline.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,3 @@ unsigned long __trampinit setup_trampoline(void)
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);
}

void __init setup_trampoline_page_table(void)
{
#ifdef CONFIG_X86_32
/* Copy kernel address range */
clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);

/* Initialize low mappings */
clone_pgd_range(trampoline_pg_dir,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
min_t(unsigned long, KERNEL_PGD_PTRS,
KERNEL_PGD_BOUNDARY));
#endif
}
45 changes: 0 additions & 45 deletions arch/x86/mm/init_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -548,48 +548,6 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base);
}

#ifdef CONFIG_ACPI_SLEEP
/*
* ACPI suspend needs this for resume, because things like the intel-agp
* driver might have split up a kernel 4MB mapping.
*/
char swsusp_pg_dir[PAGE_SIZE]
__attribute__ ((aligned(PAGE_SIZE)));

static inline void save_pg_dir(void)
{
copy_page(swsusp_pg_dir, swapper_pg_dir);
}
#else /* !CONFIG_ACPI_SLEEP */
static inline void save_pg_dir(void)
{
}
#endif /* !CONFIG_ACPI_SLEEP */

void zap_low_mappings(bool early)
{
int i;

/*
* Zap initial low-memory mappings.
*
* Note that "pgd_clear()" doesn't do it for
* us, because pgd_clear() is a no-op on i386.
*/
for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
#ifdef CONFIG_X86_PAE
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
}

if (early)
__flush_tlb();
else
flush_tlb_all();
}

pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
EXPORT_SYMBOL_GPL(__supported_pte_mask);

Expand Down Expand Up @@ -958,9 +916,6 @@ void __init mem_init(void)

if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();

save_pg_dir();
zap_low_mappings(true);
}

#ifdef CONFIG_MEMORY_HOTPLUG
Expand Down

0 comments on commit b40827f

Please sign in to comment.