x86-32, mm: Add an initial page table for core bootstrapping

This patch adds an initial page table with low mappings used exclusively
for booting APs/resuming after ACPI suspend/machine restart. After this,
there's no need to add low mappings to swapper_pg_dir and zap them later,
or to create our own swsusp PGD page solely for ACPI sleep needs - we have
initial_page_table for that.

Signed-off-by: Borislav Petkov <bp@alien8.de>
LKML-Reference: <20101020070526.GA9588@liondog.tnic>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
Borislav Petkov 2010-08-28 15:58:33 +02:00 committed by H. Peter Anvin
parent d25e6b0b32
commit b40827fa72
11 changed files with 58 additions and 121 deletions

View File

@ -26,7 +26,7 @@ struct mm_struct;
struct vm_area_struct; struct vm_area_struct;
extern pgd_t swapper_pg_dir[1024]; extern pgd_t swapper_pg_dir[1024];
extern pgd_t trampoline_pg_dir[1024]; extern pgd_t initial_page_table[1024];
static inline void pgtable_cache_init(void) { } static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { } static inline void check_pgt_cache(void) { }

View File

@ -172,6 +172,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
flush_tlb_all(); flush_tlb_all();
} }
extern void zap_low_mappings(bool early);
#endif /* _ASM_X86_TLBFLUSH_H */ #endif /* _ASM_X86_TLBFLUSH_H */

View File

@ -13,16 +13,13 @@ extern unsigned char *trampoline_base;
extern unsigned long init_rsp; extern unsigned long init_rsp;
extern unsigned long initial_code; extern unsigned long initial_code;
extern unsigned long initial_page_table;
extern unsigned long initial_gs; extern unsigned long initial_gs;
#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
extern unsigned long setup_trampoline(void); extern unsigned long setup_trampoline(void);
extern void __init setup_trampoline_page_table(void);
extern void __init reserve_trampoline_memory(void); extern void __init reserve_trampoline_memory(void);
#else #else
static inline void setup_trampoline_page_table(void) {}
static inline void reserve_trampoline_memory(void) {} static inline void reserve_trampoline_memory(void) {}
#endif /* CONFIG_X86_TRAMPOLINE */ #endif /* CONFIG_X86_TRAMPOLINE */

View File

@ -12,6 +12,11 @@
#include <asm/segment.h> #include <asm/segment.h>
#include <asm/desc.h> #include <asm/desc.h>
#ifdef CONFIG_X86_32
#include <asm/pgtable.h>
#include <asm/pgtable_32.h>
#endif
#include "realmode/wakeup.h" #include "realmode/wakeup.h"
#include "sleep.h" #include "sleep.h"
@ -90,7 +95,7 @@ int acpi_save_state_mem(void)
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
header->pmode_entry = (u32)&wakeup_pmode_return; header->pmode_entry = (u32)&wakeup_pmode_return;
header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET); header->pmode_cr3 = (u32)__pa(&initial_page_table);
saved_magic = 0x12345678; saved_magic = 0x12345678;
#else /* CONFIG_64BIT */ #else /* CONFIG_64BIT */
header->trampoline_segment = setup_trampoline() >> 4; header->trampoline_segment = setup_trampoline() >> 4;

View File

@ -17,6 +17,7 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/io_apic.h> #include <asm/io_apic.h>
#include <asm/bios_ebda.h> #include <asm/bios_ebda.h>
#include <asm/tlbflush.h>
static void __init i386_default_early_setup(void) static void __init i386_default_early_setup(void)
{ {

View File

@ -183,13 +183,12 @@ default_entry:
#ifdef CONFIG_X86_PAE #ifdef CONFIG_X86_PAE
/* /*
* In PAE mode swapper_pg_dir is statically defined to contain enough * In PAE mode initial_page_table is statically defined to contain
* entries to cover the VMSPLIT option (that is the top 1, 2 or 3 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
* entries). The identity mapping is handled by pointing two PGD * entries). The identity mapping is handled by pointing two PGD entries
* entries to the first kernel PMD. * to the first kernel PMD.
* *
* Note the upper half of each PMD or PTE are always zero at * Note the upper half of each PMD or PTE are always zero at this stage.
* this stage.
*/ */
#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ #define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
@ -197,7 +196,7 @@ default_entry:
xorl %ebx,%ebx /* %ebx is kept at zero */ xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(__brk_base), %edi movl $pa(__brk_base), %edi
movl $pa(swapper_pg_pmd), %edx movl $pa(initial_pg_pmd), %edx
movl $PTE_IDENT_ATTR, %eax movl $PTE_IDENT_ATTR, %eax
10: 10:
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
@ -226,14 +225,14 @@ default_entry:
movl %eax, pa(max_pfn_mapped) movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */ /* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
#else /* Not PAE */ #else /* Not PAE */
page_pde_offset = (__PAGE_OFFSET >> 20); page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(__brk_base), %edi movl $pa(__brk_base), %edi
movl $pa(swapper_pg_dir), %edx movl $pa(initial_page_table), %edx
movl $PTE_IDENT_ATTR, %eax movl $PTE_IDENT_ATTR, %eax
10: 10:
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
@ -257,8 +256,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
movl %eax, pa(max_pfn_mapped) movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */ /* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_dir+0xffc) movl %eax,pa(initial_page_table+0xffc)
#endif #endif
jmp 3f jmp 3f
/* /*
@ -334,7 +333,7 @@ ENTRY(startup_32_smp)
/* /*
* Enable paging * Enable paging
*/ */
movl pa(initial_page_table), %eax movl $pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */ movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax movl %cr0,%eax
orl $X86_CR0_PG,%eax orl $X86_CR0_PG,%eax
@ -614,8 +613,6 @@ ignore_int:
.align 4 .align 4
ENTRY(initial_code) ENTRY(initial_code)
.long i386_start_kernel .long i386_start_kernel
ENTRY(initial_page_table)
.long pa(swapper_pg_dir)
/* /*
* BSS section * BSS section
@ -623,20 +620,18 @@ ENTRY(initial_page_table)
__PAGE_ALIGNED_BSS __PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm .align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE #ifdef CONFIG_X86_PAE
swapper_pg_pmd: initial_pg_pmd:
.fill 1024*KPMDS,4,0 .fill 1024*KPMDS,4,0
#else #else
ENTRY(swapper_pg_dir) ENTRY(initial_page_table)
.fill 1024,4,0 .fill 1024,4,0
#endif #endif
swapper_pg_fixmap: initial_pg_fixmap:
.fill 1024,4,0 .fill 1024,4,0
#ifdef CONFIG_X86_TRAMPOLINE
ENTRY(trampoline_pg_dir)
.fill 1024,4,0
#endif
ENTRY(empty_zero_page) ENTRY(empty_zero_page)
.fill 4096,1,0 .fill 4096,1,0
ENTRY(swapper_pg_dir)
.fill 1024,4,0
/* /*
* This starts the data section. * This starts the data section.
@ -645,20 +640,20 @@ ENTRY(empty_zero_page)
__PAGE_ALIGNED_DATA __PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */ /* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm .align PAGE_SIZE_asm
ENTRY(swapper_pg_dir) ENTRY(initial_page_table)
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3 # if KPMDS == 3
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0
# elif KPMDS == 2 # elif KPMDS == 2
.long 0,0 .long 0,0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0
# elif KPMDS == 1 # elif KPMDS == 1
.long 0,0 .long 0,0
.long 0,0 .long 0,0
.long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
# else # else
# error "Kernel PMDs should be 1, 2 or 3" # error "Kernel PMDs should be 1, 2 or 3"
# endif # endif

View File

@ -371,16 +371,10 @@ void machine_real_restart(const unsigned char *code, int length)
CMOS_WRITE(0x00, 0x8f); CMOS_WRITE(0x00, 0x8f);
spin_unlock(&rtc_lock); spin_unlock(&rtc_lock);
/* Remap the kernel at virtual address zero, as well as offset zero
from the kernel segment. This assumes the kernel segment starts at
virtual address PAGE_OFFSET. */
memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
/* /*
* Use `swapper_pg_dir' as our page directory. * Switch back to the initial page table.
*/ */
load_cr3(swapper_pg_dir); load_cr3(initial_page_table);
/* Write 0x1234 to absolute memory location 0x472. The BIOS reads /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
this on booting to tell it to "Bypass memory test (also warm this on booting to tell it to "Bypass memory test (also warm

View File

@ -728,6 +728,17 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect(); visws_early_detect();
/*
* copy kernel address range established so far and switch
* to the proper swapper page table
*/
clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
initial_page_table + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#else #else
printk(KERN_INFO "Command line: %s\n", boot_command_line); printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif #endif
@ -1009,7 +1020,12 @@ void __init setup_arch(char **cmdline_p)
paging_init(); paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir); x86_init.paging.pagetable_setup_done(swapper_pg_dir);
setup_trampoline_page_table(); #ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
#endif
tboot_probe(); tboot_probe();

View File

@ -298,22 +298,16 @@ notrace static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the * fragile that we want to limit the things done here to the
* most necessary things. * most necessary things.
*/ */
#ifdef CONFIG_X86_32
/*
* Switch away from the trampoline page-table
*
* Do this before cpu_init() because it needs to access per-cpu
* data which may not be mapped in the trampoline page-table.
*/
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
cpu_init(); cpu_init();
preempt_disable(); preempt_disable();
smp_callin(); smp_callin();
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
/* otherwise gcc will move up smp_processor_id before the cpu_init */ /* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier(); barrier();
/* /*
@ -772,7 +766,6 @@ do_rest:
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */ /* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu); irq_ctx_init(cpu);
initial_page_table = __pa(&trampoline_pg_dir);
#else #else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK); clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu); initial_gs = per_cpu_offset(cpu);
@ -921,7 +914,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
err = do_boot_cpu(apicid, cpu); err = do_boot_cpu(apicid, cpu);
if (err) { if (err) {
pr_debug("do_boot_cpu failed %d\n", err); pr_debug("do_boot_cpu failed %d\n", err);
return -EIO; return -EIO;

View File

@ -38,19 +38,3 @@ unsigned long __trampinit setup_trampoline(void)
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base); return virt_to_phys(trampoline_base);
} }
void __init setup_trampoline_page_table(void)
{
#ifdef CONFIG_X86_32
/* Copy kernel address range */
clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
/* Initialize low mappings */
clone_pgd_range(trampoline_pg_dir,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
min_t(unsigned long, KERNEL_PGD_PTRS,
KERNEL_PGD_BOUNDARY));
#endif
}

View File

@ -548,48 +548,6 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base); permanent_kmaps_init(pgd_base);
} }
#ifdef CONFIG_ACPI_SLEEP
/*
* ACPI suspend needs this for resume, because things like the intel-agp
* driver might have split up a kernel 4MB mapping.
*/
char swsusp_pg_dir[PAGE_SIZE]
__attribute__ ((aligned(PAGE_SIZE)));
static inline void save_pg_dir(void)
{
copy_page(swsusp_pg_dir, swapper_pg_dir);
}
#else /* !CONFIG_ACPI_SLEEP */
static inline void save_pg_dir(void)
{
}
#endif /* !CONFIG_ACPI_SLEEP */
void zap_low_mappings(bool early)
{
int i;
/*
* Zap initial low-memory mappings.
*
* Note that "pgd_clear()" doesn't do it for
* us, because pgd_clear() is a no-op on i386.
*/
for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
#ifdef CONFIG_X86_PAE
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
}
if (early)
__flush_tlb();
else
flush_tlb_all();
}
pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
EXPORT_SYMBOL_GPL(__supported_pte_mask); EXPORT_SYMBOL_GPL(__supported_pte_mask);
@ -958,9 +916,6 @@ void __init mem_init(void)
if (boot_cpu_data.wp_works_ok < 0) if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit(); test_wp_bit();
save_pg_dir();
zap_low_mappings(true);
} }
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_MEMORY_HOTPLUG