forked from Minki/linux
fd89a13792
This patch fixes machine crashes which occur when heavily exercising the CPU hotplug codepaths on a 32-bit kernel. These crashes are caused by AMD Erratum 383 and result in a fatal machine check exception. Here's the scenario: 1. On 32-bit, the swapper_pg_dir page table is used as the initial page table for booting a secondary CPU. 2. To make this work, swapper_pg_dir needs a direct mapping of physical memory in it (the low mappings). By adding those low, large page (2M) mappings (PAE kernel), we create the necessary conditions for Erratum 383 to occur. 3. Other CPUs which do not participate in the off- and onlining game may use swapper_pg_dir while the low mappings are present (when leave_mm is called). For all steps below, the CPU referred to is a CPU that is using swapper_pg_dir, and not the CPU which is being onlined. 4. The presence of the low mappings in swapper_pg_dir can result in TLB entries for addresses below __PAGE_OFFSET being established speculatively. These TLB entries are marked global and large. 5. When the CPU with such a TLB entry switches to another page table, this TLB entry remains because it is global. 6. The process then generates an access to an address covered by the above TLB entry but there is a permission mismatch - the TLB entry covers a large global page not accessible to userspace. 7. Due to this permission mismatch a new 4KB, user TLB entry gets established. Further, Erratum 383 provides for a small window of time where both TLB entries are present. This results in an uncorrectable machine check exception signalling a TLB multimatch which panics the machine. There are two ways to fix this issue: 1. Always do a global TLB flush when a new cr3 is loaded and the old page table was swapper_pg_dir. I consider this a hack that is hard to understand and has performance implications. 2. Do not use swapper_pg_dir to boot secondary CPUs like 64-bit does. This patch implements solution 2.
It introduces a trampoline_pg_dir which has the same layout as swapper_pg_dir with low_mappings. This page table is used as the initial page table of the booting CPU. Later in the bringup process, it switches to swapper_pg_dir and does a global TLB flush. This fixes the crashes in our test cases. -v2: switch to swapper_pg_dir right after entering start_secondary() so that we are able to access percpu data which might not be mapped in the trampoline page table. Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> LKML-Reference: <20100816123833.GB28147@aftab> Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
98 lines
2.6 KiB
C
98 lines
2.6 KiB
C
#ifndef _ASM_X86_PGTABLE_32_H
|
|
#define _ASM_X86_PGTABLE_32_H
|
|
|
|
#include <asm/pgtable_32_types.h>
|
|
|
|
/*
|
|
* The Linux memory management assumes a three-level page table setup. On
|
|
* the i386, we use that, but "fold" the mid level into the top-level page
|
|
* table, so that we physically have the same two-level page table as the
|
|
* i386 mmu expects.
|
|
*
|
|
* This file contains the functions and defines necessary to modify and use
|
|
* the i386 page table tree.
|
|
*/
|
|
#ifndef __ASSEMBLY__
|
|
#include <asm/processor.h>
|
|
#include <asm/fixmap.h>
|
|
#include <linux/threads.h>
|
|
#include <asm/paravirt.h>
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/list.h>
|
|
#include <linux/spinlock.h>
|
|
|
|
struct mm_struct;
|
|
struct vm_area_struct;
|
|
|
|
extern pgd_t swapper_pg_dir[1024];
|
|
/*
 * Page table used as the initial page table of a booting secondary CPU.
 * It has the same layout as swapper_pg_dir, with the low mappings added.
 */
extern pgd_t trampoline_pg_dir[1024];
|
|
|
|
/* Empty stub: performs no work. */
static inline void pgtable_cache_init(void)
{
}
|
|
/* Empty stub: performs no work. */
static inline void check_pgt_cache(void)
{
}
|
|
void paging_init(void);
|
|
|
|
extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
|
|
|
|
|
|
/*
|
|
* Define this if things work differently on an i386 and an i486:
|
|
* it will (on an i486) warn about kernel memory accesses that are
|
|
* done without an 'access_ok(VERIFY_WRITE,..)'
|
|
*/
|
|
#undef TEST_ACCESS_OK
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
# include <asm/pgtable-3level.h>
|
|
#else
|
|
# include <asm/pgtable-2level.h>
|
|
#endif
|
|
|
|
#if defined(CONFIG_HIGHPTE)
|
|
/*
 * Pick the atomic-kmap slot used to map a PTE page:
 * KM_NMI_PTE when in_nmi() is true, else KM_IRQ_PTE when in_irq() is
 * true, else KM_PTE0.
 */
#define __KM_PTE \
	(in_nmi() ? KM_NMI_PTE : \
	 (in_irq() ? KM_IRQ_PTE : KM_PTE0))
|
|
/*
 * Map the page referenced by the pmd entry *dir with kmap_atomic() in the
 * __KM_PTE slot and yield a pte_t pointer offset by pte_index(address).
 */
#define pte_offset_map(dir, address) \
	((pte_t *)kmap_atomic(pmd_page(*(dir)), __KM_PTE) \
	 + pte_index((address)))
|
|
/*
 * Same as pte_offset_map(), but the kmap_atomic() mapping uses the fixed
 * KM_PTE1 slot instead of __KM_PTE.
 */
#define pte_offset_map_nested(dir, address) \
	((pte_t *)kmap_atomic(pmd_page(*(dir)), KM_PTE1) \
	 + pte_index((address)))
|
|
/* Undo a pte_offset_map() mapping: kunmap_atomic() on the __KM_PTE slot. */
#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
|
|
/* Undo a pte_offset_map_nested() mapping: kunmap_atomic() on the KM_PTE1 slot. */
#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
|
|
#else
|
|
/*
 * Variant used when CONFIG_HIGHPTE is not defined: take page_address() of
 * the page referenced by *dir and offset it by pte_index(address). No
 * kmap_atomic() mapping is created.
 */
#define pte_offset_map(dir, address) \
	((pte_t *)page_address(pmd_page(*(dir))) \
	 + pte_index((address)))
|
|
/* Without CONFIG_HIGHPTE the nested variant simply forwards to pte_offset_map(). */
#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
|
|
/* No-op without CONFIG_HIGHPTE; the pte argument is not evaluated. */
#define pte_unmap(pte) do { } while (0)
|
|
/* No-op without CONFIG_HIGHPTE; the pte argument is not evaluated. */
#define pte_unmap_nested(pte) do { } while (0)
|
|
#endif
|
|
|
|
/*
 * Clear a kernel PTE and flush it from the TLB.
 *
 * Calls pte_clear() on ptep for init_mm, then __flush_tlb_one() on vaddr.
 * Note that vaddr is expanded twice, so callers should pass an expression
 * without side effects.
 */
#define kpte_clear_flush(ptep, vaddr) \
do { \
	pte_clear(&init_mm, (vaddr), (ptep)); \
	__flush_tlb_one((vaddr)); \
} while (0)
|
|
|
|
/*
|
|
* The i386 doesn't have any external MMU info: the kernel page
|
|
* tables contain all the necessary information.
|
|
*/
|
|
/* Expands to an empty statement; none of the arguments are evaluated. */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
/*
|
|
* kern_addr_valid() is (1) for FLATMEM and (0) for
|
|
* SPARSEMEM and DISCONTIGMEM
|
|
*/
|
|
#ifdef CONFIG_FLATMEM
|
|
/* CONFIG_FLATMEM: constant 1; the argument is not evaluated. */
#define kern_addr_valid(addr) (1)
|
|
#else
|
|
/* Non-FLATMEM configurations: constant 0; the argument is not evaluated. */
#define kern_addr_valid(addr) (0)
|
|
#endif
|
|
|
|
#endif /* _ASM_X86_PGTABLE_32_H */
|