forked from Minki/linux
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 PTI preparatory patches from Thomas Gleixner: "Today's Advent calendar window contains twenty-four easy to digest patches. The original plan was to have twenty-three matching the date, but a late fixup made that moot. - Move the cpu_entry_area mapping out of the fixmap into a separate address space. That's necessary because the fixmap becomes too big with NR_CPUS=8192 and this already caused subtle and hard to diagnose failures. The topmost patch is fresh from today and cures a brain slip of that tall grumpy German greybeard, who ignored the intricacies of 32bit wraparounds. - Limit the number of CPUs on 32bit to 64. That's insanely big already, but at least it's small enough to prevent address space issues with the cpu_entry_area map, which have been observed and debugged with the fixmap code - A few TLB flush fixes in various places plus documentation on which of the TLB functions should be used for what. - Rename the SYSENTER stack to CPU_ENTRY_AREA stack as it is used for more than sysenter now and keeping the name makes backtraces confusing. - Prevent LDT inheritance on exec() by moving it to arch_dup_mmap(), which is only invoked on fork(). - Make vsyscall more robust. - A few fixes and cleanups of the debug_pagetables code. Check PAGE_PRESENT instead of checking the PTE for 0 and a cleanup of the C89 initialization of the address hint array which already was out of sync with the index enums. - Move the ESPFIX init to a different place to prepare for PTI. - Several code moves with no functional change to make PTI integration simpler and header files less convoluted.
- Documentation fixes and clarifications" * 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits) x86/cpu_entry_area: Prevent wraparound in setup_cpu_entry_area_ptes() on 32bit init: Invoke init_espfix_bsp() from mm_init() x86/cpu_entry_area: Move it out of the fixmap x86/cpu_entry_area: Move it to a separate unit x86/mm: Create asm/invpcid.h x86/mm: Put MMU to hardware ASID translation in one place x86/mm: Remove hard-coded ASID limit checks x86/mm: Move the CR3 construction functions to tlbflush.h x86/mm: Add comments to clarify which TLB-flush functions are supposed to flush what x86/mm: Remove superfluous barriers x86/mm: Use __flush_tlb_one() for kernel memory x86/microcode: Dont abuse the TLB-flush interface x86/uv: Use the right TLB-flush API x86/entry: Rename SYSENTER_stack to CPU_ENTRY_AREA_entry_stack x86/doc: Remove obvious weirdnesses from the x86 MM layout documentation x86/mm/64: Improve the memory map documentation x86/ldt: Prevent LDT inheritance on exec x86/ldt: Rework locking arch, mm: Allow arch_dup_mmap() to fail x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode ...
This commit is contained in:
commit
caf9a82657
@ -1,6 +1,4 @@
|
||||
|
||||
<previous description obsolete, deleted>
|
||||
|
||||
Virtual memory map with 4 level page tables:
|
||||
|
||||
0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
|
||||
@ -14,13 +12,15 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
|
||||
... unused hole ...
|
||||
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
|
||||
... unused hole ...
|
||||
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
|
||||
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
|
||||
... unused hole ...
|
||||
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
|
||||
... unused hole ...
|
||||
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
|
||||
ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
|
||||
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
|
||||
ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable)
|
||||
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
|
||||
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
|
||||
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
||||
|
||||
Virtual memory map with 5 level page tables:
|
||||
@ -36,19 +36,22 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
|
||||
... unused hole ...
|
||||
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
|
||||
... unused hole ...
|
||||
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
|
||||
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
|
||||
... unused hole ...
|
||||
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
|
||||
... unused hole ...
|
||||
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
|
||||
ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
|
||||
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
|
||||
ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space
|
||||
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
|
||||
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
|
||||
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
||||
|
||||
Architecture defines a 64-bit virtual address. Implementations can support
|
||||
less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
|
||||
through to the most-significant implemented bit are set to either all ones
|
||||
or all zero. This causes hole between user space and kernel addresses.
|
||||
through to the most-significant implemented bit are sign extended.
|
||||
This causes a hole between user space and kernel addresses if you interpret them
|
||||
as unsigned.
|
||||
|
||||
The direct mapping covers all memory in the system up to the highest
|
||||
memory address (this means in some cases it can also include PCI memory
|
||||
@ -58,9 +61,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
|
||||
the processes using the page fault handler, with init_top_pgt as
|
||||
reference.
|
||||
|
||||
Current X86-64 implementations support up to 46 bits of address space (64 TB),
|
||||
which is our current limit. This expands into MBZ space in the page tables.
|
||||
|
||||
We map EFI runtime services in the 'efi_pgd' PGD in a 64GB large virtual
|
||||
memory window (this size is arbitrary, it can be raised later if needed).
|
||||
The mappings are not part of any other kernel PGD and are only available
|
||||
@ -72,5 +72,3 @@ following fixmap section.
|
||||
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
|
||||
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
|
||||
Their order is preserved but their base will be offset early at boot time.
|
||||
|
||||
-Andi Kleen, Jul 2004
|
||||
|
@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
static inline int arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_PPC_BOOK3S_64
|
||||
|
@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
|
||||
/*
|
||||
* Needed since we do not use the asm-generic/mm_hooks.h:
|
||||
*/
|
||||
static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
||||
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
||||
{
|
||||
uml_setup_stubs(mm);
|
||||
return 0;
|
||||
}
|
||||
extern void arch_exit_mmap(struct mm_struct *mm);
|
||||
static inline void arch_unmap(struct mm_struct *mm,
|
||||
|
@ -81,9 +81,10 @@ do { \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static inline void arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
static inline int arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void arch_unmap(struct mm_struct *mm,
|
||||
|
@ -926,7 +926,8 @@ config MAXSMP
|
||||
config NR_CPUS
|
||||
int "Maximum number of CPUs" if SMP && !MAXSMP
|
||||
range 2 8 if SMP && X86_32 && !X86_BIGSMP
|
||||
range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
|
||||
range 2 64 if SMP && X86_32 && X86_BIGSMP
|
||||
range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
|
||||
range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
|
||||
default "1" if !SMP
|
||||
default "8192" if MAXSMP
|
||||
|
@ -942,9 +942,9 @@ ENTRY(debug)
|
||||
|
||||
/* Are we currently on the SYSENTER stack? */
|
||||
movl PER_CPU_VAR(cpu_entry_area), %ecx
|
||||
addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
|
||||
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
|
||||
cmpl $SIZEOF_SYSENTER_stack, %ecx
|
||||
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
|
||||
subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
|
||||
cmpl $SIZEOF_entry_stack, %ecx
|
||||
jb .Ldebug_from_sysenter_stack
|
||||
|
||||
TRACE_IRQS_OFF
|
||||
@ -986,9 +986,9 @@ ENTRY(nmi)
|
||||
|
||||
/* Are we currently on the SYSENTER stack? */
|
||||
movl PER_CPU_VAR(cpu_entry_area), %ecx
|
||||
addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
|
||||
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
|
||||
cmpl $SIZEOF_SYSENTER_stack, %ecx
|
||||
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
|
||||
subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
|
||||
cmpl $SIZEOF_entry_stack, %ecx
|
||||
jb .Lnmi_from_sysenter_stack
|
||||
|
||||
/* Not on SYSENTER stack. */
|
||||
|
@ -158,8 +158,8 @@ END(native_usergs_sysret64)
|
||||
_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
|
||||
|
||||
/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
|
||||
#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \
|
||||
SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
|
||||
#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
|
||||
SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
|
||||
|
||||
ENTRY(entry_SYSCALL_64_trampoline)
|
||||
UNWIND_HINT_EMPTY
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/paravirt.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "vsyscall_trace.h"
|
||||
@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
|
||||
|
||||
WARN_ON_ONCE(address != regs->ip);
|
||||
|
||||
/* This should be unreachable in NATIVE mode. */
|
||||
if (WARN_ON(vsyscall_mode == NATIVE))
|
||||
return false;
|
||||
|
||||
if (vsyscall_mode == NONE) {
|
||||
warn_bad_vsyscall(KERN_INFO, regs,
|
||||
"vsyscall attempted with vsyscall=none");
|
||||
@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
|
||||
return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
|
||||
}
|
||||
|
||||
/*
|
||||
* The VSYSCALL page is the only user-accessible page in the kernel address
|
||||
* range. Normally, the kernel page tables can have _PAGE_USER clear, but
|
||||
* the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
|
||||
* are enabled.
|
||||
*
|
||||
* Some day we may create a "minimal" vsyscall mode in which we emulate
|
||||
* vsyscalls but leave the page not present. If so, we skip calling
|
||||
* this.
|
||||
*/
|
||||
static void __init set_vsyscall_pgtable_user_bits(void)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
|
||||
pud_t *pud;
|
||||
|
||||
pmd_t *pmd;
|
||||
|
||||
|
||||
/* Start at the kernel PGD entry covering VSYSCALL_ADDR and OR in _PAGE_USER. */
pgd = pgd_offset_k(VSYSCALL_ADDR);
|
||||
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
|
||||
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
|
||||
/* The p4d entry is only written when built with five or more page-table levels. */
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
p4d->p4d |= _PAGE_USER;
|
||||
#endif
|
||||
/* Descend through the pud and pmd entries, setting _PAGE_USER on each. */
pud = pud_offset(p4d, VSYSCALL_ADDR);
|
||||
set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
|
||||
pmd = pmd_offset(pud, VSYSCALL_ADDR);
|
||||
set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
|
||||
}
|
||||
|
||||
void __init map_vsyscall(void)
|
||||
{
|
||||
extern char __vsyscall_page;
|
||||
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
|
||||
|
||||
if (vsyscall_mode != NONE)
|
||||
if (vsyscall_mode != NONE) {
|
||||
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
|
||||
vsyscall_mode == NATIVE
|
||||
? PAGE_KERNEL_VSYSCALL
|
||||
: PAGE_KERNEL_VVAR);
|
||||
set_vsyscall_pgtable_user_bits();
|
||||
}
|
||||
|
||||
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
|
||||
(unsigned long)VSYSCALL_ADDR);
|
||||
|
68
arch/x86/include/asm/cpu_entry_area.h
Normal file
68
arch/x86/include/asm/cpu_entry_area.h
Normal file
@ -0,0 +1,68 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#ifndef _ASM_X86_CPU_ENTRY_AREA_H
|
||||
#define _ASM_X86_CPU_ENTRY_AREA_H
|
||||
|
||||
#include <linux/percpu-defs.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
/*
|
||||
* cpu_entry_area is a percpu region that contains things needed by the CPU
|
||||
* and early entry/exit code. Real types aren't used for all fields here
|
||||
* to avoid circular header dependencies.
|
||||
*
|
||||
* Every field is a virtual alias of some other allocated backing store.
|
||||
* There is no direct allocation of a struct cpu_entry_area.
|
||||
*/
|
||||
struct cpu_entry_area {
|
||||
char gdt[PAGE_SIZE];
|
||||
|
||||
/*
|
||||
* The GDT is just below entry_stack and thus serves (on x86_64) as
|
||||
* a read-only guard page.
|
||||
*/
|
||||
struct entry_stack_page entry_stack_page;
|
||||
|
||||
/*
|
||||
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
|
||||
* we need task switches to work, and task switches write to the TSS.
|
||||
*/
|
||||
struct tss_struct tss;
|
||||
|
||||
char entry_trampoline[PAGE_SIZE];
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Exception stacks used for IST entries.
|
||||
*
|
||||
* In the future, this should have a separate slot for each stack
|
||||
* with guard pages between them.
|
||||
*/
|
||||
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
|
||||
#endif
|
||||
};
|
||||
|
||||
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
|
||||
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||
|
||||
extern void setup_cpu_entry_areas(void);
|
||||
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
|
||||
|
||||
#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
|
||||
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
|
||||
|
||||
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
|
||||
|
||||
#define CPU_ENTRY_AREA_MAP_SIZE \
|
||||
(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
|
||||
|
||||
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
|
||||
|
||||
/* Return a pointer to the entry stack embedded in @cpu's cpu_entry_area. */
static inline struct entry_stack *cpu_entry_stack(int cpu)
|
||||
{
|
||||
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
|
||||
}
|
||||
|
||||
#endif
|
@ -7,6 +7,7 @@
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/irq_vectors.h>
|
||||
#include <asm/cpu_entry_area.h>
|
||||
|
||||
#include <linux/smp.h>
|
||||
#include <linux/percpu.h>
|
||||
|
@ -2,7 +2,7 @@
|
||||
#ifndef _ASM_X86_ESPFIX_H
|
||||
#define _ASM_X86_ESPFIX_H
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
|
||||
#include <asm/percpu.h>
|
||||
|
||||
@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
|
||||
|
||||
extern void init_espfix_bsp(void);
|
||||
extern void init_espfix_ap(int cpu);
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
#else
|
||||
static inline void init_espfix_ap(int cpu) { }
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_ESPFIX_H */
|
||||
|
@ -44,46 +44,6 @@ extern unsigned long __FIXADDR_TOP;
|
||||
PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* cpu_entry_area is a percpu region in the fixmap that contains things
|
||||
* needed by the CPU and early entry/exit code. Real types aren't used
|
||||
* for all fields here to avoid circular header dependencies.
|
||||
*
|
||||
* Every field is a virtual alias of some other allocated backing store.
|
||||
* There is no direct allocation of a struct cpu_entry_area.
|
||||
*/
|
||||
struct cpu_entry_area {
|
||||
char gdt[PAGE_SIZE];
|
||||
|
||||
/*
|
||||
* The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
|
||||
* a a read-only guard page.
|
||||
*/
|
||||
struct SYSENTER_stack_page SYSENTER_stack_page;
|
||||
|
||||
/*
|
||||
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
|
||||
* we need task switches to work, and task switches write to the TSS.
|
||||
*/
|
||||
struct tss_struct tss;
|
||||
|
||||
char entry_trampoline[PAGE_SIZE];
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Exception stacks used for IST entries.
|
||||
*
|
||||
* In the future, this should have a separate slot for each stack
|
||||
* with guard pages between them.
|
||||
*/
|
||||
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
|
||||
#endif
|
||||
};
|
||||
|
||||
#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
|
||||
|
||||
extern void setup_cpu_entry_areas(void);
|
||||
|
||||
/*
|
||||
* Here we define all the compile-time 'special' virtual
|
||||
* addresses. The point is to have a constant address at
|
||||
@ -123,7 +83,6 @@ enum fixed_addresses {
|
||||
FIX_IO_APIC_BASE_0,
|
||||
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
|
||||
#endif
|
||||
FIX_RO_IDT, /* Virtual mapping for read-only IDT */
|
||||
#ifdef CONFIG_X86_32
|
||||
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
|
||||
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
|
||||
@ -139,9 +98,6 @@ enum fixed_addresses {
|
||||
#ifdef CONFIG_X86_INTEL_MID
|
||||
FIX_LNW_VRTC,
|
||||
#endif
|
||||
/* Fixmap entries to remap the GDTs, one per processor. */
|
||||
FIX_CPU_ENTRY_AREA_TOP,
|
||||
FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI_GHES
|
||||
/* Used for GHES mapping from assorted contexts */
|
||||
@ -182,7 +138,7 @@ enum fixed_addresses {
|
||||
extern void reserve_top_address(unsigned long reserve);
|
||||
|
||||
#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
|
||||
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
|
||||
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
|
||||
|
||||
extern int fixmaps_set;
|
||||
|
||||
@ -230,30 +186,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
|
||||
void __early_set_fixmap(enum fixed_addresses idx,
|
||||
phys_addr_t phys, pgprot_t flags);
|
||||
|
||||
static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
|
||||
|
||||
return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
|
||||
}
|
||||
|
||||
#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
|
||||
BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
|
||||
__get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
|
||||
})
|
||||
|
||||
#define get_cpu_entry_area_index(cpu, field) \
|
||||
__get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
|
||||
|
||||
static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
||||
{
|
||||
return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
|
||||
}
|
||||
|
||||
static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
|
||||
{
|
||||
return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
|
||||
}
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif /* _ASM_X86_FIXMAP_H */
|
||||
|
53
arch/x86/include/asm/invpcid.h
Normal file
53
arch/x86/include/asm/invpcid.h
Normal file
@ -0,0 +1,53 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_X86_INVPCID
|
||||
#define _ASM_X86_INVPCID
|
||||
|
||||
/*
 * Issue an INVPCID instruction.
 *
 * @pcid and @addr are packed, in that order, into a two-element u64
 * descriptor on the stack; @type (one of the INVPCID_TYPE_* values) is
 * passed in the "a" register and the descriptor's address in the "c"
 * register.
 */
static inline void __invpcid(unsigned long pcid, unsigned long addr,
|
||||
unsigned long type)
|
||||
{
|
||||
struct { u64 d[2]; } desc = { { pcid, addr } };
|
||||
|
||||
/*
|
||||
* The memory clobber is because the whole point is to invalidate
|
||||
* stale TLB entries and, especially if we're flushing global
|
||||
* mappings, we don't want the compiler to reorder any subsequent
|
||||
* memory accesses before the TLB flush.
|
||||
*
|
||||
* The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
|
||||
* invpcid (%rcx), %rax in long mode.
|
||||
*/
|
||||
asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
|
||||
: : "m" (desc), "a" (type), "c" (&desc) : "memory");
|
||||
}
|
||||
|
||||
#define INVPCID_TYPE_INDIV_ADDR 0
|
||||
#define INVPCID_TYPE_SINGLE_CTXT 1
|
||||
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
|
||||
#define INVPCID_TYPE_ALL_NON_GLOBAL 3
|
||||
|
||||
/*
 * Flush all mappings for a given pcid and addr, not including globals.
 * Issues INVPCID with type INVPCID_TYPE_INDIV_ADDR.
 */
|
||||
static inline void invpcid_flush_one(unsigned long pcid,
|
||||
unsigned long addr)
|
||||
{
|
||||
__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
|
||||
}
|
||||
|
||||
/*
 * Flush all mappings for a given PCID, not including globals.
 * Issues INVPCID with type INVPCID_TYPE_SINGLE_CTXT; the address field of
 * the descriptor is passed as 0.
 */
|
||||
static inline void invpcid_flush_single_context(unsigned long pcid)
|
||||
{
|
||||
__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
|
||||
}
|
||||
|
||||
/*
 * Flush all mappings, including globals, for all PCIDs.
 * Issues INVPCID with type INVPCID_TYPE_ALL_INCL_GLOBAL; both descriptor
 * fields are passed as 0.
 */
|
||||
static inline void invpcid_flush_all(void)
|
||||
{
|
||||
__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
|
||||
}
|
||||
|
||||
/*
 * Flush all mappings for all PCIDs except globals.
 * Issues INVPCID with type INVPCID_TYPE_ALL_NON_GLOBAL; both descriptor
 * fields are passed as 0.
 */
|
||||
static inline void invpcid_flush_all_nonglobals(void)
|
||||
{
|
||||
__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_INVPCID */
|
@ -3,6 +3,7 @@
|
||||
#define _ASM_X86_MMU_H
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
@ -27,7 +28,8 @@ typedef struct {
|
||||
atomic64_t tlb_gen;
|
||||
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
struct ldt_struct *ldt;
|
||||
struct rw_semaphore ldt_usr_sem;
|
||||
struct ldt_struct *ldt;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
@ -57,11 +57,17 @@ struct ldt_struct {
|
||||
/*
|
||||
* Used for LDT copy/destruction.
|
||||
*/
|
||||
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
|
||||
/* Give a new mm an empty (NULL) LDT pointer and initialise its ldt_usr_sem rwsem. */
static inline void init_new_context_ldt(struct mm_struct *mm)
|
||||
{
|
||||
mm->context.ldt = NULL;
|
||||
init_rwsem(&mm->context.ldt_usr_sem);
|
||||
}
|
||||
int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
|
||||
void destroy_context_ldt(struct mm_struct *mm);
|
||||
#else /* CONFIG_MODIFY_LDT_SYSCALL */
|
||||
static inline int init_new_context_ldt(struct task_struct *tsk,
|
||||
struct mm_struct *mm)
|
||||
static inline void init_new_context_ldt(struct mm_struct *mm) { }
|
||||
static inline int ldt_dup_context(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -132,18 +138,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
|
||||
static inline int init_new_context(struct task_struct *tsk,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
mutex_init(&mm->context.lock);
|
||||
|
||||
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
|
||||
atomic64_set(&mm->context.tlb_gen, 0);
|
||||
|
||||
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
|
||||
/* pkey 0 is the default and always allocated */
|
||||
mm->context.pkey_allocation_map = 0x1;
|
||||
/* -1 means unallocated or invalid */
|
||||
mm->context.execute_only_pkey = -1;
|
||||
}
|
||||
#endif
|
||||
return init_new_context_ldt(tsk, mm);
|
||||
#endif
|
||||
init_new_context_ldt(mm);
|
||||
return 0;
|
||||
}
|
||||
static inline void destroy_context(struct mm_struct *mm)
|
||||
{
|
||||
@ -176,10 +185,10 @@ do { \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
static inline void arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
||||
{
|
||||
paravirt_arch_dup_mmap(oldmm, mm);
|
||||
return ldt_dup_context(oldmm, mm);
|
||||
}
|
||||
|
||||
static inline void arch_exit_mmap(struct mm_struct *mm)
|
||||
@ -281,33 +290,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
||||
return __pkru_allows_pkey(vma_pkey(vma), write);
|
||||
}
|
||||
|
||||
/*
|
||||
* If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
|
||||
* bits. This serves two purposes. It prevents a nasty situation in
|
||||
* which PCID-unaware code saves CR3, loads some other value (with PCID
|
||||
* == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
|
||||
* the saved ASID was nonzero. It also means that any bugs involving
|
||||
* loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
|
||||
* deterministically.
|
||||
*/
|
||||
|
||||
static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_PCID)) {
|
||||
VM_WARN_ON_ONCE(asid > 4094);
|
||||
return __sme_pa(mm->pgd) | (asid + 1);
|
||||
} else {
|
||||
VM_WARN_ON_ONCE(asid != 0);
|
||||
return __sme_pa(mm->pgd);
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
|
||||
{
|
||||
VM_WARN_ON_ONCE(asid > 4094);
|
||||
return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
|
||||
}
|
||||
|
||||
/*
|
||||
* This can be used from process context to figure out what the value of
|
||||
* CR3 is without needing to do a (slow) __read_cr3().
|
||||
@ -317,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
|
||||
*/
|
||||
static inline unsigned long __get_current_cr3_fast(void)
|
||||
{
|
||||
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
|
||||
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
|
||||
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
|
||||
|
||||
/* For now, be very restrictive about when this can be called. */
|
||||
|
@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
|
||||
#define LAST_PKMAP 1024
|
||||
#endif
|
||||
|
||||
#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
|
||||
& PMD_MASK)
|
||||
/*
|
||||
* Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
|
||||
* to avoid include recursion hell
|
||||
*/
|
||||
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
|
||||
|
||||
#define CPU_ENTRY_AREA_BASE \
|
||||
((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
|
||||
|
||||
#define PKMAP_BASE \
|
||||
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
|
||||
#else
|
||||
# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
|
||||
# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
#define MODULES_VADDR VMALLOC_START
|
||||
|
@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
|
||||
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
||||
|
||||
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
|
||||
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
|
||||
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define VMALLOC_SIZE_TB _AC(16384, UL)
|
||||
#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
|
||||
#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
|
||||
# define VMALLOC_SIZE_TB _AC(16384, UL)
|
||||
# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
|
||||
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
|
||||
#else
|
||||
#define VMALLOC_SIZE_TB _AC(32, UL)
|
||||
#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
|
||||
#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
|
||||
# define VMALLOC_SIZE_TB _AC(32, UL)
|
||||
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
|
||||
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_MEMORY
|
||||
#define VMALLOC_START vmalloc_base
|
||||
#define VMEMMAP_START vmemmap_base
|
||||
# define VMALLOC_START vmalloc_base
|
||||
# define VMEMMAP_START vmemmap_base
|
||||
#else
|
||||
#define VMALLOC_START __VMALLOC_BASE
|
||||
#define VMEMMAP_START __VMEMMAP_BASE
|
||||
# define VMALLOC_START __VMALLOC_BASE
|
||||
# define VMEMMAP_START __VMEMMAP_BASE
|
||||
#endif /* CONFIG_RANDOMIZE_MEMORY */
|
||||
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
|
||||
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
|
||||
|
||||
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
|
||||
|
||||
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
|
||||
/* The module section ends with the start of the fixmap */
|
||||
#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
|
||||
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
|
||||
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
|
||||
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
|
||||
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
|
||||
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
|
||||
#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
|
||||
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
|
||||
|
||||
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
|
||||
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
|
||||
|
||||
#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
|
||||
#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
|
||||
|
||||
#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
|
||||
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
|
||||
|
||||
#define EARLY_DYNAMIC_PAGE_TABLES 64
|
||||
|
||||
|
@ -337,12 +337,12 @@ struct x86_hw_tss {
|
||||
#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
|
||||
#define INVALID_IO_BITMAP_OFFSET 0x8000
|
||||
|
||||
struct SYSENTER_stack {
|
||||
struct entry_stack {
|
||||
unsigned long words[64];
|
||||
};
|
||||
|
||||
struct SYSENTER_stack_page {
|
||||
struct SYSENTER_stack stack;
|
||||
struct entry_stack_page {
|
||||
struct entry_stack stack;
|
||||
} __aligned(PAGE_SIZE);
|
||||
|
||||
struct tss_struct {
|
||||
|
@ -16,7 +16,7 @@ enum stack_type {
|
||||
STACK_TYPE_TASK,
|
||||
STACK_TYPE_IRQ,
|
||||
STACK_TYPE_SOFTIRQ,
|
||||
STACK_TYPE_SYSENTER,
|
||||
STACK_TYPE_ENTRY,
|
||||
STACK_TYPE_EXCEPTION,
|
||||
STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
|
||||
};
|
||||
@ -29,7 +29,7 @@ struct stack_info {
|
||||
bool in_task_stack(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info);
|
||||
|
||||
bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
|
||||
bool in_entry_stack(unsigned long *stack, struct stack_info *info);
|
||||
|
||||
int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info, unsigned long *visit_mask);
|
||||
|
@ -9,70 +9,66 @@
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/special_insns.h>
|
||||
#include <asm/smp.h>
|
||||
|
||||
static inline void __invpcid(unsigned long pcid, unsigned long addr,
|
||||
unsigned long type)
|
||||
{
|
||||
struct { u64 d[2]; } desc = { { pcid, addr } };
|
||||
|
||||
/*
|
||||
* The memory clobber is because the whole point is to invalidate
|
||||
* stale TLB entries and, especially if we're flushing global
|
||||
* mappings, we don't want the compiler to reorder any subsequent
|
||||
* memory accesses before the TLB flush.
|
||||
*
|
||||
* The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
|
||||
* invpcid (%rcx), %rax in long mode.
|
||||
*/
|
||||
asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
|
||||
: : "m" (desc), "a" (type), "c" (&desc) : "memory");
|
||||
}
|
||||
|
||||
#define INVPCID_TYPE_INDIV_ADDR 0
|
||||
#define INVPCID_TYPE_SINGLE_CTXT 1
|
||||
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
|
||||
#define INVPCID_TYPE_ALL_NON_GLOBAL 3
|
||||
|
||||
/* Flush all mappings for a given pcid and addr, not including globals. */
|
||||
static inline void invpcid_flush_one(unsigned long pcid,
|
||||
unsigned long addr)
|
||||
{
|
||||
__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
|
||||
}
|
||||
|
||||
/* Flush all mappings for a given PCID, not including globals. */
|
||||
static inline void invpcid_flush_single_context(unsigned long pcid)
|
||||
{
|
||||
__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
|
||||
}
|
||||
|
||||
/* Flush all mappings, including globals, for all PCIDs. */
|
||||
static inline void invpcid_flush_all(void)
|
||||
{
|
||||
__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
|
||||
}
|
||||
|
||||
/* Flush all mappings for all PCIDs except globals. */
|
||||
static inline void invpcid_flush_all_nonglobals(void)
|
||||
{
|
||||
__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
|
||||
}
|
||||
#include <asm/invpcid.h>
|
||||
|
||||
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
|
||||
{
|
||||
u64 new_tlb_gen;
|
||||
|
||||
/*
|
||||
* Bump the generation count. This also serves as a full barrier
|
||||
* that synchronizes with switch_mm(): callers are required to order
|
||||
* their read of mm_cpumask after their writes to the paging
|
||||
* structures.
|
||||
*/
|
||||
smp_mb__before_atomic();
|
||||
new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
|
||||
smp_mb__after_atomic();
|
||||
return atomic64_inc_return(&mm->context.tlb_gen);
|
||||
}
|
||||
|
||||
return new_tlb_gen;
|
||||
/* There are 12 bits of space for ASIDS in CR3 */
|
||||
#define CR3_HW_ASID_BITS 12
|
||||
/*
|
||||
* When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
|
||||
* user/kernel switches
|
||||
*/
|
||||
#define PTI_CONSUMED_ASID_BITS 0
|
||||
|
||||
#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
|
||||
/*
|
||||
* ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid. -1 below to account
|
||||
* for them being zero-based. Another -1 is because ASID 0 is reserved for
|
||||
* use by non-PCID-aware users.
|
||||
*/
|
||||
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
|
||||
|
||||
static inline u16 kern_pcid(u16 asid)
|
||||
{
|
||||
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
|
||||
/*
|
||||
* If PCID is on, ASID-aware code paths put the ASID+1 into the
|
||||
* PCID bits. This serves two purposes. It prevents a nasty
|
||||
* situation in which PCID-unaware code saves CR3, loads some other
|
||||
* value (with PCID == 0), and then restores CR3, thus corrupting
|
||||
* the TLB for ASID 0 if the saved ASID was nonzero. It also means
|
||||
* that any bugs involving loading a PCID-enabled CR3 with
|
||||
* CR4.PCIDE off will trigger deterministically.
|
||||
*/
|
||||
return asid + 1;
|
||||
}
|
||||
|
||||
struct pgd_t;
|
||||
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_PCID)) {
|
||||
return __sme_pa(pgd) | kern_pcid(asid);
|
||||
} else {
|
||||
VM_WARN_ON_ONCE(asid != 0);
|
||||
return __sme_pa(pgd);
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
|
||||
{
|
||||
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
|
||||
VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
|
||||
return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
@ -237,6 +233,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
|
||||
|
||||
extern void initialize_tlbstate_and_flush(void);
|
||||
|
||||
/*
|
||||
* flush the entire current user mapping
|
||||
*/
|
||||
static inline void __native_flush_tlb(void)
|
||||
{
|
||||
/*
|
||||
@ -249,20 +248,12 @@ static inline void __native_flush_tlb(void)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void __native_flush_tlb_global_irq_disabled(void)
|
||||
{
|
||||
unsigned long cr4;
|
||||
|
||||
cr4 = this_cpu_read(cpu_tlbstate.cr4);
|
||||
/* clear PGE */
|
||||
native_write_cr4(cr4 & ~X86_CR4_PGE);
|
||||
/* write old PGE again and flush TLBs */
|
||||
native_write_cr4(cr4);
|
||||
}
|
||||
|
||||
/*
|
||||
* flush everything
|
||||
*/
|
||||
static inline void __native_flush_tlb_global(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long cr4, flags;
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_INVPCID)) {
|
||||
/*
|
||||
@ -280,22 +271,36 @@ static inline void __native_flush_tlb_global(void)
|
||||
*/
|
||||
raw_local_irq_save(flags);
|
||||
|
||||
__native_flush_tlb_global_irq_disabled();
|
||||
cr4 = this_cpu_read(cpu_tlbstate.cr4);
|
||||
/* toggle PGE */
|
||||
native_write_cr4(cr4 ^ X86_CR4_PGE);
|
||||
/* write old PGE again and flush TLBs */
|
||||
native_write_cr4(cr4);
|
||||
|
||||
raw_local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* flush one page in the user mapping
|
||||
*/
|
||||
static inline void __native_flush_tlb_single(unsigned long addr)
|
||||
{
|
||||
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
|
||||
}
|
||||
|
||||
/*
|
||||
* flush everything
|
||||
*/
|
||||
static inline void __flush_tlb_all(void)
|
||||
{
|
||||
if (boot_cpu_has(X86_FEATURE_PGE))
|
||||
if (boot_cpu_has(X86_FEATURE_PGE)) {
|
||||
__flush_tlb_global();
|
||||
else
|
||||
} else {
|
||||
/*
|
||||
* !PGE -> !PCID (setup_pcid()), thus every flush is total.
|
||||
*/
|
||||
__flush_tlb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: if we somehow had PCID but not PGE, then this wouldn't work --
|
||||
@ -306,6 +311,9 @@ static inline void __flush_tlb_all(void)
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* flush one page in the kernel mapping
|
||||
*/
|
||||
static inline void __flush_tlb_one(unsigned long addr)
|
||||
{
|
||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
|
||||
|
@ -97,6 +97,6 @@ void common(void) {
|
||||
/* Layout info for cpu_entry_area */
|
||||
OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
|
||||
OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
|
||||
OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
|
||||
DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
|
||||
OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
|
||||
DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ void foo(void)
|
||||
|
||||
/* Offset from the entry stack to tss.sp0 */
|
||||
DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
|
||||
offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
|
||||
offsetofend(struct cpu_entry_area, entry_stack_page.stack));
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
BLANK();
|
||||
|
@ -506,102 +506,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
||||
#endif
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
|
||||
SYSENTER_stack_storage);
|
||||
|
||||
static void __init
|
||||
set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
|
||||
{
|
||||
for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
|
||||
__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
|
||||
}
|
||||
|
||||
/* Setup the fixmap mappings only once per-processor */
|
||||
static void __init setup_cpu_entry_area(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
extern char _entry_trampoline[];
|
||||
|
||||
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
|
||||
pgprot_t gdt_prot = PAGE_KERNEL_RO;
|
||||
pgprot_t tss_prot = PAGE_KERNEL_RO;
|
||||
#else
|
||||
/*
|
||||
* On native 32-bit systems, the GDT cannot be read-only because
|
||||
* our double fault handler uses a task gate, and entering through
|
||||
* a task gate needs to change an available TSS to busy. If the
|
||||
* GDT is read-only, that will triple fault. The TSS cannot be
|
||||
* read-only because the CPU writes to it on task switches.
|
||||
*
|
||||
* On Xen PV, the GDT must be read-only because the hypervisor
|
||||
* requires it.
|
||||
*/
|
||||
pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
|
||||
PAGE_KERNEL_RO : PAGE_KERNEL;
|
||||
pgprot_t tss_prot = PAGE_KERNEL;
|
||||
#endif
|
||||
|
||||
__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
|
||||
per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
|
||||
PAGE_KERNEL);
|
||||
|
||||
/*
|
||||
* The Intel SDM says (Volume 3, 7.2.1):
|
||||
*
|
||||
* Avoid placing a page boundary in the part of the TSS that the
|
||||
* processor reads during a task switch (the first 104 bytes). The
|
||||
* processor may not correctly perform address translations if a
|
||||
* boundary occurs in this area. During a task switch, the processor
|
||||
* reads and writes into the first 104 bytes of each TSS (using
|
||||
* contiguous physical addresses beginning with the physical address
|
||||
* of the first byte of the TSS). So, after TSS access begins, if
|
||||
* part of the 104 bytes is not physically contiguous, the processor
|
||||
* will access incorrect information without generating a page-fault
|
||||
* exception.
|
||||
*
|
||||
* There are also a lot of errata involving the TSS spanning a page
|
||||
* boundary. Assert that we're not doing that.
|
||||
*/
|
||||
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
|
||||
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
|
||||
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
|
||||
&per_cpu(cpu_tss_rw, cpu),
|
||||
sizeof(struct tss_struct) / PAGE_SIZE,
|
||||
tss_prot);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) !=
|
||||
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
|
||||
&per_cpu(exception_stacks, cpu),
|
||||
sizeof(exception_stacks) / PAGE_SIZE,
|
||||
PAGE_KERNEL);
|
||||
|
||||
__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
|
||||
__pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
|
||||
#endif
|
||||
}
|
||||
|
||||
void __init setup_cpu_entry_areas(void)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
setup_cpu_entry_area(cpu);
|
||||
}
|
||||
|
||||
/* Load the original GDT from the per-cpu structure */
|
||||
void load_direct_gdt(int cpu)
|
||||
{
|
||||
@ -1348,7 +1254,7 @@ void enable_sep_cpu(void)
|
||||
|
||||
tss->x86_tss.ss1 = __KERNEL_CS;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
|
||||
|
||||
put_cpu();
|
||||
@ -1465,7 +1371,7 @@ void syscall_init(void)
|
||||
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
|
||||
*/
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
|
||||
#else
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
|
||||
@ -1680,7 +1586,7 @@ void cpu_init(void)
|
||||
*/
|
||||
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
load_TR_desc();
|
||||
load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
|
||||
load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
|
||||
|
||||
load_mm_ldt(&init_mm);
|
||||
|
||||
|
@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
|
||||
}
|
||||
#else
|
||||
|
||||
/*
|
||||
* Flush global tlb. We only do this in x86_64 where paging has been enabled
|
||||
* already and PGE should be enabled as well.
|
||||
*/
|
||||
static inline void flush_tlb_early(void)
|
||||
{
|
||||
__native_flush_tlb_global_irq_disabled();
|
||||
}
|
||||
|
||||
static inline void print_ucode(struct ucode_cpu_info *uci)
|
||||
{
|
||||
struct microcode_intel *mc;
|
||||
@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
|
||||
if (rev != mc->hdr.rev)
|
||||
return -1;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Flush global tlb. This is precaution. */
|
||||
flush_tlb_early();
|
||||
#endif
|
||||
uci->cpu_sig.rev = rev;
|
||||
|
||||
if (early)
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/sysfs.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/stacktrace.h>
|
||||
#include <asm/unwind.h>
|
||||
|
||||
@ -43,9 +44,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
|
||||
bool in_entry_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
|
||||
struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
|
||||
|
||||
void *begin = ss;
|
||||
void *end = ss + 1;
|
||||
@ -53,7 +54,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
|
||||
if ((void *)stack < begin || (void *)stack >= end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_SYSENTER;
|
||||
info->type = STACK_TYPE_ENTRY;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
info->next_sp = NULL;
|
||||
@ -111,13 +112,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
* - task stack
|
||||
* - interrupt stack
|
||||
* - HW exception stacks (double fault, nmi, debug, mce)
|
||||
* - SYSENTER stack
|
||||
* - entry stack
|
||||
*
|
||||
* x86-32 can have up to four stacks:
|
||||
* - task stack
|
||||
* - softirq stack
|
||||
* - hardirq stack
|
||||
* - SYSENTER stack
|
||||
* - entry stack
|
||||
*/
|
||||
for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
|
||||
const char *stack_name;
|
||||
|
@ -26,8 +26,8 @@ const char *stack_type_name(enum stack_type type)
|
||||
if (type == STACK_TYPE_SOFTIRQ)
|
||||
return "SOFTIRQ";
|
||||
|
||||
if (type == STACK_TYPE_SYSENTER)
|
||||
return "SYSENTER";
|
||||
if (type == STACK_TYPE_ENTRY)
|
||||
return "ENTRY_TRAMPOLINE";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -96,7 +96,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
if (task != current)
|
||||
goto unknown;
|
||||
|
||||
if (in_sysenter_stack(stack, info))
|
||||
if (in_entry_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_hardirq_stack(stack, info))
|
||||
|
@ -37,8 +37,14 @@ const char *stack_type_name(enum stack_type type)
|
||||
if (type == STACK_TYPE_IRQ)
|
||||
return "IRQ";
|
||||
|
||||
if (type == STACK_TYPE_SYSENTER)
|
||||
return "SYSENTER";
|
||||
if (type == STACK_TYPE_ENTRY) {
|
||||
/*
|
||||
* On 64-bit, we have a generic entry stack that we
|
||||
* use for all the kernel entry points, including
|
||||
* SYSENTER.
|
||||
*/
|
||||
return "ENTRY_TRAMPOLINE";
|
||||
}
|
||||
|
||||
if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
|
||||
return exception_stack_names[type - STACK_TYPE_EXCEPTION];
|
||||
@ -118,7 +124,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
if (in_irq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_sysenter_stack(stack, info))
|
||||
if (in_entry_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
goto unknown;
|
||||
|
@ -5,6 +5,11 @@
|
||||
* Copyright (C) 2002 Andi Kleen
|
||||
*
|
||||
* This handles calls from both 32bit and 64bit mode.
|
||||
*
|
||||
* Lock order:
|
||||
* context.ldt_usr_sem
|
||||
* mmap_sem
|
||||
* context.lock
|
||||
*/
|
||||
|
||||
#include <linux/errno.h>
|
||||
@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* context.lock is held for us, so we don't need any locking. */
|
||||
/* context.lock is held by the task which issued the smp function call */
|
||||
static void flush_ldt(void *__mm)
|
||||
{
|
||||
struct mm_struct *mm = __mm;
|
||||
@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
|
||||
paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
|
||||
}
|
||||
|
||||
/* context.lock is held */
|
||||
static void install_ldt(struct mm_struct *current_mm,
|
||||
struct ldt_struct *ldt)
|
||||
static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
|
||||
{
|
||||
/* Synchronizes with READ_ONCE in load_mm_ldt. */
|
||||
smp_store_release(¤t_mm->context.ldt, ldt);
|
||||
mutex_lock(&mm->context.lock);
|
||||
|
||||
/* Activate the LDT for all CPUs using current_mm. */
|
||||
on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
|
||||
/* Synchronizes with READ_ONCE in load_mm_ldt. */
|
||||
smp_store_release(&mm->context.ldt, ldt);
|
||||
|
||||
/* Activate the LDT for all CPUs using current's mm. */
|
||||
on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
|
||||
|
||||
mutex_unlock(&mm->context.lock);
|
||||
}
|
||||
|
||||
static void free_ldt_struct(struct ldt_struct *ldt)
|
||||
@ -124,27 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
|
||||
}
|
||||
|
||||
/*
|
||||
* we do not have to muck with descriptors here, that is
|
||||
* done in switch_mm() as needed.
|
||||
* Called on fork from arch_dup_mmap(). Just copy the current LDT state,
|
||||
* the new task is not running, so nothing can be installed.
|
||||
*/
|
||||
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
|
||||
int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
|
||||
{
|
||||
struct ldt_struct *new_ldt;
|
||||
struct mm_struct *old_mm;
|
||||
int retval = 0;
|
||||
|
||||
mutex_init(&mm->context.lock);
|
||||
old_mm = current->mm;
|
||||
if (!old_mm) {
|
||||
mm->context.ldt = NULL;
|
||||
if (!old_mm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
mutex_lock(&old_mm->context.lock);
|
||||
if (!old_mm->context.ldt) {
|
||||
mm->context.ldt = NULL;
|
||||
if (!old_mm->context.ldt)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
|
||||
if (!new_ldt) {
|
||||
@ -180,7 +180,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
|
||||
unsigned long entries_size;
|
||||
int retval;
|
||||
|
||||
mutex_lock(&mm->context.lock);
|
||||
down_read(&mm->context.ldt_usr_sem);
|
||||
|
||||
if (!mm->context.ldt) {
|
||||
retval = 0;
|
||||
@ -209,7 +209,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
|
||||
retval = bytecount;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&mm->context.lock);
|
||||
up_read(&mm->context.ldt_usr_sem);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -269,7 +269,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
|
||||
ldt.avl = 0;
|
||||
}
|
||||
|
||||
mutex_lock(&mm->context.lock);
|
||||
if (down_write_killable(&mm->context.ldt_usr_sem))
|
||||
return -EINTR;
|
||||
|
||||
old_ldt = mm->context.ldt;
|
||||
old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
|
||||
@ -291,7 +292,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
|
||||
error = 0;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&mm->context.lock);
|
||||
up_write(&mm->context.ldt_usr_sem);
|
||||
out:
|
||||
return error;
|
||||
}
|
||||
|
@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
|
||||
initial_code = (unsigned long)start_secondary;
|
||||
initial_stack = idle->thread.sp;
|
||||
|
||||
/*
|
||||
* Enable the espfix hack for this CPU
|
||||
*/
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
/* Enable the espfix hack for this CPU */
|
||||
init_espfix_ap(cpu);
|
||||
#endif
|
||||
|
||||
/* So we see what's up */
|
||||
announce_cpu(cpu, apicid);
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include <asm/traps.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/mach_traps.h>
|
||||
@ -951,8 +952,9 @@ void __init trap_init(void)
|
||||
* "sidt" instruction will not leak the location of the kernel, and
|
||||
* to defend the IDT against arbitrary memory write vulnerabilities.
|
||||
* It will be reloaded in cpu_init() */
|
||||
__set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
|
||||
idt_descr.address = fix_to_virt(FIX_RO_IDT);
|
||||
cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
|
||||
PAGE_KERNEL_RO);
|
||||
idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
|
||||
|
||||
/*
|
||||
* Should be a barrier for any external CPU state:
|
||||
|
@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
|
||||
endif
|
||||
|
||||
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
|
||||
pat.o pgtable.o physaddr.o setup_nx.o tlb.o
|
||||
pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
|
||||
|
||||
# Make sure __phys_addr has no stackprotector
|
||||
nostackp := $(call cc-option, -fno-stack-protector)
|
||||
|
139
arch/x86/mm/cpu_entry_area.c
Normal file
139
arch/x86/mm/cpu_entry_area.c
Normal file
@ -0,0 +1,139 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/percpu.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
||||
#endif
|
||||
|
||||
struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
||||
{
|
||||
unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
|
||||
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
|
||||
|
||||
return (struct cpu_entry_area *) va;
|
||||
}
|
||||
EXPORT_SYMBOL(get_cpu_entry_area);
|
||||
|
||||
void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
|
||||
{
|
||||
unsigned long va = (unsigned long) cea_vaddr;
|
||||
|
||||
set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
|
||||
}
|
||||
|
||||
static void __init
|
||||
cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
|
||||
{
|
||||
for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
|
||||
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
|
||||
}
|
||||
|
||||
/* Set up the cpu_entry_area mappings only once per processor */
|
||||
static void __init setup_cpu_entry_area(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
extern char _entry_trampoline[];
|
||||
|
||||
/* On 64-bit systems, the cpu_entry_area GDT and TSS are mapped read-only. */
|
||||
pgprot_t gdt_prot = PAGE_KERNEL_RO;
|
||||
pgprot_t tss_prot = PAGE_KERNEL_RO;
|
||||
#else
|
||||
/*
|
||||
* On native 32-bit systems, the GDT cannot be read-only because
|
||||
* our double fault handler uses a task gate, and entering through
|
||||
* a task gate needs to change an available TSS to busy. If the
|
||||
* GDT is read-only, that will triple fault. The TSS cannot be
|
||||
* read-only because the CPU writes to it on task switches.
|
||||
*
|
||||
* On Xen PV, the GDT must be read-only because the hypervisor
|
||||
* requires it.
|
||||
*/
|
||||
pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
|
||||
PAGE_KERNEL_RO : PAGE_KERNEL;
|
||||
pgprot_t tss_prot = PAGE_KERNEL;
|
||||
#endif
|
||||
|
||||
cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
|
||||
gdt_prot);
|
||||
|
||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
|
||||
per_cpu_ptr(&entry_stack_storage, cpu), 1,
|
||||
PAGE_KERNEL);
|
||||
|
||||
/*
|
||||
* The Intel SDM says (Volume 3, 7.2.1):
|
||||
*
|
||||
* Avoid placing a page boundary in the part of the TSS that the
|
||||
* processor reads during a task switch (the first 104 bytes). The
|
||||
* processor may not correctly perform address translations if a
|
||||
* boundary occurs in this area. During a task switch, the processor
|
||||
* reads and writes into the first 104 bytes of each TSS (using
|
||||
* contiguous physical addresses beginning with the physical address
|
||||
* of the first byte of the TSS). So, after TSS access begins, if
|
||||
* part of the 104 bytes is not physically contiguous, the processor
|
||||
* will access incorrect information without generating a page-fault
|
||||
* exception.
|
||||
*
|
||||
* There are also a lot of errata involving the TSS spanning a page
|
||||
* boundary. Assert that we're not doing that.
|
||||
*/
|
||||
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
|
||||
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
|
||||
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
|
||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
|
||||
&per_cpu(cpu_tss_rw, cpu),
|
||||
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) !=
|
||||
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
|
||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
|
||||
&per_cpu(exception_stacks, cpu),
|
||||
sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
|
||||
|
||||
cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
|
||||
__pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
|
||||
#endif
|
||||
}
|
||||
|
||||
static __init void setup_cpu_entry_area_ptes(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long start, end;
|
||||
|
||||
BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
|
||||
BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
|
||||
|
||||
start = CPU_ENTRY_AREA_BASE;
|
||||
end = start + CPU_ENTRY_AREA_MAP_SIZE;
|
||||
|
||||
/* Careful here: start + PMD_SIZE might wrap around */
|
||||
for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
|
||||
populate_extra_pte(start);
|
||||
#endif
|
||||
}
|
||||
|
||||
void __init setup_cpu_entry_areas(void)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
setup_cpu_entry_area_ptes();
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
setup_cpu_entry_area(cpu);
|
||||
}
|
@ -44,10 +44,12 @@ struct addr_marker {
|
||||
unsigned long max_lines;
|
||||
};
|
||||
|
||||
/* indices for address_markers; keep sync'd w/ address_markers below */
|
||||
/* Address space markers hints */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
enum address_markers_idx {
|
||||
USER_SPACE_NR = 0,
|
||||
#ifdef CONFIG_X86_64
|
||||
KERNEL_SPACE_NR,
|
||||
LOW_KERNEL_NR,
|
||||
VMALLOC_START_NR,
|
||||
@ -56,56 +58,74 @@ enum address_markers_idx {
|
||||
KASAN_SHADOW_START_NR,
|
||||
KASAN_SHADOW_END_NR,
|
||||
#endif
|
||||
# ifdef CONFIG_X86_ESPFIX64
|
||||
CPU_ENTRY_AREA_NR,
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
ESPFIX_START_NR,
|
||||
# endif
|
||||
#endif
|
||||
#ifdef CONFIG_EFI
|
||||
EFI_END_NR,
|
||||
#endif
|
||||
HIGH_KERNEL_NR,
|
||||
MODULES_VADDR_NR,
|
||||
MODULES_END_NR,
|
||||
#else
|
||||
FIXADDR_START_NR,
|
||||
END_OF_SPACE_NR,
|
||||
};
|
||||
|
||||
static struct addr_marker address_markers[] = {
|
||||
[USER_SPACE_NR] = { 0, "User Space" },
|
||||
[KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
|
||||
[LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
|
||||
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
|
||||
[VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
|
||||
#ifdef CONFIG_KASAN
|
||||
[KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
|
||||
[KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
|
||||
#endif
|
||||
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
[ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
|
||||
#endif
|
||||
#ifdef CONFIG_EFI
|
||||
[EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
|
||||
#endif
|
||||
[HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
|
||||
[MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
|
||||
[MODULES_END_NR] = { MODULES_END, "End Modules" },
|
||||
[FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
|
||||
[END_OF_SPACE_NR] = { -1, NULL }
|
||||
};
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
enum address_markers_idx {
|
||||
USER_SPACE_NR = 0,
|
||||
KERNEL_SPACE_NR,
|
||||
VMALLOC_START_NR,
|
||||
VMALLOC_END_NR,
|
||||
# ifdef CONFIG_HIGHMEM
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
PKMAP_BASE_NR,
|
||||
# endif
|
||||
FIXADDR_START_NR,
|
||||
#endif
|
||||
CPU_ENTRY_AREA_NR,
|
||||
FIXADDR_START_NR,
|
||||
END_OF_SPACE_NR,
|
||||
};
|
||||
|
||||
/* Address space markers hints */
|
||||
static struct addr_marker address_markers[] = {
|
||||
{ 0, "User Space" },
|
||||
#ifdef CONFIG_X86_64
|
||||
{ 0x8000000000000000UL, "Kernel Space" },
|
||||
{ 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
|
||||
{ 0/* VMALLOC_START */, "vmalloc() Area" },
|
||||
{ 0/* VMEMMAP_START */, "Vmemmap" },
|
||||
#ifdef CONFIG_KASAN
|
||||
{ KASAN_SHADOW_START, "KASAN shadow" },
|
||||
{ KASAN_SHADOW_END, "KASAN shadow end" },
|
||||
[USER_SPACE_NR] = { 0, "User Space" },
|
||||
[KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
|
||||
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
|
||||
[VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
[PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
|
||||
#endif
|
||||
# ifdef CONFIG_X86_ESPFIX64
|
||||
{ ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
|
||||
# endif
|
||||
# ifdef CONFIG_EFI
|
||||
{ EFI_VA_END, "EFI Runtime Services" },
|
||||
# endif
|
||||
{ __START_KERNEL_map, "High Kernel Mapping" },
|
||||
{ MODULES_VADDR, "Modules" },
|
||||
{ MODULES_END, "End Modules" },
|
||||
#else
|
||||
{ PAGE_OFFSET, "Kernel Mapping" },
|
||||
{ 0/* VMALLOC_START */, "vmalloc() Area" },
|
||||
{ 0/*VMALLOC_END*/, "vmalloc() End" },
|
||||
# ifdef CONFIG_HIGHMEM
|
||||
{ 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
|
||||
# endif
|
||||
{ 0/*FIXADDR_START*/, "Fixmap Area" },
|
||||
#endif
|
||||
{ -1, NULL } /* End of list */
|
||||
[CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
|
||||
[FIXADDR_START_NR] = { 0UL, "Fixmap area" },
|
||||
[END_OF_SPACE_NR] = { -1, NULL }
|
||||
};
|
||||
|
||||
#endif /* !CONFIG_X86_64 */
|
||||
|
||||
/* Multipliers for offsets within the PTEs */
|
||||
#define PTE_LEVEL_MULT (PAGE_SIZE)
|
||||
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
|
||||
@ -140,7 +160,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
|
||||
static const char * const level_name[] =
|
||||
{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
|
||||
|
||||
if (!pgprot_val(prot)) {
|
||||
if (!(pr & _PAGE_PRESENT)) {
|
||||
/* Not present */
|
||||
pt_dump_cont_printf(m, dmsg, " ");
|
||||
} else {
|
||||
@ -525,8 +545,8 @@ static int __init pt_dump_init(void)
|
||||
address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
|
||||
# endif
|
||||
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
|
||||
address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
__initcall(pt_dump_init);
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include <asm/setup.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/init.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
@ -766,6 +767,7 @@ void __init mem_init(void)
|
||||
mem_init_print_info(NULL);
|
||||
printk(KERN_INFO "virtual kernel memory layout:\n"
|
||||
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
||||
" cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
||||
#endif
|
||||
@ -777,6 +779,10 @@ void __init mem_init(void)
|
||||
FIXADDR_START, FIXADDR_TOP,
|
||||
(FIXADDR_TOP - FIXADDR_START) >> 10,
|
||||
|
||||
CPU_ENTRY_AREA_BASE,
|
||||
CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
|
||||
CPU_ENTRY_AREA_MAP_SIZE >> 10,
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
|
||||
(LAST_PKMAP*PAGE_SIZE) >> 10,
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/cpu_entry_area.h>
|
||||
|
||||
extern struct range pfn_mapped[E820_MAX_ENTRIES];
|
||||
|
||||
@ -322,31 +323,33 @@ void __init kasan_init(void)
|
||||
map_range(&pfn_mapped[i]);
|
||||
}
|
||||
|
||||
shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
|
||||
shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
|
||||
shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
|
||||
PAGE_SIZE);
|
||||
|
||||
shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
|
||||
CPU_ENTRY_AREA_MAP_SIZE);
|
||||
shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
|
||||
shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
|
||||
PAGE_SIZE);
|
||||
|
||||
kasan_populate_zero_shadow(
|
||||
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
|
||||
kasan_mem_to_shadow((void *)__START_KERNEL_map));
|
||||
shadow_cpu_entry_begin);
|
||||
|
||||
kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
|
||||
(unsigned long)shadow_cpu_entry_end, 0);
|
||||
|
||||
kasan_populate_zero_shadow(shadow_cpu_entry_end,
|
||||
kasan_mem_to_shadow((void *)__START_KERNEL_map));
|
||||
|
||||
kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
|
||||
(unsigned long)kasan_mem_to_shadow(_end),
|
||||
early_pfn_to_nid(__pa(_stext)));
|
||||
|
||||
shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
|
||||
shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
|
||||
shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
|
||||
PAGE_SIZE);
|
||||
|
||||
shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
|
||||
shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
|
||||
shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
|
||||
PAGE_SIZE);
|
||||
|
||||
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
|
||||
shadow_cpu_entry_begin);
|
||||
|
||||
kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
|
||||
(unsigned long)shadow_cpu_entry_end, 0);
|
||||
|
||||
kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
|
||||
(void *)KASAN_SHADOW_END);
|
||||
|
||||
load_cr3(init_top_pgt);
|
||||
__flush_tlb_all();
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
* isn't free.
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
|
||||
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
|
||||
/*
|
||||
* If we were to BUG here, we'd be very likely to kill
|
||||
* the system so hard that we don't see the call trace.
|
||||
@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
if (need_flush) {
|
||||
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
|
||||
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
|
||||
write_cr3(build_cr3(next, new_asid));
|
||||
write_cr3(build_cr3(next->pgd, new_asid));
|
||||
|
||||
/*
|
||||
* NB: This gets called via leave_mm() in the idle path
|
||||
@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
|
||||
} else {
|
||||
/* The new ASID is already up to date. */
|
||||
write_cr3(build_cr3_noflush(next, new_asid));
|
||||
write_cr3(build_cr3_noflush(next->pgd, new_asid));
|
||||
|
||||
/* See above wrt _rcuidle. */
|
||||
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
|
||||
@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void)
|
||||
!(cr4_read_shadow() & X86_CR4_PCIDE));
|
||||
|
||||
/* Force ASID 0 and force a TLB flush. */
|
||||
write_cr3(build_cr3(mm, 0));
|
||||
write_cr3(build_cr3(mm->pgd, 0));
|
||||
|
||||
/* Reinitialize tlbstate. */
|
||||
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
|
||||
@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info)
|
||||
|
||||
/* flush range by one by one 'invlpg' */
|
||||
for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
|
||||
__flush_tlb_single(addr);
|
||||
__flush_tlb_one(addr);
|
||||
}
|
||||
|
||||
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
||||
|
@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
|
||||
local_flush_tlb();
|
||||
stat->d_alltlb++;
|
||||
} else {
|
||||
__flush_tlb_one(msg->address);
|
||||
__flush_tlb_single(msg->address);
|
||||
stat->d_onetlb++;
|
||||
}
|
||||
stat->d_requestee++;
|
||||
|
@ -2273,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
||||
|
||||
switch (idx) {
|
||||
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
|
||||
case FIX_RO_IDT:
|
||||
#ifdef CONFIG_X86_32
|
||||
case FIX_WP_TEST:
|
||||
# ifdef CONFIG_HIGHMEM
|
||||
@ -2284,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
||||
#endif
|
||||
case FIX_TEXT_POKE0:
|
||||
case FIX_TEXT_POKE1:
|
||||
case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
|
||||
/* All local page mappings */
|
||||
pte = pfn_pte(phys, prot);
|
||||
break;
|
||||
|
@ -7,9 +7,10 @@
|
||||
#ifndef _ASM_GENERIC_MM_HOOKS_H
|
||||
#define _ASM_GENERIC_MM_HOOKS_H
|
||||
|
||||
static inline void arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
static inline int arch_dup_mmap(struct mm_struct *oldmm,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void arch_exit_mmap(struct mm_struct *mm)
|
||||
|
@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
|
||||
struct file;
|
||||
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
|
||||
unsigned long size, pgprot_t *vma_prot);
|
||||
|
||||
#ifndef CONFIG_X86_ESPFIX64
|
||||
static inline void init_espfix_bsp(void) { }
|
||||
#endif
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#ifndef io_remap_pfn_range
|
||||
|
@ -504,6 +504,8 @@ static void __init mm_init(void)
|
||||
pgtable_init();
|
||||
vmalloc_init();
|
||||
ioremap_huge_init();
|
||||
/* Should be run before the first non-init thread is created */
|
||||
init_espfix_bsp();
|
||||
}
|
||||
|
||||
asmlinkage __visible void __init start_kernel(void)
|
||||
@ -678,10 +680,6 @@ asmlinkage __visible void __init start_kernel(void)
|
||||
#ifdef CONFIG_X86
|
||||
if (efi_enabled(EFI_RUNTIME_SERVICES))
|
||||
efi_enter_virtual_mode();
|
||||
#endif
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
/* Should be run before the first non-init thread is created */
|
||||
init_espfix_bsp();
|
||||
#endif
|
||||
thread_stack_cache_init();
|
||||
cred_init();
|
||||
|
@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
|
||||
goto out;
|
||||
}
|
||||
/* a new mm has just been created */
|
||||
arch_dup_mmap(oldmm, mm);
|
||||
retval = 0;
|
||||
retval = arch_dup_mmap(oldmm, mm);
|
||||
out:
|
||||
up_write(&mm->mmap_sem);
|
||||
flush_tlb_mm(oldmm);
|
||||
|
@ -627,13 +627,10 @@ static void do_multicpu_tests(void)
|
||||
static int finish_exec_test(void)
|
||||
{
|
||||
/*
|
||||
* In a sensible world, this would be check_invalid_segment(0, 1);
|
||||
* For better or for worse, though, the LDT is inherited across exec.
|
||||
* We can probably change this safely, but for now we test it.
|
||||
* Older kernel versions did inherit the LDT on exec() which is
|
||||
* wrong because exec() starts from a clean state.
|
||||
*/
|
||||
check_valid_segment(0, 1,
|
||||
AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
|
||||
42, true);
|
||||
check_invalid_segment(0, 1);
|
||||
|
||||
return nerrs ? 1 : 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user