Merge branch 'x86/entry' into ras/core
to fixup conflicts in arch/x86/kernel/cpu/mce/core.c so MCE specific follow up patches can be applied without creating a horrible merge conflict afterwards.
This commit is contained in:
@@ -90,7 +90,6 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
|
||||
obj-y += apic/
|
||||
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
|
||||
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
|
||||
obj-$(CONFIG_LIVEPATCH) += livepatch.o
|
||||
obj-$(CONFIG_FUNCTION_TRACER) += ftrace_$(BITS).o
|
||||
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
|
||||
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
|
||||
@@ -102,9 +101,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
|
||||
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
|
||||
obj-y += kprobes/
|
||||
obj-$(CONFIG_MODULES) += module.o
|
||||
ifeq ($(CONFIG_X86_32),y)
|
||||
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
|
||||
endif
|
||||
obj-$(CONFIG_X86_32) += doublefault_32.o
|
||||
obj-$(CONFIG_KGDB) += kgdb.o
|
||||
obj-$(CONFIG_VM86) += vm86_32.o
|
||||
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
|
||||
|
||||
@@ -20,11 +20,11 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/efi-bgrt.h>
|
||||
#include <linux/serial_core.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/e820/api.h>
|
||||
#include <asm/irqdomain.h>
|
||||
#include <asm/pci_x86.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/realmode.h>
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/cacheflush.h>
|
||||
@@ -783,6 +782,61 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
struct mm_struct *mm;
|
||||
} temp_mm_state_t;
|
||||
|
||||
/*
|
||||
* Using a temporary mm allows to set temporary mappings that are not accessible
|
||||
* by other CPUs. Such mappings are needed to perform sensitive memory writes
|
||||
* that override the kernel memory protections (e.g., W^X), without exposing the
|
||||
* temporary page-table mappings that are required for these write operations to
|
||||
* other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
|
||||
* mapping is torn down.
|
||||
*
|
||||
* Context: The temporary mm needs to be used exclusively by a single core. To
|
||||
* harden security IRQs must be disabled while the temporary mm is
|
||||
* loaded, thereby preventing interrupt handler bugs from overriding
|
||||
* the kernel memory protection.
|
||||
*/
|
||||
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
|
||||
{
|
||||
temp_mm_state_t temp_state;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
|
||||
switch_mm_irqs_off(NULL, mm, current);
|
||||
|
||||
/*
|
||||
* If breakpoints are enabled, disable them while the temporary mm is
|
||||
* used. Userspace might set up watchpoints on addresses that are used
|
||||
* in the temporary mm, which would lead to wrong signals being sent or
|
||||
* crashes.
|
||||
*
|
||||
* Note that breakpoints are not disabled selectively, which also causes
|
||||
* kernel breakpoints (e.g., perf's) to be disabled. This might be
|
||||
* undesirable, but still seems reasonable as the code that runs in the
|
||||
* temporary mm should be short.
|
||||
*/
|
||||
if (hw_breakpoint_active())
|
||||
hw_breakpoint_disable();
|
||||
|
||||
return temp_state;
|
||||
}
|
||||
|
||||
static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
switch_mm_irqs_off(NULL, prev_state.mm, current);
|
||||
|
||||
/*
|
||||
* Restore the breakpoints if they were disabled before the temporary mm
|
||||
* was loaded.
|
||||
*/
|
||||
if (hw_breakpoint_active())
|
||||
hw_breakpoint_restore();
|
||||
}
|
||||
|
||||
__ro_after_init struct mm_struct *poking_mm;
|
||||
__ro_after_init unsigned long poking_addr;
|
||||
|
||||
@@ -957,28 +1011,29 @@ struct bp_patching_desc {
|
||||
|
||||
static struct bp_patching_desc *bp_desc;
|
||||
|
||||
static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
|
||||
static __always_inline
|
||||
struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
|
||||
{
|
||||
struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */
|
||||
struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */
|
||||
|
||||
if (!desc || !atomic_inc_not_zero(&desc->refs))
|
||||
if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
|
||||
return NULL;
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
static inline void put_desc(struct bp_patching_desc *desc)
|
||||
static __always_inline void put_desc(struct bp_patching_desc *desc)
|
||||
{
|
||||
smp_mb__before_atomic();
|
||||
atomic_dec(&desc->refs);
|
||||
arch_atomic_dec(&desc->refs);
|
||||
}
|
||||
|
||||
static inline void *text_poke_addr(struct text_poke_loc *tp)
|
||||
static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
|
||||
{
|
||||
return _stext + tp->rel_addr;
|
||||
}
|
||||
|
||||
static int notrace patch_cmp(const void *key, const void *elt)
|
||||
static __always_inline int patch_cmp(const void *key, const void *elt)
|
||||
{
|
||||
struct text_poke_loc *tp = (struct text_poke_loc *) elt;
|
||||
|
||||
@@ -988,9 +1043,8 @@ static int notrace patch_cmp(const void *key, const void *elt)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(patch_cmp);
|
||||
|
||||
int notrace poke_int3_handler(struct pt_regs *regs)
|
||||
int noinstr poke_int3_handler(struct pt_regs *regs)
|
||||
{
|
||||
struct bp_patching_desc *desc;
|
||||
struct text_poke_loc *tp;
|
||||
@@ -1023,9 +1077,9 @@ int notrace poke_int3_handler(struct pt_regs *regs)
|
||||
* Skip the binary search if there is a single member in the vector.
|
||||
*/
|
||||
if (unlikely(desc->nr_entries > 1)) {
|
||||
tp = bsearch(ip, desc->vec, desc->nr_entries,
|
||||
sizeof(struct text_poke_loc),
|
||||
patch_cmp);
|
||||
tp = __inline_bsearch(ip, desc->vec, desc->nr_entries,
|
||||
sizeof(struct text_poke_loc),
|
||||
patch_cmp);
|
||||
if (!tp)
|
||||
goto out_put;
|
||||
} else {
|
||||
@@ -1064,7 +1118,6 @@ out_put:
|
||||
put_desc(desc);
|
||||
return ret;
|
||||
}
|
||||
NOKPROBE_SYMBOL(poke_int3_handler);
|
||||
|
||||
#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
|
||||
static struct text_poke_loc tp_vec[TP_VEC_MAX];
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/dma-direct.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/gart.h>
|
||||
@@ -159,7 +158,7 @@ static void dump_leak(void)
|
||||
return;
|
||||
dump = 1;
|
||||
|
||||
show_stack(NULL, NULL);
|
||||
show_stack(NULL, NULL, KERN_ERR);
|
||||
debug_dma_dump_mappings(NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -345,56 +345,3 @@ out_noapbt:
|
||||
apb_timer_block_enabled = 0;
|
||||
panic("failed to enable APB timer\n");
|
||||
}
|
||||
|
||||
/* called before apb_timer_enable, use early map */
|
||||
unsigned long apbt_quick_calibrate(void)
|
||||
{
|
||||
int i, scale;
|
||||
u64 old, new;
|
||||
u64 t1, t2;
|
||||
unsigned long khz = 0;
|
||||
u32 loop, shift;
|
||||
|
||||
apbt_set_mapping();
|
||||
dw_apb_clocksource_start(clocksource_apbt);
|
||||
|
||||
/* check if the timer can count down, otherwise return */
|
||||
old = dw_apb_clocksource_read(clocksource_apbt);
|
||||
i = 10000;
|
||||
while (--i) {
|
||||
if (old != dw_apb_clocksource_read(clocksource_apbt))
|
||||
break;
|
||||
}
|
||||
if (!i)
|
||||
goto failed;
|
||||
|
||||
/* count 16 ms */
|
||||
loop = (apbt_freq / 1000) << 4;
|
||||
|
||||
/* restart the timer to ensure it won't get to 0 in the calibration */
|
||||
dw_apb_clocksource_start(clocksource_apbt);
|
||||
|
||||
old = dw_apb_clocksource_read(clocksource_apbt);
|
||||
old += loop;
|
||||
|
||||
t1 = rdtsc();
|
||||
|
||||
do {
|
||||
new = dw_apb_clocksource_read(clocksource_apbt);
|
||||
} while (new < old);
|
||||
|
||||
t2 = rdtsc();
|
||||
|
||||
shift = 5;
|
||||
if (unlikely(loop >> shift == 0)) {
|
||||
printk(KERN_INFO
|
||||
"APBT TSC calibration failed, not enough resolution\n");
|
||||
return 0;
|
||||
}
|
||||
scale = (int)div_u64((t2 - t1), loop >> shift);
|
||||
khz = (scale * (apbt_freq / 1000)) >> shift;
|
||||
printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz);
|
||||
return khz;
|
||||
failed:
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
|
||||
* According to Intel, MFENCE can do the serialization here.
|
||||
*/
|
||||
asm volatile("mfence" : : : "memory");
|
||||
|
||||
printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -546,46 +544,20 @@ static struct clock_event_device lapic_clockevent = {
|
||||
};
|
||||
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
|
||||
|
||||
static u32 hsx_deadline_rev(void)
|
||||
{
|
||||
switch (boot_cpu_data.x86_stepping) {
|
||||
case 0x02: return 0x3a; /* EP */
|
||||
case 0x04: return 0x0f; /* EX */
|
||||
}
|
||||
static const struct x86_cpu_id deadline_match[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */
|
||||
|
||||
return ~0U;
|
||||
}
|
||||
|
||||
static u32 bdx_deadline_rev(void)
|
||||
{
|
||||
switch (boot_cpu_data.x86_stepping) {
|
||||
case 0x02: return 0x00000011;
|
||||
case 0x03: return 0x0700000e;
|
||||
case 0x04: return 0x0f00000c;
|
||||
case 0x05: return 0x0e000003;
|
||||
}
|
||||
|
||||
return ~0U;
|
||||
}
|
||||
|
||||
static u32 skx_deadline_rev(void)
|
||||
{
|
||||
switch (boot_cpu_data.x86_stepping) {
|
||||
case 0x03: return 0x01000136;
|
||||
case 0x04: return 0x02000014;
|
||||
}
|
||||
|
||||
if (boot_cpu_data.x86_stepping > 4)
|
||||
return 0;
|
||||
|
||||
return ~0U;
|
||||
}
|
||||
|
||||
static const struct x86_cpu_id deadline_match[] = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
|
||||
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
|
||||
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
|
||||
X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev),
|
||||
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),
|
||||
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
|
||||
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),
|
||||
|
||||
X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22),
|
||||
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20),
|
||||
@@ -603,34 +575,29 @@ static const struct x86_cpu_id deadline_match[] = {
|
||||
{},
|
||||
};
|
||||
|
||||
static void apic_check_deadline_errata(void)
|
||||
static __init bool apic_validate_deadline_timer(void)
|
||||
{
|
||||
const struct x86_cpu_id *m;
|
||||
u32 rev;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
|
||||
boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return;
|
||||
if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
|
||||
return false;
|
||||
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return true;
|
||||
|
||||
m = x86_match_cpu(deadline_match);
|
||||
if (!m)
|
||||
return;
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Function pointers will have the MSB set due to address layout,
|
||||
* immediate revisions will not.
|
||||
*/
|
||||
if ((long)m->driver_data < 0)
|
||||
rev = ((u32 (*)(void))(m->driver_data))();
|
||||
else
|
||||
rev = (u32)m->driver_data;
|
||||
rev = (u32)m->driver_data;
|
||||
|
||||
if (boot_cpu_data.microcode >= rev)
|
||||
return;
|
||||
return true;
|
||||
|
||||
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
|
||||
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
|
||||
"please update microcode to version: 0x%x (or later)\n", rev);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1121,23 +1088,14 @@ static void local_apic_timer_interrupt(void)
|
||||
* [ if a single-CPU system runs an SMP kernel then we call the local
|
||||
* interrupt as well. Thus we cannot inline the local irq ... ]
|
||||
*/
|
||||
__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
/*
|
||||
* NOTE! We'd better ACK the irq immediately,
|
||||
* because timer handling can be slow.
|
||||
*
|
||||
* update_process_times() expects us to have done irq_enter().
|
||||
* Besides, if we don't timer interrupts ignore the global
|
||||
* interrupt lock, which is the WrongThing (tm) to do.
|
||||
*/
|
||||
entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
trace_local_timer_entry(LOCAL_TIMER_VECTOR);
|
||||
local_apic_timer_interrupt();
|
||||
trace_local_timer_exit(LOCAL_TIMER_VECTOR);
|
||||
exiting_irq();
|
||||
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
@@ -2092,7 +2050,8 @@ void __init init_apic_mappings(void)
|
||||
{
|
||||
unsigned int new_apicid;
|
||||
|
||||
apic_check_deadline_errata();
|
||||
if (apic_validate_deadline_timer())
|
||||
pr_debug("TSC deadline timer available\n");
|
||||
|
||||
if (x2apic_mode) {
|
||||
boot_cpu_physical_apicid = read_apic_id();
|
||||
@@ -2152,15 +2111,21 @@ void __init register_lapic_address(unsigned long address)
|
||||
* Local APIC interrupts
|
||||
*/
|
||||
|
||||
/*
|
||||
* This interrupt should _never_ happen with our APIC/SMP architecture
|
||||
/**
|
||||
* spurious_interrupt - Catch all for interrupts raised on unused vectors
|
||||
* @regs: Pointer to pt_regs on stack
|
||||
* @vector: The vector number
|
||||
*
|
||||
* This is invoked from ASM entry code to catch all interrupts which
|
||||
* trigger on an entry which is routed to the common_spurious idtentry
|
||||
* point.
|
||||
*
|
||||
* Also called from sysvec_spurious_apic_interrupt().
|
||||
*/
|
||||
__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_IRQ(spurious_interrupt)
|
||||
{
|
||||
u8 vector = ~regs->orig_ax;
|
||||
u32 v;
|
||||
|
||||
entering_irq();
|
||||
trace_spurious_apic_entry(vector);
|
||||
|
||||
inc_irq_stat(irq_spurious_count);
|
||||
@@ -2190,13 +2155,17 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
|
||||
}
|
||||
out:
|
||||
trace_spurious_apic_exit(vector);
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
|
||||
{
|
||||
__spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
|
||||
}
|
||||
|
||||
/*
|
||||
* This interrupt should never happen with our APIC/SMP architecture
|
||||
*/
|
||||
__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
|
||||
{
|
||||
static const char * const error_interrupt_reason[] = {
|
||||
"Send CS error", /* APIC Error Bit 0 */
|
||||
@@ -2210,7 +2179,6 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
|
||||
};
|
||||
u32 v, i = 0;
|
||||
|
||||
entering_irq();
|
||||
trace_error_apic_entry(ERROR_APIC_VECTOR);
|
||||
|
||||
/* First tickle the hardware, only then report what went on. -- REW */
|
||||
@@ -2234,7 +2202,6 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
|
||||
apic_printk(APIC_DEBUG, KERN_CONT "\n");
|
||||
|
||||
trace_error_apic_exit(ERROR_APIC_VECTOR);
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -12,11 +12,11 @@
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/numachip/numachip.h>
|
||||
#include <asm/numachip/numachip_csr.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
#include "local.h"
|
||||
|
||||
|
||||
@@ -154,19 +154,6 @@ static inline bool mp_is_legacy_irq(int irq)
|
||||
return irq >= 0 && irq < nr_legacy_irqs();
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize all legacy IRQs and all pins on the first IOAPIC
|
||||
* if we have legacy interrupt controller. Kernel boot option "pirq="
|
||||
* may rely on non-legacy pins on the first IOAPIC.
|
||||
*/
|
||||
static inline int mp_init_irq_at_boot(int ioapic, int irq)
|
||||
{
|
||||
if (!nr_legacy_irqs())
|
||||
return 0;
|
||||
|
||||
return ioapic == 0 || mp_is_legacy_irq(irq);
|
||||
}
|
||||
|
||||
static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
|
||||
{
|
||||
return ioapics[ioapic].irqdomain;
|
||||
|
||||
@@ -115,7 +115,8 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
|
||||
* denote it as spurious which is no harm as this is a rare event
|
||||
* and interrupt handlers have to cope with spurious interrupts
|
||||
* anyway. If the vector is unused, then it is marked so it won't
|
||||
* trigger the 'No irq handler for vector' warning in do_IRQ().
|
||||
* trigger the 'No irq handler for vector' warning in
|
||||
* common_interrupt().
|
||||
*
|
||||
* This requires to hold vector lock to prevent concurrent updates to
|
||||
* the affected vector.
|
||||
|
||||
@@ -861,13 +861,13 @@ static void free_moved_vector(struct apic_chip_data *apicd)
|
||||
apicd->move_in_progress = 0;
|
||||
}
|
||||
|
||||
asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup)
|
||||
{
|
||||
struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
|
||||
struct apic_chip_data *apicd;
|
||||
struct hlist_node *tmp;
|
||||
|
||||
entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
/* Prevent vectors vanishing under us */
|
||||
raw_spin_lock(&vector_lock);
|
||||
|
||||
@@ -892,7 +892,6 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
|
||||
}
|
||||
|
||||
raw_spin_unlock(&vector_lock);
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
static void __send_cleanup_vector(struct apic_chip_data *apicd)
|
||||
|
||||
@@ -30,8 +30,6 @@ static enum uv_system_type uv_system_type;
|
||||
static int uv_hubbed_system;
|
||||
static int uv_hubless_system;
|
||||
static u64 gru_start_paddr, gru_end_paddr;
|
||||
static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
|
||||
static u64 gru_dist_lmask, gru_dist_umask;
|
||||
static union uvh_apicid uvh_apicid;
|
||||
|
||||
/* Unpack OEM/TABLE ID's to be NULL terminated strings */
|
||||
@@ -48,11 +46,9 @@ static struct {
|
||||
unsigned int gnode_shift;
|
||||
} uv_cpuid;
|
||||
|
||||
int uv_min_hub_revision_id;
|
||||
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
|
||||
static int uv_min_hub_revision_id;
|
||||
|
||||
unsigned int uv_apicid_hibits;
|
||||
EXPORT_SYMBOL_GPL(uv_apicid_hibits);
|
||||
|
||||
static struct apic apic_x2apic_uv_x;
|
||||
static struct uv_hub_info_s uv_hub_info_node0;
|
||||
@@ -85,20 +81,7 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
|
||||
|
||||
static inline bool is_GRU_range(u64 start, u64 end)
|
||||
{
|
||||
if (gru_dist_base) {
|
||||
u64 su = start & gru_dist_umask; /* Upper (incl pnode) bits */
|
||||
u64 sl = start & gru_dist_lmask; /* Base offset bits */
|
||||
u64 eu = end & gru_dist_umask;
|
||||
u64 el = end & gru_dist_lmask;
|
||||
|
||||
/* Must reside completely within a single GRU range: */
|
||||
return (sl == gru_dist_base && el == gru_dist_base &&
|
||||
su >= gru_first_node_paddr &&
|
||||
su <= gru_last_node_paddr &&
|
||||
eu == su);
|
||||
} else {
|
||||
return start >= gru_start_paddr && end <= gru_end_paddr;
|
||||
}
|
||||
return start >= gru_start_paddr && end <= gru_end_paddr;
|
||||
}
|
||||
|
||||
static bool uv_is_untracked_pat_range(u64 start, u64 end)
|
||||
@@ -385,11 +368,10 @@ int is_uv_hubbed(int uvtype)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(is_uv_hubbed);
|
||||
|
||||
int is_uv_hubless(int uvtype)
|
||||
static int is_uv_hubless(int uvtype)
|
||||
{
|
||||
return (uv_hubless_system & uvtype);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(is_uv_hubless);
|
||||
|
||||
void **__uv_hub_info_list;
|
||||
EXPORT_SYMBOL_GPL(__uv_hub_info_list);
|
||||
@@ -417,12 +399,6 @@ static __initdata struct uv_gam_range_s *_gr_table;
|
||||
|
||||
#define SOCK_EMPTY ((unsigned short)~0)
|
||||
|
||||
extern int uv_hub_info_version(void)
|
||||
{
|
||||
return UV_HUB_INFO_VERSION;
|
||||
}
|
||||
EXPORT_SYMBOL(uv_hub_info_version);
|
||||
|
||||
/* Default UV memory block size is 2GB */
|
||||
static unsigned long mem_block_size __initdata = (2UL << 30);
|
||||
|
||||
@@ -590,12 +566,21 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
|
||||
|
||||
static void uv_send_IPI_one(int cpu, int vector)
|
||||
{
|
||||
unsigned long apicid;
|
||||
int pnode;
|
||||
unsigned long apicid = per_cpu(x86_cpu_to_apicid, cpu);
|
||||
int pnode = uv_apicid_to_pnode(apicid);
|
||||
unsigned long dmode, val;
|
||||
|
||||
apicid = per_cpu(x86_cpu_to_apicid, cpu);
|
||||
pnode = uv_apicid_to_pnode(apicid);
|
||||
uv_hub_send_ipi(pnode, apicid, vector);
|
||||
if (vector == NMI_VECTOR)
|
||||
dmode = dest_NMI;
|
||||
else
|
||||
dmode = dest_Fixed;
|
||||
|
||||
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
|
||||
((apicid | uv_apicid_hibits) << UVH_IPI_INT_APIC_ID_SHFT) |
|
||||
(dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
|
||||
(vector << UVH_IPI_INT_VECTOR_SHFT);
|
||||
|
||||
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
|
||||
}
|
||||
|
||||
static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
|
||||
@@ -797,42 +782,6 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift
|
||||
init_extra_mapping_wb(paddr, bytes);
|
||||
}
|
||||
|
||||
static __init void map_gru_distributed(unsigned long c)
|
||||
{
|
||||
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
|
||||
u64 paddr;
|
||||
unsigned long bytes;
|
||||
int nid;
|
||||
|
||||
gru.v = c;
|
||||
|
||||
/* Only base bits 42:28 relevant in dist mode */
|
||||
gru_dist_base = gru.v & 0x000007fff0000000UL;
|
||||
if (!gru_dist_base) {
|
||||
pr_info("UV: Map GRU_DIST base address NULL\n");
|
||||
return;
|
||||
}
|
||||
|
||||
bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
|
||||
gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
|
||||
gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
|
||||
gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
|
||||
|
||||
for_each_online_node(nid) {
|
||||
paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
|
||||
gru_dist_base;
|
||||
init_extra_mapping_wb(paddr, bytes);
|
||||
gru_first_node_paddr = min(paddr, gru_first_node_paddr);
|
||||
gru_last_node_paddr = max(paddr, gru_last_node_paddr);
|
||||
}
|
||||
|
||||
/* Save upper (63:M) bits of address only for is_GRU_range */
|
||||
gru_first_node_paddr &= gru_dist_umask;
|
||||
gru_last_node_paddr &= gru_dist_umask;
|
||||
|
||||
pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
|
||||
}
|
||||
|
||||
static __init void map_gru_high(int max_pnode)
|
||||
{
|
||||
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
|
||||
@@ -846,12 +795,6 @@ static __init void map_gru_high(int max_pnode)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only UV3 has distributed GRU mode */
|
||||
if (is_uv3_hub() && gru.s3.mode) {
|
||||
map_gru_distributed(gru.v);
|
||||
return;
|
||||
}
|
||||
|
||||
base = (gru.v & mask) >> shift;
|
||||
map_high("GRU", base, shift, shift, max_pnode, map_wb);
|
||||
gru_start_paddr = ((u64)base << shift);
|
||||
|
||||
@@ -57,9 +57,6 @@ int main(void)
|
||||
BLANK();
|
||||
#undef ENTRY
|
||||
|
||||
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
|
||||
DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
|
||||
offsetof(struct cea_exception_stacks, DB1_stack));
|
||||
BLANK();
|
||||
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/audit.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/audit.h>
|
||||
|
||||
static unsigned dir_class[] = {
|
||||
#include <asm-generic/audit_dir_write.h>
|
||||
@@ -41,7 +42,6 @@ int audit_classify_arch(int arch)
|
||||
int audit_classify_syscall(int abi, unsigned syscall)
|
||||
{
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
extern int ia32_classify_syscall(unsigned);
|
||||
if (abi == AUDIT_ARCH_I386)
|
||||
return ia32_classify_syscall(syscall);
|
||||
#endif
|
||||
|
||||
@@ -10,10 +10,10 @@
|
||||
*/
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <asm/acrn.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/idtentry.h>
|
||||
#include <asm/irq_regs.h>
|
||||
|
||||
static uint32_t __init acrn_detect(void)
|
||||
@@ -24,7 +24,7 @@ static uint32_t __init acrn_detect(void)
|
||||
static void __init acrn_init_platform(void)
|
||||
{
|
||||
/* Setup the IDT for ACRN hypervisor callback */
|
||||
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector);
|
||||
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_acrn_hv_callback);
|
||||
}
|
||||
|
||||
static bool acrn_x2apic_available(void)
|
||||
@@ -39,7 +39,7 @@ static bool acrn_x2apic_available(void)
|
||||
|
||||
static void (*acrn_intr_handler)(void);
|
||||
|
||||
__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
@@ -50,13 +50,12 @@ __visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
|
||||
* will block the interrupt whose vector is lower than
|
||||
* HYPERVISOR_CALLBACK_VECTOR.
|
||||
*/
|
||||
entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
inc_irq_stat(irq_hv_callback_count);
|
||||
|
||||
if (acrn_intr_handler)
|
||||
acrn_intr_handler();
|
||||
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/resctrl.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# include <asm/mmconfig.h>
|
||||
@@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
|
||||
x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
|
||||
}
|
||||
}
|
||||
|
||||
resctrl_cpu_detect(c);
|
||||
}
|
||||
|
||||
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
|
||||
@@ -1142,8 +1145,7 @@ static const int amd_erratum_383[] =
|
||||
|
||||
/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
|
||||
static const int amd_erratum_1054[] =
|
||||
AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
|
||||
|
||||
AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
|
||||
|
||||
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
|
||||
{
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <linux/sched/smt.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/spec-ctrl.h>
|
||||
#include <asm/cmdline.h>
|
||||
@@ -26,7 +27,6 @@
|
||||
#include <asm/vmx.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/intel-family.h>
|
||||
#include <asm/e820/api.h>
|
||||
@@ -41,6 +41,7 @@ static void __init l1tf_select_mitigation(void);
|
||||
static void __init mds_select_mitigation(void);
|
||||
static void __init mds_print_mitigation(void);
|
||||
static void __init taa_select_mitigation(void);
|
||||
static void __init srbds_select_mitigation(void);
|
||||
|
||||
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
|
||||
u64 x86_spec_ctrl_base;
|
||||
@@ -108,6 +109,7 @@ void __init check_bugs(void)
|
||||
l1tf_select_mitigation();
|
||||
mds_select_mitigation();
|
||||
taa_select_mitigation();
|
||||
srbds_select_mitigation();
|
||||
|
||||
/*
|
||||
* As MDS and TAA mitigations are inter-related, print MDS
|
||||
@@ -397,6 +399,97 @@ static int __init tsx_async_abort_parse_cmdline(char *str)
|
||||
}
|
||||
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "SRBDS: " fmt
|
||||
|
||||
enum srbds_mitigations {
|
||||
SRBDS_MITIGATION_OFF,
|
||||
SRBDS_MITIGATION_UCODE_NEEDED,
|
||||
SRBDS_MITIGATION_FULL,
|
||||
SRBDS_MITIGATION_TSX_OFF,
|
||||
SRBDS_MITIGATION_HYPERVISOR,
|
||||
};
|
||||
|
||||
static enum srbds_mitigations srbds_mitigation __ro_after_init = SRBDS_MITIGATION_FULL;
|
||||
|
||||
static const char * const srbds_strings[] = {
|
||||
[SRBDS_MITIGATION_OFF] = "Vulnerable",
|
||||
[SRBDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
|
||||
[SRBDS_MITIGATION_FULL] = "Mitigation: Microcode",
|
||||
[SRBDS_MITIGATION_TSX_OFF] = "Mitigation: TSX disabled",
|
||||
[SRBDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status",
|
||||
};
|
||||
|
||||
static bool srbds_off;
|
||||
|
||||
void update_srbds_msr(void)
|
||||
{
|
||||
u64 mcu_ctrl;
|
||||
|
||||
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
|
||||
return;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return;
|
||||
|
||||
if (srbds_mitigation == SRBDS_MITIGATION_UCODE_NEEDED)
|
||||
return;
|
||||
|
||||
rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
|
||||
|
||||
switch (srbds_mitigation) {
|
||||
case SRBDS_MITIGATION_OFF:
|
||||
case SRBDS_MITIGATION_TSX_OFF:
|
||||
mcu_ctrl |= RNGDS_MITG_DIS;
|
||||
break;
|
||||
case SRBDS_MITIGATION_FULL:
|
||||
mcu_ctrl &= ~RNGDS_MITG_DIS;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
|
||||
}
|
||||
|
||||
static void __init srbds_select_mitigation(void)
|
||||
{
|
||||
u64 ia32_cap;
|
||||
|
||||
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check to see if this is one of the MDS_NO systems supporting
|
||||
* TSX that are only exposed to SRBDS when TSX is enabled.
|
||||
*/
|
||||
ia32_cap = x86_read_arch_cap_msr();
|
||||
if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
|
||||
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
|
||||
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
|
||||
else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL))
|
||||
srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED;
|
||||
else if (cpu_mitigations_off() || srbds_off)
|
||||
srbds_mitigation = SRBDS_MITIGATION_OFF;
|
||||
|
||||
update_srbds_msr();
|
||||
pr_info("%s\n", srbds_strings[srbds_mitigation]);
|
||||
}
|
||||
|
||||
static int __init srbds_parse_cmdline(char *str)
|
||||
{
|
||||
if (!str)
|
||||
return -EINVAL;
|
||||
|
||||
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
|
||||
return 0;
|
||||
|
||||
srbds_off = !strcmp(str, "off");
|
||||
return 0;
|
||||
}
|
||||
early_param("srbds", srbds_parse_cmdline);
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "Spectre V1 : " fmt
|
||||
|
||||
@@ -1528,6 +1621,11 @@ static char *ibpb_state(void)
|
||||
return "";
|
||||
}
|
||||
|
||||
static ssize_t srbds_show_state(char *buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
|
||||
}
|
||||
|
||||
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
|
||||
char *buf, unsigned int bug)
|
||||
{
|
||||
@@ -1572,6 +1670,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
|
||||
case X86_BUG_ITLB_MULTIHIT:
|
||||
return itlb_multihit_show_state(buf);
|
||||
|
||||
case X86_BUG_SRBDS:
|
||||
return srbds_show_state(buf);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -1618,4 +1719,9 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr
|
||||
{
|
||||
return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
|
||||
}
|
||||
|
||||
ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <linux/smp.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/stackprotector.h>
|
||||
#include <asm/perf_event.h>
|
||||
@@ -35,7 +36,6 @@
|
||||
#include <asm/vsyscall.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/setup.h>
|
||||
@@ -387,7 +387,30 @@ set_register:
|
||||
bits_missing);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(native_write_cr4);
|
||||
#if IS_MODULE(CONFIG_LKDTM)
|
||||
EXPORT_SYMBOL_GPL(native_write_cr4);
|
||||
#endif
|
||||
|
||||
void cr4_update_irqsoff(unsigned long set, unsigned long clear)
|
||||
{
|
||||
unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
newval = (cr4 & ~clear) | set;
|
||||
if (newval != cr4) {
|
||||
this_cpu_write(cpu_tlbstate.cr4, newval);
|
||||
__write_cr4(newval);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(cr4_update_irqsoff);
|
||||
|
||||
/* Read the CR4 shadow. */
|
||||
unsigned long cr4_read_shadow(void)
|
||||
{
|
||||
return this_cpu_read(cpu_tlbstate.cr4);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cr4_read_shadow);
|
||||
|
||||
void cr4_init(void)
|
||||
{
|
||||
@@ -854,30 +877,6 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
static void init_cqm(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
|
||||
c->x86_cache_max_rmid = -1;
|
||||
c->x86_cache_occ_scale = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* will be overridden if occupancy monitoring exists */
|
||||
c->x86_cache_max_rmid = cpuid_ebx(0xf);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
|
||||
cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
|
||||
cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
|
||||
cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
c->x86_cache_max_rmid = ecx;
|
||||
c->x86_cache_occ_scale = ebx;
|
||||
}
|
||||
}
|
||||
|
||||
void get_cpu_cap(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
@@ -945,7 +944,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
|
||||
|
||||
init_scattered_cpuid_features(c);
|
||||
init_speculation_control(c);
|
||||
init_cqm(c);
|
||||
|
||||
/*
|
||||
* Clear/Set all flags overridden by options, after probe.
|
||||
@@ -1075,9 +1073,30 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
|
||||
{}
|
||||
};
|
||||
|
||||
static bool __init cpu_matches(unsigned long which)
|
||||
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
|
||||
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
|
||||
INTEL_FAM6_##model, steppings, \
|
||||
X86_FEATURE_ANY, issues)
|
||||
|
||||
#define SRBDS BIT(0)
|
||||
|
||||
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
|
||||
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),
|
||||
VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),
|
||||
{}
|
||||
};
|
||||
|
||||
static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which)
|
||||
{
|
||||
const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
|
||||
const struct x86_cpu_id *m = x86_match_cpu(table);
|
||||
|
||||
return m && !!(m->driver_data & which);
|
||||
}
|
||||
@@ -1097,31 +1116,34 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
||||
u64 ia32_cap = x86_read_arch_cap_msr();
|
||||
|
||||
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
|
||||
if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
|
||||
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
|
||||
!(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
|
||||
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
|
||||
|
||||
if (cpu_matches(NO_SPECULATION))
|
||||
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
|
||||
return;
|
||||
|
||||
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
|
||||
|
||||
if (!cpu_matches(NO_SPECTRE_V2))
|
||||
if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2))
|
||||
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
|
||||
|
||||
if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
|
||||
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
|
||||
!(ia32_cap & ARCH_CAP_SSB_NO) &&
|
||||
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
|
||||
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
|
||||
|
||||
if (ia32_cap & ARCH_CAP_IBRS_ALL)
|
||||
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
|
||||
|
||||
if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) {
|
||||
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
|
||||
!(ia32_cap & ARCH_CAP_MDS_NO)) {
|
||||
setup_force_cpu_bug(X86_BUG_MDS);
|
||||
if (cpu_matches(MSBDS_ONLY))
|
||||
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
|
||||
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
|
||||
}
|
||||
|
||||
if (!cpu_matches(NO_SWAPGS))
|
||||
if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS))
|
||||
setup_force_cpu_bug(X86_BUG_SWAPGS);
|
||||
|
||||
/*
|
||||
@@ -1139,7 +1161,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
||||
(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
|
||||
setup_force_cpu_bug(X86_BUG_TAA);
|
||||
|
||||
if (cpu_matches(NO_MELTDOWN))
|
||||
/*
|
||||
* SRBDS affects CPUs which support RDRAND or RDSEED and are listed
|
||||
* in the vulnerability blacklist.
|
||||
*/
|
||||
if ((cpu_has(c, X86_FEATURE_RDRAND) ||
|
||||
cpu_has(c, X86_FEATURE_RDSEED)) &&
|
||||
cpu_matches(cpu_vuln_blacklist, SRBDS))
|
||||
setup_force_cpu_bug(X86_BUG_SRBDS);
|
||||
|
||||
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
|
||||
return;
|
||||
|
||||
/* Rogue Data Cache Load? No! */
|
||||
@@ -1148,7 +1179,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
||||
|
||||
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
|
||||
|
||||
if (cpu_matches(NO_L1TF))
|
||||
if (cpu_matches(cpu_vuln_whitelist, NO_L1TF))
|
||||
return;
|
||||
|
||||
setup_force_cpu_bug(X86_BUG_L1TF);
|
||||
@@ -1377,20 +1408,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
|
||||
#endif
|
||||
}
|
||||
|
||||
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
|
||||
{
|
||||
/*
|
||||
* The heavy lifting of max_rmid and cache_occ_scale are handled
|
||||
* in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
|
||||
* in case CQM bits really aren't there in this CPU.
|
||||
*/
|
||||
if (c != &boot_cpu_data) {
|
||||
boot_cpu_data.x86_cache_max_rmid =
|
||||
min(boot_cpu_data.x86_cache_max_rmid,
|
||||
c->x86_cache_max_rmid);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate that ACPI/mptables have the same information about the
|
||||
* effective APIC id and update the package map.
|
||||
@@ -1503,7 +1520,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
|
||||
#endif
|
||||
|
||||
x86_init_rdrand(c);
|
||||
x86_init_cache_qos(c);
|
||||
setup_pku(c);
|
||||
|
||||
/*
|
||||
@@ -1591,6 +1607,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
|
||||
mtrr_ap_init();
|
||||
validate_apic_and_package_id(c);
|
||||
x86_spec_ctrl_setup_ap();
|
||||
update_srbds_msr();
|
||||
}
|
||||
|
||||
static __init int setup_noclflush(char *arg)
|
||||
@@ -1689,25 +1706,6 @@ void syscall_init(void)
|
||||
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
|
||||
}
|
||||
|
||||
DEFINE_PER_CPU(int, debug_stack_usage);
|
||||
DEFINE_PER_CPU(u32, debug_idt_ctr);
|
||||
|
||||
void debug_stack_set_zero(void)
|
||||
{
|
||||
this_cpu_inc(debug_idt_ctr);
|
||||
load_current_idt();
|
||||
}
|
||||
NOKPROBE_SYMBOL(debug_stack_set_zero);
|
||||
|
||||
void debug_stack_reset(void)
|
||||
{
|
||||
if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
|
||||
return;
|
||||
if (this_cpu_dec_return(debug_idt_ctr) == 0)
|
||||
load_current_idt();
|
||||
}
|
||||
NOKPROBE_SYMBOL(debug_stack_reset);
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
|
||||
|
||||
@@ -77,6 +77,7 @@ extern void detect_ht(struct cpuinfo_x86 *c);
|
||||
unsigned int aperfmperf_get_khz(int cpu);
|
||||
|
||||
extern void x86_spec_ctrl_setup_ap(void);
|
||||
extern void update_srbds_msr(void);
|
||||
|
||||
extern u64 x86_read_arch_cap_msr(void);
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <linux/string.h>
|
||||
#include <linux/bitops.h>
|
||||
@@ -11,7 +12,6 @@
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/bugs.h>
|
||||
#include <asm/cpu.h>
|
||||
@@ -22,6 +22,7 @@
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/cmdline.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/resctrl.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <linux/topology.h>
|
||||
@@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
detect_ht_early(c);
|
||||
}
|
||||
|
||||
static void bsp_init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
resctrl_cpu_detect(c);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Early probe support logic for ppro memory erratum #50
|
||||
@@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = {
|
||||
#endif
|
||||
.c_detect_tlb = intel_detect_tlb,
|
||||
.c_early_init = early_init_intel,
|
||||
.c_bsp_init = bsp_init_intel,
|
||||
.c_init = init_intel,
|
||||
.c_x86_vendor = X86_VENDOR_INTEL,
|
||||
};
|
||||
@@ -1119,35 +1126,53 @@ void switch_to_sld(unsigned long tifn)
|
||||
sld_update_msr(!(tifn & _TIF_SLD));
|
||||
}
|
||||
|
||||
#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}
|
||||
|
||||
/*
|
||||
* The following processors have the split lock detection feature. But
|
||||
* since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot
|
||||
* be enumerated. Enable it by family and model matching on these
|
||||
* processors.
|
||||
* Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
|
||||
* only be trusted if it is confirmed that a CPU model implements a
|
||||
* specific feature at a particular bit position.
|
||||
*
|
||||
* The possible driver data field values:
|
||||
*
|
||||
* - 0: CPU models that are known to have the per-core split-lock detection
|
||||
* feature even though they do not enumerate IA32_CORE_CAPABILITIES.
|
||||
*
|
||||
* - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use
|
||||
* bit 5 to enumerate the per-core split-lock detection feature.
|
||||
*/
|
||||
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
|
||||
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X),
|
||||
SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1),
|
||||
{}
|
||||
};
|
||||
|
||||
void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 ia32_core_caps = 0;
|
||||
const struct x86_cpu_id *m;
|
||||
u64 ia32_core_caps;
|
||||
|
||||
if (c->x86_vendor != X86_VENDOR_INTEL)
|
||||
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return;
|
||||
if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) {
|
||||
/* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */
|
||||
|
||||
m = x86_match_cpu(split_lock_cpu_ids);
|
||||
if (!m)
|
||||
return;
|
||||
|
||||
switch (m->driver_data) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
|
||||
return;
|
||||
rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
|
||||
} else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
|
||||
/* Enumerate split lock detection by family and model. */
|
||||
if (x86_match_cpu(split_lock_cpu_ids))
|
||||
ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT;
|
||||
if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT))
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
|
||||
split_lock_setup();
|
||||
split_lock_setup();
|
||||
}
|
||||
|
||||
@@ -39,13 +39,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
|
||||
const struct x86_cpu_id *m;
|
||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
|
||||
for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
|
||||
for (m = match;
|
||||
m->vendor | m->family | m->model | m->steppings | m->feature;
|
||||
m++) {
|
||||
if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
|
||||
continue;
|
||||
if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
|
||||
continue;
|
||||
if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
|
||||
continue;
|
||||
if (m->steppings != X86_STEPPING_ANY &&
|
||||
!(BIT(c->x86_stepping) & m->steppings))
|
||||
continue;
|
||||
if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
|
||||
continue;
|
||||
return m;
|
||||
|
||||
@@ -921,14 +921,13 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
|
||||
mce_log(&m);
|
||||
}
|
||||
|
||||
asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
|
||||
{
|
||||
entering_irq();
|
||||
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
|
||||
inc_irq_stat(irq_deferred_error_count);
|
||||
deferred_error_int_vector();
|
||||
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
|
||||
exiting_ack_irq();
|
||||
ack_APIC_irq();
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -42,6 +42,8 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/set_memory.h>
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/intel-family.h>
|
||||
#include <asm/processor.h>
|
||||
@@ -128,7 +130,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
|
||||
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
|
||||
|
||||
/* Do initial initialization of a struct mce */
|
||||
void mce_setup(struct mce *m)
|
||||
noinstr void mce_setup(struct mce *m)
|
||||
{
|
||||
memset(m, 0, sizeof(struct mce));
|
||||
m->cpu = m->extcpu = smp_processor_id();
|
||||
@@ -138,12 +140,12 @@ void mce_setup(struct mce *m)
|
||||
m->cpuid = cpuid_eax(1);
|
||||
m->socketid = cpu_data(m->extcpu).phys_proc_id;
|
||||
m->apicid = cpu_data(m->extcpu).initial_apicid;
|
||||
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
|
||||
m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
|
||||
|
||||
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
|
||||
rdmsrl(MSR_PPIN, m->ppin);
|
||||
m->ppin = __rdmsr(MSR_PPIN);
|
||||
else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
|
||||
rdmsrl(MSR_AMD_PPIN, m->ppin);
|
||||
m->ppin = __rdmsr(MSR_AMD_PPIN);
|
||||
|
||||
m->microcode = boot_cpu_data.microcode;
|
||||
}
|
||||
@@ -1057,23 +1059,6 @@ static void mce_clear_state(unsigned long *toclear)
|
||||
}
|
||||
}
|
||||
|
||||
static int do_memory_failure(struct mce *m)
|
||||
{
|
||||
int flags = MF_ACTION_REQUIRED;
|
||||
int ret;
|
||||
|
||||
pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
|
||||
if (!(m->mcgstatus & MCG_STATUS_RIPV))
|
||||
flags |= MF_MUST_KILL;
|
||||
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
|
||||
if (ret)
|
||||
pr_err("Memory error not recovered");
|
||||
else
|
||||
set_mce_nospec(m->addr >> PAGE_SHIFT);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Cases where we avoid rendezvous handler timeout:
|
||||
* 1) If this CPU is offline.
|
||||
@@ -1086,13 +1071,15 @@ static int do_memory_failure(struct mce *m)
|
||||
* kdump kernel establishing a new #MC handler where a broadcasted MCE
|
||||
* might not get handled properly.
|
||||
*/
|
||||
static bool __mc_check_crashing_cpu(int cpu)
|
||||
static noinstr bool mce_check_crashing_cpu(void)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
if (cpu_is_offline(cpu) ||
|
||||
(crashing_cpu != -1 && crashing_cpu != cpu)) {
|
||||
u64 mcgstatus;
|
||||
|
||||
mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||
mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS);
|
||||
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
|
||||
if (mcgstatus & MCG_STATUS_LMCES)
|
||||
@@ -1100,7 +1087,7 @@ static bool __mc_check_crashing_cpu(int cpu)
|
||||
}
|
||||
|
||||
if (mcgstatus & MCG_STATUS_RIPV) {
|
||||
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
|
||||
__wrmsr(MSR_IA32_MCG_STATUS, 0, 0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1175,6 +1162,29 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
|
||||
*m = *final;
|
||||
}
|
||||
|
||||
static void kill_me_now(struct callback_head *ch)
|
||||
{
|
||||
force_sig(SIGBUS);
|
||||
}
|
||||
|
||||
static void kill_me_maybe(struct callback_head *cb)
|
||||
{
|
||||
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
|
||||
int flags = MF_ACTION_REQUIRED;
|
||||
|
||||
pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
|
||||
if (!(p->mce_status & MCG_STATUS_RIPV))
|
||||
flags |= MF_MUST_KILL;
|
||||
|
||||
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
|
||||
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_err("Memory error not recovered");
|
||||
kill_me_now(cb);
|
||||
}
|
||||
|
||||
/*
|
||||
* The actual machine check handler. This only handles real
|
||||
* exceptions when something got corrupted coming in through int 18.
|
||||
@@ -1193,12 +1203,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
|
||||
* backing the user stack, tracing that reads the user stack will cause
|
||||
* potentially infinite recursion.
|
||||
*/
|
||||
void notrace do_machine_check(struct pt_regs *regs, long error_code)
|
||||
void noinstr do_machine_check(struct pt_regs *regs)
|
||||
{
|
||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
||||
struct mca_config *cfg = &mca_cfg;
|
||||
int cpu = smp_processor_id();
|
||||
struct mce m, *final;
|
||||
char *msg = NULL;
|
||||
int worst = 0;
|
||||
@@ -1227,11 +1236,6 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
|
||||
*/
|
||||
int lmce = 1;
|
||||
|
||||
if (__mc_check_crashing_cpu(cpu))
|
||||
return;
|
||||
|
||||
ist_enter(regs);
|
||||
|
||||
this_cpu_inc(mce_exception_count);
|
||||
|
||||
mce_gather_info(&m, regs);
|
||||
@@ -1319,17 +1323,19 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
|
||||
sync_core();
|
||||
|
||||
if (worst != MCE_AR_SEVERITY && !kill_it)
|
||||
goto out_ist;
|
||||
return;
|
||||
|
||||
/* Fault was in user mode and we need to take some action */
|
||||
if ((m.cs & 3) == 3) {
|
||||
ist_begin_non_atomic(regs);
|
||||
local_irq_enable();
|
||||
/* If this triggers there is no way to recover. Die hard. */
|
||||
BUG_ON(!on_thread_stack() || !user_mode(regs));
|
||||
|
||||
if (kill_it || do_memory_failure(&m))
|
||||
force_sig(SIGBUS);
|
||||
local_irq_disable();
|
||||
ist_end_non_atomic();
|
||||
current->mce_addr = m.addr;
|
||||
current->mce_status = m.mcgstatus;
|
||||
current->mce_kill_me.func = kill_me_maybe;
|
||||
if (kill_it)
|
||||
current->mce_kill_me.func = kill_me_now;
|
||||
task_work_add(current, ¤t->mce_kill_me, true);
|
||||
} else {
|
||||
/*
|
||||
* Handle an MCE which has happened in kernel space but from
|
||||
@@ -1341,16 +1347,12 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code)
|
||||
* proper one.
|
||||
*/
|
||||
if (m.kflags & MCE_IN_KERNEL_RECOV) {
|
||||
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
|
||||
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
|
||||
mce_panic("Failed kernel mode recovery", &m, msg);
|
||||
}
|
||||
}
|
||||
|
||||
out_ist:
|
||||
ist_exit(regs);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(do_machine_check);
|
||||
NOKPROBE_SYMBOL(do_machine_check);
|
||||
|
||||
#ifndef CONFIG_MEMORY_FAILURE
|
||||
int memory_failure(unsigned long pfn, int flags)
|
||||
@@ -1876,21 +1878,84 @@ bool filter_mce(struct mce *m)
|
||||
}
|
||||
|
||||
/* Handle unconfigured int18 (should never happen) */
|
||||
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
|
||||
static noinstr void unexpected_machine_check(struct pt_regs *regs)
|
||||
{
|
||||
instrumentation_begin();
|
||||
pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
|
||||
smp_processor_id());
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/* Call the installed machine check handler for this CPU setup. */
|
||||
void (*machine_check_vector)(struct pt_regs *, long error_code) =
|
||||
unexpected_machine_check;
|
||||
void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
|
||||
|
||||
dotraplinkage notrace void do_mce(struct pt_regs *regs, long error_code)
|
||||
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
|
||||
{
|
||||
machine_check_vector(regs, error_code);
|
||||
/*
|
||||
* Only required when from kernel mode. See
|
||||
* mce_check_crashing_cpu() for details.
|
||||
*/
|
||||
if (machine_check_vector == do_machine_check &&
|
||||
mce_check_crashing_cpu())
|
||||
return;
|
||||
|
||||
nmi_enter();
|
||||
/*
|
||||
* The call targets are marked noinstr, but objtool can't figure
|
||||
* that out because it's an indirect call. Annotate it.
|
||||
*/
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_off_finish();
|
||||
machine_check_vector(regs);
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
trace_hardirqs_on_prepare();
|
||||
instrumentation_end();
|
||||
nmi_exit();
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_mce);
|
||||
|
||||
static __always_inline void exc_machine_check_user(struct pt_regs *regs)
|
||||
{
|
||||
idtentry_enter_user(regs);
|
||||
instrumentation_begin();
|
||||
machine_check_vector(regs);
|
||||
instrumentation_end();
|
||||
idtentry_exit_user(regs);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* MCE hit kernel mode */
|
||||
DEFINE_IDTENTRY_MCE(exc_machine_check)
|
||||
{
|
||||
unsigned long dr7;
|
||||
|
||||
dr7 = local_db_save();
|
||||
exc_machine_check_kernel(regs);
|
||||
local_db_restore(dr7);
|
||||
}
|
||||
|
||||
/* The user mode variant. */
|
||||
DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
|
||||
{
|
||||
unsigned long dr7;
|
||||
|
||||
dr7 = local_db_save();
|
||||
exc_machine_check_user(regs);
|
||||
local_db_restore(dr7);
|
||||
}
|
||||
#else
|
||||
/* 32bit unified entry point */
|
||||
DEFINE_IDTENTRY_MCE(exc_machine_check)
|
||||
{
|
||||
unsigned long dr7;
|
||||
|
||||
dr7 = local_db_save();
|
||||
if (user_mode(regs))
|
||||
exc_machine_check_user(regs);
|
||||
else
|
||||
exc_machine_check_kernel(regs);
|
||||
local_db_restore(dr7);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Called for each booted CPU to set up machine checks.
|
||||
|
||||
@@ -146,9 +146,9 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
regs.cs = m->cs;
pregs = &regs;
}
/* in mcheck exeception handler, irq will be disabled */
/* do_machine_check() expects interrupts disabled -- at least */
local_irq_save(flags);
do_machine_check(pregs, 0);
do_machine_check(pregs);
local_irq_restore(flags);
m->finished = 0;
}

@@ -9,7 +9,7 @@
#include <asm/mce.h>

/* Pointer to the installed machine check handler for this CPU setup. */
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
extern void (*machine_check_vector)(struct pt_regs *);

enum severity_level {
MCE_NO_SEVERITY,
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/hardirq.h>

#include <asm/processor.h>
#include <asm/traps.h>
@@ -20,12 +21,11 @@
int mce_p5_enabled __read_mostly;

/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
static noinstr void pentium_machine_check(struct pt_regs *regs)
{
u32 loaddr, hi, lotype;

ist_enter(regs);

instrumentation_begin();
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);

@@ -38,8 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
}

add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

ist_exit(regs);
instrumentation_end();
}

/* Set up machine check reporting for processors with Intel style MCE: */
@@ -614,14 +614,13 @@ static void unexpected_thermal_interrupt(void)

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
{
entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
exiting_ack_irq();
ack_APIC_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */

@@ -21,12 +21,11 @@ static void default_threshold_interrupt(void)

void (*mce_threshold_vector)(void) = default_threshold_interrupt;

asmlinkage __visible void __irq_entry smp_threshold_interrupt(struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
{
entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
exiting_ack_irq();
ack_APIC_irq();
}
@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/hardirq.h>

#include <asm/processor.h>
#include <asm/traps.h>
@@ -16,14 +17,12 @@
#include "internal.h"

/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
static noinstr void winchip_machine_check(struct pt_regs *regs)
{
ist_enter(regs);

instrumentation_begin();
pr_emerg("CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

ist_exit(regs);
instrumentation_end();
}

/* Set up machine check reporting on the Winchip C6 series */
@@ -545,8 +545,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout)
/*
* Returns:
* < 0 - on error
* 0 - no update done
* 1 - microcode was updated
* 0 - success (no update done or microcode was updated)
*/
static int __reload_late(void *info)
{
@@ -573,11 +572,11 @@ static int __reload_late(void *info)
else
goto wait_for_siblings;

if (err > UCODE_NFOUND) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
if (err >= UCODE_NFOUND) {
if (err == UCODE_ERROR)
pr_warn("Error reloading microcode on CPU %d\n", cpu);

ret = -1;
} else if (err == UCODE_UPDATED || err == UCODE_OK) {
ret = 1;
}

wait_for_siblings:
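The new err >= UCODE_NFOUND test above only works because of the relative ordering of the ucode_state values. A minimal standalone sketch of that classification; the enum members here mirror the ordering the check assumes rather than quoting the kernel header:

/* Illustrative only -- mirrors the ordering the check above relies on. */
#include <stdio.h>

enum ucode_state { UCODE_OK, UCODE_NEW, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR };

static const char *classify(enum ucode_state err)
{
	/* Same shape as the reworked __reload_late() logic: anything at or
	 * beyond UCODE_NFOUND counts as failure, only UCODE_ERROR warns. */
	if (err >= UCODE_NFOUND)
		return err == UCODE_ERROR ? "failure (warn)" : "failure (silent)";
	if (err == UCODE_UPDATED || err == UCODE_OK)
		return "success";
	return "no action";
}

int main(void)
{
	printf("%s\n", classify(UCODE_NFOUND));		/* failure (silent) */
	printf("%s\n", classify(UCODE_UPDATED));	/* success */
	return 0;
}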
@@ -608,7 +607,7 @@ static int microcode_reload_late(void)
atomic_set(&late_cpus_out, 0);

ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
if (ret > 0)
if (ret == 0)
microcode_check();

pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
@@ -649,7 +648,7 @@ static ssize_t reload_store(struct device *dev,
put:
put_online_cpus();

if (ret >= 0)
if (ret == 0)
ret = size;

return ret;
@@ -23,6 +23,7 @@
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
#include <asm/mshyperv.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/idtentry.h>
|
||||
#include <asm/irq_regs.h>
|
||||
#include <asm/i8259.h>
|
||||
#include <asm/apic.h>
|
||||
@@ -40,11 +41,10 @@ static void (*hv_stimer0_handler)(void);
|
||||
static void (*hv_kexec_handler)(void);
|
||||
static void (*hv_crash_handler)(struct pt_regs *regs);
|
||||
|
||||
__visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
entering_irq();
|
||||
inc_irq_stat(irq_hv_callback_count);
|
||||
if (vmbus_handler)
|
||||
vmbus_handler();
|
||||
@@ -52,7 +52,6 @@ __visible void __irq_entry hyperv_vector_handler(struct pt_regs *regs)
|
||||
if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
|
||||
ack_APIC_irq();
|
||||
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
@@ -73,19 +72,16 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
|
||||
* Routines to do per-architecture handling of stimer0
|
||||
* interrupts when in Direct Mode
|
||||
*/
|
||||
|
||||
__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
entering_irq();
|
||||
inc_irq_stat(hyperv_stimer0_count);
|
||||
if (hv_stimer0_handler)
|
||||
hv_stimer0_handler();
|
||||
add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
|
||||
ack_APIC_irq();
|
||||
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
@@ -227,8 +223,8 @@ static void __init ms_hyperv_init_platform(void)
|
||||
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
|
||||
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
|
||||
|
||||
pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
|
||||
ms_hyperv.features, ms_hyperv.hints);
|
||||
pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n",
|
||||
ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features);
|
||||
|
||||
ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
|
||||
ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
|
||||
@@ -263,6 +259,16 @@ static void __init ms_hyperv_init_platform(void)
|
||||
cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hyper-V expects to get crash register data or kmsg when
|
||||
* crash enlightment is available and system crashes. Set
|
||||
* crash_kexec_post_notifiers to be true to make sure that
|
||||
* calling crash enlightment interface before running kdump
|
||||
* kernel.
|
||||
*/
|
||||
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
|
||||
crash_kexec_post_notifiers = true;
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
|
||||
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
|
||||
@@ -321,17 +327,19 @@ static void __init ms_hyperv_init_platform(void)
|
||||
x86_platform.apic_post_init = hyperv_init;
|
||||
hyperv_setup_mmu_ops();
|
||||
/* Setup the IDT for hypervisor callback */
|
||||
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
|
||||
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback);
|
||||
|
||||
/* Setup the IDT for reenlightenment notifications */
|
||||
if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT)
|
||||
if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) {
|
||||
alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
|
||||
hyperv_reenlightenment_vector);
|
||||
asm_sysvec_hyperv_reenlightenment);
|
||||
}
|
||||
|
||||
/* Setup the IDT for stimer0 */
|
||||
if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE)
|
||||
if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) {
|
||||
alloc_intr_gate(HYPERV_STIMER0_VECTOR,
|
||||
hv_stimer0_callback_vector);
|
||||
asm_sysvec_hyperv_stimer0);
|
||||
}
|
||||
|
||||
# ifdef CONFIG_SMP
|
||||
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
|
||||
|
||||
@@ -761,7 +761,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
|
||||
|
||||
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
|
||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
||||
__flush_tlb();
|
||||
flush_tlb_local();
|
||||
|
||||
/* Save MTRR state */
|
||||
rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
|
||||
@@ -778,7 +778,7 @@ static void post_set(void) __releases(set_atomicity_lock)
|
||||
{
|
||||
/* Flush TLBs (no need to flush caches - they are disabled) */
|
||||
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
|
||||
__flush_tlb();
|
||||
flush_tlb_local();
|
||||
|
||||
/* Intel (P6) standard MTRRs */
|
||||
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
|
||||
|
||||
@@ -63,6 +63,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
|
||||
case 15:
|
||||
return msr - MSR_P4_BPU_PERFCTR0;
|
||||
}
|
||||
fallthrough;
|
||||
case X86_VENDOR_ZHAOXIN:
|
||||
case X86_VENDOR_CENTAUR:
|
||||
return msr - MSR_ARCH_PERFMON_PERFCTR0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -92,6 +96,10 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
|
||||
case 15:
|
||||
return msr - MSR_P4_BSU_ESCR0;
|
||||
}
|
||||
fallthrough;
|
||||
case X86_VENDOR_ZHAOXIN:
|
||||
case X86_VENDOR_CENTAUR:
|
||||
return msr - MSR_ARCH_PERFMON_EVENTSEL0;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
#include <linux/cpuhotplug.h>
|
||||
|
||||
#include <asm/intel-family.h>
|
||||
#include <asm/resctrl_sched.h>
|
||||
#include <asm/resctrl.h>
|
||||
#include "internal.h"
|
||||
|
||||
/* Mutex to protect rdtgroup access. */
|
||||
@@ -578,6 +578,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
|
||||
d->id = id;
|
||||
cpumask_set_cpu(cpu, &d->cpu_mask);
|
||||
|
||||
rdt_domain_reconfigure_cdp(r);
|
||||
|
||||
if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
|
||||
kfree(d);
|
||||
return;
|
||||
@@ -956,6 +958,36 @@ static __init void rdt_init_res_defs(void)
|
||||
|
||||
static enum cpuhp_state rdt_online;
|
||||
|
||||
/* Runs once on the BSP during boot. */
|
||||
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
|
||||
c->x86_cache_max_rmid = -1;
|
||||
c->x86_cache_occ_scale = -1;
|
||||
c->x86_cache_mbm_width_offset = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* will be overridden if occupancy monitoring exists */
|
||||
c->x86_cache_max_rmid = cpuid_ebx(0xf);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
|
||||
cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
|
||||
cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
|
||||
cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
c->x86_cache_max_rmid = ecx;
|
||||
c->x86_cache_occ_scale = ebx;
|
||||
if (c->x86_vendor == X86_VENDOR_INTEL)
|
||||
c->x86_cache_mbm_width_offset = eax & 0xff;
|
||||
else
|
||||
c->x86_cache_mbm_width_offset = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int __init resctrl_late_init(void)
|
||||
{
|
||||
struct rdt_resource *r;
|
||||
|
||||
@@ -495,14 +495,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
|
||||
struct rdtgroup *rdtgrp, int evtid, int first)
|
||||
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
|
||||
struct rdt_domain *d, struct rdtgroup *rdtgrp,
|
||||
int evtid, int first)
|
||||
{
|
||||
/*
|
||||
* setup the parameters to send to the IPI to read the data.
|
||||
*/
|
||||
rr->rgrp = rdtgrp;
|
||||
rr->evtid = evtid;
|
||||
rr->r = r;
|
||||
rr->d = d;
|
||||
rr->val = 0;
|
||||
rr->first = first;
|
||||
@@ -539,7 +541,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
mon_event_read(&rr, d, rdtgrp, evtid, false);
|
||||
mon_event_read(&rr, r, d, rdtgrp, evtid, false);
|
||||
|
||||
if (rr.val & RMID_VAL_ERROR)
|
||||
seq_puts(m, "Error\n");
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
|
||||
#define CQM_LIMBOCHECK_INTERVAL 1000
|
||||
|
||||
#define MBM_CNTR_WIDTH 24
|
||||
#define MBM_CNTR_WIDTH_BASE 24
|
||||
#define MBM_OVERFLOW_INTERVAL 1000
|
||||
#define MAX_MBA_BW 100u
|
||||
#define MBA_IS_LINEAR 0x4
|
||||
@@ -40,6 +40,12 @@
|
||||
|
||||
#define RMID_VAL_ERROR BIT_ULL(63)
|
||||
#define RMID_VAL_UNAVAIL BIT_ULL(62)
|
||||
/*
|
||||
* With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
|
||||
* data to be returned. The counter width is discovered from the hardware
|
||||
* as an offset from MBM_CNTR_WIDTH_BASE.
|
||||
*/
|
||||
#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
|
||||
|
||||
|
||||
struct rdt_fs_context {
|
||||
@@ -87,6 +93,7 @@ union mon_data_bits {
|
||||
|
||||
struct rmid_read {
|
||||
struct rdtgroup *rgrp;
|
||||
struct rdt_resource *r;
|
||||
struct rdt_domain *d;
|
||||
int evtid;
|
||||
bool first;
|
||||
@@ -460,6 +467,7 @@ struct rdt_resource {
|
||||
struct list_head evt_list;
|
||||
int num_rmid;
|
||||
unsigned int mon_scale;
|
||||
unsigned int mbm_width;
|
||||
unsigned long fflags;
|
||||
};
|
||||
|
||||
@@ -587,8 +595,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
|
||||
unsigned int dom_id);
|
||||
void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
|
||||
struct rdt_domain *d);
|
||||
void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
|
||||
struct rdtgroup *rdtgrp, int evtid, int first);
|
||||
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
|
||||
struct rdt_domain *d, struct rdtgroup *rdtgrp,
|
||||
int evtid, int first);
|
||||
void mbm_setup_overflow_handler(struct rdt_domain *dom,
|
||||
unsigned long delay_ms);
|
||||
void mbm_handle_overflow(struct work_struct *work);
|
||||
@@ -601,5 +610,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
|
||||
void __check_limbo(struct rdt_domain *d, bool force_free);
|
||||
bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
|
||||
bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r);
|
||||
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
|
||||
|
||||
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
|
||||
|
||||
@@ -214,9 +214,9 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}

static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
{
u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
u64 shift = 64 - width, chunks;

chunks = (cur_msr << shift) - (prev_msr << shift);
return chunks >>= shift;
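The shift by (64 - width) in mbm_overflow_count() is what makes the delta wrap correctly for a counter narrower than 64 bits. A standalone sketch of the same arithmetic, with a made-up width and sample values:

/* Illustrative only: how shifting by (64 - width) yields the wrapped delta
 * of a counter that is only 'width' bits wide. */
#include <stdio.h>
#include <stdint.h>

static uint64_t overflow_count(uint64_t prev, uint64_t cur, unsigned int width)
{
	unsigned int shift = 64 - width;

	/* Left-aligning both values makes the subtraction wrap modulo
	 * 2^width, so a counter rollover still gives the right delta. */
	return ((cur << shift) - (prev << shift)) >> shift;
}

int main(void)
{
	/* A 24-bit counter (the MBM_CNTR_WIDTH_BASE case) that just wrapped. */
	printf("%llu\n", (unsigned long long)overflow_count(0xfffff0, 0x10, 24)); /* prints 32 */
	return 0;
}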
@@ -256,7 +256,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
chunks = mbm_overflow_count(m->prev_msr, tval);
|
||||
chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
|
||||
m->chunks += chunks;
|
||||
m->prev_msr = tval;
|
||||
|
||||
@@ -278,7 +278,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
|
||||
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
|
||||
return;
|
||||
|
||||
chunks = mbm_overflow_count(m->prev_bw_msr, tval);
|
||||
chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
|
||||
m->chunks_bw += chunks;
|
||||
m->chunks = m->chunks_bw;
|
||||
cur_bw = (chunks * r->mon_scale) >> 20;
|
||||
@@ -433,11 +433,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
|
||||
}
|
||||
}
|
||||
|
||||
static void mbm_update(struct rdt_domain *d, int rmid)
|
||||
static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
|
||||
{
|
||||
struct rmid_read rr;
|
||||
|
||||
rr.first = false;
|
||||
rr.r = r;
|
||||
rr.d = d;
|
||||
|
||||
/*
|
||||
@@ -510,6 +511,7 @@ void mbm_handle_overflow(struct work_struct *work)
|
||||
struct rdtgroup *prgrp, *crgrp;
|
||||
int cpu = smp_processor_id();
|
||||
struct list_head *head;
|
||||
struct rdt_resource *r;
|
||||
struct rdt_domain *d;
|
||||
|
||||
mutex_lock(&rdtgroup_mutex);
|
||||
@@ -517,16 +519,18 @@ void mbm_handle_overflow(struct work_struct *work)
|
||||
if (!static_branch_likely(&rdt_mon_enable_key))
|
||||
goto out_unlock;
|
||||
|
||||
d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
|
||||
r = &rdt_resources_all[RDT_RESOURCE_L3];
|
||||
|
||||
d = get_domain_from_cpu(cpu, r);
|
||||
if (!d)
|
||||
goto out_unlock;
|
||||
|
||||
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
|
||||
mbm_update(d, prgrp->mon.rmid);
|
||||
mbm_update(r, d, prgrp->mon.rmid);
|
||||
|
||||
head = &prgrp->mon.crdtgrp_list;
|
||||
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
|
||||
mbm_update(d, crgrp->mon.rmid);
|
||||
mbm_update(r, d, crgrp->mon.rmid);
|
||||
|
||||
if (is_mba_sc(NULL))
|
||||
update_mba_bw(prgrp, d);
|
||||
@@ -614,11 +618,18 @@ static void l3_mon_evt_init(struct rdt_resource *r)
|
||||
|
||||
int rdt_get_mon_l3_config(struct rdt_resource *r)
|
||||
{
|
||||
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
|
||||
unsigned int cl_size = boot_cpu_data.x86_cache_size;
|
||||
int ret;
|
||||
|
||||
r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
|
||||
r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
|
||||
r->mbm_width = MBM_CNTR_WIDTH_BASE;
|
||||
|
||||
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
|
||||
r->mbm_width += mbm_offset;
|
||||
else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
|
||||
pr_warn("Ignoring impossible MBM counter offset\n");
|
||||
|
||||
/*
|
||||
* A reasonable upper limit on the max threshold is the number
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/intel-family.h>
|
||||
#include <asm/resctrl_sched.h>
|
||||
#include <asm/resctrl.h>
|
||||
#include <asm/perf_event.h>
|
||||
|
||||
#include "../../events/perf_event.h" /* For X86_CONFIG() */
|
||||
@@ -1326,9 +1326,9 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
|
||||
* pseudo-locked region will still be here on return.
|
||||
*
|
||||
* The mutex has to be released temporarily to avoid a potential
|
||||
* deadlock with the mm->mmap_sem semaphore which is obtained in
|
||||
* the device_create() and debugfs_create_dir() callpath below
|
||||
* as well as before the mmap() callback is called.
|
||||
* deadlock with the mm->mmap_lock which is obtained in the
|
||||
* device_create() and debugfs_create_dir() callpath below as well as
|
||||
* before the mmap() callback is called.
|
||||
*/
|
||||
mutex_unlock(&rdtgroup_mutex);
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
|
||||
#include <uapi/linux/magic.h>
|
||||
|
||||
#include <asm/resctrl_sched.h>
|
||||
#include <asm/resctrl.h>
|
||||
#include "internal.h"
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
|
||||
@@ -1859,6 +1859,19 @@ static int set_cache_qos_cfg(int level, bool enable)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Restore the qos cfg state when a domain comes online */
|
||||
void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
|
||||
{
|
||||
if (!r->alloc_capable)
|
||||
return;
|
||||
|
||||
if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA])
|
||||
l2_qos_cfg_update(&r->alloc_enabled);
|
||||
|
||||
if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA])
|
||||
l3_qos_cfg_update(&r->alloc_enabled);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable or disable the MBA software controller
|
||||
* which helps user specify bandwidth in MBps.
|
||||
@@ -2459,7 +2472,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
|
||||
goto out_destroy;
|
||||
|
||||
if (is_mbm_event(mevt->evtid))
|
||||
mon_event_read(&rr, d, prgrp, mevt->evtid, true);
|
||||
mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
|
||||
}
|
||||
kernfs_activate(kn);
|
||||
return 0;
|
||||
@@ -3072,7 +3085,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
|
||||
* If the rdtgroup is a mon group and parent directory
|
||||
* is a valid "mon_groups" directory, remove the mon group.
|
||||
*/
|
||||
if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
|
||||
if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
|
||||
rdtgrp != &rdtgroup_default) {
|
||||
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
|
||||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
|
||||
ret = rdtgroup_ctrl_remove(kn, rdtgrp);
|
||||
@@ -3185,10 +3199,10 @@ int __init rdtgroup_init(void)
|
||||
* during the debugfs directory creation also &sb->s_type->i_mutex_key
|
||||
* (the lockdep class of inode->i_rwsem). Other filesystem
|
||||
* interactions (eg. SyS_getdents) have the lock ordering:
|
||||
* &sb->s_type->i_mutex_key --> &mm->mmap_sem
|
||||
* During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
|
||||
* &sb->s_type->i_mutex_key --> &mm->mmap_lock
|
||||
* During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
|
||||
* is taken, thus creating dependency:
|
||||
* &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause
|
||||
* &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
|
||||
* issues considering the other two lock dependencies.
|
||||
* By creating the debugfs directory here we avoid a dependency
|
||||
* that may cause deadlock (even though file operations cannot
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/crash_core.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
void arch_crash_save_vmcoreinfo(void)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/crash_core.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
void arch_crash_save_vmcoreinfo(void)
|
||||
|
||||
@@ -6,12 +6,10 @@
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
extern void double_fault(void);
|
||||
#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
|
||||
|
||||
#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)
|
||||
@@ -22,7 +20,7 @@ static void set_df_gdt_entry(unsigned int cpu);
|
||||
* Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks
|
||||
* we're running the doublefault task. Cannot return.
|
||||
*/
|
||||
asmlinkage notrace void __noreturn doublefault_shim(void)
|
||||
asmlinkage noinstr void __noreturn doublefault_shim(void)
|
||||
{
|
||||
unsigned long cr2;
|
||||
struct pt_regs regs;
|
||||
@@ -41,7 +39,7 @@ asmlinkage notrace void __noreturn doublefault_shim(void)
|
||||
* Fill in pt_regs. A downside of doing this in C is that the unwinder
|
||||
* won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
|
||||
* won't successfully unwind to the source of the double fault.
|
||||
* The main dump from do_double_fault() is fine, though, since it
|
||||
* The main dump from exc_double_fault() is fine, though, since it
|
||||
* uses these regs directly.
|
||||
*
|
||||
* If anyone ever cares, this could be moved to asm.
|
||||
@@ -71,7 +69,7 @@ asmlinkage notrace void __noreturn doublefault_shim(void)
|
||||
regs.cx = TSS(cx);
|
||||
regs.bx = TSS(bx);
|
||||
|
||||
do_double_fault(®s, 0, cr2);
|
||||
exc_double_fault(®s, 0, cr2);
|
||||
|
||||
/*
|
||||
* x86_32 does not save the original CR3 anywhere on a task switch.
|
||||
@@ -85,7 +83,6 @@ asmlinkage notrace void __noreturn doublefault_shim(void)
|
||||
*/
|
||||
panic("cannot return from double fault\n");
|
||||
}
|
||||
NOKPROBE_SYMBOL(doublefault_shim);
|
||||
|
||||
DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
|
||||
.tss = {
|
||||
@@ -96,7 +93,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
|
||||
.ldt = 0,
|
||||
.io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
|
||||
|
||||
.ip = (unsigned long) double_fault,
|
||||
.ip = (unsigned long) asm_exc_double_fault,
|
||||
.flags = X86_EFLAGS_FIXED,
|
||||
.es = __USER_DS,
|
||||
.cs = __KERNEL_CS,
|
||||
|
||||
@@ -65,7 +65,7 @@ bool in_entry_stack(unsigned long *stack, struct stack_info *info)
|
||||
}
|
||||
|
||||
static void printk_stack_address(unsigned long address, int reliable,
|
||||
char *log_lvl)
|
||||
const char *log_lvl)
|
||||
{
|
||||
touch_nmi_watchdog();
|
||||
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
|
||||
@@ -160,7 +160,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
|
||||
}
|
||||
|
||||
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, char *log_lvl)
|
||||
unsigned long *stack, const char *log_lvl)
|
||||
{
|
||||
struct unwind_state state;
|
||||
struct stack_info stack_info = {0};
|
||||
@@ -279,7 +279,8 @@ next:
|
||||
}
|
||||
}
|
||||
|
||||
void show_stack(struct task_struct *task, unsigned long *sp)
|
||||
void show_stack(struct task_struct *task, unsigned long *sp,
|
||||
const char *loglvl)
|
||||
{
|
||||
task = task ? : current;
|
||||
|
||||
@@ -290,7 +291,7 @@ void show_stack(struct task_struct *task, unsigned long *sp)
|
||||
if (!sp && task == current)
|
||||
sp = get_stack_pointer(current, NULL);
|
||||
|
||||
show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
|
||||
show_trace_log_lvl(task, NULL, sp, loglvl);
|
||||
}
|
||||
|
||||
void show_stack_regs(struct pt_regs *regs)
|
||||
|
||||
@@ -87,7 +87,6 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
|
||||
|
||||
static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
#ifdef CONFIG_DOUBLEFAULT
|
||||
struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id());
|
||||
struct doublefault_stack *ss = &cea->doublefault_stack;
|
||||
|
||||
@@ -103,9 +102,6 @@ static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info)
|
||||
info->next_sp = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp);
|
||||
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -22,15 +22,13 @@
|
||||
static const char * const exception_stack_names[] = {
|
||||
[ ESTACK_DF ] = "#DF",
|
||||
[ ESTACK_NMI ] = "NMI",
|
||||
[ ESTACK_DB2 ] = "#DB2",
|
||||
[ ESTACK_DB1 ] = "#DB1",
|
||||
[ ESTACK_DB ] = "#DB",
|
||||
[ ESTACK_MCE ] = "#MC",
|
||||
};
|
||||
|
||||
const char *stack_type_name(enum stack_type type)
|
||||
{
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
||||
|
||||
if (type == STACK_TYPE_IRQ)
|
||||
return "IRQ";
|
||||
@@ -79,7 +77,6 @@ static const
|
||||
struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
|
||||
EPAGERANGE(DF),
|
||||
EPAGERANGE(NMI),
|
||||
EPAGERANGE(DB1),
|
||||
EPAGERANGE(DB),
|
||||
EPAGERANGE(MCE),
|
||||
};
|
||||
@@ -91,7 +88,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
|
||||
struct pt_regs *regs;
|
||||
unsigned int k;
|
||||
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
||||
|
||||
begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
|
||||
/*
|
||||
@@ -183,7 +180,8 @@ recursion_check:
|
||||
*/
|
||||
if (visit_mask) {
|
||||
if (*visit_mask & (1UL << info->type)) {
|
||||
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
|
||||
if (task == current)
|
||||
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
|
||||
goto unknown;
|
||||
}
|
||||
*visit_mask |= 1UL << info->type;
|
||||
|
||||
@@ -910,14 +910,6 @@ static int __init parse_memmap_one(char *p)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strncmp(p, "exactmap", 8)) {
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
/*
|
||||
* If we are doing a crash dump, we still need to know
|
||||
* the real memory size before the original memory map is
|
||||
* reset.
|
||||
*/
|
||||
saved_max_pfn = e820__end_of_ram_pfn();
|
||||
#endif
|
||||
e820_table->nr_entries = 0;
|
||||
userdef = 1;
|
||||
return 0;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <linux/pci_regs.h>
|
||||
#include <linux/pci_ids.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/fcntl.h>
|
||||
@@ -15,12 +16,8 @@
|
||||
#include <xen/hvc-console.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/intel-mid.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <linux/usb/ehci_def.h>
|
||||
#include <linux/usb/xhci-dbgp.h>
|
||||
#include <linux/efi.h>
|
||||
#include <asm/efi.h>
|
||||
#include <asm/pci_x86.h>
|
||||
|
||||
/* Simple VGA output */
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/random.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/espfix.h>
|
||||
|
||||
@@ -291,15 +291,13 @@ void fpu__drop(struct fpu *fpu)
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear FPU registers by setting them up from
|
||||
* the init fpstate:
|
||||
* Clear FPU registers by setting them up from the init fpstate.
|
||||
* Caller must do fpregs_[un]lock() around it.
|
||||
*/
|
||||
static inline void copy_init_fpstate_to_fpregs(void)
|
||||
static inline void copy_init_fpstate_to_fpregs(u64 features_mask)
|
||||
{
|
||||
fpregs_lock();
|
||||
|
||||
if (use_xsave())
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, -1);
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, features_mask);
|
||||
else if (static_cpu_has(X86_FEATURE_FXSR))
|
||||
copy_kernel_to_fxregs(&init_fpstate.fxsave);
|
||||
else
|
||||
@@ -307,9 +305,6 @@ static inline void copy_init_fpstate_to_fpregs(void)
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_OSPKE))
|
||||
copy_init_pkru_to_fpregs();
|
||||
|
||||
fpregs_mark_activate();
|
||||
fpregs_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -318,18 +313,40 @@ static inline void copy_init_fpstate_to_fpregs(void)
|
||||
* Called by sys_execve(), by the signal handler code and by various
|
||||
* error paths.
|
||||
*/
|
||||
void fpu__clear(struct fpu *fpu)
|
||||
static void fpu__clear(struct fpu *fpu, bool user_only)
|
||||
{
|
||||
WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */
|
||||
WARN_ON_FPU(fpu != ¤t->thread.fpu);
|
||||
|
||||
fpu__drop(fpu);
|
||||
if (!static_cpu_has(X86_FEATURE_FPU)) {
|
||||
fpu__drop(fpu);
|
||||
fpu__initialize(fpu);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure fpstate is cleared and initialized.
|
||||
*/
|
||||
fpu__initialize(fpu);
|
||||
if (static_cpu_has(X86_FEATURE_FPU))
|
||||
copy_init_fpstate_to_fpregs();
|
||||
fpregs_lock();
|
||||
|
||||
if (user_only) {
|
||||
if (!fpregs_state_valid(fpu, smp_processor_id()) &&
|
||||
xfeatures_mask_supervisor())
|
||||
copy_kernel_to_xregs(&fpu->state.xsave,
|
||||
xfeatures_mask_supervisor());
|
||||
copy_init_fpstate_to_fpregs(xfeatures_mask_user());
|
||||
} else {
|
||||
copy_init_fpstate_to_fpregs(xfeatures_mask_all);
|
||||
}
|
||||
|
||||
fpregs_mark_activate();
|
||||
fpregs_unlock();
|
||||
}
|
||||
|
||||
void fpu__clear_user_states(struct fpu *fpu)
|
||||
{
|
||||
fpu__clear(fpu, true);
|
||||
}
|
||||
|
||||
void fpu__clear_all(struct fpu *fpu)
|
||||
{
|
||||
fpu__clear(fpu, false);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -224,7 +224,8 @@ static void __init fpu__init_system_xstate_size_legacy(void)
|
||||
*/
|
||||
u64 __init fpu__get_supported_xfeatures_mask(void)
|
||||
{
|
||||
return XCNTXT_MASK;
|
||||
return XFEATURE_MASK_USER_SUPPORTED |
|
||||
XFEATURE_MASK_SUPERVISOR_SUPPORTED;
|
||||
}
|
||||
|
||||
/* Legacy code to initialize eager fpu mode. */
|
||||
|
||||
@@ -139,7 +139,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
|
||||
} else {
|
||||
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
|
||||
if (!ret)
|
||||
ret = validate_xstate_header(&xsave->header);
|
||||
ret = validate_user_xstate_header(&xsave->header);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -211,9 +211,9 @@ retry:
|
||||
}
|
||||
|
||||
static inline void
|
||||
sanitize_restored_xstate(union fpregs_state *state,
|
||||
struct user_i387_ia32_struct *ia32_env,
|
||||
u64 xfeatures, int fx_only)
|
||||
sanitize_restored_user_xstate(union fpregs_state *state,
|
||||
struct user_i387_ia32_struct *ia32_env,
|
||||
u64 user_xfeatures, int fx_only)
|
||||
{
|
||||
struct xregs_state *xsave = &state->xsave;
|
||||
struct xstate_header *header = &xsave->header;
|
||||
@@ -226,13 +226,22 @@ sanitize_restored_xstate(union fpregs_state *state,
|
||||
*/
|
||||
|
||||
/*
|
||||
* Init the state that is not present in the memory
|
||||
* layout and not enabled by the OS.
|
||||
* 'user_xfeatures' might have bits clear which are
|
||||
* set in header->xfeatures. This represents features that
|
||||
* were in init state prior to a signal delivery, and need
|
||||
* to be reset back to the init state. Clear any user
|
||||
* feature bits which are set in the kernel buffer to get
|
||||
* them back to the init state.
|
||||
*
|
||||
* Supervisor state is unchanged by input from userspace.
|
||||
* Ensure supervisor state bits stay set and supervisor
|
||||
* state is not modified.
|
||||
*/
|
||||
if (fx_only)
|
||||
header->xfeatures = XFEATURE_MASK_FPSSE;
|
||||
else
|
||||
header->xfeatures &= xfeatures;
|
||||
header->xfeatures &= user_xfeatures |
|
||||
xfeatures_mask_supervisor();
|
||||
}
|
||||
|
||||
if (use_fxsr()) {
|
||||
@@ -252,16 +261,24 @@ sanitize_restored_xstate(union fpregs_state *state,
|
||||
*/
|
||||
static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
|
||||
{
|
||||
u64 init_bv;
|
||||
int r;
|
||||
|
||||
if (use_xsave()) {
|
||||
if (fx_only) {
|
||||
u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
|
||||
return copy_user_to_fxregs(buf);
|
||||
} else {
|
||||
u64 init_bv = xfeatures_mask & ~xbv;
|
||||
if (unlikely(init_bv))
|
||||
init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
|
||||
|
||||
r = copy_user_to_fxregs(buf);
|
||||
if (!r)
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
|
||||
return copy_user_to_xregs(buf, xbv);
|
||||
return r;
|
||||
} else {
|
||||
init_bv = xfeatures_mask_user() & ~xbv;
|
||||
|
||||
r = copy_user_to_xregs(buf, xbv);
|
||||
if (!r && unlikely(init_bv))
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
|
||||
return r;
|
||||
}
|
||||
} else if (use_fxsr()) {
|
||||
return copy_user_to_fxregs(buf);
|
||||
@@ -277,7 +294,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
struct task_struct *tsk = current;
|
||||
struct fpu *fpu = &tsk->thread.fpu;
|
||||
struct user_i387_ia32_struct env;
|
||||
u64 xfeatures = 0;
|
||||
u64 user_xfeatures = 0;
|
||||
int fx_only = 0;
|
||||
int ret = 0;
|
||||
|
||||
@@ -285,7 +302,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
IS_ENABLED(CONFIG_IA32_EMULATION));
|
||||
|
||||
if (!buf) {
|
||||
fpu__clear(fpu);
|
||||
fpu__clear_user_states(fpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -310,32 +327,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
trace_x86_fpu_xstate_check_failed(fpu);
|
||||
} else {
|
||||
state_size = fx_sw_user.xstate_size;
|
||||
xfeatures = fx_sw_user.xfeatures;
|
||||
user_xfeatures = fx_sw_user.xfeatures;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The current state of the FPU registers does not matter. By setting
|
||||
* TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate
|
||||
* is not modified on context switch and that the xstate is considered
|
||||
* to be loaded again on return to userland (overriding last_cpu avoids
|
||||
* the optimisation).
|
||||
*/
|
||||
set_thread_flag(TIF_NEED_FPU_LOAD);
|
||||
__fpu_invalidate_fpregs_state(fpu);
|
||||
|
||||
if ((unsigned long)buf_fx % 64)
|
||||
fx_only = 1;
|
||||
/*
|
||||
* For 32-bit frames with fxstate, copy the fxstate so it can be
|
||||
* reconstructed later.
|
||||
*/
|
||||
if (ia32_fxstate) {
|
||||
ret = __copy_from_user(&env, buf, sizeof(env));
|
||||
if (ret)
|
||||
goto err_out;
|
||||
envp = &env;
|
||||
} else {
|
||||
|
||||
if (!ia32_fxstate) {
|
||||
/*
|
||||
* Attempt to restore the FPU registers directly from user
|
||||
* memory. For that to succeed, the user access cannot cause
|
||||
@@ -345,20 +344,65 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
*/
|
||||
fpregs_lock();
|
||||
pagefault_disable();
|
||||
ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
|
||||
ret = copy_user_to_fpregs_zeroing(buf_fx, user_xfeatures, fx_only);
|
||||
pagefault_enable();
|
||||
if (!ret) {
|
||||
|
||||
/*
|
||||
* Restore supervisor states: previous context switch
|
||||
* etc has done XSAVES and saved the supervisor states
|
||||
* in the kernel buffer from which they can be restored
|
||||
* now.
|
||||
*
|
||||
* We cannot do a single XRSTORS here - which would
|
||||
* be nice - because the rest of the FPU registers are
|
||||
* being restored from a user buffer directly. The
|
||||
* single XRSTORS happens below, when the user buffer
|
||||
* has been copied to the kernel one.
|
||||
*/
|
||||
if (test_thread_flag(TIF_NEED_FPU_LOAD) &&
|
||||
xfeatures_mask_supervisor())
|
||||
copy_kernel_to_xregs(&fpu->state.xsave,
|
||||
xfeatures_mask_supervisor());
|
||||
fpregs_mark_activate();
|
||||
fpregs_unlock();
|
||||
return 0;
|
||||
}
|
||||
fpregs_deactivate(fpu);
|
||||
fpregs_unlock();
|
||||
} else {
|
||||
/*
|
||||
* For 32-bit frames with fxstate, copy the fxstate so it can
|
||||
* be reconstructed later.
|
||||
*/
|
||||
ret = __copy_from_user(&env, buf, sizeof(env));
|
||||
if (ret)
|
||||
goto err_out;
|
||||
envp = &env;
|
||||
}
|
||||
|
||||
/*
|
||||
* By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
|
||||
* not modified on context switch and that the xstate is considered
|
||||
* to be loaded again on return to userland (overriding last_cpu avoids
|
||||
* the optimisation).
|
||||
*/
|
||||
fpregs_lock();
|
||||
|
||||
if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
|
||||
|
||||
/*
|
||||
* Supervisor states are not modified by user space input. Save
|
||||
* current supervisor states first and invalidate the FPU regs.
|
||||
*/
|
||||
if (xfeatures_mask_supervisor())
|
||||
copy_supervisor_to_kernel(&fpu->state.xsave);
|
||||
set_thread_flag(TIF_NEED_FPU_LOAD);
|
||||
}
|
||||
__fpu_invalidate_fpregs_state(fpu);
|
||||
fpregs_unlock();
|
||||
|
||||
if (use_xsave() && !fx_only) {
|
||||
u64 init_bv = xfeatures_mask & ~xfeatures;
|
||||
u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
|
||||
|
||||
if (using_compacted_format()) {
|
||||
ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
|
||||
@@ -366,17 +410,24 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
|
||||
|
||||
if (!ret && state_size > offsetof(struct xregs_state, header))
|
||||
ret = validate_xstate_header(&fpu->state.xsave.header);
|
||||
ret = validate_user_xstate_header(&fpu->state.xsave.header);
|
||||
}
|
||||
if (ret)
|
||||
goto err_out;
|
||||
|
||||
sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
|
||||
sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
|
||||
fx_only);
|
||||
|
||||
fpregs_lock();
|
||||
if (unlikely(init_bv))
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
|
||||
ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
|
||||
|
||||
/*
|
||||
* Restore previously saved supervisor xstates along with
|
||||
* copied-in user xstates.
|
||||
*/
|
||||
ret = copy_kernel_to_xregs_err(&fpu->state.xsave,
|
||||
user_xfeatures | xfeatures_mask_supervisor());
|
||||
|
||||
} else if (use_fxsr()) {
|
||||
ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
|
||||
@@ -385,11 +436,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
|
||||
sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
|
||||
fx_only);
|
||||
|
||||
fpregs_lock();
|
||||
if (use_xsave()) {
|
||||
u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
|
||||
u64 init_bv;
|
||||
|
||||
init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE;
|
||||
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
|
||||
}
|
||||
|
||||
@@ -410,7 +464,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
|
||||
|
||||
err_out:
|
||||
if (ret)
|
||||
fpu__clear(fpu);
|
||||
fpu__clear_user_states(fpu);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -465,7 +519,7 @@ void fpu__init_prepare_fx_sw_frame(void)
|
||||
|
||||
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
|
||||
fx_sw_reserved.extended_size = size;
|
||||
fx_sw_reserved.xfeatures = xfeatures_mask;
|
||||
fx_sw_reserved.xfeatures = xfeatures_mask_user();
|
||||
fx_sw_reserved.xstate_size = fpu_user_xstate_size;
|
||||
|
||||
if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
|
||||
|
||||
@@ -54,13 +54,15 @@ static short xsave_cpuid_features[] __initdata = {
};

/*
* Mask of xstate features supported by the CPU and the kernel:
* This represents the full set of bits that should ever be set in a kernel
* XSAVE buffer, both supervisor and user xstates.
*/
u64 xfeatures_mask __read_mostly;
u64 xfeatures_mask_all __read_mostly;
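xfeatures_mask_all now carries both halves of the feature space, and the user/supervisor accessors filter it for XCR0 and IA32_XSS respectively. A standalone sketch of that split; the mask constants below are placeholders, not the kernel's values:

/* Illustrative only: placeholder bit values, not the kernel's definitions. */
#include <stdio.h>
#include <stdint.h>

#define XFEATURE_MASK_USER_SUPPORTED		0x0000000000000007ULL	/* e.g. FP, SSE, YMM */
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED	0x0000000000000100ULL	/* e.g. one XSAVES-only state */

static uint64_t xfeatures_mask_all;	/* everything enabled: user | supervisor */

static uint64_t xfeatures_mask_user(void)
{
	return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
}

static uint64_t xfeatures_mask_supervisor(void)
{
	return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}

int main(void)
{
	xfeatures_mask_all = XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	/* Only the user half ever goes into XCR0; the supervisor half goes into IA32_XSS. */
	printf("user: %#llx  supervisor: %#llx\n",
	       (unsigned long long)xfeatures_mask_user(),
	       (unsigned long long)xfeatures_mask_supervisor());
	return 0;
}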

static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
|
||||
* The XSAVE area of kernel can be in standard or compacted format;
|
||||
@@ -76,7 +78,7 @@ unsigned int fpu_user_xstate_size;
|
||||
*/
|
||||
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
|
||||
{
|
||||
u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
|
||||
u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all;
|
||||
|
||||
if (unlikely(feature_name)) {
|
||||
long xfeature_idx, max_idx;
|
||||
@@ -150,7 +152,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
|
||||
* None of the feature bits are in init state. So nothing else
|
||||
* to do for us, as the memory layout is up to date.
|
||||
*/
|
||||
if ((xfeatures & xfeatures_mask) == xfeatures_mask)
|
||||
if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all)
|
||||
return;
|
||||
|
||||
/*
|
||||
@@ -177,7 +179,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
|
||||
* in a special way already:
|
||||
*/
|
||||
feature_bit = 0x2;
|
||||
xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
|
||||
xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2;
|
||||
|
||||
/*
|
||||
* Update all the remaining memory layouts according to their
|
||||
@@ -205,30 +207,39 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
|
||||
*/
|
||||
void fpu__init_cpu_xstate(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
|
||||
u64 unsup_bits;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
|
||||
return;
|
||||
/*
|
||||
* Make it clear that XSAVES supervisor states are not yet
|
||||
* implemented should anyone expect it to work by changing
|
||||
* bits in XFEATURE_MASK_* macros and XCR0.
|
||||
* Unsupported supervisor xstates should not be found in
|
||||
* the xfeatures mask.
|
||||
*/
|
||||
WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
|
||||
"x86/fpu: XSAVES supervisor states are not yet implemented.\n");
|
||||
unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
|
||||
WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n",
|
||||
unsup_bits);
|
||||
|
||||
xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
|
||||
xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
|
||||
|
||||
cr4_set_bits(X86_CR4_OSXSAVE);
|
||||
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
|
||||
|
||||
/*
|
||||
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
|
||||
* managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
|
||||
* states can be set here.
|
||||
*/
|
||||
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
|
||||
|
||||
/*
|
||||
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES))
|
||||
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that in the future we will likely need a pair of
|
||||
* functions here: one for user xstates and the other for
|
||||
* system xstates. For now, they are the same.
|
||||
*/
|
||||
static int xfeature_enabled(enum xfeature xfeature)
|
||||
static bool xfeature_enabled(enum xfeature xfeature)
|
||||
{
|
||||
return !!(xfeatures_mask & (1UL << xfeature));
|
||||
return xfeatures_mask_all & BIT_ULL(xfeature);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -382,6 +393,33 @@ static void __init setup_xstate_comp_offsets(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup offsets of a supervisor-state-only XSAVES buffer:
|
||||
*
|
||||
* The offsets stored in xstate_comp_offsets[] only work for one specific
|
||||
* value of the Requested Feature BitMap (RFBM). In cases where a different
|
||||
* RFBM value is used, a different set of offsets is required. This set of
|
||||
* offsets is for when RFBM=xfeatures_mask_supervisor().
|
||||
*/
|
||||
static void __init setup_supervisor_only_offsets(void)
|
||||
{
|
||||
unsigned int next_offset;
|
||||
int i;
|
||||
|
||||
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
|
||||
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
|
||||
if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
|
||||
continue;
|
||||
|
||||
if (xfeature_is_aligned(i))
|
||||
next_offset = ALIGN(next_offset, 64);
|
||||
|
||||
xstate_supervisor_only_offsets[i] = next_offset;
|
||||
next_offset += xstate_sizes[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out xstate component offsets and sizes
|
||||
*/
|
||||
@@ -415,7 +453,7 @@ static void __init setup_init_fpu_buf(void)
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES))
|
||||
init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
|
||||
xfeatures_mask;
|
||||
xfeatures_mask_all;
|
||||
|
||||
/*
|
||||
* Init all the features state with header.xfeatures being 0x0
|
||||
@@ -438,7 +476,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
|
||||
* format. Checking a supervisor state's uncompacted offset is
|
||||
* an error.
|
||||
*/
|
||||
if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) {
|
||||
if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
|
||||
WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
|
||||
return -1;
|
||||
}
|
||||
@@ -472,10 +510,10 @@ int using_compacted_format(void)
|
||||
}
|
||||
|
||||
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
|
||||
int validate_xstate_header(const struct xstate_header *hdr)
|
||||
int validate_user_xstate_header(const struct xstate_header *hdr)
|
||||
{
|
||||
/* No unknown or supervisor features may be set */
|
||||
if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
|
||||
if (hdr->xfeatures & ~xfeatures_mask_user())
|
||||
return -EINVAL;
|
||||
|
||||
/* Userspace must use the uncompacted format */
|
||||
@@ -610,15 +648,12 @@ static void do_extra_xstate_size_checks(void)
|
||||
|
||||
|
||||
/*
|
||||
* Get total size of enabled xstates in XCR0/xfeatures_mask.
|
||||
* Get total size of enabled xstates in XCR0 | IA32_XSS.
|
||||
*
|
||||
* Note the SDM's wording here. "sub-function 0" only enumerates
|
||||
* the size of the *user* states. If we use it to size a buffer
|
||||
* that we use 'XSAVES' on, we could potentially overflow the
|
||||
* buffer because 'XSAVES' saves system states too.
|
||||
*
|
||||
* Note that we do not currently set any bits on IA32_XSS so
|
||||
* 'XCR0 | IA32_XSS == XCR0' for now.
|
||||
*/
|
||||
static unsigned int __init get_xsaves_size(void)
|
||||
{
|
||||
@@ -700,7 +735,7 @@ static int __init init_xstate_size(void)
|
||||
*/
|
||||
static void fpu__init_disable_system_xstate(void)
|
||||
{
|
||||
xfeatures_mask = 0;
|
||||
xfeatures_mask_all = 0;
|
||||
cr4_clear_bits(X86_CR4_OSXSAVE);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
}
|
||||
@@ -735,16 +770,26 @@ void __init fpu__init_system_xstate(void)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find user xstates supported by the processor.
|
||||
*/
|
||||
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
|
||||
xfeatures_mask = eax + ((u64)edx << 32);
|
||||
xfeatures_mask_all = eax + ((u64)edx << 32);
|
||||
|
||||
if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
|
||||
/*
|
||||
* Find supervisor xstates supported by the processor.
|
||||
*/
|
||||
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
|
||||
xfeatures_mask_all |= ecx + ((u64)edx << 32);
|
||||
|
||||
if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
|
||||
/*
|
||||
* This indicates that something really unexpected happened
|
||||
* with the enumeration. Disable XSAVE and try to continue
|
||||
* booting without it. This is too early to BUG().
|
||||
*/
|
||||
pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
|
||||
pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
|
||||
xfeatures_mask_all);
|
||||
goto out_disable;
|
||||
}
|
||||
|
||||
@@ -753,10 +798,10 @@ void __init fpu__init_system_xstate(void)
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
|
||||
if (!boot_cpu_has(xsave_cpuid_features[i]))
|
||||
xfeatures_mask &= ~BIT(i);
|
||||
xfeatures_mask_all &= ~BIT_ULL(i);
|
||||
}
|
||||
|
||||
xfeatures_mask &= fpu__get_supported_xfeatures_mask();
|
||||
xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
|
||||
|
||||
/* Enable xstate instructions to be able to continue with initialization: */
|
||||
fpu__init_cpu_xstate();
|
||||
@@ -768,15 +813,16 @@ void __init fpu__init_system_xstate(void)
|
||||
* Update info used for ptrace frames; use standard-format size and no
|
||||
* supervisor xstates:
|
||||
*/
|
||||
update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);
|
||||
update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user());
|
||||
|
||||
fpu__init_prepare_fx_sw_frame();
|
||||
setup_init_fpu_buf();
|
||||
setup_xstate_comp_offsets();
|
||||
setup_supervisor_only_offsets();
|
||||
print_xstate_offset_size();
|
||||
|
||||
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
|
||||
xfeatures_mask,
|
||||
xfeatures_mask_all,
|
||||
fpu_kernel_xstate_size,
|
||||
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
|
||||
return;
|
||||
@@ -795,7 +841,14 @@ void fpu__resume_cpu(void)
|
||||
* Restore XCR0 on xsave capable CPUs:
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVE))
|
||||
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
|
||||
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
|
||||
|
||||
/*
|
||||
* Restore IA32_XSS. The same CPUID bit enumerates support
|
||||
* of XSAVES and MSR_IA32_XSS.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES))
|
||||
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -840,10 +893,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
|
||||
|
||||
/*
|
||||
* We should not ever be requesting features that we
|
||||
* have not enabled. Remember that xfeatures_mask is
|
||||
* what we write to the XCR0 register.
|
||||
* have not enabled.
|
||||
*/
|
||||
WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
|
||||
WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
|
||||
"get of unsupported state");
|
||||
/*
|
||||
* This assumes the last 'xsave*' instruction to
|
||||
@@ -957,18 +1009,31 @@ static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is similar to user_regset_copyout(), but will not add offset to
|
||||
* the source data pointer or increment pos, count, kbuf, and ubuf.
|
||||
*/
|
||||
static inline void
|
||||
__copy_xstate_to_kernel(void *kbuf, const void *data,
|
||||
unsigned int offset, unsigned int size, unsigned int size_total)
|
||||
static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count)
|
||||
{
|
||||
if (offset < size_total) {
|
||||
unsigned int copy = min(size, size_total - offset);
|
||||
if (*pos < to) {
|
||||
unsigned size = to - *pos;
|
||||
|
||||
memcpy(kbuf + offset, data, copy);
|
||||
if (size > *count)
|
||||
size = *count;
|
||||
memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size);
|
||||
*kbuf += size;
|
||||
*pos += size;
|
||||
*count -= size;
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_part(unsigned offset, unsigned size, void *from,
|
||||
void **kbuf, unsigned *pos, unsigned *count)
|
||||
{
|
||||
fill_gap(offset, kbuf, pos, count);
|
||||
if (size > *count)
|
||||
size = *count;
|
||||
if (size) {
|
||||
memcpy(*kbuf, from, size);
|
||||
*kbuf += size;
|
||||
*pos += size;
|
||||
*count -= size;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -981,8 +1046,9 @@ __copy_xstate_to_kernel(void *kbuf, const void *data,
|
||||
*/
|
||||
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
|
||||
{
|
||||
unsigned int offset, size;
|
||||
struct xstate_header header;
|
||||
const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr);
|
||||
unsigned count = size_total;
|
||||
int i;
|
||||
|
||||
/*
|
||||
@@ -996,48 +1062,44 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
|
||||
*/
|
||||
memset(&header, 0, sizeof(header));
|
||||
header.xfeatures = xsave->header.xfeatures;
|
||||
header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
|
||||
header.xfeatures &= xfeatures_mask_user();
|
||||
|
||||
if (header.xfeatures & XFEATURE_MASK_FP)
|
||||
copy_part(0, off_mxcsr,
|
||||
&xsave->i387, &kbuf, &offset_start, &count);
|
||||
if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM))
|
||||
copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE,
|
||||
&xsave->i387.mxcsr, &kbuf, &offset_start, &count);
|
||||
if (header.xfeatures & XFEATURE_MASK_FP)
|
||||
copy_part(offsetof(struct fxregs_state, st_space), 128,
|
||||
&xsave->i387.st_space, &kbuf, &offset_start, &count);
|
||||
if (header.xfeatures & XFEATURE_MASK_SSE)
|
||||
copy_part(xstate_offsets[XFEATURE_MASK_SSE], 256,
|
||||
&xsave->i387.xmm_space, &kbuf, &offset_start, &count);
|
||||
/*
|
||||
* Fill xsave->i387.sw_reserved value for ptrace frame:
|
||||
*/
|
||||
copy_part(offsetof(struct fxregs_state, sw_reserved), 48,
|
||||
xstate_fx_sw_bytes, &kbuf, &offset_start, &count);
|
||||
/*
|
||||
* Copy xregs_state->header:
|
||||
*/
|
||||
offset = offsetof(struct xregs_state, header);
|
||||
size = sizeof(header);
|
||||
copy_part(offsetof(struct xregs_state, header), sizeof(header),
|
||||
&header, &kbuf, &offset_start, &count);
|
||||
|
||||
__copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
|
||||
|
||||
for (i = 0; i < XFEATURE_MAX; i++) {
|
||||
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
|
||||
/*
|
||||
* Copy only in-use xstates:
|
||||
*/
|
||||
if ((header.xfeatures >> i) & 1) {
|
||||
void *src = __raw_xsave_addr(xsave, i);
|
||||
|
||||
offset = xstate_offsets[i];
|
||||
size = xstate_sizes[i];
|
||||
|
||||
/* The next component has to fit fully into the output buffer: */
|
||||
if (offset + size > size_total)
|
||||
break;
|
||||
|
||||
__copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
|
||||
copy_part(xstate_offsets[i], xstate_sizes[i],
|
||||
src, &kbuf, &offset_start, &count);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (xfeatures_mxcsr_quirk(header.xfeatures)) {
|
||||
offset = offsetof(struct fxregs_state, mxcsr);
|
||||
size = MXCSR_AND_FLAGS_SIZE;
|
||||
__copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill xsave->i387.sw_reserved value for ptrace frame:
|
||||
*/
|
||||
offset = offsetof(struct fxregs_state, sw_reserved);
|
||||
size = sizeof(xstate_fx_sw_bytes);
|
||||
|
||||
__copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
|
||||
fill_gap(size_total, &kbuf, &offset_start, &count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1080,7 +1142,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
|
||||
*/
|
||||
memset(&header, 0, sizeof(header));
|
||||
header.xfeatures = xsave->header.xfeatures;
|
||||
header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
|
||||
header.xfeatures &= xfeatures_mask_user();
|
||||
|
||||
/*
|
||||
* Copy xregs_state->header:
|
||||
@@ -1147,7 +1209,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
|
||||
|
||||
memcpy(&hdr, kbuf + offset, size);
|
||||
|
||||
if (validate_xstate_header(&hdr))
|
||||
if (validate_user_xstate_header(&hdr))
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < XFEATURE_MAX; i++) {
|
||||
@@ -1173,7 +1235,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
|
||||
* The state that came in from userspace was user-state only.
|
||||
* Mask all the user states out of 'xfeatures':
|
||||
*/
|
||||
xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
|
||||
xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
|
||||
|
||||
/*
|
||||
* Add back in the features that came in from userspace:
|
||||
@@ -1201,7 +1263,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
|
||||
if (__copy_from_user(&hdr, ubuf + offset, size))
|
||||
return -EFAULT;
|
||||
|
||||
if (validate_xstate_header(&hdr))
|
||||
if (validate_user_xstate_header(&hdr))
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < XFEATURE_MAX; i++) {
|
||||
@@ -1229,7 +1291,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
|
||||
* The state that came in from userspace was user-state only.
|
||||
* Mask all the user states out of 'xfeatures':
|
||||
*/
|
||||
xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
|
||||
xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
|
||||
|
||||
/*
|
||||
* Add back in the features that came in from userspace:
|
||||
@@ -1239,6 +1301,61 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Save only supervisor states to the kernel buffer. This blows away all
|
||||
* old states, and is intended to be used only in __fpu__restore_sig(), where
|
||||
* user states are restored from the user buffer.
|
||||
*/
|
||||
void copy_supervisor_to_kernel(struct xregs_state *xstate)
|
||||
{
|
||||
struct xstate_header *header;
|
||||
u64 max_bit, min_bit;
|
||||
u32 lmask, hmask;
|
||||
int err, i;
|
||||
|
||||
if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES)))
|
||||
return;
|
||||
|
||||
if (!xfeatures_mask_supervisor())
|
||||
return;
|
||||
|
||||
max_bit = __fls(xfeatures_mask_supervisor());
|
||||
min_bit = __ffs(xfeatures_mask_supervisor());
|
||||
|
||||
lmask = xfeatures_mask_supervisor();
|
||||
hmask = xfeatures_mask_supervisor() >> 32;
|
||||
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
|
||||
|
||||
/* We should never fault when copying to a kernel buffer: */
|
||||
if (WARN_ON_FPU(err))
|
||||
return;
|
||||
|
||||
/*
|
||||
* At this point, the buffer has only supervisor states and must be
|
||||
* converted back to normal kernel format.
|
||||
*/
|
||||
header = &xstate->header;
|
||||
header->xcomp_bv |= xfeatures_mask_all;
|
||||
|
||||
/*
|
||||
* This only moves states up in the buffer. Start with
|
||||
* the last state and move backwards so that states are
|
||||
* not overwritten until after they are moved. Note:
|
||||
* memmove() allows overlapping src/dst buffers.
|
||||
*/
|
||||
for (i = max_bit; i >= min_bit; i--) {
|
||||
u8 *xbuf = (u8 *)xstate;
|
||||
|
||||
if (!((header->xfeatures >> i) & 1))
|
||||
continue;
|
||||
|
||||
/* Move xfeature 'i' into its normal location */
|
||||
memmove(xbuf + xstate_comp_offsets[i],
|
||||
xbuf + xstate_supervisor_only_offsets[i],
|
||||
xstate_sizes[i]);
|
||||
}
|
||||
}
|
||||
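The comment above is the key to copy_supervisor_to_kernel(): after XSAVES the buffer is in compacted form, and each present component has to be moved up to its standard offset, highest feature first so that nothing is clobbered before it is moved. A minimal user-space model of that in-place expansion follows; the feature sizes and offsets are made up and have nothing to do with the real XSTATE layout.

#include <assert.h>
#include <string.h>

#define NFEAT 4

/* Made-up per-feature state sizes, purely for illustration. */
static const unsigned size[NFEAT] = { 16, 8, 8, 8 };

int main(void)
{
	unsigned char buf[64] = { 0 };
	unsigned present = 0x9;			/* features 0 and 3 present */
	unsigned std_off[NFEAT], cmp_off[NFEAT];
	unsigned o_std = 0, o_cmp = 0;
	int i;

	/* Standard offsets count every feature; compacted only present ones. */
	for (i = 0; i < NFEAT; i++) {
		std_off[i] = o_std;
		o_std += size[i];
		cmp_off[i] = o_cmp;
		if (present & (1u << i))
			o_cmp += size[i];
	}

	/* Pretend XSAVES wrote feature 3 compacted right after feature 0. */
	memset(buf + cmp_off[3], 0x33, size[3]);

	/*
	 * Expand in place: walk from the highest present feature down, as
	 * the loop above does, so no source bytes are overwritten before
	 * they have been moved (std_off[i] >= cmp_off[i] for every i).
	 */
	for (i = NFEAT - 1; i >= 0; i--) {
		if (!(present & (1u << i)))
			continue;
		memmove(buf + std_off[i], buf + cmp_off[i], size[i]);
	}

	assert(buf[std_off[3]] == 0x33);	/* feature 3 now at offset 32 */
	return 0;
}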
|
||||
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||
/*
|
||||
* Report the amount of time elapsed in millisecond since last AVX512
|
||||
|
||||
@@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { }
|
||||
|
||||
/* Defined as markers to the end of the ftrace default trampolines */
|
||||
extern void ftrace_regs_caller_end(void);
|
||||
extern void ftrace_epilogue(void);
|
||||
extern void ftrace_regs_caller_ret(void);
|
||||
extern void ftrace_caller_end(void);
|
||||
extern void ftrace_caller_op_ptr(void);
|
||||
extern void ftrace_regs_caller_op_ptr(void);
|
||||
|
||||
@@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
||||
call_offset = (unsigned long)ftrace_regs_call;
|
||||
} else {
|
||||
start_offset = (unsigned long)ftrace_caller;
|
||||
end_offset = (unsigned long)ftrace_epilogue;
|
||||
end_offset = (unsigned long)ftrace_caller_end;
|
||||
op_offset = (unsigned long)ftrace_caller_op_ptr;
|
||||
call_offset = (unsigned long)ftrace_call;
|
||||
}
|
||||
@@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
||||
if (WARN_ON(ret < 0))
|
||||
goto fail;
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
|
||||
ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
|
||||
ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
|
||||
if (WARN_ON(ret < 0))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* The address of the ftrace_ops that is used for this trampoline
|
||||
* is stored at the end of the trampoline. This will be used to
|
||||
@@ -407,7 +415,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
||||
|
||||
set_vm_flush_reset_perms(trampoline);
|
||||
|
||||
set_memory_ro((unsigned long)trampoline, npages);
|
||||
if (likely(system_state != SYSTEM_BOOTING))
|
||||
set_memory_ro((unsigned long)trampoline, npages);
|
||||
set_memory_x((unsigned long)trampoline, npages);
|
||||
return (unsigned long)trampoline;
|
||||
fail:
|
||||
@@ -415,6 +424,32 @@ fail:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void set_ftrace_ops_ro(void)
|
||||
{
|
||||
struct ftrace_ops *ops;
|
||||
unsigned long start_offset;
|
||||
unsigned long end_offset;
|
||||
unsigned long npages;
|
||||
unsigned long size;
|
||||
|
||||
do_for_each_ftrace_op(ops, ftrace_ops_list) {
|
||||
if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
|
||||
continue;
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
|
||||
start_offset = (unsigned long)ftrace_regs_caller;
|
||||
end_offset = (unsigned long)ftrace_regs_caller_end;
|
||||
} else {
|
||||
start_offset = (unsigned long)ftrace_caller;
|
||||
end_offset = (unsigned long)ftrace_caller_end;
|
||||
}
|
||||
size = end_offset - start_offset;
|
||||
size = size + RET_SIZE + sizeof(void *);
|
||||
npages = DIV_ROUND_UP(size, PAGE_SIZE);
|
||||
set_memory_ro((unsigned long)ops->trampoline, npages);
|
||||
} while_for_each_ftrace_op(ops);
|
||||
}
|
||||
|
||||
static unsigned long calc_trampoline_call_offset(bool save_regs)
|
||||
{
|
||||
unsigned long start_offset;
|
||||
|
||||
@@ -189,5 +189,5 @@ return_to_handler:
|
||||
movl %eax, %ecx
|
||||
popl %edx
|
||||
popl %eax
|
||||
JMP_NOSPEC %ecx
|
||||
JMP_NOSPEC ecx
|
||||
#endif
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include <asm/frame.h>
|
||||
|
||||
.code64
|
||||
.section .entry.text, "ax"
|
||||
.section .text, "ax"
|
||||
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
/* Save parent and function stack frames (rip and rbp) */
|
||||
@@ -23,7 +23,7 @@
|
||||
#endif /* CONFIG_FRAME_POINTER */
|
||||
|
||||
/* Size of stack used to save mcount regs in save_mcount_regs */
|
||||
#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE)
|
||||
#define MCOUNT_REG_SIZE (FRAME_SIZE + MCOUNT_FRAME_SIZE)
|
||||
|
||||
/*
|
||||
* gcc -pg option adds a call to 'mcount' in most functions.
|
||||
@@ -77,7 +77,7 @@
|
||||
/*
|
||||
* We add enough stack to save all regs.
|
||||
*/
|
||||
subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp
|
||||
subq $(FRAME_SIZE), %rsp
|
||||
movq %rax, RAX(%rsp)
|
||||
movq %rcx, RCX(%rsp)
|
||||
movq %rdx, RDX(%rsp)
|
||||
@@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
|
||||
* think twice before adding any new code or changing the
|
||||
* layout here.
|
||||
*/
|
||||
SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
|
||||
SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
|
||||
|
||||
jmp ftrace_epilogue
|
||||
SYM_FUNC_END(ftrace_caller);
|
||||
|
||||
SYM_FUNC_START(ftrace_epilogue)
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
|
||||
jmp ftrace_stub
|
||||
@@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
|
||||
*/
|
||||
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
|
||||
retq
|
||||
SYM_FUNC_END(ftrace_caller)
|
||||
SYM_FUNC_END(ftrace_epilogue)
|
||||
|
||||
SYM_FUNC_START(ftrace_regs_caller)
|
||||
/* Save the current flags before any operations that can change them */
|
||||
pushfq
|
||||
|
||||
UNWIND_HINT_SAVE
|
||||
|
||||
/* added 8 bytes to save flags */
|
||||
save_mcount_regs 8
|
||||
/* save_mcount_regs fills in first two parameters */
|
||||
@@ -233,10 +235,13 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
|
||||
movq ORIG_RAX(%rsp), %rax
|
||||
movq %rax, MCOUNT_REG_SIZE-8(%rsp)
|
||||
|
||||
/* If ORIG_RAX is anything but zero, make this a call to that */
|
||||
/*
|
||||
* If ORIG_RAX is anything but zero, make this a call to that.
|
||||
* See arch_ftrace_set_direct_caller().
|
||||
*/
|
||||
movq ORIG_RAX(%rsp), %rax
|
||||
cmpq $0, %rax
|
||||
je 1f
|
||||
testq %rax, %rax
|
||||
jz 1f
|
||||
|
||||
/* Swap the flags with orig_rax */
|
||||
movq MCOUNT_REG_SIZE(%rsp), %rdi
|
||||
@@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
|
||||
movq %rax, MCOUNT_REG_SIZE(%rsp)
|
||||
|
||||
restore_mcount_regs 8
|
||||
/* Restore flags */
|
||||
popfq
|
||||
|
||||
jmp 2f
|
||||
SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL);
|
||||
UNWIND_HINT_RET_OFFSET
|
||||
jmp ftrace_epilogue
|
||||
|
||||
1: restore_mcount_regs
|
||||
|
||||
|
||||
2:
|
||||
/*
|
||||
* The stack layout is nondeterministic here, depending on which path was
|
||||
* taken. This confuses objtool and ORC, rightfully so. For now,
|
||||
* pretend the stack always looks like the non-direct case.
|
||||
*/
|
||||
UNWIND_HINT_RESTORE
|
||||
|
||||
/* Restore flags */
|
||||
popfq
|
||||
|
||||
@@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
|
||||
* to the return.
|
||||
*/
|
||||
SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
|
||||
|
||||
jmp ftrace_epilogue
|
||||
|
||||
SYM_FUNC_END(ftrace_regs_caller)
|
||||
@@ -303,7 +301,7 @@ trace:
|
||||
* function tracing is enabled.
|
||||
*/
|
||||
movq ftrace_trace_function, %r8
|
||||
CALL_NOSPEC %r8
|
||||
CALL_NOSPEC r8
|
||||
restore_mcount_regs
|
||||
|
||||
jmp fgraph_trace
|
||||
@@ -340,6 +338,6 @@ SYM_CODE_START(return_to_handler)
|
||||
movq 8(%rsp), %rdx
|
||||
movq (%rsp), %rax
|
||||
addq $24, %rsp
|
||||
JMP_NOSPEC %rdi
|
||||
JMP_NOSPEC rdi
|
||||
SYM_CODE_END(return_to_handler)
|
||||
#endif
|
||||
|
||||
@@ -20,13 +20,13 @@
|
||||
#include <linux/io.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/mem_encrypt.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/kdebug.h>
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/cache.h>
|
||||
@@ -29,15 +29,16 @@
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/paravirt.h>
|
||||
#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
|
||||
#else
|
||||
#define INTERRUPT_RETURN iretq
|
||||
#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
|
||||
#endif
|
||||
|
||||
/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
|
||||
/*
|
||||
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
|
||||
* because we need identity-mapped pages.
|
||||
*
|
||||
*/
|
||||
|
||||
#define l4_index(x) (((x) >> 39) & 511)
|
||||
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
|
||||
|
||||
|
||||
@@ -32,6 +32,8 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/user.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/* Per cpu debug control register value */
|
||||
DEFINE_PER_CPU(unsigned long, cpu_dr7);
|
||||
@@ -97,6 +99,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
|
||||
unsigned long *dr7;
|
||||
int i;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
|
||||
|
||||
@@ -115,6 +119,12 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
|
||||
dr7 = this_cpu_ptr(&cpu_dr7);
|
||||
*dr7 |= encode_dr7(i, info->len, info->type);
|
||||
|
||||
/*
|
||||
* Ensure we first write cpu_dr7 before we set the DR7 register.
|
||||
* This ensures an NMI never sees a zero cpu_dr7 while DR7 is non-zero.
|
||||
*/
|
||||
barrier();
|
||||
|
||||
set_debugreg(*dr7, 7);
|
||||
if (info->mask)
|
||||
set_dr_addr_mask(info->mask, i);
|
||||
@@ -134,9 +144,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
|
||||
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
|
||||
{
|
||||
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
|
||||
unsigned long *dr7;
|
||||
unsigned long dr7;
|
||||
int i;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
for (i = 0; i < HBP_NUM; i++) {
|
||||
struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
|
||||
|
||||
@@ -149,12 +161,20 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
|
||||
if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
|
||||
return;
|
||||
|
||||
dr7 = this_cpu_ptr(&cpu_dr7);
|
||||
*dr7 &= ~__encode_dr7(i, info->len, info->type);
|
||||
dr7 = this_cpu_read(cpu_dr7);
|
||||
dr7 &= ~__encode_dr7(i, info->len, info->type);
|
||||
|
||||
set_debugreg(*dr7, 7);
|
||||
set_debugreg(dr7, 7);
|
||||
if (info->mask)
|
||||
set_dr_addr_mask(0, i);
|
||||
|
||||
/*
|
||||
* Ensure the write to cpu_dr7 is after we've set the DR7 register.
|
||||
* This ensures an NMI never sees a zero cpu_dr7 while DR7 is non-zero.
|
||||
*/
|
||||
barrier();
|
||||
|
||||
this_cpu_write(cpu_dr7, dr7);
|
||||
}
|
||||
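A toy illustration of the ordering that the two barrier() comments above insist on: the hardware register (DR7) and its per-CPU software mirror (cpu_dr7) are updated in opposite orders on install and uninstall, so an NMI that consults the mirror can never find it empty while the hardware still has breakpoints armed. This is a hypothetical user-space model, not kernel code; barrier() here is only a compiler barrier, as in the kernel.

#include <assert.h>

#define barrier()	__asm__ __volatile__("" ::: "memory")

/* 'hw' stands in for the DR7 register, 'mirror' for the per-CPU cpu_dr7
 * copy that an NMI would consult. */
static volatile unsigned long hw, mirror;

static void install(unsigned long bits)
{
	mirror |= bits;		/* publish to the software mirror first... */
	barrier();
	hw |= bits;		/* ...then arm the hardware */
}

static void uninstall(unsigned long bits)
{
	hw &= ~bits;		/* disarm the hardware first... */
	barrier();
	mirror &= ~bits;	/* ...then retire the mirror */
}

int main(void)
{
	/*
	 * In either order of operations there is no window where 'hw' is
	 * armed while 'mirror' claims nothing is, which is exactly the
	 * state the kernel comments above rule out for NMIs.
	 */
	install(0x1);
	assert(hw == mirror);
	uninstall(0x1);
	assert(hw == 0 && mirror == 0);
	return 0;
}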
|
||||
static int arch_bp_generic_len(int x86_len)
|
||||
@@ -227,10 +247,76 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
|
||||
return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
|
||||
}
|
||||
|
||||
/*
* Checks whether the range [addr, end] overlaps the area [base, base + size).
*/
static inline bool within_area(unsigned long addr, unsigned long end,
			       unsigned long base, unsigned long size)
{
	return end >= base && addr < (base + size);
}
|
||||
/*
|
||||
* Checks whether the range from addr to end, inclusive, overlaps the fixed
|
||||
* mapped CPU entry area range or other ranges used for CPU entry.
|
||||
*/
|
||||
static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
/* CPU entry area is always used for CPU entry */
|
||||
if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
|
||||
CPU_ENTRY_AREA_TOTAL_SIZE))
|
||||
return true;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
/* The original rw GDT is being used after load_direct_gdt() */
|
||||
if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
|
||||
GDT_SIZE))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* cpu_tss_rw is not directly referenced by hardware, but
* it is also used by the CPU entry code.
|
||||
*/
|
||||
if (within_area(addr, end,
|
||||
(unsigned long)&per_cpu(cpu_tss_rw, cpu),
|
||||
sizeof(struct tss_struct)))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* cpu_tlbstate.user_pcid_flush_mask is used for CPU entry.
|
||||
* A data breakpoint on it would cause an unwanted #DB.
|
||||
* Protect the full cpu_tlbstate structure to be sure.
|
||||
*/
|
||||
if (within_area(addr, end,
|
||||
(unsigned long)&per_cpu(cpu_tlbstate, cpu),
|
||||
sizeof(struct tlb_state)))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
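Because within_area() mixes an inclusive [addr, end] breakpoint range with a half-open [base, base + size) area, the boundary cases are worth spelling out. A small, self-contained check with hypothetical addresses (user-space C, not kernel code):

#include <assert.h>
#include <stdbool.h>

/* Same predicate as within_area() above: [addr, end] is inclusive,
 * [base, base + size) is half-open. */
static bool within_area(unsigned long addr, unsigned long end,
			unsigned long base, unsigned long size)
{
	return end >= base && addr < (base + size);
}

int main(void)
{
	/* Hypothetical protected area: 4 KiB starting at 0x1000. */
	assert( within_area(0x0ff0, 0x1000, 0x1000, 0x1000));	/* touches base byte */
	assert( within_area(0x1fff, 0x2100, 0x1000, 0x1000));	/* touches last byte */
	assert(!within_area(0x2000, 0x2007, 0x1000, 0x1000));	/* starts past area  */
	assert(!within_area(0x0f00, 0x0fff, 0x1000, 0x1000));	/* ends before area  */
	return 0;
}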
|
||||
static int arch_build_bp_info(struct perf_event *bp,
|
||||
const struct perf_event_attr *attr,
|
||||
struct arch_hw_breakpoint *hw)
|
||||
{
|
||||
unsigned long bp_end;
|
||||
|
||||
bp_end = attr->bp_addr + attr->bp_len - 1;
|
||||
if (bp_end < attr->bp_addr)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Prevent any breakpoint of any type that overlaps the CPU
|
||||
* entry area and data. This protects the IST stacks and also
|
||||
* reduces the chance that we ever find out what happens if
|
||||
* there's a data breakpoint on the GDT, IDT, or TSS.
|
||||
*/
|
||||
if (within_cpu_entry(attr->bp_addr, bp_end))
|
||||
return -EINVAL;
|
||||
|
||||
hw->address = attr->bp_addr;
|
||||
hw->mask = 0;
|
||||
|
||||
@@ -439,7 +525,7 @@ static int hw_breakpoint_handler(struct die_args *args)
|
||||
{
|
||||
int i, cpu, rc = NOTIFY_STOP;
|
||||
struct perf_event *bp;
|
||||
unsigned long dr7, dr6;
|
||||
unsigned long dr6;
|
||||
unsigned long *dr6_p;
|
||||
|
||||
/* The DR6 value is pointed to by args->err */
|
||||
@@ -454,9 +540,6 @@ static int hw_breakpoint_handler(struct die_args *args)
|
||||
if ((dr6 & DR_TRAP_BITS) == 0)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
get_debugreg(dr7, 7);
|
||||
/* Disable breakpoints during exception handling */
|
||||
set_debugreg(0UL, 7);
|
||||
/*
|
||||
* Assert that local interrupts are disabled
|
||||
* Reset the DRn bits in the virtualized register value.
|
||||
@@ -513,7 +596,6 @@ static int hw_breakpoint_handler(struct die_args *args)
|
||||
(dr6 & (~DR_TRAP_BITS)))
|
||||
rc = NOTIFY_DONE;
|
||||
|
||||
set_debugreg(dr7, 7);
|
||||
put_cpu();
|
||||
|
||||
return rc;
|
||||
|
||||
@@ -15,11 +15,11 @@
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/i8259.h>
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/desc.h>
|
||||
@@ -51,15 +53,23 @@ struct idt_data {
|
||||
#define TSKG(_vector, _gdt) \
|
||||
G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3)
|
||||
|
||||
#define IDT_TABLE_SIZE (IDT_ENTRIES * sizeof(gate_desc))
|
||||
|
||||
static bool idt_setup_done __initdata;
|
||||
|
||||
/*
|
||||
* Early traps run on the DEFAULT_STACK because the other interrupt
|
||||
* stacks work only after cpu_init().
|
||||
*/
|
||||
static const __initconst struct idt_data early_idts[] = {
|
||||
INTG(X86_TRAP_DB, debug),
|
||||
SYSG(X86_TRAP_BP, int3),
|
||||
INTG(X86_TRAP_DB, asm_exc_debug),
|
||||
SYSG(X86_TRAP_BP, asm_exc_int3),
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
INTG(X86_TRAP_PF, page_fault),
|
||||
/*
|
||||
* Not possible on 64-bit. See idt_setup_early_pf() for details.
|
||||
*/
|
||||
INTG(X86_TRAP_PF, asm_exc_page_fault),
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -70,33 +80,33 @@ static const __initconst struct idt_data early_idts[] = {
|
||||
* set up TSS.
|
||||
*/
|
||||
static const __initconst struct idt_data def_idts[] = {
|
||||
INTG(X86_TRAP_DE, divide_error),
|
||||
INTG(X86_TRAP_NMI, nmi),
|
||||
INTG(X86_TRAP_BR, bounds),
|
||||
INTG(X86_TRAP_UD, invalid_op),
|
||||
INTG(X86_TRAP_NM, device_not_available),
|
||||
INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun),
|
||||
INTG(X86_TRAP_TS, invalid_TSS),
|
||||
INTG(X86_TRAP_NP, segment_not_present),
|
||||
INTG(X86_TRAP_SS, stack_segment),
|
||||
INTG(X86_TRAP_GP, general_protection),
|
||||
INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug),
|
||||
INTG(X86_TRAP_MF, coprocessor_error),
|
||||
INTG(X86_TRAP_AC, alignment_check),
|
||||
INTG(X86_TRAP_XF, simd_coprocessor_error),
|
||||
INTG(X86_TRAP_DE, asm_exc_divide_error),
|
||||
INTG(X86_TRAP_NMI, asm_exc_nmi),
|
||||
INTG(X86_TRAP_BR, asm_exc_bounds),
|
||||
INTG(X86_TRAP_UD, asm_exc_invalid_op),
|
||||
INTG(X86_TRAP_NM, asm_exc_device_not_available),
|
||||
INTG(X86_TRAP_OLD_MF, asm_exc_coproc_segment_overrun),
|
||||
INTG(X86_TRAP_TS, asm_exc_invalid_tss),
|
||||
INTG(X86_TRAP_NP, asm_exc_segment_not_present),
|
||||
INTG(X86_TRAP_SS, asm_exc_stack_segment),
|
||||
INTG(X86_TRAP_GP, asm_exc_general_protection),
|
||||
INTG(X86_TRAP_SPURIOUS, asm_exc_spurious_interrupt_bug),
|
||||
INTG(X86_TRAP_MF, asm_exc_coprocessor_error),
|
||||
INTG(X86_TRAP_AC, asm_exc_alignment_check),
|
||||
INTG(X86_TRAP_XF, asm_exc_simd_coprocessor_error),
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
TSKG(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS),
|
||||
#else
|
||||
INTG(X86_TRAP_DF, double_fault),
|
||||
INTG(X86_TRAP_DF, asm_exc_double_fault),
|
||||
#endif
|
||||
INTG(X86_TRAP_DB, debug),
|
||||
INTG(X86_TRAP_DB, asm_exc_debug),
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
INTG(X86_TRAP_MC, &machine_check),
|
||||
INTG(X86_TRAP_MC, asm_exc_machine_check),
|
||||
#endif
|
||||
|
||||
SYSG(X86_TRAP_OF, overflow),
|
||||
SYSG(X86_TRAP_OF, asm_exc_overflow),
|
||||
#if defined(CONFIG_IA32_EMULATION)
|
||||
SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat),
|
||||
#elif defined(CONFIG_X86_32)
|
||||
@@ -109,95 +119,63 @@ static const __initconst struct idt_data def_idts[] = {
|
||||
*/
|
||||
static const __initconst struct idt_data apic_idts[] = {
|
||||
#ifdef CONFIG_SMP
|
||||
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
|
||||
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
|
||||
INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt),
|
||||
INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt),
|
||||
INTG(REBOOT_VECTOR, reboot_interrupt),
|
||||
INTG(RESCHEDULE_VECTOR, asm_sysvec_reschedule_ipi),
|
||||
INTG(CALL_FUNCTION_VECTOR, asm_sysvec_call_function),
|
||||
INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single),
|
||||
INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup),
|
||||
INTG(REBOOT_VECTOR, asm_sysvec_reboot),
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_THERMAL_VECTOR
|
||||
INTG(THERMAL_APIC_VECTOR, thermal_interrupt),
|
||||
INTG(THERMAL_APIC_VECTOR, asm_sysvec_thermal),
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE_THRESHOLD
|
||||
INTG(THRESHOLD_APIC_VECTOR, threshold_interrupt),
|
||||
INTG(THRESHOLD_APIC_VECTOR, asm_sysvec_threshold),
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
INTG(DEFERRED_ERROR_VECTOR, deferred_error_interrupt),
|
||||
INTG(DEFERRED_ERROR_VECTOR, asm_sysvec_deferred_error),
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
INTG(LOCAL_TIMER_VECTOR, apic_timer_interrupt),
|
||||
INTG(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi),
|
||||
INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt),
|
||||
INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi),
|
||||
# ifdef CONFIG_HAVE_KVM
|
||||
INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi),
|
||||
INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
|
||||
INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
|
||||
INTG(POSTED_INTR_VECTOR, asm_sysvec_kvm_posted_intr_ipi),
|
||||
INTG(POSTED_INTR_WAKEUP_VECTOR, asm_sysvec_kvm_posted_intr_wakeup_ipi),
|
||||
INTG(POSTED_INTR_NESTED_VECTOR, asm_sysvec_kvm_posted_intr_nested_ipi),
|
||||
# endif
|
||||
# ifdef CONFIG_IRQ_WORK
|
||||
INTG(IRQ_WORK_VECTOR, irq_work_interrupt),
|
||||
INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work),
|
||||
# endif
|
||||
#ifdef CONFIG_X86_UV
|
||||
INTG(UV_BAU_MESSAGE, uv_bau_message_intr1),
|
||||
#endif
|
||||
INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt),
|
||||
INTG(ERROR_APIC_VECTOR, error_interrupt),
|
||||
# ifdef CONFIG_X86_UV
|
||||
INTG(UV_BAU_MESSAGE, asm_sysvec_uv_bau_message),
|
||||
# endif
|
||||
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
|
||||
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Early traps run on the DEFAULT_STACK because the other interrupt
|
||||
* stacks work only after cpu_init().
|
||||
*/
|
||||
static const __initconst struct idt_data early_pf_idts[] = {
|
||||
INTG(X86_TRAP_PF, page_fault),
|
||||
};
|
||||
|
||||
/*
|
||||
* Override for the debug_idt. Same as the default, but with interrupt
|
||||
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
|
||||
*/
|
||||
static const __initconst struct idt_data dbg_idts[] = {
|
||||
INTG(X86_TRAP_DB, debug),
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Must be page-aligned because the real IDT is used in a fixmap. */
|
||||
gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
|
||||
/* Must be page-aligned because the real IDT is used in the cpu entry area */
|
||||
static gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
|
||||
|
||||
struct desc_ptr idt_descr __ro_after_init = {
|
||||
.size = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1,
|
||||
.size = IDT_TABLE_SIZE - 1,
|
||||
.address = (unsigned long) idt_table,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* No need to be aligned, but done to keep all IDTs defined the same way. */
|
||||
gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
|
||||
void load_current_idt(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
load_idt(&idt_descr);
|
||||
}
|
||||
|
||||
/*
|
||||
* The exceptions which use Interrupt stacks. They are setup after
|
||||
* cpu_init() when the TSS has been initialized.
|
||||
*/
|
||||
static const __initconst struct idt_data ist_idts[] = {
|
||||
ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
|
||||
ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
|
||||
ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
|
||||
#ifdef CONFIG_X86_MCE
|
||||
ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Override for the debug_idt. Same as the default, but with interrupt
|
||||
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
|
||||
*/
|
||||
const struct desc_ptr debug_idt_descr = {
|
||||
.size = IDT_ENTRIES * 16 - 1,
|
||||
.address = (unsigned long) debug_idt_table,
|
||||
};
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
bool idt_is_f00f_address(unsigned long address)
|
||||
{
|
||||
return ((address - idt_descr.address) >> 3) == 6;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
|
||||
@@ -214,7 +192,7 @@ static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
static __init void
|
||||
idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys)
|
||||
{
|
||||
gate_desc desc;
|
||||
@@ -227,7 +205,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy
|
||||
}
|
||||
}
|
||||
|
||||
static void set_intr_gate(unsigned int n, const void *addr)
|
||||
static __init void set_intr_gate(unsigned int n, const void *addr)
|
||||
{
|
||||
struct idt_data data;
|
||||
|
||||
@@ -266,6 +244,27 @@ void __init idt_setup_traps(void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Early traps run on the DEFAULT_STACK because the other interrupt
|
||||
* stacks work only after cpu_init().
|
||||
*/
|
||||
static const __initconst struct idt_data early_pf_idts[] = {
|
||||
INTG(X86_TRAP_PF, asm_exc_page_fault),
|
||||
};
|
||||
|
||||
/*
|
||||
* The exceptions which use Interrupt stacks. They are setup after
|
||||
* cpu_init() when the TSS has been initialized.
|
||||
*/
|
||||
static const __initconst struct idt_data ist_idts[] = {
|
||||
ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB),
|
||||
ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI),
|
||||
ISTG(X86_TRAP_DF, asm_exc_double_fault, IST_INDEX_DF),
|
||||
#ifdef CONFIG_X86_MCE
|
||||
ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE),
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* idt_setup_early_pf - Initialize the idt table with early pagefault handler
|
||||
*
|
||||
@@ -273,8 +272,10 @@ void __init idt_setup_traps(void)
|
||||
* cpu_init() is invoked and sets up TSS. The IST variant is installed
|
||||
* after that.
|
||||
*
|
||||
* FIXME: Why is 32bit and 64bit installing the PF handler at different
|
||||
* places in the early setup code?
|
||||
* Note that X86_64 cannot install the real #PF handler in
* idt_setup_early_traps() because the memory initialization needs the #PF
|
||||
* handler from the early_idt_handler_array to initialize the early page
|
||||
* tables.
|
||||
*/
|
||||
void __init idt_setup_early_pf(void)
|
||||
{
|
||||
@@ -289,18 +290,21 @@ void __init idt_setup_ist_traps(void)
|
||||
{
|
||||
idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* idt_setup_debugidt_traps - Initialize the debug idt table with debug traps
|
||||
*/
|
||||
void __init idt_setup_debugidt_traps(void)
|
||||
{
|
||||
memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
|
||||
|
||||
idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void __init idt_map_in_cea(void)
|
||||
{
|
||||
/*
|
||||
* Set the IDT descriptor to a fixed read-only location in the cpu
|
||||
* entry area, so that the "sidt" instruction will not leak the
|
||||
* location of the kernel, and to defend the IDT against arbitrary
|
||||
* memory write vulnerabilities.
|
||||
*/
|
||||
cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
|
||||
PAGE_KERNEL_RO);
|
||||
idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
|
||||
}
|
||||
|
||||
/**
|
||||
* idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates
|
||||
*/
|
||||
@@ -318,11 +322,23 @@ void __init idt_setup_apic_and_irq_gates(void)
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
|
||||
set_bit(i, system_vectors);
|
||||
/*
|
||||
* Don't set the unassigned system vectors in the
|
||||
* system_vectors bitmap. Otherwise they show up in
|
||||
* /proc/interrupts.
|
||||
*/
|
||||
entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
|
||||
set_intr_gate(i, entry);
|
||||
}
|
||||
#endif
|
||||
/* Map IDT into CPU entry area and reload it. */
|
||||
idt_map_in_cea();
|
||||
load_idt(&idt_descr);
|
||||
|
||||
/* Make the IDT table read only */
|
||||
set_memory_ro((unsigned long)&idt_table, 1);
|
||||
|
||||
idt_setup_done = true;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -352,16 +368,14 @@ void idt_invalidate(void *addr)
|
||||
load_idt(&idt);
|
||||
}
|
||||
|
||||
void __init update_intr_gate(unsigned int n, const void *addr)
|
||||
void __init alloc_intr_gate(unsigned int n, const void *addr)
|
||||
{
|
||||
if (WARN_ON_ONCE(!test_bit(n, system_vectors)))
|
||||
if (WARN_ON(n < FIRST_SYSTEM_VECTOR))
|
||||
return;
|
||||
set_intr_gate(n, addr);
|
||||
}
|
||||
|
||||
void alloc_intr_gate(unsigned int n, const void *addr)
|
||||
{
|
||||
BUG_ON(n < FIRST_SYSTEM_VECTOR);
|
||||
if (!test_and_set_bit(n, system_vectors))
|
||||
if (WARN_ON(idt_setup_done))
|
||||
return;
|
||||
|
||||
if (!WARN_ON(test_and_set_bit(n, system_vectors)))
|
||||
set_intr_gate(n, addr);
|
||||
}
|
||||
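The reworked alloc_intr_gate() above enforces three rules: only system vectors may be claimed, nothing may be claimed once the IDT has been finalized, and a double claim is a bug. A toy user-space model of those rules (the constants below are made up, not the kernel's):

#include <assert.h>
#include <stdbool.h>

/* Made-up constants, chosen only to make the rules visible. */
#define NR_VECTORS		256
#define FIRST_SYSTEM_VECTOR	236

static bool setup_done;
static bool vector_used[NR_VECTORS];

static bool alloc_gate(unsigned int n)
{
	if (n < FIRST_SYSTEM_VECTOR)	/* only system vectors may be claimed */
		return false;
	if (setup_done)			/* too late: the IDT is finalized */
		return false;
	if (vector_used[n])		/* double allocation is a bug */
		return false;
	vector_used[n] = true;
	return true;
}

int main(void)
{
	assert(alloc_gate(240));	/* first claim succeeds */
	assert(!alloc_gate(240));	/* second claim is rejected */
	setup_done = true;
	assert(!alloc_gate(241));	/* nothing may be claimed after setup */
	return 0;
}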
|
||||
@@ -33,15 +33,15 @@ void io_bitmap_share(struct task_struct *tsk)
|
||||
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
|
||||
}
|
||||
|
||||
static void task_update_io_bitmap(void)
|
||||
static void task_update_io_bitmap(struct task_struct *tsk)
|
||||
{
|
||||
struct thread_struct *t = &current->thread;
|
||||
struct thread_struct *t = &tsk->thread;
|
||||
|
||||
if (t->iopl_emul == 3 || t->io_bitmap) {
|
||||
/* TSS update is handled on exit to user space */
|
||||
set_thread_flag(TIF_IO_BITMAP);
|
||||
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
|
||||
} else {
|
||||
clear_thread_flag(TIF_IO_BITMAP);
|
||||
clear_tsk_thread_flag(tsk, TIF_IO_BITMAP);
|
||||
/* Invalidate TSS */
|
||||
preempt_disable();
|
||||
tss_update_io_bitmap();
|
||||
@@ -49,12 +49,12 @@ static void task_update_io_bitmap(void)
|
||||
}
|
||||
}
|
||||
|
||||
void io_bitmap_exit(void)
|
||||
void io_bitmap_exit(struct task_struct *tsk)
|
||||
{
|
||||
struct io_bitmap *iobm = current->thread.io_bitmap;
|
||||
struct io_bitmap *iobm = tsk->thread.io_bitmap;
|
||||
|
||||
current->thread.io_bitmap = NULL;
|
||||
task_update_io_bitmap();
|
||||
tsk->thread.io_bitmap = NULL;
|
||||
task_update_io_bitmap(tsk);
|
||||
if (iobm && refcount_dec_and_test(&iobm->refcnt))
|
||||
kfree(iobm);
|
||||
}
|
||||
@@ -102,7 +102,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
||||
if (!iobm)
|
||||
return -ENOMEM;
|
||||
refcount_set(&iobm->refcnt, 1);
|
||||
io_bitmap_exit();
|
||||
io_bitmap_exit(current);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -134,7 +134,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
||||
}
|
||||
/* All permissions dropped? */
|
||||
if (max_long == UINT_MAX) {
|
||||
io_bitmap_exit();
|
||||
io_bitmap_exit(current);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -192,7 +192,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
|
||||
}
|
||||
|
||||
t->iopl_emul = level;
|
||||
task_update_io_bitmap();
|
||||
task_update_io_bitmap(current);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -13,12 +13,14 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/irq.h>
|
||||
|
||||
#include <asm/irq_stack.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <asm/trace/irq_vectors.h>
|
||||
@@ -26,9 +28,6 @@
|
||||
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
|
||||
EXPORT_PER_CPU_SYMBOL(irq_stat);
|
||||
|
||||
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
|
||||
EXPORT_PER_CPU_SYMBOL(irq_regs);
|
||||
|
||||
atomic_t irq_err_count;
|
||||
|
||||
/*
|
||||
@@ -224,35 +223,35 @@ u64 arch_irq_stat(void)
|
||||
return sum;
|
||||
}
|
||||
|
||||
static __always_inline void handle_irq(struct irq_desc *desc,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_X86_64))
|
||||
run_on_irqstack_cond(desc->handle_irq, desc, regs);
|
||||
else
|
||||
__handle_irq(desc, regs);
|
||||
}
|
||||
|
||||
/*
|
||||
* do_IRQ handles all normal device IRQs (the special
|
||||
* SMP cross-CPU interrupts have their own specific
|
||||
* handlers).
|
||||
* common_interrupt() handles all normal device IRQs (the special SMP
|
||||
* cross-CPU interrupts have their own entry points).
|
||||
*/
|
||||
__visible void __irq_entry do_IRQ(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_IRQ(common_interrupt)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
struct irq_desc * desc;
|
||||
/* high bit used in ret_from_ code */
|
||||
unsigned vector = ~regs->orig_ax;
|
||||
struct irq_desc *desc;
|
||||
|
||||
entering_irq();
|
||||
|
||||
/* entering_irq() tells RCU that we're not quiescent. Check it. */
|
||||
/* entry code tells RCU that we're not quiescent. Check it. */
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
|
||||
|
||||
desc = __this_cpu_read(vector_irq[vector]);
|
||||
if (likely(!IS_ERR_OR_NULL(desc))) {
|
||||
if (IS_ENABLED(CONFIG_X86_32))
|
||||
handle_irq(desc, regs);
|
||||
else
|
||||
generic_handle_irq_desc(desc);
|
||||
handle_irq(desc, regs);
|
||||
} else {
|
||||
ack_APIC_irq();
|
||||
|
||||
if (desc == VECTOR_UNUSED) {
|
||||
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
|
||||
pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
|
||||
__func__, smp_processor_id(),
|
||||
vector);
|
||||
} else {
|
||||
@@ -260,8 +259,6 @@ __visible void __irq_entry do_IRQ(struct pt_regs *regs)
|
||||
}
|
||||
}
|
||||
|
||||
exiting_irq();
|
||||
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
@@ -271,17 +268,16 @@ void (*x86_platform_ipi_callback)(void) = NULL;
|
||||
/*
|
||||
* Handler for X86_PLATFORM_IPI_VECTOR.
|
||||
*/
|
||||
__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
|
||||
inc_irq_stat(x86_platform_ipis);
|
||||
if (x86_platform_ipi_callback)
|
||||
x86_platform_ipi_callback();
|
||||
trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
#endif
|
||||
@@ -302,41 +298,29 @@ EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler);
|
||||
/*
|
||||
* Handler for POSTED_INTERRUPT_VECTOR.
|
||||
*/
|
||||
__visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
inc_irq_stat(kvm_posted_intr_ipis);
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
|
||||
*/
|
||||
__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
inc_irq_stat(kvm_posted_intr_wakeup_ipis);
|
||||
kvm_posted_intr_wakeup_handler();
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for POSTED_INTERRUPT_NESTED_VECTOR.
|
||||
*/
|
||||
__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
inc_irq_stat(kvm_posted_intr_nested_ipis);
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ void do_softirq_own_stack(void)
|
||||
call_on_stack(__do_softirq, isp);
|
||||
}
|
||||
|
||||
void handle_irq(struct irq_desc *desc, struct pt_regs *regs)
|
||||
void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
|
||||
{
|
||||
int overflow = check_stack_overflow();
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/irq_stack.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
@@ -43,7 +44,7 @@ static int map_irq_stack(unsigned int cpu)
|
||||
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
|
||||
va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL);
|
||||
if (!va)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -70,3 +71,8 @@ int irq_init_percpu_irqstack(unsigned int cpu)
|
||||
return 0;
|
||||
return map_irq_stack(cpu);
|
||||
}
|
||||
|
||||
void do_softirq_own_stack(void)
|
||||
{
|
||||
run_on_irqstack_cond(__do_softirq, NULL, NULL);
|
||||
}
|
||||
|
||||
@@ -9,18 +9,18 @@
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/idtentry.h>
|
||||
#include <asm/trace/irq_vectors.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work)
|
||||
{
|
||||
ipi_entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
trace_irq_work_entry(IRQ_WORK_VECTOR);
|
||||
inc_irq_stat(apic_irq_work_irqs);
|
||||
irq_work_run();
|
||||
trace_irq_work_exit(IRQ_WORK_VECTOR);
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
void arch_irq_work_raise(void)
|
||||
|
||||
@@ -16,11 +16,11 @@
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
@@ -39,8 +39,7 @@ static bool __read_mostly sched_itmt_capable;
|
||||
unsigned int __read_mostly sysctl_sched_itmt_enabled;
|
||||
|
||||
static int sched_itmt_update_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
void *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
unsigned int old_sysctl;
|
||||
int ret;
|
||||
|
||||
@@ -41,11 +41,11 @@
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/moduleloader.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/insn.h>
|
||||
@@ -1073,13 +1073,6 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);
|
||||
|
||||
int __init arch_populate_kprobe_blacklist(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
|
||||
(unsigned long)__irqentry_text_end);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return kprobe_add_area_blacklist((unsigned long)__entry_text_start,
|
||||
(unsigned long)__entry_text_end);
|
||||
}
|
||||
|
||||
@@ -16,11 +16,11 @@
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/frame.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/insn.h>
|
||||
@@ -286,9 +286,7 @@ static int can_optimize(unsigned long paddr)
|
||||
* stack handling and registers setup.
|
||||
*/
|
||||
if (((paddr >= (unsigned long)__entry_text_start) &&
|
||||
(paddr < (unsigned long)__entry_text_end)) ||
|
||||
((paddr >= (unsigned long)__irqentry_text_start) &&
|
||||
(paddr < (unsigned long)__irqentry_text_end)))
|
||||
(paddr < (unsigned long)__entry_text_end)))
|
||||
return 0;
|
||||
|
||||
/* Check there is enough space for a relative jump. */
|
||||
|
||||
@@ -35,6 +35,8 @@
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/cpuidle_haltpoll.h>
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
|
||||
|
||||
static int kvmapf = 1;
|
||||
|
||||
static int __init parse_no_kvmapf(char *arg)
|
||||
@@ -73,7 +75,6 @@ struct kvm_task_sleep_node {
|
||||
struct swait_queue_head wq;
|
||||
u32 token;
|
||||
int cpu;
|
||||
bool halted;
|
||||
};
|
||||
|
||||
static struct kvm_task_sleep_head {
|
||||
@@ -96,77 +97,64 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* @interrupt_kernel: Is this called from a routine which interrupts the kernel
|
||||
* (other than user space)?
|
||||
*/
|
||||
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
|
||||
static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
|
||||
{
|
||||
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
|
||||
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
|
||||
struct kvm_task_sleep_node n, *e;
|
||||
DECLARE_SWAITQUEUE(wait);
|
||||
|
||||
rcu_irq_enter();
|
||||
struct kvm_task_sleep_node *e;
|
||||
|
||||
raw_spin_lock(&b->lock);
|
||||
e = _find_apf_task(b, token);
|
||||
if (e) {
|
||||
/* dummy entry exist -> wake up was delivered ahead of PF */
|
||||
hlist_del(&e->link);
|
||||
kfree(e);
|
||||
raw_spin_unlock(&b->lock);
|
||||
|
||||
rcu_irq_exit();
|
||||
return;
|
||||
kfree(e);
|
||||
return false;
|
||||
}
|
||||
|
||||
n.token = token;
|
||||
n.cpu = smp_processor_id();
|
||||
n.halted = is_idle_task(current) ||
|
||||
(IS_ENABLED(CONFIG_PREEMPT_COUNT)
|
||||
? preempt_count() > 1 || rcu_preempt_depth()
|
||||
: interrupt_kernel);
|
||||
init_swait_queue_head(&n.wq);
|
||||
hlist_add_head(&n.link, &b->list);
|
||||
n->token = token;
|
||||
n->cpu = smp_processor_id();
|
||||
init_swait_queue_head(&n->wq);
|
||||
hlist_add_head(&n->link, &b->list);
|
||||
raw_spin_unlock(&b->lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
|
||||
* @token: Token to identify the sleep node entry
|
||||
*
|
||||
* Invoked from the async pagefault handling code or from the VM exit page
|
||||
* fault handler. In both cases RCU is watching.
|
||||
*/
|
||||
void kvm_async_pf_task_wait_schedule(u32 token)
|
||||
{
|
||||
struct kvm_task_sleep_node n;
|
||||
DECLARE_SWAITQUEUE(wait);
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
if (!kvm_async_pf_queue_task(token, &n))
|
||||
return;
|
||||
|
||||
for (;;) {
|
||||
if (!n.halted)
|
||||
prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
|
||||
prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
|
||||
if (hlist_unhashed(&n.link))
|
||||
break;
|
||||
|
||||
rcu_irq_exit();
|
||||
|
||||
if (!n.halted) {
|
||||
local_irq_enable();
|
||||
schedule();
|
||||
local_irq_disable();
|
||||
} else {
|
||||
/*
|
||||
* We cannot reschedule. So halt.
|
||||
*/
|
||||
native_safe_halt();
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
rcu_irq_enter();
|
||||
local_irq_enable();
|
||||
schedule();
|
||||
local_irq_disable();
|
||||
}
|
||||
if (!n.halted)
|
||||
finish_swait(&n.wq, &wait);
|
||||
|
||||
rcu_irq_exit();
|
||||
return;
|
||||
finish_swait(&n.wq, &wait);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
|
||||
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);
|
||||
|
||||
static void apf_task_wake_one(struct kvm_task_sleep_node *n)
|
||||
{
|
||||
hlist_del_init(&n->link);
|
||||
if (n->halted)
|
||||
smp_send_reschedule(n->cpu);
|
||||
else if (swq_has_sleeper(&n->wq))
|
||||
if (swq_has_sleeper(&n->wq))
|
||||
swake_up_one(&n->wq);
|
||||
}
|
||||
|
||||
@@ -175,12 +163,13 @@ static void apf_task_wake_all(void)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
|
||||
struct hlist_node *p, *next;
|
||||
struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
|
||||
struct kvm_task_sleep_node *n;
|
||||
struct hlist_node *p, *next;
|
||||
|
||||
raw_spin_lock(&b->lock);
|
||||
hlist_for_each_safe(p, next, &b->list) {
|
||||
struct kvm_task_sleep_node *n =
|
||||
hlist_entry(p, typeof(*n), link);
|
||||
n = hlist_entry(p, typeof(*n), link);
|
||||
if (n->cpu == smp_processor_id())
|
||||
apf_task_wake_one(n);
|
||||
}
|
||||
@@ -221,46 +210,64 @@ again:
|
||||
n->cpu = smp_processor_id();
|
||||
init_swait_queue_head(&n->wq);
|
||||
hlist_add_head(&n->link, &b->list);
|
||||
} else
|
||||
} else {
|
||||
apf_task_wake_one(n);
|
||||
}
|
||||
raw_spin_unlock(&b->lock);
|
||||
return;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
|
||||
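The wait/wake pair above has to cope with the "page ready" wakeup arriving before the fault side ever queues itself; in that case the waker leaves a dummy node behind and the later wait consumes it instead of sleeping. The following single-threaded, user-space sketch models only that handshake (one bucket, no locking, no real sleeping, all helpers invented for illustration):

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>

struct node { unsigned token; bool is_dummy; struct node *next; };
static struct node *bucket;

static struct node *find(unsigned token)
{
	struct node *n;

	for (n = bucket; n; n = n->next)
		if (n->token == token)
			return n;
	return NULL;
}

static void unlink_node(struct node *n)
{
	struct node **pp = &bucket;

	while (*pp && *pp != n)
		pp = &(*pp)->next;
	if (*pp)
		*pp = n->next;
}

/* Returns true if the caller would actually have to sleep. */
static bool wait_for(unsigned token)
{
	struct node *e = find(token);

	if (e && e->is_dummy) {		/* wakeup beat us: consume the dummy */
		unlink_node(e);
		free(e);
		return false;
	}
	e = calloc(1, sizeof(*e));	/* register as a sleeper */
	e->token = token;
	e->next = bucket;
	bucket = e;
	return true;
}

static void wake(unsigned token)
{
	struct node *e = find(token);

	if (e) {			/* a sleeper is queued: remove ("wake") it */
		unlink_node(e);
		free(e);
		return;
	}
	e = calloc(1, sizeof(*e));	/* no sleeper yet: leave a dummy behind */
	e->token = token;
	e->is_dummy = true;
	e->next = bucket;
	bucket = e;
}

int main(void)
{
	wake(7);			/* "page ready" arrives first */
	assert(!wait_for(7));		/* ...so the fault side never sleeps */
	assert(wait_for(9));		/* normal order: sleeper is queued */
	wake(9);			/* ...and the wakeup removes it */
	assert(bucket == NULL);
	return 0;
}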
|
||||
u32 kvm_read_and_reset_pf_reason(void)
|
||||
noinstr u32 kvm_read_and_reset_apf_flags(void)
|
||||
{
|
||||
u32 reason = 0;
|
||||
u32 flags = 0;
|
||||
|
||||
if (__this_cpu_read(apf_reason.enabled)) {
|
||||
reason = __this_cpu_read(apf_reason.reason);
|
||||
__this_cpu_write(apf_reason.reason, 0);
|
||||
flags = __this_cpu_read(apf_reason.flags);
|
||||
__this_cpu_write(apf_reason.flags, 0);
|
||||
}
|
||||
|
||||
return reason;
|
||||
return flags;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
|
||||
NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
|
||||
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
|
||||
|
||||
dotraplinkage void
|
||||
do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
|
||||
noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
|
||||
{
|
||||
switch (kvm_read_and_reset_pf_reason()) {
|
||||
default:
|
||||
do_page_fault(regs, error_code, address);
|
||||
break;
|
||||
u32 reason = kvm_read_and_reset_apf_flags();
|
||||
bool rcu_exit;
|
||||
|
||||
switch (reason) {
|
||||
case KVM_PV_REASON_PAGE_NOT_PRESENT:
|
||||
/* page is swapped out by the host. */
|
||||
kvm_async_pf_task_wait((u32)address, !user_mode(regs));
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_READY:
|
||||
rcu_irq_enter();
|
||||
kvm_async_pf_task_wake((u32)address);
|
||||
rcu_irq_exit();
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
rcu_exit = idtentry_enter_cond_rcu(regs);
|
||||
instrumentation_begin();
|
||||
|
||||
/*
|
||||
* If the host managed to inject an async #PF into an interrupt
|
||||
* disabled region, then die hard as this is not going to end well
|
||||
* and the host side is seriously broken.
|
||||
*/
|
||||
if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
|
||||
panic("Host injected async #PF in interrupt disabled region\n");
|
||||
|
||||
if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
|
||||
if (unlikely(!(user_mode(regs))))
|
||||
panic("Host injected async #PF in kernel mode\n");
|
||||
/* Page is swapped out by the host. */
|
||||
kvm_async_pf_task_wait_schedule(token);
|
||||
} else {
|
||||
kvm_async_pf_task_wake(token);
|
||||
}
|
||||
|
||||
instrumentation_end();
|
||||
idtentry_exit_cond_rcu(regs, rcu_exit);
|
||||
return true;
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_async_page_fault);
|
||||
|
||||
static void __init paravirt_ops_setup(void)
|
||||
{
|
||||
@@ -306,11 +313,11 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
|
||||
static void kvm_guest_cpu_init(void)
|
||||
{
|
||||
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
|
||||
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
|
||||
u64 pa;
|
||||
|
||||
#ifdef CONFIG_PREEMPTION
|
||||
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
|
||||
#endif
|
||||
WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
|
||||
|
||||
pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
|
||||
pa |= KVM_ASYNC_PF_ENABLED;
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
|
||||
@@ -318,12 +325,12 @@ static void kvm_guest_cpu_init(void)
|
||||
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
|
||||
__this_cpu_write(apf_reason.enabled, 1);
|
||||
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
|
||||
smp_processor_id());
|
||||
pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
|
||||
}
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
|
||||
unsigned long pa;
|
||||
|
||||
/* Size alignment is implied but just to make it explicit. */
|
||||
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
|
||||
__this_cpu_write(kvm_apic_eoi, 0);
|
||||
@@ -344,8 +351,7 @@ static void kvm_pv_disable_apf(void)
|
||||
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
|
||||
__this_cpu_write(apf_reason.enabled, 0);
|
||||
|
||||
printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
|
||||
smp_processor_id());
|
||||
pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
|
||||
}
|
||||
|
||||
static void kvm_pv_guest_cpu_reboot(void *unused)
|
||||
@@ -592,12 +598,6 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void __init kvm_apf_trap_init(void)
|
||||
{
|
||||
update_intr_gate(X86_TRAP_PF, async_page_fault);
|
||||
}
|
||||
|
||||
|
||||
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
|
||||
const struct flush_tlb_info *info)
|
||||
{
|
||||
@@ -632,8 +632,6 @@ static void __init kvm_guest_init(void)
|
||||
register_reboot_notifier(&kvm_pv_reboot_nb);
|
||||
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
|
||||
raw_spin_lock_init(&async_pf_sleepers[i].lock);
|
||||
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
|
||||
x86_init.irqs.trap_init = kvm_apf_trap_init;
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
|
||||
has_steal_clock = 1;
|
||||
@@ -649,6 +647,9 @@ static void __init kvm_guest_init(void)
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
|
||||
apic_set_eoi_write(kvm_guest_apic_eoi_write);
|
||||
|
||||
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
|
||||
static_branch_enable(&kvm_async_pf_enabled);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
|
||||
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* Lock order:
|
||||
* context.ldt_usr_sem
|
||||
* mmap_sem
|
||||
* mmap_lock
|
||||
* context.lock
|
||||
*/
|
||||
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* livepatch.c - x86-specific Kernel Live Patching Core
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/livepatch.h>
|
||||
#include <asm/text-patching.h>
|
||||
|
||||
/* Apply per-object alternatives. Based on x86 module_finalize() */
|
||||
void arch_klp_init_object_loaded(struct klp_patch *patch,
|
||||
struct klp_object *obj)
|
||||
{
|
||||
int cnt;
|
||||
struct klp_modinfo *info;
|
||||
Elf_Shdr *s, *alt = NULL, *para = NULL;
|
||||
void *aseg, *pseg;
|
||||
const char *objname;
|
||||
char sec_objname[MODULE_NAME_LEN];
|
||||
char secname[KSYM_NAME_LEN];
|
||||
|
||||
info = patch->mod->klp_info;
|
||||
objname = obj->name ? obj->name : "vmlinux";
|
||||
|
||||
/* See livepatch core code for BUILD_BUG_ON() explanation */
|
||||
BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128);
|
||||
|
||||
for (s = info->sechdrs; s < info->sechdrs + info->hdr.e_shnum; s++) {
|
||||
/* Apply per-object .klp.arch sections */
|
||||
cnt = sscanf(info->secstrings + s->sh_name,
|
||||
".klp.arch.%55[^.].%127s",
|
||||
sec_objname, secname);
|
||||
if (cnt != 2)
|
||||
continue;
|
||||
if (strcmp(sec_objname, objname))
|
||||
continue;
|
||||
if (!strcmp(".altinstructions", secname))
|
||||
alt = s;
|
||||
if (!strcmp(".parainstructions", secname))
|
||||
para = s;
|
||||
}
|
||||
|
||||
if (alt) {
|
||||
aseg = (void *) alt->sh_addr;
|
||||
apply_alternatives(aseg, aseg + alt->sh_size);
|
||||
}
|
||||
|
||||
if (para) {
|
||||
pseg = (void *) para->sh_addr;
|
||||
apply_paravirt(pseg, pseg + para->sh_size);
|
||||
}
|
||||
}
|
||||
@@ -13,7 +13,6 @@
#include <linux/gfp.h>
#include <linux/io.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

@@ -19,7 +19,6 @@
#include <linux/efi.h>

#include <asm/init.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io_apic.h>

@@ -18,10 +18,10 @@
#include <linux/gfp.h>
#include <linux/jump_label.h>
#include <linux/random.h>
#include <linux/memory.h>

#include <asm/text-patching.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/unwind.h>

@@ -126,11 +126,12 @@ int apply_relocate(Elf32_Shdr *sechdrs,
return 0;
}
#else /*X86_64*/
int apply_relocate_add(Elf64_Shdr *sechdrs,
static int __apply_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me)
struct module *me,
void *(*write)(void *dest, const void *src, size_t len))
{
unsigned int i;
Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
@@ -162,19 +163,19 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_X86_64_64:
if (*(u64 *)loc != 0)
goto invalid_relocation;
*(u64 *)loc = val;
write(loc, &val, 8);
break;
case R_X86_64_32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
*(u32 *)loc = val;
write(loc, &val, 4);
if (val != *(u32 *)loc)
goto overflow;
break;
case R_X86_64_32S:
if (*(s32 *)loc != 0)
goto invalid_relocation;
*(s32 *)loc = val;
write(loc, &val, 4);
if ((s64)val != *(s32 *)loc)
goto overflow;
break;
@@ -183,7 +184,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
*(u32 *)loc = val;
write(loc, &val, 4);
#if 0
if ((s64)val != *(s32 *)loc)
goto overflow;
@@ -193,7 +194,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
if (*(u64 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
*(u64 *)loc = val;
write(loc, &val, 8);
break;
default:
pr_err("%s: Unknown rela relocation: %llu\n",
@@ -215,6 +216,33 @@ overflow:
me->name);
return -ENOEXEC;
}

int apply_relocate_add(Elf64_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
unsigned int relsec,
struct module *me)
{
int ret;
bool early = me->state == MODULE_STATE_UNFORMED;
void *(*write)(void *, const void *, size_t) = memcpy;

if (!early) {
write = text_poke;
mutex_lock(&text_mutex);
}

ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
write);

if (!early) {
text_poke_sync();
mutex_unlock(&text_mutex);
}

return ret;
}

#endif

int module_finalize(const Elf_Ehdr *hdr,

@@ -25,10 +25,6 @@
#include <linux/atomic.h>
#include <linux/sched/clock.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
@@ -307,7 +303,7 @@ NOKPROBE_SYMBOL(unknown_nmi_error);
|
||||
static DEFINE_PER_CPU(bool, swallow_nmi);
|
||||
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
|
||||
|
||||
static void default_do_nmi(struct pt_regs *regs)
|
||||
static noinstr void default_do_nmi(struct pt_regs *regs)
|
||||
{
|
||||
unsigned char reason = 0;
|
||||
int handled;
|
||||
@@ -333,6 +329,9 @@ static void default_do_nmi(struct pt_regs *regs)
|
||||
|
||||
__this_cpu_write(last_nmi_rip, regs->ip);
|
||||
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_off_finish();
|
||||
|
||||
handled = nmi_handle(NMI_LOCAL, regs);
|
||||
__this_cpu_add(nmi_stats.normal, handled);
|
||||
if (handled) {
|
||||
@@ -346,7 +345,7 @@ static void default_do_nmi(struct pt_regs *regs)
|
||||
*/
|
||||
if (handled > 1)
|
||||
__this_cpu_write(swallow_nmi, true);
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -378,7 +377,7 @@ static void default_do_nmi(struct pt_regs *regs)
|
||||
#endif
|
||||
__this_cpu_add(nmi_stats.external, 1);
|
||||
raw_spin_unlock(&nmi_reason_lock);
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
raw_spin_unlock(&nmi_reason_lock);
|
||||
|
||||
@@ -416,8 +415,12 @@ static void default_do_nmi(struct pt_regs *regs)
|
||||
__this_cpu_add(nmi_stats.swallow, 1);
|
||||
else
|
||||
unknown_nmi_error(reason, regs);
|
||||
|
||||
out:
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
trace_hardirqs_on_prepare();
|
||||
instrumentation_end();
|
||||
}
|
||||
NOKPROBE_SYMBOL(default_do_nmi);
|
||||
|
||||
/*
|
||||
* NMIs can page fault or hit breakpoints which will cause it to lose
|
||||
@@ -471,44 +474,9 @@ enum nmi_states {
|
||||
};
|
||||
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
|
||||
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
|
||||
static DEFINE_PER_CPU(unsigned long, nmi_dr7);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
|
||||
* some care, the inner breakpoint will clobber the outer breakpoint's
|
||||
* stack.
|
||||
*
|
||||
* If a breakpoint is being processed, and the debug stack is being
|
||||
* used, if an NMI comes in and also hits a breakpoint, the stack
|
||||
* pointer will be set to the same fixed address as the breakpoint that
|
||||
* was interrupted, causing that stack to be corrupted. To handle this
|
||||
* case, check if the stack that was interrupted is the debug stack, and
|
||||
* if so, change the IDT so that new breakpoints will use the current
|
||||
* stack and not switch to the fixed address. On return of the NMI,
|
||||
* switch back to the original IDT.
|
||||
*/
|
||||
static DEFINE_PER_CPU(int, update_debug_stack);
|
||||
|
||||
static bool notrace is_debug_stack(unsigned long addr)
|
||||
{
|
||||
struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks);
|
||||
unsigned long top = CEA_ESTACK_TOP(cs, DB);
|
||||
unsigned long bot = CEA_ESTACK_BOT(cs, DB1);
|
||||
|
||||
if (__this_cpu_read(debug_stack_usage))
|
||||
return true;
|
||||
/*
|
||||
* Note, this covers the guard page between DB and DB1 as well to
|
||||
* avoid two checks. But by all means @addr can never point into
|
||||
* the guard page.
|
||||
*/
|
||||
return addr >= bot && addr < top;
|
||||
}
|
||||
NOKPROBE_SYMBOL(is_debug_stack);
|
||||
#endif
|
||||
|
||||
dotraplinkage notrace void
|
||||
do_nmi(struct pt_regs *regs, long error_code)
|
||||
DEFINE_IDTENTRY_NMI(exc_nmi)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id()))
|
||||
return;
|
||||
@@ -521,18 +489,7 @@ do_nmi(struct pt_regs *regs, long error_code)
|
||||
this_cpu_write(nmi_cr2, read_cr2());
|
||||
nmi_restart:
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* If we interrupted a breakpoint, it is possible that
|
||||
* the nmi handler will have breakpoints too. We need to
|
||||
* change the IDT such that breakpoints that happen here
|
||||
* continue to use the NMI stack.
|
||||
*/
|
||||
if (unlikely(is_debug_stack(regs->sp))) {
|
||||
debug_stack_set_zero();
|
||||
this_cpu_write(update_debug_stack, 1);
|
||||
}
|
||||
#endif
|
||||
this_cpu_write(nmi_dr7, local_db_save());
|
||||
|
||||
nmi_enter();
|
||||
|
||||
@@ -543,12 +500,7 @@ nmi_restart:
|
||||
|
||||
nmi_exit();
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (unlikely(this_cpu_read(update_debug_stack))) {
|
||||
debug_stack_reset();
|
||||
this_cpu_write(update_debug_stack, 0);
|
||||
}
|
||||
#endif
|
||||
local_db_restore(this_cpu_read(nmi_dr7));
|
||||
|
||||
if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
|
||||
write_cr2(this_cpu_read(nmi_cr2));
|
||||
@@ -558,7 +510,6 @@ nmi_restart:
|
||||
if (user_mode(regs))
|
||||
mds_user_clear_cpu_buffers();
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_nmi);
|
||||
|
||||
void stop_nmi(void)
|
||||
{
|
||||
|
||||
@@ -13,13 +13,13 @@
|
||||
#include <linux/bcd.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/bug.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/time.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/irq.h>
|
||||
@@ -160,25 +160,6 @@ unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
return insn_len;
}

static void native_flush_tlb(void)
{
__native_flush_tlb();
}

/*
* Global pages have to be flushed a bit differently. Not a real
* performance problem because this does not happen often.
*/
static void native_flush_tlb_global(void)
{
__native_flush_tlb_global();
}

static void native_flush_tlb_one_user(unsigned long addr)
{
__native_flush_tlb_one_user(addr);
}

struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
|
||||
@@ -359,7 +340,7 @@ struct paravirt_patch_template pv_ops = {
|
||||
#endif /* CONFIG_PARAVIRT_XXL */
|
||||
|
||||
/* Mmu ops. */
|
||||
.mmu.flush_tlb_user = native_flush_tlb,
|
||||
.mmu.flush_tlb_user = native_flush_tlb_local,
|
||||
.mmu.flush_tlb_kernel = native_flush_tlb_global,
|
||||
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
|
||||
.mmu.flush_tlb_others = native_flush_tlb_others,
|
||||
|
||||
@@ -96,7 +96,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
||||
}
|
||||
|
||||
/*
|
||||
* Free current thread data structures etc..
|
||||
* Free thread data structures etc..
|
||||
*/
|
||||
void exit_thread(struct task_struct *tsk)
|
||||
{
|
||||
@@ -104,7 +104,7 @@ void exit_thread(struct task_struct *tsk)
|
||||
struct fpu *fpu = &t->fpu;
|
||||
|
||||
if (test_thread_flag(TIF_IO_BITMAP))
|
||||
io_bitmap_exit();
|
||||
io_bitmap_exit(tsk);
|
||||
|
||||
free_vm86(t);
|
||||
|
||||
@@ -191,7 +191,7 @@ void flush_thread(void)
|
||||
flush_ptrace_hw_breakpoint(tsk);
|
||||
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
||||
|
||||
fpu__clear(&tsk->thread.fpu);
|
||||
fpu__clear_all(&tsk->thread.fpu);
|
||||
}
|
||||
|
||||
void disable_TSC(void)
|
||||
@@ -612,6 +612,17 @@ void speculation_ctrl_update_current(void)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
|
||||
{
|
||||
unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
|
||||
|
||||
newval = cr4 ^ mask;
|
||||
if (newval != cr4) {
|
||||
this_cpu_write(cpu_tlbstate.cr4, newval);
|
||||
__write_cr4(newval);
|
||||
}
|
||||
}
|
||||
|
||||
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
{
|
||||
unsigned long tifp, tifn;
|
||||
|
||||
@@ -39,7 +39,6 @@
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/ldt.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
@@ -52,7 +51,7 @@
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/resctrl_sched.h>
|
||||
#include <asm/resctrl.h>
|
||||
#include <asm/proto.h>
|
||||
|
||||
#include "process.h"
|
||||
|
||||
@@ -40,7 +40,6 @@
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
#include <asm/mmu_context.h>
|
||||
@@ -52,7 +51,7 @@
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/resctrl_sched.h>
|
||||
#include <asm/resctrl.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/fsgsbase.h>
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
#include <asm/fpu/signal.h>
|
||||
|
||||
@@ -11,13 +11,13 @@
|
||||
#include <linux/tboot.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/frame.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <acpi/reboot.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/hpet.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/reboot_fixups.h>
|
||||
#include <asm/reboot.h>
|
||||
|
||||
@@ -237,6 +237,9 @@ static u64 __init get_ramdisk_image(void)
|
||||
|
||||
ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
|
||||
|
||||
if (ramdisk_image == 0)
|
||||
ramdisk_image = phys_initrd_start;
|
||||
|
||||
return ramdisk_image;
|
||||
}
|
||||
static u64 __init get_ramdisk_size(void)
|
||||
@@ -245,6 +248,9 @@ static u64 __init get_ramdisk_size(void)
|
||||
|
||||
ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
|
||||
|
||||
if (ramdisk_size == 0)
|
||||
ramdisk_size = phys_initrd_size;
|
||||
|
||||
return ramdisk_size;
|
||||
}
|
||||
|
||||
|
||||
@@ -287,9 +287,9 @@ void __init setup_per_cpu_areas(void)
|
||||
/*
|
||||
* Sync back kernel address range again. We already did this in
|
||||
* setup_arch(), but percpu data also needs to be available in
|
||||
* the smpboot asm. We can't reliably pick up percpu mappings
|
||||
* using vmalloc_fault(), because exception dispatch needs
|
||||
* percpu data.
|
||||
* the smpboot asm and arch_sync_kernel_mappings() doesn't sync to
|
||||
* swapper_pg_dir on 32-bit. The per-cpu mappings need to be available
|
||||
* there too.
|
||||
*
|
||||
* FIXME: Can the later sync in setup_cpu_entry_areas() replace
|
||||
* this call?
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <asm/vm86.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <linux/compat.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/ia32_unistd.h>
|
||||
#endif /* CONFIG_X86_64 */
|
||||
@@ -511,6 +512,31 @@ Efault:
|
||||
}
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
static int x32_copy_siginfo_to_user(struct compat_siginfo __user *to,
|
||||
const struct kernel_siginfo *from)
|
||||
{
|
||||
struct compat_siginfo new;
|
||||
|
||||
copy_siginfo_to_external32(&new, from);
|
||||
if (from->si_signo == SIGCHLD) {
|
||||
new._sifields._sigchld_x32._utime = from->si_utime;
|
||||
new._sifields._sigchld_x32._stime = from->si_stime;
|
||||
}
|
||||
if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int copy_siginfo_to_user32(struct compat_siginfo __user *to,
|
||||
const struct kernel_siginfo *from)
|
||||
{
|
||||
if (in_x32_syscall())
|
||||
return x32_copy_siginfo_to_user(to, from);
|
||||
return __copy_siginfo_to_user32(to, from);
|
||||
}
|
||||
#endif /* CONFIG_X86_X32_ABI */
|
||||
|
||||
static int x32_setup_rt_frame(struct ksignal *ksig,
|
||||
compat_sigset_t *set,
|
||||
struct pt_regs *regs)
|
||||
@@ -543,7 +569,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
|
||||
user_access_end();
|
||||
|
||||
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
|
||||
if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
|
||||
if (x32_copy_siginfo_to_user(&frame->info, &ksig->info))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
@@ -732,7 +758,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
|
||||
/*
|
||||
* Ensure the signal handler starts with the new fpu state.
|
||||
*/
|
||||
fpu__clear(fpu);
|
||||
fpu__clear_user_states(fpu);
|
||||
}
|
||||
signal_setup_done(failed, ksig, stepping);
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/idtentry.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/trace/irq_vectors.h>
|
||||
@@ -130,13 +131,11 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
|
||||
/*
|
||||
* this function calls the 'stop' function on all other CPUs in the system.
|
||||
*/
|
||||
|
||||
asmlinkage __visible void smp_reboot_interrupt(void)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
|
||||
{
|
||||
ipi_entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
cpu_emergency_vmxoff();
|
||||
stop_this_cpu(NULL);
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
static int register_stop_handler(void)
|
||||
@@ -221,47 +220,33 @@ static void native_stop_other_cpus(int wait)
|
||||
|
||||
/*
|
||||
* Reschedule call back. KVM uses this interrupt to force a cpu out of
|
||||
* guest mode
|
||||
* guest mode.
|
||||
*/
|
||||
__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_reschedule_ipi)
|
||||
{
|
||||
ack_APIC_irq();
|
||||
trace_reschedule_entry(RESCHEDULE_VECTOR);
|
||||
inc_irq_stat(irq_resched_count);
|
||||
kvm_set_cpu_l1tf_flush_l1d();
|
||||
|
||||
if (trace_resched_ipi_enabled()) {
|
||||
/*
|
||||
* scheduler_ipi() might call irq_enter() as well, but
|
||||
* nested calls are fine.
|
||||
*/
|
||||
irq_enter();
|
||||
trace_reschedule_entry(RESCHEDULE_VECTOR);
|
||||
scheduler_ipi();
|
||||
trace_reschedule_exit(RESCHEDULE_VECTOR);
|
||||
irq_exit();
|
||||
return;
|
||||
}
|
||||
scheduler_ipi();
|
||||
trace_reschedule_exit(RESCHEDULE_VECTOR);
|
||||
}
|
||||
|
||||
__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_call_function)
|
||||
{
|
||||
ipi_entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
trace_call_function_entry(CALL_FUNCTION_VECTOR);
|
||||
inc_irq_stat(irq_call_count);
|
||||
generic_smp_call_function_interrupt();
|
||||
trace_call_function_exit(CALL_FUNCTION_VECTOR);
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
__visible void __irq_entry smp_call_function_single_interrupt(struct pt_regs *r)
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single)
|
||||
{
|
||||
ipi_entering_ack_irq();
|
||||
ack_APIC_irq();
|
||||
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
|
||||
inc_irq_stat(irq_call_count);
|
||||
generic_smp_call_function_single_interrupt();
|
||||
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
|
||||
exiting_irq();
|
||||
}
|
||||
|
||||
static int __init nonmi_ipi_setup(char *str)
|
||||
|
||||
@@ -55,6 +55,7 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/numa.h>
|
||||
#include <linux/pgtable.h>
|
||||
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/desc.h>
|
||||
@@ -63,7 +64,6 @@
|
||||
#include <asm/realmode.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/numa.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/mwait.h>
|
||||
@@ -147,7 +147,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
|
||||
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
|
||||
}
|
||||
|
||||
static void init_freq_invariance(void);
|
||||
static void init_freq_invariance(bool secondary);
|
||||
|
||||
/*
|
||||
* Report back to the Boot Processor during boot time or to the caller processor
|
||||
@@ -185,7 +185,7 @@ static void smp_callin(void)
|
||||
*/
|
||||
set_cpu_sibling_map(raw_smp_processor_id());
|
||||
|
||||
init_freq_invariance();
|
||||
init_freq_invariance(true);
|
||||
|
||||
/*
|
||||
* Get our bogomips.
|
||||
@@ -266,6 +266,14 @@ static void notrace start_secondary(void *unused)
|
||||
|
||||
wmb();
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
|
||||
/*
|
||||
* Prevent tail call to cpu_startup_entry() because the stack protector
|
||||
* guard has been changed a couple of function calls up, in
|
||||
* boot_init_stack_canary() and must not be checked before tail calling
|
||||
* another function.
|
||||
*/
|
||||
prevent_tail_call_optimization();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1341,7 +1349,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
|
||||
set_sched_topology(x86_topology);
|
||||
|
||||
set_cpu_sibling_map(0);
|
||||
init_freq_invariance();
|
||||
init_freq_invariance(false);
|
||||
smp_sanity_check();
|
||||
|
||||
switch (apic_intr_mode) {
|
||||
@@ -1376,12 +1384,12 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
|
||||
speculative_store_bypass_ht_init();
|
||||
}
|
||||
|
||||
void arch_enable_nonboot_cpus_begin(void)
|
||||
void arch_thaw_secondary_cpus_begin(void)
|
||||
{
|
||||
set_mtrr_aps_delayed_init();
|
||||
}
|
||||
|
||||
void arch_enable_nonboot_cpus_end(void)
|
||||
void arch_thaw_secondary_cpus_end(void)
|
||||
{
|
||||
mtrr_aps_init();
|
||||
}
|
||||
@@ -1849,24 +1857,25 @@ static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/intel-family.h>
|
||||
|
||||
#define ICPU(model) \
|
||||
{X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, 0}
|
||||
#define X86_MATCH(model) \
|
||||
X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
|
||||
INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
|
||||
|
||||
static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
|
||||
ICPU(INTEL_FAM6_XEON_PHI_KNL),
|
||||
ICPU(INTEL_FAM6_XEON_PHI_KNM),
|
||||
X86_MATCH(XEON_PHI_KNL),
|
||||
X86_MATCH(XEON_PHI_KNM),
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
|
||||
ICPU(INTEL_FAM6_SKYLAKE_X),
|
||||
X86_MATCH(SKYLAKE_X),
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
|
||||
ICPU(INTEL_FAM6_ATOM_GOLDMONT),
|
||||
ICPU(INTEL_FAM6_ATOM_GOLDMONT_D),
|
||||
ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS),
|
||||
X86_MATCH(ATOM_GOLDMONT),
|
||||
X86_MATCH(ATOM_GOLDMONT_D),
|
||||
X86_MATCH(ATOM_GOLDMONT_PLUS),
|
||||
{}
|
||||
};
|
||||
|
||||
@@ -1877,9 +1886,6 @@ static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
|
||||
int err, i;
|
||||
u64 msr;
|
||||
|
||||
if (!x86_match_cpu(has_knl_turbo_ratio_limits))
|
||||
return false;
|
||||
|
||||
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
|
||||
if (err)
|
||||
return false;
|
||||
@@ -1945,18 +1951,23 @@ static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
|
||||
|
||||
static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
|
||||
{
|
||||
u64 msr;
|
||||
int err;
|
||||
|
||||
err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
|
||||
if (err)
|
||||
return false;
|
||||
|
||||
err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_freq);
|
||||
err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
|
||||
if (err)
|
||||
return false;
|
||||
|
||||
*base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
|
||||
*turbo_freq = (*turbo_freq >> 24) & 0xFF; /* 4C turbo */
|
||||
*base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
|
||||
*turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
|
||||
|
||||
/* The CPU may have less than 4 cores */
|
||||
if (!*turbo_freq)
|
||||
*turbo_freq = msr & 0xFF; /* 1C turbo */
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1972,7 +1983,8 @@ static bool intel_set_max_freq_ratio(void)
|
||||
skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
|
||||
goto out;
|
||||
|
||||
if (knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
|
||||
if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
|
||||
knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
|
||||
goto out;
|
||||
|
||||
if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
|
||||
@@ -1985,13 +1997,22 @@ static bool intel_set_max_freq_ratio(void)
|
||||
return false;
|
||||
|
||||
out:
|
||||
/*
|
||||
* Some hypervisors advertise X86_FEATURE_APERFMPERF
|
||||
* but then fill all MSR's with zeroes.
|
||||
*/
|
||||
if (!base_freq) {
|
||||
pr_debug("Couldn't determine cpu base frequency, necessary for scale-invariant accounting.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE,
|
||||
base_freq);
|
||||
arch_set_max_freq_ratio(turbo_disabled());
|
||||
return true;
|
||||
}
|
||||
|
||||
static void init_counter_refs(void *arg)
|
||||
static void init_counter_refs(void)
|
||||
{
|
||||
u64 aperf, mperf;
|
||||
|
||||
@@ -2002,18 +2023,25 @@ static void init_counter_refs(void *arg)
|
||||
this_cpu_write(arch_prev_mperf, mperf);
|
||||
}
|
||||
|
||||
static void init_freq_invariance(void)
|
||||
static void init_freq_invariance(bool secondary)
|
||||
{
|
||||
bool ret = false;
|
||||
|
||||
if (smp_processor_id() != 0 || !boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||
return;
|
||||
|
||||
if (secondary) {
|
||||
if (static_branch_likely(&arch_scale_freq_key)) {
|
||||
init_counter_refs();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|
||||
ret = intel_set_max_freq_ratio();
|
||||
|
||||
if (ret) {
|
||||
on_each_cpu(init_counter_refs, NULL, 1);
|
||||
init_counter_refs();
|
||||
static_branch_enable(&arch_scale_freq_key);
|
||||
} else {
|
||||
pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
|
||||
|
||||
@@ -135,26 +135,30 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
|
||||
typeof(ubuf->st_gid) gid = 0;
|
||||
SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid));
|
||||
SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid));
|
||||
if (!access_ok(ubuf, sizeof(struct stat64)) ||
|
||||
__put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
|
||||
__put_user(stat->ino, &ubuf->__st_ino) ||
|
||||
__put_user(stat->ino, &ubuf->st_ino) ||
|
||||
__put_user(stat->mode, &ubuf->st_mode) ||
|
||||
__put_user(stat->nlink, &ubuf->st_nlink) ||
|
||||
__put_user(uid, &ubuf->st_uid) ||
|
||||
__put_user(gid, &ubuf->st_gid) ||
|
||||
__put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
|
||||
__put_user(stat->size, &ubuf->st_size) ||
|
||||
__put_user(stat->atime.tv_sec, &ubuf->st_atime) ||
|
||||
__put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
|
||||
__put_user(stat->mtime.tv_sec, &ubuf->st_mtime) ||
|
||||
__put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
|
||||
__put_user(stat->ctime.tv_sec, &ubuf->st_ctime) ||
|
||||
__put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
|
||||
__put_user(stat->blksize, &ubuf->st_blksize) ||
|
||||
__put_user(stat->blocks, &ubuf->st_blocks))
|
||||
if (!user_write_access_begin(ubuf, sizeof(struct stat64)))
|
||||
return -EFAULT;
|
||||
unsafe_put_user(huge_encode_dev(stat->dev), &ubuf->st_dev, Efault);
|
||||
unsafe_put_user(stat->ino, &ubuf->__st_ino, Efault);
|
||||
unsafe_put_user(stat->ino, &ubuf->st_ino, Efault);
|
||||
unsafe_put_user(stat->mode, &ubuf->st_mode, Efault);
|
||||
unsafe_put_user(stat->nlink, &ubuf->st_nlink, Efault);
|
||||
unsafe_put_user(uid, &ubuf->st_uid, Efault);
|
||||
unsafe_put_user(gid, &ubuf->st_gid, Efault);
|
||||
unsafe_put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev, Efault);
|
||||
unsafe_put_user(stat->size, &ubuf->st_size, Efault);
|
||||
unsafe_put_user(stat->atime.tv_sec, &ubuf->st_atime, Efault);
|
||||
unsafe_put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec, Efault);
|
||||
unsafe_put_user(stat->mtime.tv_sec, &ubuf->st_mtime, Efault);
|
||||
unsafe_put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec, Efault);
|
||||
unsafe_put_user(stat->ctime.tv_sec, &ubuf->st_ctime, Efault);
|
||||
unsafe_put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec, Efault);
|
||||
unsafe_put_user(stat->blksize, &ubuf->st_blksize, Efault);
|
||||
unsafe_put_user(stat->blocks, &ubuf->st_blocks, Efault);
|
||||
user_access_end();
|
||||
return 0;
|
||||
Efault:
|
||||
user_write_access_end();
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
COMPAT_SYSCALL_DEFINE2(ia32_stat64, const char __user *, filename,
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
#include <asm/realmode.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/bootparam.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/swiotlb.h>
|
||||
#include <asm/fixmap.h>
|
||||
@@ -35,8 +34,7 @@
|
||||
#include "../realmode/rm/wakeup.h"
|
||||
|
||||
/* Global pointer to shared data; NULL means no measured launch. */
|
||||
struct tboot *tboot __read_mostly;
|
||||
EXPORT_SYMBOL(tboot);
|
||||
static struct tboot *tboot __read_mostly;
|
||||
|
||||
/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
|
||||
#define AP_WAIT_TIMEOUT 1
|
||||
@@ -46,6 +44,11 @@ EXPORT_SYMBOL(tboot);
|
||||
|
||||
static u8 tboot_uuid[16] __initdata = TBOOT_UUID;
|
||||
|
||||
bool tboot_enabled(void)
|
||||
{
|
||||
return tboot != NULL;
|
||||
}
|
||||
|
||||
void __init tboot_probe(void)
|
||||
{
|
||||
/* Look for valid page-aligned address for shared page. */
|
||||
@@ -90,7 +93,7 @@ static struct mm_struct tboot_mm = {
|
||||
.pgd = swapper_pg_dir,
|
||||
.mm_users = ATOMIC_INIT(2),
|
||||
.mm_count = ATOMIC_INIT(1),
|
||||
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
|
||||
MMAP_LOCK_INITIALIZER(init_mm)
|
||||
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
|
||||
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
||||
};
|
||||
|
||||
@@ -103,6 +103,9 @@ static __init void x86_late_time_init(void)
|
||||
*/
|
||||
x86_init.irqs.intr_mode_init();
|
||||
tsc_init();
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_WAITPKG))
|
||||
use_tpause_delay();
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -25,20 +25,3 @@ void trace_pagefault_unreg(void)
{
static_branch_dec(&trace_pagefault_key);
}

#ifdef CONFIG_SMP

DEFINE_STATIC_KEY_FALSE(trace_resched_ipi_key);

int trace_resched_ipi_reg(void)
{
static_branch_inc(&trace_resched_ipi_key);
return 0;
}

void trace_resched_ipi_unreg(void)
{
static_branch_dec(&trace_resched_ipi_key);
}

#endif
|
||||
@@ -37,10 +37,12 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/debugreg.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/traps.h>
|
||||
@@ -82,78 +84,6 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
|
||||
local_irq_disable();
|
||||
}
|
||||
|
||||
/*
|
||||
* In IST context, we explicitly disable preemption. This serves two
|
||||
* purposes: it makes it much less likely that we would accidentally
|
||||
* schedule in IST context and it will force a warning if we somehow
|
||||
* manage to schedule by accident.
|
||||
*/
|
||||
void ist_enter(struct pt_regs *regs)
|
||||
{
|
||||
if (user_mode(regs)) {
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
} else {
|
||||
/*
|
||||
* We might have interrupted pretty much anything. In
|
||||
* fact, if we're a machine check, we can even interrupt
|
||||
* NMI processing. We don't want in_nmi() to return true,
|
||||
* but we need to notify RCU.
|
||||
*/
|
||||
rcu_nmi_enter();
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/* This code is a bit fragile. Test it. */
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
|
||||
}
|
||||
NOKPROBE_SYMBOL(ist_enter);
|
||||
|
||||
void ist_exit(struct pt_regs *regs)
|
||||
{
|
||||
preempt_enable_no_resched();
|
||||
|
||||
if (!user_mode(regs))
|
||||
rcu_nmi_exit();
|
||||
}
|
||||
|
||||
/**
|
||||
* ist_begin_non_atomic() - begin a non-atomic section in an IST exception
|
||||
* @regs: regs passed to the IST exception handler
|
||||
*
|
||||
* IST exception handlers normally cannot schedule. As a special
|
||||
* exception, if the exception interrupted userspace code (i.e.
|
||||
* user_mode(regs) would return true) and the exception was not
|
||||
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
|
||||
* begins a non-atomic section within an ist_enter()/ist_exit() region.
|
||||
* Callers are responsible for enabling interrupts themselves inside
|
||||
* the non-atomic section, and callers must call ist_end_non_atomic()
|
||||
* before ist_exit().
|
||||
*/
|
||||
void ist_begin_non_atomic(struct pt_regs *regs)
|
||||
{
|
||||
BUG_ON(!user_mode(regs));
|
||||
|
||||
/*
|
||||
* Sanity check: we need to be on the normal thread stack. This
|
||||
* will catch asm bugs and any attempt to use ist_preempt_enable
|
||||
* from double_fault.
|
||||
*/
|
||||
BUG_ON(!on_thread_stack());
|
||||
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
|
||||
/**
|
||||
* ist_end_non_atomic() - begin a non-atomic section in an IST exception
|
||||
*
|
||||
* Ends a non-atomic section started with ist_begin_non_atomic().
|
||||
*/
|
||||
void ist_end_non_atomic(void)
|
||||
{
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
int is_valid_bugaddr(unsigned long addr)
|
||||
{
|
||||
unsigned short ud;
|
||||
@@ -215,7 +145,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
|
||||
* process no chance to handle the signal and notice the
|
||||
* kernel fault information, so that won't result in polluting
|
||||
* the information about previously queued, but not yet
|
||||
* delivered, faults. See also do_general_protection below.
|
||||
* delivered, faults. See also exc_general_protection below.
|
||||
*/
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_nr = trapnr;
|
||||
@@ -271,31 +201,79 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
|
||||
NOTIFY_STOP) {
|
||||
cond_local_irq_enable(regs);
|
||||
do_trap(trapnr, signr, str, regs, error_code, sicode, addr);
|
||||
cond_local_irq_disable(regs);
|
||||
}
|
||||
}
|
||||
|
||||
#define IP ((void __user *)uprobe_get_trap_addr(regs))
|
||||
#define DO_ERROR(trapnr, signr, sicode, addr, str, name) \
|
||||
dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
|
||||
{ \
|
||||
do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \
|
||||
/*
|
||||
* Posix requires to provide the address of the faulting instruction for
|
||||
* SIGILL (#UD) and SIGFPE (#DE) in the si_addr member of siginfo_t.
|
||||
*
|
||||
* This address is usually regs->ip, but when an uprobe moved the code out
|
||||
* of line then regs->ip points to the XOL code which would confuse
|
||||
* anything which analyzes the fault address vs. the unmodified binary. If
|
||||
* a trap happened in XOL code then uprobe maps regs->ip back to the
|
||||
* original instruction address.
|
||||
*/
|
||||
static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)
|
||||
{
|
||||
return (void __user *)uprobe_get_trap_addr(regs);
|
||||
}
|
||||
|
||||
DO_ERROR(X86_TRAP_DE, SIGFPE, FPE_INTDIV, IP, "divide error", divide_error)
|
||||
DO_ERROR(X86_TRAP_OF, SIGSEGV, 0, NULL, "overflow", overflow)
|
||||
DO_ERROR(X86_TRAP_UD, SIGILL, ILL_ILLOPN, IP, "invalid opcode", invalid_op)
|
||||
DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overrun", coprocessor_segment_overrun)
|
||||
DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS)
|
||||
DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present)
|
||||
DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment)
|
||||
#undef IP
|
||||
DEFINE_IDTENTRY(exc_divide_error)
|
||||
{
|
||||
do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE,
|
||||
FPE_INTDIV, error_get_trap_addr(regs));
|
||||
}
|
||||
|
||||
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
|
||||
DEFINE_IDTENTRY(exc_overflow)
|
||||
{
|
||||
do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_F00F_BUG
|
||||
void handle_invalid_op(struct pt_regs *regs)
|
||||
#else
|
||||
static inline void handle_invalid_op(struct pt_regs *regs)
|
||||
#endif
|
||||
{
|
||||
do_error_trap(regs, 0, "invalid opcode", X86_TRAP_UD, SIGILL,
|
||||
ILL_ILLOPN, error_get_trap_addr(regs));
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY(exc_invalid_op)
|
||||
{
|
||||
handle_invalid_op(regs);
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY(exc_coproc_segment_overrun)
|
||||
{
|
||||
do_error_trap(regs, 0, "coprocessor segment overrun",
|
||||
X86_TRAP_OLD_MF, SIGFPE, 0, NULL);
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss)
|
||||
{
|
||||
do_error_trap(regs, error_code, "invalid TSS", X86_TRAP_TS, SIGSEGV,
|
||||
0, NULL);
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_segment_not_present)
|
||||
{
|
||||
do_error_trap(regs, error_code, "segment not present", X86_TRAP_NP,
|
||||
SIGBUS, 0, NULL);
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_stack_segment)
|
||||
{
|
||||
do_error_trap(regs, error_code, "stack segment", X86_TRAP_SS, SIGBUS,
|
||||
0, NULL);
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check)
|
||||
{
|
||||
char *str = "alignment check";
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
|
||||
if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
|
||||
return;
|
||||
|
||||
@@ -326,7 +304,6 @@ __visible void __noreturn handle_stack_overflow(const char *message,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
|
||||
/*
|
||||
* Runs on an IST stack for x86_64 and on a special task stack for x86_32.
|
||||
*
|
||||
@@ -342,12 +319,19 @@ __visible void __noreturn handle_stack_overflow(const char *message,
|
||||
* from the TSS. Returning is, in principle, okay, but changes to regs will
|
||||
* be lost. If, for some reason, we need to return to a context with modified
|
||||
* regs, the shim code could be adjusted to synchronize the registers.
|
||||
*
|
||||
* The 32bit #DF shim provides CR2 already as an argument. On 64bit it needs
|
||||
* to be read before doing anything else.
|
||||
*/
|
||||
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
|
||||
DEFINE_IDTENTRY_DF(exc_double_fault)
|
||||
{
|
||||
static const char str[] = "double fault";
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
unsigned long address = read_cr2();
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
extern unsigned char native_irq_return_iret[];
|
||||
|
||||
@@ -363,13 +347,14 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
|
||||
* The net result is that our #GP handler will think that we
|
||||
* entered from usermode with the bad user context.
|
||||
*
|
||||
* No need for ist_enter here because we don't use RCU.
|
||||
* No need for nmi_enter() here because we don't use RCU.
|
||||
*/
|
||||
if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
|
||||
regs->cs == __KERNEL_CS &&
|
||||
regs->ip == (unsigned long)native_irq_return_iret)
|
||||
{
|
||||
struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
|
||||
unsigned long *p = (unsigned long *)regs->sp;
|
||||
|
||||
/*
|
||||
* regs->sp points to the failing IRET frame on the
|
||||
@@ -377,7 +362,11 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
|
||||
* in gpregs->ss through gpregs->ip.
|
||||
*
|
||||
*/
|
||||
memmove(&gpregs->ip, (void *)regs->sp, 5*8);
|
||||
gpregs->ip = p[0];
|
||||
gpregs->cs = p[1];
|
||||
gpregs->flags = p[2];
|
||||
gpregs->sp = p[3];
|
||||
gpregs->ss = p[4];
|
||||
gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
|
||||
|
||||
/*
|
||||
@@ -391,14 +380,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
|
||||
* which is what the stub expects, given that the faulting
|
||||
* RIP will be the IRET instruction.
|
||||
*/
|
||||
regs->ip = (unsigned long)general_protection;
|
||||
regs->ip = (unsigned long)asm_exc_general_protection;
|
||||
regs->sp = (unsigned long)&gpregs->orig_ax;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
ist_enter(regs);
|
||||
nmi_enter();
|
||||
instrumentation_begin();
|
||||
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
|
||||
|
||||
tsk->thread.error_code = error_code;
|
||||
@@ -442,28 +432,31 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
|
||||
* stack even if the actual trigger for the double fault was
|
||||
* something else.
|
||||
*/
|
||||
if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
|
||||
handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
|
||||
if ((unsigned long)task_stack_page(tsk) - 1 - address < PAGE_SIZE) {
|
||||
handle_stack_overflow("kernel stack overflow (double-fault)",
|
||||
regs, address);
|
||||
}
|
||||
#endif
|
||||
|
||||
pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
|
||||
die("double fault", regs, error_code);
|
||||
panic("Machine halted.");
|
||||
instrumentation_end();
|
||||
}
|
||||
#endif
|
||||
|
||||
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
|
||||
DEFINE_IDTENTRY(exc_bounds)
|
||||
{
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
|
||||
if (notify_die(DIE_TRAP, "bounds", regs, 0,
|
||||
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
|
||||
return;
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (!user_mode(regs))
|
||||
die("bounds", regs, error_code);
|
||||
die("bounds", regs, 0);
|
||||
|
||||
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
|
||||
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, 0, 0, NULL);
|
||||
|
||||
cond_local_irq_disable(regs);
|
||||
}
|
||||
|
||||
enum kernel_gp_hint {
|
||||
@@ -510,7 +503,7 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
|
||||
|
||||
#define GPFSTR "general protection fault"
|
||||
|
||||
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
|
||||
{
|
||||
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
|
||||
enum kernel_gp_hint hint = GP_NO_HINT;
|
||||
@@ -518,17 +511,17 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
|
||||
unsigned long gp_addr;
|
||||
int ret;
|
||||
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_UMIP)) {
|
||||
if (user_mode(regs) && fixup_umip_exception(regs))
|
||||
return;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (v8086_mode(regs)) {
|
||||
local_irq_enable();
|
||||
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
|
||||
local_irq_disable();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -540,12 +533,11 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
|
||||
|
||||
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
|
||||
force_sig(SIGSEGV);
|
||||
|
||||
return;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
|
||||
return;
|
||||
goto exit;
|
||||
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_nr = X86_TRAP_GP;
|
||||
@@ -557,11 +549,11 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
|
||||
if (!preemptible() &&
|
||||
kprobe_running() &&
|
||||
kprobe_fault_handler(regs, X86_TRAP_GP))
|
||||
return;
|
||||
goto exit;
|
||||
|
||||
ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
|
||||
if (ret == NOTIFY_STOP)
|
||||
return;
|
||||
goto exit;
|
||||
|
||||
if (error_code)
|
||||
snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
|
||||
@@ -583,55 +575,74 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
|
||||
|
||||
die_addr(desc, regs, error_code, gp_addr);
|
||||
|
||||
exit:
|
||||
cond_local_irq_disable(regs);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_general_protection);
|
||||
|
||||
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
|
||||
static bool do_int3(struct pt_regs *regs)
|
||||
{
|
||||
if (poke_int3_handler(regs))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Unlike any other non-IST entry, we can be called from a kprobe in
|
||||
* non-CONTEXT_KERNEL kernel mode or even during context tracking
|
||||
* state changes. Make sure that we wake up RCU even if we're coming
|
||||
* from kernel code.
|
||||
*
|
||||
* This means that we can't schedule even if we came from a
|
||||
* preemptible kernel context. That's okay.
|
||||
*/
|
||||
if (!user_mode(regs)) {
|
||||
rcu_nmi_enter();
|
||||
preempt_disable();
|
||||
}
|
||||
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
|
||||
int res;
|
||||
|
||||
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
|
||||
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
|
||||
SIGTRAP) == NOTIFY_STOP)
|
||||
goto exit;
|
||||
if (kgdb_ll_trap(DIE_INT3, "int3", regs, 0, X86_TRAP_BP,
|
||||
SIGTRAP) == NOTIFY_STOP)
|
||||
return true;
|
||||
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
|
||||
|
||||
#ifdef CONFIG_KPROBES
|
||||
if (kprobe_int3_handler(regs))
|
||||
goto exit;
|
||||
return true;
|
||||
#endif
|
||||
res = notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP);
|
||||
|
||||
if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
|
||||
SIGTRAP) == NOTIFY_STOP)
|
||||
goto exit;
|
||||
return res == NOTIFY_STOP;
|
||||
}
|
||||
|
||||
static void do_int3_user(struct pt_regs *regs)
|
||||
{
|
||||
if (do_int3(regs))
|
||||
return;
|
||||
|
||||
cond_local_irq_enable(regs);
|
||||
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, 0, NULL);
|
||||
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL);
|
||||
cond_local_irq_disable(regs);
|
||||
}
|
||||
|
||||
exit:
|
||||
if (!user_mode(regs)) {
|
||||
preempt_enable_no_resched();
|
||||
rcu_nmi_exit();
|
||||
DEFINE_IDTENTRY_RAW(exc_int3)
|
||||
{
|
||||
/*
|
||||
* poke_int3_handler() is completely self contained code; it does (and
|
||||
* must) *NOT* call out to anything, lest it hits upon yet another
|
||||
* INT3.
|
||||
*/
|
||||
if (poke_int3_handler(regs))
|
||||
return;
|
||||
|
||||
/*
|
||||
* idtentry_enter_user() uses static_branch_{,un}likely() and therefore
|
||||
* can trigger INT3, hence poke_int3_handler() must be done
|
||||
* before. If the entry came from kernel mode, then use nmi_enter()
|
||||
* because the INT3 could have been hit in any context including
|
||||
* NMI.
|
||||
*/
|
||||
if (user_mode(regs)) {
|
||||
idtentry_enter_user(regs);
|
||||
instrumentation_begin();
|
||||
do_int3_user(regs);
|
||||
instrumentation_end();
|
||||
idtentry_exit_user(regs);
|
||||
} else {
|
||||
nmi_enter();
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_off_finish();
|
||||
if (!do_int3(regs))
|
||||
die("int3", regs, 0);
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
trace_hardirqs_on_prepare();
|
||||
instrumentation_end();
|
||||
nmi_exit();
|
||||
}
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_int3);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
@@ -639,21 +650,20 @@ NOKPROBE_SYMBOL(do_int3);
|
||||
* to switch to the normal thread stack if the interrupted code was in
|
||||
* user mode. The actual stack switch is done in entry_64.S
|
||||
*/
|
||||
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
|
||||
asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
|
||||
{
|
||||
struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
|
||||
if (regs != eregs)
|
||||
*regs = *eregs;
|
||||
return regs;
|
||||
}
|
||||
NOKPROBE_SYMBOL(sync_regs);
|
||||
|
||||
struct bad_iret_stack {
|
||||
void *error_entry_ret;
|
||||
struct pt_regs regs;
|
||||
};
|
||||
|
||||
asmlinkage __visible notrace
|
||||
asmlinkage __visible noinstr
|
||||
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
|
||||
{
|
||||
/*
|
||||
@@ -664,19 +674,21 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
|
||||
* just below the IRET frame) and we want to pretend that the
|
||||
* exception came from the IRET target.
|
||||
*/
|
||||
struct bad_iret_stack *new_stack =
|
||||
(struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
|
||||
struct bad_iret_stack tmp, *new_stack =
|
||||
(struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
|
||||
|
||||
/* Copy the IRET target to the new stack. */
|
||||
memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
|
||||
/* Copy the IRET target to the temporary storage. */
|
||||
memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
|
||||
|
||||
/* Copy the remainder of the stack from the current stack. */
|
||||
memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
|
||||
memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
|
||||
|
||||
/* Update the entry stack */
|
||||
memcpy(new_stack, &tmp, sizeof(tmp));
|
||||
|
||||
BUG_ON(!user_mode(&new_stack->regs));
|
||||
return new_stack;
|
||||
}
|
||||
NOKPROBE_SYMBOL(fixup_bad_iret);
|
||||
#endif
|
||||
|
||||
static bool is_sysenter_singlestep(struct pt_regs *regs)
|
||||
@@ -702,6 +714,43 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
|
||||
#endif
|
||||
}
|
||||
|
||||
static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
|
||||
{
|
||||
/*
|
||||
* Disable breakpoints during exception handling; recursive exceptions
|
||||
* are exceedingly 'fun'.
|
||||
*
|
||||
* Since this function is NOKPROBE, and that also applies to
|
||||
* HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
|
||||
* HW_BREAKPOINT_W on our stack)
|
||||
*
|
||||
* Entry text is excluded for HW_BP_X and cpu_entry_area, which
|
||||
* includes the entry stack is excluded for everything.
|
||||
*/
|
||||
*dr7 = local_db_save();
|
||||
|
||||
/*
|
||||
* The Intel SDM says:
|
||||
*
|
||||
* Certain debug exceptions may clear bits 0-3. The remaining
|
||||
* contents of the DR6 register are never cleared by the
|
||||
* processor. To avoid confusion in identifying debug
|
||||
* exceptions, debug handlers should clear the register before
|
||||
* returning to the interrupted task.
|
||||
*
|
||||
* Keep it simple: clear DR6 immediately.
|
||||
*/
|
||||
get_debugreg(*dr6, 6);
|
||||
set_debugreg(0, 6);
|
||||
/* Filter out all the reserved bits which are preset to 1 */
|
||||
*dr6 &= ~DR6_RESERVED;
|
||||
}
|
||||
|
||||
static __always_inline void debug_exit(unsigned long dr7)
|
||||
{
|
||||
local_db_restore(dr7);
|
||||
}
|
||||
|
||||
/*
|
||||
* Our handling of the processor debug registers is non-trivial.
|
||||
* We do not clear them on entry and exit from the kernel. Therefore
|
||||
@@ -726,86 +775,54 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
|
||||
*
|
||||
* May run on IST stack.
|
||||
*/
|
||||
dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
int user_icebp = 0;
|
||||
unsigned long dr6;
|
||||
bool user_icebp;
|
||||
int si_code;
|
||||
|
||||
ist_enter(regs);
|
||||
|
||||
get_debugreg(dr6, 6);
|
||||
/*
|
||||
* The Intel SDM says:
|
||||
*
|
||||
* Certain debug exceptions may clear bits 0-3. The remaining
|
||||
* contents of the DR6 register are never cleared by the
|
||||
* processor. To avoid confusion in identifying debug
|
||||
* exceptions, debug handlers should clear the register before
|
||||
* returning to the interrupted task.
|
||||
*
|
||||
* Keep it simple: clear DR6 immediately.
|
||||
*/
|
||||
set_debugreg(0, 6);
|
||||
|
||||
/* Filter out all the reserved bits which are preset to 1 */
|
||||
dr6 &= ~DR6_RESERVED;
|
||||
|
||||
/*
|
||||
* The SDM says "The processor clears the BTF flag when it
|
||||
* generates a debug exception." Clear TIF_BLOCKSTEP to keep
|
||||
* TIF_BLOCKSTEP in sync with the hardware BTF flag.
|
||||
*/
|
||||
clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
|
||||
clear_thread_flag(TIF_BLOCKSTEP);
|
||||
|
||||
if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
|
||||
is_sysenter_singlestep(regs))) {
|
||||
dr6 &= ~DR_STEP;
|
||||
if (!dr6)
|
||||
goto exit;
|
||||
/*
|
||||
* else we might have gotten a single-step trap and hit a
|
||||
* watchpoint at the same time, in which case we should fall
|
||||
* through and handle the watchpoint.
|
||||
*/
|
||||
}
|
||||
/*
|
||||
* If DR6 is zero, no point in trying to handle it. The kernel is
|
||||
* not using INT1.
|
||||
*/
|
||||
if (!user && !dr6)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If dr6 has no reason to give us about the origin of this trap,
|
||||
* then it's very likely the result of an icebp/int01 trap.
|
||||
* User wants a sigtrap for that.
|
||||
*/
|
||||
if (!dr6 && user_mode(regs))
|
||||
user_icebp = 1;
|
||||
user_icebp = user && !dr6;
|
||||
|
||||
/* Store the virtualized DR6 value */
|
||||
tsk->thread.debugreg6 = dr6;
|
||||
|
||||
#ifdef CONFIG_KPROBES
|
||||
if (kprobe_debug_handler(regs))
|
||||
goto exit;
|
||||
if (kprobe_debug_handler(regs)) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
|
||||
SIGTRAP) == NOTIFY_STOP)
|
||||
goto exit;
|
||||
|
||||
/*
|
||||
* Let others (NMI) know that the debug stack is in use
|
||||
* as we may switch to the interrupt stack.
|
||||
*/
|
||||
debug_stack_usage_inc();
|
||||
if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
|
||||
SIGTRAP) == NOTIFY_STOP) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* It's safe to allow irq's after DR6 has been saved */
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (v8086_mode(regs)) {
|
||||
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
|
||||
X86_TRAP_DB);
|
||||
cond_local_irq_disable(regs);
|
||||
debug_stack_usage_dec();
|
||||
goto exit;
|
||||
handle_vm86_trap((struct kernel_vm86_regs *) regs, 0,
|
||||
X86_TRAP_DB);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
|
||||
@@ -819,23 +836,91 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
}

si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(regs, error_code, si_code);
cond_local_irq_disable(regs);
debug_stack_usage_dec();
send_sigtrap(regs, 0, si_code);

exit:
ist_exit(regs);
out:
cond_local_irq_disable(regs);
}
NOKPROBE_SYMBOL(do_debug);

static __always_inline void exc_debug_kernel(struct pt_regs *regs,
unsigned long dr6)
{
nmi_enter();
instrumentation_begin();
trace_hardirqs_off_finish();

/*
* Catch SYSENTER with TF set and clear DR_STEP. If this hit a
* watchpoint at the same time then that will still be handled.
*/
if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP;

handle_debug(regs, dr6, false);

if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
nmi_exit();
}

static __always_inline void exc_debug_user(struct pt_regs *regs,
unsigned long dr6)
{
idtentry_enter_user(regs);
instrumentation_begin();

handle_debug(regs, dr6, true);
instrumentation_end();
idtentry_exit_user(regs);
}

#ifdef CONFIG_X86_64
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
unsigned long dr6, dr7;

debug_enter(&dr6, &dr7);
exc_debug_kernel(regs, dr6);
debug_exit(dr7);
}

/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
unsigned long dr6, dr7;

debug_enter(&dr6, &dr7);
exc_debug_user(regs, dr6);
debug_exit(dr7);
}
#else
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
unsigned long dr6, dr7;

debug_enter(&dr6, &dr7);

if (user_mode(regs))
exc_debug_user(regs, dr6);
else
exc_debug_kernel(regs, dr6);

debug_exit(dr7);
}
#endif

/*
* Note that we play around with the 'TS' bit in an attempt to get
* the correct behaviour even in the presence of the asynchronous
* IRQ13 behaviour
*/
static void math_error(struct pt_regs *regs, int error_code, int trapnr)
static void math_error(struct pt_regs *regs, int trapnr)
{
struct task_struct *task = current;
struct fpu *fpu = &task->thread.fpu;
@@ -846,16 +931,16 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
cond_local_irq_enable(regs);

if (!user_mode(regs)) {
if (fixup_exception(regs, trapnr, error_code, 0))
return;
if (fixup_exception(regs, trapnr, 0, 0))
goto exit;

task->thread.error_code = error_code;
task->thread.error_code = 0;
task->thread.trap_nr = trapnr;

if (notify_die(DIE_TRAP, str, regs, error_code,
trapnr, SIGFPE) != NOTIFY_STOP)
die(str, regs, error_code);
return;
if (notify_die(DIE_TRAP, str, regs, 0, trapnr,
SIGFPE) != NOTIFY_STOP)
die(str, regs, 0);
goto exit;
}

/*
@@ -864,32 +949,37 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
fpu__save(fpu);

task->thread.trap_nr = trapnr;
task->thread.error_code = error_code;
task->thread.error_code = 0;

si_code = fpu__exception_code(fpu, trapnr);
/* Retry when we get spurious exceptions: */
if (!si_code)
return;
goto exit;

force_sig_fault(SIGFPE, si_code,
(void __user *)uprobe_get_trap_addr(regs));
exit:
cond_local_irq_disable(regs);
}

dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
DEFINE_IDTENTRY(exc_coprocessor_error)
{
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
math_error(regs, error_code, X86_TRAP_MF);
math_error(regs, X86_TRAP_MF);
}

dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
DEFINE_IDTENTRY(exc_simd_coprocessor_error)
{
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
math_error(regs, error_code, X86_TRAP_XF);
if (IS_ENABLED(CONFIG_X86_INVD_BUG)) {
/* AMD 486 bug: INVD in CPL 0 raises #XF instead of #GP */
if (!static_cpu_has(X86_FEATURE_XMM)) {
__exc_general_protection(regs, 0);
return;
}
}
math_error(regs, X86_TRAP_XF);
}

dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
DEFINE_IDTENTRY(exc_spurious_interrupt_bug)
{
/*
* This addresses a Pentium Pro Erratum:
@@ -912,13 +1002,10 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
*/
}

dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
DEFINE_IDTENTRY(exc_device_not_available)
{
unsigned long cr0 = read_cr0();

RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");

#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
struct math_emu_info info = { };
@@ -927,6 +1014,8 @@ do_device_not_available(struct pt_regs *regs, long error_code)

info.regs = regs;
math_emulate(&info);

cond_local_irq_disable(regs);
return;
}
#endif
@@ -941,22 +1030,20 @@ do_device_not_available(struct pt_regs *regs, long error_code)
* to kill the task than getting stuck in a never-ending
* loop of #NM faults.
*/
die("unexpected #NM exception", regs, error_code);
die("unexpected #NM exception", regs, 0);
}
}
NOKPROBE_SYMBOL(do_device_not_available);

#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
DEFINE_IDTENTRY_SW(iret_error)
{
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
local_irq_enable();

if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
if (notify_die(DIE_TRAP, "iret exception", regs, 0,
X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, 0,
ILL_BADSTK, (void __user *)NULL);
}
local_irq_disable();
}
#endif

@@ -967,23 +1054,10 @@ void __init trap_init(void)

idt_setup_traps();

/*
* Set the IDT descriptor to a fixed read-only location, so that the
* "sidt" instruction will not leak the location of the kernel, and
* to defend the IDT against arbitrary memory write vulnerabilities.
* It will be reloaded in cpu_init() */
cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
PAGE_KERNEL_RO);
idt_descr.address = CPU_ENTRY_AREA_RO_IDT;

/*
* Should be a barrier for any external CPU state:
*/
cpu_init();

idt_setup_ist_traps();

x86_init.irqs.trap_init();

idt_setup_debugidt_traps();
}

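For orientation (not part of the diff): the traps.c hunks above all follow one conversion pattern. The open-coded dotraplinkage handlers, which took an explicit error_code and did their own RCU/entry bookkeeping, become handlers emitted through the IDTENTRY macros, which generate the entry/exit glue and hand the body only pt_regs. A minimal sketch of that pattern, using a made-up handler name:

/* Old style: a dotraplinkage handler called from an asm stub with an
 * explicit error code. "do_example" is a hypothetical name. */
dotraplinkage void do_example(struct pt_regs *regs, long error_code)
{
	/* handler body */
}

/* New style: DEFINE_IDTENTRY() emits the C entry point plus the
 * enter/exit glue; the body only sees pt_regs. "exc_example" is made up. */
DEFINE_IDTENTRY(exc_example)
{
	/* handler body */
}
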
@@ -41,6 +41,7 @@ EXPORT_SYMBOL(tsc_khz);
* TSC can be unstable due to cpufreq or due to unsynced TSCs
*/
static int __read_mostly tsc_unstable;
static unsigned int __initdata tsc_early_khz;

static DEFINE_STATIC_KEY_FALSE(__use_tsc);

@@ -59,6 +60,12 @@ struct cyc2ns {

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);

static int __init tsc_early_khz_setup(char *buf)
{
return kstrtouint(buf, 0, &tsc_early_khz);
}
early_param("tsc_early_khz", tsc_early_khz_setup);

__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -1412,7 +1419,10 @@ static bool __init determine_cpu_tsc_frequencies(bool early)

if (early) {
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
if (tsc_early_khz)
tsc_khz = tsc_early_khz;
else
tsc_khz = x86_platform.calibrate_tsc();
} else {
/* We should not be here with non-native cpu calibration */
WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);

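For context (not part of the diff): the new tsc_early_khz parameter is given on the kernel command line and is interpreted in kHz; the frequency below is only an illustrative value, e.g. for a 2.5 GHz TSC:

tsc_early_khz=2500000

As the hunk above shows, the override only takes effect on the early calibration path; the non-early path is unchanged.
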
@@ -81,7 +81,7 @@
#define UMIP_INST_SLDT 3 /* 0F 00 /0 */
#define UMIP_INST_STR 4 /* 0F 00 /1 */

const char * const umip_insns[5] = {
static const char * const umip_insns[5] = {
[UMIP_INST_SGDT] = "SGDT",
[UMIP_INST_SIDT] = "SIDT",
[UMIP_INST_SMSW] = "SMSW",

@@ -74,13 +74,7 @@ static bool in_entry_code(unsigned long ip)
{
char *addr = (char *)ip;

if (addr >= __entry_text_start && addr < __entry_text_end)
return true;

if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
return true;

return false;
return addr >= __entry_text_start && addr < __entry_text_end;
}

static inline unsigned long *last_frame(struct unwind_state *state)
@@ -344,6 +338,9 @@ bad_address:
if (IS_ENABLED(CONFIG_X86_32))
goto the_end;

if (state->task != current)
goto the_end;

if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",

@@ -8,19 +8,21 @@
#include <asm/orc_lookup.h>

#define orc_warn(fmt, ...) \
printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)

#define orc_warn_current(args...) \
({ \
if (state->task == current) \
orc_warn(args); \
})

extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];

static DEFINE_MUTEX(sort_mutex);
int *cur_orc_ip_table = __start_orc_unwind_ip;
struct orc_entry *cur_orc_table = __start_orc_unwind;

unsigned int lookup_num_blocks;
bool orc_init;
static bool orc_init __ro_after_init;
static unsigned int lookup_num_blocks __ro_after_init;

static inline unsigned long orc_ip(const int *ip)
{
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;

if (!orc_init)
return NULL;

if (ip == 0)
return &null_orc_entry;

@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)

#ifdef CONFIG_MODULES

static DEFINE_MUTEX(sort_mutex);
static int *cur_orc_ip_table = __start_orc_unwind_ip;
static struct orc_entry *cur_orc_table = __start_orc_unwind;

static void orc_sort_swap(void *_a, void *_b, int size)
{
struct orc_entry *orc_a, *orc_b;
@@ -317,12 +320,19 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address);

unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
{
struct task_struct *task = state->task;

if (unwind_done(state))
return NULL;

if (state->regs)
return &state->regs->ip;

if (task != current && state->sp == task->thread.sp) {
struct inactive_task_frame *frame = (void *)task->thread.sp;
return &frame->ret_addr;
}

if (state->sp)
return (unsigned long *)state->sp - 1;

@@ -381,9 +391,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
return true;
}

/*
* If state->regs is non-NULL, and points to a full pt_regs, just get the reg
* value from state->regs.
*
* Otherwise, if state->regs just points to IRET regs, and the previous frame
* had full regs, it's safe to get the value from the previous regs. This can
* happen when early/late IRQ entry code gets interrupted by an NMI.
*/
static bool get_reg(struct unwind_state *state, unsigned int reg_off,
unsigned long *val)
{
unsigned int reg = reg_off/8;

if (!state->regs)
return false;

if (state->full_regs) {
*val = ((unsigned long *)state->regs)[reg];
return true;
}

if (state->prev_regs) {
*val = ((unsigned long *)state->prev_regs)[reg];
return true;
}

return false;
}

bool unwind_next_frame(struct unwind_state *state)
{
unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@@ -445,43 +484,39 @@ bool unwind_next_frame(struct unwind_state *state)
break;

case ORC_REG_R10:
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg R10 at ip %pB\n",
(void *)state->ip);
if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
orc_warn_current("missing R10 value at %pB\n",
(void *)state->ip);
goto err;
}
sp = state->regs->r10;
break;

case ORC_REG_R13:
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg R13 at ip %pB\n",
(void *)state->ip);
if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
orc_warn_current("missing R13 value at %pB\n",
(void *)state->ip);
goto err;
}
sp = state->regs->r13;
break;

case ORC_REG_DI:
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg DI at ip %pB\n",
(void *)state->ip);
if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
orc_warn_current("missing RDI value at %pB\n",
(void *)state->ip);
goto err;
}
sp = state->regs->di;
break;

case ORC_REG_DX:
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg DX at ip %pB\n",
(void *)state->ip);
if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
orc_warn_current("missing DX value at %pB\n",
(void *)state->ip);
goto err;
}
sp = state->regs->dx;
break;

default:
orc_warn("unknown SP base reg %d for ip %pB\n",
orc_warn("unknown SP base reg %d at %pB\n",
orc->sp_reg, (void *)state->ip);
goto err;
}
@@ -504,44 +539,48 @@ bool unwind_next_frame(struct unwind_state *state)

state->sp = sp;
state->regs = NULL;
state->prev_regs = NULL;
state->signal = false;
break;

case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
orc_warn_current("can't access registers at %pB\n",
(void *)orig_ip);
goto err;
}

state->regs = (struct pt_regs *)sp;
state->prev_regs = NULL;
state->full_regs = true;
state->signal = true;
break;

case ORC_TYPE_REGS_IRET:
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
orc_warn_current("can't access iret registers at %pB\n",
(void *)orig_ip);
goto err;
}

if (state->full_regs)
state->prev_regs = state->regs;
state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false;
state->signal = true;
break;

default:
orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
orc_warn("unknown .orc_unwind entry type %d at %pB\n",
orc->type, (void *)orig_ip);
break;
goto err;
}

/* Find BP: */
switch (orc->bp_reg) {
case ORC_REG_UNDEFINED:
if (state->regs && state->full_regs)
state->bp = state->regs->bp;
if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
state->bp = tmp;
break;

case ORC_REG_PREV_SP:
@@ -564,8 +603,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
orc_warn("stack going in the wrong direction? ip=%pB\n",
(void *)orig_ip);
orc_warn_current("stack going in the wrong direction? at %pB\n",
(void *)orig_ip);
goto err;
}

@@ -588,17 +627,20 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
memset(state, 0, sizeof(*state));
state->task = task;

if (!orc_init)
goto err;

/*
* Refuse to unwind the stack of a task while it's executing on another
* CPU. This check is racy, but that's ok: the unwinder has other
* checks to prevent it from going off the rails.
*/
if (task_on_another_cpu(task))
goto done;
goto err;

if (regs) {
if (user_mode(regs))
goto done;
goto the_end;

state->ip = regs->ip;
state->sp = regs->sp;
@@ -631,6 +673,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
* generate some kind of backtrace if this happens.
*/
void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
state->error = true;
if (get_stack_info(next_page, state->task, &state->stack_info,
&state->stack_mask))
return;
@@ -651,13 +694,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Otherwise, skip ahead to the user-specified starting frame: */
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
state->sp <= (unsigned long)first_frame))
state->sp < (unsigned long)first_frame))
unwind_next_frame(state);

return;

done:
err:
state->error = true;
the_end:
state->stack_info.type = STACK_TYPE_UNKNOWN;
return;
}
EXPORT_SYMBOL_GPL(__unwind_start);

@@ -171,7 +171,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
pte_t *pte;
int i;

down_write(&mm->mmap_sem);
mmap_write_lock(mm);
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
@@ -197,7 +197,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
}
pte_unmap_unlock(pte, ptl);
out:
up_write(&mm->mmap_sem);
mmap_write_unlock(mm);
flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false);
}

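An aside (not part of the diff): the two mark_screen_rdonly() hunks above swap the raw mmap_sem rwsem calls for the mmap locking wrappers. A minimal sketch of that pattern, with a hypothetical helper name and an assumed include:

#include <linux/mm.h>	/* assumed to provide mmap_write_lock()/unlock() */

/* Hypothetical helper illustrating the wrapper-based locking pattern. */
static void walk_under_mmap_lock(struct mm_struct *mm)
{
	mmap_write_lock(mm);	/* was: down_write(&mm->mmap_sem) */
	/* ... inspect or modify the mm's page tables ... */
	mmap_write_unlock(mm);	/* was: up_write(&mm->mmap_sem) */
}
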
@@ -134,7 +134,6 @@ SECTIONS
KPROBES_TEXT
ALIGN_ENTRY_TEXT_BEGIN
ENTRY_TEXT
IRQENTRY_TEXT
ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
*(.fixup)

@@ -79,7 +79,7 @@ struct x86_init_ops x86_init __initdata = {
.irqs = {
.pre_vector_init = init_ISA_irqs,
.intr_init = native_init_IRQ,
.trap_init = x86_init_noop,
.intr_mode_select = apic_intr_mode_select,
.intr_mode_init = apic_intr_mode_init
},