Merge drm/drm-next into drm-intel-next
Sync up with upstream.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
@@ -116,7 +116,6 @@ obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o

obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_APB_TIMER) += apb_timer.o

obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o

@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y

obj-$(CONFIG_ACPI) += boot.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o

@@ -1,12 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-only */
.text
#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/segment.h>
#include <asm/pgtable_types.h>
#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>
#include <asm/frame.h>
#include <asm/nospec-branch.h>

# Copyright 2003 Pavel Machek <pavel@suse.cz

@@ -39,6 +41,7 @@ SYM_FUNC_START(wakeup_long64)
movq saved_rbp, %rbp

movq saved_rip, %rax
ANNOTATE_RETPOLINE_SAFE
jmp *%rax
SYM_FUNC_END(wakeup_long64)

@@ -126,6 +129,7 @@ SYM_FUNC_START(do_suspend_lowlevel)
FRAME_END
jmp restore_processor_state
SYM_FUNC_END(do_suspend_lowlevel)
STACK_FRAME_NON_STANDARD do_suspend_lowlevel

.data
saved_rbp: .quad 0

@@ -1,347 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* apb_timer.c: Driver for Langwell APB timers
*
* (C) Copyright 2009 Intel Corporation
* Author: Jacob Pan (jacob.jun.pan@intel.com)
*
* Note:
* Langwell is the south complex of Intel Moorestown MID platform. There are
* eight external timers in total that can be used by the operating system.
* The timer information, such as frequency and addresses, is provided to the
* OS via SFI tables.
* Timer interrupts are routed via FW/HW emulated IOAPIC independently via
* individual redirection table entries (RTE).
* Unlike HPET, there is no master counter, therefore one of the timers are
* used as clocksource. The overall allocation looks like:
* - timer 0 - NR_CPUs for per cpu timer
* - one timer for clocksource
* - one timer for watchdog driver.
* It is also worth notice that APB timer does not support true one-shot mode,
* free-running mode will be used here to emulate one-shot mode.
* APB timer can also be used as broadcast timer along with per cpu local APIC
* timer, but by default APB timer has higher rating than local APIC timers.
*/

#include <linux/delay.h>
#include <linux/dw_apb_timer.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/pm.h>
#include <linux/sfi.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/irq.h>

#include <asm/fixmap.h>
#include <asm/apb_timer.h>
#include <asm/intel-mid.h>
#include <asm/time.h>

#define APBT_CLOCKEVENT_RATING 110
#define APBT_CLOCKSOURCE_RATING 250

#define APBT_CLOCKEVENT0_NUM (0)
#define APBT_CLOCKSOURCE_NUM (2)

static phys_addr_t apbt_address;
static int apb_timer_block_enabled;
static void __iomem *apbt_virt_address;

/*
* Common DW APB timer info
*/
static unsigned long apbt_freq;

struct apbt_dev {
struct dw_apb_clock_event_device *timer;
unsigned int num;
int cpu;
unsigned int irq;
char name[10];
};

static struct dw_apb_clocksource *clocksource_apbt;

static inline void __iomem *adev_virt_addr(struct apbt_dev *adev)
{
return apbt_virt_address + adev->num * APBTMRS_REG_SIZE;
}

static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);

#ifdef CONFIG_SMP
static unsigned int apbt_num_timers_used;
#endif

static inline void apbt_set_mapping(void)
{
struct sfi_timer_table_entry *mtmr;
int phy_cs_timer_id = 0;

if (apbt_virt_address) {
pr_debug("APBT base already mapped\n");
return;
}
mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
if (mtmr == NULL) {
printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
APBT_CLOCKEVENT0_NUM);
return;
}
apbt_address = (phys_addr_t)mtmr->phys_addr;
if (!apbt_address) {
printk(KERN_WARNING "No timer base from SFI, use default\n");
apbt_address = APBT_DEFAULT_BASE;
}
apbt_virt_address = ioremap(apbt_address, APBT_MMAP_SIZE);
if (!apbt_virt_address) {
pr_debug("Failed mapping APBT phy address at %lu\n",\
(unsigned long)apbt_address);
goto panic_noapbt;
}
apbt_freq = mtmr->freq_hz;
sfi_free_mtmr(mtmr);

/* Now figure out the physical timer id for clocksource device */
mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM);
if (mtmr == NULL)
goto panic_noapbt;

/* Now figure out the physical timer id */
pr_debug("Use timer %d for clocksource\n",
(int)(mtmr->phys_addr & 0xff) / APBTMRS_REG_SIZE);
phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) /
APBTMRS_REG_SIZE;

clocksource_apbt = dw_apb_clocksource_init(APBT_CLOCKSOURCE_RATING,
"apbt0", apbt_virt_address + phy_cs_timer_id *
APBTMRS_REG_SIZE, apbt_freq);
return;

panic_noapbt:
panic("Failed to setup APB system timer\n");

}

static inline void apbt_clear_mapping(void)
{
iounmap(apbt_virt_address);
apbt_virt_address = NULL;
}

static int __init apbt_clockevent_register(void)
{
struct sfi_timer_table_entry *mtmr;
struct apbt_dev *adev = this_cpu_ptr(&cpu_apbt_dev);

mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
if (mtmr == NULL) {
printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
APBT_CLOCKEVENT0_NUM);
return -ENODEV;
}

adev->num = smp_processor_id();
adev->timer = dw_apb_clockevent_init(smp_processor_id(), "apbt0",
intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ?
APBT_CLOCKEVENT_RATING - 100 : APBT_CLOCKEVENT_RATING,
adev_virt_addr(adev), 0, apbt_freq);
/* Firmware does EOI handling for us. */
adev->timer->eoi = NULL;

if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
global_clock_event = &adev->timer->ced;
printk(KERN_DEBUG "%s clockevent registered as global\n",
global_clock_event->name);
}

dw_apb_clockevent_register(adev->timer);

sfi_free_mtmr(mtmr);
return 0;
}

#ifdef CONFIG_SMP

static void apbt_setup_irq(struct apbt_dev *adev)
{
irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
}

/* Should be called with per cpu */
void apbt_setup_secondary_clock(void)
{
struct apbt_dev *adev;
int cpu;

/* Don't register boot CPU clockevent */
cpu = smp_processor_id();
if (!cpu)
return;

adev = this_cpu_ptr(&cpu_apbt_dev);
if (!adev->timer) {
adev->timer = dw_apb_clockevent_init(cpu, adev->name,
APBT_CLOCKEVENT_RATING, adev_virt_addr(adev),
adev->irq, apbt_freq);
adev->timer->eoi = NULL;
} else {
dw_apb_clockevent_resume(adev->timer);
}

printk(KERN_INFO "Registering CPU %d clockevent device %s, cpu %08x\n",
cpu, adev->name, adev->cpu);

apbt_setup_irq(adev);
dw_apb_clockevent_register(adev->timer);

return;
}

/*
* this notify handler process CPU hotplug events. in case of S0i3, nonboot
* cpus are disabled/enabled frequently, for performance reasons, we keep the
* per cpu timer irq registered so that we do need to do free_irq/request_irq.
*
* TODO: it might be more reliable to directly disable percpu clockevent device
* without the notifier chain. currently, cpu 0 may get interrupts from other
* cpu timers during the offline process due to the ordering of notification.
* the extra interrupt is harmless.
*/
static int apbt_cpu_dead(unsigned int cpu)
{
struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);

dw_apb_clockevent_pause(adev->timer);
if (system_state == SYSTEM_RUNNING) {
pr_debug("skipping APBT CPU %u offline\n", cpu);
} else {
pr_debug("APBT clockevent for cpu %u offline\n", cpu);
dw_apb_clockevent_stop(adev->timer);
}
return 0;
}

static __init int apbt_late_init(void)
{
if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ||
!apb_timer_block_enabled)
return 0;
return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "x86/apb:dead", NULL,
apbt_cpu_dead);
}
fs_initcall(apbt_late_init);
#else

void apbt_setup_secondary_clock(void) {}

#endif /* CONFIG_SMP */

static int apbt_clocksource_register(void)
{
u64 start, now;
u64 t1;

/* Start the counter, use timer 2 as source, timer 0/1 for event */
dw_apb_clocksource_start(clocksource_apbt);

/* Verify whether apbt counter works */
t1 = dw_apb_clocksource_read(clocksource_apbt);
start = rdtsc();

/*
* We don't know the TSC frequency yet, but waiting for
* 200000 TSC cycles is safe:
* 4 GHz == 50us
* 1 GHz == 200us
*/
do {
rep_nop();
now = rdtsc();
} while ((now - start) < 200000UL);

/* APBT is the only always on clocksource, it has to work! */
if (t1 == dw_apb_clocksource_read(clocksource_apbt))
panic("APBT counter not counting. APBT disabled\n");

dw_apb_clocksource_register(clocksource_apbt);

return 0;
}

/*
* Early setup the APBT timer, only use timer 0 for booting then switch to
* per CPU timer if possible.
* returns 1 if per cpu apbt is setup
* returns 0 if no per cpu apbt is chosen
* panic if set up failed, this is the only platform timer on Moorestown.
*/
void __init apbt_time_init(void)
{
#ifdef CONFIG_SMP
int i;
struct sfi_timer_table_entry *p_mtmr;
struct apbt_dev *adev;
#endif

if (apb_timer_block_enabled)
return;
apbt_set_mapping();
if (!apbt_virt_address)
goto out_noapbt;
/*
* Read the frequency and check for a sane value, for ESL model
* we extend the possible clock range to allow time scaling.
*/

if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) {
pr_debug("APBT has invalid freq 0x%lx\n", apbt_freq);
goto out_noapbt;
}
if (apbt_clocksource_register()) {
pr_debug("APBT has failed to register clocksource\n");
goto out_noapbt;
}
if (!apbt_clockevent_register())
apb_timer_block_enabled = 1;
else {
pr_debug("APBT has failed to register clockevent\n");
goto out_noapbt;
}
#ifdef CONFIG_SMP
/* kernel cmdline disable apb timer, so we will use lapic timers */
if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
printk(KERN_INFO "apbt: disabled per cpu timer\n");
return;
}
pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
if (num_possible_cpus() <= sfi_mtimer_num)
apbt_num_timers_used = num_possible_cpus();
else
apbt_num_timers_used = 1;
pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);

/* here we set up per CPU timer data structure */
for (i = 0; i < apbt_num_timers_used; i++) {
adev = &per_cpu(cpu_apbt_dev, i);
adev->num = i;
adev->cpu = i;
p_mtmr = sfi_get_mtmr(i);
if (p_mtmr)
adev->irq = p_mtmr->irq;
else
printk(KERN_ERR "Failed to get timer for cpu %d\n", i);
snprintf(adev->name, sizeof(adev->name) - 1, "apbt%d", i);
}
#endif

return;

out_noapbt:
apbt_clear_mapping();
apb_timer_block_enabled = 0;
panic("failed to enable APB timer\n");
}
@@ -41,6 +41,7 @@
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <linux/atomic.h>
#include <asm/barrier.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
@@ -477,6 +478,9 @@ static int lapic_next_deadline(unsigned long delta,
{
u64 tsc;

/* This MSR is special and need a special fence: */
weak_wrmsr_fence();

tsc = rdtsc();
wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
@@ -1743,6 +1747,7 @@ void apic_ap_setup(void)

#ifdef CONFIG_X86_X2APIC
int x2apic_mode;
EXPORT_SYMBOL_GPL(x2apic_mode);

enum {
X2APIC_OFF,
@@ -2133,18 +2138,11 @@ void __init register_lapic_address(unsigned long address)
* Local APIC interrupts
*/

/**
* spurious_interrupt - Catch all for interrupts raised on unused vectors
* @regs: Pointer to pt_regs on stack
* @vector: The vector number
*
* This is invoked from ASM entry code to catch all interrupts which
* trigger on an entry which is routed to the common_spurious idtentry
* point.
*
* Also called from sysvec_spurious_apic_interrupt().
/*
* Common handling code for spurious_interrupt and spurious_vector entry
* points below. No point in allowing the compiler to inline it twice.
*/
DEFINE_IDTENTRY_IRQ(spurious_interrupt)
static noinline void handle_spurious_interrupt(u8 vector)
{
u32 v;

@@ -2179,9 +2177,23 @@ out:
trace_spurious_apic_exit(vector);
}

/**
* spurious_interrupt - Catch all for interrupts raised on unused vectors
* @regs: Pointer to pt_regs on stack
* @vector: The vector number
*
* This is invoked from ASM entry code to catch all interrupts which
* trigger on an entry which is routed to the common_spurious idtentry
* point.
*/
DEFINE_IDTENTRY_IRQ(spurious_interrupt)
{
handle_spurious_interrupt(vector);
}

DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
{
__spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
}

/*

@@ -198,7 +198,7 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);

/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
/* Will be called in mpparse/ACPI codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
int i;
@@ -2863,7 +2863,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,

/*
* If mp_register_ioapic() is called during early boot stage when
* walking ACPI/SFI/DT tables, it's too early to create irqdomain,
* walking ACPI/DT tables, it's too early to create irqdomain,
* we are still using bootmem allocator. So delay it to setup_IO_APIC().
*/
if (hotplug) {

@@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector)
{
u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);

x2apic_wrmsr_fence();
/* x2apic MSRs are special and need a special fence: */
weak_wrmsr_fence();
__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
}

@@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
unsigned long flags;
u32 dest;

x2apic_wrmsr_fence();
/* x2apic MSRs are special and need a special fence: */
weak_wrmsr_fence();
local_irq_save(flags);

tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);

@@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector)
{
u32 dest = per_cpu(x86_cpu_to_apicid, cpu);

x2apic_wrmsr_fence();
/* x2apic MSRs are special and need a special fence: */
weak_wrmsr_fence();
__x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
}

@@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
unsigned long this_cpu;
unsigned long flags;

x2apic_wrmsr_fence();
/* x2apic MSRs are special and need a special fence: */
weak_wrmsr_fence();

local_irq_save(flags);

@@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which)
{
unsigned long cfg = __prepare_ICR(which, vector, 0);

x2apic_wrmsr_fence();
/* x2apic MSRs are special and need a special fence: */
weak_wrmsr_fence();
native_x2apic_icr_write(cfg, 0);
}

@@ -13,9 +13,6 @@ int main(void)
{
#ifdef CONFIG_PARAVIRT
#ifdef CONFIG_PARAVIRT_XXL
OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template,
cpu.usergs_sysret64);
OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs);
#ifdef CONFIG_DEBUG_ENTRY
OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl);
#endif

@@ -10,6 +10,8 @@
*/

#include <linux/interrupt.h>

#include <asm/acrn.h>
#include <asm/apic.h>
#include <asm/cpufeatures.h>
#include <asm/desc.h>
@@ -19,7 +21,7 @@

static u32 __init acrn_detect(void)
{
return hypervisor_cpuid_base("ACRNACRNACRN", 0);
return acrn_cpuid_base();
}

static void __init acrn_init_platform(void)
@@ -55,6 +57,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback)
set_irq_regs(old_regs);
}

void acrn_setup_intr_handler(void (*handler)(void))
{
acrn_intr_handler = handler;
}
EXPORT_SYMBOL_GPL(acrn_setup_intr_handler);

void acrn_remove_intr_handler(void)
{
acrn_intr_handler = NULL;
}
EXPORT_SYMBOL_GPL(acrn_remove_intr_handler);

const __initconst struct hypervisor_x86 x86_hyper_acrn = {
.name = "ACRN",
.detect = acrn_detect,

@@ -542,12 +542,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
u32 ecx;

ecx = cpuid_ecx(0x8000001e);
nodes_per_socket = ((ecx >> 8) & 7) + 1;
__max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1;
} else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
u64 value;

rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes_per_socket = ((value >> 3) & 7) + 1;
__max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1;
}

if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&

@@ -960,6 +960,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);

if (c->extended_cpuid_level >= 0x8000001f)
c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);

init_scattered_cpuid_features(c);
init_speculation_control(c);

@@ -1739,8 +1742,8 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(void *, hardirq_stack_ptr);
DEFINE_PER_CPU(bool, hardirq_stack_inuse);

DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);

@@ -24,6 +24,7 @@
#include <asm/traps.h>
#include <asm/resctrl.h>
#include <asm/numa.h>
#include <asm/thermal.h>

#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -719,6 +720,8 @@ static void init_intel(struct cpuinfo_x86 *c)
tsx_disable();

split_lock_init();

intel_init_thermal(c);
}

#ifdef CONFIG_X86_32
@@ -1159,6 +1162,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1),
{}
};

@@ -9,8 +9,6 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
mce-inject-y := inject.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o

obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o

obj-$(CONFIG_ACPI_APEI) += apei.o

obj-$(CONFIG_X86_MCELOG_LEGACY) += dev-mcelog.o

@@ -877,6 +877,12 @@ static atomic_t mce_executing;
*/
static atomic_t mce_callin;

/*
* Track which CPUs entered the MCA broadcast synchronization and which not in
* order to print holdouts.
*/
static cpumask_t mce_missing_cpus = CPU_MASK_ALL;

/*
* Check if a timeout waiting for other CPUs happened.
*/
@@ -894,8 +900,12 @@ static int mce_timed_out(u64 *t, const char *msg)
if (!mca_cfg.monarch_timeout)
goto out;
if ((s64)*t < SPINUNIT) {
if (mca_cfg.tolerant <= 1)
if (mca_cfg.tolerant <= 1) {
if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
cpumask_pr_args(&mce_missing_cpus));
mce_panic(msg, NULL, NULL);
}
cpu_missing = 1;
return 1;
}
@@ -1006,6 +1016,7 @@ static int mce_start(int *no_way_out)
* is updated before mce_callin.
*/
order = atomic_inc_return(&mce_callin);
cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);

/*
* Wait for everyone.
@@ -1114,6 +1125,7 @@ static int mce_end(int order)
reset:
atomic_set(&global_nwo, 0);
atomic_set(&mce_callin, 0);
cpumask_setall(&mce_missing_cpus);
barrier();

/*
@@ -1992,10 +2004,9 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
* that out because it's an indirect call. Annotate it.
*/
instrumentation_begin();
trace_hardirqs_off_finish();

machine_check_vector(regs);
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();

instrumentation_end();
irqentry_nmi_exit(regs, irq_state);
}
@@ -2004,7 +2015,9 @@ static __always_inline void exc_machine_check_user(struct pt_regs *regs)
{
irqentry_enter_from_user_mode(regs);
instrumentation_begin();

machine_check_vector(regs);

instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
@@ -2177,7 +2190,6 @@ __setup("mce", mcheck_enable);

int __init mcheck_init(void)
{
mcheck_intel_therm_init();
mce_register_decode_chain(&early_nb);
mce_register_decode_chain(&mce_uc_nb);
mce_register_decode_chain(&mce_default_nb);
@@ -2712,6 +2724,7 @@ static void mce_reset(void)
atomic_set(&mce_executing, 0);
atomic_set(&mce_callin, 0);
atomic_set(&global_nwo, 0);
cpumask_setall(&mce_missing_cpus);
}

static int fake_panic_get(void *data, u64 *val)

@@ -531,7 +531,6 @@ static void intel_imc_init(struct cpuinfo_x86 *c)

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_thermal(c);
intel_init_cmci();
intel_init_lmce();
intel_ppin_init(c);

@@ -1,739 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Thermal throttle event support code (such as syslog messaging and rate
* limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
*
* This allows consistent reporting of CPU thermal throttle events.
*
* Maintains a counter in /sys that keeps track of the number of thermal
* events, such that the user knows how bad the thermal problem might be
* (since the logging to syslog is rate limited).
*
* Author: Dmitriy Zavin (dmitriyz@google.com)
*
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
* Inspired by Ross Biro's and Al Borchers' counter code.
*/
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#include "internal.h"

/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)

#define THERMAL_THROTTLING_EVENT 0
#define POWER_LIMIT_EVENT 1

/**
* struct _thermal_state - Represent the current thermal event state
* @next_check: Stores the next timestamp, when it is allowed
* to log the next warning message.
* @last_interrupt_time: Stores the timestamp for the last threshold
* high event.
* @therm_work: Delayed workqueue structure
* @count: Stores the current running count for thermal
* or power threshold interrupts.
* @last_count: Stores the previous running count for thermal
* or power threshold interrupts.
* @max_time_ms: This shows the maximum amount of time CPU was
* in throttled state for a single thermal
* threshold high to low state.
* @total_time_ms: This is a cumulative time during which CPU was
* in the throttled state.
* @rate_control_active: Set when a throttling message is logged.
* This is used for the purpose of rate-control.
* @new_event: Stores the last high/low status of the
* THERM_STATUS_PROCHOT or
* THERM_STATUS_POWER_LIMIT.
* @level: Stores whether this _thermal_state instance is
* for a CORE level or for PACKAGE level.
* @sample_index: Index for storing the next sample in the buffer
* temp_samples[].
* @sample_count: Total number of samples collected in the buffer
* temp_samples[].
* @average: The last moving average of temperature samples
* @baseline_temp: Temperature at which thermal threshold high
* interrupt was generated.
* @temp_samples: Storage for temperature samples to calculate
* moving average.
*
* This structure is used to represent data related to thermal state for a CPU.
* There is a separate storage for core and package level for each CPU.
*/
struct _thermal_state {
u64 next_check;
u64 last_interrupt_time;
struct delayed_work therm_work;
unsigned long count;
unsigned long last_count;
unsigned long max_time_ms;
unsigned long total_time_ms;
bool rate_control_active;
bool new_event;
u8 level;
u8 sample_index;
u8 sample_count;
u8 average;
u8 baseline_temp;
u8 temp_samples[3];
};

struct thermal_state {
struct _thermal_state core_throttle;
struct _thermal_state core_power_limit;
struct _thermal_state package_throttle;
struct _thermal_state package_power_limit;
struct _thermal_state core_thresh0;
struct _thermal_state core_thresh1;
struct _thermal_state pkg_thresh0;
struct _thermal_state pkg_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);

/* Callback to handle core package threshold_interrupts */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);

/* Callback support of rate control, return true, if
* callback has rate control */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);


static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en = ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name) \
static DEVICE_ATTR(_name, 0444, \
therm_throt_device_show_##_name, \
NULL) \

#define define_therm_throt_device_show_func(event, name) \
\
static ssize_t therm_throt_device_show_##event##_##name( \
struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
unsigned int cpu = dev->id; \
ssize_t ret; \
\
preempt_disable(); /* CPU hotplug */ \
if (cpu_online(cpu)) { \
ret = sprintf(buf, "%lu\n", \
per_cpu(thermal_state, cpu).event.name); \
} else \
ret = 0; \
preempt_enable(); \
\
return ret; \
}

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);

define_therm_throt_device_show_func(core_throttle, max_time_ms);
define_therm_throt_device_one_ro(core_throttle_max_time_ms);

define_therm_throt_device_show_func(package_throttle, max_time_ms);
define_therm_throt_device_one_ro(package_throttle_max_time_ms);

define_therm_throt_device_show_func(core_throttle, total_time_ms);
define_therm_throt_device_one_ro(core_throttle_total_time_ms);

define_therm_throt_device_show_func(package_throttle, total_time_ms);
define_therm_throt_device_one_ro(package_throttle_total_time_ms);

static struct attribute *thermal_throttle_attrs[] = {
&dev_attr_core_throttle_count.attr,
&dev_attr_core_throttle_max_time_ms.attr,
&dev_attr_core_throttle_total_time_ms.attr,
NULL
};

static const struct attribute_group thermal_attr_group = {
.attrs = thermal_throttle_attrs,
.name = "thermal_throttle"
};
#endif /* CONFIG_SYSFS */

#define CORE_LEVEL 0
#define PACKAGE_LEVEL 1

#define THERM_THROT_POLL_INTERVAL HZ
#define THERM_STATUS_PROCHOT_LOG BIT(1)

#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))

static void clear_therm_status_log(int level)
{
int msr;
u64 mask, msr_val;

if (level == CORE_LEVEL) {
msr = MSR_IA32_THERM_STATUS;
mask = THERM_STATUS_CLEAR_CORE_MASK;
} else {
msr = MSR_IA32_PACKAGE_THERM_STATUS;
mask = THERM_STATUS_CLEAR_PKG_MASK;
}

rdmsrl(msr, msr_val);
msr_val &= mask;
wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
}

static void get_therm_status(int level, bool *proc_hot, u8 *temp)
{
int msr;
u64 msr_val;

if (level == CORE_LEVEL)
msr = MSR_IA32_THERM_STATUS;
else
msr = MSR_IA32_PACKAGE_THERM_STATUS;

rdmsrl(msr, msr_val);
if (msr_val & THERM_STATUS_PROCHOT_LOG)
*proc_hot = true;
else
*proc_hot = false;

*temp = (msr_val >> 16) & 0x7F;
}

static void __maybe_unused throttle_active_work(struct work_struct *work)
{
struct _thermal_state *state = container_of(to_delayed_work(work),
struct _thermal_state, therm_work);
unsigned int i, avg, this_cpu = smp_processor_id();
u64 now = get_jiffies_64();
bool hot;
u8 temp;

get_therm_status(state->level, &hot, &temp);
/* temperature value is offset from the max so lesser means hotter */
if (!hot && temp > state->baseline_temp) {
if (state->rate_control_active)
pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
this_cpu,
state->level == CORE_LEVEL ? "Core" : "Package",
state->count);

state->rate_control_active = false;
return;
}

if (time_before64(now, state->next_check) &&
state->rate_control_active)
goto re_arm;

state->next_check = now + CHECK_INTERVAL;

if (state->count != state->last_count) {
/* There was one new thermal interrupt */
state->last_count = state->count;
state->average = 0;
state->sample_count = 0;
state->sample_index = 0;
}

state->temp_samples[state->sample_index] = temp;
state->sample_count++;
state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
if (state->sample_count < ARRAY_SIZE(state->temp_samples))
goto re_arm;

avg = 0;
for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
avg += state->temp_samples[i];

avg /= ARRAY_SIZE(state->temp_samples);

if (state->average > avg) {
pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
this_cpu,
state->level == CORE_LEVEL ? "Core" : "Package",
state->count);
state->rate_control_active = true;
}

state->average = avg;

re_arm:
clear_therm_status_log(state->level);
schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
}

/***
* therm_throt_process - Process thermal throttling event from interrupt
* @curr: Whether the condition is current or not (boolean), since the
* thermal interrupt normally gets called both when the thermal
* event begins and once the event has ended.
*
* This function is called by the thermal interrupt after the
* IRQ has been acknowledged.
*
* It will take care of rate limiting and printing messages to the syslog.
*/
static void therm_throt_process(bool new_event, int event, int level)
{
struct _thermal_state *state;
unsigned int this_cpu = smp_processor_id();
bool old_event;
u64 now;
struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

now = get_jiffies_64();
if (level == CORE_LEVEL) {
if (event == THERMAL_THROTTLING_EVENT)
state = &pstate->core_throttle;
else if (event == POWER_LIMIT_EVENT)
state = &pstate->core_power_limit;
else
return;
} else if (level == PACKAGE_LEVEL) {
if (event == THERMAL_THROTTLING_EVENT)
state = &pstate->package_throttle;
else if (event == POWER_LIMIT_EVENT)
state = &pstate->package_power_limit;
else
return;
} else
return;

old_event = state->new_event;
state->new_event = new_event;

if (new_event)
state->count++;

if (event != THERMAL_THROTTLING_EVENT)
return;

if (new_event && !state->last_interrupt_time) {
bool hot;
u8 temp;

get_therm_status(state->level, &hot, &temp);
/*
* Ignore short temperature spike as the system is not close
* to PROCHOT. 10C offset is large enough to ignore. It is
* already dropped from the high threshold temperature.
*/
if (temp > 10)
return;

state->baseline_temp = temp;
state->last_interrupt_time = now;
schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
} else if (old_event && state->last_interrupt_time) {
unsigned long throttle_time;

throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
if (throttle_time > state->max_time_ms)
state->max_time_ms = throttle_time;
state->total_time_ms += throttle_time;
state->last_interrupt_time = 0;
}
}

static int thresh_event_valid(int level, int event)
{
struct _thermal_state *state;
unsigned int this_cpu = smp_processor_id();
struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
u64 now = get_jiffies_64();

if (level == PACKAGE_LEVEL)
state = (event == 0) ? &pstate->pkg_thresh0 :
&pstate->pkg_thresh1;
else
state = (event == 0) ? &pstate->core_thresh0 :
&pstate->core_thresh1;

if (time_before64(now, state->next_check))
return 0;

state->next_check = now + CHECK_INTERVAL;

return 1;
}

static bool int_pln_enable;
static int __init int_pln_enable_setup(char *s)
{
int_pln_enable = true;

return 1;
}
__setup("int_pln_enable", int_pln_enable_setup);

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
{
int err;
struct cpuinfo_x86 *c = &cpu_data(cpu);

err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
if (err)
return err;

if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_core_power_limit_count.attr,
thermal_attr_group.name);
if (err)
goto del_group;
}

if (cpu_has(c, X86_FEATURE_PTS)) {
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_throttle_count.attr,
thermal_attr_group.name);
if (err)
goto del_group;

err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_throttle_max_time_ms.attr,
thermal_attr_group.name);
if (err)
goto del_group;

err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_throttle_total_time_ms.attr,
thermal_attr_group.name);
if (err)
goto del_group;

if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_power_limit_count.attr,
thermal_attr_group.name);
if (err)
goto del_group;
}
}

return 0;

del_group:
sysfs_remove_group(&dev->kobj, &thermal_attr_group);

return err;
}

static void thermal_throttle_remove_dev(struct device *dev)
{
sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int thermal_throttle_online(unsigned int cpu)
{
struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
u32 l;

state->package_throttle.level = PACKAGE_LEVEL;
state->core_throttle.level = CORE_LEVEL;

INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);

/* Unmask the thermal vector after the above workqueues are initialized. */
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

return thermal_throttle_add_dev(dev, cpu);
}

static int thermal_throttle_offline(unsigned int cpu)
{
struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
u32 l;

/* Mask the thermal vector before draining evtl. pending work */
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);

cancel_delayed_work_sync(&state->package_throttle.therm_work);
cancel_delayed_work_sync(&state->core_throttle.therm_work);

state->package_throttle.rate_control_active = false;
state->core_throttle.rate_control_active = false;

thermal_throttle_remove_dev(dev);
return 0;
}

static __init int thermal_throttle_init_device(void)
{
int ret;

if (!atomic_read(&therm_throt_en))
return 0;

ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
thermal_throttle_online,
thermal_throttle_offline);
return ret < 0 ? ret : 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */

static void notify_package_thresholds(__u64 msr_val)
{
bool notify_thres_0 = false;
bool notify_thres_1 = false;

if (!platform_thermal_package_notify)
return;

/* lower threshold check */
if (msr_val & THERM_LOG_THRESHOLD0)
notify_thres_0 = true;
/* higher threshold check */
if (msr_val & THERM_LOG_THRESHOLD1)
notify_thres_1 = true;

if (!notify_thres_0 && !notify_thres_1)
return;

if (platform_thermal_package_rate_control &&
platform_thermal_package_rate_control()) {
/* Rate control is implemented in callback */
platform_thermal_package_notify(msr_val);
return;
}

/* lower threshold reached */
if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
platform_thermal_package_notify(msr_val);
/* higher threshold reached */
if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
platform_thermal_package_notify(msr_val);
}

static void notify_thresholds(__u64 msr_val)
{
/* check whether the interrupt handler is defined;
* otherwise simply return
*/
if (!platform_thermal_notify)
return;

/* lower threshold reached */
if ((msr_val & THERM_LOG_THRESHOLD0) &&
thresh_event_valid(CORE_LEVEL, 0))
platform_thermal_notify(msr_val);
/* higher threshold reached */
if ((msr_val & THERM_LOG_THRESHOLD1) &&
thresh_event_valid(CORE_LEVEL, 1))
platform_thermal_notify(msr_val);
}

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
__u64 msr_val;

if (static_cpu_has(X86_FEATURE_HWP))
wrmsrl_safe(MSR_HWP_STATUS, 0);

rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

/* Check for violation of core thermal thresholds*/
notify_thresholds(msr_val);

therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
CORE_LEVEL);

if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
CORE_LEVEL);

if (this_cpu_has(X86_FEATURE_PTS)) {
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
/* check violations of package thermal thresholds */
notify_package_thresholds(msr_val);
therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
PACKAGE_LEVEL);
if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
therm_throt_process(msr_val &
PACKAGE_THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
PACKAGE_LEVEL);
}
}

static void unexpected_thermal_interrupt(void)
{
pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
{
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
ack_APIC_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
return 0;
return 1;
}

void __init mcheck_intel_therm_init(void)
{
/*
* This function is only called on boot CPU. Save the init thermal
* LVT value on BSP and use that value to restore APs' thermal LVT
* entry BIOS programmed later
*/
if (intel_thermal_supported(&boot_cpu_data))
lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
unsigned int cpu = smp_processor_id();
int tm2 = 0;
u32 l, h;

if (!intel_thermal_supported(c))
return;

/*
* First check if its enabled already, in which case there might
* be some SMM goo which handles it, so we can't even put a handler
* since it might be delivered via SMI already:
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);

h = lvtthmr_init;
/*
* The initial value of thermal LVT entries on all APs always reads
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
* sequence to them and LVT registers are reset to 0s except for
* the mask bits which are set to 1s when APs receive INIT IPI.
* If BIOS takes over the thermal interrupt and sets its interrupt
* delivery mode to SMI (not fixed), it restores the value that the
* BIOS has programmed on AP based on BSP's info we saved since BIOS
* is always setting the same value for all threads/cores.
*/
if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
apic_write(APIC_LVTTHMR, lvtthmr_init);


if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
if (system_state == SYSTEM_BOOTING)
pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
return;
}

/* early Pentium M models use different method for enabling TM2 */
if (cpu_has(c, X86_FEATURE_TM2)) {
if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
rdmsr(MSR_THERM2_CTL, l, h);
if (l & MSR_THERM2_CTL_TM_SELECT)
tm2 = 1;
} else if (l & MSR_IA32_MISC_ENABLE_TM2)
tm2 = 1;
}

/* We'll mask the thermal vector in the lapic till we're ready: */
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
apic_write(APIC_LVTTHMR, h);

rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
wrmsr(MSR_IA32_THERM_INTERRUPT,
(l | (THERM_INT_LOW_ENABLE
| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
wrmsr(MSR_IA32_THERM_INTERRUPT,
l | (THERM_INT_LOW_ENABLE
| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
else
wrmsr(MSR_IA32_THERM_INTERRUPT,
l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

if (cpu_has(c, X86_FEATURE_PTS)) {
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
(l | (PACKAGE_THERM_INT_LOW_ENABLE
| PACKAGE_THERM_INT_HIGH_ENABLE))
& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
l | (PACKAGE_THERM_INT_LOW_ENABLE
| PACKAGE_THERM_INT_HIGH_ENABLE
| PACKAGE_THERM_INT_PLN_ENABLE), h);
else
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
l | (PACKAGE_THERM_INT_LOW_ENABLE
| PACKAGE_THERM_INT_HIGH_ENABLE), h);
}

smp_thermal_vector = intel_thermal_interrupt;

rdmsr(MSR_IA32_MISC_ENABLE, l, h);
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
tm2 ? "TM2" : "TM1");

/* enable thermal throttle processing */
atomic_set(&therm_throt_en, 1);
}
@@ -830,7 +830,7 @@ static const struct attribute_group cpu_root_microcode_group = {
.attrs = cpu_root_microcode_attrs,
};

int __init microcode_init(void)
static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
int error;

@@ -31,6 +31,11 @@
#include <asm/reboot.h>
#include <asm/nmi.h>
#include <clocksource/hyperv_timer.h>
#include <asm/numa.h>

/* Is Linux running as the root partition? */
bool hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);

struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -135,14 +140,32 @@ static void hv_machine_shutdown(void)
{
if (kexec_in_progress && hv_kexec_handler)
hv_kexec_handler();

/*
* Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
* corrupts the old VP Assist Pages and can crash the kexec kernel.
*/
if (kexec_in_progress && hyperv_init_cpuhp > 0)
cpuhp_remove_state(hyperv_init_cpuhp);

/* The function calls stop_other_cpus(). */
native_machine_shutdown();

/* Disable the hypercall page when there is only 1 active CPU. */
if (kexec_in_progress)
hyperv_cleanup();
}

static void hv_machine_crash_shutdown(struct pt_regs *regs)
{
if (hv_crash_handler)
hv_crash_handler(regs);

/* The function calls crash_smp_send_stop(). */
native_machine_crash_shutdown(regs);

/* Disable the hypercall page when there is only 1 active CPU. */
hyperv_cleanup();
}
#endif /* CONFIG_KEXEC_CORE */
#endif /* CONFIG_HYPERV */
@@ -208,6 +231,32 @@ static void __init hv_smp_prepare_boot_cpu(void)
hv_init_spinlocks();
#endif
}

static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
{
#ifdef CONFIG_X86_64
int i;
int ret;
#endif

native_smp_prepare_cpus(max_cpus);

#ifdef CONFIG_X86_64
for_each_present_cpu(i) {
if (i == 0)
continue;
ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
BUG_ON(ret);
}

for_each_present_cpu(i) {
if (i == 0)
continue;
ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i);
BUG_ON(ret);
}
#endif
}
#endif

static void __init ms_hyperv_init_platform(void)
@@ -225,6 +274,7 @@ static void __init ms_hyperv_init_platform(void)
* Extract the features and hints
*/
ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES);
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

@@ -237,6 +287,22 @@ static void __init ms_hyperv_init_platform(void)
pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);

/*
* Check CPU management privilege.
*
* To mirror what Windows does we should extract CPU management
* features and use the ReservedIdentityBit to detect if Linux is the
* root partition. But that requires negotiating CPU management
* interface (a process to be finalized).
*
* For now, use the privilege flag as the indicator for running as
* root.
*/
if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
hv_root_partition = true;
pr_info("Hyper-V: running as root partition\n");
}

/*
* Extract host information.
*/
@@ -259,6 +325,14 @@ static void __init ms_hyperv_init_platform(void)
x86_platform.calibrate_cpu = hv_get_tsc_khz;
}

if (ms_hyperv.features_b & HV_ISOLATION) {
ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);

pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
}

if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
ms_hyperv.nested_features =
cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
@@ -348,6 +422,8 @@ static void __init ms_hyperv_init_platform(void)

# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
if (hv_root_partition)
smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif

/*

@@ -537,9 +537,9 @@ static void __init print_out_mtrr_range_state(void)
if (!size_base)
continue;

size_base = to_size_factor(size_base, &size_factor),
size_base = to_size_factor(size_base, &size_factor);
start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
start_base = to_size_factor(start_base, &start_factor),
start_base = to_size_factor(start_base, &start_factor);
type = range_state[i].type;

pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",

@@ -3,7 +3,6 @@
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
* because MTRRs can span up to 40 bits (36bits on most modern x86)
*/
#define DEBUG

#include <linux/export.h>
#include <linux/init.h>
@@ -167,9 +166,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
*repeat = 0;
*uniform = 1;

/* Make end inclusive instead of exclusive */
end--;

prev_match = MTRR_TYPE_INVALID;
for (i = 0; i < num_var_ranges; ++i) {
unsigned short start_state, end_state, inclusive;
@@ -261,6 +257,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
int repeat;
u64 partial_end;

/* Make end inclusive instead of exclusive */
end--;

if (!mtrr_state_set)
return MTRR_TYPE_INVALID;

@@ -31,8 +31,6 @@
System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

#define DEBUG

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

#include <linux/stop_machine.h>

@@ -3,7 +3,7 @@
* local apic based NMI watchdog for various CPUs.
*
* This file also handles reservation of performance counters for coordination
* with other users (like oprofile).
* with other users.
*
* Note that these events normally don't tick when the CPU idles. This means
* the frequency varies with CPU load.
@@ -105,15 +105,6 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)

}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
BUG_ON(counter > NMI_MAX_COUNTER_BITS);

return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);

int reserve_perfctr_nmi(unsigned int msr)
{
unsigned int counter;

@@ -572,6 +572,7 @@ union cpuid_0x10_x_edx {
|
||||
|
||||
void rdt_last_cmd_clear(void);
|
||||
void rdt_last_cmd_puts(const char *s);
|
||||
__printf(1, 2)
|
||||
void rdt_last_cmd_printf(const char *fmt, ...);
|
||||
|
||||
void rdt_ctrl_update(void *arg);
|
||||
|
||||
@@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
|
||||
kfree(rdtgrp);
|
||||
}
|
||||
|
||||
struct task_move_callback {
|
||||
struct callback_head work;
|
||||
struct rdtgroup *rdtgrp;
|
||||
};
|
||||
|
||||
static void move_myself(struct callback_head *head)
|
||||
static void _update_task_closid_rmid(void *task)
|
||||
{
|
||||
struct task_move_callback *callback;
|
||||
struct rdtgroup *rdtgrp;
|
||||
|
||||
callback = container_of(head, struct task_move_callback, work);
|
||||
rdtgrp = callback->rdtgrp;
|
||||
|
||||
/*
|
||||
* If resource group was deleted before this task work callback
|
||||
* was invoked, then assign the task to root group and free the
|
||||
* resource group.
|
||||
* If the task is still current on this CPU, update PQR_ASSOC MSR.
|
||||
* Otherwise, the MSR is updated when the task is scheduled in.
|
||||
*/
|
||||
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
|
||||
(rdtgrp->flags & RDT_DELETED)) {
|
||||
current->closid = 0;
|
||||
current->rmid = 0;
|
||||
rdtgroup_remove(rdtgrp);
|
||||
}
|
||||
if (task == current)
|
||||
resctrl_sched_in();
|
||||
}
|
||||
|
||||
if (unlikely(current->flags & PF_EXITING))
|
||||
goto out;
|
||||
|
||||
preempt_disable();
|
||||
/* update PQR_ASSOC MSR to make resource group go into effect */
|
||||
resctrl_sched_in();
|
||||
preempt_enable();
|
||||
|
||||
out:
|
||||
kfree(callback);
|
||||
static void update_task_closid_rmid(struct task_struct *t)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
|
||||
smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
|
||||
else
|
||||
_update_task_closid_rmid(t);
|
||||
}
|
||||
|
||||
static int __rdtgroup_move_task(struct task_struct *tsk,
|
||||
struct rdtgroup *rdtgrp)
|
||||
{
|
||||
struct task_move_callback *callback;
|
||||
int ret;
|
||||
|
||||
callback = kzalloc(sizeof(*callback), GFP_KERNEL);
|
||||
if (!callback)
|
||||
return -ENOMEM;
|
||||
callback->work.func = move_myself;
|
||||
callback->rdtgrp = rdtgrp;
|
||||
/* If the task is already in rdtgrp, no need to move the task. */
|
||||
if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
|
||||
tsk->rmid == rdtgrp->mon.rmid) ||
|
||||
(rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
|
||||
tsk->closid == rdtgrp->mon.parent->closid))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Take a refcount, so rdtgrp cannot be freed before the
|
||||
* callback has been invoked.
|
||||
* Set the task's closid/rmid before the PQR_ASSOC MSR can be
|
||||
* updated by them.
|
||||
*
|
||||
* For ctrl_mon groups, move both closid and rmid.
|
||||
* For monitor groups, can move the tasks only from
|
||||
* their parent CTRL group.
|
||||
*/
|
||||
atomic_inc(&rdtgrp->waitcount);
|
||||
ret = task_work_add(tsk, &callback->work, TWA_RESUME);
|
||||
if (ret) {
|
||||
/*
|
||||
* Task is exiting. Drop the refcount and free the callback.
|
||||
* No need to check the refcount as the group cannot be
|
||||
* deleted before the write function unlocks rdtgroup_mutex.
|
||||
*/
|
||||
atomic_dec(&rdtgrp->waitcount);
|
||||
kfree(callback);
|
||||
rdt_last_cmd_puts("Task exited\n");
|
||||
} else {
|
||||
/*
|
||||
* For ctrl_mon groups move both closid and rmid.
|
||||
* For monitor groups, can move the tasks only from
|
||||
* their parent CTRL group.
|
||||
*/
|
||||
if (rdtgrp->type == RDTCTRL_GROUP) {
|
||||
tsk->closid = rdtgrp->closid;
|
||||
tsk->rmid = rdtgrp->mon.rmid;
|
||||
} else if (rdtgrp->type == RDTMON_GROUP) {
|
||||
if (rdtgrp->mon.parent->closid == tsk->closid) {
|
||||
tsk->rmid = rdtgrp->mon.rmid;
|
||||
} else {
|
||||
rdt_last_cmd_puts("Can't move task to different control group\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (rdtgrp->type == RDTCTRL_GROUP) {
|
||||
WRITE_ONCE(tsk->closid, rdtgrp->closid);
|
||||
WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
|
||||
} else if (rdtgrp->type == RDTMON_GROUP) {
|
||||
if (rdtgrp->mon.parent->closid == tsk->closid) {
|
||||
WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
|
||||
} else {
|
||||
rdt_last_cmd_puts("Can't move task to different control group\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Ensure the task's closid and rmid are written before determining if
|
||||
* the task is current that will decide if it will be interrupted.
|
||||
*/
|
||||
barrier();
|
||||
|
||||
/*
|
||||
* By now, the task's closid and rmid are set. If the task is current
|
||||
* on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
|
||||
* group go into effect. If the task is not current, the MSR will be
|
||||
* updated when the task is scheduled in.
|
||||
*/
|
||||
update_task_closid_rmid(tsk);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
|
||||
@@ -2329,22 +2310,18 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
|
||||
for_each_process_thread(p, t) {
|
||||
if (!from || is_closid_match(t, from) ||
|
||||
is_rmid_match(t, from)) {
|
||||
t->closid = to->closid;
|
||||
t->rmid = to->mon.rmid;
|
||||
WRITE_ONCE(t->closid, to->closid);
|
||||
WRITE_ONCE(t->rmid, to->mon.rmid);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* This is safe on x86 w/o barriers as the ordering
|
||||
* of writing to task_cpu() and t->on_cpu is
|
||||
* reverse to the reading here. The detection is
|
||||
* inaccurate as tasks might move or schedule
|
||||
* before the smp function call takes place. In
|
||||
* such a case the function call is pointless, but
|
||||
* If the task is on a CPU, set the CPU in the mask.
|
||||
* The detection is inaccurate as tasks might move or
|
||||
* schedule before the smp function call takes place.
|
||||
* In such a case the function call is pointless, but
|
||||
* there is no other side effect.
|
||||
*/
|
||||
if (mask && t->on_cpu)
|
||||
if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
|
||||
cpumask_set_cpu(task_cpu(t), mask);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
@@ -40,11 +40,6 @@ static const struct cpuid_bit cpuid_bits[] = {
|
||||
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
|
||||
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
|
||||
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
|
||||
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
|
||||
{ X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
|
||||
{ X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 },
|
||||
{ X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 },
|
||||
{ X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 },
|
||||
{ 0, 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
|
||||
@@ -72,6 +72,9 @@ static int sgx_release(struct inode *inode, struct file *file)
|
||||
synchronize_srcu(&encl->srcu);
|
||||
mmu_notifier_unregister(&encl_mm->mmu_notifier, encl_mm->mm);
|
||||
kfree(encl_mm);
|
||||
|
||||
/* 'encl_mm' is gone, put encl_mm->encl reference: */
|
||||
kref_put(&encl->refcount, sgx_encl_release);
|
||||
}
|
||||
|
||||
kref_put(&encl->refcount, sgx_encl_release);
|
||||
|
||||
@@ -141,7 +141,6 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
|
||||
struct sgx_encl_page *entry;
|
||||
unsigned long phys_addr;
|
||||
struct sgx_encl *encl;
|
||||
unsigned long pfn;
|
||||
vm_fault_t ret;
|
||||
|
||||
encl = vma->vm_private_data;
|
||||
@@ -168,13 +167,6 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
|
||||
|
||||
phys_addr = sgx_get_epc_phys_addr(entry->epc_page);
|
||||
|
||||
/* Check if another thread got here first to insert the PTE. */
|
||||
if (!follow_pfn(vma, addr, &pfn)) {
|
||||
mutex_unlock(&encl->lock);
|
||||
|
||||
return VM_FAULT_NOPAGE;
|
||||
}
|
||||
|
||||
ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
|
||||
if (ret != VM_FAULT_NOPAGE) {
|
||||
mutex_unlock(&encl->lock);
|
||||
@@ -481,6 +473,9 @@ static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
|
||||
{
|
||||
struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
|
||||
|
||||
/* 'encl_mm' is going away, put encl_mm->encl reference: */
|
||||
kref_put(&encl_mm->encl->refcount, sgx_encl_release);
|
||||
|
||||
kfree(encl_mm);
|
||||
}
|
||||
|
||||
@@ -534,6 +529,8 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
|
||||
if (!encl_mm)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Grab a refcount for the encl_mm->encl reference: */
|
||||
kref_get(&encl->refcount);
|
||||
encl_mm->encl = encl;
|
||||
encl_mm->mm = mm;
|
||||
encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;
|
||||
|
||||
@@ -700,25 +700,27 @@ static bool __init sgx_page_cache_init(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __init sgx_init(void)
|
||||
static int __init sgx_init(void)
|
||||
{
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_SGX))
|
||||
return;
|
||||
return -ENODEV;
|
||||
|
||||
if (!sgx_page_cache_init())
|
||||
return;
|
||||
return -ENOMEM;
|
||||
|
||||
if (!sgx_page_reclaimer_init())
|
||||
if (!sgx_page_reclaimer_init()) {
|
||||
ret = -ENOMEM;
|
||||
goto err_page_cache;
|
||||
}
|
||||
|
||||
ret = sgx_drv_init();
|
||||
if (ret)
|
||||
goto err_kthread;
|
||||
|
||||
return;
|
||||
return 0;
|
||||
|
||||
err_kthread:
|
||||
kthread_stop(ksgxd_tsk);
|
||||
@@ -728,6 +730,8 @@ err_page_cache:
|
||||
vfree(sgx_epc_sections[i].pages);
|
||||
memunmap(sgx_epc_sections[i].virt_addr);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
device_initcall(sgx_init);
|
||||
|
||||
@@ -25,10 +25,10 @@
|
||||
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
|
||||
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned int __max_die_per_package __read_mostly = 1;
|
||||
EXPORT_SYMBOL(__max_die_per_package);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Check if given CPUID extended toplogy "leaf" is implemented
|
||||
*/
|
||||
|
||||
@@ -128,12 +128,21 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
|
||||
|
||||
static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
|
||||
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
|
||||
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
|
||||
unsigned long *begin;
|
||||
|
||||
/*
|
||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||
* It just means the stack is empty.
|
||||
* @end points directly to the top most stack entry to avoid a -8
|
||||
* adjustment in the stack switch hotpath. Adjust it back before
|
||||
* calculating @begin.
|
||||
*/
|
||||
end++;
|
||||
begin = end - (IRQ_STACK_SIZE / sizeof(long));
|
||||
|
||||
/*
|
||||
* Due to the switching logic RSP can never be == @end because the
|
||||
* final operation is 'popq %rsp' which means after that RSP points
|
||||
* to the original stack and not to @end.
|
||||
*/
|
||||
if (stack < begin || stack >= end)
|
||||
return false;
|
||||
@@ -143,8 +152,9 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info
|
||||
info->end = end;
|
||||
|
||||
/*
|
||||
* The next stack pointer is the first thing pushed by the entry code
|
||||
* after switching to the irq stack.
|
||||
* The next stack pointer is stored at the top of the irq stack
|
||||
* before switching to the irq stack. Actual stack entries are all
|
||||
* below that.
|
||||
*/
|
||||
info->next_sp = (unsigned long *)*(end - 1);
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu)
|
||||
}
|
||||
EXPORT_SYMBOL(copy_fpregs_to_fpstate);
|
||||
|
||||
void kernel_fpu_begin(void)
|
||||
void kernel_fpu_begin_mask(unsigned int kfpu_mask)
|
||||
{
|
||||
preempt_disable();
|
||||
|
||||
@@ -141,13 +141,14 @@ void kernel_fpu_begin(void)
|
||||
}
|
||||
__cpu_invalidate_fpregs_state();
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_XMM))
|
||||
/* Put sane initial values into the control registers. */
|
||||
if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
|
||||
ldmxcsr(MXCSR_DEFAULT);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_FPU))
|
||||
if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
|
||||
asm volatile ("fninit");
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
|
||||
EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
|
||||
|
||||
void kernel_fpu_end(void)
|
||||
{
|
||||
|
||||
@@ -167,14 +167,14 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
|
||||
fx->fop = 0;
|
||||
fx->rip = 0;
|
||||
fx->rdp = 0;
|
||||
memset(&fx->st_space[0], 0, 128);
|
||||
memset(fx->st_space, 0, sizeof(fx->st_space));
|
||||
}
|
||||
|
||||
/*
|
||||
* SSE is in init state
|
||||
*/
|
||||
if (!(xfeatures & XFEATURE_MASK_SSE))
|
||||
memset(&fx->xmm_space[0], 0, 256);
|
||||
memset(fx->xmm_space, 0, sizeof(fx->xmm_space));
|
||||
|
||||
/*
|
||||
* First two features are FPU and SSE, which above we handled
|
||||
|
||||
@@ -184,6 +184,7 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
|
||||
* It is also used to copy the retq for trampolines.
|
||||
*/
|
||||
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
|
||||
UNWIND_HINT_FUNC
|
||||
retq
|
||||
SYM_FUNC_END(ftrace_epilogue)
|
||||
|
||||
@@ -276,7 +277,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
|
||||
restore_mcount_regs 8
|
||||
/* Restore flags */
|
||||
popfq
|
||||
UNWIND_HINT_RET_OFFSET
|
||||
UNWIND_HINT_FUNC
|
||||
jmp ftrace_epilogue
|
||||
|
||||
SYM_FUNC_END(ftrace_regs_caller)
|
||||
@@ -333,8 +334,7 @@ SYM_FUNC_START(ftrace_graph_caller)
|
||||
retq
|
||||
SYM_FUNC_END(ftrace_graph_caller)
|
||||
|
||||
SYM_CODE_START(return_to_handler)
|
||||
UNWIND_HINT_EMPTY
|
||||
SYM_FUNC_START(return_to_handler)
|
||||
subq $24, %rsp
|
||||
|
||||
/* Save the return values */
|
||||
@@ -349,5 +349,5 @@ SYM_CODE_START(return_to_handler)
|
||||
movq (%rsp), %rax
|
||||
addq $24, %rsp
|
||||
JMP_NOSPEC rdi
|
||||
SYM_CODE_END(return_to_handler)
|
||||
SYM_FUNC_END(return_to_handler)
|
||||
#endif
|
||||
|
||||
@@ -269,6 +269,20 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
|
||||
CPU_ENTRY_AREA_TOTAL_SIZE))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
|
||||
* GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
if (within_area(addr, end, (unsigned long)__per_cpu_offset,
|
||||
sizeof(unsigned long) * nr_cpu_ids))
|
||||
return true;
|
||||
#else
|
||||
if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
|
||||
sizeof(pcpu_unit_offsets)))
|
||||
return true;
|
||||
#endif
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
/* The original rw GDT is being used after load_direct_gdt() */
|
||||
if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
|
||||
@@ -293,6 +307,14 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
|
||||
(unsigned long)&per_cpu(cpu_tlbstate, cpu),
|
||||
sizeof(struct tlb_state)))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* When in guest (X86_FEATURE_HYPERVISOR), local_db_save()
|
||||
* will read per-cpu cpu_dr7 before clear dr7 register.
|
||||
*/
|
||||
if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
|
||||
sizeof(cpu_dr7)))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -491,15 +513,12 @@ static int hw_breakpoint_handler(struct die_args *args)
|
||||
struct perf_event *bp;
|
||||
unsigned long *dr6_p;
|
||||
unsigned long dr6;
|
||||
bool bpx;
|
||||
|
||||
/* The DR6 value is pointed by args->err */
|
||||
dr6_p = (unsigned long *)ERR_PTR(args->err);
|
||||
dr6 = *dr6_p;
|
||||
|
||||
/* If it's a single step, TRAP bits are random */
|
||||
if (dr6 & DR_STEP)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/* Do an early return if no trap bits are set in DR6 */
|
||||
if ((dr6 & DR_TRAP_BITS) == 0)
|
||||
return NOTIFY_DONE;
|
||||
@@ -509,28 +528,29 @@ static int hw_breakpoint_handler(struct die_args *args)
|
||||
if (likely(!(dr6 & (DR_TRAP0 << i))))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* The counter may be concurrently released but that can only
|
||||
* occur from a call_rcu() path. We can then safely fetch
|
||||
* the breakpoint, use its callback, touch its counter
|
||||
* while we are in an rcu_read_lock() path.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
|
||||
bp = this_cpu_read(bp_per_reg[i]);
|
||||
if (!bp)
|
||||
continue;
|
||||
|
||||
bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;
|
||||
|
||||
/*
|
||||
* TF and data breakpoints are traps and can be merged, however
|
||||
* instruction breakpoints are faults and will be raised
|
||||
* separately.
|
||||
*
|
||||
* However DR6 can indicate both TF and instruction
|
||||
* breakpoints. In that case take TF as that has precedence and
|
||||
* delay the instruction breakpoint for the next exception.
|
||||
*/
|
||||
if (bpx && (dr6 & DR_STEP))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Reset the 'i'th TRAP bit in dr6 to denote completion of
|
||||
* exception handling
|
||||
*/
|
||||
(*dr6_p) &= ~(DR_TRAP0 << i);
|
||||
/*
|
||||
* bp can be NULL due to lazy debug register switching
|
||||
* or due to concurrent perf counter removing.
|
||||
*/
|
||||
if (!bp) {
|
||||
rcu_read_unlock();
|
||||
break;
|
||||
}
|
||||
|
||||
perf_bp_event(bp, args->regs);
|
||||
|
||||
@@ -538,11 +558,10 @@ static int hw_breakpoint_handler(struct die_args *args)
|
||||
* Set up resume flag to avoid breakpoint recursion when
|
||||
* returning back to origin.
|
||||
*/
|
||||
if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
|
||||
if (bpx)
|
||||
args->regs->flags |= X86_EFLAGS_RF;
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* Further processing in do_debug() is needed for a) user-space
|
||||
* breakpoints (to generate signals) and b) when the system has
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/thermal.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <asm/trace/irq_vectors.h>
|
||||
@@ -227,7 +228,7 @@ static __always_inline void handle_irq(struct irq_desc *desc,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_X86_64))
|
||||
run_irq_on_irqstack_cond(desc->handle_irq, desc, regs);
|
||||
generic_handle_irq_desc(desc);
|
||||
else
|
||||
__handle_irq(desc, regs);
|
||||
}
|
||||
@@ -374,3 +375,23 @@ void fixup_irqs(void)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_THERMAL_VECTOR
|
||||
static void smp_thermal_vector(void)
|
||||
{
|
||||
if (x86_thermal_enabled())
|
||||
intel_thermal_interrupt();
|
||||
else
|
||||
pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
|
||||
{
|
||||
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
|
||||
inc_irq_stat(irq_thermal_count);
|
||||
smp_thermal_vector();
|
||||
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
|
||||
ack_APIC_irq();
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include <asm/apic.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/softirq_stack.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_STACKOVERFLOW
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/softirq_stack.h>
|
||||
#include <asm/irq_stack.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/apic.h>
|
||||
@@ -48,7 +49,8 @@ static int map_irq_stack(unsigned int cpu)
|
||||
if (!va)
|
||||
return -ENOMEM;
|
||||
|
||||
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
|
||||
/* Store actual TOS to avoid adjustment in the hotpath */
|
||||
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
@@ -60,7 +62,8 @@ static int map_irq_stack(unsigned int cpu)
|
||||
{
|
||||
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
|
||||
|
||||
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
|
||||
/* Store actual TOS to avoid adjustment in the hotpath */
|
||||
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
@@ -71,8 +74,3 @@ int irq_init_percpu_irqstack(unsigned int cpu)
|
||||
return 0;
|
||||
return map_irq_stack(cpu);
|
||||
}
|
||||
|
||||
void do_softirq_own_stack(void)
|
||||
{
|
||||
run_on_irqstack_cond(__do_softirq, NULL);
|
||||
}
|
||||
|
||||
@@ -13,14 +13,3 @@ SYM_FUNC_START(native_save_fl)
|
||||
ret
|
||||
SYM_FUNC_END(native_save_fl)
|
||||
EXPORT_SYMBOL(native_save_fl)
|
||||
|
||||
/*
|
||||
* void native_restore_fl(unsigned long flags)
|
||||
* %eax/%rdi: flags
|
||||
*/
|
||||
SYM_FUNC_START(native_restore_fl)
|
||||
push %_ASM_ARG1
|
||||
popf
|
||||
ret
|
||||
SYM_FUNC_END(native_restore_fl)
|
||||
EXPORT_SYMBOL(native_restore_fl)
|
||||
|
||||
@@ -132,26 +132,6 @@ void synthesize_relcall(void *dest, void *from, void *to)
|
||||
}
|
||||
NOKPROBE_SYMBOL(synthesize_relcall);
|
||||
|
||||
/*
|
||||
* Skip the prefixes of the instruction.
|
||||
*/
|
||||
static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
|
||||
{
|
||||
insn_attr_t attr;
|
||||
|
||||
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
|
||||
while (inat_is_legacy_prefix(attr)) {
|
||||
insn++;
|
||||
attr = inat_get_opcode_attribute((insn_byte_t)*insn);
|
||||
}
|
||||
#ifdef CONFIG_X86_64
|
||||
if (inat_is_rex_prefix(attr))
|
||||
insn++;
|
||||
#endif
|
||||
return insn;
|
||||
}
|
||||
NOKPROBE_SYMBOL(skip_prefixes);
|
||||
|
||||
/*
|
||||
* Returns non-zero if INSN is boostable.
|
||||
* RIP relative instructions are adjusted at copying time in 64 bits mode
|
||||
@@ -311,25 +291,6 @@ static int can_probe(unsigned long paddr)
|
||||
return (addr == paddr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns non-zero if opcode modifies the interrupt flag.
|
||||
*/
|
||||
static int is_IF_modifier(kprobe_opcode_t *insn)
|
||||
{
|
||||
/* Skip prefixes */
|
||||
insn = skip_prefixes(insn);
|
||||
|
||||
switch (*insn) {
|
||||
case 0xfa: /* cli */
|
||||
case 0xfb: /* sti */
|
||||
case 0xcf: /* iret/iretd */
|
||||
case 0x9d: /* popf/popfd */
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy an instruction with recovering modified instruction by kprobes
|
||||
* and adjust the displacement if the instruction uses the %rip-relative
|
||||
@@ -411,9 +372,9 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
|
||||
synthesize_reljump(buf + len, p->ainsn.insn + len,
|
||||
p->addr + insn->length);
|
||||
len += JMP32_INSN_SIZE;
|
||||
p->ainsn.boostable = true;
|
||||
p->ainsn.boostable = 1;
|
||||
} else {
|
||||
p->ainsn.boostable = false;
|
||||
p->ainsn.boostable = 0;
|
||||
}
|
||||
|
||||
return len;
|
||||
@@ -450,6 +411,67 @@ void free_insn_page(void *page)
|
||||
module_memfree(page);
|
||||
}
|
||||
|
||||
static void set_resume_flags(struct kprobe *p, struct insn *insn)
|
||||
{
|
||||
insn_byte_t opcode = insn->opcode.bytes[0];
|
||||
|
||||
switch (opcode) {
|
||||
case 0xfa: /* cli */
|
||||
case 0xfb: /* sti */
|
||||
case 0x9d: /* popf/popfd */
|
||||
/* Check whether the instruction modifies Interrupt Flag or not */
|
||||
p->ainsn.if_modifier = 1;
|
||||
break;
|
||||
case 0x9c: /* pushfl */
|
||||
p->ainsn.is_pushf = 1;
|
||||
break;
|
||||
case 0xcf: /* iret */
|
||||
p->ainsn.if_modifier = 1;
|
||||
fallthrough;
|
||||
case 0xc2: /* ret/lret */
|
||||
case 0xc3:
|
||||
case 0xca:
|
||||
case 0xcb:
|
||||
case 0xea: /* jmp absolute -- ip is correct */
|
||||
/* ip is already adjusted, no more changes required */
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
/* Without resume jump, this is boostable */
|
||||
p->ainsn.boostable = 1;
|
||||
break;
|
||||
case 0xe8: /* call relative - Fix return addr */
|
||||
p->ainsn.is_call = 1;
|
||||
break;
|
||||
#ifdef CONFIG_X86_32
|
||||
case 0x9a: /* call absolute -- same as call absolute, indirect */
|
||||
p->ainsn.is_call = 1;
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
break;
|
||||
#endif
|
||||
case 0xff:
|
||||
opcode = insn->opcode.bytes[1];
|
||||
if ((opcode & 0x30) == 0x10) {
|
||||
/*
|
||||
* call absolute, indirect
|
||||
* Fix return addr; ip is correct.
|
||||
* But this is not boostable
|
||||
*/
|
||||
p->ainsn.is_call = 1;
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
break;
|
||||
} else if (((opcode & 0x31) == 0x20) ||
|
||||
((opcode & 0x31) == 0x21)) {
|
||||
/*
|
||||
* jmp near and far, absolute indirect
|
||||
* ip is correct.
|
||||
*/
|
||||
p->ainsn.is_abs_ip = 1;
|
||||
/* Without resume jump, this is boostable */
|
||||
p->ainsn.boostable = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int arch_copy_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct insn insn;
|
||||
@@ -467,8 +489,8 @@ static int arch_copy_kprobe(struct kprobe *p)
|
||||
*/
|
||||
len = prepare_boost(buf, p, &insn);
|
||||
|
||||
/* Check whether the instruction modifies Interrupt Flag or not */
|
||||
p->ainsn.if_modifier = is_IF_modifier(buf);
|
||||
/* Analyze the opcode and set resume flags */
|
||||
set_resume_flags(p, &insn);
|
||||
|
||||
/* Also, displacement change doesn't affect the first byte */
|
||||
p->opcode = buf[0];
|
||||
@@ -491,6 +513,9 @@ int arch_prepare_kprobe(struct kprobe *p)
|
||||
|
||||
if (!can_probe((unsigned long)p->addr))
|
||||
return -EILSEQ;
|
||||
|
||||
memset(&p->ainsn, 0, sizeof(p->ainsn));
|
||||
|
||||
/* insn: must be on special executable page on x86. */
|
||||
p->ainsn.insn = get_insn_slot();
|
||||
if (!p->ainsn.insn)
|
||||
@@ -806,11 +831,6 @@ NOKPROBE_SYMBOL(trampoline_handler);
|
||||
* 2) If the single-stepped instruction was a call, the return address
|
||||
* that is atop the stack is the address following the copied instruction.
|
||||
* We need to make it the address following the original instruction.
|
||||
*
|
||||
* If this is the first time we've single-stepped the instruction at
|
||||
* this probepoint, and the instruction is boostable, boost it: add a
|
||||
* jump instruction after the copied instruction, that jumps to the next
|
||||
* instruction after the probepoint.
|
||||
*/
|
||||
static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
||||
struct kprobe_ctlblk *kcb)
|
||||
@@ -818,60 +838,20 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long *tos = stack_addr(regs);
|
||||
unsigned long copy_ip = (unsigned long)p->ainsn.insn;
|
||||
unsigned long orig_ip = (unsigned long)p->addr;
|
||||
kprobe_opcode_t *insn = p->ainsn.insn;
|
||||
|
||||
/* Skip prefixes */
|
||||
insn = skip_prefixes(insn);
|
||||
|
||||
regs->flags &= ~X86_EFLAGS_TF;
|
||||
switch (*insn) {
|
||||
case 0x9c: /* pushfl */
|
||||
|
||||
/* Fixup the contents of top of stack */
|
||||
if (p->ainsn.is_pushf) {
|
||||
*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
|
||||
*tos |= kcb->kprobe_old_flags;
|
||||
break;
|
||||
case 0xc2: /* iret/ret/lret */
|
||||
case 0xc3:
|
||||
case 0xca:
|
||||
case 0xcb:
|
||||
case 0xcf:
|
||||
case 0xea: /* jmp absolute -- ip is correct */
|
||||
/* ip is already adjusted, no more changes required */
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
case 0xe8: /* call relative - Fix return addr */
|
||||
} else if (p->ainsn.is_call) {
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
break;
|
||||
#ifdef CONFIG_X86_32
|
||||
case 0x9a: /* call absolute -- same as call absolute, indirect */
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
goto no_change;
|
||||
#endif
|
||||
case 0xff:
|
||||
if ((insn[1] & 0x30) == 0x10) {
|
||||
/*
|
||||
* call absolute, indirect
|
||||
* Fix return addr; ip is correct.
|
||||
* But this is not boostable
|
||||
*/
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
goto no_change;
|
||||
} else if (((insn[1] & 0x31) == 0x20) ||
|
||||
((insn[1] & 0x31) == 0x21)) {
|
||||
/*
|
||||
* jmp near and far, absolute indirect
|
||||
* ip is correct. And this is boostable
|
||||
*/
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
regs->ip += orig_ip - copy_ip;
|
||||
if (!p->ainsn.is_abs_ip)
|
||||
regs->ip += orig_ip - copy_ip;
|
||||
|
||||
no_change:
|
||||
restore_btf();
|
||||
}
|
||||
NOKPROBE_SYMBOL(resume_execution);
|
||||
|
||||
@@ -398,9 +398,15 @@ static void free_ldt_pgtables(struct mm_struct *mm)
|
||||
if (!boot_cpu_has(X86_FEATURE_PTI))
|
||||
return;
|
||||
|
||||
tlb_gather_mmu(&tlb, mm, start, end);
|
||||
/*
|
||||
* Although free_pgd_range() is intended for freeing user
|
||||
* page-tables, it also works out for kernel mappings on x86.
|
||||
* We use tlb_gather_mmu_fullmm() to avoid confusing the
|
||||
* range-tracking logic in __tlb_adjust_range().
|
||||
*/
|
||||
tlb_gather_mmu_fullmm(&tlb, mm);
|
||||
free_pgd_range(&tlb, start, end, start, end);
|
||||
tlb_finish_mmu(&tlb, start, end);
|
||||
tlb_finish_mmu(&tlb);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -114,6 +114,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,
|
||||
*location += sym->st_value;
|
||||
break;
|
||||
case R_386_PC32:
|
||||
case R_386_PLT32:
|
||||
/* Add the value, subtract its position */
|
||||
*location += sym->st_value - (uint32_t)location;
|
||||
break;
|
||||
|
||||
@@ -182,6 +182,13 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
|
||||
err = security_locked_down(LOCKDOWN_MSR);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
err = filter_write(regs[1]);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
|
||||
|
||||
err = wrmsr_safe_regs_on_cpu(cpu, regs);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
@@ -135,8 +135,7 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
|
||||
else if (opfunc == _paravirt_ident_64)
|
||||
ret = paravirt_patch_ident_64(insn_buff, len);
|
||||
|
||||
else if (type == PARAVIRT_PATCH(cpu.iret) ||
|
||||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
|
||||
else if (type == PARAVIRT_PATCH(cpu.iret))
|
||||
/* If operation requires a jmp, then jmp */
|
||||
ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
|
||||
#endif
|
||||
@@ -170,7 +169,6 @@ static u64 native_steal_clock(int cpu)
|
||||
|
||||
/* These are in entry.S */
|
||||
extern void native_iret(void);
|
||||
extern void native_usergs_sysret64(void);
|
||||
|
||||
static struct resource reserve_ioports = {
|
||||
.start = 0,
|
||||
@@ -310,9 +308,7 @@ struct paravirt_patch_template pv_ops = {
|
||||
|
||||
.cpu.load_sp0 = native_load_sp0,
|
||||
|
||||
.cpu.usergs_sysret64 = native_usergs_sysret64,
|
||||
.cpu.iret = native_iret,
|
||||
.cpu.swapgs = native_swapgs,
|
||||
|
||||
#ifdef CONFIG_X86_IOPL_IOPERM
|
||||
.cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap,
|
||||
@@ -324,7 +320,6 @@ struct paravirt_patch_template pv_ops = {
|
||||
|
||||
/* Irq ops. */
|
||||
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
|
||||
.irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
|
||||
.irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
|
||||
.irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
|
||||
.irq.safe_halt = native_safe_halt,
|
||||
|
||||
@@ -25,10 +25,7 @@ struct patch_xxl {
|
||||
const unsigned char mmu_read_cr2[3];
|
||||
const unsigned char mmu_read_cr3[3];
|
||||
const unsigned char mmu_write_cr3[3];
|
||||
const unsigned char irq_restore_fl[2];
|
||||
const unsigned char cpu_wbinvd[2];
|
||||
const unsigned char cpu_usergs_sysret64[6];
|
||||
const unsigned char cpu_swapgs[3];
|
||||
const unsigned char mov64[3];
|
||||
};
|
||||
|
||||
@@ -39,11 +36,7 @@ static const struct patch_xxl patch_data_xxl = {
|
||||
.mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
|
||||
.mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
|
||||
.mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
|
||||
.irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
|
||||
.cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
|
||||
.cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8,
|
||||
0x48, 0x0f, 0x07 }, // swapgs; sysretq
|
||||
.cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs
|
||||
.mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
|
||||
};
|
||||
|
||||
@@ -76,7 +69,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
|
||||
switch (type) {
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
|
||||
PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
|
||||
PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
|
||||
PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
|
||||
@@ -85,8 +77,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
|
||||
PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
|
||||
PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
|
||||
|
||||
PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len);
|
||||
PATCH_CASE(cpu, swapgs, xxl, insn_buff, len);
|
||||
PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -4,9 +4,6 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
|
||||
#define DEBUG 1
|
||||
|
||||
static struct iommu_table_entry * __init
|
||||
find_dependents_of(struct iommu_table_entry *start,
|
||||
struct iommu_table_entry *finish,
|
||||
|
||||
@@ -161,7 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
|
||||
#endif
|
||||
|
||||
/* Kernel thread ? */
|
||||
if (unlikely(p->flags & PF_KTHREAD)) {
|
||||
if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
|
||||
memset(childregs, 0, sizeof(struct pt_regs));
|
||||
kthread_frame_init(frame, sp, arg);
|
||||
return 0;
|
||||
|
||||
@@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
|
||||
this_cpu_read(irq_count) != -1);
|
||||
this_cpu_read(hardirq_stack_inuse));
|
||||
|
||||
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
|
||||
switch_fpu_prepare(prev_fpu, cpu);
|
||||
|
||||
@@ -704,6 +704,9 @@ void ptrace_disable(struct task_struct *child)
|
||||
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
|
||||
static const struct user_regset_view user_x86_32_view; /* Initialized below. */
|
||||
#endif
|
||||
#ifdef CONFIG_X86_64
|
||||
static const struct user_regset_view user_x86_64_view; /* Initialized below. */
|
||||
#endif
|
||||
|
||||
long arch_ptrace(struct task_struct *child, long request,
|
||||
unsigned long addr, unsigned long data)
|
||||
@@ -711,6 +714,14 @@ long arch_ptrace(struct task_struct *child, long request,
|
||||
int ret;
|
||||
unsigned long __user *datap = (unsigned long __user *)data;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* This is native 64-bit ptrace() */
|
||||
const struct user_regset_view *regset_view = &user_x86_64_view;
|
||||
#else
|
||||
/* This is native 32-bit ptrace() */
|
||||
const struct user_regset_view *regset_view = &user_x86_32_view;
|
||||
#endif
|
||||
|
||||
switch (request) {
|
||||
/* read the word at location addr in the USER area. */
|
||||
case PTRACE_PEEKUSR: {
|
||||
@@ -749,28 +760,28 @@ long arch_ptrace(struct task_struct *child, long request,
|
||||
|
||||
case PTRACE_GETREGS: /* Get all gp regs from the child. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETREGS: /* Set all gp regs in the child. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_GETFPREGS: /* Get the child FPU state. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPREGS: /* Set the child FPU state. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
regset_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
@@ -1152,28 +1163,28 @@ static long x32_arch_ptrace(struct task_struct *child,
|
||||
|
||||
case PTRACE_GETREGS: /* Get all gp regs from the child. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETREGS: /* Set all gp regs in the child. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_GENERAL,
|
||||
0, sizeof(struct user_regs_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_GETFPREGS: /* Get the child FPU state. */
|
||||
return copy_regset_to_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
|
||||
case PTRACE_SETFPREGS: /* Set the child FPU state. */
|
||||
return copy_regset_from_user(child,
|
||||
task_user_regset_view(current),
|
||||
&user_x86_64_view,
|
||||
REGSET_FP,
|
||||
0, sizeof(struct user_i387_struct),
|
||||
datap);
|
||||
@@ -1309,6 +1320,25 @@ void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
|
||||
xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is used by the core dump code to decide which regset to dump. The
|
||||
* core dump code writes out the resulting .e_machine and the corresponding
|
||||
* regsets. This is suboptimal if the task is messing around with its CS.L
|
||||
* field, but at worst the core dump will end up missing some information.
|
||||
*
|
||||
* Unfortunately, it is also used by the broken PTRACE_GETREGSET and
|
||||
* PTRACE_SETREGSET APIs. These APIs look at the .regsets field but have
|
||||
* no way to make sure that the e_machine they use matches the caller's
|
||||
* expectations. The result is that the data format returned by
|
||||
* PTRACE_GETREGSET depends on the returned CS field (and even the offset
|
||||
* of the returned CS field depends on its value!) and the data format
|
||||
* accepted by PTRACE_SETREGSET is determined by the old CS value. The
|
||||
* upshot is that it is basically impossible to use these APIs correctly.
|
||||
*
|
||||
* The best way to fix it in the long run would probably be to add new
|
||||
* improved ptrace() APIs to read and write registers reliably, possibly by
|
||||
* allowing userspace to select the ELF e_machine variant that they expect.
|
||||
*/
|
||||
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
|
||||
{
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
|
||||
@@ -477,6 +477,15 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
|
||||
},
|
||||
},
|
||||
|
||||
{ /* PCIe Wifi card isn't detected after reboot otherwise */
|
||||
.callback = set_pci_reboot,
|
||||
.ident = "Zotac ZBOX CI327 nano",
|
||||
.matches = {
|
||||
DMI_MATCH(DMI_SYS_VENDOR, "NA"),
|
||||
DMI_MATCH(DMI_PRODUCT_NAME, "ZBOX-CI327NANO-GS-01"),
|
||||
},
|
||||
},
|
||||
|
||||
/* Sony */
|
||||
{ /* Handle problems with rebooting on Sony VGN-Z540N */
|
||||
.callback = set_bios_reboot,
|
||||
@@ -538,31 +547,21 @@ static void emergency_vmx_disable_all(void)
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
* We need to disable VMX on all CPUs before rebooting, otherwise
|
||||
* we risk hanging up the machine, because the CPU ignores INIT
|
||||
* signals when VMX is enabled.
|
||||
* Disable VMX on all CPUs before rebooting, otherwise we risk hanging
|
||||
* the machine, because the CPU blocks INIT when it's in VMX root.
|
||||
*
|
||||
* We can't take any locks and we may be on an inconsistent
|
||||
* state, so we use NMIs as IPIs to tell the other CPUs to disable
|
||||
* VMX and halt.
|
||||
* We can't take any locks and we may be on an inconsistent state, so
|
||||
* use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
|
||||
*
|
||||
* For safety, we will avoid running the nmi_shootdown_cpus()
|
||||
* stuff unnecessarily, but we don't have a way to check
|
||||
* if other CPUs have VMX enabled. So we will call it only if the
|
||||
* CPU we are running on has VMX enabled.
|
||||
*
|
||||
* We will miss cases where VMX is not enabled on all CPUs. This
|
||||
* shouldn't do much harm because KVM always enable VMX on all
|
||||
* CPUs anyway. But we can miss it on the small window where KVM
|
||||
* is still enabling VMX.
|
||||
* Do the NMI shootdown even if VMX if off on _this_ CPU, as that
|
||||
* doesn't prevent a different CPU from being in VMX root operation.
|
||||
*/
|
||||
if (cpu_has_vmx() && cpu_vmx_enabled()) {
|
||||
/* Disable VMX on this CPU. */
|
||||
cpu_vmxoff();
|
||||
if (cpu_has_vmx()) {
|
||||
/* Safely force _this_ CPU out of VMX root operation. */
|
||||
__cpu_emergency_vmxoff();
|
||||
|
||||
/* Halt and disable VMX on the other CPUs */
|
||||
/* Halt and exit VMX root operation on the other CPUs. */
|
||||
nmi_shootdown_cpus(vmxoff_nmi);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/root_dev.h>
|
||||
#include <linux/sfi.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/tboot.h>
|
||||
#include <linux/usb/xhci-dbgp.h>
|
||||
@@ -1185,7 +1184,6 @@ void __init setup_arch(char **cmdline_p)
|
||||
* Read APIC and some other early information from ACPI tables.
|
||||
*/
|
||||
acpi_boot_init();
|
||||
sfi_init();
|
||||
x86_dtb_init();
|
||||
|
||||
/*
|
||||
|
||||
@@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
|
||||
case 0xe4:
|
||||
case 0xe5:
|
||||
*exitinfo |= IOIO_TYPE_IN;
|
||||
*exitinfo |= (u64)insn->immediate.value << 16;
|
||||
*exitinfo |= (u8)insn->immediate.value << 16;
|
||||
break;
|
||||
|
||||
/* OUT immediate opcodes */
|
||||
case 0xe6:
|
||||
case 0xe7:
|
||||
*exitinfo |= IOIO_TYPE_OUT;
|
||||
*exitinfo |= (u64)insn->immediate.value << 16;
|
||||
*exitinfo |= (u8)insn->immediate.value << 16;
|
||||
break;
|
||||
|
||||
/* IN register opcodes */
|
||||
|
||||
@@ -225,7 +225,7 @@ static inline u64 sev_es_rd_ghcb_msr(void)
|
||||
return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
|
||||
}
|
||||
|
||||
static inline void sev_es_wr_ghcb_msr(u64 val)
|
||||
static __always_inline void sev_es_wr_ghcb_msr(u64 val)
|
||||
{
|
||||
u32 low, high;
|
||||
|
||||
@@ -286,6 +286,12 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
|
||||
u16 d2;
|
||||
u8 d1;
|
||||
|
||||
/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
|
||||
if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
|
||||
memcpy(dst, buf, size);
|
||||
return ES_OK;
|
||||
}
|
||||
|
||||
switch (size) {
|
||||
case 1:
|
||||
memcpy(&d1, buf, 1);
|
||||
@@ -335,6 +341,12 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
|
||||
u16 d2;
|
||||
u8 d1;
|
||||
|
||||
/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
|
||||
if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
|
||||
memcpy(buf, src, size);
|
||||
return ES_OK;
|
||||
}
|
||||
|
||||
switch (size) {
|
||||
case 1:
|
||||
if (get_user(d1, s))
|
||||
|
||||
@@ -56,6 +56,7 @@
|
||||
#include <linux/numa.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/overflow.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/desc.h>
|
||||
@@ -1832,6 +1833,7 @@ void arch_set_max_freq_ratio(bool turbo_disabled)
|
||||
arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
|
||||
arch_turbo_freq_ratio;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
|
||||
|
||||
static bool turbo_disabled(void)
|
||||
{
|
||||
@@ -2083,6 +2085,23 @@ static void init_counter_refs(void)
|
||||
this_cpu_write(arch_prev_mperf, mperf);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
static struct syscore_ops freq_invariance_syscore_ops = {
|
||||
.resume = init_counter_refs,
|
||||
};
|
||||
|
||||
static void register_freq_invariance_syscore_ops(void)
|
||||
{
|
||||
/* Bail out if registered already. */
|
||||
if (freq_invariance_syscore_ops.node.prev)
|
||||
return;
|
||||
|
||||
register_syscore_ops(&freq_invariance_syscore_ops);
|
||||
}
|
||||
#else
|
||||
static inline void register_freq_invariance_syscore_ops(void) {}
|
||||
#endif
|
||||
|
||||
static void init_freq_invariance(bool secondary, bool cppc_ready)
|
||||
{
|
||||
bool ret = false;
|
||||
@@ -2109,6 +2128,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
|
||||
if (ret) {
|
||||
init_counter_refs();
|
||||
static_branch_enable(&arch_scale_freq_key);
|
||||
register_freq_invariance_syscore_ops();
|
||||
pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
|
||||
} else {
|
||||
pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
|
||||
|
||||
@@ -11,14 +11,26 @@ enum insn_type {
|
||||
RET = 3, /* tramp / site cond-tail-call */
|
||||
};
|
||||
|
||||
/*
|
||||
* data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax
|
||||
* The REX.W cancels the effect of any data16.
|
||||
*/
|
||||
static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
|
||||
|
||||
static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
|
||||
{
|
||||
const void *emulate = NULL;
|
||||
int size = CALL_INSN_SIZE;
|
||||
const void *code;
|
||||
|
||||
switch (type) {
|
||||
case CALL:
|
||||
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
|
||||
if (func == &__static_call_return0) {
|
||||
emulate = code;
|
||||
code = &xor5rax;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case NOP:
|
||||
@@ -41,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
|
||||
if (unlikely(system_state == SYSTEM_BOOTING))
|
||||
return text_poke_early(insn, code, size);
|
||||
|
||||
text_poke_bp(insn, code, size, NULL);
|
||||
text_poke_bp(insn, code, size, emulate);
|
||||
}
|
||||
|
||||
static void __static_call_validate(void *insn, bool tail)
|
||||
@@ -54,7 +66,8 @@ static void __static_call_validate(void *insn, bool tail)
|
||||
return;
|
||||
} else {
|
||||
if (opcode == CALL_INSN_OPCODE ||
|
||||
!memcmp(insn, ideal_nops[NOP_ATOMIC5], 5))
|
||||
!memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) ||
|
||||
!memcmp(insn, xor5rax, 5))
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -127,12 +127,17 @@ static int enable_single_step(struct task_struct *child)
|
||||
regs->flags |= X86_EFLAGS_TF;
|
||||
|
||||
/*
|
||||
* Always set TIF_SINGLESTEP - this guarantees that
|
||||
* we single-step system calls etc.. This will also
|
||||
* Always set TIF_SINGLESTEP. This will also
|
||||
* cause us to set TF when returning to user mode.
|
||||
*/
|
||||
set_tsk_thread_flag(child, TIF_SINGLESTEP);
|
||||
|
||||
/*
|
||||
* Ensure that a trap is triggered once stepping out of a system
|
||||
* call prior to executing any user instruction.
|
||||
*/
|
||||
set_task_syscall_work(child, SYSCALL_EXIT_TRAP);
|
||||
|
||||
oflags = regs->flags;
|
||||
|
||||
/* Set TF on the kernel stack.. */
|
||||
@@ -230,6 +235,7 @@ void user_disable_single_step(struct task_struct *child)
|
||||
|
||||
/* Always clear TIF_SINGLESTEP... */
|
||||
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
|
||||
clear_task_syscall_work(child, SYSCALL_EXIT_TRAP);
|
||||
|
||||
/* But touch TF only if it was set by us.. */
|
||||
if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
|
||||
|
||||
@@ -90,14 +90,10 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
|
||||
unsigned long, prot, unsigned long, flags,
|
||||
unsigned long, fd, unsigned long, off)
|
||||
{
|
||||
long error;
|
||||
error = -EINVAL;
|
||||
if (off & ~PAGE_MASK)
|
||||
goto out;
|
||||
return -EINVAL;
|
||||
|
||||
error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
|
||||
out:
|
||||
return error;
|
||||
return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static void find_start_end(unsigned long addr, unsigned long flags,
|
||||
|
||||
@@ -471,7 +471,7 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
break;
|
||||
|
||||
case ORC_REG_SP_INDIRECT:
|
||||
sp = state->sp + orc->sp_offset;
|
||||
sp = state->sp;
|
||||
indirect = true;
|
||||
break;
|
||||
|
||||
@@ -521,6 +521,9 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
if (indirect) {
|
||||
if (!deref_stack_reg(state, sp, &sp))
|
||||
goto err;
|
||||
|
||||
if (orc->sp_reg == ORC_REG_SP_INDIRECT)
|
||||
sp += orc->sp_offset;
|
||||
}
|
||||
|
||||
/* Find IP, SP and possibly regs: */
|
||||
|
||||
@@ -134,7 +134,11 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
|
||||
unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
|
||||
unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
|
||||
unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);
|
||||
unsafe_put_user(vm86->screen_bitmap, &user->screen_bitmap, Efault_end);
|
||||
|
||||
/*
|
||||
* Don't write screen_bitmap in case some user had a value there
|
||||
* and expected it to remain unchanged.
|
||||
*/
|
||||
|
||||
user_access_end();
|
||||
|
||||
@@ -160,49 +164,6 @@ Efault:
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
static void mark_screen_rdonly(struct mm_struct *mm)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
spinlock_t *ptl;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
int i;
|
||||
|
||||
mmap_write_lock(mm);
|
||||
pgd = pgd_offset(mm, 0xA0000);
|
||||
if (pgd_none_or_clear_bad(pgd))
|
||||
goto out;
|
||||
p4d = p4d_offset(pgd, 0xA0000);
|
||||
if (p4d_none_or_clear_bad(p4d))
|
||||
goto out;
|
||||
pud = pud_offset(p4d, 0xA0000);
|
||||
if (pud_none_or_clear_bad(pud))
|
||||
goto out;
|
||||
pmd = pmd_offset(pud, 0xA0000);
|
||||
|
||||
if (pmd_trans_huge(*pmd)) {
|
||||
vma = find_vma(mm, 0xA0000);
|
||||
split_huge_pmd(vma, pmd, 0xA0000);
|
||||
}
|
||||
if (pmd_none_or_clear_bad(pmd))
|
||||
goto out;
|
||||
pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
|
||||
for (i = 0; i < 32; i++) {
|
||||
if (pte_present(*pte))
|
||||
set_pte(pte, pte_wrprotect(*pte));
|
||||
pte++;
|
||||
}
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
out:
|
||||
mmap_write_unlock(mm);
|
||||
flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int do_vm86_irq_handling(int subfunction, int irqnumber);
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);

@@ -282,6 +243,15 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
offsetof(struct vm86_struct, int_revectored)))
return -EFAULT;


/* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. */
if (v.flags & VM86_SCREEN_BITMAP) {
char comm[TASK_COMM_LEN];

pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current));
return -EINVAL;
}

memset(&vm86regs, 0, sizeof(vm86regs));

vm86regs.pt.bx = v.regs.ebx;
@@ -302,7 +272,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
vm86regs.gs = v.regs.gs;

vm86->flags = v.flags;
vm86->screen_bitmap = v.screen_bitmap;
vm86->cpu_type = v.cpu_type;

if (copy_from_user(&vm86->int_revectored,
@@ -370,9 +339,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
update_task_stack(tsk);
preempt_enable();

if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);

memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
return regs->ax;
}