mirror of
https://github.com/torvalds/linux.git
synced 2024-12-15 15:41:58 +00:00
d1ecfa9d1f
This patch fixes crashes during boot for HVM guests on older (pre HVM vector callback) Xen versions. Without this, current kernels will always fail to boot on those Xen versions. Sample stack trace: BUG: unable to handle kernel paging request at ffffffffff200000 IP: __xen_evtchn_do_upcall+0x1e/0x80 PGD 1e0e067 P4D 1e0e067 PUD 1e10067 PMD 235c067 PTE 0 Oops: 0002 [#1] SMP PTI Modules linked in: CPU: 0 PID: 512 Comm: kworker/u2:0 Not tainted 4.14.33-52.13.amzn1.x86_64 #1 Hardware name: Xen HVM domU, BIOS 3.4.3.amazon 11/11/2016 task: ffff88002531d700 task.stack: ffffc90000480000 RIP: 0010:__xen_evtchn_do_upcall+0x1e/0x80 RSP: 0000:ffff880025403ef0 EFLAGS: 00010046 RAX: ffffffff813cc760 RBX: ffffffffff200000 RCX: ffffc90000483ef0 RDX: ffff880020540a00 RSI: ffff880023c78000 RDI: 000000000000001c RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff880025403f5c R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff880025400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff200000 CR3: 0000000001e0a000 CR4: 00000000000006f0 Call Trace: <IRQ> do_hvm_evtchn_intr+0xa/0x10 __handle_irq_event_percpu+0x43/0x1a0 handle_irq_event_percpu+0x20/0x50 handle_irq_event+0x39/0x60 handle_fasteoi_irq+0x80/0x140 handle_irq+0xaf/0x120 do_IRQ+0x41/0xd0 common_interrupt+0x7d/0x7d </IRQ> During boot, the HYPERVISOR_shared_info page gets remapped to make it work with KASLR. This means that any pointer derived from it needs to be adjusted. The only value that this applies to is the vcpu_info pointer for VCPU 0. For PV and HVM with the callback vector feature, this gets done via the smp_ops prepare_boot_cpu callback. Older Xen versions do not support the HVM callback vector, so there is no Xen-specific smp_ops set up in that scenario. So, the vcpu_info pointer for VCPU 0 never gets set to the proper value, and the first reference of it will be bad. Fix this by resetting it immediately after the remap. Signed-off-by: Frank van der Linden <fllinden@amazon.com> Reviewed-by: Eduardo Valentin <eduval@amazon.com> Reviewed-by: Alakesh Haloi <alakeshh@amazon.com> Reviewed-by: Vallish Vaidyeshwara <vallish@amazon.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Juergen Gross <jgross@suse.com> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: xen-devel@lists.xenproject.org Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
271 lines
6.1 KiB
C
271 lines
6.1 KiB
C
#include <linux/acpi.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/memblock.h>
|
|
|
|
#include <xen/features.h>
|
|
#include <xen/events.h>
|
|
#include <xen/interface/memory.h>
|
|
|
|
#include <asm/cpu.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/reboot.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/hypervisor.h>
|
|
#include <asm/e820/api.h>
|
|
#include <asm/early_ioremap.h>
|
|
|
|
#include <asm/xen/cpuid.h>
|
|
#include <asm/xen/hypervisor.h>
|
|
#include <asm/xen/page.h>
|
|
|
|
#include "xen-ops.h"
|
|
#include "mmu.h"
|
|
#include "smp.h"
|
|
|
|
static unsigned long shared_info_pfn;
|
|
|
|
void xen_hvm_init_shared_info(void)
|
|
{
|
|
struct xen_add_to_physmap xatp;
|
|
|
|
xatp.domid = DOMID_SELF;
|
|
xatp.idx = 0;
|
|
xatp.space = XENMAPSPACE_shared_info;
|
|
xatp.gpfn = shared_info_pfn;
|
|
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
|
|
BUG();
|
|
}
|
|
|
|
static void __init reserve_shared_info(void)
|
|
{
|
|
u64 pa;
|
|
|
|
/*
|
|
* Search for a free page starting at 4kB physical address.
|
|
* Low memory is preferred to avoid an EPT large page split up
|
|
* by the mapping.
|
|
* Starting below X86_RESERVE_LOW (usually 64kB) is fine as
|
|
* the BIOS used for HVM guests is well behaved and won't
|
|
* clobber memory other than the first 4kB.
|
|
*/
|
|
for (pa = PAGE_SIZE;
|
|
!e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
|
|
memblock_is_reserved(pa);
|
|
pa += PAGE_SIZE)
|
|
;
|
|
|
|
shared_info_pfn = PHYS_PFN(pa);
|
|
|
|
memblock_reserve(pa, PAGE_SIZE);
|
|
HYPERVISOR_shared_info = early_memremap(pa, PAGE_SIZE);
|
|
}
|
|
|
|
static void __init xen_hvm_init_mem_mapping(void)
|
|
{
|
|
early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
|
|
HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
|
|
|
|
/*
|
|
* The virtual address of the shared_info page has changed, so
|
|
* the vcpu_info pointer for VCPU 0 is now stale.
|
|
*
|
|
* The prepare_boot_cpu callback will re-initialize it via
|
|
* xen_vcpu_setup, but we can't rely on that to be called for
|
|
* old Xen versions (xen_have_vector_callback == 0).
|
|
*
|
|
* It is, in any case, bad to have a stale vcpu_info pointer
|
|
* so reset it now.
|
|
*/
|
|
xen_vcpu_info_reset(0);
|
|
}
|
|
|
|
static void __init init_hvm_pv_info(void)
|
|
{
|
|
int major, minor;
|
|
uint32_t eax, ebx, ecx, edx, base;
|
|
|
|
base = xen_cpuid_base();
|
|
eax = cpuid_eax(base + 1);
|
|
|
|
major = eax >> 16;
|
|
minor = eax & 0xffff;
|
|
printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
|
|
|
|
xen_domain_type = XEN_HVM_DOMAIN;
|
|
|
|
/* PVH set up hypercall page in xen_prepare_pvh(). */
|
|
if (xen_pvh_domain())
|
|
pv_info.name = "Xen PVH";
|
|
else {
|
|
u64 pfn;
|
|
uint32_t msr;
|
|
|
|
pv_info.name = "Xen HVM";
|
|
msr = cpuid_ebx(base + 2);
|
|
pfn = __pa(hypercall_page);
|
|
wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
|
|
}
|
|
|
|
xen_setup_features();
|
|
|
|
cpuid(base + 4, &eax, &ebx, &ecx, &edx);
|
|
if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
|
|
this_cpu_write(xen_vcpu_id, ebx);
|
|
else
|
|
this_cpu_write(xen_vcpu_id, smp_processor_id());
|
|
}
|
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
|
static void xen_hvm_shutdown(void)
|
|
{
|
|
native_machine_shutdown();
|
|
if (kexec_in_progress)
|
|
xen_reboot(SHUTDOWN_soft_reset);
|
|
}
|
|
|
|
static void xen_hvm_crash_shutdown(struct pt_regs *regs)
|
|
{
|
|
native_machine_crash_shutdown(regs);
|
|
xen_reboot(SHUTDOWN_soft_reset);
|
|
}
|
|
#endif
|
|
|
|
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
|
|
{
|
|
int rc = 0;
|
|
|
|
/*
|
|
* This can happen if CPU was offlined earlier and
|
|
* offlining timed out in common_cpu_die().
|
|
*/
|
|
if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
|
|
xen_smp_intr_free(cpu);
|
|
xen_uninit_lock_cpu(cpu);
|
|
}
|
|
|
|
if (cpu_acpi_id(cpu) != U32_MAX)
|
|
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
|
|
else
|
|
per_cpu(xen_vcpu_id, cpu) = cpu;
|
|
rc = xen_vcpu_setup(cpu);
|
|
if (rc)
|
|
return rc;
|
|
|
|
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
|
|
xen_setup_timer(cpu);
|
|
|
|
rc = xen_smp_intr_init(cpu);
|
|
if (rc) {
|
|
WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
|
|
cpu, rc);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
static int xen_cpu_dead_hvm(unsigned int cpu)
|
|
{
|
|
xen_smp_intr_free(cpu);
|
|
|
|
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
|
|
xen_teardown_timer(cpu);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void __init xen_hvm_guest_init(void)
|
|
{
|
|
if (xen_pv_domain())
|
|
return;
|
|
|
|
init_hvm_pv_info();
|
|
|
|
reserve_shared_info();
|
|
xen_hvm_init_shared_info();
|
|
|
|
/*
|
|
* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
|
|
* page, we use it in the event channel upcall and in some pvclock
|
|
* related functions.
|
|
*/
|
|
xen_vcpu_info_reset(0);
|
|
|
|
xen_panic_handler_init();
|
|
|
|
if (xen_feature(XENFEAT_hvm_callback_vector))
|
|
xen_have_vector_callback = 1;
|
|
|
|
xen_hvm_smp_init();
|
|
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_hvm, xen_cpu_dead_hvm));
|
|
xen_unplug_emulated_devices();
|
|
x86_init.irqs.intr_init = xen_init_IRQ;
|
|
xen_hvm_init_time_ops();
|
|
xen_hvm_init_mmu_ops();
|
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
|
machine_ops.shutdown = xen_hvm_shutdown;
|
|
machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
|
|
#endif
|
|
}
|
|
|
|
static bool xen_nopv;
|
|
static __init int xen_parse_nopv(char *arg)
|
|
{
|
|
xen_nopv = true;
|
|
return 0;
|
|
}
|
|
early_param("xen_nopv", xen_parse_nopv);
|
|
|
|
bool xen_hvm_need_lapic(void)
|
|
{
|
|
if (xen_nopv)
|
|
return false;
|
|
if (xen_pv_domain())
|
|
return false;
|
|
if (!xen_hvm_domain())
|
|
return false;
|
|
if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
|
|
return false;
|
|
return true;
|
|
}
|
|
EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
|
|
|
|
static uint32_t __init xen_platform_hvm(void)
|
|
{
|
|
if (xen_pv_domain() || xen_nopv)
|
|
return 0;
|
|
|
|
return xen_cpuid_base();
|
|
}
|
|
|
|
static __init void xen_hvm_guest_late_init(void)
|
|
{
|
|
#ifdef CONFIG_XEN_PVH
|
|
/* Test for PVH domain (PVH boot path taken overrides ACPI flags). */
|
|
if (!xen_pvh &&
|
|
(x86_platform.legacy.rtc || !x86_platform.legacy.no_vga))
|
|
return;
|
|
|
|
/* PVH detected. */
|
|
xen_pvh = true;
|
|
|
|
/* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */
|
|
if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC)
|
|
acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
|
|
|
|
machine_ops.emergency_restart = xen_emergency_restart;
|
|
pv_info.name = "Xen PVH";
|
|
#endif
|
|
}
|
|
|
|
const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
|
|
.name = "Xen HVM",
|
|
.detect = xen_platform_hvm,
|
|
.type = X86_HYPER_XEN_HVM,
|
|
.init.init_platform = xen_hvm_guest_init,
|
|
.init.x2apic_available = xen_x2apic_para_available,
|
|
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
|
|
.init.guest_late_init = xen_hvm_guest_late_init,
|
|
.runtime.pin_vcpu = xen_pin_vcpu,
|
|
};
|