6f84f8d158
Juergen Gross noticed that commit f7f99100d8 ("mm: stop zeroing memory
during allocation in vmemmap") broke XEN PV domains when deferred struct
page initialization is enabled.
This is because Xen's PagePinned() flag is erased from struct pages
when they are initialized later in boot.
Juergen fixed this problem by disabling deferred pages on Xen PV
domains. It is desirable, however, to have this feature available as it
reduces boot time. This fix re-enables the feature for PV domains, and
fixes the problem in the following way:
The fix is to delay setting PagePinned flag until struct pages for all
allocated memory are initialized, i.e. until after free_all_bootmem().
A new x86_init.hyper op init_after_bootmem() is called to let xen know
that boot allocator is done, and hence struct pages for all the
allocated memory are now initialized. If deferred page initialization
is enabled, the rest of struct pages are going to be initialized later
in boot once page_alloc_init_late() is called.
xen_after_bootmem() walks page table's pages and marks them pinned.
Link: http://lkml.kernel.org/r/20180226160112.24724-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Mathias Krause <minipli@googlemail.com>
Cc: Jinbum Park <jinb.park7@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Jia Zhang <zhang.jia@linux.alibaba.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
161 lines
4.0 KiB
C
161 lines
4.0 KiB
C
/*
 * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de>
 *
 *  For licencing details see kernel-base/COPYING
 */
|
|
#include <linux/init.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/export.h>
|
|
#include <linux/pci.h>
|
|
|
|
#include <asm/acpi.h>
|
|
#include <asm/bios_ebda.h>
|
|
#include <asm/paravirt.h>
|
|
#include <asm/pci_x86.h>
|
|
#include <asm/mpspec.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/apic.h>
|
|
#include <asm/e820/api.h>
|
|
#include <asm/time.h>
|
|
#include <asm/irq.h>
|
|
#include <asm/io_apic.h>
|
|
#include <asm/hpet.h>
|
|
#include <asm/pat.h>
|
|
#include <asm/tsc.h>
|
|
#include <asm/iommu.h>
|
|
#include <asm/mach_traps.h>
|
|
|
|
/*
 * Do-nothing callback taking no arguments and returning nothing; used
 * below as the default for hooks that need no work by default.
 */
void x86_init_noop(void)
{
}
|
|
/*
 * Do-nothing callback for hooks that take a single unsigned int; the
 * argument is ignored.
 */
void __init x86_init_uint_noop(unsigned int unused)
{
}
|
|
/* Default IOMMU init hook: does no work and always returns 0. */
static int __init iommu_init_noop(void)
{
	return 0;
}
|
|
/* Default IOMMU shutdown hook: intentionally empty. */
static void iommu_shutdown_noop(void)
{
}
|
|
/* Default for boolean-returning init hooks: always reports false. */
static bool __init bool_x86_init_noop(void)
{
	return false;
}
|
|
/*
 * Do-nothing callback for hooks that take one int; the argument is
 * ignored.
 */
static void x86_op_int_noop(int cpu)
{
}
|
|
/* Default for u64-returning init hooks: always returns 0. */
static u64 u64_x86_init_noop(void)
{
	return 0;
}
|
|
|
|
/*
|
|
* The platform setup functions are preset with the default functions
|
|
* for standard PC hardware.
|
|
*/
|
|
struct x86_init_ops x86_init __initdata = {
|
|
|
|
.resources = {
|
|
.probe_roms = probe_roms,
|
|
.reserve_resources = reserve_standard_io_resources,
|
|
.memory_setup = e820__memory_setup_default,
|
|
},
|
|
|
|
.mpparse = {
|
|
.mpc_record = x86_init_uint_noop,
|
|
.setup_ioapic_ids = x86_init_noop,
|
|
.mpc_apic_id = default_mpc_apic_id,
|
|
.smp_read_mpc_oem = default_smp_read_mpc_oem,
|
|
.mpc_oem_bus_info = default_mpc_oem_bus_info,
|
|
.find_smp_config = default_find_smp_config,
|
|
.get_smp_config = default_get_smp_config,
|
|
},
|
|
|
|
.irqs = {
|
|
.pre_vector_init = init_ISA_irqs,
|
|
.intr_init = native_init_IRQ,
|
|
.trap_init = x86_init_noop,
|
|
.intr_mode_init = apic_intr_mode_init
|
|
},
|
|
|
|
.oem = {
|
|
.arch_setup = x86_init_noop,
|
|
.banner = default_banner,
|
|
},
|
|
|
|
.paging = {
|
|
.pagetable_init = native_pagetable_init,
|
|
},
|
|
|
|
.timers = {
|
|
.setup_percpu_clockev = setup_boot_APIC_clock,
|
|
.timer_init = hpet_time_init,
|
|
.wallclock_init = x86_init_noop,
|
|
},
|
|
|
|
.iommu = {
|
|
.iommu_init = iommu_init_noop,
|
|
},
|
|
|
|
.pci = {
|
|
.init = x86_default_pci_init,
|
|
.init_irq = x86_default_pci_init_irq,
|
|
.fixup_irqs = x86_default_pci_fixup_irqs,
|
|
},
|
|
|
|
.hyper = {
|
|
.init_platform = x86_init_noop,
|
|
.guest_late_init = x86_init_noop,
|
|
.x2apic_available = bool_x86_init_noop,
|
|
.init_mem_mapping = x86_init_noop,
|
|
.init_after_bootmem = x86_init_noop,
|
|
},
|
|
|
|
.acpi = {
|
|
.get_root_pointer = u64_x86_init_noop,
|
|
.reduced_hw_early_init = acpi_generic_reduced_hw_init,
|
|
},
|
|
};
|
|
|
|
struct x86_cpuinit_ops x86_cpuinit = {
|
|
.early_percpu_clock_init = x86_init_noop,
|
|
.setup_percpu_clockev = setup_secondary_APIC_clock,
|
|
};
|
|
|
|
/* Default NMI init hook installed in x86_platform.nmi_init: does nothing. */
static void default_nmi_init(void)
{
}
|
|
|
|
/*
 * Default platform operations. The table is __ro_after_init and is
 * exported (GPL-only) below.
 */
struct x86_platform_ops x86_platform __ro_after_init = {
	.calibrate_cpu			= native_calibrate_cpu,
	.calibrate_tsc			= native_calibrate_tsc,
	.get_wallclock			= mach_get_cmos_time,
	.set_wallclock			= mach_set_rtc_mmss,
	.iommu_shutdown			= iommu_shutdown_noop,
	.is_untracked_pat_range		= is_ISA_range,
	.nmi_init			= default_nmi_init,
	.get_nmi_reason			= default_get_nmi_reason,
	.save_sched_clock_state		= tsc_save_sched_clock_state,
	.restore_sched_clock_state	= tsc_restore_sched_clock_state,
	.hyper.pin_vcpu			= x86_op_int_noop,
};

EXPORT_SYMBOL_GPL(x86_platform);
|
|
|
|
#if defined(CONFIG_PCI_MSI)
|
|
/*
 * Default MSI operations; the arch_*_msi_* hooks in this file dispatch
 * through this table.
 */
struct x86_msi_ops x86_msi __ro_after_init = {
	.setup_msi_irqs		= native_setup_msi_irqs,
	.teardown_msi_irq	= native_teardown_msi_irq,
	.teardown_msi_irqs	= default_teardown_msi_irqs,
	.restore_msi_irqs	= default_restore_msi_irqs,
};
|
|
|
|
/* MSI arch specific hooks */
|
|
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
|
{
|
|
return x86_msi.setup_msi_irqs(dev, nvec, type);
|
|
}
|
|
|
|
void arch_teardown_msi_irqs(struct pci_dev *dev)
|
|
{
|
|
x86_msi.teardown_msi_irqs(dev);
|
|
}
|
|
|
|
void arch_teardown_msi_irq(unsigned int irq)
|
|
{
|
|
x86_msi.teardown_msi_irq(irq);
|
|
}
|
|
|
|
void arch_restore_msi_irqs(struct pci_dev *dev)
|
|
{
|
|
x86_msi.restore_msi_irqs(dev);
|
|
}
|
|
#endif
|
|
|
|
/* Default APIC operations, wired to the native implementations. */
struct x86_apic_ops x86_apic_ops __ro_after_init = {
	.io_apic_read	= native_io_apic_read,
	.restore	= native_restore_boot_irq_mode,
};
|