mirror of
https://github.com/torvalds/linux.git
synced 2024-11-29 07:31:29 +00:00
b2371587fe
This patch implements arch_xen_unpopulated_init() on Arm where the extended regions (if any) are gathered from DT and inserted into specific Xen resource to be used as unused address space for Xen scratch pages by unpopulated-alloc code. The extended region (safe range) is a region of guest physical address space which is unused and could be safely used to create grant/foreign mappings instead of wasting real RAM pages from the domain memory for establishing these mappings. The extended regions are chosen by the hypervisor at the domain creation time and advertised to it via "reg" property under hypervisor node in the guest device-tree. As region 0 is reserved for grant table space (always present), the indexes for extended regions are 1...N. If arch_xen_unpopulated_init() fails for some reason the default behaviour will be restored (allocate xenballooned pages). This patch also removes XEN_UNPOPULATED_ALLOC dependency on x86. Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> Link: https://lore.kernel.org/r/1639080336-26573-6-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross <jgross@suse.com>
570 lines
14 KiB
C
570 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
#include <xen/xen.h>
|
|
#include <xen/events.h>
|
|
#include <xen/grant_table.h>
|
|
#include <xen/hvm.h>
|
|
#include <xen/interface/vcpu.h>
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/memory.h>
|
|
#include <xen/interface/hvm/params.h>
|
|
#include <xen/features.h>
|
|
#include <xen/platform_pci.h>
|
|
#include <xen/xenbus.h>
|
|
#include <xen/page.h>
|
|
#include <xen/interface/sched.h>
|
|
#include <xen/xen-ops.h>
|
|
#include <asm/xen/hypervisor.h>
|
|
#include <asm/xen/hypercall.h>
|
|
#include <asm/system_misc.h>
|
|
#include <asm/efi.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/irqreturn.h>
|
|
#include <linux/module.h>
|
|
#include <linux/of.h>
|
|
#include <linux/of_fdt.h>
|
|
#include <linux/of_irq.h>
|
|
#include <linux/of_address.h>
|
|
#include <linux/cpuidle.h>
|
|
#include <linux/cpufreq.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/console.h>
|
|
#include <linux/pvclock_gtod.h>
|
|
#include <linux/reboot.h>
|
|
#include <linux/time64.h>
|
|
#include <linux/timekeeping.h>
|
|
#include <linux/timekeeper_internal.h>
|
|
#include <linux/acpi.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
static struct start_info _xen_start_info;
|
|
struct start_info *xen_start_info = &_xen_start_info;
|
|
EXPORT_SYMBOL(xen_start_info);
|
|
|
|
enum xen_domain_type xen_domain_type = XEN_NATIVE;
|
|
EXPORT_SYMBOL(xen_domain_type);
|
|
|
|
struct shared_info xen_dummy_shared_info;
|
|
struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
|
|
|
|
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
|
static struct vcpu_info __percpu *xen_vcpu_info;
|
|
|
|
/* Linux <-> Xen vCPU id mapping */
|
|
DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
|
|
EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
|
|
|
|
/* These are unused until we support booting "pre-ballooned" */
|
|
unsigned long xen_released_pages;
|
|
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
|
|
|
|
static __read_mostly unsigned int xen_events_irq;
|
|
static __read_mostly phys_addr_t xen_grant_frames;
|
|
|
|
#define GRANT_TABLE_INDEX 0
|
|
#define EXT_REGION_INDEX 1
|
|
|
|
uint32_t xen_start_flags;
|
|
EXPORT_SYMBOL(xen_start_flags);
|
|
|
|
/*
 * Unmap a range of foreign guest frames from @vma.
 *
 * Thin arch wrapper: the work is delegated to xen_xlate_unmap_gfn_range()
 * and its return value is passed back unchanged.
 */
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
			       int nr, struct page **pages)
{
	int ret;

	ret = xen_xlate_unmap_gfn_range(vma, nr, pages);

	return ret;
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
|
|
|
|
static void xen_read_wallclock(struct timespec64 *ts)
|
|
{
|
|
u32 version;
|
|
struct timespec64 now, ts_monotonic;
|
|
struct shared_info *s = HYPERVISOR_shared_info;
|
|
struct pvclock_wall_clock *wall_clock = &(s->wc);
|
|
|
|
/* get wallclock at system boot */
|
|
do {
|
|
version = wall_clock->version;
|
|
rmb(); /* fetch version before time */
|
|
now.tv_sec = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec;
|
|
now.tv_nsec = wall_clock->nsec;
|
|
rmb(); /* fetch time before checking version */
|
|
} while ((wall_clock->version & 1) || (version != wall_clock->version));
|
|
|
|
/* time since system boot */
|
|
ktime_get_ts64(&ts_monotonic);
|
|
*ts = timespec64_add(now, ts_monotonic);
|
|
}
|
|
|
|
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
|
|
unsigned long was_set, void *priv)
|
|
{
|
|
/* Protected by the calling core code serialization */
|
|
static struct timespec64 next_sync;
|
|
|
|
struct xen_platform_op op;
|
|
struct timespec64 now, system_time;
|
|
struct timekeeper *tk = priv;
|
|
|
|
now.tv_sec = tk->xtime_sec;
|
|
now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
|
|
system_time = timespec64_add(now, tk->wall_to_monotonic);
|
|
|
|
/*
|
|
* We only take the expensive HV call when the clock was set
|
|
* or when the 11 minutes RTC synchronization time elapsed.
|
|
*/
|
|
if (!was_set && timespec64_compare(&now, &next_sync) < 0)
|
|
return NOTIFY_OK;
|
|
|
|
op.cmd = XENPF_settime64;
|
|
op.u.settime64.mbz = 0;
|
|
op.u.settime64.secs = now.tv_sec;
|
|
op.u.settime64.nsecs = now.tv_nsec;
|
|
op.u.settime64.system_time = timespec64_to_ns(&system_time);
|
|
(void)HYPERVISOR_platform_op(&op);
|
|
|
|
/*
|
|
* Move the next drift compensation time 11 minutes
|
|
* ahead. That's emulating the sync_cmos_clock() update for
|
|
* the hardware RTC.
|
|
*/
|
|
next_sync = now;
|
|
next_sync.tv_sec += 11 * 60;
|
|
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
static struct notifier_block xen_pvclock_gtod_notifier = {
|
|
.notifier_call = xen_pvclock_gtod_notify,
|
|
};
|
|
|
|
static int xen_starting_cpu(unsigned int cpu)
|
|
{
|
|
struct vcpu_register_vcpu_info info;
|
|
struct vcpu_info *vcpup;
|
|
int err;
|
|
|
|
/*
|
|
* VCPUOP_register_vcpu_info cannot be called twice for the same
|
|
* vcpu, so if vcpu_info is already registered, just get out. This
|
|
* can happen with cpu-hotplug.
|
|
*/
|
|
if (per_cpu(xen_vcpu, cpu) != NULL)
|
|
goto after_register_vcpu_info;
|
|
|
|
pr_info("Xen: initializing cpu%d\n", cpu);
|
|
vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
|
|
|
|
info.mfn = percpu_to_gfn(vcpup);
|
|
info.offset = xen_offset_in_page(vcpup);
|
|
|
|
err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
|
|
&info);
|
|
BUG_ON(err);
|
|
per_cpu(xen_vcpu, cpu) = vcpup;
|
|
|
|
if (!xen_kernel_unmapped_at_usr())
|
|
xen_setup_runstate_info(cpu);
|
|
|
|
after_register_vcpu_info:
|
|
enable_percpu_irq(xen_events_irq, 0);
|
|
return 0;
|
|
}
|
|
|
|
static int xen_dying_cpu(unsigned int cpu)
|
|
{
|
|
disable_percpu_irq(xen_events_irq);
|
|
return 0;
|
|
}
|
|
|
|
void xen_reboot(int reason)
|
|
{
|
|
struct sched_shutdown r = { .reason = reason };
|
|
int rc;
|
|
|
|
rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
|
|
BUG_ON(rc);
|
|
}
|
|
|
|
static int xen_restart(struct notifier_block *nb, unsigned long action,
|
|
void *data)
|
|
{
|
|
xen_reboot(SHUTDOWN_reboot);
|
|
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
static struct notifier_block xen_restart_nb = {
|
|
.notifier_call = xen_restart,
|
|
.priority = 192,
|
|
};
|
|
|
|
static void xen_power_off(void)
|
|
{
|
|
xen_reboot(SHUTDOWN_poweroff);
|
|
}
|
|
|
|
static irqreturn_t xen_arm_callback(int irq, void *arg)
|
|
{
|
|
xen_hvm_evtchn_do_upcall();
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
static __initdata struct {
|
|
const char *compat;
|
|
const char *prefix;
|
|
const char *version;
|
|
bool found;
|
|
} hyper_node = {"xen,xen", "xen,xen-", NULL, false};
|
|
|
|
/*
 * Flat device tree scan callback looking for the Xen "hypervisor" node.
 *
 * @node:  offset of the node currently being visited
 * @uname: name of that node
 * @depth: depth of that node in the tree
 * @data:  unused
 *
 * Only a node named "hypervisor" at depth 1 is considered. For that node
 * the callback records in hyper_node whether it is compatible with
 * hyper_node.compat, extracts the version text that follows
 * hyper_node.prefix in the "compatible" property, and enables EFI runtime
 * services when a "uefi" subnode is present.
 *
 * Always returns 0.
 */
static int __init fdt_find_hyper_node(unsigned long node, const char *uname,
				      int depth, void *data)
{
	const char *compat;
	size_t prefix_len;
	int len;

	if (depth != 1 || strcmp(uname, "hypervisor") != 0)
		return 0;

	if (of_flat_dt_is_compatible(node, hyper_node.compat))
		hyper_node.found = true;

	/*
	 * A missing "compatible" property yields a NULL pointer and a
	 * negative length. Check both explicitly: comparing the negative
	 * int against a size_t would convert it to a huge unsigned value,
	 * pass the length test and hand NULL to strncmp().
	 *
	 * NOTE(review): the "+ 3" presumably requires room for a minimal
	 * version string after the prefix - confirm against the binding.
	 */
	prefix_len = strlen(hyper_node.prefix);
	compat = of_get_flat_dt_prop(node, "compatible", &len);
	if (compat && len > 0 && prefix_len + 3 < (size_t)len &&
	    !strncmp(hyper_node.prefix, compat, prefix_len))
		hyper_node.version = compat + prefix_len;

	/*
	 * Check if Xen supports EFI by checking whether there is the
	 * "/hypervisor/uefi" node in DT. If so, runtime services are available
	 * through proxy functions (e.g. in case of Xen dom0 EFI implementation
	 * they call special hypercall which executes relevant EFI functions)
	 * and that is why they are always enabled.
	 */
	if (IS_ENABLED(CONFIG_XEN_EFI)) {
		if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) &&
		    !efi_runtime_disabled())
			set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
	}

	return 0;
}
|
|
|
|
/*
|
|
* see Documentation/devicetree/bindings/arm/xen.txt for the
|
|
* documentation of the Xen Device Tree format.
|
|
*/
|
|
void __init xen_early_init(void)
|
|
{
|
|
of_scan_flat_dt(fdt_find_hyper_node, NULL);
|
|
if (!hyper_node.found) {
|
|
pr_debug("No Xen support\n");
|
|
return;
|
|
}
|
|
|
|
if (hyper_node.version == NULL) {
|
|
pr_debug("Xen version not found\n");
|
|
return;
|
|
}
|
|
|
|
pr_info("Xen %s support found\n", hyper_node.version);
|
|
|
|
xen_domain_type = XEN_HVM_DOMAIN;
|
|
|
|
xen_setup_features();
|
|
|
|
if (xen_feature(XENFEAT_dom0))
|
|
xen_start_flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
|
|
|
|
if (!console_set_on_cmdline && !xen_initial_domain())
|
|
add_preferred_console("hvc", 0, NULL);
|
|
}
|
|
|
|
/*
 * ACPI flavour of the guest init: obtain the event channel interrupt from
 * the HVM_PARAM_CALLBACK_IRQ parameter and register it as a GSI.
 *
 * xen_events_irq is set to 0 when the parameter cannot be read or does not
 * describe a PPI. The function is empty without CONFIG_ACPI.
 */
static void __init xen_acpi_guest_init(void)
{
#ifdef CONFIG_ACPI
	struct xen_hvm_param cb;
	int gsi, trigger, polarity;

	cb.domid = DOMID_SELF;
	cb.index = HVM_PARAM_CALLBACK_IRQ;

	if (HYPERVISOR_hvm_op(HVMOP_get_param, &cb)
	    || (cb.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) {
		xen_events_irq = 0;
		return;
	}

	/* Low byte: interrupt number; bits 8 and 9: trigger and polarity. */
	gsi = cb.value & 0xff;

	if ((cb.value >> 8) & 0x1)
		trigger = ACPI_EDGE_SENSITIVE;
	else
		trigger = ACPI_LEVEL_SENSITIVE;

	if ((cb.value >> 8) & 0x2)
		polarity = ACPI_ACTIVE_LOW;
	else
		polarity = ACPI_ACTIVE_HIGH;

	xen_events_irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
#endif
}
|
|
|
|
#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
|
|
/*
|
|
* A type-less specific Xen resource which contains extended regions
|
|
* (unused regions of guest physical address space provided by the hypervisor).
|
|
*/
|
|
static struct resource xen_resource = {
|
|
.name = "Xen unused space",
|
|
};
|
|
|
|
/*
 * Build the resource used as unused address space for Xen scratch pages.
 *
 * The extended regions are read from the "reg" property of the "xen,xen"
 * device tree node (indexes EXT_REGION_INDEX and up; index 0 is the grant
 * table region). xen_resource is set to span from the lowest start to the
 * highest end of these regions, and every hole between two consecutive
 * regions is inserted as an "Unavailable space" child resource.
 *
 * @res: on success, set to point at xen_resource.
 *
 * Returns 0 on success, -ENODEV when not running on Xen, when ACPI is in
 * use or when the hypervisor node is missing, -EINVAL when there are no
 * extended regions or two consecutive regions overlap, -ENOMEM on
 * allocation failure, or the error reported by of_address_to_resource() /
 * insert_resource().
 *
 * NOTE(review): the overlap check assumes the regions are listed in
 * ascending address order; "Unavailable space" resources that were already
 * inserted are not removed when a later step fails.
 */
int __init arch_xen_unpopulated_init(struct resource **res)
{
	struct device_node *np;
	struct resource *regs, *tmp_res;
	uint64_t min_gpaddr = -1, max_gpaddr = 0;
	unsigned int i, nr_reg = 0;
	int rc;

	if (!xen_domain())
		return -ENODEV;

	if (!acpi_disabled)
		return -ENODEV;

	/* Takes a reference on the node; dropped at out_put_node. */
	np = of_find_compatible_node(NULL, NULL, "xen,xen");
	if (WARN_ON(!np))
		return -ENODEV;

	/* Skip region 0 which is reserved for grant table space */
	while (of_get_address(np, nr_reg + EXT_REGION_INDEX, NULL, NULL))
		nr_reg++;

	if (!nr_reg) {
		pr_err("No extended regions are found\n");
		rc = -EINVAL;
		goto out_put_node;
	}

	regs = kcalloc(nr_reg, sizeof(*regs), GFP_KERNEL);
	if (!regs) {
		rc = -ENOMEM;
		goto out_put_node;
	}

	/*
	 * Create resource from extended regions provided by the hypervisor to be
	 * used as unused address space for Xen scratch pages.
	 */
	for (i = 0; i < nr_reg; i++) {
		rc = of_address_to_resource(np, i + EXT_REGION_INDEX, &regs[i]);
		if (rc)
			goto err;

		if (max_gpaddr < regs[i].end)
			max_gpaddr = regs[i].end;
		if (min_gpaddr > regs[i].start)
			min_gpaddr = regs[i].start;
	}

	xen_resource.start = min_gpaddr;
	xen_resource.end = max_gpaddr;

	/*
	 * Mark holes between extended regions as unavailable. The rest of that
	 * address space will be available for the allocation.
	 */
	for (i = 1; i < nr_reg; i++) {
		resource_size_t start, end;

		/* There is an overlap between regions */
		if (regs[i - 1].end + 1 > regs[i].start) {
			rc = -EINVAL;
			goto err;
		}

		/* There is no hole between regions */
		if (regs[i - 1].end + 1 == regs[i].start)
			continue;

		start = regs[i - 1].end + 1;
		end = regs[i].start - 1;

		tmp_res = kzalloc(sizeof(*tmp_res), GFP_KERNEL);
		if (!tmp_res) {
			rc = -ENOMEM;
			goto err;
		}

		tmp_res->name = "Unavailable space";
		tmp_res->start = start;
		tmp_res->end = end;

		rc = insert_resource(&xen_resource, tmp_res);
		if (rc) {
			pr_err("Cannot insert resource %pR (%d)\n", tmp_res, rc);
			kfree(tmp_res);
			goto err;
		}
	}

	*res = &xen_resource;

	/* Success falls through: rc is 0 here and the cleanup is shared. */
err:
	kfree(regs);
out_put_node:
	of_node_put(np);

	return rc;
}
|
|
#endif
|
|
|
|
/*
 * Device tree flavour of the guest init.
 *
 * Looks up the "xen,xen" node, maps its first interrupt as the event
 * channel IRQ (xen_events_irq) and records the start of the grant table
 * region (xen_grant_frames). xen_grant_frames is left untouched when the
 * region cannot be resolved.
 */
static void __init xen_dt_guest_init(void)
{
	struct device_node *xen_node;
	struct resource res;
	int rc;

	xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
	if (!xen_node) {
		pr_err("Xen support was detected before, but it has disappeared\n");
		return;
	}

	xen_events_irq = irq_of_parse_and_map(xen_node, 0);

	rc = of_address_to_resource(xen_node, GRANT_TABLE_INDEX, &res);

	/*
	 * The node is not needed past this point: drop the reference taken
	 * by of_find_compatible_node() on both the success and error path.
	 */
	of_node_put(xen_node);

	if (rc) {
		pr_err("Xen grant table region is not found\n");
		return;
	}
	xen_grant_frames = res.start;
}
|
|
|
|
/*
 * Early initcall that brings up the Xen guest support: event channel IRQ,
 * shared info page, per-CPU vcpu_info, grant tables, Xen IRQ and time
 * setup, and finally the CPU hotplug callbacks.
 *
 * Returns 0 when not running on Xen, a negative errno when a step fails,
 * and otherwise the result of cpuhp_setup_state().
 */
static int __init xen_guest_init(void)
{
	struct shared_info *shinfo = NULL;
	struct xen_add_to_physmap xatp;
	int err, cpu;

	if (!xen_domain())
		return 0;

	if (acpi_disabled)
		xen_dt_guest_init();
	else
		xen_acpi_guest_init();

	if (!xen_events_irq) {
		pr_err("Xen event channel interrupt not found\n");
		return -ENODEV;
	}

	/*
	 * The fdt parsing code has set EFI_RUNTIME_SERVICES if Xen EFI
	 * parameters were found. Force enable runtime services.
	 */
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		xen_efi_runtime_setup();

	shinfo = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
	if (!shinfo) {
		pr_err("not enough memory\n");
		return -ENOMEM;
	}

	/* Ask Xen to place the shared info at the page just allocated. */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = virt_to_gfn(shinfo);
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		BUG();

	HYPERVISOR_shared_info = shinfo;

	/*
	 * xen_vcpu is a pointer to the vcpu_info struct in the shared_info
	 * page, we use it in the event channel upcall and in some pvclock
	 * related functions.
	 * The shared info contains exactly 1 CPU (the boot CPU). The guest
	 * is required to use VCPUOP_register_vcpu_info to place vcpu info
	 * for secondary CPUs as they are brought up.
	 * For uniformity we use VCPUOP_register_vcpu_info even on cpu0.
	 */
	xen_vcpu_info = alloc_percpu(struct vcpu_info);
	if (!xen_vcpu_info)
		return -ENOMEM;

	/* Direct vCPU id mapping for ARM guests. */
	for_each_possible_cpu(cpu)
		per_cpu(xen_vcpu_id, cpu) = cpu;

	/*
	 * Use the grant table region found earlier if there is one,
	 * otherwise fall back to ballooned pages for the grant frames.
	 */
	if (xen_grant_frames) {
		err = gnttab_setup_auto_xlat_frames(xen_grant_frames);
	} else {
		xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
		err = xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
						    &xen_auto_xlat_grant_frames.vaddr,
						    xen_auto_xlat_grant_frames.count);
	}
	if (err) {
		free_percpu(xen_vcpu_info);
		return err;
	}
	gnttab_init();

	/*
	 * Making sure board specific code will not set up ops for
	 * cpu idle and cpu freq.
	 */
	disable_cpuidle();
	disable_cpufreq();

	xen_init_IRQ();

	if (request_percpu_irq(xen_events_irq, xen_arm_callback,
			       "events", &xen_vcpu) != 0) {
		pr_err("Error request IRQ %d\n", xen_events_irq);
		return -EINVAL;
	}

	if (!xen_kernel_unmapped_at_usr())
		xen_time_setup_guest();

	if (xen_initial_domain())
		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);

	return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING,
				 "arm/xen:starting", xen_starting_cpu,
				 xen_dying_cpu);
}
early_initcall(xen_guest_init);
|
|
|
|
/*
 * Late initcall: install the Xen power-off hook and restart handler and,
 * for domains other than the initial one, set the system time from the
 * Xen wallclock.
 *
 * Returns -ENODEV when not running on Xen, 0 otherwise.
 */
static int __init xen_pm_init(void)
{
	struct timespec64 wallclock;

	if (!xen_domain())
		return -ENODEV;

	pm_power_off = xen_power_off;
	register_restart_handler(&xen_restart_nb);

	if (xen_initial_domain())
		return 0;

	xen_read_wallclock(&wallclock);
	do_settimeofday64(&wallclock);

	return 0;
}
late_initcall(xen_pm_init);
|
|
|
|
|
|
/* Suspend/resume and timer hooks: intentionally empty in this file. */
void xen_arch_pre_suspend(void)
{
}

void xen_arch_post_suspend(int suspend_cancelled)
{
}

void xen_timer_resume(void)
{
}

void xen_arch_resume(void)
{
}

void xen_arch_suspend(void)
{
}
|
|
|
|
|
|
/* Exports for the symbols that live in the hypercall.S file. */
EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version);
EXPORT_SYMBOL_GPL(HYPERVISOR_console_io);
EXPORT_SYMBOL_GPL(HYPERVISOR_sched_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op);
EXPORT_SYMBOL_GPL(privcmd_call);
|