forked from Minki/linux
4df4cb9e99
Hyper-V has historically initialized stimer-based clockevents late in the process of onlining a CPU because clockevents depend on stimer interrupts. In the original Hyper-V design, stimer interrupts generate a VMbus message, so the VMbus machinery must be running first, and VMbus can't be initialized until relatively late. On x86/64, LAPIC timer based clockevents are used during early initialization before VMbus and stimer-based clockevents are ready, and again during CPU offlining after the stimer clockevents have been shut down. Unfortunately, this design creates problems when offlining CPUs for hibernation or other purposes. stimer-based clockevents are shut down relatively early in the offlining process, so clockevents_unbind_device() must be used to fallback to the LAPIC-based clockevents for the remainder of the offlining process. Furthermore, the late initialization and early shutdown of stimer-based clockevents doesn't work well on ARM64 since there is no other timer like the LAPIC to fallback to. So CPU onlining and offlining doesn't work properly. Fix this by recognizing that stimer Direct Mode is the normal path for newer versions of Hyper-V on x86/64, and the only path on other architectures. With stimer Direct Mode, stimer interrupts don't require any VMbus machinery. stimer clockevents can be initialized and shut down consistent with how it is done for other clockevent devices. While the old VMbus-based stimer interrupts must still be supported for backward compatibility on x86, that mode of operation can be treated as legacy. So add a new Hyper-V stimer entry in the CPU hotplug state list, and use that new state when in Direct Mode. Update the Hyper-V clocksource driver to allocate and initialize stimer clockevents earlier during boot. Update Hyper-V initialization and the VMbus driver to use this new design. As a result, the LAPIC timer is no longer used during boot or CPU onlining/offlining and clockevents_unbind_device() is not called. But retain the old design as a legacy implementation for older versions of Hyper-V that don't support Direct Mode. Signed-off-by: Michael Kelley <mikelley@microsoft.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Dexuan Cui <decui@microsoft.com> Reviewed-by: Dexuan Cui <decui@microsoft.com> Link: https://lkml.kernel.org/r/1573607467-9456-1-git-send-email-mikelley@microsoft.com
286 lines
7.0 KiB
C
286 lines
7.0 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (c) 2009, Microsoft Corporation.
|
|
*
|
|
* Authors:
|
|
* Haiyang Zhang <haiyangz@microsoft.com>
|
|
* Hank Janssen <hjanssen@microsoft.com>
|
|
*/
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/hyperv.h>
|
|
#include <linux/version.h>
|
|
#include <linux/random.h>
|
|
#include <linux/clockchips.h>
|
|
#include <clocksource/hyperv_timer.h>
|
|
#include <asm/mshyperv.h>
|
|
#include "hyperv_vmbus.h"
|
|
|
|
/* The one and only */
|
|
struct hv_context hv_context;
|
|
|
|
/*
|
|
* hv_init - Main initialization routine.
|
|
*
|
|
* This routine must be called before any other routines in here are called
|
|
*/
|
|
int hv_init(void)
|
|
{
|
|
hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
|
|
if (!hv_context.cpu_context)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* hv_post_message - Post a message using the hypervisor message IPC.
|
|
*
|
|
* This involves a hypercall.
|
|
*/
|
|
int hv_post_message(union hv_connection_id connection_id,
|
|
enum hv_message_type message_type,
|
|
void *payload, size_t payload_size)
|
|
{
|
|
struct hv_input_post_message *aligned_msg;
|
|
struct hv_per_cpu_context *hv_cpu;
|
|
u64 status;
|
|
|
|
if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
|
|
return -EMSGSIZE;
|
|
|
|
hv_cpu = get_cpu_ptr(hv_context.cpu_context);
|
|
aligned_msg = hv_cpu->post_msg_page;
|
|
aligned_msg->connectionid = connection_id;
|
|
aligned_msg->reserved = 0;
|
|
aligned_msg->message_type = message_type;
|
|
aligned_msg->payload_size = payload_size;
|
|
memcpy((void *)aligned_msg->payload, payload, payload_size);
|
|
|
|
status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
|
|
|
|
/* Preemption must remain disabled until after the hypercall
|
|
* so some other thread can't get scheduled onto this cpu and
|
|
* corrupt the per-cpu post_msg_page
|
|
*/
|
|
put_cpu_ptr(hv_cpu);
|
|
|
|
return status & 0xFFFF;
|
|
}
|
|
|
|
int hv_synic_alloc(void)
|
|
{
|
|
int cpu;
|
|
struct hv_per_cpu_context *hv_cpu;
|
|
|
|
/*
|
|
* First, zero all per-cpu memory areas so hv_synic_free() can
|
|
* detect what memory has been allocated and cleanup properly
|
|
* after any failures.
|
|
*/
|
|
for_each_present_cpu(cpu) {
|
|
hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
memset(hv_cpu, 0, sizeof(*hv_cpu));
|
|
}
|
|
|
|
hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
|
|
GFP_KERNEL);
|
|
if (hv_context.hv_numa_map == NULL) {
|
|
pr_err("Unable to allocate NUMA map\n");
|
|
goto err;
|
|
}
|
|
|
|
for_each_present_cpu(cpu) {
|
|
hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
|
|
tasklet_init(&hv_cpu->msg_dpc,
|
|
vmbus_on_msg_dpc, (unsigned long) hv_cpu);
|
|
|
|
hv_cpu->synic_message_page =
|
|
(void *)get_zeroed_page(GFP_ATOMIC);
|
|
if (hv_cpu->synic_message_page == NULL) {
|
|
pr_err("Unable to allocate SYNIC message page\n");
|
|
goto err;
|
|
}
|
|
|
|
hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
|
|
if (hv_cpu->synic_event_page == NULL) {
|
|
pr_err("Unable to allocate SYNIC event page\n");
|
|
goto err;
|
|
}
|
|
|
|
hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
|
|
if (hv_cpu->post_msg_page == NULL) {
|
|
pr_err("Unable to allocate post msg page\n");
|
|
goto err;
|
|
}
|
|
|
|
INIT_LIST_HEAD(&hv_cpu->chan_list);
|
|
}
|
|
|
|
return 0;
|
|
err:
|
|
/*
|
|
* Any memory allocations that succeeded will be freed when
|
|
* the caller cleans up by calling hv_synic_free()
|
|
*/
|
|
return -ENOMEM;
|
|
}
|
|
|
|
|
|
void hv_synic_free(void)
|
|
{
|
|
int cpu;
|
|
|
|
for_each_present_cpu(cpu) {
|
|
struct hv_per_cpu_context *hv_cpu
|
|
= per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
|
|
free_page((unsigned long)hv_cpu->synic_event_page);
|
|
free_page((unsigned long)hv_cpu->synic_message_page);
|
|
free_page((unsigned long)hv_cpu->post_msg_page);
|
|
}
|
|
|
|
kfree(hv_context.hv_numa_map);
|
|
}
|
|
|
|
/*
|
|
* hv_synic_init - Initialize the Synthetic Interrupt Controller.
|
|
*
|
|
* If it is already initialized by another entity (ie x2v shim), we need to
|
|
* retrieve the initialized message and event pages. Otherwise, we create and
|
|
* initialize the message and event pages.
|
|
*/
|
|
void hv_synic_enable_regs(unsigned int cpu)
|
|
{
|
|
struct hv_per_cpu_context *hv_cpu
|
|
= per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
union hv_synic_simp simp;
|
|
union hv_synic_siefp siefp;
|
|
union hv_synic_sint shared_sint;
|
|
union hv_synic_scontrol sctrl;
|
|
|
|
/* Setup the Synic's message page */
|
|
hv_get_simp(simp.as_uint64);
|
|
simp.simp_enabled = 1;
|
|
simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
|
|
>> PAGE_SHIFT;
|
|
|
|
hv_set_simp(simp.as_uint64);
|
|
|
|
/* Setup the Synic's event page */
|
|
hv_get_siefp(siefp.as_uint64);
|
|
siefp.siefp_enabled = 1;
|
|
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
|
|
>> PAGE_SHIFT;
|
|
|
|
hv_set_siefp(siefp.as_uint64);
|
|
|
|
/* Setup the shared SINT. */
|
|
hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
|
|
|
|
shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
|
|
shared_sint.masked = false;
|
|
if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
|
|
shared_sint.auto_eoi = false;
|
|
else
|
|
shared_sint.auto_eoi = true;
|
|
|
|
hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
|
|
|
|
/* Enable the global synic bit */
|
|
hv_get_synic_state(sctrl.as_uint64);
|
|
sctrl.enable = 1;
|
|
|
|
hv_set_synic_state(sctrl.as_uint64);
|
|
}
|
|
|
|
int hv_synic_init(unsigned int cpu)
|
|
{
|
|
hv_synic_enable_regs(cpu);
|
|
|
|
hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
|
|
*/
|
|
void hv_synic_disable_regs(unsigned int cpu)
|
|
{
|
|
union hv_synic_sint shared_sint;
|
|
union hv_synic_simp simp;
|
|
union hv_synic_siefp siefp;
|
|
union hv_synic_scontrol sctrl;
|
|
|
|
hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
|
|
|
|
shared_sint.masked = 1;
|
|
|
|
/* Need to correctly cleanup in the case of SMP!!! */
|
|
/* Disable the interrupt */
|
|
hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
|
|
|
|
hv_get_simp(simp.as_uint64);
|
|
simp.simp_enabled = 0;
|
|
simp.base_simp_gpa = 0;
|
|
|
|
hv_set_simp(simp.as_uint64);
|
|
|
|
hv_get_siefp(siefp.as_uint64);
|
|
siefp.siefp_enabled = 0;
|
|
siefp.base_siefp_gpa = 0;
|
|
|
|
hv_set_siefp(siefp.as_uint64);
|
|
|
|
/* Disable the global synic bit */
|
|
hv_get_synic_state(sctrl.as_uint64);
|
|
sctrl.enable = 0;
|
|
hv_set_synic_state(sctrl.as_uint64);
|
|
}
|
|
|
|
int hv_synic_cleanup(unsigned int cpu)
|
|
{
|
|
struct vmbus_channel *channel, *sc;
|
|
bool channel_found = false;
|
|
unsigned long flags;
|
|
|
|
/*
|
|
* Search for channels which are bound to the CPU we're about to
|
|
* cleanup. In case we find one and vmbus is still connected we need to
|
|
* fail, this will effectively prevent CPU offlining. There is no way
|
|
* we can re-bind channels to different CPUs for now.
|
|
*/
|
|
mutex_lock(&vmbus_connection.channel_mutex);
|
|
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
|
if (channel->target_cpu == cpu) {
|
|
channel_found = true;
|
|
break;
|
|
}
|
|
spin_lock_irqsave(&channel->lock, flags);
|
|
list_for_each_entry(sc, &channel->sc_list, sc_list) {
|
|
if (sc->target_cpu == cpu) {
|
|
channel_found = true;
|
|
break;
|
|
}
|
|
}
|
|
spin_unlock_irqrestore(&channel->lock, flags);
|
|
if (channel_found)
|
|
break;
|
|
}
|
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
|
|
|
if (channel_found && vmbus_connection.conn_state == CONNECTED)
|
|
return -EBUSY;
|
|
|
|
hv_stimer_legacy_cleanup(cpu);
|
|
|
|
hv_synic_disable_regs(cpu);
|
|
|
|
return 0;
|
|
}
|