commit de0f619564
Defer reprogramming counters and handling overflow via KVM_REQ_PMU
when incrementing counters. KVM skips emulated WRMSR in the VM-Exit
fastpath, the fastpath runs with IRQs disabled, skipping instructions
can increment and reprogram counters, reprogramming counters can
sleep, and sleeping is disallowed while IRQs are disabled.
[*] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
[*] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2981888, name: CPU 15/KVM
[*] preempt_count: 1, expected: 0
[*] RCU nest depth: 0, expected: 0
[*] INFO: lockdep is turned off.
[*] irq event stamp: 0
[*] hardirqs last enabled at (0): [<0000000000000000>] 0x0
[*] hardirqs last disabled at (0): [<ffffffff8121222a>] copy_process+0x146a/0x62d0
[*] softirqs last enabled at (0): [<ffffffff81212269>] copy_process+0x14a9/0x62d0
[*] softirqs last disabled at (0): [<0000000000000000>] 0x0
[*] Preemption disabled at:
[*] [<ffffffffc2063fc1>] vcpu_enter_guest+0x1001/0x3dc0 [kvm]
[*] CPU: 17 PID: 2981888 Comm: CPU 15/KVM Kdump: 5.19.0-rc1-g239111db364c-dirty #2
[*] Call Trace:
[*] <TASK>
[*] dump_stack_lvl+0x6c/0x9b
[*] __might_resched.cold+0x22e/0x297
[*] __mutex_lock+0xc0/0x23b0
[*] perf_event_ctx_lock_nested+0x18f/0x340
[*] perf_event_pause+0x1a/0x110
[*] reprogram_counter+0x2af/0x1490 [kvm]
[*] kvm_pmu_trigger_event+0x429/0x950 [kvm]
[*] kvm_skip_emulated_instruction+0x48/0x90 [kvm]
[*] handle_fastpath_set_msr_irqoff+0x349/0x3b0 [kvm]
[*] vmx_vcpu_run+0x268e/0x3b80 [kvm_intel]
[*] vcpu_enter_guest+0x1d22/0x3dc0 [kvm]
Add a field to kvm_pmc to track the previous counter value in order
to defer overflow detection to kvm_pmu_handle_event() (the counter must
be paused before handling overflow, and that may increment the counter).
Opportunistically shrink sizeof(struct kvm_pmc) a bit.
Suggested-by: Wanpeng Li <wanpengli@tencent.com>
Fixes: 9cd803d496 ("KVM: x86: Update vPMCs when retiring instructions")
Signed-off-by: Like Xu <likexu@tencent.com>
Link: https://lore.kernel.org/r/20220831085328.45489-6-likexu@tencent.com
[sean: avoid re-triggering KVM_REQ_PMU on overflow, tweak changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220923001355.3741194-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
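
The deferral scheme is easier to see in isolation. Below is a minimal,
self-contained C sketch, not the kernel code: pmc_sketch, sketch_increment()
and sketch_handle_event() are hypothetical stand-ins for kvm_pmc, the
instruction-retirement fastpath and kvm_pmu_handle_event(). The increment
side only snapshots the old value and bumps the counter (the real code also
raises KVM_REQ_PMU here), which is safe with IRQs disabled; the deferred
handler runs later with IRQs enabled, where sleeping locks such as perf's
mutexes are legal, and detects an emulated overflow by checking whether the
counter wrapped past the snapshot.

#include <stdint.h>
#include <stdio.h>

/* 48-bit counter width, matching counter_bitmask[KVM_PMC_GP] in the file below. */
#define COUNTER_MASK (((uint64_t)1 << 48) - 1)

struct pmc_sketch {
	uint64_t counter;      /* emulated counter value */
	uint64_t prev_counter; /* snapshot taken before the emulated increment */
	int overflow_pending;  /* stand-in for an overflow-status/PMI bit */
};

/* Fastpath side: safe with IRQs disabled; takes no locks, never sleeps. */
static void sketch_increment(struct pmc_sketch *pmc)
{
	pmc->prev_counter = pmc->counter;
	pmc->counter = (pmc->counter + 1) & COUNTER_MASK;
	/* The real code raises KVM_REQ_PMU here instead of acting immediately. */
}

/* Deferred side, analogous to kvm_pmu_handle_event(): runs with IRQs
 * enabled, so pausing/reprogramming the perf event may sleep safely. */
static void sketch_handle_event(struct pmc_sketch *pmc)
{
	/* Wrapping past the snapshot means the increment overflowed. */
	if (pmc->counter < pmc->prev_counter)
		pmc->overflow_pending = 1;
	pmc->prev_counter = 0;
}

int main(void)
{
	struct pmc_sketch pmc = { .counter = COUNTER_MASK };

	sketch_increment(&pmc);   /* counter wraps from 2^48 - 1 to 0 */
	sketch_handle_event(&pmc);
	printf("overflow_pending = %d\n", pmc.overflow_pending); /* prints 1 */
	return 0;
}

The same counter/prev_counter pairing shows up in amd_pmu_reset() below,
which clears both fields together.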
233 lines · 5.6 KiB · C
// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for AMD
 *
 * Copyright 2015, Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *   Wei Huang <wei@redhat.com>
 *
 * Implementation is based on pmu_intel.c file
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"
#include "svm.h"

enum pmu_type {
	PMU_TYPE_COUNTER = 0,
	PMU_TYPE_EVNTSEL,
};

static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
	unsigned int num_counters = pmu->nr_arch_gp_counters;

	if (pmc_idx >= num_counters)
		return NULL;

	return &pmu->gp_counters[array_index_nospec(pmc_idx, num_counters)];
}

static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
					     enum pmu_type type)
{
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
	unsigned int idx;

	if (!vcpu->kvm->arch.enable_pmu)
		return NULL;

	switch (msr) {
	case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
		if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
			return NULL;
		/*
		 * Each PMU counter has a pair of CTL and CTR MSRs. CTLn
		 * MSRs (accessed via EVNTSEL) are even, CTRn MSRs are odd.
		 */
		idx = (unsigned int)((msr - MSR_F15H_PERF_CTL0) / 2);
		if (!(msr & 0x1) != (type == PMU_TYPE_EVNTSEL))
			return NULL;
		break;
	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
		if (type != PMU_TYPE_EVNTSEL)
			return NULL;
		idx = msr - MSR_K7_EVNTSEL0;
		break;
	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
		if (type != PMU_TYPE_COUNTER)
			return NULL;
		idx = msr - MSR_K7_PERFCTR0;
		break;
	default:
		return NULL;
	}

	return amd_pmc_idx_to_pmc(pmu, idx);
}

static bool amd_hw_event_available(struct kvm_pmc *pmc)
{
	return true;
}

/*
 * Check if a PMC is enabled by comparing it against global_ctrl bits. Because
 * AMD CPU doesn't have global_ctrl MSR, all PMCs are enabled (return TRUE).
 */
static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
{
	return true;
}

static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	idx &= ~(3u << 30);

	return idx < pmu->nr_arch_gp_counters;
}

/* idx is the ECX register of RDPMC instruction */
static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
					    unsigned int idx, u64 *mask)
{
	return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
}

static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	/* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
	return false;
}

static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
	pmc = pmc ? pmc : get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);

	return pmc;
}

static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;

	/* MSR_PERFCTRn */
	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
	if (pmc) {
		msr_info->data = pmc_read_counter(pmc);
		return 0;
	}
	/* MSR_EVNTSELn */
	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
	if (pmc) {
		msr_info->data = pmc->eventsel;
		return 0;
	}

	return 1;
}

static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;
	u64 data = msr_info->data;

	/* MSR_PERFCTRn */
	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
	if (pmc) {
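		/*
		 * pmc_read_counter() folds in any count accumulated by the
		 * backing perf event, so adjusting the stored value by the
		 * delta makes the guest-visible count equal "data" without
		 * having to pause or reprogram the perf event.
		 */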
		pmc->counter += data - pmc_read_counter(pmc);
		pmc_update_sample_period(pmc);
		return 0;
	}
	/* MSR_EVNTSELn */
	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
	if (pmc) {
		data &= ~pmu->reserved_bits;
		if (data != pmc->eventsel) {
			pmc->eventsel = data;
			kvm_pmu_request_counter_reprogam(pmc);
		}
		return 0;
	}

	return 1;
}

static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
		pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
	else
		pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;

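	/* AMD general-purpose counters are 48 bits wide, hence this mask. */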
	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
	pmu->reserved_bits = 0xfffffff000280000ull;
	pmu->raw_event_mask = AMD64_RAW_EVENT_MASK;
	pmu->version = 1;
	/* not applicable to AMD; but clean them to prevent any fall out */
	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
	pmu->nr_arch_fixed_counters = 0;
	pmu->global_status = 0;
	bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
}

static void amd_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int i;

	BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > AMD64_NUM_COUNTERS_CORE);
	BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);

	for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
		pmu->gp_counters[i].type = KVM_PMC_GP;
		pmu->gp_counters[i].vcpu = vcpu;
		pmu->gp_counters[i].idx = i;
		pmu->gp_counters[i].current_config = 0;
	}
}

static void amd_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int i;

	for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
		struct kvm_pmc *pmc = &pmu->gp_counters[i];

		pmc_stop_counter(pmc);
		pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
	}
}

struct kvm_pmu_ops amd_pmu_ops __initdata = {
	.hw_event_available = amd_hw_event_available,
	.pmc_is_enabled = amd_pmc_is_enabled,
	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
	.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
	.msr_idx_to_pmc = amd_msr_idx_to_pmc,
	.is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
	.is_valid_msr = amd_is_valid_msr,
	.get_msr = amd_pmu_get_msr,
	.set_msr = amd_pmu_set_msr,
	.refresh = amd_pmu_refresh,
	.init = amd_pmu_init,
	.reset = amd_pmu_reset,
};