KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS
If IA32_PERF_CAPABILITIES.PEBS_BASELINE [bit 14] is set, the IA32_PEBS_ENABLE MSR exists and all architecturally enumerated fixed and general-purpose counters have corresponding bits in IA32_PEBS_ENABLE that enable generation of PEBS records. The general-purpose counter bits start at bit IA32_PEBS_ENABLE[0], and the fixed counter bits start at bit IA32_PEBS_ENABLE[32]. When guest PEBS is enabled, the IA32_PEBS_ENABLE MSR is added to the perf_guest_switch_msr array and atomically switched during VMX transitions, just like the CORE_PERF_GLOBAL_CTRL MSR. This patch also refactors the way MSRs are added to arr[] in intel_guest_get_msrs() for extensibility; the new IA32_PEBS_ENABLE entry is added depending on whether the platform supports x86_pmu.pebs_ept. Originally-by: Andi Kleen <ak@linux.intel.com> Co-developed-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Co-developed-by: Luwei Kang <luwei.kang@intel.com> Signed-off-by: Luwei Kang <luwei.kang@intel.com> Signed-off-by: Like Xu <like.xu@linux.intel.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Message-Id: <20220411101946.20262-8-likexu@tencent.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
		
							parent
							
								
									0d23dc34a7
								
							
						
					
					
						commit
						c59a1f106f
					
				| @ -3969,33 +3969,72 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Currently, the only caller of this function is the atomic_switch_perf_msrs(). | ||||||
|  |  * The host perf context helps to prepare the values of the real hardware for | ||||||
|  |  * a set of MSRs that need to be switched atomically during a VMX transition. | ||||||
|  |  * | ||||||
|  |  * For example, the pseudocode needed to add a new msr should look like: | ||||||
|  |  * | ||||||
|  |  * arr[(*nr)++] = (struct perf_guest_switch_msr){ | ||||||
|  |  *	.msr = the hardware msr address, | ||||||
|  |  *	.host = the value the hardware has when it doesn't run a guest, | ||||||
|  |  *	.guest = the value the hardware has when it runs a guest, | ||||||
|  |  * }; | ||||||
|  |  * | ||||||
|  |  * These values have nothing to do with the emulated values the guest sees | ||||||
|  |  * when it uses {RD,WR}MSR, which should be handled by the KVM context, | ||||||
|  |  * specifically in the intel_pmu_{get,set}_msr(). | ||||||
|  |  */ | ||||||
| static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) | static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) | ||||||
| { | { | ||||||
| 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | ||||||
| 	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; | 	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; | ||||||
| 	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); | 	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); | ||||||
|  | 	u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable; | ||||||
|  | 	int global_ctrl, pebs_enable; | ||||||
| 
 | 
 | ||||||
| 	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; | 	*nr = 0; | ||||||
| 	arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask; | 	global_ctrl = (*nr)++; | ||||||
| 	arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask; | 	arr[global_ctrl] = (struct perf_guest_switch_msr){ | ||||||
| 	arr[0].guest &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable); | 		.msr = MSR_CORE_PERF_GLOBAL_CTRL, | ||||||
| 	*nr = 1; | 		.host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask, | ||||||
|  | 		.guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask), | ||||||
|  | 	}; | ||||||
| 
 | 
 | ||||||
| 	if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { | 	if (!x86_pmu.pebs) | ||||||
| 		/*
 | 		return arr; | ||||||
| 		 * If PMU counter has PEBS enabled it is not enough to | 
 | ||||||
| 		 * disable counter on a guest entry since PEBS memory | 	/*
 | ||||||
| 		 * write can overshoot guest entry and corrupt guest | 	 * If PMU counter has PEBS enabled it is not enough to | ||||||
| 		 * memory. Disabling PEBS solves the problem. | 	 * disable counter on a guest entry since PEBS memory | ||||||
| 		 * | 	 * write can overshoot guest entry and corrupt guest | ||||||
| 		 * Don't do this if the CPU already enforces it. | 	 * memory. Disabling PEBS solves the problem. | ||||||
| 		 */ | 	 * | ||||||
| 		arr[1].msr = MSR_IA32_PEBS_ENABLE; | 	 * Don't do this if the CPU already enforces it. | ||||||
| 		arr[1].host = cpuc->pebs_enabled; | 	 */ | ||||||
| 		arr[1].guest = 0; | 	if (x86_pmu.pebs_no_isolation) { | ||||||
| 		*nr = 2; | 		arr[(*nr)++] = (struct perf_guest_switch_msr){ | ||||||
|  | 			.msr = MSR_IA32_PEBS_ENABLE, | ||||||
|  | 			.host = cpuc->pebs_enabled, | ||||||
|  | 			.guest = 0, | ||||||
|  | 		}; | ||||||
|  | 		return arr; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	if (!x86_pmu.pebs_ept) | ||||||
|  | 		return arr; | ||||||
|  | 	pebs_enable = (*nr)++; | ||||||
|  | 
 | ||||||
|  | 	arr[pebs_enable] = (struct perf_guest_switch_msr){ | ||||||
|  | 		.msr = MSR_IA32_PEBS_ENABLE, | ||||||
|  | 		.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, | ||||||
|  | 		.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | 	/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */ | ||||||
|  | 	arr[0].guest |= arr[*nr].guest; | ||||||
|  | 
 | ||||||
| 	return arr; | 	return arr; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -521,6 +521,9 @@ struct kvm_pmu { | |||||||
| 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); | 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); | ||||||
| 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); | 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); | ||||||
| 
 | 
 | ||||||
|  | 	u64 pebs_enable; | ||||||
|  | 	u64 pebs_enable_mask; | ||||||
|  | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * The gate to release perf_events not marked in | 	 * The gate to release perf_events not marked in | ||||||
| 	 * pmc_in_use only once in a vcpu time slice. | 	 * pmc_in_use only once in a vcpu time slice. | ||||||
|  | |||||||
| @ -196,6 +196,12 @@ | |||||||
| #define PERF_CAP_PT_IDX			16 | #define PERF_CAP_PT_IDX			16 | ||||||
| 
 | 
 | ||||||
| #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6 | #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6 | ||||||
|  | #define PERF_CAP_PEBS_TRAP             BIT_ULL(6) | ||||||
|  | #define PERF_CAP_ARCH_REG              BIT_ULL(7) | ||||||
|  | #define PERF_CAP_PEBS_FORMAT           0xf00 | ||||||
|  | #define PERF_CAP_PEBS_BASELINE         BIT_ULL(14) | ||||||
|  | #define PERF_CAP_PEBS_MASK	(PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ | ||||||
|  | 				 PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE) | ||||||
| 
 | 
 | ||||||
| #define MSR_IA32_RTIT_CTL		0x00000570 | #define MSR_IA32_RTIT_CTL		0x00000570 | ||||||
| #define RTIT_CTL_TRACEEN		BIT(0) | #define RTIT_CTL_TRACEEN		BIT(0) | ||||||
|  | |||||||
| @ -214,6 +214,9 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) | |||||||
| 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||||||
| 		ret = pmu->version > 1; | 		ret = pmu->version > 1; | ||||||
| 		break; | 		break; | ||||||
|  | 	case MSR_IA32_PEBS_ENABLE: | ||||||
|  | 		ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT; | ||||||
|  | 		break; | ||||||
| 	default: | 	default: | ||||||
| 		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || | 		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || | ||||||
| 			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) || | 			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) || | ||||||
| @ -361,6 +364,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||||||
| 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | ||||||
| 		msr_info->data = 0; | 		msr_info->data = 0; | ||||||
| 		return 0; | 		return 0; | ||||||
|  | 	case MSR_IA32_PEBS_ENABLE: | ||||||
|  | 		msr_info->data = pmu->pebs_enable; | ||||||
|  | 		return 0; | ||||||
| 	default: | 	default: | ||||||
| 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || | 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || | ||||||
| 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { | 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { | ||||||
| @ -421,6 +427,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||||||
| 			return 0; | 			return 0; | ||||||
| 		} | 		} | ||||||
| 		break; | 		break; | ||||||
|  | 	case MSR_IA32_PEBS_ENABLE: | ||||||
|  | 		if (pmu->pebs_enable == data) | ||||||
|  | 			return 0; | ||||||
|  | 		if (!(data & pmu->pebs_enable_mask)) { | ||||||
|  | 			pmu->pebs_enable = data; | ||||||
|  | 			return 0; | ||||||
|  | 		} | ||||||
|  | 		break; | ||||||
| 	default: | 	default: | ||||||
| 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || | 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || | ||||||
| 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { | 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { | ||||||
| @ -489,6 +503,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) | |||||||
| 	pmu->reserved_bits = 0xffffffff00200000ull; | 	pmu->reserved_bits = 0xffffffff00200000ull; | ||||||
| 	pmu->raw_event_mask = X86_RAW_EVENT_MASK; | 	pmu->raw_event_mask = X86_RAW_EVENT_MASK; | ||||||
| 	pmu->fixed_ctr_ctrl_mask = ~0ull; | 	pmu->fixed_ctr_ctrl_mask = ~0ull; | ||||||
|  | 	pmu->pebs_enable_mask = ~0ull; | ||||||
| 
 | 
 | ||||||
| 	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | 	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | ||||||
| 	if (!entry || !vcpu->kvm->arch.enable_pmu) | 	if (!entry || !vcpu->kvm->arch.enable_pmu) | ||||||
| @ -560,6 +575,22 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) | |||||||
| 
 | 
 | ||||||
| 	if (lbr_desc->records.nr) | 	if (lbr_desc->records.nr) | ||||||
| 		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); | 		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); | ||||||
|  | 
 | ||||||
|  | 	if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT) { | ||||||
|  | 		if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_BASELINE) { | ||||||
|  | 			pmu->pebs_enable_mask = ~pmu->global_ctrl; | ||||||
|  | 			pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE; | ||||||
|  | 			for (i = 0; i < pmu->nr_arch_fixed_counters; i++) { | ||||||
|  | 				pmu->fixed_ctr_ctrl_mask &= | ||||||
|  | 					~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4)); | ||||||
|  | 			} | ||||||
|  | 		} else { | ||||||
|  | 			pmu->pebs_enable_mask = | ||||||
|  | 				~((1ull << pmu->nr_arch_gp_counters) - 1); | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		vcpu->arch.perf_capabilities &= ~PERF_CAP_PEBS_MASK; | ||||||
|  | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void intel_pmu_init(struct kvm_vcpu *vcpu) | static void intel_pmu_init(struct kvm_vcpu *vcpu) | ||||||
|  | |||||||
| @ -1448,6 +1448,7 @@ static const u32 msrs_to_save_all[] = { | |||||||
| 	MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, | 	MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, | ||||||
| 	MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, | 	MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, | ||||||
| 	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, | 	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, | ||||||
|  | 	MSR_IA32_PEBS_ENABLE, | ||||||
| 
 | 
 | ||||||
| 	MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, | 	MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, | ||||||
| 	MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, | 	MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user