mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
KVM x86 misc changes for 6.10:
- Advertise the max mappable GPA in the "guest MAXPHYADDR" CPUID field, which is unused by hardware, so that KVM can communicate its inability to map GPAs that set bits 51:48 due to lack of 5-level paging. Guest firmware is expected to use the information to safely remap BARs in the uppermost GPA space, i.e to avoid placing a BAR at a legal, but unmappable, GPA. - Use vfree() instead of kvfree() for allocations that always use vcalloc() or __vcalloc(). - Don't completely ignore same-value writes to immutable feature MSRs, as doing so results in KVM failing to reject accesses to MSR that aren't supposed to exist given the vCPU model and/or KVM configuration. - Don't mark APICv as being inhibited due to ABSENT if APICv is disabled KVM-wide to avoid confusing debuggers (KVM will never bother clearing the ABSENT inhibit, even if userspace enables in-kernel local APIC). -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmY+rlEACgkQOlYIJqCj N/3/xQ/7BvNl1aCJSIQy+yanCKK4wV0wWoY/hD+1wVge3zoaLZqLNHeR7fEa3vo+ OSS/pOz+PT6DbkokZYjjVaGs6+pFqaYg5YvRE7SPbj903phm81H7v5ZLtwgOBcXx dG9cSLTaRhos0PxqoiLfmiGK5IDKmWuZyJzhw+nPh2YmxoRDO/4exsLA9xWWhQSh BjPf32cq69fn39Mo/KeANdLR1FEjvKItEty7St5r/OZFxejP8VPe1xuFxHPJn4U+ FBbDe0DMXAPfoAQImBBhHUpm5Rp7Hwbh90tM8xY6rf3hvRZWmMCAX/Hx8C562M2b k6jB13gsoVesatT6lgKs2I0KGL7TSC0jLYG8aeREdBz6AEo5bkBegB5965MZYfGv T43i/zk+Ha5VIEURqE/CtocKF8AEjnUWLaIyL7VsDqaMslmaMdWzr8RouaO1snMT N/mfilzx9/rzltTV67TI8FSykPNxehwNoc9P8l+ulbW1KKIzpZCWxtIpQnT2TGdn 89zAJ7LUbEAOnO+jMsJjld0fcNEmUqiqu9tezHuu0rVYErYqtfVhrWIf52r0AHDK HRY5FNcZzCE+8FFAVDNl92Of+mPeF47RELXNMLAT+1lm91ug4k62GF4UDw7hsbFo 6+ductlj2DZlwxZVGKxKhBDxFg+AfsNCC1fZvYq+D/6ZE51eABo= =9RXP -----END PGP SIGNATURE----- Merge tag 'kvm-x86-misc-6.10' of https://github.com/kvm-x86/linux into HEAD KVM x86 misc changes for 6.10: - Advertise the max mappable GPA in the "guest MAXPHYADDR" CPUID field, which is unused by hardware, so that KVM can communicate its inability to map GPAs that set bits 51:48 due to 
lack of 5-level paging. Guest firmware is expected to use the information to safely remap BARs in the uppermost GPA space, i.e. to avoid placing a BAR at a legal, but unmappable, GPA. - Use vfree() instead of kvfree() for allocations that always use vcalloc() or __vcalloc(). - Don't completely ignore same-value writes to immutable feature MSRs, as doing so results in KVM failing to reject accesses to MSRs that aren't supposed to exist given the vCPU model and/or KVM configuration. - Don't mark APICv as being inhibited due to ABSENT if APICv is disabled KVM-wide to avoid confusing debuggers (KVM will never bother clearing the ABSENT inhibit, even if userspace enables the in-kernel local APIC).
This commit is contained in:
commit
7d41e24da2
@ -1232,9 +1232,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
entry->eax = entry->ebx = entry->ecx = 0;
|
||||
break;
|
||||
case 0x80000008: {
|
||||
unsigned g_phys_as = (entry->eax >> 16) & 0xff;
|
||||
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
|
||||
unsigned phys_as = entry->eax & 0xff;
|
||||
/*
|
||||
* GuestPhysAddrSize (EAX[23:16]) is intended for software
|
||||
* use.
|
||||
*
|
||||
* KVM's ABI is to report the effective MAXPHYADDR for the
|
||||
* guest in PhysAddrSize (phys_as), and the maximum
|
||||
* *addressable* GPA in GuestPhysAddrSize (g_phys_as).
|
||||
*
|
||||
* GuestPhysAddrSize is valid if and only if TDP is enabled,
|
||||
* in which case the max GPA that can be addressed by KVM may
|
||||
* be less than the max GPA that can be legally generated by
|
||||
* the guest, e.g. if MAXPHYADDR>48 but the CPU doesn't
|
||||
* support 5-level TDP.
|
||||
*/
|
||||
unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
|
||||
unsigned int phys_as, g_phys_as;
|
||||
|
||||
/*
|
||||
* If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
|
||||
@ -1242,16 +1255,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
||||
* reductions in MAXPHYADDR for memory encryption affect shadow
|
||||
* paging, too.
|
||||
*
|
||||
* If TDP is enabled but an explicit guest MAXPHYADDR is not
|
||||
* provided, use the raw bare metal MAXPHYADDR as reductions to
|
||||
* the HPAs do not affect GPAs.
|
||||
* If TDP is enabled, use the raw bare metal MAXPHYADDR as
|
||||
* reductions to the HPAs do not affect GPAs. The max
|
||||
* addressable GPA is the same as the max effective GPA, except
|
||||
* that it's capped at 48 bits if 5-level TDP isn't supported
|
||||
* (hardware processes bits 51:48 only when walking the fifth
|
||||
* level page table).
|
||||
*/
|
||||
if (!tdp_enabled)
|
||||
g_phys_as = boot_cpu_data.x86_phys_bits;
|
||||
else if (!g_phys_as)
|
||||
if (!tdp_enabled) {
|
||||
phys_as = boot_cpu_data.x86_phys_bits;
|
||||
g_phys_as = 0;
|
||||
} else {
|
||||
phys_as = entry->eax & 0xff;
|
||||
g_phys_as = phys_as;
|
||||
if (kvm_mmu_get_max_tdp_level() < 5)
|
||||
g_phys_as = min(g_phys_as, 48);
|
||||
}
|
||||
|
||||
entry->eax = g_phys_as | (virt_as << 8);
|
||||
entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
|
||||
entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
|
||||
entry->edx = 0;
|
||||
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
|
||||
|
@ -100,6 +100,8 @@ static inline u8 kvm_get_shadow_phys_bits(void)
|
||||
return boot_cpu_data.x86_phys_bits;
|
||||
}
|
||||
|
||||
u8 kvm_mmu_get_max_tdp_level(void);
|
||||
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
|
||||
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
|
||||
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
|
||||
|
@ -5316,6 +5316,11 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
|
||||
return max_tdp_level;
|
||||
}
|
||||
|
||||
/*
 * Return tdp_root_level when it is non-zero, otherwise max_tdp_level.
 *
 * The CPUID 0x80000008 hunk of this same change compares the result
 * against 5 to decide whether GuestPhysAddrSize is capped at 48 bits.
 */
u8 kvm_mmu_get_max_tdp_level(void)
|
||||
{
|
||||
return tdp_root_level ? tdp_root_level : max_tdp_level;
|
||||
}
|
||||
|
||||
static union kvm_mmu_page_role
|
||||
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
|
||||
union kvm_cpu_role cpu_role)
|
||||
|
@ -41,7 +41,7 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
|
||||
|
||||
/*
 * Free slot->arch.gfn_write_track and reset the pointer to NULL.
 *
 * NOTE(review): this is a rendered diff hunk without +/- markers, so the
 * removed and the added line are both shown. Per the commit message ("Use
 * vfree() instead of kvfree()"), the kvfree() call is the old line and the
 * vfree() call is its replacement; only one of them should be present in
 * the actual function -- confirm against the real tree.
 */
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
|
||||
{
|
||||
kvfree(slot->arch.gfn_write_track);
|
||||
vfree(slot->arch.gfn_write_track);
|
||||
slot->arch.gfn_write_track = NULL;
|
||||
}
|
||||
|
||||
|
@ -1074,7 +1074,7 @@ TRACE_EVENT(kvm_smm_transition,
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for VT-d posted-interrupts.
|
||||
* Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC.
|
||||
*/
|
||||
TRACE_EVENT(kvm_pi_irte_update,
|
||||
TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
|
||||
@ -1100,7 +1100,7 @@ TRACE_EVENT(kvm_pi_irte_update,
|
||||
__entry->set = set;
|
||||
),
|
||||
|
||||
TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
|
||||
TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
|
||||
"gvec: 0x%x, pi_desc_addr: 0x%llx",
|
||||
__entry->set ? "enabled and being updated" : "disabled",
|
||||
__entry->host_irq,
|
||||
|
@ -2233,16 +2233,13 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
|
||||
/*
|
||||
* Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
|
||||
* not support modifying the guest vCPU model on the fly, e.g. changing
|
||||
* the nVMX capabilities while L2 is running is nonsensical. Ignore
|
||||
* the nVMX capabilities while L2 is running is nonsensical. Allow
|
||||
* writes of the same value, e.g. to allow userspace to blindly stuff
|
||||
* all MSRs when emulating RESET.
|
||||
*/
|
||||
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
|
||||
if (do_get_msr(vcpu, index, &val) || *data != val)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index) &&
|
||||
(do_get_msr(vcpu, index, &val) || *data != val))
|
||||
return -EINVAL;
|
||||
|
||||
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
|
||||
}
|
||||
@ -10031,15 +10028,12 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
|
||||
|
||||
/*
 * Set the initial APICv inhibit bit(s) in kvm->arch.apicv_inhibit_reasons
 * and initialise kvm->arch.apicv_update_lock.
 *
 * NOTE(review): this is a rendered diff hunk without +/- markers, so
 * statements from before and after the change appear together. Going by the
 * commit message (no ABSENT inhibit when APICv is disabled KVM-wide), the
 * "reason" ternary and the single call that uses it look like the new code,
 * while the "inhibits" pointer and the two calls after init_rwsem() look
 * like the removed code -- confirm against the real tree.
 */
static void kvm_apicv_init(struct kvm *kvm)
|
||||
{
|
||||
unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons;
|
||||
/* Pick ABSENT when APICv is enabled module-wide, DISABLE otherwise. */
enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT :
|
||||
APICV_INHIBIT_REASON_DISABLE;
|
||||
|
||||
set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true);
|
||||
|
||||
init_rwsem(&kvm->arch.apicv_update_lock);
|
||||
|
||||
set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true);
|
||||
|
||||
if (!enable_apicv)
|
||||
set_or_clear_apicv_inhibit(inhibits,
|
||||
APICV_INHIBIT_REASON_DISABLE, true);
|
||||
}
|
||||
|
||||
static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
|
||||
@ -12805,7 +12799,7 @@ static void memslot_rmap_free(struct kvm_memory_slot *slot)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
|
||||
kvfree(slot->arch.rmap[i]);
|
||||
vfree(slot->arch.rmap[i]);
|
||||
slot->arch.rmap[i] = NULL;
|
||||
}
|
||||
}
|
||||
@ -12817,7 +12811,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
memslot_rmap_free(slot);
|
||||
|
||||
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
|
||||
kvfree(slot->arch.lpage_info[i - 1]);
|
||||
vfree(slot->arch.lpage_info[i - 1]);
|
||||
slot->arch.lpage_info[i - 1] = NULL;
|
||||
}
|
||||
|
||||
@ -12909,7 +12903,7 @@ out_free:
|
||||
memslot_rmap_free(slot);
|
||||
|
||||
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
|
||||
kvfree(slot->arch.lpage_info[i - 1]);
|
||||
vfree(slot->arch.lpage_info[i - 1]);
|
||||
slot->arch.lpage_info[i - 1] = NULL;
|
||||
}
|
||||
return -ENOMEM;
|
||||
|
@ -974,7 +974,7 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
|
||||
if (!memslot->dirty_bitmap)
|
||||
return;
|
||||
|
||||
kvfree(memslot->dirty_bitmap);
|
||||
vfree(memslot->dirty_bitmap);
|
||||
memslot->dirty_bitmap = NULL;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user