diff --git a/Documentation/virt/kvm/x86/mmu.rst b/Documentation/virt/kvm/x86/mmu.rst
index 5b1ebad24c77..8739120f4300 100644
--- a/Documentation/virt/kvm/x86/mmu.rst
+++ b/Documentation/virt/kvm/x86/mmu.rst
@@ -202,6 +202,10 @@ Shadow pages contain the following information:
     Is 1 if the MMU instance cannot use A/D bits.  EPT did not have A/D
     bits before Haswell; shadow EPT page tables also cannot use A/D bits
     if the L1 hypervisor does not enable them.
+  role.passthrough:
+    The page is not backed by a guest page table, but its first entry
+    points to one.  This is set if NPT uses 5-level page tables (host
+    CR4.LA57=1) and is shadowing L1's 4-level NPT (L1 CR4.LA57=0).
   gfn:
     Either the guest page table containing the translations shadowed by this
     page, or the base page frame for linear translations.  See role.direct.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 752a6d2357ce..f164c6c1514a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -285,7 +285,7 @@ struct kvm_kernel_irq_routing_entry;
  * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
  * 2 bytes per gfn instead of 4 bytes per gfn.
  *
- * Indirect upper-level shadow pages are tracked for write-protection via
+ * Upper-level shadow pages having gptes are tracked for write-protection via
 * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
 * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
 * gfn_track will overflow and explosions will ensure.
@@ -331,7 +331,8 @@ union kvm_mmu_page_role {
 		unsigned smap_andnot_wp:1;
 		unsigned ad_disabled:1;
 		unsigned guest_mode:1;
-		unsigned :6;
+		unsigned passthrough:1;
+		unsigned :5;
 
 		/*
 		 * This is left at the top of the word so that
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index caa208a3fcdc..abdce06f6880 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -734,6 +734,9 @@ static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
 
 static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
 {
+	if (sp->role.passthrough)
+		return sp->gfn;
+
 	if (!sp->role.direct)
 		return sp->gfns[index];
 
@@ -742,6 +745,11 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
 
 static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
 {
+	if (sp->role.passthrough) {
+		WARN_ON_ONCE(gfn != sp->gfn);
+		return;
+	}
+
 	if (!sp->role.direct) {
 		sp->gfns[index] = gfn;
 		return;
@@ -1858,6 +1866,9 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
 	if (sp->role.direct)
 		return false;
 
+	if (sp->role.passthrough)
+		return false;
+
 	return true;
 }
 
@@ -2054,6 +2065,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
+	if (level <= vcpu->arch.mmu->cpu_role.base.level)
+		role.passthrough = 0;
 
 	sp_list = &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
 	for_each_valid_sp(vcpu->kvm, sp, sp_list) {
@@ -4907,6 +4920,9 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 
 	root_role = cpu_role.base;
 	root_role.level = kvm_mmu_get_tdp_level(vcpu);
+	if (root_role.level == PT64_ROOT_5LEVEL &&
+	    cpu_role.base.level == PT64_ROOT_4LEVEL)
+		root_role.passthrough = 1;
 
 	shadow_mmu_init_context(vcpu, context, cpu_role, root_role);
 	kvm_mmu_new_pgd(vcpu, nested_cr3);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 66f1acf153c4..b025decf610d 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -1007,6 +1007,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		.level = 0xf,
 		.access = 0x7,
 		.quadrant = 0x3,
+		.passthrough = 0x1,
 	};
 
 	/*
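
A standalone sketch (not part of the patch) of when the new passthrough role
is set, mirroring the check added to kvm_init_shadow_npt_mmu() above: the
shadow NPT root built by KVM is 5-level (host CR4.LA57=1) while L1's own NPT
is only 4-level (L1 CR4.LA57=0), so the top-level shadow page is not backed
by any guest page table.  The helper name and the main() driver are
hypothetical and exist only for illustration; the macro values match the
kernel's PT64_ROOT_4LEVEL/PT64_ROOT_5LEVEL definitions.

	#include <stdbool.h>
	#include <stdio.h>

	#define PT64_ROOT_4LEVEL 4
	#define PT64_ROOT_5LEVEL 5

	/* Hypothetical helper; the real check lives in kvm_init_shadow_npt_mmu(). */
	static bool npt_root_is_passthrough(int host_root_level, int guest_npt_level)
	{
		return host_root_level == PT64_ROOT_5LEVEL &&
		       guest_npt_level == PT64_ROOT_4LEVEL;
	}

	int main(void)
	{
		/* Host uses 5-level paging, L1 built a 4-level NPT: passthrough. */
		printf("passthrough=%d\n",
		       npt_root_is_passthrough(PT64_ROOT_5LEVEL, PT64_ROOT_4LEVEL));
		/* Levels match: every shadow page is backed by a guest page table. */
		printf("passthrough=%d\n",
		       npt_root_is_passthrough(PT64_ROOT_5LEVEL, PT64_ROOT_5LEVEL));
		return 0;
	}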