linux/arch/arm64/include/asm/kvm_mmu.h

297 lines
8.6 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
#ifndef __ARM64_KVM_MMU_H__
#define __ARM64_KVM_MMU_H__
#include <asm/page.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/cpufeature.h>
/*
* As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
* "negative" addresses. This makes it impossible to directly share
* mappings with the kernel.
*
* Instead, give the HYP mode its own VA region at a fixed offset from
* the kernel by just masking the top bits (which are all ones for a
* kernel address). We need to find out how many bits to mask.
*
* We want to build a set of page tables that cover both parts of the
* idmap (the trampoline page used to initialize EL2), and our normal
* runtime VA space, at the same time.
*
* Given that the kernel uses VA_BITS for its entire address space,
* and that half of that space (VA_BITS - 1) is used for the linear
* mapping, we can also limit the EL2 space to (VA_BITS - 1).
*
* The main question is "Within the VA_BITS space, does EL2 use the
* top or the bottom half of that space to shadow the kernel's linear
* mapping?". As we need to idmap the trampoline page, this is
* determined by the range in which this page lives.
*
* If the page is in the bottom half, we have to use the top half. If
* the page is in the top half, we have to use the bottom half:
*
* T = __pa_symbol(__hyp_idmap_text_start)
* if (T & BIT(VA_BITS - 1))
* HYP_VA_MIN = 0 //idmap in upper half
* else
* HYP_VA_MIN = 1 << (VA_BITS - 1)
* HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1
*
* When using VHE, there are no separate hyp mappings and all KVM
* functionality is already mapped as part of the main kernel
* mappings, and none of this applies in that case.
*/
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
/*
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*
* The actual code generation takes place in kvm_update_va_mask, and
* the instructions below are only there to reserve the space and
* perform the register allocation (kvm_update_va_mask uses the
* specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
alternative_cb kvm_update_va_mask
and \reg, \reg, #1 /* mask with va_mask */
ror \reg, \reg, #1 /* rotate to the first tag bit */
add \reg, \reg, #0 /* insert the low 12 bits of the tag */
add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
ror \reg, \reg, #63 /* rotate back */
alternative_cb_end
.endm
/*
* Convert a hypervisor VA to a PA
* reg: hypervisor address to be converted in place
* tmp: temporary register
*/
.macro hyp_pa reg, tmp
ldr_l \tmp, hyp_physvirt_offset
add \reg, \reg, \tmp
.endm
/*
* Convert a hypervisor VA to a kernel image address
* reg: hypervisor address to be converted in place
* tmp: temporary register
*
* The actual code generation takes place in kvm_get_kimage_voffset, and
* the instructions below are only there to reserve the space and
* perform the register allocation (kvm_get_kimage_voffset uses the
* specific registers encoded in the instructions).
*/
.macro hyp_kimg_va reg, tmp
/* Convert hyp VA -> PA. */
hyp_pa \reg, \tmp
/* Load kimage_voffset. */
alternative_cb kvm_get_kimage_voffset
movz \tmp, #0
movk \tmp, #0, lsl #16
movk \tmp, #0, lsl #32
movk \tmp, #0, lsl #48
alternative_cb_end
/* Convert PA -> kimg VA. */
add \reg, \reg, \tmp
.endm
#else
mm: reorder includes after introduction of linux/pgtable.h The replacement of <asm/pgrable.h> with <linux/pgtable.h> made the include of the latter in the middle of asm includes. Fix this up with the aid of the below script and manual adjustments here and there. import sys import re if len(sys.argv) is not 3: print "USAGE: %s <file> <header>" % (sys.argv[0]) sys.exit(1) hdr_to_move="#include <linux/%s>" % sys.argv[2] moved = False in_hdrs = False with open(sys.argv[1], "r") as f: lines = f.readlines() for _line in lines: line = _line.rstrip(' ') if line == hdr_to_move: continue if line.startswith("#include <linux/"): in_hdrs = True elif not moved and in_hdrs: moved = True print hdr_to_move print line Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Cain <bcain@codeaurora.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Ungerer <gerg@linux-m68k.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Ley Foon Tan <ley.foon.tan@intel.com> Cc: Mark Salter <msalter@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Nick Hu <nickhu@andestech.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vincent Chen <deanbo422@gmail.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Will Deacon <will@kernel.org> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Link: http://lkml.kernel.org/r/20200514170327.31389-4-rppt@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-09 04:32:42 +00:00
#include <linux/pgtable.h>
arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2 This patch adds the necessary support for all host kernel PGSIZE and VA_SPACE configuration options for both EL2 and the Stage-2 page tables. However, for 40bit and 42bit PARange systems, the architecture mandates that VTCR_EL2.SL0 is maximum 1, resulting in fewer levels of stage-2 pagge tables than levels of host kernel page tables. At the same time, systems with a PARange > 42bit, we limit the IPA range by always setting VTCR_EL2.T0SZ to 24. To solve the situation with different levels of page tables for Stage-2 translation than the host kernel page tables, we allocate a dummy PGD with pointers to our actual inital level Stage-2 page table, in order for us to reuse the kernel pgtable manipulation primitives. Reproducing all these in KVM does not look pretty and unnecessarily complicates the 32-bit side. Systems with a PARange < 40bits are not yet supported. [ I have reworked this patch from its original form submitted by Jungseok to take the architecture constraints into consideration. There were too many changes from the original patch for me to preserve the authorship. Thanks to Catalin Marinas for his help in figuring out a good solution to this challenge. I have also fixed various bugs and missing error code handling from the original patch. - Christoffer ] Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Jungseok Lee <jungseoklee85@gmail.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2014-10-10 10:14:28 +00:00
#include <asm/pgalloc.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
void kvm_apply_hyp_relocations(void);
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
"ror %0, %0, #1\n"
"add %0, %0, #0\n"
"add %0, %0, #0, lsl 12\n"
"ror %0, %0, #63\n",
kvm_update_va_mask)
: "+r" (v));
return v;
}
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
*/
#define KVM_PHYS_SHIFT (40)
#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(u32 *hyp_va_bits);
arm64: add support for folded p4d page tables Implement primitives necessary for the 4th level folding, add walks of p4d level where appropriate, replace 5level-fixup.h with pgtable-nop4d.h and remove __ARCH_USE_5LEVEL_HACK. [arnd@arndb.de: fix gcc-10 shift warning] Link: http://lkml.kernel.org/r/20200429185657.4085975-1-arnd@arndb.de Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Brian Cain <bcain@codeaurora.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Geert Uytterhoeven <geert+renesas@glider.be> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: James Morse <james.morse@arm.com> Cc: Jonas Bonn <jonas@southpole.se> Cc: Julien Thierry <julien.thierry.kdev@gmail.com> Cc: Ley Foon Tan <ley.foon.tan@intel.com> Cc: Marc Zyngier <maz@kernel.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> Cc: Suzuki K Poulose <suzuki.poulose@arm.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Will Deacon <will@kernel.org> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Link: http://lkml.kernel.org/r/20200414153455.21744-4-rppt@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-04 23:46:23 +00:00
static inline void *__kvm_vector_slot2addr(void *base,
enum arm64_hyp_spectre_vector slot)
{
int idx = slot - (slot != HYP_VECTOR_DIRECT);
return base + (idx * SZ_2K);
}
struct kvm;
#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
void *va = page_address(pfn_to_page(pfn));
/*
* With FWB, we ensure that the guest always accesses memory using
* cacheable attributes, and we don't have to clean to PoC when
* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
* PoU is not required either in this case.
*/
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
return;
kvm_flush_dcache_to_poc(va, size);
}
static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
unsigned long size)
{
if (icache_is_aliasing()) {
/* any kind of VIPT cache */
__flush_icache_all();
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
void *va = page_address(pfn_to_page(pfn));
invalidate_icache_range((unsigned long)va,
(unsigned long)va + size);
}
}
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
static inline unsigned int kvm_get_vmid_bits(void)
{
int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
return get_vmid_bits(reg);
}
/*
* We are not in the kvm->srcu critical section most of the time, so we take
* the SRCU read lock here. Since we copy the data from the user page, we
* can immediately drop the lock again.
*/
static inline int kvm_read_guest_lock(struct kvm *kvm,
gpa_t gpa, void *data, unsigned long len)
{
int srcu_idx = srcu_read_lock(&kvm->srcu);
int ret = kvm_read_guest(kvm, gpa, data, len);
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
KVM: arm/arm64: vgic-its: Take the srcu lock when writing to guest memory When halting a guest, QEMU flushes the virtual ITS caches, which amounts to writing to the various tables that the guest has allocated. When doing this, we fail to take the srcu lock, and the kernel shouts loudly if running a lockdep kernel: [ 69.680416] ============================= [ 69.680819] WARNING: suspicious RCU usage [ 69.681526] 5.1.0-rc1-00008-g600025238f51-dirty #18 Not tainted [ 69.682096] ----------------------------- [ 69.682501] ./include/linux/kvm_host.h:605 suspicious rcu_dereference_check() usage! [ 69.683225] [ 69.683225] other info that might help us debug this: [ 69.683225] [ 69.683975] [ 69.683975] rcu_scheduler_active = 2, debug_locks = 1 [ 69.684598] 6 locks held by qemu-system-aar/4097: [ 69.685059] #0: 0000000034196013 (&kvm->lock){+.+.}, at: vgic_its_set_attr+0x244/0x3a0 [ 69.686087] #1: 00000000f2ed935e (&its->its_lock){+.+.}, at: vgic_its_set_attr+0x250/0x3a0 [ 69.686919] #2: 000000005e71ea54 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.687698] #3: 00000000c17e548d (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.688475] #4: 00000000ba386017 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.689978] #5: 00000000c2c3c335 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.690729] [ 69.690729] stack backtrace: [ 69.691151] CPU: 2 PID: 4097 Comm: qemu-system-aar Not tainted 5.1.0-rc1-00008-g600025238f51-dirty #18 [ 69.691984] Hardware name: rockchip evb_rk3399/evb_rk3399, BIOS 2019.04-rc3-00124-g2feec69fb1 03/15/2019 [ 69.692831] Call trace: [ 69.694072] lockdep_rcu_suspicious+0xcc/0x110 [ 69.694490] gfn_to_memslot+0x174/0x190 [ 69.694853] kvm_write_guest+0x50/0xb0 [ 69.695209] vgic_its_save_tables_v0+0x248/0x330 [ 69.695639] vgic_its_set_attr+0x298/0x3a0 [ 69.696024] kvm_device_ioctl_attr+0x9c/0xd8 [ 69.696424] kvm_device_ioctl+0x8c/0xf8 [ 69.696788] do_vfs_ioctl+0xc8/0x960 [ 69.697128] ksys_ioctl+0x8c/0xa0 [ 69.697445] __arm64_sys_ioctl+0x28/0x38 [ 69.697817] el0_svc_common+0xd8/0x138 [ 69.698173] el0_svc_handler+0x38/0x78 [ 69.698528] el0_svc+0x8/0xc The fix is to obviously take the srcu lock, just like we do on the read side of things since bf308242ab98. One wonders why this wasn't fixed at the same time, but hey... Fixes: bf308242ab98 ("KVM: arm/arm64: VGIC/ITS: protect kvm_read_guest() calls with SRCU lock") Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
2019-03-19 12:47:11 +00:00
static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
const void *data, unsigned long len)
{
int srcu_idx = srcu_read_lock(&kvm->srcu);
int ret = kvm_write_guest(kvm, gpa, data, len);
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
{
struct kvm_vmid *vmid = &mmu->vmid;
u64 vmid_field, baddr;
u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
baddr = mmu->pgd_phys;
vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
/*
* Must be called from hyp code running at EL2 with an updated VTTBR
* and interrupts disabled.
*/
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr)
{
write_sysreg(vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL1/EL0 translation regime used by
* the guest.
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
}
static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
{
__load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr);
}
static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
{
return container_of(mmu->arch, struct kvm, arch);
}
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */