mirror of
https://github.com/torvalds/linux.git
synced 2024-12-02 17:11:33 +00:00
Merge branch kvm-arm64/lpa2 into kvmarm-master/next
* kvm-arm64/lpa2: : . : Support FEAT_LPA2 at EL2 S1 and S2, courtesy of Ryan Roberts : : From the cover letter: : : "This adds support for FEAT_LPA2 to KVM for both hypervisor stage 1 (for the : nvhe/protected modes) and the vm stage 2 translation tables (for all modes). : FEAT_LPA2 enables 52 bit PAs and VAs for 4KB and 16KB granules (note this is : already supported for 64KB granules via the FEAT_LPA and FEAT_LVA extensions)." : . KVM: arm64: Use helpers to classify exception types reported via ESR KVM: selftests: arm64: Support P52V48 4K and 16K guest_modes KVM: selftests: arm64: Determine max ipa size per-page size KVM: arm64: Allow guests with >48-bit IPA size on FEAT_LPA2 systems KVM: arm64: Support up to 5 levels of translation in kvm_pgtable KVM: arm64: Convert translation level parameter to s8 KVM: arm64: Use LPA2 page-tables for stage2 and hyp stage1 KVM: arm64: Add new (V)TCR_EL2 field definitions for FEAT_LPA2 arm64: Add ARM64_HAS_LPA2 CPU capability arm64/mm: Add FEAT_LPA2 specific ID_AA64MMFR0.TGRAN[2] arm64/mm: Update tlb invalidation routines for FEAT_LPA2 arm64/mm: Add lpa2_is_enabled() kvm_lpa2_is_enabled() stubs arm64/mm: Modify range-based tlbi to decrement scale Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
commit
189f2c8e0c
@ -819,6 +819,11 @@ static inline bool system_supports_tlb_range(void)
|
||||
return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
|
||||
}
|
||||
|
||||
static inline bool system_supports_lpa2(void)
|
||||
{
|
||||
return cpus_have_final_cap(ARM64_HAS_LPA2);
|
||||
}
|
||||
|
||||
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
|
||||
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
|
||||
|
||||
|
@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr)
|
||||
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
|
||||
}
|
||||
|
||||
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
|
||||
{
|
||||
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
|
||||
}
|
||||
|
||||
static inline bool esr_fsc_is_permission_fault(unsigned long esr)
|
||||
{
|
||||
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
|
||||
}
|
||||
|
||||
static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
|
||||
{
|
||||
return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
|
||||
}
|
||||
|
||||
const char *esr_get_class_string(unsigned long esr);
|
||||
#endif /* __ASSEMBLY */
|
||||
|
||||
|
@ -108,6 +108,7 @@
|
||||
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
|
||||
|
||||
/* TCR_EL2 Registers bits */
|
||||
#define TCR_EL2_DS (1UL << 32)
|
||||
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
|
||||
#define TCR_EL2_TBI (1 << 20)
|
||||
#define TCR_EL2_PS_SHIFT 16
|
||||
@ -122,6 +123,7 @@
|
||||
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
|
||||
|
||||
/* VTCR_EL2 Registers bits */
|
||||
#define VTCR_EL2_DS TCR_EL2_DS
|
||||
#define VTCR_EL2_RES1 (1U << 31)
|
||||
#define VTCR_EL2_HD (1 << 22)
|
||||
#define VTCR_EL2_HA (1 << 21)
|
||||
|
@ -404,14 +404,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
|
||||
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
|
||||
}
|
||||
|
||||
static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
|
||||
static inline
|
||||
bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
|
||||
return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
|
||||
}
|
||||
|
||||
static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
|
||||
static inline
|
||||
bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
|
||||
return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
|
||||
}
|
||||
|
||||
static inline
|
||||
u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long esr = kvm_vcpu_get_esr(vcpu);
|
||||
|
||||
BUG_ON(!esr_fsc_is_permission_fault(esr));
|
||||
return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
|
||||
}
|
||||
|
||||
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
|
||||
@ -454,12 +465,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
|
||||
* first), then a permission fault to allow the flags
|
||||
* to be set.
|
||||
*/
|
||||
switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
|
||||
case ESR_ELx_FSC_PERM:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return kvm_vcpu_trap_is_permission_fault(vcpu);
|
||||
}
|
||||
|
||||
if (kvm_vcpu_trap_is_iabt(vcpu))
|
||||
|
@ -11,7 +11,8 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define KVM_PGTABLE_MAX_LEVELS 4U
|
||||
#define KVM_PGTABLE_FIRST_LEVEL -1
|
||||
#define KVM_PGTABLE_LAST_LEVEL 3
|
||||
|
||||
/*
|
||||
* The largest supported block sizes for KVM (no 52-bit PA support):
|
||||
@ -20,17 +21,29 @@
|
||||
* - 64K (level 2): 512MB
|
||||
*/
|
||||
#ifdef CONFIG_ARM64_4K_PAGES
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
|
||||
#else
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U
|
||||
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
|
||||
#endif
|
||||
|
||||
#define kvm_lpa2_is_enabled() system_supports_lpa2()
|
||||
|
||||
static inline u64 kvm_get_parange_max(void)
|
||||
{
|
||||
if (kvm_lpa2_is_enabled() ||
|
||||
(IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
|
||||
return ID_AA64MMFR0_EL1_PARANGE_52;
|
||||
else
|
||||
return ID_AA64MMFR0_EL1_PARANGE_48;
|
||||
}
|
||||
|
||||
static inline u64 kvm_get_parange(u64 mmfr0)
|
||||
{
|
||||
u64 parange_max = kvm_get_parange_max();
|
||||
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
|
||||
if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX)
|
||||
parange = ID_AA64MMFR0_EL1_PARANGE_MAX;
|
||||
if (parange > parange_max)
|
||||
parange = parange_max;
|
||||
|
||||
return parange;
|
||||
}
|
||||
@ -41,6 +54,8 @@ typedef u64 kvm_pte_t;
|
||||
|
||||
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
|
||||
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
|
||||
#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
|
||||
#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
|
||||
|
||||
#define KVM_PHYS_INVALID (-1ULL)
|
||||
|
||||
@ -51,21 +66,34 @@ static inline bool kvm_pte_valid(kvm_pte_t pte)
|
||||
|
||||
static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
|
||||
{
|
||||
u64 pa = pte & KVM_PTE_ADDR_MASK;
|
||||
u64 pa;
|
||||
|
||||
if (PAGE_SHIFT == 16)
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
|
||||
if (kvm_lpa2_is_enabled()) {
|
||||
pa = pte & KVM_PTE_ADDR_MASK_LPA2;
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
|
||||
} else {
|
||||
pa = pte & KVM_PTE_ADDR_MASK;
|
||||
if (PAGE_SHIFT == 16)
|
||||
pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
|
||||
}
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
|
||||
{
|
||||
kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
|
||||
kvm_pte_t pte;
|
||||
|
||||
if (PAGE_SHIFT == 16) {
|
||||
pa &= GENMASK(51, 48);
|
||||
pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
|
||||
if (kvm_lpa2_is_enabled()) {
|
||||
pte = pa & KVM_PTE_ADDR_MASK_LPA2;
|
||||
pa &= GENMASK(51, 50);
|
||||
pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
|
||||
} else {
|
||||
pte = pa & KVM_PTE_ADDR_MASK;
|
||||
if (PAGE_SHIFT == 16) {
|
||||
pa &= GENMASK(51, 48);
|
||||
pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
|
||||
}
|
||||
}
|
||||
|
||||
return pte;
|
||||
@ -76,28 +104,28 @@ static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
|
||||
return __phys_to_pfn(kvm_pte_to_phys(pte));
|
||||
}
|
||||
|
||||
static inline u64 kvm_granule_shift(u32 level)
|
||||
static inline u64 kvm_granule_shift(s8 level)
|
||||
{
|
||||
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
|
||||
/* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
|
||||
return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
|
||||
}
|
||||
|
||||
static inline u64 kvm_granule_size(u32 level)
|
||||
static inline u64 kvm_granule_size(s8 level)
|
||||
{
|
||||
return BIT(kvm_granule_shift(level));
|
||||
}
|
||||
|
||||
static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
static inline bool kvm_level_supports_block_mapping(s8 level)
|
||||
{
|
||||
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
}
|
||||
|
||||
static inline u32 kvm_supported_block_sizes(void)
|
||||
{
|
||||
u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
u32 r = 0;
|
||||
|
||||
for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
|
||||
for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
|
||||
r |= BIT(kvm_granule_shift(level));
|
||||
|
||||
return r;
|
||||
@ -142,7 +170,7 @@ struct kvm_pgtable_mm_ops {
|
||||
void* (*zalloc_page)(void *arg);
|
||||
void* (*zalloc_pages_exact)(size_t size);
|
||||
void (*free_pages_exact)(void *addr, size_t size);
|
||||
void (*free_unlinked_table)(void *addr, u32 level);
|
||||
void (*free_unlinked_table)(void *addr, s8 level);
|
||||
void (*get_page)(void *addr);
|
||||
void (*put_page)(void *addr);
|
||||
int (*page_count)(void *addr);
|
||||
@ -238,7 +266,7 @@ struct kvm_pgtable_visit_ctx {
|
||||
u64 start;
|
||||
u64 addr;
|
||||
u64 end;
|
||||
u32 level;
|
||||
s8 level;
|
||||
enum kvm_pgtable_walk_flags flags;
|
||||
};
|
||||
|
||||
@ -341,7 +369,7 @@ static inline bool kvm_pgtable_walk_lock_held(void)
|
||||
*/
|
||||
struct kvm_pgtable {
|
||||
u32 ia_bits;
|
||||
u32 start_level;
|
||||
s8 start_level;
|
||||
kvm_pteref_t pgd;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
|
||||
@ -475,7 +503,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* The page-table is assumed to be unreachable by any hardware walkers prior to
|
||||
* freeing and therefore no TLB invalidation is performed.
|
||||
*/
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
|
||||
@ -499,7 +527,7 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
|
||||
* an ERR_PTR(error) on failure.
|
||||
*/
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
u64 phys, s8 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte);
|
||||
|
||||
@ -725,7 +753,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
|
||||
kvm_pte_t *ptep, u32 *level);
|
||||
kvm_pte_t *ptep, s8 *level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
|
||||
|
@ -56,10 +56,11 @@ static inline unsigned long hyp_vm_table_pages(void)
|
||||
|
||||
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
|
||||
{
|
||||
unsigned long total = 0, i;
|
||||
unsigned long total = 0;
|
||||
int i;
|
||||
|
||||
/* Provision the worst case scenario */
|
||||
for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
|
||||
for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
|
||||
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
|
||||
total += nr_pages;
|
||||
}
|
||||
|
@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings;
|
||||
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
|
||||
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
|
||||
|
||||
#define lpa2_is_enabled() false
|
||||
|
||||
/*
|
||||
* If we have userspace only BTI we don't want to mark kernel pages
|
||||
* guarded even if the system does support BTI.
|
||||
|
@ -871,10 +871,12 @@
|
||||
|
||||
/* id_aa64mmfr0 */
|
||||
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
|
||||
#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
|
||||
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
|
||||
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
|
||||
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
|
||||
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
|
||||
#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
|
||||
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
|
||||
|
||||
#define ARM64_MIN_PARANGE_BITS 32
|
||||
@ -882,6 +884,7 @@
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
|
||||
|
||||
#ifdef CONFIG_ARM64_PA_BITS_52
|
||||
@ -892,11 +895,13 @@
|
||||
|
||||
#if defined(CONFIG_ARM64_4K_PAGES)
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
|
||||
#elif defined(CONFIG_ARM64_16K_PAGES)
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
|
||||
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
|
||||
|
@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb);
|
||||
#include <asm-generic/tlb.h>
|
||||
|
||||
/*
|
||||
* get the tlbi levels in arm64. Default value is 0 if more than one
|
||||
* of cleared_* is set or neither is set.
|
||||
* Arm64 doesn't support p4ds now.
|
||||
* get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than
|
||||
* one of cleared_* is set or neither is set - this elides the level hinting to
|
||||
* the hardware.
|
||||
*/
|
||||
static inline int tlb_get_level(struct mmu_gather *tlb)
|
||||
{
|
||||
/* The TTL field is only valid for the leaf entry. */
|
||||
if (tlb->freed_tables)
|
||||
return 0;
|
||||
return TLBI_TTL_UNKNOWN;
|
||||
|
||||
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
|
||||
tlb->cleared_puds ||
|
||||
@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
|
||||
tlb->cleared_p4ds))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
|
||||
tlb->cleared_pmds ||
|
||||
tlb->cleared_puds))
|
||||
return 0;
|
||||
|
||||
return TLBI_TTL_UNKNOWN;
|
||||
}
|
||||
|
||||
static inline void tlb_flush(struct mmu_gather *tlb)
|
||||
|
@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void)
|
||||
* When ARMv8.4-TTL exists, TLBI operations take an additional hint for
|
||||
* the level at which the invalidation must take place. If the level is
|
||||
* wrong, no invalidation may take place. In the case where the level
|
||||
* cannot be easily determined, a 0 value for the level parameter will
|
||||
* perform a non-hinted invalidation.
|
||||
* cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
|
||||
* a non-hinted invalidation. Any provided level outside the hint range
|
||||
* will also cause fall-back to non-hinted invalidation.
|
||||
*
|
||||
* For Stage-2 invalidation, use the level values provided to that effect
|
||||
* in asm/stage2_pgtable.h.
|
||||
*/
|
||||
#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
|
||||
|
||||
#define TLBI_TTL_UNKNOWN INT_MAX
|
||||
|
||||
#define __tlbi_level(op, addr, level) do { \
|
||||
u64 arg = addr; \
|
||||
\
|
||||
if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
|
||||
level) { \
|
||||
level >= 0 && level <= 3) { \
|
||||
u64 ttl = level & 3; \
|
||||
ttl |= get_trans_granule() << 2; \
|
||||
arg &= ~TLBI_TTL_MASK; \
|
||||
@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void)
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* This macro creates a properly formatted VA operand for the TLB RANGE.
|
||||
* The value bit assignments are:
|
||||
* This macro creates a properly formatted VA operand for the TLB RANGE. The
|
||||
* value bit assignments are:
|
||||
*
|
||||
* +----------+------+-------+-------+-------+----------------------+
|
||||
* | ASID | TG | SCALE | NUM | TTL | BADDR |
|
||||
* +-----------------+-------+-------+-------+----------------------+
|
||||
* |63 48|47 46|45 44|43 39|38 37|36 0|
|
||||
*
|
||||
* The address range is determined by below formula:
|
||||
* [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
|
||||
* The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) *
|
||||
* 2^(5*SCALE + 1) * PAGESIZE)
|
||||
*
|
||||
* Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR
|
||||
* holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI
|
||||
* 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
|
||||
* EL1, Inner Shareable".
|
||||
*
|
||||
*/
|
||||
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \
|
||||
({ \
|
||||
unsigned long __ta = (addr) >> PAGE_SHIFT; \
|
||||
__ta &= GENMASK_ULL(36, 0); \
|
||||
__ta |= (unsigned long)(ttl) << 37; \
|
||||
__ta |= (unsigned long)(num) << 39; \
|
||||
__ta |= (unsigned long)(scale) << 44; \
|
||||
__ta |= get_trans_granule() << 46; \
|
||||
__ta |= (unsigned long)(asid) << 48; \
|
||||
__ta; \
|
||||
#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
|
||||
({ \
|
||||
unsigned long __ta = (baddr); \
|
||||
unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
|
||||
__ta &= GENMASK_ULL(36, 0); \
|
||||
__ta |= __ttl << 37; \
|
||||
__ta |= (unsigned long)(num) << 39; \
|
||||
__ta |= (unsigned long)(scale) << 44; \
|
||||
__ta |= get_trans_granule() << 46; \
|
||||
__ta |= (unsigned long)(asid) << 48; \
|
||||
__ta; \
|
||||
})
|
||||
|
||||
/* These macros are used by the TLBI RANGE feature. */
|
||||
@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void)
|
||||
* CPUs, ensuring that any walk-cache entries associated with the
|
||||
* translation are also invalidated.
|
||||
*
|
||||
* __flush_tlb_range(vma, start, end, stride, last_level)
|
||||
* __flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
|
||||
* Invalidate the virtual-address range '[start, end)' on all
|
||||
* CPUs for the user address space corresponding to 'vma->mm'.
|
||||
* The invalidation operations are issued at a granularity
|
||||
* determined by 'stride' and only affect any walk-cache entries
|
||||
* if 'last_level' is equal to false.
|
||||
* if 'last_level' is equal to false. tlb_level is the level at
|
||||
* which the invalidation must take place. If the level is wrong,
|
||||
* no invalidation may take place. In the case where the level
|
||||
* cannot be easily determined, the value TLBI_TTL_UNKNOWN will
|
||||
* perform a non-hinted invalidation.
|
||||
*
|
||||
*
|
||||
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
|
||||
@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
|
||||
* @tlb_level: Translation Table level hint, if known
|
||||
* @tlbi_user: If 'true', call an additional __tlbi_user()
|
||||
* (typically for user ASIDs). 'flase' for IPA instructions
|
||||
* @lpa2: If 'true', the lpa2 scheme is used as set out below
|
||||
*
|
||||
* When the CPU does not support TLB range operations, flush the TLB
|
||||
* entries one by one at the granularity of 'stride'. If the TLB
|
||||
* range ops are supported, then:
|
||||
*
|
||||
* 1. If 'pages' is odd, flush the first page through non-range
|
||||
* operations;
|
||||
* 1. If FEAT_LPA2 is in use, the start address of a range operation must be
|
||||
* 64KB aligned, so flush pages one by one until the alignment is reached
|
||||
* using the non-range operations. This step is skipped if LPA2 is not in
|
||||
* use.
|
||||
*
|
||||
* 2. For remaining pages: the minimum range granularity is decided
|
||||
* by 'scale', so multiple range TLBI operations may be required.
|
||||
* Start from scale = 0, flush the corresponding number of pages
|
||||
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
|
||||
* until no pages left.
|
||||
* 2. The minimum range granularity is decided by 'scale', so multiple range
|
||||
* TLBI operations may be required. Start from scale = 3, flush the largest
|
||||
* possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
|
||||
* requested range, then decrement scale and continue until one or zero pages
|
||||
* are left. We must start from highest scale to ensure 64KB start alignment
|
||||
* is maintained in the LPA2 case.
|
||||
*
|
||||
* 3. If there is 1 page remaining, flush it through non-range operations. Range
|
||||
* operations can only span an even number of pages. We save this for last to
|
||||
* ensure 64KB start alignment is maintained for the LPA2 case.
|
||||
*
|
||||
* Note that certain ranges can be represented by either num = 31 and
|
||||
* scale or num = 0 and scale + 1. The loop below favours the latter
|
||||
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
|
||||
*/
|
||||
#define __flush_tlb_range_op(op, start, pages, stride, \
|
||||
asid, tlb_level, tlbi_user) \
|
||||
asid, tlb_level, tlbi_user, lpa2) \
|
||||
do { \
|
||||
int num = 0; \
|
||||
int scale = 0; \
|
||||
int scale = 3; \
|
||||
int shift = lpa2 ? 16 : PAGE_SHIFT; \
|
||||
unsigned long addr; \
|
||||
\
|
||||
while (pages > 0) { \
|
||||
if (!system_supports_tlb_range() || \
|
||||
pages % 2 == 1) { \
|
||||
pages == 1 || \
|
||||
(lpa2 && start != ALIGN(start, SZ_64K))) { \
|
||||
addr = __TLBI_VADDR(start, asid); \
|
||||
__tlbi_level(op, addr, tlb_level); \
|
||||
if (tlbi_user) \
|
||||
@ -384,20 +407,20 @@ do { \
|
||||
\
|
||||
num = __TLBI_RANGE_NUM(pages, scale); \
|
||||
if (num >= 0) { \
|
||||
addr = __TLBI_VADDR_RANGE(start, asid, scale, \
|
||||
num, tlb_level); \
|
||||
addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
|
||||
scale, num, tlb_level); \
|
||||
__tlbi(r##op, addr); \
|
||||
if (tlbi_user) \
|
||||
__tlbi_user(r##op, addr); \
|
||||
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
|
||||
pages -= __TLBI_RANGE_PAGES(num, scale); \
|
||||
} \
|
||||
scale++; \
|
||||
scale--; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
|
||||
__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
|
||||
__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
|
||||
|
||||
static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
asid = ASID(vma->vm_mm);
|
||||
|
||||
if (last_level)
|
||||
__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
|
||||
__flush_tlb_range_op(vale1is, start, pages, stride, asid,
|
||||
tlb_level, true, lpa2_is_enabled());
|
||||
else
|
||||
__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
|
||||
__flush_tlb_range_op(vae1is, start, pages, stride, asid,
|
||||
tlb_level, true, lpa2_is_enabled());
|
||||
|
||||
dsb(ish);
|
||||
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
|
||||
@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
|
||||
/*
|
||||
* We cannot use leaf-only invalidation here, since we may be invalidating
|
||||
* table entries as part of collapsing hugepages or moving page tables.
|
||||
* Set the tlb_level to 0 because we can not get enough information here.
|
||||
* Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
|
||||
* information here.
|
||||
*/
|
||||
__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
|
||||
__flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
|
||||
}
|
||||
|
||||
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
||||
|
@ -1768,6 +1768,39 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
|
||||
return !meltdown_safe;
|
||||
}
|
||||
|
||||
#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
|
||||
static bool has_lpa2_at_stage1(u64 mmfr0)
|
||||
{
|
||||
unsigned int tgran;
|
||||
|
||||
tgran = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_TGRAN_SHIFT);
|
||||
return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2;
|
||||
}
|
||||
|
||||
static bool has_lpa2_at_stage2(u64 mmfr0)
|
||||
{
|
||||
unsigned int tgran;
|
||||
|
||||
tgran = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_TGRAN_2_SHIFT);
|
||||
return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2;
|
||||
}
|
||||
|
||||
static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
|
||||
{
|
||||
u64 mmfr0;
|
||||
|
||||
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0);
|
||||
}
|
||||
#else
|
||||
static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
|
||||
#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
|
||||
|
||||
@ -2731,6 +2764,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.matches = has_cpuid_feature,
|
||||
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
|
||||
},
|
||||
{
|
||||
.desc = "52-bit Virtual Addressing for KVM (LPA2)",
|
||||
.capability = ARM64_HAS_LPA2,
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.matches = has_lpa2,
|
||||
},
|
||||
{},
|
||||
};
|
||||
|
||||
|
@ -1837,6 +1837,7 @@ static int kvm_init_vector_slots(void)
|
||||
static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
|
||||
u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
unsigned long tcr;
|
||||
|
||||
/*
|
||||
@ -1859,6 +1860,10 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
|
||||
}
|
||||
tcr &= ~TCR_T0SZ_MASK;
|
||||
tcr |= TCR_T0SZ(hyp_va_bits);
|
||||
tcr &= ~TCR_EL2_PS_MASK;
|
||||
tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0));
|
||||
if (kvm_lpa2_is_enabled())
|
||||
tcr |= TCR_EL2_DS;
|
||||
params->tcr_el2 = tcr;
|
||||
|
||||
params->pgd_pa = kvm_mmu_get_httbr();
|
||||
|
@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
|
||||
*/
|
||||
if (!(esr & ESR_ELx_S1PTW) &&
|
||||
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
|
||||
(esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
|
||||
esr_fsc_is_permission_fault(esr))) {
|
||||
if (!__translate_far_to_hpfar(far, &hpfar))
|
||||
return false;
|
||||
} else {
|
||||
|
@ -591,7 +591,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
|
||||
bool valid;
|
||||
|
||||
valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
|
||||
valid = kvm_vcpu_trap_is_translation_fault(vcpu) &&
|
||||
kvm_vcpu_dabt_isvalid(vcpu) &&
|
||||
!kvm_vcpu_abt_issea(vcpu) &&
|
||||
!kvm_vcpu_abt_iss1tw(vcpu);
|
||||
|
@ -122,11 +122,7 @@ alternative_if ARM64_HAS_CNP
|
||||
alternative_else_nop_endif
|
||||
msr ttbr0_el2, x2
|
||||
|
||||
/*
|
||||
* Set the PS bits in TCR_EL2.
|
||||
*/
|
||||
ldr x0, [x0, #NVHE_INIT_TCR_EL2]
|
||||
tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
|
||||
msr tcr_el2, x0
|
||||
|
||||
isb
|
||||
|
@ -91,7 +91,7 @@ static void host_s2_put_page(void *addr)
|
||||
hyp_put_page(&host_s2_pool, addr);
|
||||
}
|
||||
|
||||
static void host_s2_free_unlinked_table(void *addr, u32 level)
|
||||
static void host_s2_free_unlinked_table(void *addr, s8 level)
|
||||
{
|
||||
kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
|
||||
}
|
||||
@ -443,7 +443,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
{
|
||||
struct kvm_mem_range cur;
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
s8 level;
|
||||
int ret;
|
||||
|
||||
hyp_assert_lock_held(&host_mmu.lock);
|
||||
@ -462,7 +462,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
cur.start = ALIGN_DOWN(addr, granule);
|
||||
cur.end = cur.start + granule;
|
||||
level++;
|
||||
} while ((level < KVM_PGTABLE_MAX_LEVELS) &&
|
||||
} while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
|
||||
!(kvm_level_supports_block_mapping(level) &&
|
||||
range_included(&cur, range)));
|
||||
|
||||
|
@ -260,7 +260,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
|
||||
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
|
||||
*/
|
||||
dsb(ishst);
|
||||
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
|
||||
__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
|
||||
dsb(ish);
|
||||
isb();
|
||||
}
|
||||
@ -275,7 +275,7 @@ static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
{
|
||||
struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
|
||||
|
||||
if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
slot->addr = ctx->addr;
|
||||
|
@ -181,7 +181,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (!kvm_pte_valid(ctx->old))
|
||||
return 0;
|
||||
|
||||
if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
if (ctx->level != KVM_PGTABLE_LAST_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
phys = kvm_pte_to_phys(ctx->old);
|
||||
|
@ -79,7 +79,10 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
|
||||
|
||||
static bool kvm_phys_is_valid(u64 phys)
|
||||
{
|
||||
return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
|
||||
u64 parange_max = kvm_get_parange_max();
|
||||
u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
|
||||
|
||||
return phys < BIT(shift);
|
||||
}
|
||||
|
||||
static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
|
||||
@ -98,7 +101,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
return IS_ALIGNED(ctx->addr, granule);
|
||||
}
|
||||
|
||||
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
|
||||
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level)
|
||||
{
|
||||
u64 shift = kvm_granule_shift(level);
|
||||
u64 mask = BIT(PAGE_SHIFT - 3) - 1;
|
||||
@ -114,7 +117,7 @@ static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
|
||||
return (addr & mask) >> shift;
|
||||
}
|
||||
|
||||
static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
|
||||
static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level)
|
||||
{
|
||||
struct kvm_pgtable pgt = {
|
||||
.ia_bits = ia_bits,
|
||||
@ -124,9 +127,9 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
|
||||
return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
|
||||
}
|
||||
|
||||
static bool kvm_pte_table(kvm_pte_t pte, u32 level)
|
||||
static bool kvm_pte_table(kvm_pte_t pte, s8 level)
|
||||
{
|
||||
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
if (level == KVM_PGTABLE_LAST_LEVEL)
|
||||
return false;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
@ -154,11 +157,11 @@ static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops
|
||||
return pte;
|
||||
}
|
||||
|
||||
static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
|
||||
static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level)
|
||||
{
|
||||
kvm_pte_t pte = kvm_phys_to_pte(pa);
|
||||
u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
|
||||
KVM_PTE_TYPE_BLOCK;
|
||||
u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE :
|
||||
KVM_PTE_TYPE_BLOCK;
|
||||
|
||||
pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
|
||||
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
|
||||
@ -203,11 +206,11 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
|
||||
}
|
||||
|
||||
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level);
|
||||
|
||||
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
kvm_pteref_t pteref, u32 level)
|
||||
kvm_pteref_t pteref, s8 level)
|
||||
{
|
||||
enum kvm_pgtable_walk_flags flags = data->walker->flags;
|
||||
kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
|
||||
@ -272,12 +275,13 @@ out:
|
||||
}
|
||||
|
||||
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level)
|
||||
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level)
|
||||
{
|
||||
u32 idx;
|
||||
int ret = 0;
|
||||
|
||||
if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
|
||||
if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL ||
|
||||
level > KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
|
||||
@ -340,7 +344,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
|
||||
struct leaf_walk_data {
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
s8 level;
|
||||
};
|
||||
|
||||
static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
@ -355,7 +359,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
}
|
||||
|
||||
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
|
||||
kvm_pte_t *ptep, u32 *level)
|
||||
kvm_pte_t *ptep, s8 *level)
|
||||
{
|
||||
struct leaf_walk_data data;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
@ -408,7 +412,8 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
|
||||
}
|
||||
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
|
||||
if (!kvm_lpa2_is_enabled())
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
|
||||
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
*ptep = attr;
|
||||
@ -467,7 +472,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (hyp_map_walker_try_leaf(ctx, data))
|
||||
return 0;
|
||||
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
|
||||
@ -563,14 +568,19 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
|
||||
struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
|
||||
s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 -
|
||||
ARM64_HW_PGTABLE_LEVELS(va_bits);
|
||||
|
||||
if (start_level < KVM_PGTABLE_FIRST_LEVEL ||
|
||||
start_level > KVM_PGTABLE_LAST_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
|
||||
if (!pgt->pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
pgt->ia_bits = va_bits;
|
||||
pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
|
||||
pgt->start_level = start_level;
|
||||
pgt->mm_ops = mm_ops;
|
||||
pgt->mmu = NULL;
|
||||
pgt->force_pte_cb = NULL;
|
||||
@ -624,7 +634,7 @@ struct stage2_map_data {
|
||||
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
{
|
||||
u64 vtcr = VTCR_EL2_FLAGS;
|
||||
u8 lvls;
|
||||
s8 lvls;
|
||||
|
||||
vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
|
||||
vtcr |= VTCR_EL2_T0SZ(phys_shift);
|
||||
@ -635,6 +645,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
lvls = stage2_pgtable_levels(phys_shift);
|
||||
if (lvls < 2)
|
||||
lvls = 2;
|
||||
|
||||
/*
|
||||
* When LPA2 is enabled, the HW supports an extra level of translation
|
||||
* (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2
|
||||
* to as an addition to SL0 to enable encoding this extra start level.
|
||||
* However, since we always use concatenated pages for the first level
|
||||
* lookup, we will never need this extra level and therefore do not need
|
||||
* to touch SL2.
|
||||
*/
|
||||
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
|
||||
|
||||
#ifdef CONFIG_ARM64_HW_AFDBM
|
||||
@ -654,6 +673,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
vtcr |= VTCR_EL2_HA;
|
||||
#endif /* CONFIG_ARM64_HW_AFDBM */
|
||||
|
||||
if (kvm_lpa2_is_enabled())
|
||||
vtcr |= VTCR_EL2_DS;
|
||||
|
||||
/* Set the vmid bits */
|
||||
vtcr |= (get_vmid_bits(mmfr1) == 16) ?
|
||||
VTCR_EL2_VS_16BIT :
|
||||
@ -711,7 +733,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
|
||||
if (prot & KVM_PGTABLE_PROT_W)
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
|
||||
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
|
||||
if (!kvm_lpa2_is_enabled())
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
|
||||
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
|
||||
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
|
||||
*ptep = attr;
|
||||
@ -902,7 +926,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
{
|
||||
u64 phys = stage2_map_walker_phys_addr(ctx, data);
|
||||
|
||||
if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
|
||||
if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
|
||||
return false;
|
||||
|
||||
return kvm_block_mapping_supported(ctx, phys);
|
||||
@ -981,7 +1005,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (ret != -E2BIG)
|
||||
return ret;
|
||||
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
if (!data->memcache)
|
||||
@ -1151,7 +1175,7 @@ struct stage2_attr_data {
|
||||
kvm_pte_t attr_set;
|
||||
kvm_pte_t attr_clr;
|
||||
kvm_pte_t pte;
|
||||
u32 level;
|
||||
s8 level;
|
||||
};
|
||||
|
||||
static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
@ -1194,7 +1218,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
|
||||
u64 size, kvm_pte_t attr_set,
|
||||
kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
|
||||
u32 *level, enum kvm_pgtable_walk_flags flags)
|
||||
s8 *level, enum kvm_pgtable_walk_flags flags)
|
||||
{
|
||||
int ret;
|
||||
kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
|
||||
@ -1296,7 +1320,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
int ret;
|
||||
u32 level;
|
||||
s8 level;
|
||||
kvm_pte_t set = 0, clr = 0;
|
||||
|
||||
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
|
||||
@ -1349,7 +1373,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
}
|
||||
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
u64 phys, s8 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte)
|
||||
{
|
||||
@ -1407,7 +1431,7 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
* fully populated tree up to the PTE entries. Note that @level is
|
||||
* interpreted as in "level @level entry".
|
||||
*/
|
||||
static int stage2_block_get_nr_page_tables(u32 level)
|
||||
static int stage2_block_get_nr_page_tables(s8 level)
|
||||
{
|
||||
switch (level) {
|
||||
case 1:
|
||||
@ -1418,7 +1442,7 @@ static int stage2_block_get_nr_page_tables(u32 level)
|
||||
return 0;
|
||||
default:
|
||||
WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
|
||||
level >= KVM_PGTABLE_MAX_LEVELS);
|
||||
level > KVM_PGTABLE_LAST_LEVEL);
|
||||
return -EINVAL;
|
||||
};
|
||||
}
|
||||
@ -1431,13 +1455,13 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct kvm_s2_mmu *mmu;
|
||||
kvm_pte_t pte = ctx->old, new, *childp;
|
||||
enum kvm_pgtable_prot prot;
|
||||
u32 level = ctx->level;
|
||||
s8 level = ctx->level;
|
||||
bool force_pte;
|
||||
int nr_pages;
|
||||
u64 phys;
|
||||
|
||||
/* No huge-pages exist at the last level */
|
||||
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
|
||||
if (level == KVM_PGTABLE_LAST_LEVEL)
|
||||
return 0;
|
||||
|
||||
/* We only split valid block mappings */
|
||||
@ -1514,7 +1538,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
u64 vtcr = mmu->vtcr;
|
||||
u32 ia_bits = VTCR_EL2_IPA(vtcr);
|
||||
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
|
||||
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
|
||||
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
|
||||
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
|
||||
@ -1537,7 +1561,7 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
|
||||
{
|
||||
u32 ia_bits = VTCR_EL2_IPA(vtcr);
|
||||
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
|
||||
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
|
||||
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
|
||||
}
|
||||
@ -1573,7 +1597,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
||||
pgt->pgd = NULL;
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
|
||||
{
|
||||
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
|
@ -223,12 +223,12 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
|
||||
{
|
||||
struct page *page = container_of(head, struct page, rcu_head);
|
||||
void *pgtable = page_to_virt(page);
|
||||
u32 level = page_private(page);
|
||||
s8 level = page_private(page);
|
||||
|
||||
kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
|
||||
}
|
||||
|
||||
static void stage2_free_unlinked_table(void *addr, u32 level)
|
||||
static void stage2_free_unlinked_table(void *addr, s8 level)
|
||||
{
|
||||
struct page *page = virt_to_page(addr);
|
||||
|
||||
@ -804,13 +804,13 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
||||
struct kvm_pgtable pgt = {
|
||||
.pgd = (kvm_pteref_t)kvm->mm->pgd,
|
||||
.ia_bits = vabits_actual,
|
||||
.start_level = (KVM_PGTABLE_MAX_LEVELS -
|
||||
CONFIG_PGTABLE_LEVELS),
|
||||
.start_level = (KVM_PGTABLE_LAST_LEVEL -
|
||||
CONFIG_PGTABLE_LEVELS + 1),
|
||||
.mm_ops = &kvm_user_mm_ops,
|
||||
};
|
||||
unsigned long flags;
|
||||
kvm_pte_t pte = 0; /* Keep GCC quiet... */
|
||||
u32 level = ~0;
|
||||
s8 level = S8_MAX;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -829,7 +829,9 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
|
||||
* Not seeing an error, but not updating level? Something went
|
||||
* deeply wrong...
|
||||
*/
|
||||
if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
|
||||
if (WARN_ON(level > KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EFAULT;
|
||||
if (WARN_ON(level < KVM_PGTABLE_FIRST_LEVEL))
|
||||
return -EFAULT;
|
||||
|
||||
/* Oops, the userspace PTs are gone... Replay the fault */
|
||||
@ -1374,7 +1376,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
|
||||
|
||||
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
struct kvm_memory_slot *memslot, unsigned long hva,
|
||||
unsigned long fault_status)
|
||||
bool fault_is_perm)
|
||||
{
|
||||
int ret = 0;
|
||||
bool write_fault, writable, force_pte = false;
|
||||
@ -1388,17 +1390,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
gfn_t gfn;
|
||||
kvm_pfn_t pfn;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
|
||||
long vma_pagesize, fault_granule;
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
|
||||
struct kvm_pgtable *pgt;
|
||||
|
||||
fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
|
||||
if (fault_is_perm)
|
||||
fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
|
||||
write_fault = kvm_is_write_fault(vcpu);
|
||||
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
|
||||
VM_BUG_ON(write_fault && exec_fault);
|
||||
|
||||
if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
|
||||
if (fault_is_perm && !write_fault && !exec_fault) {
|
||||
kvm_err("Unexpected L2 read permission error\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
@ -1409,8 +1411,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* only exception to this is when dirty logging is enabled at runtime
|
||||
* and a write fault needs to collapse a block entry into a table.
|
||||
*/
|
||||
if (fault_status != ESR_ELx_FSC_PERM ||
|
||||
(logging_active && write_fault)) {
|
||||
if (!fault_is_perm || (logging_active && write_fault)) {
|
||||
ret = kvm_mmu_topup_memory_cache(memcache,
|
||||
kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
|
||||
if (ret)
|
||||
@ -1527,8 +1528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* backed by a THP and thus use block mapping if possible.
|
||||
*/
|
||||
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
|
||||
if (fault_status == ESR_ELx_FSC_PERM &&
|
||||
fault_granule > PAGE_SIZE)
|
||||
if (fault_is_perm && fault_granule > PAGE_SIZE)
|
||||
vma_pagesize = fault_granule;
|
||||
else
|
||||
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
|
||||
@ -1541,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
}
|
||||
}
|
||||
|
||||
if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||
if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new disallowed VMA */
|
||||
if (mte_allowed) {
|
||||
sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||
@ -1567,7 +1567,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* permissions only if vma_pagesize equals fault_granule. Otherwise,
|
||||
* kvm_pgtable_stage2_map() should be called to change block size.
|
||||
*/
|
||||
if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
|
||||
if (fault_is_perm && vma_pagesize == fault_granule)
|
||||
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
|
||||
else
|
||||
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
|
||||
@ -1618,7 +1618,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
|
||||
*/
|
||||
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long fault_status;
|
||||
unsigned long esr;
|
||||
phys_addr_t fault_ipa;
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long hva;
|
||||
@ -1626,12 +1626,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
gfn_t gfn;
|
||||
int ret, idx;
|
||||
|
||||
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
|
||||
esr = kvm_vcpu_get_esr(vcpu);
|
||||
|
||||
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
|
||||
|
||||
if (fault_status == ESR_ELx_FSC_FAULT) {
|
||||
if (esr_fsc_is_permission_fault(esr)) {
|
||||
/* Beyond sanitised PARange (which is the IPA limit) */
|
||||
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
|
||||
kvm_inject_size_fault(vcpu);
|
||||
@ -1666,9 +1666,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
kvm_vcpu_get_hfar(vcpu), fault_ipa);
|
||||
|
||||
/* Check the stage-2 fault is trans. fault or write fault */
|
||||
if (fault_status != ESR_ELx_FSC_FAULT &&
|
||||
fault_status != ESR_ELx_FSC_PERM &&
|
||||
fault_status != ESR_ELx_FSC_ACCESS) {
|
||||
if (!esr_fsc_is_translation_fault(esr) &&
|
||||
!esr_fsc_is_permission_fault(esr) &&
|
||||
!esr_fsc_is_access_flag_fault(esr)) {
|
||||
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
|
||||
kvm_vcpu_trap_get_class(vcpu),
|
||||
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
|
||||
@ -1730,13 +1730,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
/* Userspace should not be able to register out-of-bounds IPAs */
|
||||
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
|
||||
|
||||
if (fault_status == ESR_ELx_FSC_ACCESS) {
|
||||
if (esr_fsc_is_access_flag_fault(esr)) {
|
||||
handle_access_fault(vcpu, fault_ipa);
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
|
||||
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva,
|
||||
esr_fsc_is_permission_fault(esr));
|
||||
if (ret == 0)
|
||||
ret = 1;
|
||||
out:
|
||||
|
@ -280,12 +280,11 @@ int __init kvm_set_ipa_limit(void)
|
||||
parange = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
|
||||
/*
|
||||
* IPA size beyond 48 bits could not be supported
|
||||
* on either 4K or 16K page size. Hence let's cap
|
||||
* it to 48 bits, in case it's reported as larger
|
||||
* on the system.
|
||||
* IPA size beyond 48 bits for 4K and 16K page size is only supported
|
||||
* when LPA2 is available. So if we have LPA2, enable it, else cap to 48
|
||||
* bits, in case it's reported as larger on the system.
|
||||
*/
|
||||
if (PAGE_SIZE != SZ_64K)
|
||||
if (!kvm_lpa2_is_enabled() && PAGE_SIZE != SZ_64K)
|
||||
parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48);
|
||||
|
||||
/*
|
||||
|
@ -37,6 +37,7 @@ HAS_GIC_PRIO_MASKING
|
||||
HAS_GIC_PRIO_RELAXED_SYNC
|
||||
HAS_HCX
|
||||
HAS_LDAPR
|
||||
HAS_LPA2
|
||||
HAS_LSE_ATOMICS
|
||||
HAS_MOPS
|
||||
HAS_NESTED_VIRT
|
||||
|
@ -119,8 +119,8 @@ enum {
|
||||
/* Access flag update enable/disable */
|
||||
#define TCR_EL1_HA (1ULL << 39)
|
||||
|
||||
void aarch64_get_supported_page_sizes(uint32_t ipa,
|
||||
bool *ps4k, bool *ps16k, bool *ps64k);
|
||||
void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
|
||||
uint32_t *ipa16k, uint32_t *ipa64k);
|
||||
|
||||
void vm_init_descriptor_tables(struct kvm_vm *vm);
|
||||
void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
|
||||
|
@ -11,8 +11,8 @@ struct guest_mode {
|
||||
|
||||
extern struct guest_mode guest_modes[NUM_VM_MODES];
|
||||
|
||||
#define guest_mode_append(mode, supported, enabled) ({ \
|
||||
guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
|
||||
#define guest_mode_append(mode, enabled) ({ \
|
||||
guest_modes[mode] = (struct guest_mode){ (enabled), (enabled) }; \
|
||||
})
|
||||
|
||||
void guest_modes_append_default(void);
|
||||
|
@ -171,6 +171,7 @@ static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
|
||||
|
||||
enum vm_guest_mode {
|
||||
VM_MODE_P52V48_4K,
|
||||
VM_MODE_P52V48_16K,
|
||||
VM_MODE_P52V48_64K,
|
||||
VM_MODE_P48V48_4K,
|
||||
VM_MODE_P48V48_16K,
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
|
||||
|
||||
@ -58,13 +59,25 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
|
||||
return (gva >> vm->page_shift) & mask;
|
||||
}
|
||||
|
||||
static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
|
||||
{
|
||||
return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
|
||||
(vm->pa_bits > 48 || vm->va_bits > 48);
|
||||
}
|
||||
|
||||
static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
|
||||
{
|
||||
uint64_t pte;
|
||||
|
||||
pte = pa & GENMASK(47, vm->page_shift);
|
||||
if (vm->page_shift == 16)
|
||||
pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
|
||||
if (use_lpa2_pte_format(vm)) {
|
||||
pte = pa & GENMASK(49, vm->page_shift);
|
||||
pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
|
||||
attrs &= ~GENMASK(9, 8);
|
||||
} else {
|
||||
pte = pa & GENMASK(47, vm->page_shift);
|
||||
if (vm->page_shift == 16)
|
||||
pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
|
||||
}
|
||||
pte |= attrs;
|
||||
|
||||
return pte;
|
||||
@ -74,9 +87,14 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
|
||||
{
|
||||
uint64_t pa;
|
||||
|
||||
pa = pte & GENMASK(47, vm->page_shift);
|
||||
if (vm->page_shift == 16)
|
||||
pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
|
||||
if (use_lpa2_pte_format(vm)) {
|
||||
pa = pte & GENMASK(49, vm->page_shift);
|
||||
pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
|
||||
} else {
|
||||
pa = pte & GENMASK(47, vm->page_shift);
|
||||
if (vm->page_shift == 16)
|
||||
pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
|
||||
}
|
||||
|
||||
return pa;
|
||||
}
|
||||
@ -266,9 +284,6 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
|
||||
|
||||
/* Configure base granule size */
|
||||
switch (vm->mode) {
|
||||
case VM_MODE_P52V48_4K:
|
||||
TEST_FAIL("AArch64 does not support 4K sized pages "
|
||||
"with 52-bit physical address ranges");
|
||||
case VM_MODE_PXXV48_4K:
|
||||
TEST_FAIL("AArch64 does not support 4K sized pages "
|
||||
"with ANY-bit physical address ranges");
|
||||
@ -278,12 +293,14 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
|
||||
case VM_MODE_P36V48_64K:
|
||||
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
|
||||
break;
|
||||
case VM_MODE_P52V48_16K:
|
||||
case VM_MODE_P48V48_16K:
|
||||
case VM_MODE_P40V48_16K:
|
||||
case VM_MODE_P36V48_16K:
|
||||
case VM_MODE_P36V47_16K:
|
||||
tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
|
||||
break;
|
||||
case VM_MODE_P52V48_4K:
|
||||
case VM_MODE_P48V48_4K:
|
||||
case VM_MODE_P40V48_4K:
|
||||
case VM_MODE_P36V48_4K:
|
||||
@ -297,6 +314,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
|
||||
|
||||
/* Configure output size */
|
||||
switch (vm->mode) {
|
||||
case VM_MODE_P52V48_4K:
|
||||
case VM_MODE_P52V48_16K:
|
||||
case VM_MODE_P52V48_64K:
|
||||
tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
|
||||
ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
|
||||
@ -325,6 +344,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
|
||||
/* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
|
||||
tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
|
||||
tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
|
||||
if (use_lpa2_pte_format(vm))
|
||||
tcr_el1 |= (1ul << 59) /* DS */;
|
||||
|
||||
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
|
||||
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
|
||||
@ -492,12 +513,24 @@ uint32_t guest_get_vcpuid(void)
|
||||
return read_sysreg(tpidr_el1);
|
||||
}
|
||||
|
||||
void aarch64_get_supported_page_sizes(uint32_t ipa,
|
||||
bool *ps4k, bool *ps16k, bool *ps64k)
|
||||
static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
|
||||
uint32_t not_sup_val, uint32_t ipa52_min_val)
|
||||
{
|
||||
if (gran == not_sup_val)
|
||||
return 0;
|
||||
else if (gran >= ipa52_min_val && vm_ipa >= 52)
|
||||
return 52;
|
||||
else
|
||||
return min(vm_ipa, 48U);
|
||||
}
|
||||
|
||||
void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
|
||||
uint32_t *ipa16k, uint32_t *ipa64k)
|
||||
{
|
||||
struct kvm_vcpu_init preferred_init;
|
||||
int kvm_fd, vm_fd, vcpu_fd, err;
|
||||
uint64_t val;
|
||||
uint32_t gran;
|
||||
struct kvm_one_reg reg = {
|
||||
.id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
|
||||
.addr = (uint64_t)&val,
|
||||
@ -518,9 +551,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
|
||||
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®);
|
||||
TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
|
||||
|
||||
*ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val) != 0xf;
|
||||
*ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val) == 0;
|
||||
*ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val) != 0;
|
||||
gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
|
||||
*ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
|
||||
ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
|
||||
|
||||
gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
|
||||
*ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
|
||||
ID_AA64MMFR0_EL1_TGRAN64_IMP);
|
||||
|
||||
gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
|
||||
*ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
|
||||
ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
|
||||
|
||||
close(vcpu_fd);
|
||||
close(vm_fd);
|
||||
|
@ -14,37 +14,33 @@ struct guest_mode guest_modes[NUM_VM_MODES];
|
||||
void guest_modes_append_default(void)
|
||||
{
|
||||
#ifndef __aarch64__
|
||||
guest_mode_append(VM_MODE_DEFAULT, true, true);
|
||||
guest_mode_append(VM_MODE_DEFAULT, true);
|
||||
#else
|
||||
{
|
||||
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
|
||||
bool ps4k, ps16k, ps64k;
|
||||
uint32_t ipa4k, ipa16k, ipa64k;
|
||||
int i;
|
||||
|
||||
aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k);
|
||||
aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k);
|
||||
|
||||
vm_mode_default = NUM_VM_MODES;
|
||||
guest_mode_append(VM_MODE_P52V48_4K, ipa4k >= 52);
|
||||
guest_mode_append(VM_MODE_P52V48_16K, ipa16k >= 52);
|
||||
guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52);
|
||||
|
||||
if (limit >= 52)
|
||||
guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k);
|
||||
if (limit >= 48) {
|
||||
guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k);
|
||||
guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k);
|
||||
guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k);
|
||||
}
|
||||
if (limit >= 40) {
|
||||
guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k);
|
||||
guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k);
|
||||
guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k);
|
||||
if (ps4k)
|
||||
vm_mode_default = VM_MODE_P40V48_4K;
|
||||
}
|
||||
if (limit >= 36) {
|
||||
guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k);
|
||||
guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k);
|
||||
guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k);
|
||||
guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k);
|
||||
}
|
||||
guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48);
|
||||
guest_mode_append(VM_MODE_P48V48_16K, ipa16k >= 48);
|
||||
guest_mode_append(VM_MODE_P48V48_64K, ipa64k >= 48);
|
||||
|
||||
guest_mode_append(VM_MODE_P40V48_4K, ipa4k >= 40);
|
||||
guest_mode_append(VM_MODE_P40V48_16K, ipa16k >= 40);
|
||||
guest_mode_append(VM_MODE_P40V48_64K, ipa64k >= 40);
|
||||
|
||||
guest_mode_append(VM_MODE_P36V48_4K, ipa4k >= 36);
|
||||
guest_mode_append(VM_MODE_P36V48_16K, ipa16k >= 36);
|
||||
guest_mode_append(VM_MODE_P36V48_64K, ipa64k >= 36);
|
||||
guest_mode_append(VM_MODE_P36V47_16K, ipa16k >= 36);
|
||||
|
||||
vm_mode_default = ipa4k >= 40 ? VM_MODE_P40V48_4K : NUM_VM_MODES;
|
||||
|
||||
/*
|
||||
* Pick the first supported IPA size if the default
|
||||
@ -72,7 +68,7 @@ void guest_modes_append_default(void)
|
||||
close(kvm_fd);
|
||||
/* Starting with z13 we have 47bits of physical address */
|
||||
if (info.ibc >= 0x30)
|
||||
guest_mode_append(VM_MODE_P47V64_4K, true, true);
|
||||
guest_mode_append(VM_MODE_P47V64_4K, true);
|
||||
}
|
||||
#endif
|
||||
#ifdef __riscv
|
||||
@ -80,9 +76,9 @@ void guest_modes_append_default(void)
|
||||
unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
|
||||
|
||||
if (sz >= 52)
|
||||
guest_mode_append(VM_MODE_P52V48_4K, true, true);
|
||||
guest_mode_append(VM_MODE_P52V48_4K, true);
|
||||
if (sz >= 48)
|
||||
guest_mode_append(VM_MODE_P48V48_4K, true, true);
|
||||
guest_mode_append(VM_MODE_P48V48_4K, true);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -148,6 +148,7 @@ const char *vm_guest_mode_string(uint32_t i)
|
||||
{
|
||||
static const char * const strings[] = {
|
||||
[VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
|
||||
[VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages",
|
||||
[VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
|
||||
[VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
|
||||
[VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
|
||||
@ -173,6 +174,7 @@ const char *vm_guest_mode_string(uint32_t i)
|
||||
|
||||
const struct vm_guest_mode_params vm_guest_mode_params[] = {
|
||||
[VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
|
||||
[VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 },
|
||||
[VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
|
||||
[VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
|
||||
[VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
|
||||
@ -251,6 +253,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode)
|
||||
case VM_MODE_P36V48_64K:
|
||||
vm->pgtable_levels = 3;
|
||||
break;
|
||||
case VM_MODE_P52V48_16K:
|
||||
case VM_MODE_P48V48_16K:
|
||||
case VM_MODE_P40V48_16K:
|
||||
case VM_MODE_P36V48_16K:
|
||||
|
Loading…
Reference in New Issue
Block a user