From 1f0f4a2ef7a5693b135ce174e71f116db4bd684d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 21 Apr 2023 07:16:05 +0000 Subject: [PATCH 1/3] KVM: arm64: Infer the PA offset from IPA in stage-2 map walker Until now, the page table walker counted increments to the PA and IPA of a walk in two separate places. While the PA is incremented as soon as a leaf PTE is installed in stage2_map_walker_try_leaf(), the IPA is actually bumped in the generic table walker context. Critically, __kvm_pgtable_visit() rereads the PTE after the LEAF callback returns to work out if a table or leaf was installed, and only bumps the IPA for a leaf PTE. This arrangement worked fine when we handled faults behind the write lock, as the walker had exclusive access to the stage-2 page tables. However, commit 1577cb5823ce ("KVM: arm64: Handle stage-2 faults in parallel") started handling all stage-2 faults behind the read lock, opening up a race where a walker could increment the PA but not the IPA of a walk. Nothing good ensues, as the walker starts mapping with the incorrect IPA -> PA relationship. For example, assume that two vCPUs took a data abort on the same IPA. One observes that dirty logging is disabled, and the other observed that it is enabled: vCPU attempting PMD mapping vCPU attempting PTE mapping ====================================== ===================================== /* install PMD */ stage2_make_pte(ctx, leaf); data->phys += granule; /* replace PMD with a table */ stage2_try_break_pte(ctx, data->mmu); stage2_make_pte(ctx, table); /* table is observed */ ctx.old = READ_ONCE(*ptep); table = kvm_pte_table(ctx.old, level); /* * map walk continues w/o incrementing * IPA. */ __kvm_pgtable_walk(..., level + 1); Bring an end to the whole mess by using the IPA as the single source of truth for how far along a walk has gotten. Work out the correct PA to map by calculating the IPA offset from the beginning of the walk and add that to the starting physical address. Cc: stable@vger.kernel.org Fixes: 1577cb5823ce ("KVM: arm64: Handle stage-2 faults in parallel") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230421071606.1603916-2-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_pgtable.h | 1 + arch/arm64/kvm/hyp/pgtable.c | 32 ++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 4cd6762bda80..dc3c072e862f 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx { kvm_pte_t old; void *arg; struct kvm_pgtable_mm_ops *mm_ops; + u64 start; u64 addr; u64 end; u32 level; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3d61bd3e591d..140f82300db5 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -58,6 +58,7 @@ struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; + u64 start; u64 addr; u64 end; }; @@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .old = READ_ONCE(*ptep), .arg = data->walker->arg, .mm_ops = mm_ops, + .start = data->start, .addr = data->addr, .end = data->end, .level = level, @@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker) { struct kvm_pgtable_walk_data walk_data = { + .start = ALIGN_DOWN(addr, PAGE_SIZE), .addr = ALIGN_DOWN(addr, PAGE_SIZE), .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, @@ -794,20 +797,43 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } +static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx, + const struct stage2_map_data *data) +{ + u64 phys = data->phys; + + /* + * Stage-2 walks to update ownership data are communicated to the map + * walker using an invalid PA. Avoid offsetting an already invalid PA, + * which could overflow and make the address valid again. + */ + if (!kvm_phys_is_valid(phys)) + return phys; + + /* + * Otherwise, work out the correct PA based on how far the walk has + * gotten. + */ + return phys + (ctx->addr - ctx->start); +} + static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { + u64 phys = stage2_map_walker_phys_addr(ctx, data); + if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) return false; - return kvm_block_mapping_supported(ctx, data->phys); + return kvm_block_mapping_supported(ctx, phys); } static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { kvm_pte_t new; - u64 granule = kvm_granule_size(ctx->level), phys = data->phys; + u64 phys = stage2_map_walker_phys_addr(ctx, data); + u64 granule = kvm_granule_size(ctx->level); struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; @@ -841,8 +867,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, stage2_make_pte(ctx, new); - if (kvm_phys_is_valid(phys)) - data->phys += granule; return 0; } From 39bc95be3782ba88e55cd72e830f37e74395831b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 21 Apr 2023 07:16:06 +0000 Subject: [PATCH 2/3] KVM: arm64: Infer PA offset from VA in hyp map walker Similar to the recently fixed stage-2 walker, the hyp map walker increments the PA and VA of a walk separately. Unlike stage-2, there is no bug here as the map walker has exclusive access to the stage-1 page tables. Nonetheless, in the interest of continuity throughout the page table code, tweak the hyp map walker to avoid incrementing the PA and instead use the VA as the authoritative source of how far along a table walk has gotten. Calculate the PA to use for a leaf PTE by adding the offset of the VA from the start of the walk to the starting PA. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230421071606.1603916-3-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 140f82300db5..356a3fd5220c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -410,13 +410,12 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct hyp_map_data *data) { + u64 phys = data->phys + (ctx->addr - ctx->start); kvm_pte_t new; - u64 granule = kvm_granule_size(ctx->level), phys = data->phys; if (!kvm_block_mapping_supported(ctx, phys)) return false; - data->phys += granule; new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); if (ctx->old == new) return true; From 1ea244158a4a20ac686f4d1d46285f3d38d4571f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Apr 2023 10:18:34 +0100 Subject: [PATCH 3/3] KVM: arm64: Constify start/end/phys fields of the pgtable walker data As we are revamping the way the pgtable walker evaluates some of the data, make it clear that we rely on somew of the fields to be constant across the lifetime of a walk. For this, flag the start, end and phys fields of the walk data as 'const', which will generate an error if we were to accidentally update these fields again. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/pgtable.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 356a3fd5220c..5282cb9ca4cf 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -58,9 +58,9 @@ struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; - u64 start; + const u64 start; u64 addr; - u64 end; + const u64 end; }; static bool kvm_phys_is_valid(u64 phys) @@ -352,7 +352,7 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, } struct hyp_map_data { - u64 phys; + const u64 phys; kvm_pte_t attr; }; @@ -578,7 +578,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) } struct stage2_map_data { - u64 phys; + const u64 phys; kvm_pte_t attr; u8 owner_id;