KVM/arm64 updates for 6.5
- Eager page splitting optimization for dirty logging, optionally
  allowing for a VM to avoid the cost of block splitting in the stage-2
  fault path.

- Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with
  services that live in the Secure world. pKVM intervenes on FF-A calls
  to guarantee the host doesn't misuse memory donated to the hyp or a
  pKVM guest.

- Support for running the split hypervisor with VHE enabled, known as
  'hVHE' mode. This is extremely useful for testing the split
  hypervisor on VHE-only systems, and paves the way for new use cases
  that depend on having two TTBRs available at EL2.

- Generalized framework for configurable ID registers from userspace.
  KVM/arm64 currently prevents arbitrary CPU feature set configuration
  from userspace, but the intent is to relax this limitation and allow
  userspace to select a feature set consistent with the CPU.

- Enable the use of Branch Target Identification (FEAT_BTI) in the
  hypervisor.

- Use a separate set of pointer authentication keys for the hypervisor
  when running in protected mode, as the host is untrusted at runtime.

- Ensure timer IRQs are consistently released in the init failure
  paths.

- Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps
  (FEAT_EVT), as it is a register commonly read from userspace.

- Erratum workaround for the upcoming AmpereOne part, which has broken
  hardware A/D state management.

As a consequence of the hVHE series reworking the arm64 software
features framework, the for-next/module-alloc branch from the arm64
tree comes along for the ride.

Merge tag 'kvmarm-6.5' of
git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
commit cc744042d9
@@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   0000000000000000      0000ffffffffffff        256TB          user
   ffff000000000000      ffff7fffffffffff        128TB          kernel logical memory map
  [ffff600000000000      ffff7fffffffffff]        32TB          [kasan shadow region]
-  ffff800000000000      ffff800007ffffff        128MB          modules
-  ffff800008000000      fffffbffefffffff        124TB          vmalloc
+  ffff800000000000      ffff80007fffffff          2GB          modules
+  ffff800080000000      fffffbffefffffff        124TB          vmalloc
   fffffbfff0000000      fffffbfffdffffff        224MB          fixed mappings (top down)
   fffffbfffe000000      fffffbfffe7fffff          8MB          [guard region]
   fffffbfffe800000      fffffbffff7fffff         16MB          PCI I/O space
@@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
   0000000000000000      000fffffffffffff          4PB          user
   fff0000000000000      ffff7fffffffffff         ~4PB          kernel logical memory map
  [fffd800000000000      ffff7fffffffffff]       512TB          [kasan shadow region]
-  ffff800000000000      ffff800007ffffff        128MB          modules
-  ffff800008000000      fffffbffefffffff        124TB          vmalloc
+  ffff800000000000      ffff80007fffffff          2GB          modules
+  ffff800080000000      fffffbffefffffff        124TB          vmalloc
   fffffbfff0000000      fffffbfffdffffff        224MB          fixed mappings (top down)
   fffffbfffe000000      fffffbfffe7fffff          8MB          [guard region]
   fffffbfffe800000      fffffbffff7fffff         16MB          PCI I/O space
@@ -52,6 +52,9 @@ stable kernels.
 | Allwinner      | A64/R18         | UNKNOWN1        | SUN50I_ERRATUM_UNKNOWN1     |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
+| Ampere         | AmpereOne       | AC03_CPU_38     | AMPERE_ERRATUM_AC03_CPU_38  |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2457168        | ARM64_ERRATUM_2457168       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2064142        | ARM64_ERRATUM_2064142       |
@@ -8445,6 +8445,33 @@ structure.
 When getting the Modified Change Topology Report value, the attr->addr
 must point to a byte where the value will be stored or retrieved from.

+8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+---------------------------------------
+
+:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+:Architectures: arm64
+:Type: vm
+:Parameters: arg[0] is the new split chunk size.
+:Returns: 0 on success, -EINVAL if any memslot was already created.
+
+This capability sets the chunk size used in Eager Page Splitting.
+
+Eager Page Splitting improves the performance of dirty-logging (used
+in live migrations) when guest memory is backed by huge-pages. It
+avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
+it eagerly when enabling dirty logging (with the
+KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
+KVM_CLEAR_DIRTY_LOG.
+
+The chunk size specifies how many pages to break at a time, using a
+single allocation for each chunk. The bigger the chunk size, the more
+pages need to be allocated ahead of time.
+
+The chunk size needs to be a valid block size. The list of acceptable
+block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
+64-bit bitmap (each bit describing a block size). The default value is
+0, to disable the eager page splitting.
+
 9. Known KVM API problems
 =========================

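For orientation (this is not part of the patch), a VMM would enable the
capability on its VM file descriptor before creating any memslot. A
minimal userspace sketch, assuming vm_fd came from KVM_CREATE_VM and
that the requested chunk size is one of the block sizes reported by
KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Enable eager page splitting; returns 0 on success, -1 with errno set. */
    static int enable_eager_split(int vm_fd, unsigned long long chunk_bytes)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE,
                    .args = { chunk_bytes },        /* e.g. 2ULL << 20 for 2MiB */
            };

            /* Must happen before any memslot exists, or KVM returns -EINVAL. */
            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }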
@ -207,6 +207,7 @@ config ARM64
|
||||
select HAVE_IOREMAP_PROT
|
||||
select HAVE_IRQ_TIME_ACCOUNTING
|
||||
select HAVE_KVM
|
||||
select HAVE_MOD_ARCH_SPECIFIC
|
||||
select HAVE_NMI
|
||||
select HAVE_PERF_EVENTS
|
||||
select HAVE_PERF_REGS
|
||||
@ -406,6 +407,25 @@ menu "Kernel Features"
|
||||
|
||||
menu "ARM errata workarounds via the alternatives framework"
|
||||
|
||||
config AMPERE_ERRATUM_AC03_CPU_38
|
||||
bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
|
||||
default y
|
||||
help
|
||||
This option adds an alternative code sequence to work around Ampere
|
||||
erratum AC03_CPU_38 on AmpereOne.
|
||||
|
||||
The affected design reports FEAT_HAFDBS as not implemented in
|
||||
ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0
|
||||
as required by the architecture. The unadvertised HAFDBS
|
||||
implementation suffers from an additional erratum where hardware
|
||||
A/D updates can occur after a PTE has been marked invalid.
|
||||
|
||||
The workaround forces KVM to explicitly set VTCR_EL2.HA to 0,
|
||||
which avoids enabling unadvertised hardware Access Flag management
|
||||
at stage-2.
|
||||
|
||||
If unsure, say Y.
|
||||
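For context (illustrative, not part of this hunk): the stage-2 side of
the workaround boils down to leaving VTCR_EL2.HA clear when the erratum
capability is detected. A hedged sketch of that shape, with the helper
name invented for illustration:

    /* Sketch only: keep hardware Access flag management off on affected parts. */
    static u64 stage2_vtcr_apply_ac03_cpu_38(u64 vtcr)
    {
            if (!cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
                    vtcr |= VTCR_EL2_HA;    /* HA stays 0 on AmpereOne */

            return vtcr;
    }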
|
||||
config ARM64_WORKAROUND_CLEAN_CACHE
|
||||
bool
|
||||
|
||||
@ -577,7 +597,6 @@ config ARM64_ERRATUM_845719
|
||||
config ARM64_ERRATUM_843419
|
||||
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
|
||||
default y
|
||||
select ARM64_MODULE_PLTS if MODULES
|
||||
help
|
||||
This option links the kernel with '--fix-cortex-a53-843419' and
|
||||
enables PLT support to replace certain ADRP instructions, which can
|
||||
@ -2107,26 +2126,6 @@ config ARM64_SME
|
||||
register state capable of holding two dimensional matrix tiles to
|
||||
enable various matrix operations.
|
||||
|
||||
config ARM64_MODULE_PLTS
|
||||
bool "Use PLTs to allow module memory to spill over into vmalloc area"
|
||||
depends on MODULES
|
||||
select HAVE_MOD_ARCH_SPECIFIC
|
||||
help
|
||||
Allocate PLTs when loading modules so that jumps and calls whose
|
||||
targets are too far away for their relative offsets to be encoded
|
||||
in the instructions themselves can be bounced via veneers in the
|
||||
module's PLT. This allows modules to be allocated in the generic
|
||||
vmalloc area after the dedicated module memory area has been
|
||||
exhausted.
|
||||
|
||||
When running with address space randomization (KASLR), the module
|
||||
region itself may be too far away for ordinary relative jumps and
|
||||
calls, and so in that case, module PLTs are required and cannot be
|
||||
disabled.
|
||||
|
||||
Specific errata workaround(s) might also force module PLTs to be
|
||||
enabled (ARM64_ERRATUM_843419).
|
||||
|
||||
config ARM64_PSEUDO_NMI
|
||||
bool "Support for NMI-like interrupts"
|
||||
select ARM_GIC_V3
|
||||
@ -2167,7 +2166,6 @@ config RELOCATABLE
|
||||
|
||||
config RANDOMIZE_BASE
|
||||
bool "Randomize the address of the kernel image"
|
||||
select ARM64_MODULE_PLTS if MODULES
|
||||
select RELOCATABLE
|
||||
help
|
||||
Randomizes the virtual address at which the kernel image is
|
||||
@ -2198,9 +2196,8 @@ config RANDOMIZE_MODULE_REGION_FULL
|
||||
When this option is not set, the module region will be randomized over
|
||||
a limited range that contains the [_stext, _etext] interval of the
|
||||
core kernel, so branch relocations are almost always in range unless
|
||||
ARM64_MODULE_PLTS is enabled and the region is exhausted. In this
|
||||
particular case of region exhaustion, modules might be able to fall
|
||||
back to a larger 2GB area.
|
||||
the region is exhausted. In this particular case of region
|
||||
exhaustion, modules might be able to fall back to a larger 2GB area.
|
||||
|
||||
config CC_HAVE_STACKPROTECTOR_SYSREG
|
||||
def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)
|
||||
|
@ -15,6 +15,9 @@
|
||||
#define MAX_CPU_FEATURES 128
|
||||
#define cpu_feature(x) KERNEL_HWCAP_ ## x
|
||||
|
||||
#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
|
||||
#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/bug.h>
|
||||
@ -915,6 +918,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
|
||||
return 8;
|
||||
}
|
||||
|
||||
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
|
||||
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
|
||||
|
||||
extern struct arm64_ftr_override id_aa64mmfr1_override;
|
||||
@ -925,6 +929,8 @@ extern struct arm64_ftr_override id_aa64smfr0_override;
|
||||
extern struct arm64_ftr_override id_aa64isar1_override;
|
||||
extern struct arm64_ftr_override id_aa64isar2_override;
|
||||
|
||||
extern struct arm64_ftr_override arm64_sw_feature_override;
|
||||
|
||||
u32 get_kvm_ipa_limit(void);
|
||||
void dump_cpu_features(void);
|
||||
|
||||
|
@ -34,6 +34,11 @@
|
||||
*/
|
||||
.macro __init_el2_timers
|
||||
mov x0, #3 // Enable EL1 physical timers
|
||||
mrs x1, hcr_el2
|
||||
and x1, x1, #HCR_E2H
|
||||
cbz x1, .LnVHE_\@
|
||||
lsl x0, x0, #10
|
||||
.LnVHE_\@:
|
||||
msr cnthctl_el2, x0
|
||||
msr cntvoff_el2, xzr // Clear virtual offset
|
||||
.endm
|
||||
@ -124,8 +129,15 @@
|
||||
.endm
|
||||
|
||||
/* Coprocessor traps */
|
||||
.macro __init_el2_nvhe_cptr
|
||||
.macro __init_el2_cptr
|
||||
mrs x1, hcr_el2
|
||||
and x1, x1, #HCR_E2H
|
||||
cbz x1, .LnVHE_\@
|
||||
mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
|
||||
b .Lset_cptr_\@
|
||||
.LnVHE_\@:
|
||||
mov x0, #0x33ff
|
||||
.Lset_cptr_\@:
|
||||
msr cptr_el2, x0 // Disable copro. traps to EL2
|
||||
.endm
|
||||
|
||||
@ -191,9 +203,8 @@
|
||||
__init_el2_gicv3
|
||||
__init_el2_hstr
|
||||
__init_el2_nvhe_idregs
|
||||
__init_el2_nvhe_cptr
|
||||
__init_el2_cptr
|
||||
__init_el2_fgt
|
||||
__init_el2_nvhe_prepare_eret
|
||||
.endm
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
@ -239,7 +250,17 @@
|
||||
|
||||
.Linit_sve_\@: /* SVE register access */
|
||||
mrs x0, cptr_el2 // Disable SVE traps
|
||||
mrs x1, hcr_el2
|
||||
and x1, x1, #HCR_E2H
|
||||
cbz x1, .Lcptr_nvhe_\@
|
||||
|
||||
// VHE case
|
||||
orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
|
||||
b .Lset_cptr_\@
|
||||
|
||||
.Lcptr_nvhe_\@: // nVHE case
|
||||
bic x0, x0, #CPTR_EL2_TZ
|
||||
.Lset_cptr_\@:
|
||||
msr cptr_el2, x0
|
||||
isb
|
||||
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
|
||||
|
@ -18,6 +18,7 @@
|
||||
#define HCR_ATA_SHIFT 56
|
||||
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
|
||||
#define HCR_AMVOFFEN (UL(1) << 51)
|
||||
#define HCR_TID4 (UL(1) << 49)
|
||||
#define HCR_FIEN (UL(1) << 47)
|
||||
#define HCR_FWB (UL(1) << 46)
|
||||
#define HCR_API (UL(1) << 41)
|
||||
@ -86,7 +87,7 @@
|
||||
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
|
||||
HCR_BSU_IS | HCR_FB | HCR_TACR | \
|
||||
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
|
||||
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
|
||||
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
|
||||
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
|
||||
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
|
||||
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
|
||||
@ -285,7 +286,6 @@
|
||||
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
|
||||
#define CPTR_EL2_TZ (1 << 8)
|
||||
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
|
||||
#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1
|
||||
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
|
||||
GENMASK(29, 21) | \
|
||||
GENMASK(19, 14) | \
|
||||
@ -347,8 +347,7 @@
|
||||
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
|
||||
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
|
||||
|
||||
#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
|
||||
CPACR_EL1_ZEN_EL1EN)
|
||||
#define CPACR_EL1_TTA (1 << 28)
|
||||
|
||||
#define kvm_mode_names \
|
||||
{ PSR_MODE_EL0t, "EL0t" }, \
|
||||
|
@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
|
||||
@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
|
||||
phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
|
||||
|
||||
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
|
||||
|
@ -62,19 +62,14 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
|
||||
#else
|
||||
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
|
||||
WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
|
||||
&kvm->arch.flags));
|
||||
|
||||
return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
|
||||
return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
|
||||
if (is_kernel_in_hyp_mode())
|
||||
if (has_vhe() || has_hvhe())
|
||||
vcpu->arch.hcr_el2 |= HCR_E2H;
|
||||
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
|
||||
/* route synchronous external abort exceptions to EL2 */
|
||||
@ -95,6 +90,12 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.hcr_el2 |= HCR_TVM;
|
||||
}
|
||||
|
||||
if (cpus_have_final_cap(ARM64_HAS_EVT) &&
|
||||
!cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
|
||||
vcpu->arch.hcr_el2 |= HCR_TID4;
|
||||
else
|
||||
vcpu->arch.hcr_el2 |= HCR_TID2;
|
||||
|
||||
if (vcpu_el1_is_32bit(vcpu))
|
||||
vcpu->arch.hcr_el2 &= ~HCR_RW;
|
||||
|
||||
@ -570,4 +571,35 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
|
||||
return test_bit(feature, vcpu->arch.features);
|
||||
}
|
||||
|
||||
static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
if (has_vhe()) {
|
||||
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
|
||||
CPACR_EL1_ZEN_EL1EN);
|
||||
} else if (has_hvhe()) {
|
||||
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
|
||||
} else {
|
||||
val = CPTR_NVHE_EL2_RES1;
|
||||
|
||||
if (vcpu_has_sve(vcpu) &&
|
||||
(vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
|
||||
val |= CPTR_EL2_TZ;
|
||||
if (cpus_have_final_cap(ARM64_SME))
|
||||
val &= ~CPTR_EL2_TSM;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val = kvm_get_reset_cptr_el2(vcpu);
|
||||
|
||||
if (has_vhe() || has_hvhe())
|
||||
write_sysreg(val, cpacr_el1);
|
||||
else
|
||||
write_sysreg(val, cptr_el2);
|
||||
}
|
||||
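To summarise what the helper above evaluates to in each mode (an
inference from the code, not text taken from the patch):

    /*
     * VHE:  CPACR_EL1 = FPEN_EL0EN | FPEN_EL1EN | ZEN_EL1EN, written via cpacr_el1
     * hVHE: CPACR_EL1 = FPEN_EL0EN | FPEN_EL1EN, written via cpacr_el1
     * nVHE: CPTR_EL2 RES1 bits, plus CPTR_EL2_TZ while the guest owns the SVE
     *       state, minus CPTR_EL2_TSM when SME is present, written via cptr_el2
     */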
#endif /* __ARM64_KVM_EMULATE_H__ */
|
||||
|
@ -39,6 +39,7 @@
|
||||
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
|
||||
|
||||
#define KVM_VCPU_MAX_FEATURES 7
|
||||
#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
|
||||
|
||||
#define KVM_REQ_SLEEP \
|
||||
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
@ -159,6 +160,21 @@ struct kvm_s2_mmu {
|
||||
/* The last vcpu id that ran on each physical CPU */
|
||||
int __percpu *last_vcpu_ran;
|
||||
|
||||
#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
|
||||
/*
|
||||
* Memory cache used to split
|
||||
* KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
|
||||
* is used to allocate stage2 page tables while splitting huge
|
||||
* pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
|
||||
* influences both the capacity of the split page cache, and
|
||||
* how often KVM reschedules. Be wary of raising CHUNK_SIZE
|
||||
* too high.
|
||||
*
|
||||
* Protected by kvm->slots_lock.
|
||||
*/
|
||||
struct kvm_mmu_memory_cache split_page_cache;
|
||||
uint64_t split_page_chunk_size;
|
||||
|
||||
struct kvm_arch *arch;
|
||||
};
|
||||
|
||||
@ -214,25 +230,23 @@ struct kvm_arch {
|
||||
#define KVM_ARCH_FLAG_MTE_ENABLED 1
|
||||
/* At least one vCPU has ran in the VM */
|
||||
#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
|
||||
/*
|
||||
* The following two bits are used to indicate the guest's EL1
|
||||
* register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT
|
||||
* bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set.
|
||||
* Otherwise, the guest's EL1 register width has not yet been
|
||||
* determined yet.
|
||||
*/
|
||||
#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3
|
||||
#define KVM_ARCH_FLAG_EL1_32BIT 4
|
||||
/* The vCPU feature set for the VM is configured */
|
||||
#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3
|
||||
/* PSCI SYSTEM_SUSPEND enabled for the guest */
|
||||
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
|
||||
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4
|
||||
/* VM counter offset */
|
||||
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
|
||||
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
|
||||
/* Timer PPIs made immutable */
|
||||
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
|
||||
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
|
||||
/* SMCCC filter initialized for the VM */
|
||||
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8
|
||||
#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7
|
||||
/* Initial ID reg values loaded */
|
||||
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8
|
||||
unsigned long flags;
|
||||
|
||||
/* VM-wide vCPU feature set */
|
||||
DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
|
||||
|
||||
/*
|
||||
* VM-wide PMU filter, implemented as a bitmap and big enough for
|
||||
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
|
||||
@ -242,17 +256,23 @@ struct kvm_arch {
|
||||
|
||||
cpumask_var_t supported_cpus;
|
||||
|
||||
u8 pfr0_csv2;
|
||||
u8 pfr0_csv3;
|
||||
struct {
|
||||
u8 imp:4;
|
||||
u8 unimp:4;
|
||||
} dfr0_pmuver;
|
||||
|
||||
/* Hypercall features firmware registers' descriptor */
|
||||
struct kvm_smccc_features smccc_feat;
|
||||
struct maple_tree smccc_filter;
|
||||
|
||||
/*
|
||||
* Emulated CPU ID registers per VM
|
||||
* (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
|
||||
* is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
|
||||
*
|
||||
* These emulated idregs are VM-wide, but accessed from the context of a vCPU.
|
||||
* Atomic access to multiple idregs is guarded by kvm_arch.config_lock.
|
||||
*/
|
||||
#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
|
||||
#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
|
||||
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
|
||||
u64 id_regs[KVM_ARM_ID_REG_NUM];
|
||||
|
||||
/*
|
||||
* For an untrusted host VM, 'pkvm.handle' is used to lookup
|
||||
* the associated pKVM instance in the hypervisor.
|
||||
@ -405,6 +425,7 @@ struct kvm_host_data {
|
||||
struct kvm_host_psci_config {
|
||||
/* PSCI version used by host. */
|
||||
u32 version;
|
||||
u32 smccc_version;
|
||||
|
||||
/* Function IDs used by host if version is v0.1. */
|
||||
struct psci_0_1_function_ids function_ids_0_1;
|
||||
|
@ -16,12 +16,35 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
|
||||
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
|
||||
DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
||||
/*
|
||||
* Unified accessors for registers that have a different encoding
|
||||
* between VHE and non-VHE. They must be specified without their "ELx"
|
||||
* encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
|
||||
*/
|
||||
|
||||
#if defined(__KVM_VHE_HYPERVISOR__)
|
||||
|
||||
#define read_sysreg_el0(r) read_sysreg_s(r##_EL02)
|
||||
#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02)
|
||||
#define read_sysreg_el1(r) read_sysreg_s(r##_EL12)
|
||||
#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12)
|
||||
#define read_sysreg_el2(r) read_sysreg_s(r##_EL1)
|
||||
#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1)
|
||||
|
||||
#else // !__KVM_VHE_HYPERVISOR__
|
||||
|
||||
#if defined(__KVM_NVHE_HYPERVISOR__)
|
||||
#define VHE_ALT_KEY ARM64_KVM_HVHE
|
||||
#else
|
||||
#define VHE_ALT_KEY ARM64_HAS_VIRT_HOST_EXTN
|
||||
#endif
|
||||
|
||||
#define read_sysreg_elx(r,nvh,vh) \
|
||||
({ \
|
||||
u64 reg; \
|
||||
asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
|
||||
asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
|
||||
__mrs_s("%0", r##vh), \
|
||||
ARM64_HAS_VIRT_HOST_EXTN) \
|
||||
VHE_ALT_KEY) \
|
||||
: "=r" (reg)); \
|
||||
reg; \
|
||||
})
|
||||
@ -31,16 +54,10 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
u64 __val = (u64)(v); \
|
||||
asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
|
||||
__msr_s(r##vh, "%x0"), \
|
||||
ARM64_HAS_VIRT_HOST_EXTN) \
|
||||
VHE_ALT_KEY) \
|
||||
: : "rZ" (__val)); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Unified accessors for registers that have a different encoding
|
||||
* between VHE and non-VHE. They must be specified without their "ELx"
|
||||
* encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
|
||||
*/
|
||||
|
||||
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
|
||||
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
|
||||
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
|
||||
@ -48,6 +65,8 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
|
||||
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
|
||||
|
||||
#endif // __KVM_VHE_HYPERVISOR__
|
||||
|
||||
/*
|
||||
* Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
|
||||
* static inline can allow the compiler to out-of-line this. KVM always wants
|
||||
|
@ -172,6 +172,7 @@ void __init free_hyp_pgds(void);
|
||||
|
||||
void stage2_unmap_vm(struct kvm *kvm);
|
||||
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
|
||||
void kvm_uninit_stage2_mmu(struct kvm *kvm);
|
||||
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
|
||||
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
||||
phys_addr_t pa, unsigned long size, bool writable);
|
||||
@ -227,7 +228,8 @@ static inline void __invalidate_icache_guest_page(void *va, size_t size)
|
||||
if (icache_is_aliasing()) {
|
||||
/* any kind of VIPT cache */
|
||||
icache_inval_all_pou();
|
||||
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
|
||||
} else if (read_sysreg(CurrentEL) != CurrentEL_EL1 ||
|
||||
!icache_is_vpipt()) {
|
||||
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
|
||||
icache_inval_pou((unsigned long)va, (unsigned long)va + size);
|
||||
}
|
||||
|
@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
}
|
||||
|
||||
static inline u32 kvm_supported_block_sizes(void)
|
||||
{
|
||||
u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
|
||||
u32 r = 0;
|
||||
|
||||
for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
|
||||
r |= BIT(kvm_granule_shift(level));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline bool kvm_is_block_size_supported(u64 size)
|
||||
{
|
||||
bool is_power_of_two = IS_ALIGNED(size, size);
|
||||
|
||||
return is_power_of_two && (size & kvm_supported_block_sizes());
|
||||
}
|
||||
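A worked example may help here (inferred from the code above, assuming
a 4KiB granule with 4 levels of stage-2 translation):
kvm_supported_block_sizes() sets bits 12, 21 and 30, i.e. 4KiB, 2MiB
and 1GiB.

    /*
     * With that bitmap:
     *   kvm_is_block_size_supported(SZ_2M)         -> true  (bit 21 set)
     *   kvm_is_block_size_supported(SZ_1M)         -> false (no bit 20)
     *   kvm_is_block_size_supported(SZ_2M + SZ_4K) -> false (not a power of two)
     */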
|
||||
/**
|
||||
* struct kvm_pgtable_mm_ops - Memory management callbacks.
|
||||
* @zalloc_page: Allocate a single zeroed memory page.
|
||||
@ -104,7 +122,7 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
|
||||
* allocation is physically contiguous.
|
||||
* @free_pages_exact: Free an exact number of memory pages previously
|
||||
* allocated by zalloc_pages_exact.
|
||||
* @free_removed_table: Free a removed paging structure by unlinking and
|
||||
* @free_unlinked_table: Free an unlinked paging structure by unlinking and
|
||||
* dropping references.
|
||||
* @get_page: Increment the refcount on a page.
|
||||
* @put_page: Decrement the refcount on a page. When the
|
||||
@ -124,7 +142,7 @@ struct kvm_pgtable_mm_ops {
|
||||
void* (*zalloc_page)(void *arg);
|
||||
void* (*zalloc_pages_exact)(size_t size);
|
||||
void (*free_pages_exact)(void *addr, size_t size);
|
||||
void (*free_removed_table)(void *addr, u32 level);
|
||||
void (*free_unlinked_table)(void *addr, u32 level);
|
||||
void (*get_page)(void *addr);
|
||||
void (*put_page)(void *addr);
|
||||
int (*page_count)(void *addr);
|
||||
@ -195,6 +213,12 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
|
||||
* with other software walkers.
|
||||
* @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
|
||||
* invoked from a fault handler.
|
||||
* @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
|
||||
* without Break-before-make's
|
||||
* TLB invalidation.
|
||||
* @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
|
||||
* without Cache maintenance
|
||||
* operations required.
|
||||
*/
|
||||
enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_LEAF = BIT(0),
|
||||
@ -202,6 +226,8 @@ enum kvm_pgtable_walk_flags {
|
||||
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
|
||||
KVM_PGTABLE_WALK_SHARED = BIT(3),
|
||||
KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
|
||||
KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
|
||||
KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
|
||||
};
|
||||
|
||||
struct kvm_pgtable_visit_ctx {
|
||||
@ -441,7 +467,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
|
||||
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @pgtable: Unlinked stage-2 paging structure to be freed.
|
||||
* @level: Level of the stage-2 paging structure to be freed.
|
||||
@ -449,7 +475,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* The page-table is assumed to be unreachable by any hardware walkers prior to
|
||||
* freeing and therefore no TLB invalidation is performed.
|
||||
*/
|
||||
void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @phys: Physical address of the memory to map.
|
||||
* @level: Starting level of the stage-2 paging structure to be created.
|
||||
* @prot: Permissions and attributes for the mapping.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
* @force_pte: Force mappings to PAGE_SIZE granularity.
|
||||
*
|
||||
* Returns an unlinked page-table tree. This new page-table tree is
|
||||
* not reachable (i.e., it is unlinked) from the root pgd and it's
|
||||
* therefore unreachable by the hardware page-table walker. No TLB
|
||||
* invalidation or CMOs are performed.
|
||||
*
|
||||
* If device attributes are not explicitly requested in @prot, then the
|
||||
* mapping will be normal, cacheable.
|
||||
*
|
||||
* Return: The fully populated (unlinked) stage-2 paging structure, or
|
||||
* an ERR_PTR(error) on failure.
|
||||
*/
|
||||
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
|
||||
u64 phys, u32 level,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *mc, bool force_pte);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
|
||||
@ -620,6 +672,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
|
||||
*/
|
||||
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
|
||||
* to PAGE_SIZE guest pages.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @addr: Intermediate physical address from which to split.
|
||||
* @size: Size of the range.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
*
|
||||
* The function tries to split any level 1 or 2 entry that overlaps
|
||||
* with the input range (given by @addr and @size).
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure. Note that
|
||||
* kvm_pgtable_stage2_split() is best effort: it tries to break as many
|
||||
* blocks in the input range as allowed by the capacity of @mc.
|
||||
*/
|
||||
int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
struct kvm_mmu_memory_cache *mc);
|
||||
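A hedged sketch of how a caller is expected to drive this from the
dirty-logging path (the wrapper and its locking context are
illustrative; split_page_cache is the cache added earlier in this
series):

    static int split_huge_pages(struct kvm *kvm, u64 addr, u64 size)
    {
            struct kvm_s2_mmu *mmu = &kvm->arch.mmu;

            lockdep_assert_held_write(&kvm->mmu_lock);

            /* The cache is topped up to split_page_chunk_size beforehand. */
            return kvm_pgtable_stage2_split(mmu->pgt, addr, size,
                                            &mmu->split_page_cache);
    }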
|
||||
/**
|
||||
* kvm_pgtable_walk() - Walk a page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_*_init().
|
||||
|
@ -6,7 +6,9 @@
|
||||
#ifndef __ARM64_KVM_PKVM_H__
|
||||
#define __ARM64_KVM_PKVM_H__
|
||||
|
||||
#include <linux/arm_ffa.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
/* Maximum number of VMs that can co-exist under pKVM. */
|
||||
@ -106,4 +108,23 @@ static inline unsigned long host_s2_pgtable_pages(void)
|
||||
return res;
|
||||
}
|
||||
|
||||
#define KVM_FFA_MBOX_NR_PAGES 1
|
||||
|
||||
static inline unsigned long hyp_ffa_proxy_pages(void)
|
||||
{
|
||||
size_t desc_max;
|
||||
|
||||
/*
|
||||
* The hypervisor FFA proxy needs enough memory to buffer a fragmented
|
||||
* descriptor returned from EL3 in response to a RETRIEVE_REQ call.
|
||||
*/
|
||||
desc_max = sizeof(struct ffa_mem_region) +
|
||||
sizeof(struct ffa_mem_region_attributes) +
|
||||
sizeof(struct ffa_composite_mem_region) +
|
||||
SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
|
||||
|
||||
/* Plus a page each for the hypervisor's RX and TX mailboxes. */
|
||||
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
|
||||
}
|
||||
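As a rough sanity check on the sizing above (illustrative arithmetic,
not from the patch):

    /*
     * Assuming 4KiB pages, SG_MAX_SEGMENTS == 128 and a 16-byte
     * ffa_mem_region_addr_range, desc_max comes to a little over 2KiB,
     * so DIV_ROUND_UP(desc_max, PAGE_SIZE) == 1 and hyp_ffa_proxy_pages()
     * returns 3 pages: two mailboxes plus one descriptor buffer page.
     */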
|
||||
#endif /* __ARM64_KVM_PKVM_H__ */
|
||||
|
@ -46,7 +46,7 @@
|
||||
#define KIMAGE_VADDR (MODULES_END)
|
||||
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
|
||||
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
|
||||
#define MODULES_VSIZE (SZ_128M)
|
||||
#define MODULES_VSIZE (SZ_2G)
|
||||
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
|
||||
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
|
||||
#define PCI_IO_END (VMEMMAP_START - SZ_8M)
|
||||
@ -204,15 +204,17 @@ static inline unsigned long kaslr_offset(void)
|
||||
return kimage_vaddr - KIMAGE_VADDR;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
void kaslr_init(void);
|
||||
static inline bool kaslr_enabled(void)
|
||||
{
|
||||
/*
|
||||
* The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
|
||||
* placement of the image rather than from the seed, so a displacement
|
||||
* of less than MIN_KIMG_ALIGN means that no seed was provided.
|
||||
*/
|
||||
return kaslr_offset() >= MIN_KIMG_ALIGN;
|
||||
extern bool __kaslr_is_enabled;
|
||||
return __kaslr_is_enabled;
|
||||
}
|
||||
#else
|
||||
static inline void kaslr_init(void) { }
|
||||
static inline bool kaslr_enabled(void) { return false; }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Allow all memory at the discovery stage. We will clip it later.
|
||||
|
@ -7,7 +7,6 @@
|
||||
|
||||
#include <asm-generic/module.h>
|
||||
|
||||
#ifdef CONFIG_ARM64_MODULE_PLTS
|
||||
struct mod_plt_sec {
|
||||
int plt_shndx;
|
||||
int plt_num_entries;
|
||||
@ -21,7 +20,6 @@ struct mod_arch_specific {
|
||||
/* for CONFIG_DYNAMIC_FTRACE */
|
||||
struct plt_entry *ftrace_trampolines;
|
||||
};
|
||||
#endif
|
||||
|
||||
u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
|
||||
void *loc, const Elf64_Rela *rela,
|
||||
@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
|
||||
u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
|
||||
void *loc, u64 val);
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
extern u64 module_alloc_base;
|
||||
#else
|
||||
#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
|
||||
#endif
|
||||
|
||||
struct plt_entry {
|
||||
/*
|
||||
* A program that conforms to the AArch64 Procedure Call Standard
|
||||
|
@ -1,9 +1,7 @@
|
||||
SECTIONS {
|
||||
#ifdef CONFIG_ARM64_MODULE_PLTS
|
||||
.plt 0 : { BYTE(0) }
|
||||
.init.plt 0 : { BYTE(0) }
|
||||
.text.ftrace_trampoline 0 : { BYTE(0) }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN_SW_TAGS
|
||||
/*
|
||||
|
@ -564,6 +564,7 @@
|
||||
(BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
|
||||
(BIT(29)))
|
||||
|
||||
#define SCTLR_EL2_BT (BIT(36))
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
#define ENDIAN_SET_EL2 SCTLR_ELx_EE
|
||||
#else
|
||||
|
@ -110,8 +110,10 @@ static inline bool is_hyp_mode_mismatched(void)
|
||||
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
|
||||
}
|
||||
|
||||
static inline bool is_kernel_in_hyp_mode(void)
|
||||
static __always_inline bool is_kernel_in_hyp_mode(void)
|
||||
{
|
||||
BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) ||
|
||||
__is_defined(__KVM_VHE_HYPERVISOR__));
|
||||
return read_sysreg(CurrentEL) == CurrentEL_EL2;
|
||||
}
|
||||
|
||||
@ -140,6 +142,14 @@ static __always_inline bool is_protected_kvm_enabled(void)
|
||||
return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
|
||||
}
|
||||
|
||||
static __always_inline bool has_hvhe(void)
|
||||
{
|
||||
if (is_vhe_hyp_code())
|
||||
return false;
|
||||
|
||||
return cpus_have_final_cap(ARM64_KVM_HVHE);
|
||||
}
|
||||
|
||||
static inline bool is_hyp_nvhe(void)
|
||||
{
|
||||
return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
|
||||
|
@ -42,8 +42,7 @@ obj-$(CONFIG_COMPAT) += sigreturn32.o
|
||||
obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o
|
||||
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
|
||||
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
|
||||
obj-$(CONFIG_MODULES) += module.o
|
||||
obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
|
||||
obj-$(CONFIG_MODULES) += module.o module-plts.o
|
||||
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
|
||||
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
|
||||
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
|
||||
|
@ -729,6 +729,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
|
||||
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
|
||||
.cpu_enable = cpu_clear_bf16_from_user_emulation,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
|
||||
{
|
||||
.desc = "AmpereOne erratum AC03_CPU_38",
|
||||
.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
|
||||
ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
|
||||
},
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
@ -664,6 +664,8 @@ struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
|
||||
struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
|
||||
struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
|
||||
|
||||
struct arm64_ftr_override arm64_sw_feature_override;
|
||||
|
||||
static const struct __ftr_reg_entry {
|
||||
u32 sys_id;
|
||||
struct arm64_ftr_reg *reg;
|
||||
@ -798,7 +800,7 @@ static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
|
||||
return reg;
|
||||
}
|
||||
|
||||
static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
|
||||
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
|
||||
s64 cur)
|
||||
{
|
||||
s64 ret = 0;
|
||||
@ -1996,6 +1998,19 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
|
||||
int __unused)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
val = read_sysreg(id_aa64mmfr1_el1);
|
||||
if (!cpuid_feature_extract_unsigned_field(val, ID_AA64MMFR1_EL1_VH_SHIFT))
|
||||
return false;
|
||||
|
||||
val = arm64_sw_feature_override.val & arm64_sw_feature_override.mask;
|
||||
return cpuid_feature_extract_unsigned_field(val, ARM64_SW_FEATURE_OVERRIDE_HVHE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64_PAN
|
||||
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
|
||||
{
|
||||
@ -2641,6 +2656,23 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.cpu_enable = cpu_enable_dit,
|
||||
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP)
|
||||
},
|
||||
{
|
||||
.desc = "VHE for hypervisor only",
|
||||
.capability = ARM64_KVM_HVHE,
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.matches = hvhe_possible,
|
||||
},
|
||||
{
|
||||
.desc = "Enhanced Virtualization Traps",
|
||||
.capability = ARM64_HAS_EVT,
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.sys_reg = SYS_ID_AA64MMFR2_EL1,
|
||||
.sign = FTR_UNSIGNED,
|
||||
.field_pos = ID_AA64MMFR2_EL1_EVT_SHIFT,
|
||||
.field_width = 4,
|
||||
.min_field_value = ID_AA64MMFR2_EL1_EVT_IMP,
|
||||
.matches = has_cpuid_feature,
|
||||
},
|
||||
{},
|
||||
};
|
||||
|
||||
|
@ -197,7 +197,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
|
||||
|
||||
static struct plt_entry *get_ftrace_plt(struct module *mod)
|
||||
{
|
||||
#ifdef CONFIG_ARM64_MODULE_PLTS
|
||||
#ifdef CONFIG_MODULES
|
||||
struct plt_entry *plt = mod->arch.ftrace_trampolines;
|
||||
|
||||
return &plt[FTRACE_PLT_IDX];
|
||||
@ -249,7 +249,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
|
||||
* must use a PLT to reach it. We can only place PLTs for modules, and
|
||||
* only when module PLT support is built-in.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
if (!IS_ENABLED(CONFIG_MODULES))
|
||||
return false;
|
||||
|
||||
/*
|
||||
@ -431,10 +431,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
|
||||
*
|
||||
* Note: 'mod' is only set at module load time.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) &&
|
||||
IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) {
|
||||
if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod)
|
||||
return aarch64_insn_patch_text_nosync((void *)pc, new);
|
||||
}
|
||||
|
||||
if (!ftrace_find_callable_addr(rec, mod, &addr))
|
||||
return -EINVAL;
|
||||
|
@ -603,6 +603,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
|
||||
msr sctlr_el1, x1
|
||||
mov x2, xzr
|
||||
2:
|
||||
__init_el2_nvhe_prepare_eret
|
||||
|
||||
mov w0, #BOOT_CPU_MODE_EL2
|
||||
orr x0, x0, x2
|
||||
eret
|
||||
|
@ -82,7 +82,15 @@ SYM_CODE_START_LOCAL(__finalise_el2)
|
||||
tbnz x1, #0, 1f
|
||||
|
||||
// Needs to be VHE capable, obviously
|
||||
check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f x1 x2
|
||||
check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 0f 1f x1 x2
|
||||
|
||||
0: // Check whether we only want the hypervisor to run VHE, not the kernel
|
||||
adr_l x1, arm64_sw_feature_override
|
||||
ldr x2, [x1, FTR_OVR_VAL_OFFSET]
|
||||
ldr x1, [x1, FTR_OVR_MASK_OFFSET]
|
||||
and x2, x2, x1
|
||||
ubfx x2, x2, #ARM64_SW_FEATURE_OVERRIDE_HVHE, #4
|
||||
cbz x2, 2f
|
||||
|
||||
1: mov_q x0, HVC_STUB_ERR
|
||||
eret
|
||||
|
@ -138,15 +138,22 @@ static const struct ftr_set_desc smfr0 __initconst = {
|
||||
},
|
||||
};
|
||||
|
||||
extern struct arm64_ftr_override kaslr_feature_override;
|
||||
static bool __init hvhe_filter(u64 val)
|
||||
{
|
||||
u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);
|
||||
|
||||
static const struct ftr_set_desc kaslr __initconst = {
|
||||
.name = "kaslr",
|
||||
#ifdef CONFIG_RANDOMIZE_BASE
|
||||
.override = &kaslr_feature_override,
|
||||
#endif
|
||||
return (val == 1 &&
|
||||
lower_32_bits(__boot_status) == BOOT_CPU_MODE_EL2 &&
|
||||
cpuid_feature_extract_unsigned_field(mmfr1,
|
||||
ID_AA64MMFR1_EL1_VH_SHIFT));
|
||||
}
|
||||
|
||||
static const struct ftr_set_desc sw_features __initconst = {
|
||||
.name = "arm64_sw",
|
||||
.override = &arm64_sw_feature_override,
|
||||
.fields = {
|
||||
FIELD("disabled", 0, NULL),
|
||||
FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
|
||||
FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
|
||||
{}
|
||||
},
|
||||
};
|
||||
@ -158,7 +165,7 @@ static const struct ftr_set_desc * const regs[] __initconst = {
|
||||
&isar1,
|
||||
&isar2,
|
||||
&smfr0,
|
||||
&kaslr,
|
||||
&sw_features,
|
||||
};
|
||||
|
||||
static const struct {
|
||||
@ -175,7 +182,7 @@ static const struct {
|
||||
"id_aa64isar1.api=0 id_aa64isar1.apa=0 "
|
||||
"id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
|
||||
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
|
||||
{ "nokaslr", "kaslr.disabled=1" },
|
||||
{ "nokaslr", "arm64_sw.nokaslr=1" },
|
||||
};
|
||||
|
||||
static int __init parse_nokaslr(char *unused)
|
||||
|
@ -4,90 +4,35 @@
|
||||
*/
|
||||
|
||||
#include <linux/cache.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/libfdt.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/printk.h>
|
||||
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/kernel-pgtable.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
u64 __ro_after_init module_alloc_base;
|
||||
u16 __initdata memstart_offset_seed;
|
||||
|
||||
struct arm64_ftr_override kaslr_feature_override __initdata;
|
||||
bool __ro_after_init __kaslr_is_enabled = false;
|
||||
|
||||
static int __init kaslr_init(void)
|
||||
void __init kaslr_init(void)
|
||||
{
|
||||
u64 module_range;
|
||||
u32 seed;
|
||||
|
||||
/*
|
||||
* Set a reasonable default for module_alloc_base in case
|
||||
* we end up running with module randomization disabled.
|
||||
*/
|
||||
module_alloc_base = (u64)_etext - MODULES_VSIZE;
|
||||
|
||||
if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
|
||||
if (cpuid_feature_extract_unsigned_field(arm64_sw_feature_override.val &
|
||||
arm64_sw_feature_override.mask,
|
||||
ARM64_SW_FEATURE_OVERRIDE_NOKASLR)) {
|
||||
pr_info("KASLR disabled on command line\n");
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!kaslr_enabled()) {
|
||||
/*
|
||||
* The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
|
||||
* placement of the image rather than from the seed, so a displacement
|
||||
* of less than MIN_KIMG_ALIGN means that no seed was provided.
|
||||
*/
|
||||
if (kaslr_offset() < MIN_KIMG_ALIGN) {
|
||||
pr_warn("KASLR disabled due to lack of seed\n");
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
pr_info("KASLR enabled\n");
|
||||
|
||||
/*
|
||||
* KASAN without KASAN_VMALLOC does not expect the module region to
|
||||
* intersect the vmalloc region, since shadow memory is allocated for
|
||||
* each module at load time, whereas the vmalloc region will already be
|
||||
* shadowed by KASAN zero pages.
|
||||
*/
|
||||
BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
|
||||
IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
|
||||
!IS_ENABLED(CONFIG_KASAN_VMALLOC));
|
||||
|
||||
seed = get_random_u32();
|
||||
|
||||
if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
|
||||
/*
|
||||
* Randomize the module region over a 2 GB window covering the
|
||||
* kernel. This reduces the risk of modules leaking information
|
||||
* about the address of the kernel itself, but results in
|
||||
* branches between modules and the core kernel that are
|
||||
* resolved via PLTs. (Branches between modules will be
|
||||
* resolved normally.)
|
||||
*/
|
||||
module_range = SZ_2G - (u64)(_end - _stext);
|
||||
module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
|
||||
} else {
|
||||
/*
|
||||
* Randomize the module region by setting module_alloc_base to
|
||||
* a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
|
||||
* _stext) . This guarantees that the resulting region still
|
||||
* covers [_stext, _etext], and that all relative branches can
|
||||
* be resolved without veneers unless this region is exhausted
|
||||
* and we fall back to a larger 2GB window in module_alloc()
|
||||
* when ARM64_MODULE_PLTS is enabled.
|
||||
*/
|
||||
module_range = MODULES_VSIZE - (u64)(_etext - _stext);
|
||||
}
|
||||
|
||||
/* use the lower 21 bits to randomize the base of the module region */
|
||||
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
|
||||
module_alloc_base &= PAGE_MASK;
|
||||
|
||||
return 0;
|
||||
__kaslr_is_enabled = true;
|
||||
}
|
||||
subsys_initcall(kaslr_init)
|
||||
|
@ -7,6 +7,8 @@
|
||||
* Author: Will Deacon <will.deacon@arm.com>
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "Modules: " fmt
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/ftrace.h>
|
||||
@ -15,52 +17,131 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/moduleloader.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/scs.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/insn.h>
|
||||
#include <asm/scs.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
static u64 module_direct_base __ro_after_init = 0;
|
||||
static u64 module_plt_base __ro_after_init = 0;
|
||||
|
||||
/*
|
||||
* Choose a random page-aligned base address for a window of 'size' bytes which
|
||||
* entirely contains the interval [start, end - 1].
|
||||
*/
|
||||
static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
|
||||
{
|
||||
u64 max_pgoff, pgoff;
|
||||
|
||||
if ((end - start) >= size)
|
||||
return 0;
|
||||
|
||||
max_pgoff = (size - (end - start)) / PAGE_SIZE;
|
||||
pgoff = get_random_u32_inclusive(0, max_pgoff);
|
||||
|
||||
return start - pgoff * PAGE_SIZE;
|
||||
}
|
||||
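To make the intent concrete, a small sketch of how module_init_limits()
below uses this for the 128MiB direct-branch window (the wrapper
function and the print are illustrative only):

    static void __init report_direct_window(void)
    {
            u64 base = random_bounding_box(SZ_128M, (u64)_text, (u64)_end);

            /* A zero return means the image itself does not fit the window. */
            if (base)
                    pr_info("direct-branch window: [%llx, %llx)\n",
                            base, base + SZ_128M);
    }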
|
||||
/*
|
||||
* Modules may directly reference data and text anywhere within the kernel
|
||||
* image and other modules. References using PREL32 relocations have a +/-2G
|
||||
* range, and so we need to ensure that the entire kernel image and all modules
|
||||
* fall within a 2G window such that these are always within range.
|
||||
*
|
||||
* Modules may directly branch to functions and code within the kernel text,
|
||||
* and to functions and code within other modules. These branches will use
|
||||
* CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
|
||||
* that the entire kernel text and all module text falls within a 128M window
|
||||
* such that these are always within range. With PLTs, we can expand this to a
|
||||
* 2G window.
|
||||
*
|
||||
* We chose the 128M region to surround the entire kernel image (rather than
|
||||
* just the text) as using the same bounds for the 128M and 2G regions ensures
|
||||
* by construction that we never select a 128M region that is not a subset of
|
||||
* the 2G region. For very large and unusual kernel configurations this means
|
||||
* we may fall back to PLTs where they could have been avoided, but this keeps
|
||||
* the logic significantly simpler.
|
||||
*/
|
||||
static int __init module_init_limits(void)
|
||||
{
|
||||
u64 kernel_end = (u64)_end;
|
||||
u64 kernel_start = (u64)_text;
|
||||
u64 kernel_size = kernel_end - kernel_start;
|
||||
|
||||
/*
|
||||
* The default modules region is placed immediately below the kernel
|
||||
* image, and is large enough to use the full 2G relocation range.
|
||||
*/
|
||||
BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
|
||||
BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
|
||||
|
||||
if (!kaslr_enabled()) {
|
||||
if (kernel_size < SZ_128M)
|
||||
module_direct_base = kernel_end - SZ_128M;
|
||||
if (kernel_size < SZ_2G)
|
||||
module_plt_base = kernel_end - SZ_2G;
|
||||
} else {
|
||||
u64 min = kernel_start;
|
||||
u64 max = kernel_end;
|
||||
|
||||
if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
|
||||
pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
|
||||
} else {
|
||||
module_direct_base = random_bounding_box(SZ_128M, min, max);
|
||||
if (module_direct_base) {
|
||||
min = module_direct_base;
|
||||
max = module_direct_base + SZ_128M;
|
||||
}
|
||||
}
|
||||
|
||||
module_plt_base = random_bounding_box(SZ_2G, min, max);
|
||||
}
|
||||
|
||||
pr_info("%llu pages in range for non-PLT usage",
|
||||
module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
|
||||
pr_info("%llu pages in range for PLT usage",
|
||||
module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(module_init_limits);
|
||||
|
||||
void *module_alloc(unsigned long size)
|
||||
{
|
||||
u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
|
||||
gfp_t gfp_mask = GFP_KERNEL;
|
||||
void *p;
|
||||
void *p = NULL;
|
||||
|
||||
/* Silence the initial allocation */
|
||||
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
gfp_mask |= __GFP_NOWARN;
|
||||
/*
|
||||
* Where possible, prefer to allocate within direct branch range of the
|
||||
* kernel such that no PLTs are necessary.
|
||||
*/
|
||||
if (module_direct_base) {
|
||||
p = __vmalloc_node_range(size, MODULE_ALIGN,
|
||||
module_direct_base,
|
||||
module_direct_base + SZ_128M,
|
||||
GFP_KERNEL | __GFP_NOWARN,
|
||||
PAGE_KERNEL, 0, NUMA_NO_NODE,
|
||||
__builtin_return_address(0));
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
|
||||
IS_ENABLED(CONFIG_KASAN_SW_TAGS))
|
||||
/* don't exceed the static module region - see below */
|
||||
module_alloc_end = MODULES_END;
|
||||
if (!p && module_plt_base) {
|
||||
p = __vmalloc_node_range(size, MODULE_ALIGN,
|
||||
module_plt_base,
|
||||
module_plt_base + SZ_2G,
|
||||
GFP_KERNEL | __GFP_NOWARN,
|
||||
PAGE_KERNEL, 0, NUMA_NO_NODE,
|
||||
__builtin_return_address(0));
|
||||
}
|
||||
|
||||
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
|
||||
module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
|
||||
NUMA_NO_NODE, __builtin_return_address(0));
|
||||
if (!p) {
|
||||
pr_warn_ratelimited("%s: unable to allocate memory\n",
|
||||
__func__);
|
||||
}
|
||||
|
||||
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
|
||||
(IS_ENABLED(CONFIG_KASAN_VMALLOC) ||
|
||||
(!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
|
||||
!IS_ENABLED(CONFIG_KASAN_SW_TAGS))))
|
||||
/*
|
||||
* KASAN without KASAN_VMALLOC can only deal with module
|
||||
* allocations being served from the reserved module region,
|
||||
* since the remainder of the vmalloc region is already
|
||||
* backed by zero shadow pages, and punching holes into it
|
||||
* is non-trivial. Since the module region is not randomized
|
||||
* when KASAN is enabled without KASAN_VMALLOC, it is even
|
||||
* less likely that the module region gets exhausted, so we
|
||||
* can simply omit this fallback in that case.
|
||||
*/
|
||||
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
|
||||
module_alloc_base + SZ_2G, GFP_KERNEL,
|
||||
PAGE_KERNEL, 0, NUMA_NO_NODE,
|
||||
__builtin_return_address(0));
|
||||
|
||||
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
|
||||
if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
|
||||
vfree(p);
|
||||
return NULL;
|
||||
}
|
||||
@ -448,9 +529,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
|
||||
case R_AARCH64_CALL26:
|
||||
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
|
||||
AARCH64_INSN_IMM_26);
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
|
||||
ovf == -ERANGE) {
|
||||
if (ovf == -ERANGE) {
|
||||
val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
|
||||
if (!val)
|
||||
return -ENOEXEC;
|
||||
@ -487,7 +566,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
|
||||
const Elf_Shdr *sechdrs,
|
||||
struct module *mod)
|
||||
{
|
||||
#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
|
||||
#if defined(CONFIG_DYNAMIC_FTRACE)
|
||||
const Elf_Shdr *s;
|
||||
struct plt_entry *plts;
|
||||
|
||||
|
@ -296,6 +296,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
|
||||
|
||||
*cmdline_p = boot_command_line;
|
||||
|
||||
kaslr_init();
|
||||
|
||||
/*
|
||||
* If know now we are going to need KPTI then use non-global
|
||||
* mappings from the start, avoiding the cost of rewriting
|
||||
|
@@ -1406,7 +1406,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_irq;
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
@@ -1422,7 +1422,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
	if (err) {
		kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
			host_ptimer_irq, err);
		return err;
		goto out_free_vtimer_irq;
	}

	if (has_gic) {
@@ -1430,7 +1430,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_irq;
			goto out_free_ptimer_irq;
		}
	}

@@ -1439,11 +1439,15 @@ int __init kvm_timer_hyp_init(bool has_gic)
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_irq;
		goto out_free_vtimer_irq;
	}

	return 0;
out_free_irq:

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

@@ -51,6 +51,8 @@ DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);

DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);

static bool vgic_present;

static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
@@ -65,6 +67,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
			    struct kvm_enable_cap *cap)
{
	int r;
	u64 new_cap;

	if (cap->flags)
		return -EINVAL;
@@ -89,6 +92,24 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
		r = 0;
		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
		break;
	case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
		new_cap = cap->args[0];

		mutex_lock(&kvm->slots_lock);
		/*
		 * To keep things simple, allow changing the chunk
		 * size only when no memory slots have been created.
		 */
		if (!kvm_are_all_memslots_empty(kvm)) {
			r = -EINVAL;
		} else if (new_cap && !kvm_is_block_size_supported(new_cap)) {
			r = -EINVAL;
		} else {
			r = 0;
			kvm->arch.mmu.split_page_chunk_size = new_cap;
		}
		mutex_unlock(&kvm->slots_lock);
		break;
	default:
		r = -EINVAL;
		break;
@@ -102,22 +123,6 @@ static int kvm_arm_default_max_vcpus(void)
	return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
}

static void set_default_spectre(struct kvm *kvm)
{
	/*
	 * The default is to expose CSV2 == 1 if the HW isn't affected.
	 * Although this is a per-CPU feature, we make it global because
	 * asymmetric systems are just a nuisance.
	 *
	 * Userspace can override this as long as it doesn't promise
	 * the impossible.
	 */
	if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
		kvm->arch.pfr0_csv2 = 1;
	if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
		kvm->arch.pfr0_csv3 = 1;
}

/**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm: pointer to the KVM struct
@@ -161,14 +166,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	/* The maximum number of VCPUs is limited by the host's GIC model */
	kvm->max_vcpus = kvm_arm_default_max_vcpus();

	set_default_spectre(kvm);
	kvm_arm_init_hypercalls(kvm);

	/*
	 * Initialise the default PMUver before there is a chance to
	 * create an actual PMU.
	 */
	kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
	bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES);

	return 0;

@@ -302,6 +302,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_ARM_PTRAUTH_GENERIC:
		r = system_has_full_ptr_auth();
		break;
	case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
		if (kvm)
			r = kvm->arch.mmu.split_page_chunk_size;
		else
			r = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
		break;
	case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES:
		r = kvm_supported_block_sizes();
		break;
	default:
		r = 0;
	}
@@ -1167,58 +1176,115 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
	return -EINVAL;
}

static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
			       const struct kvm_vcpu_init *init)
static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu,
					const struct kvm_vcpu_init *init)
{
	unsigned int i, ret;
	u32 phys_target = kvm_target_cpu();
	unsigned long features = init->features[0];
	int i;

	if (init->target != phys_target)
		return -EINVAL;
	if (features & ~KVM_VCPU_VALID_FEATURES)
		return -ENOENT;

	/*
	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
	 * use the same target.
	 */
	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
		return -EINVAL;

	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
	for (i = 0; i < sizeof(init->features) * 8; i++) {
		bool set = (init->features[i / 32] & (1 << (i % 32)));

		if (set && i >= KVM_VCPU_MAX_FEATURES)
	for (i = 1; i < ARRAY_SIZE(init->features); i++) {
		if (init->features[i])
			return -ENOENT;

		/*
		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
		 * use the same feature set.
		 */
		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
		    test_bit(i, vcpu->arch.features) != set)
			return -EINVAL;

		if (set)
			set_bit(i, vcpu->arch.features);
	}

	vcpu->arch.target = phys_target;
	if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features))
		return 0;

	if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
		return -EINVAL;

	/* MTE is incompatible with AArch32 */
	if (kvm_has_mte(vcpu->kvm))
		return -EINVAL;

	/* NV is incompatible with AArch32 */
	if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features))
		return -EINVAL;

	return 0;
}

static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
				  const struct kvm_vcpu_init *init)
{
	unsigned long features = init->features[0];

	return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
	       vcpu->arch.target != init->target;
}

static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
				 const struct kvm_vcpu_init *init)
{
	unsigned long features = init->features[0];
	struct kvm *kvm = vcpu->kvm;
	int ret = -EINVAL;

	mutex_lock(&kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags) &&
	    !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
		goto out_unlock;

	vcpu->arch.target = init->target;
	bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);

	/* Now we know what it is, we can reset it. */
	ret = kvm_reset_vcpu(vcpu);
	if (ret) {
		vcpu->arch.target = -1;
		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
		goto out_unlock;
	}

	bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
	set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);

out_unlock:
	mutex_unlock(&kvm->arch.config_lock);
	return ret;
}

static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
			       const struct kvm_vcpu_init *init)
{
	int ret;

	if (init->target != kvm_target_cpu())
		return -EINVAL;

	ret = kvm_vcpu_init_check_features(vcpu, init);
	if (ret)
		return ret;

	if (vcpu->arch.target == -1)
		return __kvm_vcpu_set_target(vcpu, init);

	if (kvm_vcpu_init_changed(vcpu, init))
		return -EINVAL;

	return kvm_reset_vcpu(vcpu);
}

static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
					 struct kvm_vcpu_init *init)
{
	bool power_off = false;
	int ret;

	/*
	 * Treat the power-off vCPU feature as ephemeral. Clear the bit to avoid
	 * reflecting it in the finalized feature set, thus limiting its scope
	 * to a single KVM_ARM_VCPU_INIT call.
	 */
	if (init->features[0] & BIT(KVM_ARM_VCPU_POWER_OFF)) {
		init->features[0] &= ~BIT(KVM_ARM_VCPU_POWER_OFF);
		power_off = true;
	}

	ret = kvm_vcpu_set_target(vcpu, init);
	if (ret)
		return ret;
@@ -1240,14 +1306,14 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
	}

	vcpu_reset_hcr(vcpu);
	vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;
	vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);

	/*
	 * Handle the "start in power-off" case.
	 */
	spin_lock(&vcpu->arch.mp_state_lock);

	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
	if (power_off)
		__kvm_arm_vcpu_power_off(vcpu);
	else
		WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
@@ -1666,7 +1732,13 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)

	params->mair_el2 = read_sysreg(mair_el1);

	tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
	tcr = read_sysreg(tcr_el1);
	if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
		tcr |= TCR_EPD1_MASK;
	} else {
		tcr &= TCR_EL2_MASK;
		tcr |= TCR_EL2_RES1;
	}
	tcr &= ~TCR_T0SZ_MASK;
	tcr |= TCR_T0SZ(hyp_va_bits);
	params->tcr_el2 = tcr;
@@ -1676,6 +1748,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
		params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
	else
		params->hcr_el2 = HCR_HOST_NVHE_FLAGS;
	if (cpus_have_final_cap(ARM64_KVM_HVHE))
		params->hcr_el2 |= HCR_E2H;
	params->vttbr = params->vtcr = 0;

	/*
@@ -1910,6 +1984,7 @@ static bool __init init_psci_relay(void)
	}

	kvm_host_psci_config.version = psci_ops.get_version();
	kvm_host_psci_config.smccc_version = arm_smccc_get_version();

	if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
		kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
@@ -2067,6 +2142,26 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
	return 0;
}

static void pkvm_hyp_init_ptrauth(void)
{
	struct kvm_cpu_context *hyp_ctxt;
	int cpu;

	for_each_possible_cpu(cpu) {
		hyp_ctxt = per_cpu_ptr_nvhe_sym(kvm_hyp_ctxt, cpu);
		hyp_ctxt->sys_regs[APIAKEYLO_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APIAKEYHI_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APIBKEYLO_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APIBKEYHI_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APDAKEYLO_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APDAKEYHI_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APDBKEYLO_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APDBKEYHI_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APGAKEYLO_EL1] = get_random_long();
		hyp_ctxt->sys_regs[APGAKEYHI_EL1] = get_random_long();
	}
}

/* Inits Hyp-mode on all online CPUs */
static int __init init_hyp_mode(void)
{
@@ -2228,6 +2323,10 @@ static int __init init_hyp_mode(void)
	kvm_hyp_init_symbols();

	if (is_protected_kvm_enabled()) {
		if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) &&
		    cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH))
			pkvm_hyp_init_ptrauth();

		init_cpu_logical_map();

		if (!init_psci_relay()) {
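A hedged userspace sketch, not part of the series above: it shows how the new KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE capability could be exercised through the standard KVM_CHECK_EXTENSION / KVM_ENABLE_CAP ioctls, given a VM file descriptor. The assumption (taken from the hunks above) is that KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES reports a bitmap of supported block sizes, that the chunk size must be one of them, and that the cap has to be enabled before any memslot is created; the choice of the lowest supported size is purely illustrative.

	/* Illustrative only: enable eager page splitting on a freshly created VM. */
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int enable_eager_split(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE,
		};
		long sizes;

		/* Bitmap of block sizes the host stage-2 can split eagerly. */
		sizes = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES);
		if (sizes <= 0)
			return -1;

		/* Pick the smallest supported chunk size as an example. */
		cap.args[0] = sizes & -sizes;

		/* Must happen before any KVM_SET_USER_MEMORY_REGION call. */
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}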
@@ -180,7 +180,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)

	/*
	 * If we have VHE then the Hyp code will reset CPACR_EL1 to
	 * CPACR_EL1_DEFAULT and we need to reenable SME.
	 * the default value and we need to reenable SME.
	 */
	if (has_vhe() && system_supports_sme()) {
		/* Also restore EL0 state seen on entry */
@@ -210,7 +210,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
		/*
		 * The FPSIMD/SVE state in the CPU has not been touched, and we
		 * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
		 * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
		 * reset by kvm_reset_cptr_el2() in the Hyp code, disabling SVE
		 * for EL0. To avoid spurious traps, restore the trap state
		 * seen by kvm_arch_vcpu_load_fp():
		 */
@@ -70,6 +70,56 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
	}
}

static inline bool __hfgxtr_traps_required(void)
{
	if (cpus_have_final_cap(ARM64_SME))
		return true;

	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
		return true;

	return false;
}

static inline void __activate_traps_hfgxtr(void)
{
	u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;

	if (cpus_have_final_cap(ARM64_SME)) {
		tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;

		r_clr |= tmp;
		w_clr |= tmp;
	}

	/*
	 * Trap guest writes to TCR_EL1 to prevent it from enabling HA or HD.
	 */
	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
		w_set |= HFGxTR_EL2_TCR_EL1_MASK;

	sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
	sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
}

static inline void __deactivate_traps_hfgxtr(void)
{
	u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;

	if (cpus_have_final_cap(ARM64_SME)) {
		tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;

		r_set |= tmp;
		w_set |= tmp;
	}

	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
		w_clr |= HFGxTR_EL2_TCR_EL1_MASK;

	sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
	sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
}

static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
@@ -95,16 +145,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
	vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);

	if (cpus_have_final_cap(ARM64_SME)) {
		sysreg_clear_set_s(SYS_HFGRTR_EL2,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK,
				   0);
		sysreg_clear_set_s(SYS_HFGWTR_EL2,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK,
				   0);
	}
	if (__hfgxtr_traps_required())
		__activate_traps_hfgxtr();
}

static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -120,14 +162,8 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
		vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
	}

	if (cpus_have_final_cap(ARM64_SME)) {
		sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK);
		sysreg_clear_set_s(SYS_HFGWTR_EL2, 0,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK);
	}
	if (__hfgxtr_traps_required())
		__deactivate_traps_hfgxtr();
}

static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@@ -203,7 +239,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
	/* Valid trap. Switch the context: */

	/* First disable enough traps to allow us to update the registers */
	if (has_vhe()) {
	if (has_vhe() || has_hvhe()) {
		reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
		if (sve_guest)
			reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
@@ -395,12 +431,39 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
	return true;
}

static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
{
	u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
	int rt = kvm_vcpu_sys_get_rt(vcpu);
	u64 val = vcpu_get_reg(vcpu, rt);

	if (sysreg != SYS_TCR_EL1)
		return false;

	/*
	 * Affected parts do not advertise support for hardware Access Flag /
	 * Dirty state management in ID_AA64MMFR1_EL1.HAFDBS, but the underlying
	 * control bits are still functional. The architecture requires these be
	 * RES0 on systems that do not implement FEAT_HAFDBS.
	 *
	 * Uphold the requirements of the architecture by masking guest writes
	 * to TCR_EL1.{HA,HD} here.
	 */
	val &= ~(TCR_HD | TCR_HA);
	write_sysreg_el1(val, SYS_TCR);
	return true;
}

static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
	    handle_tx2_tvm(vcpu))
		return true;

	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) &&
	    handle_ampere1_tcr(vcpu))
		return true;

	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    __vgic_v3_perform_cpuif_access(vcpu) == 1)
		return true;
arch/arm64/kvm/hyp/include/nvhe/ffa.h (new file, 17 lines)
@@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2022 - Google LLC
 * Author: Andrew Walbran <qwandor@google.com>
 */
#ifndef __KVM_HYP_FFA_H
#define __KVM_HYP_FFA_H

#include <asm/kvm_host.h>

#define FFA_MIN_FUNC_NUM 0x60
#define FFA_MAX_FUNC_NUM 0x7F

int hyp_ffa_init(void *pages);
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt);

#endif /* __KVM_HYP_FFA_H */
@@ -57,6 +57,7 @@ extern struct host_mmu host_mmu;
enum pkvm_component_id {
	PKVM_ID_HOST,
	PKVM_ID_HYP,
	PKVM_ID_FFA,
};

extern unsigned long hyp_nr_cpus;
@@ -66,6 +67,8 @@ int __pkvm_host_share_hyp(u64 pfn);
int __pkvm_host_unshare_hyp(u64 pfn);
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);

bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
@@ -22,7 +22,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))

hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
arch/arm64/kvm/hyp/nvhe/ffa.c (new file, 762 lines)
@@ -0,0 +1,762 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * FF-A v1.0 proxy to filter out invalid memory-sharing SMC calls issued by
 * the host. FF-A is a slightly more palatable abbreviation of "Arm Firmware
 * Framework for Arm A-profile", which is specified by Arm in document
 * number DEN0077.
 *
 * Copyright (C) 2022 - Google LLC
 * Author: Andrew Walbran <qwandor@google.com>
 *
 * This driver hooks into the SMC trapping logic for the host and intercepts
 * all calls falling within the FF-A range. Each call is either:
 *
 *	- Forwarded on unmodified to the SPMD at EL3
 *	- Rejected as "unsupported"
 *	- Accompanied by a host stage-2 page-table check/update and reissued
 *
 * Consequently, any attempts by the host to make guest memory pages
 * accessible to the secure world using FF-A will be detected either here
 * (in the case that the memory is already owned by the guest) or during
 * donation to the guest (in the case that the memory was previously shared
 * with the secure world).
 *
 * To allow the rolling-back of page-table updates and FF-A calls in the
 * event of failure, operations involving the RXTX buffers are locked for
 * the duration and are therefore serialised.
 */

#include <linux/arm-smccc.h>
#include <linux/arm_ffa.h>
#include <asm/kvm_pkvm.h>

#include <nvhe/ffa.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/trap_handler.h>
#include <nvhe/spinlock.h>

/*
 * "ID value 0 must be returned at the Non-secure physical FF-A instance"
 * We share this ID with the host.
 */
#define HOST_FFA_ID	0

/*
 * A buffer to hold the maximum descriptor size we can see from the host,
 * which is required when the SPMD returns a fragmented FFA_MEM_RETRIEVE_RESP
 * when resolving the handle on the reclaim path.
 */
struct kvm_ffa_descriptor_buffer {
	void	*buf;
	size_t	len;
};

static struct kvm_ffa_descriptor_buffer ffa_desc_buf;

struct kvm_ffa_buffers {
	hyp_spinlock_t lock;
	void *tx;
	void *rx;
};

/*
 * Note that we don't currently lock these buffers explicitly, instead
 * relying on the locking of the host FFA buffers as we only have one
 * client.
 */
static struct kvm_ffa_buffers hyp_buffers;
static struct kvm_ffa_buffers host_buffers;

static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
{
	*res = (struct arm_smccc_res) {
		.a0	= FFA_ERROR,
		.a2	= ffa_errno,
	};
}

static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
{
	if (ret == FFA_RET_SUCCESS) {
		*res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
						.a2 = prop };
	} else {
		ffa_to_smccc_error(res, ret);
	}
}

static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
{
	ffa_to_smccc_res_prop(res, ret, 0);
}

static void ffa_set_retval(struct kvm_cpu_context *ctxt,
			   struct arm_smccc_res *res)
{
	cpu_reg(ctxt, 0) = res->a0;
	cpu_reg(ctxt, 1) = res->a1;
	cpu_reg(ctxt, 2) = res->a2;
	cpu_reg(ctxt, 3) = res->a3;
}

static bool is_ffa_call(u64 func_id)
{
	return ARM_SMCCC_IS_FAST_CALL(func_id) &&
	       ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD &&
	       ARM_SMCCC_FUNC_NUM(func_id) >= FFA_MIN_FUNC_NUM &&
	       ARM_SMCCC_FUNC_NUM(func_id) <= FFA_MAX_FUNC_NUM;
}

static int ffa_map_hyp_buffers(u64 ffa_page_count)
{
	struct arm_smccc_res res;

	arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP,
			  hyp_virt_to_phys(hyp_buffers.tx),
			  hyp_virt_to_phys(hyp_buffers.rx),
			  ffa_page_count,
			  0, 0, 0, 0,
			  &res);

	return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}

static int ffa_unmap_hyp_buffers(void)
{
	struct arm_smccc_res res;

	arm_smccc_1_1_smc(FFA_RXTX_UNMAP,
			  HOST_FFA_ID,
			  0, 0, 0, 0, 0, 0,
			  &res);

	return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo,
			    u32 handle_hi, u32 fraglen, u32 endpoint_id)
{
	arm_smccc_1_1_smc(FFA_MEM_FRAG_TX,
			  handle_lo, handle_hi, fraglen, endpoint_id,
			  0, 0, 0,
			  res);
}

static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo,
			    u32 handle_hi, u32 fragoff)
{
	arm_smccc_1_1_smc(FFA_MEM_FRAG_RX,
			  handle_lo, handle_hi, fragoff, HOST_FFA_ID,
			  0, 0, 0,
			  res);
}

static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len,
			 u32 fraglen)
{
	arm_smccc_1_1_smc(func_id, len, fraglen,
			  0, 0, 0, 0, 0,
			  res);
}

static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo,
			    u32 handle_hi, u32 flags)
{
	arm_smccc_1_1_smc(FFA_MEM_RECLAIM,
			  handle_lo, handle_hi, flags,
			  0, 0, 0, 0,
			  res);
}

static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
{
	arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ,
			  len, len,
			  0, 0, 0, 0, 0,
			  res);
}

static void do_ffa_rxtx_map(struct arm_smccc_res *res,
			    struct kvm_cpu_context *ctxt)
{
	DECLARE_REG(phys_addr_t, tx, ctxt, 1);
	DECLARE_REG(phys_addr_t, rx, ctxt, 2);
	DECLARE_REG(u32, npages, ctxt, 3);
	int ret = 0;
	void *rx_virt, *tx_virt;

	if (npages != (KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) / FFA_PAGE_SIZE) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out;
	}

	if (!PAGE_ALIGNED(tx) || !PAGE_ALIGNED(rx)) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out;
	}

	hyp_spin_lock(&host_buffers.lock);
	if (host_buffers.tx) {
		ret = FFA_RET_DENIED;
		goto out_unlock;
	}

	/*
	 * Map our hypervisor buffers into the SPMD before mapping and
	 * pinning the host buffers in our own address space.
	 */
	ret = ffa_map_hyp_buffers(npages);
	if (ret)
		goto out_unlock;

	ret = __pkvm_host_share_hyp(hyp_phys_to_pfn(tx));
	if (ret) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto err_unmap;
	}

	ret = __pkvm_host_share_hyp(hyp_phys_to_pfn(rx));
	if (ret) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto err_unshare_tx;
	}

	tx_virt = hyp_phys_to_virt(tx);
	ret = hyp_pin_shared_mem(tx_virt, tx_virt + 1);
	if (ret) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto err_unshare_rx;
	}

	rx_virt = hyp_phys_to_virt(rx);
	ret = hyp_pin_shared_mem(rx_virt, rx_virt + 1);
	if (ret) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto err_unpin_tx;
	}

	host_buffers.tx = tx_virt;
	host_buffers.rx = rx_virt;

out_unlock:
	hyp_spin_unlock(&host_buffers.lock);
out:
	ffa_to_smccc_res(res, ret);
	return;

err_unpin_tx:
	hyp_unpin_shared_mem(tx_virt, tx_virt + 1);
err_unshare_rx:
	__pkvm_host_unshare_hyp(hyp_phys_to_pfn(rx));
err_unshare_tx:
	__pkvm_host_unshare_hyp(hyp_phys_to_pfn(tx));
err_unmap:
	ffa_unmap_hyp_buffers();
	goto out_unlock;
}

static void do_ffa_rxtx_unmap(struct arm_smccc_res *res,
			      struct kvm_cpu_context *ctxt)
{
	DECLARE_REG(u32, id, ctxt, 1);
	int ret = 0;

	if (id != HOST_FFA_ID) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out;
	}

	hyp_spin_lock(&host_buffers.lock);
	if (!host_buffers.tx) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out_unlock;
	}

	hyp_unpin_shared_mem(host_buffers.tx, host_buffers.tx + 1);
	WARN_ON(__pkvm_host_unshare_hyp(hyp_virt_to_pfn(host_buffers.tx)));
	host_buffers.tx = NULL;

	hyp_unpin_shared_mem(host_buffers.rx, host_buffers.rx + 1);
	WARN_ON(__pkvm_host_unshare_hyp(hyp_virt_to_pfn(host_buffers.rx)));
	host_buffers.rx = NULL;

	ffa_unmap_hyp_buffers();

out_unlock:
	hyp_spin_unlock(&host_buffers.lock);
out:
	ffa_to_smccc_res(res, ret);
}

static u32 __ffa_host_share_ranges(struct ffa_mem_region_addr_range *ranges,
				   u32 nranges)
{
	u32 i;

	for (i = 0; i < nranges; ++i) {
		struct ffa_mem_region_addr_range *range = &ranges[i];
		u64 sz = (u64)range->pg_cnt * FFA_PAGE_SIZE;
		u64 pfn = hyp_phys_to_pfn(range->address);

		if (!PAGE_ALIGNED(sz))
			break;

		if (__pkvm_host_share_ffa(pfn, sz / PAGE_SIZE))
			break;
	}

	return i;
}

static u32 __ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges,
				     u32 nranges)
{
	u32 i;

	for (i = 0; i < nranges; ++i) {
		struct ffa_mem_region_addr_range *range = &ranges[i];
		u64 sz = (u64)range->pg_cnt * FFA_PAGE_SIZE;
		u64 pfn = hyp_phys_to_pfn(range->address);

		if (!PAGE_ALIGNED(sz))
			break;

		if (__pkvm_host_unshare_ffa(pfn, sz / PAGE_SIZE))
			break;
	}

	return i;
}

static int ffa_host_share_ranges(struct ffa_mem_region_addr_range *ranges,
				 u32 nranges)
{
	u32 nshared = __ffa_host_share_ranges(ranges, nranges);
	int ret = 0;

	if (nshared != nranges) {
		WARN_ON(__ffa_host_unshare_ranges(ranges, nshared) != nshared);
		ret = FFA_RET_DENIED;
	}

	return ret;
}

static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges,
				   u32 nranges)
{
	u32 nunshared = __ffa_host_unshare_ranges(ranges, nranges);
	int ret = 0;

	if (nunshared != nranges) {
		WARN_ON(__ffa_host_share_ranges(ranges, nunshared) != nunshared);
		ret = FFA_RET_DENIED;
	}

	return ret;
}

static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
			       struct kvm_cpu_context *ctxt)
{
	DECLARE_REG(u32, handle_lo, ctxt, 1);
	DECLARE_REG(u32, handle_hi, ctxt, 2);
	DECLARE_REG(u32, fraglen, ctxt, 3);
	DECLARE_REG(u32, endpoint_id, ctxt, 4);
	struct ffa_mem_region_addr_range *buf;
	int ret = FFA_RET_INVALID_PARAMETERS;
	u32 nr_ranges;

	if (fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)
		goto out;

	if (fraglen % sizeof(*buf))
		goto out;

	hyp_spin_lock(&host_buffers.lock);
	if (!host_buffers.tx)
		goto out_unlock;

	buf = hyp_buffers.tx;
	memcpy(buf, host_buffers.tx, fraglen);
	nr_ranges = fraglen / sizeof(*buf);

	ret = ffa_host_share_ranges(buf, nr_ranges);
	if (ret) {
		/*
		 * We're effectively aborting the transaction, so we need
		 * to restore the global state back to what it was prior to
		 * transmission of the first fragment.
		 */
		ffa_mem_reclaim(res, handle_lo, handle_hi, 0);
		WARN_ON(res->a0 != FFA_SUCCESS);
		goto out_unlock;
	}

	ffa_mem_frag_tx(res, handle_lo, handle_hi, fraglen, endpoint_id);
	if (res->a0 != FFA_SUCCESS && res->a0 != FFA_MEM_FRAG_RX)
		WARN_ON(ffa_host_unshare_ranges(buf, nr_ranges));

out_unlock:
	hyp_spin_unlock(&host_buffers.lock);
out:
	if (ret)
		ffa_to_smccc_res(res, ret);

	/*
	 * If for any reason this did not succeed, we're in trouble as we have
	 * now lost the content of the previous fragments and we can't rollback
	 * the host stage-2 changes. The pages previously marked as shared will
	 * remain stuck in that state forever, hence preventing the host from
	 * sharing/donating them again and may possibly lead to subsequent
	 * failures, but this will not compromise confidentiality.
	 */
	return;
}

static __always_inline void do_ffa_mem_xfer(const u64 func_id,
					    struct arm_smccc_res *res,
					    struct kvm_cpu_context *ctxt)
{
	DECLARE_REG(u32, len, ctxt, 1);
	DECLARE_REG(u32, fraglen, ctxt, 2);
	DECLARE_REG(u64, addr_mbz, ctxt, 3);
	DECLARE_REG(u32, npages_mbz, ctxt, 4);
	struct ffa_composite_mem_region *reg;
	struct ffa_mem_region *buf;
	u32 offset, nr_ranges;
	int ret = 0;

	BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE &&
		     func_id != FFA_FN64_MEM_LEND);

	if (addr_mbz || npages_mbz || fraglen > len ||
	    fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out;
	}

	if (fraglen < sizeof(struct ffa_mem_region) +
		      sizeof(struct ffa_mem_region_attributes)) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out;
	}

	hyp_spin_lock(&host_buffers.lock);
	if (!host_buffers.tx) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out_unlock;
	}

	buf = hyp_buffers.tx;
	memcpy(buf, host_buffers.tx, fraglen);

	offset = buf->ep_mem_access[0].composite_off;
	if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out_unlock;
	}

	if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out_unlock;
	}

	reg = (void *)buf + offset;
	nr_ranges = ((void *)buf + fraglen) - (void *)reg->constituents;
	if (nr_ranges % sizeof(reg->constituents[0])) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out_unlock;
	}

	nr_ranges /= sizeof(reg->constituents[0]);
	ret = ffa_host_share_ranges(reg->constituents, nr_ranges);
	if (ret)
		goto out_unlock;

	ffa_mem_xfer(res, func_id, len, fraglen);
	if (fraglen != len) {
		if (res->a0 != FFA_MEM_FRAG_RX)
			goto err_unshare;

		if (res->a3 != fraglen)
			goto err_unshare;
	} else if (res->a0 != FFA_SUCCESS) {
		goto err_unshare;
	}

out_unlock:
	hyp_spin_unlock(&host_buffers.lock);
out:
	if (ret)
		ffa_to_smccc_res(res, ret);
	return;

err_unshare:
	WARN_ON(ffa_host_unshare_ranges(reg->constituents, nr_ranges));
	goto out_unlock;
}

static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
			       struct kvm_cpu_context *ctxt)
{
	DECLARE_REG(u32, handle_lo, ctxt, 1);
	DECLARE_REG(u32, handle_hi, ctxt, 2);
	DECLARE_REG(u32, flags, ctxt, 3);
	struct ffa_composite_mem_region *reg;
	u32 offset, len, fraglen, fragoff;
	struct ffa_mem_region *buf;
	int ret = 0;
	u64 handle;

	handle = PACK_HANDLE(handle_lo, handle_hi);

	hyp_spin_lock(&host_buffers.lock);

	buf = hyp_buffers.tx;
	*buf = (struct ffa_mem_region) {
		.sender_id	= HOST_FFA_ID,
		.handle		= handle,
	};

	ffa_retrieve_req(res, sizeof(*buf));
	buf = hyp_buffers.rx;
	if (res->a0 != FFA_MEM_RETRIEVE_RESP)
		goto out_unlock;

	len = res->a1;
	fraglen = res->a2;

	offset = buf->ep_mem_access[0].composite_off;
	/*
	 * We can trust the SPMD to get this right, but let's at least
	 * check that we end up with something that doesn't look _completely_
	 * bogus.
	 */
	if (WARN_ON(offset > len ||
		    fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
		ret = FFA_RET_ABORTED;
		goto out_unlock;
	}

	if (len > ffa_desc_buf.len) {
		ret = FFA_RET_NO_MEMORY;
		goto out_unlock;
	}

	buf = ffa_desc_buf.buf;
	memcpy(buf, hyp_buffers.rx, fraglen);

	for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
		ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
		if (res->a0 != FFA_MEM_FRAG_TX) {
			ret = FFA_RET_INVALID_PARAMETERS;
			goto out_unlock;
		}

		fraglen = res->a3;
		memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
	}

	ffa_mem_reclaim(res, handle_lo, handle_hi, flags);
	if (res->a0 != FFA_SUCCESS)
		goto out_unlock;

	reg = (void *)buf + offset;
	/* If the SPMD was happy, then we should be too. */
	WARN_ON(ffa_host_unshare_ranges(reg->constituents,
					reg->addr_range_cnt));
out_unlock:
	hyp_spin_unlock(&host_buffers.lock);

	if (ret)
		ffa_to_smccc_res(res, ret);
}

/*
 * Is a given FFA function supported, either by forwarding on directly
 * or by handling at EL2?
 */
static bool ffa_call_supported(u64 func_id)
{
	switch (func_id) {
	/* Unsupported memory management calls */
	case FFA_FN64_MEM_RETRIEVE_REQ:
	case FFA_MEM_RETRIEVE_RESP:
	case FFA_MEM_RELINQUISH:
	case FFA_MEM_OP_PAUSE:
	case FFA_MEM_OP_RESUME:
	case FFA_MEM_FRAG_RX:
	case FFA_FN64_MEM_DONATE:
	/* Indirect message passing via RX/TX buffers */
	case FFA_MSG_SEND:
	case FFA_MSG_POLL:
	case FFA_MSG_WAIT:
	/* 32-bit variants of 64-bit calls */
	case FFA_MSG_SEND_DIRECT_REQ:
	case FFA_MSG_SEND_DIRECT_RESP:
	case FFA_RXTX_MAP:
	case FFA_MEM_DONATE:
	case FFA_MEM_RETRIEVE_REQ:
		return false;
	}

	return true;
}

static bool do_ffa_features(struct arm_smccc_res *res,
			    struct kvm_cpu_context *ctxt)
{
	DECLARE_REG(u32, id, ctxt, 1);
	u64 prop = 0;
	int ret = 0;

	if (!ffa_call_supported(id)) {
		ret = FFA_RET_NOT_SUPPORTED;
		goto out_handled;
	}

	switch (id) {
	case FFA_MEM_SHARE:
	case FFA_FN64_MEM_SHARE:
	case FFA_MEM_LEND:
	case FFA_FN64_MEM_LEND:
		ret = FFA_RET_SUCCESS;
		prop = 0; /* No support for dynamic buffers */
		goto out_handled;
	default:
		return false;
	}

out_handled:
	ffa_to_smccc_res_prop(res, ret, prop);
	return true;
}

bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(u64, func_id, host_ctxt, 0);
	struct arm_smccc_res res;

	/*
	 * There's no way we can tell what a non-standard SMC call might
	 * be up to. Ideally, we would terminate these here and return
	 * an error to the host, but sadly devices make use of custom
	 * firmware calls for things like power management, debugging,
	 * RNG access and crash reporting.
	 *
	 * Given that the architecture requires us to trust EL3 anyway,
	 * we forward unrecognised calls on under the assumption that
	 * the firmware doesn't expose a mechanism to access arbitrary
	 * non-secure memory. Short of a per-device table of SMCs, this
	 * is the best we can do.
	 */
	if (!is_ffa_call(func_id))
		return false;

	switch (func_id) {
	case FFA_FEATURES:
		if (!do_ffa_features(&res, host_ctxt))
			return false;
		goto out_handled;
	/* Memory management */
	case FFA_FN64_RXTX_MAP:
		do_ffa_rxtx_map(&res, host_ctxt);
		goto out_handled;
	case FFA_RXTX_UNMAP:
		do_ffa_rxtx_unmap(&res, host_ctxt);
		goto out_handled;
	case FFA_MEM_SHARE:
	case FFA_FN64_MEM_SHARE:
		do_ffa_mem_xfer(FFA_FN64_MEM_SHARE, &res, host_ctxt);
		goto out_handled;
	case FFA_MEM_RECLAIM:
		do_ffa_mem_reclaim(&res, host_ctxt);
		goto out_handled;
	case FFA_MEM_LEND:
	case FFA_FN64_MEM_LEND:
		do_ffa_mem_xfer(FFA_FN64_MEM_LEND, &res, host_ctxt);
		goto out_handled;
	case FFA_MEM_FRAG_TX:
		do_ffa_mem_frag_tx(&res, host_ctxt);
		goto out_handled;
	}

	if (ffa_call_supported(func_id))
		return false; /* Pass through */

	ffa_to_smccc_error(&res, FFA_RET_NOT_SUPPORTED);
out_handled:
	ffa_set_retval(host_ctxt, &res);
	return true;
}

int hyp_ffa_init(void *pages)
{
	struct arm_smccc_res res;
	size_t min_rxtx_sz;
	void *tx, *rx;

	if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2)
		return 0;

	arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_0, 0, 0, 0, 0, 0, 0, &res);
	if (res.a0 == FFA_RET_NOT_SUPPORTED)
		return 0;

	if (res.a0 != FFA_VERSION_1_0)
		return -EOPNOTSUPP;

	arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
	if (res.a0 != FFA_SUCCESS)
		return -EOPNOTSUPP;

	if (res.a2 != HOST_FFA_ID)
		return -EINVAL;

	arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP,
			  0, 0, 0, 0, 0, 0, &res);
	if (res.a0 != FFA_SUCCESS)
		return -EOPNOTSUPP;

	switch (res.a2) {
	case FFA_FEAT_RXTX_MIN_SZ_4K:
		min_rxtx_sz = SZ_4K;
		break;
	case FFA_FEAT_RXTX_MIN_SZ_16K:
		min_rxtx_sz = SZ_16K;
		break;
	case FFA_FEAT_RXTX_MIN_SZ_64K:
		min_rxtx_sz = SZ_64K;
		break;
	default:
		return -EINVAL;
	}

	if (min_rxtx_sz > PAGE_SIZE)
		return -EOPNOTSUPP;

	tx = pages;
	pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;
	rx = pages;
	pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;

	ffa_desc_buf = (struct kvm_ffa_descriptor_buffer) {
		.buf	= pages,
		.len	= PAGE_SIZE *
			  (hyp_ffa_proxy_pages() - (2 * KVM_FFA_MBOX_NR_PAGES)),
	};

	hyp_buffers = (struct kvm_ffa_buffers) {
		.lock	= __HYP_SPIN_LOCK_UNLOCKED,
		.tx	= tx,
		.rx	= rx,
	};

	host_buffers = (struct kvm_ffa_buffers) {
		.lock	= __HYP_SPIN_LOCK_UNLOCKED,
	};

	return 0;
}
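A standalone, hedged illustration of the SMCCC function-ID test that is_ffa_call() above performs with the kernel's ARM_SMCCC_* helpers: FF-A calls are fast SMCCC calls owned by the "standard service" owner, with a function number in the 0x60-0x7F window defined in nvhe/ffa.h. The bit positions below follow the SMCCC convention (fast-call bit 31, owner in bits 29:24, function number in bits 15:0); the 0x84000073 example value for a 32-bit FFA_MEM_SHARE is quoted from the FF-A specification and is only used as an example here.

	#include <stdbool.h>
	#include <stdint.h>

	#define SMCCC_FAST		(1u << 31)
	#define SMCCC_OWNER_SHIFT	24
	#define SMCCC_OWNER_MASK	0x3f
	#define SMCCC_OWNER_STANDARD	4
	#define SMCCC_FUNC_MASK		0xffff

	#define FFA_MIN_FUNC_NUM	0x60
	#define FFA_MAX_FUNC_NUM	0x7F

	/* Mirrors the range check done by the pKVM FF-A proxy. */
	static bool is_ffa_func_id(uint32_t id)
	{
		uint32_t owner = (id >> SMCCC_OWNER_SHIFT) & SMCCC_OWNER_MASK;
		uint32_t func = id & SMCCC_FUNC_MASK;

		return (id & SMCCC_FAST) &&
		       owner == SMCCC_OWNER_STANDARD &&
		       func >= FFA_MIN_FUNC_NUM && func <= FFA_MAX_FUNC_NUM;
	}

	/* e.g. 0x84000073 (FFA_MEM_SHARE, SMC32) is in range; 0x84000000 is not. */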
@@ -10,6 +10,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_ptrauth.h>

	.text

@@ -37,10 +38,43 @@ SYM_FUNC_START(__host_exit)

	/* Save the host context pointer in x29 across the function call */
	mov	x29, x0

#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
alternative_if_not ARM64_HAS_ADDRESS_AUTH
	b	__skip_pauth_save
alternative_else_nop_endif

alternative_if ARM64_KVM_PROTECTED_MODE
	/* Save kernel ptrauth keys. */
	add	x18, x29, #CPU_APIAKEYLO_EL1
	ptrauth_save_state x18, x19, x20

	/* Use hyp keys. */
	adr_this_cpu x18, kvm_hyp_ctxt, x19
	add	x18, x18, #CPU_APIAKEYLO_EL1
	ptrauth_restore_state x18, x19, x20
	isb
alternative_else_nop_endif
__skip_pauth_save:
#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */

	bl	handle_trap

	/* Restore host regs x0-x17 */
__host_enter_restore_full:
	/* Restore kernel keys. */
#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
alternative_if_not ARM64_HAS_ADDRESS_AUTH
	b	__skip_pauth_restore
alternative_else_nop_endif

alternative_if ARM64_KVM_PROTECTED_MODE
	add	x18, x29, #CPU_APIAKEYLO_EL1
	ptrauth_restore_state x18, x19, x20
alternative_else_nop_endif
__skip_pauth_restore:
#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */

	/* Restore host regs x0-x17 */
	ldp	x0, x1,   [x29, #CPU_XREG_OFFSET(0)]
	ldp	x2, x3,   [x29, #CPU_XREG_OFFSET(2)]
	ldp	x4, x5,   [x29, #CPU_XREG_OFFSET(4)]
@@ -83,9 +83,6 @@ SYM_CODE_END(__kvm_hyp_init)
 * x0: struct kvm_nvhe_init_params PA
 */
SYM_CODE_START_LOCAL(___kvm_hyp_init)
	ldr	x1, [x0, #NVHE_INIT_TPIDR_EL2]
	msr	tpidr_el2, x1

	ldr	x1, [x0, #NVHE_INIT_STACK_HYP_VA]
	mov	sp, x1

@@ -95,6 +92,22 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init)
	ldr	x1, [x0, #NVHE_INIT_HCR_EL2]
	msr	hcr_el2, x1

	mov	x2, #HCR_E2H
	and	x2, x1, x2
	cbz	x2, 1f

	// hVHE: Replay the EL2 setup to account for the E2H bit
	// TPIDR_EL2 is used to preserve x0 across the macro maze...
	isb
	msr	tpidr_el2, x0
	init_el2_state
	finalise_el2_state
	mrs	x0, tpidr_el2

1:
	ldr	x1, [x0, #NVHE_INIT_TPIDR_EL2]
	msr	tpidr_el2, x1

	ldr	x1, [x0, #NVHE_INIT_VTTBR]
	msr	vttbr_el2, x1

@@ -128,6 +141,13 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
		    SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
	orr	x0, x0, x1
alternative_else_nop_endif

#ifdef CONFIG_ARM64_BTI_KERNEL
alternative_if ARM64_BTI
	orr	x0, x0, #SCTLR_EL2_BT
alternative_else_nop_endif
#endif /* CONFIG_ARM64_BTI_KERNEL */

	msr	sctlr_el2, x0
	isb

@@ -184,6 +204,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
	/* Initialize EL2 CPU state to sane values. */
	init_el2_state				// Clobbers x0..x2
	finalise_el2_state
	__init_el2_nvhe_prepare_eret

	/* Enable MMU, set vectors and stack. */
	mov	x0, x28
@@ -196,6 +217,11 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
SYM_CODE_END(__kvm_hyp_init_cpu)

SYM_CODE_START(__kvm_handle_stub_hvc)
	/*
	 * __kvm_handle_stub_hvc called from __host_hvc through branch instruction(br) so
	 * we need bti j at beginning.
	 */
	bti j
	cmp	x0, #HVC_SOFT_RESTART
	b.ne	1f

@@ -13,6 +13,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

#include <nvhe/ffa.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/pkvm.h>
@@ -125,6 +126,15 @@ static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
	__kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
}

static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
	DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
	DECLARE_REG(int, level, host_ctxt, 3);

	__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}

static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -315,6 +325,7 @@ static const hcall_t host_hcall[] = {
	HANDLE_FUNC(__kvm_vcpu_run),
	HANDLE_FUNC(__kvm_flush_vm_context),
	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
	HANDLE_FUNC(__kvm_tlb_flush_vmid),
	HANDLE_FUNC(__kvm_flush_cpu_context),
	HANDLE_FUNC(__kvm_timer_set_cntvoff),
@@ -373,6 +384,8 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
	bool handled;

	handled = kvm_host_psci_handler(host_ctxt);
	if (!handled)
		handled = kvm_host_ffa_handler(host_ctxt);
	if (!handled)
		default_host_smc_handler(host_ctxt);

@@ -392,7 +405,11 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
		handle_host_smc(host_ctxt);
		break;
	case ESR_ELx_EC_SVE:
		sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
		if (has_hvhe())
			sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
							CPACR_EL1_ZEN_EL0EN));
		else
			sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
		isb();
		sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
		break;
@@ -91,9 +91,9 @@ static void host_s2_put_page(void *addr)
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_removed_table(void *addr, u32 level)
static void host_s2_free_unlinked_table(void *addr, u32 level)
{
	kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level);
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
@@ -110,7 +110,7 @@ static int prepare_s2_pool(void *pgt_pool_base)
	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_removed_table = host_s2_free_removed_table,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
@@ -842,6 +842,13 @@ static int check_share(struct pkvm_mem_share *share)
	case PKVM_ID_HYP:
		ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
		break;
	case PKVM_ID_FFA:
		/*
		 * We only check the host; the secure side will check the other
		 * end when we forward the FFA call.
		 */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}
@@ -870,6 +877,13 @@ static int __do_share(struct pkvm_mem_share *share)
	case PKVM_ID_HYP:
		ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
		break;
	case PKVM_ID_FFA:
		/*
		 * We're not responsible for any secure page-tables, so there's
		 * nothing to do here.
		 */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}
@@ -918,6 +932,10 @@ static int check_unshare(struct pkvm_mem_share *share)
	case PKVM_ID_HYP:
		ret = hyp_ack_unshare(completer_addr, tx);
		break;
	case PKVM_ID_FFA:
		/* See check_share() */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}
@@ -946,6 +964,10 @@ static int __do_unshare(struct pkvm_mem_share *share)
	case PKVM_ID_HYP:
		ret = hyp_complete_unshare(completer_addr, tx);
		break;
	case PKVM_ID_FFA:
		/* See __do_share() */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}
@@ -1235,3 +1257,49 @@ void hyp_unpin_shared_mem(void *from, void *to)
	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	int ret;
	struct pkvm_mem_share share = {
		.tx	= {
			.nr_pages	= nr_pages,
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= hyp_pfn_to_phys(pfn),
			},
			.completer	= {
				.id	= PKVM_ID_FFA,
			},
		},
	};

	host_lock_component();
	ret = do_share(&share);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	int ret;
	struct pkvm_mem_share share = {
		.tx	= {
			.nr_pages	= nr_pages,
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= hyp_pfn_to_phys(pfn),
			},
			.completer	= {
				.id	= PKVM_ID_FFA,
			},
		},
	};

	host_lock_component();
	ret = do_unshare(&share);
	host_unlock_component();

	return ret;
}
@@ -27,6 +27,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
	u64 hcr_set = HCR_RW;
	u64 hcr_clear = 0;
	u64 cptr_set = 0;
	u64 cptr_clear = 0;

	/* Protected KVM does not support AArch32 guests. */
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
@@ -43,6 +44,9 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
				PVM_ID_AA64PFR0_ALLOW));

	if (has_hvhe())
		hcr_set |= HCR_E2H;

	/* Trap RAS unless all current versions are supported */
	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
	    ID_AA64PFR0_EL1_RAS_V1P1) {
@@ -57,12 +61,17 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
	}

	/* Trap SVE */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids))
		cptr_set |= CPTR_EL2_TZ;
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
		if (has_hvhe())
			cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
		else
			cptr_set |= CPTR_EL2_TZ;
	}

	vcpu->arch.hcr_el2 |= hcr_set;
	vcpu->arch.hcr_el2 &= ~hcr_clear;
	vcpu->arch.cptr_el2 |= cptr_set;
	vcpu->arch.cptr_el2 &= ~cptr_clear;
}

/*
@@ -120,8 +129,12 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
		mdcr_set |= MDCR_EL2_TTRF;

	/* Trap Trace */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids))
		cptr_set |= CPTR_EL2_TTA;
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) {
		if (has_hvhe())
			cptr_set |= CPACR_EL1_TTA;
		else
			cptr_set |= CPTR_EL2_TTA;
	}

	vcpu->arch.mdcr_el2 |= mdcr_set;
	vcpu->arch.mdcr_el2 &= ~mdcr_clear;
@@ -176,8 +189,10 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
	/* Clear res0 and set res1 bits to trap potential new features. */
	vcpu->arch.hcr_el2 &= ~(HCR_RES0);
	vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
	vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
	vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
	if (!has_hvhe()) {
		vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
		vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
	}
}

/*
@@ -11,6 +11,7 @@
#include <asm/kvm_pkvm.h>

#include <nvhe/early_alloc.h>
#include <nvhe/ffa.h>
#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
@@ -28,6 +29,7 @@ static void *vmemmap_base;
static void *vm_table_base;
static void *hyp_pgt_base;
static void *host_s2_pgt_base;
static void *ffa_proxy_pages;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static struct hyp_pool hpool;

@@ -57,6 +59,11 @@ static int divide_memory_pool(void *virt, unsigned long size)
	if (!host_s2_pgt_base)
		return -ENOMEM;

	nr_pages = hyp_ffa_proxy_pages();
	ffa_proxy_pages = hyp_early_alloc_contig(nr_pages);
	if (!ffa_proxy_pages)
		return -ENOMEM;

	return 0;
}

@@ -314,6 +321,10 @@ void __noreturn __pkvm_init_finalise(void)
	if (ret)
		goto out;

	ret = hyp_ffa_init(ffa_proxy_pages);
	if (ret)
		goto out;

	pkvm_hyp_vm_table_init(vm_table_base);
out:
	/*
@@ -44,13 +44,24 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
	__activate_traps_common(vcpu);

	val = vcpu->arch.cptr_el2;
	val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
	val |= CPTR_EL2_TAM;	/* Same bit irrespective of E2H */
	val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
	if (cpus_have_final_cap(ARM64_SME)) {
		if (has_hvhe())
			val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
		else
			val |= CPTR_EL2_TSM;
	}

	if (!guest_owns_fp_regs(vcpu)) {
		val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
		if (has_hvhe())
			val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
				 CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
		else
			val |= CPTR_EL2_TFP | CPTR_EL2_TZ;

		__activate_traps_fpsimd32(vcpu);
	}
	if (cpus_have_final_cap(ARM64_SME))
		val |= CPTR_EL2_TSM;

	write_sysreg(val, cptr_el2);
	write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
@@ -73,7 +84,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
static void __deactivate_traps(struct kvm_vcpu *vcpu)
{
	extern char __kvm_hyp_host_vector[];
	u64 cptr;

	___deactivate_traps(vcpu);

@@ -98,13 +108,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)

	write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);

	cptr = CPTR_EL2_DEFAULT;
	if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
		cptr |= CPTR_EL2_TZ;
	if (cpus_have_final_cap(ARM64_SME))
		cptr &= ~CPTR_EL2_TSM;

	write_sysreg(cptr, cptr_el2);
	kvm_reset_cptr_el2(vcpu);
	write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}
@ -17,21 +17,24 @@ void __kvm_timer_set_cntvoff(u64 cntvoff)
|
||||
}
|
||||
|
||||
/*
|
||||
* Should only be called on non-VHE systems.
|
||||
* Should only be called on non-VHE or hVHE setups.
|
||||
* VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
|
||||
*/
|
||||
void __timer_disable_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val;
|
||||
u64 val, shift = 0;
|
||||
|
||||
if (has_hvhe())
|
||||
shift = 10;
|
||||
|
||||
/* Allow physical timer/counter access for the host */
|
||||
val = read_sysreg(cnthctl_el2);
|
||||
val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
|
||||
val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
|
||||
write_sysreg(val, cnthctl_el2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Should only be called on non-VHE systems.
|
||||
* Should only be called on non-VHE or hVHE setups.
|
||||
* VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
|
||||
*/
|
||||
void __timer_enable_traps(struct kvm_vcpu *vcpu)
|
||||
@ -50,5 +53,10 @@ void __timer_enable_traps(struct kvm_vcpu *vcpu)
|
||||
else
|
||||
clr |= CNTHCTL_EL1PCTEN;
|
||||
|
||||
if (has_hvhe()) {
|
||||
clr <<= 10;
|
||||
set <<= 10;
|
||||
}
|
||||
|
||||
sysreg_clear_set(cnthctl_el2, clr, set);
|
||||
}
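
The shift of 10 in the two timer helpers above follows from CNTHCTL_EL2 changing layout with HCR_EL2.E2H: the EL1 physical counter/timer trap controls sit in bits [1:0] when E2H is clear, but move up to bits [11:10] when E2H is set (the hVHE case). A minimal sketch of the idea, with a hypothetical helper name chosen purely for illustration and not taken from this series:

	/* Illustrative only: not part of the patch. */
	#define CNTHCTL_EL1_TRAP_BITS	(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN)

	static inline u64 cnthctl_el1_trap_bits(bool e2h)
	{
		/* E2H == 1 moves the EL1 controls from bits [1:0] to [11:10]. */
		return e2h ? (CNTHCTL_EL1_TRAP_BITS << 10) : CNTHCTL_EL1_TRAP_BITS;
	}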

@@ -130,6 +130,58 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}

void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
phys_addr_t ipa, int level)
{
struct tlb_inv_context cxt;

/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt, true);

/*
* We could do so much better if we had the VA as well.
* Instead, we invalidate Stage-2 for this IPA, and the
* whole of Stage-1. Weep...
*/
ipa >>= 12;
__tlbi_level(ipas2e1, ipa, level);

/*
* We have to ensure completion of the invalidation at Stage-2,
* since a table walk on another CPU could refill a TLB with a
* complete (S1 + S2) walk based on the old Stage-2 mapping if
* the Stage-1 invalidation happened first.
*/
dsb(nsh);
__tlbi(vmalle1);
dsb(nsh);
isb();

/*
* If the host is running at EL1 and we have a VPIPT I-cache,
* then we must perform I-cache maintenance at EL2 in order for
* it to have an effect on the guest. Since the guest cannot hit
* I-cache lines allocated with a different VMID, we don't need
* to worry about junk out of guest reset (we nuke the I-cache on
* VMID rollover), but we do need to be careful when remapping
* executable pages for the same guest. This can happen when KSM
* takes a CoW fault on an executable page, copies the page into
* a page that was previously mapped in the guest and then needs
* to invalidate the guest view of the I-cache for that page
* from EL1. To solve this, we invalidate the entire I-cache when
* unmapping a page from a guest if we have a VPIPT I-cache but
* the host is running at EL1. As above, we could do better if
* we had the VA.
*
* The moral of this story is: if you have a VPIPT I-cache, then
* you should be running with VHE enabled.
*/
if (icache_is_vpipt())
icache_inval_all_pou();

__tlb_switch_to_host(&cxt);
}

void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;

@@ -21,8 +21,10 @@

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
@@ -34,7 +36,7 @@
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)

#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51)
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)

#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)

@@ -42,6 +44,8 @@

#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)

#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
KVM_PTE_LEAF_ATTR_HI_S2_XN)
@@ -63,6 +67,16 @@ struct kvm_pgtable_walk_data {
const u64 end;
};

static bool kvm_pgtable_walk_skip_bbm_tlbi(const struct kvm_pgtable_visit_ctx *ctx)
{
return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_BBM_TLBI);
}

static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
{
return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
}

static bool kvm_phys_is_valid(u64 phys)
{
return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
@@ -386,6 +400,9 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)

if (device)
return -EINVAL;

if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP;
} else {
attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
}
@@ -623,10 +640,18 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Enable the Hardware Access Flag management, unconditionally
* on all CPUs. The features is RES0 on CPUs without the support
* and must be ignored by the CPUs.
* on all CPUs. In systems that have asymmetric support for the feature
* this allows KVM to leverage hardware support on the subset of cores
* that implement the feature.
*
* The architecture requires VTCR_EL2.HA to be RES0 (thus ignored by
* hardware) on implementations that do not advertise support for the
* feature. As such, setting HA unconditionally is safe, unless you
* happen to be running on a design that has unadvertised support for
* HAFDBS. Here be dragons.
*/
vtcr |= VTCR_EL2_HA;
if (!cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
vtcr |= VTCR_EL2_HA;
#endif /* CONFIG_ARM64_HW_AFDBM */

/* Set the vmid bits */
@@ -755,14 +780,17 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
return false;

/*
* Perform the appropriate TLB invalidation based on the evicted pte
* value (if any).
*/
if (kvm_pte_table(ctx->old, ctx->level))
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
else if (kvm_pte_valid(ctx->old))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) {
/*
* Perform the appropriate TLB invalidation based on the
* evicted pte value (if any).
*/
if (kvm_pte_table(ctx->old, ctx->level))
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
else if (kvm_pte_valid(ctx->old))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
}

if (stage2_pte_is_counted(ctx->old))
mm_ops->put_page(ctx->ptep);
@@ -869,11 +897,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
return -EAGAIN;

/* Perform CMOs before installation of the guest stage-2 PTE */
if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->dcache_clean_inval_poc &&
stage2_pte_cacheable(pgt, new))
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
granule);
granule);

if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->icache_inval_pou &&
stage2_pte_executable(new))
mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);

stage2_make_pte(ctx, new);
@@ -895,7 +925,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
if (ret)
return ret;

mm_ops->free_removed_table(childp, ctx->level);
mm_ops->free_unlinked_table(childp, ctx->level);
return 0;
}

@@ -940,7 +970,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
* The TABLE_PRE callback runs for table entries on the way down, looking
* for table entries which we could conceivably replace with a block entry
* for this mapping. If it finds one it replaces the entry and calls
* kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table.
* kvm_pgtable_mm_ops::free_unlinked_table() to tear down the detached table.
*
* Otherwise, the LEAF callback performs the mapping at the existing leaves
* instead.
@@ -1209,7 +1239,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
if (!ret)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
return ret;
}

@@ -1242,6 +1272,162 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
return kvm_pgtable_walk(pgt, addr, size, &walker);
}

kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
u64 phys, u32 level,
enum kvm_pgtable_prot prot,
void *mc, bool force_pte)
{
struct stage2_map_data map_data = {
.phys = phys,
.mmu = pgt->mmu,
.memcache = mc,
.force_pte = force_pte,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_SKIP_BBM_TLBI |
KVM_PGTABLE_WALK_SKIP_CMO,
.arg = &map_data,
};
/*
* The input address (.addr) is irrelevant for walking an
* unlinked table. Construct an ambiguous IA range to map
* kvm_granule_size(level) worth of memory.
*/
struct kvm_pgtable_walk_data data = {
.walker = &walker,
.addr = 0,
.end = kvm_granule_size(level),
};
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t *pgtable;
int ret;

if (!IS_ALIGNED(phys, kvm_granule_size(level)))
return ERR_PTR(-EINVAL);

ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
if (ret)
return ERR_PTR(ret);

pgtable = mm_ops->zalloc_page(mc);
if (!pgtable)
return ERR_PTR(-ENOMEM);

ret = __kvm_pgtable_walk(&data, mm_ops, (kvm_pteref_t)pgtable,
level + 1);
if (ret) {
kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
mm_ops->put_page(pgtable);
return ERR_PTR(ret);
}

return pgtable;
}

/*
* Get the number of page-tables needed to replace a block with a
* fully populated tree up to the PTE entries. Note that @level is
* interpreted as in "level @level entry".
*/
static int stage2_block_get_nr_page_tables(u32 level)
{
switch (level) {
case 1:
return PTRS_PER_PTE + 1;
case 2:
return 1;
case 3:
return 0;
default:
WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
level >= KVM_PGTABLE_MAX_LEVELS);
return -EINVAL;
};
}
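
For a 4KiB granule (an assumption here; other granule sizes change the numbers), the switch above reduces to simple arithmetic: shattering a level-1 block all the way down to PTEs needs one level-2 table plus one level-3 table per level-2 entry, while a level-2 block only needs a single level-3 table. A rough sketch of that arithmetic:

	/* Illustrative only: assumes 4KiB pages, so PTRS_PER_PTE == 512. */
	static int nr_tables_for_block_sketch(u32 level)
	{
		if (level == 1)		/* 1GiB block: 1 level-2 table + 512 level-3 tables */
			return 512 + 1;
		if (level == 2)		/* 2MiB block: a single level-3 table */
			return 1;
		return 0;		/* a level-3 entry is already PAGE_SIZE */
	}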

static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
struct kvm_mmu_memory_cache *mc = ctx->arg;
struct kvm_s2_mmu *mmu;
kvm_pte_t pte = ctx->old, new, *childp;
enum kvm_pgtable_prot prot;
u32 level = ctx->level;
bool force_pte;
int nr_pages;
u64 phys;

/* No huge-pages exist at the last level */
if (level == KVM_PGTABLE_MAX_LEVELS - 1)
return 0;

/* We only split valid block mappings */
if (!kvm_pte_valid(pte))
return 0;

nr_pages = stage2_block_get_nr_page_tables(level);
if (nr_pages < 0)
return nr_pages;

if (mc->nobjs >= nr_pages) {
/* Build a tree mapped down to the PTE granularity. */
force_pte = true;
} else {
/*
* Don't force PTEs, so create_unlinked() below does
* not populate the tree up to the PTE level. The
* consequence is that the call will require a single
* page of level 2 entries at level 1, or a single
* page of PTEs at level 2. If we are at level 1, the
* PTEs will be created recursively.
*/
force_pte = false;
nr_pages = 1;
}

if (mc->nobjs < nr_pages)
return -ENOMEM;

mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
phys = kvm_pte_to_phys(pte);
prot = kvm_pgtable_stage2_pte_prot(pte);

childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
level, prot, mc, force_pte);
if (IS_ERR(childp))
return PTR_ERR(childp);

if (!stage2_try_break_pte(ctx, mmu)) {
kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
mm_ops->put_page(childp);
return -EAGAIN;
}

/*
* Note, the contents of the page table are guaranteed to be made
* visible before the new PTE is assigned because stage2_make_pte()
* writes the PTE using smp_store_release().
*/
new = kvm_init_table_pte(childp, mm_ops);
stage2_make_pte(ctx, new);
dsb(ishst);
return 0;
}

int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_mmu_memory_cache *mc)
{
struct kvm_pgtable_walker walker = {
.cb = stage2_split_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
.arg = mc,
};

return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
@@ -1311,7 +1497,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
pgt->pgd = NULL;
}

void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
struct kvm_pgtable_walker walker = {

@@ -84,7 +84,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
kvm_reset_cptr_el2(vcpu);

if (!arm64_kernel_unmapped_at_el0())
host_vectors = __this_cpu_read(this_cpu_vector);

@@ -111,6 +111,38 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}

void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
phys_addr_t ipa, int level)
{
struct tlb_inv_context cxt;

dsb(nshst);

/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt);

/*
* We could do so much better if we had the VA as well.
* Instead, we invalidate Stage-2 for this IPA, and the
* whole of Stage-1. Weep...
*/
ipa >>= 12;
__tlbi_level(ipas2e1, ipa, level);

/*
* We have to ensure completion of the invalidation at Stage-2,
* since a table walk on another CPU could refill a TLB with a
* complete (S1 + S2) walk based on the old Stage-2 mapping if
* the Stage-1 invalidation happened first.
*/
dsb(nsh);
__tlbi(vmalle1);
dsb(nsh);
isb();

__tlb_switch_to_host(&cxt);
}

void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;

@@ -31,14 +31,21 @@ static phys_addr_t __ro_after_init hyp_idmap_vector;

static unsigned long __ro_after_init io_map_base;

static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
phys_addr_t size)
{
phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);
phys_addr_t boundary = ALIGN_DOWN(addr + size, size);

return (boundary - 1 < end - 1) ? boundary : end;
}
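
A worked example may help here (the numbers are invented): with addr = 0x1234000 and size = 2MiB, ALIGN_DOWN(addr + size, size) yields 0x1400000, so the helper returns the next 2MiB boundary rather than 'end'; only when that boundary would overshoot 'end' does it return 'end' itself. A minimal sketch of how such a helper is typically driven, under the assumption that SZ_1M/SZ_2M from <linux/sizes.h> are available:

	/* Illustrative only: walking [0, 5MiB) in 2MiB-bounded steps. */
	phys_addr_t addr = 0, end = 5 * SZ_1M, next;

	do {
		next = __stage2_range_addr_end(addr, end, SZ_2M);
		/* iterations cover [0, 2M), [2M, 4M) and finally [4M, 5M) */
	} while (addr = next, addr != end);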

static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
{
phys_addr_t size = kvm_granule_size(KVM_PGTABLE_MIN_BLOCK_LEVEL);

return __stage2_range_addr_end(addr, end, size);
}

/*
* Release kvm_mmu_lock periodically if the memory region is large. Otherwise,
* we may see kernel panics with CONFIG_DETECT_HUNG_TASK,
@@ -75,6 +82,79 @@ static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
#define stage2_apply_range_resched(mmu, addr, end, fn) \
stage2_apply_range(mmu, addr, end, fn, true)

/*
* Get the maximum number of page-tables pages needed to split a range
* of blocks into PAGE_SIZE PTEs. It assumes the range is already
* mapped at level 2, or at level 1 if allowed.
*/
static int kvm_mmu_split_nr_page_tables(u64 range)
{
int n = 0;

if (KVM_PGTABLE_MIN_BLOCK_LEVEL < 2)
n += DIV_ROUND_UP(range, PUD_SIZE);
n += DIV_ROUND_UP(range, PMD_SIZE);
return n;
}
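
Plugging in numbers makes the sizing concrete, assuming 4KiB pages so that PMD_SIZE is 2MiB, PUD_SIZE is 1GiB and both terms apply: a 2MiB split chunk needs at most DIV_ROUND_UP(2M, 1G) + DIV_ROUND_UP(2M, 2M) = 1 + 1 = 2 cache pages, while a 1GiB chunk needs 1 + 512 = 513. A small sketch of that worst-case calculation (illustrative only, mirroring the helper above under the 4KiB assumption):

	/* Illustrative only: worst-case cache pages per split chunk, 4KiB granule. */
	static u64 split_cache_pages_sketch(u64 chunk)
	{
		return DIV_ROUND_UP(chunk, SZ_1G) + DIV_ROUND_UP(chunk, SZ_2M);
	}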

static bool need_split_memcache_topup_or_resched(struct kvm *kvm)
{
struct kvm_mmu_memory_cache *cache;
u64 chunk_size, min;

if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
return true;

chunk_size = kvm->arch.mmu.split_page_chunk_size;
min = kvm_mmu_split_nr_page_tables(chunk_size);
cache = &kvm->arch.mmu.split_page_cache;
return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
}

static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
phys_addr_t end)
{
struct kvm_mmu_memory_cache *cache;
struct kvm_pgtable *pgt;
int ret, cache_capacity;
u64 next, chunk_size;

lockdep_assert_held_write(&kvm->mmu_lock);

chunk_size = kvm->arch.mmu.split_page_chunk_size;
cache_capacity = kvm_mmu_split_nr_page_tables(chunk_size);

if (chunk_size == 0)
return 0;

cache = &kvm->arch.mmu.split_page_cache;

do {
if (need_split_memcache_topup_or_resched(kvm)) {
write_unlock(&kvm->mmu_lock);
cond_resched();
/* Eager page splitting is best-effort. */
ret = __kvm_mmu_topup_memory_cache(cache,
cache_capacity,
cache_capacity);
write_lock(&kvm->mmu_lock);
if (ret)
break;
}

pgt = kvm->arch.mmu.pgt;
if (!pgt)
return -EINVAL;

next = __stage2_range_addr_end(addr, end, chunk_size);
ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
if (ret)
break;
} while (addr = next, addr != end);

return ret;
}

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -131,21 +211,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;

static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
{
struct page *page = container_of(head, struct page, rcu_head);
void *pgtable = page_to_virt(page);
u32 level = page_private(page);

kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
}

static void stage2_free_removed_table(void *addr, u32 level)
static void stage2_free_unlinked_table(void *addr, u32 level)
{
struct page *page = virt_to_page(addr);

set_page_private(page, (unsigned long)level);
call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
call_rcu(&page->rcu_head, stage2_free_unlinked_table_rcu_cb);
}

static void kvm_host_get_page(void *addr)
@@ -701,7 +781,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
.zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
.free_pages_exact = kvm_s2_free_pages_exact,
.free_removed_table = stage2_free_removed_table,
.free_unlinked_table = stage2_free_unlinked_table,
.get_page = kvm_host_get_page,
.put_page = kvm_s2_put_page,
.page_count = kvm_host_page_count,
@@ -775,6 +855,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

/* The eager page splitting is disabled by default */
mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
mmu->split_page_cache.gfp_zero = __GFP_ZERO;

mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
return 0;
@@ -786,6 +870,12 @@ out_free_pgtable:
return err;
}

void kvm_uninit_stage2_mmu(struct kvm *kvm)
{
kvm_free_stage2_pgd(&kvm->arch.mmu);
kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
}

static void stage2_unmap_memslot(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
@@ -989,17 +1079,45 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
}

/**
* kvm_mmu_write_protect_pt_masked() - write protect dirty pages
* kvm_mmu_split_memory_region() - split the stage 2 blocks into PAGE_SIZE
* pages for memory slot
* @kvm: The KVM pointer
* @slot: The memory slot to split
*
* Acquires kvm->mmu_lock. Called with kvm->slots_lock mutex acquired,
* serializing operations for VM memory regions.
*/
static void kvm_mmu_split_memory_region(struct kvm *kvm, int slot)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
phys_addr_t start, end;

lockdep_assert_held(&kvm->slots_lock);

slots = kvm_memslots(kvm);
memslot = id_to_memslot(slots, slot);

start = memslot->base_gfn << PAGE_SHIFT;
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

write_lock(&kvm->mmu_lock);
kvm_mmu_split_huge_pages(kvm, start, end);
write_unlock(&kvm->mmu_lock);
}

/*
* kvm_arch_mmu_enable_log_dirty_pt_masked() - enable dirty logging for selected pages.
* @kvm: The KVM pointer
* @slot: The memory slot associated with mask
* @gfn_offset: The gfn offset in memory slot
* @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
* slot to be write protected
* @mask: The mask of pages at offset 'gfn_offset' in this memory
* slot to enable dirty logging on
*
* Walks bits set in mask write protects the associated pte's. Caller must
* acquire kvm_mmu_lock.
* Writes protect selected pages to enable dirty logging, and then
* splits them to PAGE_SIZE. Caller must acquire kvm->mmu_lock.
*/
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
@@ -1007,21 +1125,20 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

stage2_wp_range(&kvm->arch.mmu, start, end);
}
lockdep_assert_held_write(&kvm->mmu_lock);

/*
* kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
* dirty pages.
*
* It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
* enable dirty logging for them.
*/
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
stage2_wp_range(&kvm->arch.mmu, start, end);

/*
* Eager-splitting is done when manual-protect is set. We
* also check for initially-all-set because we can avoid
* eager-splitting if initially-all-set is false.
* Initially-all-set equal false implies that huge-pages were
* already split when enabling dirty logging: no need to do it
* again.
*/
if (kvm_dirty_log_manual_protect_and_init_set(kvm))
kvm_mmu_split_huge_pages(kvm, start, end);
}

static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
@@ -1790,20 +1907,42 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
bool log_dirty_pages = new && new->flags & KVM_MEM_LOG_DIRTY_PAGES;

/*
* At this point memslot has been committed and there is an
* allocated dirty_bitmap[], dirty pages will be tracked while the
* memory slot is write protected.
*/
if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
if (log_dirty_pages) {

if (change == KVM_MR_DELETE)
return;

/*
* If we're with initial-all-set, we don't need to write
* protect any pages because they're all reported as dirty.
* Huge pages and normal pages will be write protect gradually.
* Huge and normal pages are write-protected and split
* on either of these two cases:
*
* 1. with initial-all-set: gradually with CLEAR ioctls,
*/
if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
kvm_mmu_wp_memory_region(kvm, new->id);
}
if (kvm_dirty_log_manual_protect_and_init_set(kvm))
return;
/*
* or
* 2. without initial-all-set: all in one shot when
* enabling dirty logging.
*/
kvm_mmu_wp_memory_region(kvm, new->id);
kvm_mmu_split_memory_region(kvm, new->id);
} else {
/*
* Free any leftovers from the eager page splitting cache. Do
* this when deleting, moving, disabling dirty logging, or
* creating the memslot (a nop). Doing it for deletes makes
* sure we don't leak memory, and there's no need to keep the
* cache around for any of the other cases.
*/
kvm_mmu_free_memory_cache(&kvm->arch.mmu.split_page_cache);
}
}

@@ -1877,7 +2016,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_free_stage2_pgd(&kvm->arch.mmu);
kvm_uninit_stage2_mmu(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,

@@ -78,6 +78,7 @@ void __init kvm_hyp_reserve(void)
hyp_mem_pages += host_s2_pgtable_pages();
hyp_mem_pages += hyp_vm_table_pages();
hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
hyp_mem_pages += hyp_ffa_proxy_pages();

/*
* Try to allocate a PMD-aligned region to reduce TLB pressure once

@@ -186,57 +186,6 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
return 0;
}

/**
* kvm_set_vm_width() - set the register width for the guest
* @vcpu: Pointer to the vcpu being configured
*
* Set both KVM_ARCH_FLAG_EL1_32BIT and KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED
* in the VM flags based on the vcpu's requested register width, the HW
* capabilities and other options (such as MTE).
* When REG_WIDTH_CONFIGURED is already set, the vcpu settings must be
* consistent with the value of the FLAG_EL1_32BIT bit in the flags.
*
* Return: 0 on success, negative error code on failure.
*/
static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
bool is32bit;

is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);

lockdep_assert_held(&kvm->arch.config_lock);

if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) {
/*
* The guest's register width is already configured.
* Make sure that the vcpu is consistent with it.
*/
if (is32bit == test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags))
return 0;

return -EINVAL;
}

if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
return -EINVAL;

/* MTE is incompatible with AArch32 */
if (kvm_has_mte(kvm) && is32bit)
return -EINVAL;

/* NV is incompatible with AArch32 */
if (vcpu_has_nv(vcpu) && is32bit)
return -EINVAL;

if (is32bit)
set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);

set_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags);

return 0;
}

/**
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
@@ -262,13 +211,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
bool loaded;
u32 pstate;

mutex_lock(&vcpu->kvm->arch.config_lock);
ret = kvm_set_vm_width(vcpu);
mutex_unlock(&vcpu->kvm->arch.config_lock);

if (ret)
return ret;

spin_lock(&vcpu->arch.mp_state_lock);
reset_state = vcpu->arch.reset_state;
vcpu->arch.reset_state.reset = false;

@@ -42,6 +42,8 @@
*/

static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val);

static bool read_from_write_only(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
@@ -553,10 +555,11 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}

static void reset_bvr(struct kvm_vcpu *vcpu,
static u64 reset_bvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
return rd->val;
}

static bool trap_bcr(struct kvm_vcpu *vcpu,
@@ -589,10 +592,11 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}

static void reset_bcr(struct kvm_vcpu *vcpu,
static u64 reset_bcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
return rd->val;
}

static bool trap_wvr(struct kvm_vcpu *vcpu,
@@ -626,10 +630,11 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}

static void reset_wvr(struct kvm_vcpu *vcpu,
static u64 reset_wvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
return rd->val;
}

static bool trap_wcr(struct kvm_vcpu *vcpu,
@@ -662,25 +667,28 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}

static void reset_wcr(struct kvm_vcpu *vcpu,
static u64 reset_wcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
return rd->val;
}

static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 amair = read_sysreg(amair_el1);
vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
return amair;
}

static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 actlr = read_sysreg(actlr_el1);
vcpu_write_sys_reg(vcpu, actlr, ACTLR_EL1);
return actlr;
}

static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 mpidr;

@@ -694,7 +702,10 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
mpidr |= (1ULL << 31);
vcpu_write_sys_reg(vcpu, mpidr, MPIDR_EL1);

return mpidr;
}

static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
@@ -706,13 +717,13 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}

static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX);

/* No PMU available, any PMU reg may UNDEF... */
if (!kvm_arm_support_pmu_v3())
return;
return 0;

n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT;
n &= ARMV8_PMU_PMCR_N_MASK;
@@ -721,33 +732,41 @@ static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)

reset_unknown(vcpu, r);
__vcpu_sys_reg(vcpu, r->reg) &= mask;

return __vcpu_sys_reg(vcpu, r->reg);
}

static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
__vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0);

return __vcpu_sys_reg(vcpu, r->reg);
}

static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
__vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK;

return __vcpu_sys_reg(vcpu, r->reg);
}

static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
__vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK;

return __vcpu_sys_reg(vcpu, r->reg);
}

static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 pmcr;

/* No PMU available, PMCR_EL0 may UNDEF... */
if (!kvm_arm_support_pmu_v3())
return;
return 0;

/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
@@ -755,6 +774,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
pmcr |= ARMV8_PMU_PMCR_LC;

__vcpu_sys_reg(vcpu, r->reg) = pmcr;

return __vcpu_sys_reg(vcpu, r->reg);
}

static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
@@ -1187,25 +1208,89 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
return true;
}

static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu)
static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
s64 new, s64 cur)
{
if (kvm_vcpu_has_pmu(vcpu))
return vcpu->kvm->arch.dfr0_pmuver.imp;
struct arm64_ftr_bits kvm_ftr = *ftrp;

return vcpu->kvm->arch.dfr0_pmuver.unimp;
/* Some features have different safe value type in KVM than host features */
switch (id) {
case SYS_ID_AA64DFR0_EL1:
if (kvm_ftr.shift == ID_AA64DFR0_EL1_PMUVer_SHIFT)
kvm_ftr.type = FTR_LOWER_SAFE;
break;
case SYS_ID_DFR0_EL1:
if (kvm_ftr.shift == ID_DFR0_EL1_PerfMon_SHIFT)
kvm_ftr.type = FTR_LOWER_SAFE;
break;
}

return arm64_ftr_safe_value(&kvm_ftr, new, cur);
}

static u8 perfmon_to_pmuver(u8 perfmon)
/**
* arm64_check_features() - Check if a feature register value constitutes
* a subset of features indicated by the idreg's KVM sanitised limit.
*
* This function will check if each feature field of @val is the "safe" value
* against the idreg's KVM sanitised limit returned from the reset() callback.
* If a field value in @val is the same as the one in limit, it is always
* considered the safe value regardless. For register fields that are not
* writable, only the value in limit is considered the safe value.
*
* Return: 0 if all the fields are safe. Otherwise, return negative errno.
*/
static int arm64_check_features(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd,
u64 val)
{
switch (perfmon) {
case ID_DFR0_EL1_PerfMon_PMUv3:
return ID_AA64DFR0_EL1_PMUVer_IMP;
case ID_DFR0_EL1_PerfMon_IMPDEF:
return ID_AA64DFR0_EL1_PMUVer_IMP_DEF;
default:
/* Anything ARMv8.1+ and NI have the same value. For now. */
return perfmon;
const struct arm64_ftr_reg *ftr_reg;
const struct arm64_ftr_bits *ftrp = NULL;
u32 id = reg_to_encoding(rd);
u64 writable_mask = rd->val;
u64 limit = rd->reset(vcpu, rd);
u64 mask = 0;

/*
* Hidden and unallocated ID registers may not have a corresponding
* struct arm64_ftr_reg. Of course, if the register is RAZ we know the
* only safe value is 0.
*/
if (sysreg_visible_as_raz(vcpu, rd))
return val ? -E2BIG : 0;

ftr_reg = get_arm64_ftr_reg(id);
if (!ftr_reg)
return -EINVAL;

ftrp = ftr_reg->ftr_bits;

for (; ftrp && ftrp->width; ftrp++) {
s64 f_val, f_lim, safe_val;
u64 ftr_mask;

ftr_mask = arm64_ftr_mask(ftrp);
if ((ftr_mask & writable_mask) != ftr_mask)
continue;

f_val = arm64_ftr_value(ftrp, val);
f_lim = arm64_ftr_value(ftrp, limit);
mask |= ftr_mask;

if (f_val == f_lim)
safe_val = f_val;
else
safe_val = kvm_arm64_ftr_safe_value(id, ftrp, f_val, f_lim);

if (safe_val != f_val)
return -E2BIG;
}

/* For fields that are not writable, values in limit are the safe values. */
if ((val & ~mask) != (limit & ~mask))
return -E2BIG;

return 0;
}
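
To make the check above concrete, consider a hypothetical ID register whose writable mask covers a single lower-safe 4-bit field at shift 8, with a limit value of 0x600 (numbers invented for illustration): a userspace value of 0x500 lowers the field and is accepted, 0x700 raises it beyond the limit and fails with -E2BIG, and 0x1600 differs in a bit outside the mask, so it trips the final (val & ~mask) != (limit & ~mask) comparison.

	/* Illustrative only: invented register layout for the example above. */
	#define EXAMPLE_FIELD_MASK	GENMASK_ULL(11, 8)
	/* limit = 0x600 -> field == 6; val = 0x500 OK, 0x700 -> -E2BIG, 0x1600 -> -E2BIG */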
|
||||
|
||||
static u8 pmuver_to_perfmon(u8 pmuver)
|
||||
@ -1222,7 +1307,8 @@ static u8 pmuver_to_perfmon(u8 pmuver)
|
||||
}
|
||||
|
||||
/* Read a sanitised cpufeature ID register by sys_reg_desc */
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r)
|
||||
static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
u32 id = reg_to_encoding(r);
|
||||
u64 val;
|
||||
@ -1233,19 +1319,6 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
|
||||
val = read_sanitised_ftr_reg(id);
|
||||
|
||||
switch (id) {
|
||||
case SYS_ID_AA64PFR0_EL1:
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
|
||||
if (kvm_vgic_global_state.type == VGIC_V3) {
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1);
|
||||
}
|
||||
break;
|
||||
case SYS_ID_AA64PFR1_EL1:
|
||||
if (!kvm_has_mte(vcpu->kvm))
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
|
||||
@ -1266,22 +1339,6 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
|
||||
if (!cpus_have_final_cap(ARM64_HAS_WFXT))
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
|
||||
break;
|
||||
case SYS_ID_AA64DFR0_EL1:
|
||||
/* Limit debug to ARMv8.0 */
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6);
|
||||
/* Set PMUver to the required version */
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer),
|
||||
vcpu_pmuver(vcpu));
|
||||
/* Hide SPE from guests */
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer);
|
||||
break;
|
||||
case SYS_ID_DFR0_EL1:
|
||||
val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
|
||||
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon),
|
||||
pmuver_to_perfmon(vcpu_pmuver(vcpu)));
|
||||
break;
|
||||
case SYS_ID_AA64MMFR2_EL1:
|
||||
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
|
||||
break;
|
||||
@ -1293,6 +1350,28 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
|
||||
return val;
|
||||
}
|
||||
|
||||
static u64 kvm_read_sanitised_id_reg(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
return __kvm_read_sanitised_id_reg(vcpu, r);
|
||||
}
|
||||
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
return IDREG(vcpu->kvm, reg_to_encoding(r));
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the register's (Op0, Op1, CRn, CRm, Op2) is
|
||||
* (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
|
||||
*/
|
||||
static inline bool is_id_reg(u32 id)
|
||||
{
|
||||
return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
|
||||
sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
|
||||
sys_reg_CRm(id) < 8);
|
||||
}
|
||||
|
||||
static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
@ -1354,88 +1433,113 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
|
||||
return REG_HIDDEN;
|
||||
}
|
||||
|
||||
static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd,
|
||||
u64 val)
|
||||
static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd)
|
||||
{
|
||||
u8 csv2, csv3;
|
||||
u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
|
||||
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
val &= ~ID_AA64PFR0_EL1_SVE_MASK;
|
||||
|
||||
/*
|
||||
* Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as
|
||||
* it doesn't promise more than what is actually provided (the
|
||||
* guest could otherwise be covered in ectoplasmic residue).
|
||||
* The default is to expose CSV2 == 1 if the HW isn't affected.
|
||||
* Although this is a per-CPU feature, we make it global because
|
||||
* asymmetric systems are just a nuisance.
|
||||
*
|
||||
* Userspace can override this as long as it doesn't promise
|
||||
* the impossible.
|
||||
*/
|
||||
csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV2_SHIFT);
|
||||
if (csv2 > 1 ||
|
||||
(csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED))
|
||||
return -EINVAL;
|
||||
if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) {
|
||||
val &= ~ID_AA64PFR0_EL1_CSV2_MASK;
|
||||
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV2, IMP);
|
||||
}
|
||||
if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) {
|
||||
val &= ~ID_AA64PFR0_EL1_CSV3_MASK;
|
||||
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP);
|
||||
}
|
||||
|
||||
/* Same thing for CSV3 */
|
||||
csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV3_SHIFT);
|
||||
if (csv3 > 1 ||
|
||||
(csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED))
|
||||
return -EINVAL;
|
||||
if (kvm_vgic_global_state.type == VGIC_V3) {
|
||||
val &= ~ID_AA64PFR0_EL1_GIC_MASK;
|
||||
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
|
||||
}
|
||||
|
||||
/* We can only differ with CSV[23], and anything else is an error */
|
||||
val ^= read_id_reg(vcpu, rd);
|
||||
val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) |
|
||||
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3));
|
||||
if (val)
|
||||
return -EINVAL;
|
||||
val &= ~ID_AA64PFR0_EL1_AMU_MASK;
|
||||
|
||||
vcpu->kvm->arch.pfr0_csv2 = csv2;
|
||||
vcpu->kvm->arch.pfr0_csv3 = csv3;
|
||||
return val;
|
||||
}
|
||||
|
||||
return 0;
|
||||
static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd)
|
||||
{
|
||||
u64 val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
|
||||
|
||||
/* Limit debug to ARMv8.0 */
|
||||
val &= ~ID_AA64DFR0_EL1_DebugVer_MASK;
|
||||
val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DebugVer, IMP);
|
||||
|
||||
/*
|
||||
* Only initialize the PMU version if the vCPU was configured with one.
|
||||
*/
|
||||
val &= ~ID_AA64DFR0_EL1_PMUVer_MASK;
|
||||
if (kvm_vcpu_has_pmu(vcpu))
|
||||
val |= SYS_FIELD_PREP(ID_AA64DFR0_EL1, PMUVer,
|
||||
kvm_arm_pmu_get_pmuver_limit());
|
||||
|
||||
/* Hide SPE from guests */
|
||||
val &= ~ID_AA64DFR0_EL1_PMSVer_MASK;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd,
|
||||
u64 val)
|
||||
{
|
||||
u8 pmuver, host_pmuver;
|
||||
bool valid_pmu;
|
||||
|
||||
host_pmuver = kvm_arm_pmu_get_pmuver_limit();
|
||||
u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val);
|
||||
|
||||
/*
|
||||
* Allow AA64DFR0_EL1.PMUver to be set from userspace as long
|
||||
* as it doesn't promise more than what the HW gives us. We
|
||||
* allow an IMPDEF PMU though, only if no PMU is supported
|
||||
* (KVM backward compatibility handling).
|
||||
* Prior to commit 3d0dba5764b9 ("KVM: arm64: PMU: Move the
|
||||
* ID_AA64DFR0_EL1.PMUver limit to VM creation"), KVM erroneously
|
||||
* exposed an IMP_DEF PMU to userspace and the guest on systems w/
|
||||
* non-architectural PMUs. Of course, PMUv3 is the only game in town for
|
||||
* PMU virtualization, so the IMP_DEF value was rather user-hostile.
|
||||
*
|
||||
* At minimum, we're on the hook to allow values that were given to
|
||||
* userspace by KVM. Cover our tracks here and replace the IMP_DEF value
|
||||
* with a more sensible NI. The value of an ID register changing under
|
||||
* the nose of the guest is unfortunate, but is certainly no more
|
||||
* surprising than an ill-guided PMU driver poking at impdef system
|
||||
* registers that end in an UNDEF...
|
||||
*/
|
||||
pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), val);
|
||||
if ((pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver > host_pmuver))
|
||||
return -EINVAL;
|
||||
if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
||||
val &= ~ID_AA64DFR0_EL1_PMUVer_MASK;
|
||||
|
||||
valid_pmu = (pmuver != 0 && pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF);
|
||||
return set_id_reg(vcpu, rd, val);
|
||||
}
|
||||
|
||||
/* Make sure view register and PMU support do match */
|
||||
if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
|
||||
return -EINVAL;
|
||||
static u64 read_sanitised_id_dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd)
|
||||
{
|
||||
u8 perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit());
|
||||
u64 val = read_sanitised_ftr_reg(SYS_ID_DFR0_EL1);
|
||||
|
||||
/* We can only differ with PMUver, and anything else is an error */
|
||||
val ^= read_id_reg(vcpu, rd);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
|
||||
if (val)
|
||||
return -EINVAL;
|
||||
val &= ~ID_DFR0_EL1_PerfMon_MASK;
|
||||
if (kvm_vcpu_has_pmu(vcpu))
|
||||
val |= SYS_FIELD_PREP(ID_DFR0_EL1, PerfMon, perfmon);
|
||||
|
||||
if (valid_pmu)
|
||||
vcpu->kvm->arch.dfr0_pmuver.imp = pmuver;
|
||||
else
|
||||
vcpu->kvm->arch.dfr0_pmuver.unimp = pmuver;
|
||||
|
||||
return 0;
|
||||
return val;
|
||||
}
|
||||
|
||||
static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd,
|
||||
u64 val)
|
||||
{
|
||||
u8 perfmon, host_perfmon;
|
||||
bool valid_pmu;
|
||||
u8 perfmon = SYS_FIELD_GET(ID_DFR0_EL1, PerfMon, val);
|
||||
|
||||
host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit());
|
||||
if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) {
|
||||
val &= ~ID_DFR0_EL1_PerfMon_MASK;
|
||||
perfmon = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow DFR0_EL1.PerfMon to be set from userspace as long as
|
||||
@ -1443,29 +1547,10 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
* AArch64 side (as everything is emulated with that), and
|
||||
* that this is a PMUv3.
|
||||
*/
|
||||
perfmon = FIELD_GET(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), val);
|
||||
if ((perfmon != ID_DFR0_EL1_PerfMon_IMPDEF && perfmon > host_perfmon) ||
|
||||
(perfmon != 0 && perfmon < ID_DFR0_EL1_PerfMon_PMUv3))
|
||||
if (perfmon != 0 && perfmon < ID_DFR0_EL1_PerfMon_PMUv3)
|
||||
return -EINVAL;
|
||||
|
||||
valid_pmu = (perfmon != 0 && perfmon != ID_DFR0_EL1_PerfMon_IMPDEF);
|
||||
|
||||
/* Make sure view register and PMU support do match */
|
||||
if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
|
||||
return -EINVAL;
|
||||
|
||||
/* We can only differ with PerfMon, and anything else is an error */
|
||||
val ^= read_id_reg(vcpu, rd);
|
||||
val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
|
||||
if (val)
|
||||
return -EINVAL;
|
||||
|
||||
if (valid_pmu)
|
||||
vcpu->kvm->arch.dfr0_pmuver.imp = perfmon_to_pmuver(perfmon);
|
||||
else
|
||||
vcpu->kvm->arch.dfr0_pmuver.unimp = perfmon_to_pmuver(perfmon);
|
||||
|
||||
return 0;
|
||||
return set_id_reg(vcpu, rd, val);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1478,18 +1563,60 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
|
||||
static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
u64 *val)
|
||||
{
|
||||
/*
|
||||
* Avoid locking if the VM has already started, as the ID registers are
|
||||
* guaranteed to be invariant at that point.
|
||||
*/
|
||||
if (kvm_vm_has_ran_once(vcpu->kvm)) {
|
||||
*val = read_id_reg(vcpu, rd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.config_lock);
|
||||
*val = read_id_reg(vcpu, rd);
|
||||
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
u64 val)
|
||||
{
|
||||
/* This is what we mean by invariant: you can't change it. */
|
||||
if (val != read_id_reg(vcpu, rd))
|
||||
return -EINVAL;
|
||||
u32 id = reg_to_encoding(rd);
|
||||
int ret;
|
||||
|
||||
return 0;
|
||||
mutex_lock(&vcpu->kvm->arch.config_lock);
|
||||
|
||||
/*
|
||||
* Once the VM has started the ID registers are immutable. Reject any
|
||||
* write that does not match the final register value.
|
||||
*/
|
||||
if (kvm_vm_has_ran_once(vcpu->kvm)) {
|
||||
if (val != read_id_reg(vcpu, rd))
|
||||
ret = -EBUSY;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = arm64_check_features(vcpu, rd, val);
|
||||
if (!ret)
|
||||
IDREG(vcpu->kvm, id) = val;
|
||||
|
||||
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
||||
|
||||
/*
|
||||
* arm64_check_features() returns -E2BIG to indicate the register's
|
||||
* feature set is a superset of the maximally-allowed register value.
|
||||
* While it would be nice to precisely describe this to userspace, the
|
||||
* existing UAPI for KVM_SET_ONE_REG has it that invalid register
|
||||
* writes return -EINVAL.
|
||||
*/
|
||||
if (ret == -E2BIG)
|
||||
ret = -EINVAL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
@ -1529,7 +1656,7 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
* Fabricate a CLIDR_EL1 value instead of using the real value, which can vary
|
||||
* by the physical CPU which the vcpu currently resides in.
|
||||
*/
|
||||
static void reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
{
|
||||
u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
|
||||
u64 clidr;
|
||||
@ -1577,6 +1704,8 @@ static void reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
clidr |= 2 << CLIDR_TTYPE_SHIFT(loc);
|
||||
|
||||
__vcpu_sys_reg(vcpu, r->reg) = clidr;
|
||||
|
||||
return __vcpu_sys_reg(vcpu, r->reg);
|
||||
}
|
||||
|
||||
static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
@ -1676,6 +1805,17 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
|
||||
.visibility = elx2_visibility, \
|
||||
}
|
||||
|
||||
/*
|
||||
* Since reset() callback and field val are not used for idregs, they will be
|
||||
* used for specific purposes for idregs.
|
||||
* The reset() would return KVM sanitised register value. The value would be the
|
||||
* same as the host kernel sanitised value if there is no KVM sanitisation.
|
||||
* The val would be used as a mask indicating writable fields for the idreg.
|
||||
* Only bits with 1 are writable from userspace. This mask might not be
|
||||
* necessary in the future whenever all ID registers are enabled as writable
|
||||
* from userspace.
|
||||
*/
|
||||
|
||||
/* sys_reg_desc initialiser for known cpufeature ID registers */
|
||||
#define ID_SANITISED(name) { \
|
||||
SYS_DESC(SYS_##name), \
|
||||
@ -1683,6 +1823,8 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
|
||||
.get_user = get_id_reg, \
|
||||
.set_user = set_id_reg, \
|
||||
.visibility = id_visibility, \
|
||||
.reset = kvm_read_sanitised_id_reg, \
|
||||
.val = 0, \
|
||||
}

/* sys_reg_desc initialiser for known cpufeature ID registers */
@@ -1692,6 +1834,8 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
    .get_user = get_id_reg, \
    .set_user = set_id_reg, \
    .visibility = aa32_id_visibility, \
    .reset = kvm_read_sanitised_id_reg, \
    .val = 0, \
}

/*
@@ -1704,7 +1848,9 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
    .access = access_id_reg, \
    .get_user = get_id_reg, \
    .set_user = set_id_reg, \
    .visibility = raz_visibility \
    .visibility = raz_visibility, \
    .reset = kvm_read_sanitised_id_reg, \
    .val = 0, \
}

/*
@@ -1718,6 +1864,8 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
    .get_user = get_id_reg, \
    .set_user = set_id_reg, \
    .visibility = raz_visibility, \
    .reset = kvm_read_sanitised_id_reg, \
    .val = 0, \
}

static bool access_sp_el1(struct kvm_vcpu *vcpu,
@@ -1825,9 +1973,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
    /* CRm=1 */
    AA32_ID_SANITISED(ID_PFR0_EL1),
    AA32_ID_SANITISED(ID_PFR1_EL1),
    { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg,
      .get_user = get_id_reg, .set_user = set_id_dfr0_el1,
      .visibility = aa32_id_visibility, },
    { SYS_DESC(SYS_ID_DFR0_EL1),
      .access = access_id_reg,
      .get_user = get_id_reg,
      .set_user = set_id_dfr0_el1,
      .visibility = aa32_id_visibility,
      .reset = read_sanitised_id_dfr0_el1,
      .val = ID_DFR0_EL1_PerfMon_MASK, },
    ID_HIDDEN(ID_AFR0_EL1),
    AA32_ID_SANITISED(ID_MMFR0_EL1),
    AA32_ID_SANITISED(ID_MMFR1_EL1),
@@ -1856,8 +2008,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {

    /* AArch64 ID registers */
    /* CRm=4 */
    { SYS_DESC(SYS_ID_AA64PFR0_EL1), .access = access_id_reg,
      .get_user = get_id_reg, .set_user = set_id_aa64pfr0_el1, },
    { SYS_DESC(SYS_ID_AA64PFR0_EL1),
      .access = access_id_reg,
      .get_user = get_id_reg,
      .set_user = set_id_reg,
      .reset = read_sanitised_id_aa64pfr0_el1,
      .val = ID_AA64PFR0_EL1_CSV2_MASK | ID_AA64PFR0_EL1_CSV3_MASK, },
    ID_SANITISED(ID_AA64PFR1_EL1),
    ID_UNALLOCATED(4,2),
    ID_UNALLOCATED(4,3),
@@ -1867,8 +2023,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
    ID_UNALLOCATED(4,7),

    /* CRm=5 */
    { SYS_DESC(SYS_ID_AA64DFR0_EL1), .access = access_id_reg,
      .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, },
    { SYS_DESC(SYS_ID_AA64DFR0_EL1),
      .access = access_id_reg,
      .get_user = get_id_reg,
      .set_user = set_id_aa64dfr0_el1,
      .reset = read_sanitised_id_aa64dfr0_el1,
      .val = ID_AA64DFR0_EL1_PMUVer_MASK, },
    ID_SANITISED(ID_AA64DFR1_EL1),
    ID_UNALLOCATED(5,2),
    ID_UNALLOCATED(5,3),
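The .val masks in these entries (PerfMon for ID_DFR0_EL1, CSV2/CSV3 for ID_AA64PFR0_EL1, PMUVer for ID_AA64DFR0_EL1) are what currently gate userspace writes. The check itself is done by arm64_check_features(), which is outside the quoted hunks; the sketch below only illustrates the limit-plus-writable-mask idea and is not the kernel's implementation, which compares individual feature fields via ftr_bits rather than raw bits:

#include <linux/errno.h>
#include <linux/types.h>

/* Illustrative only: reject writes that alter bits outside the writable mask. */
static int check_idreg_write(u64 requested, u64 limit, u64 writable_mask)
{
    /* Non-writable fields must match the sanitised limit exactly. */
    if ((requested & ~writable_mask) != (limit & ~writable_mask))
        return -E2BIG;      /* reported to userspace as -EINVAL */

    /*
     * A real implementation would also validate the writable fields
     * against the limit on a per-field basis; that part is elided here.
     */
    return 0;
}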
@@ -2199,7 +2359,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
    EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
    EL2_REG(HCR_EL2, access_rw, reset_val, 0),
    EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
    EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_EL2_DEFAULT ),
    EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
    EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
    EL2_REG(HACR_EL2, access_rw, reset_val, 0),

@@ -2256,6 +2416,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
    EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};

static const struct sys_reg_desc *first_idreg;

static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
             struct sys_reg_params *p,
             const struct sys_reg_desc *r)
@@ -2946,6 +3108,28 @@ static bool emulate_sys_reg(struct kvm_vcpu *vcpu,
    return false;
}

static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
{
    const struct sys_reg_desc *idreg = first_idreg;
    u32 id = reg_to_encoding(idreg);
    struct kvm *kvm = vcpu->kvm;

    if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
        return;

    lockdep_assert_held(&kvm->arch.config_lock);

    /* Initialize all idregs */
    while (is_id_reg(id)) {
        IDREG(kvm, id) = idreg->reset(vcpu, idreg);

        idreg++;
        id = reg_to_encoding(idreg);
    }

    set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
}
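kvm_reset_id_regs() walks the descriptor table from first_idreg and fills the per-VM copy exactly once per VM. Both helpers it relies on are defined outside the quoted hunks; as a rough sketch of their assumed shape (not the exact kernel definitions), is_id_reg() matches the architectural ID space (Op0==3, Op1==0, CRn==0, CRm 1-7) and IDREG() indexes a per-VM array by CRm/Op2:

/*
 * Assumed shapes only - the real definitions live outside the quoted hunks.
 */
static inline bool is_id_reg(u32 id)
{
    /* The AArch64 ID register space: op0==3, op1==0, CRn==0, CRm in 1..7 */
    return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
            sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
            sys_reg_CRm(id) <= 7);
}

/* Per-VM storage: one u64 slot per encoding with CRm=1..7, Op2=0..7 */
#define IDREG_IDX(id)   (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
#define IDREG(kvm, id)  ((kvm)->arch.id_regs[IDREG_IDX(id)])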

/**
 * kvm_reset_sys_regs - sets system registers to reset value
 * @vcpu: The VCPU pointer
@@ -2957,9 +3141,17 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
    unsigned long i;

    for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++)
        if (sys_reg_descs[i].reset)
            sys_reg_descs[i].reset(vcpu, &sys_reg_descs[i]);
    kvm_reset_id_regs(vcpu);

    for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) {
        const struct sys_reg_desc *r = &sys_reg_descs[i];

        if (is_id_reg(reg_to_encoding(r)))
            continue;

        if (r->reset)
            r->reset(vcpu, r);
    }
}

/**
@@ -3060,19 +3252,21 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
 */

#define FUNCTION_INVARIANT(reg) \
    static void get_##reg(struct kvm_vcpu *v, \
    static u64 get_##reg(struct kvm_vcpu *v, \
                  const struct sys_reg_desc *r) \
    { \
        ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \
        return ((struct sys_reg_desc *)r)->val; \
    }

FUNCTION_INVARIANT(midr_el1)
FUNCTION_INVARIANT(revidr_el1)
FUNCTION_INVARIANT(aidr_el1)

static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
static u64 get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
{
    ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0);
    return ((struct sys_reg_desc *)r)->val;
}

/* ->val is filled in by kvm_sys_reg_table_init() */
@@ -3364,6 +3558,7 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)

int __init kvm_sys_reg_table_init(void)
{
    struct sys_reg_params params;
    bool valid = true;
    unsigned int i;

@@ -3382,5 +3577,11 @@ int __init kvm_sys_reg_table_init(void)
    for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
        invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);

    /* Find the first idreg (SYS_ID_PFR0_EL1) in sys_reg_descs. */
    params = encoding_to_params(SYS_ID_PFR0_EL1);
    first_idreg = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
    if (!first_idreg)
        return -EINVAL;

    return 0;
}

@@ -27,6 +27,13 @@ struct sys_reg_params {
    bool is_write;
};

#define encoding_to_params(reg) \
    ((struct sys_reg_params){ .Op0 = sys_reg_Op0(reg), \
                  .Op1 = sys_reg_Op1(reg), \
                  .CRn = sys_reg_CRn(reg), \
                  .CRm = sys_reg_CRm(reg), \
                  .Op2 = sys_reg_Op2(reg) })

#define esr_sys64_to_params(esr) \
    ((struct sys_reg_params){ .Op0 = ((esr) >> 20) & 3, \
                  .Op1 = ((esr) >> 14) & 0x7, \
@@ -64,13 +71,16 @@ struct sys_reg_desc {
              struct sys_reg_params *,
              const struct sys_reg_desc *);

    /* Initialization for vcpu. */
    void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
    /*
     * Initialization for vcpu. Return initialized value, or KVM
     * sanitized value for ID registers.
     */
    u64 (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);

    /* Index into sys_reg[], or 0 if we don't need to save it. */
    int reg;

    /* Value (usually reset value) */
    /* Value (usually reset value), or write mask for idregs */
    u64 val;

    /* Custom get/set_user functions, fallback to generic if NULL */
@@ -123,19 +133,21 @@ static inline bool read_zero(struct kvm_vcpu *vcpu,
}

/* Reset functions */
static inline void reset_unknown(struct kvm_vcpu *vcpu,
static inline u64 reset_unknown(struct kvm_vcpu *vcpu,
                 const struct sys_reg_desc *r)
{
    BUG_ON(!r->reg);
    BUG_ON(r->reg >= NR_SYS_REGS);
    __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
    return __vcpu_sys_reg(vcpu, r->reg);
}

static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
    BUG_ON(!r->reg);
    BUG_ON(r->reg >= NR_SYS_REGS);
    __vcpu_sys_reg(vcpu, r->reg) = r->val;
    return __vcpu_sys_reg(vcpu, r->reg);
}

static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu,

@@ -214,7 +214,7 @@ static void __init clear_pgds(unsigned long start,
static void __init kasan_init_shadow(void)
{
    u64 kimg_shadow_start, kimg_shadow_end;
    u64 mod_shadow_start, mod_shadow_end;
    u64 mod_shadow_start;
    u64 vmalloc_shadow_end;
    phys_addr_t pa_start, pa_end;
    u64 i;
@@ -223,7 +223,6 @@ static void __init kasan_init_shadow(void)
    kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END));

    mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
    mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);

    vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END);

@@ -246,17 +245,9 @@ static void __init kasan_init_shadow(void)
    kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END),
                   (void *)mod_shadow_start);

    if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
        BUILD_BUG_ON(VMALLOC_START != MODULES_END);
        kasan_populate_early_shadow((void *)vmalloc_shadow_end,
                        (void *)KASAN_SHADOW_END);
    } else {
        kasan_populate_early_shadow((void *)kimg_shadow_end,
                        (void *)KASAN_SHADOW_END);
        if (kimg_shadow_start > mod_shadow_end)
            kasan_populate_early_shadow((void *)mod_shadow_end,
                            (void *)kimg_shadow_start);
    }
    BUILD_BUG_ON(VMALLOC_START != MODULES_END);
    kasan_populate_early_shadow((void *)vmalloc_shadow_end,
                   (void *)KASAN_SHADOW_END);

    for_each_mem_range(i, &pa_start, &pa_end) {
        void *start = (void *)__phys_to_virt(pa_start);
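The hunk above drops the !CONFIG_KASAN_VMALLOC fallback that used to map shadow for the module region separately (module shadow handling is now the vmalloc path's job, following the module-allocation rework this merge pulls in) and keeps the compile-time check that the module and vmalloc regions are adjacent. The translation kasan_mem_to_shadow() performs is the usual generic-KASAN one, one shadow byte per 8 bytes of memory; a standalone sketch, with a placeholder offset rather than the kernel's real constant:

#include <stdint.h>

#define SHADOW_SCALE_SHIFT 3    /* generic KASAN: 8 bytes of memory per shadow byte */

/* Sketch of the address -> shadow-address mapping; the offset is a placeholder. */
static inline uintptr_t mem_to_shadow(uintptr_t addr, uintptr_t shadow_offset)
{
    return (addr >> SHADOW_SCALE_SHIFT) + shadow_offset;
}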

@@ -25,6 +25,7 @@ HAS_E0PD
HAS_ECV
HAS_ECV_CNTPOFF
HAS_EPAN
HAS_EVT
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
@@ -47,6 +48,7 @@ HAS_TLB_RANGE
HAS_VIRT_HOST_EXTN
HAS_WFXT
HW_DBM
KVM_HVHE
KVM_PROTECTED_MODE
MISMATCHED_CACHE_TYPE
MTE
@@ -77,6 +79,7 @@ WORKAROUND_2077057
WORKAROUND_2457168
WORKAROUND_2645198
WORKAROUND_2658417
WORKAROUND_AMPERE_AC03_CPU_38
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
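Each entry in this list becomes a generated ARM64_<NAME> capability constant, which code then tests at runtime through the usual cpufeature API. A small sketch of how one of the new entries would typically be consumed (the function name is hypothetical; the constant is generated from the HAS_EVT entry above):

#include <asm/cpufeature.h>

/* Sketch: only take the relaxed CTR_EL0 trapping path when FEAT_EVT is present. */
static bool can_relax_ctr_el0_trapping(void)
{
    /* ARM64_HAS_EVT is the generated constant for the HAS_EVT entry. */
    return cpus_have_final_cap(ARM64_HAS_EVT);
}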

@@ -92,8 +92,12 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
/*
 * Evaluates as true when emulating PMUv3p5, and false otherwise.
 */
#define kvm_pmu_is_3p5(vcpu) \
    (vcpu->kvm->arch.dfr0_pmuver.imp >= ID_AA64DFR0_EL1_PMUVer_V3P5)
#define kvm_pmu_is_3p5(vcpu) ({ \
    u64 val = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1); \
    u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val); \
 \
    pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5; \
})

u8 kvm_arm_pmu_get_pmuver_limit(void);
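The rewritten kvm_pmu_is_3p5() now pulls the guest's ID_AA64DFR0_EL1 out of the per-VM idreg storage and extracts PMUVer with SYS_FIELD_GET() instead of relying on the old dfr0_pmuver cache. Architecturally, PMUVer is bits [11:8] of ID_AA64DFR0_EL1 and FEAT_PMUv3p5 is encoded as 6; a standalone sketch of the same test without the kernel helpers (the IMPLEMENTATION DEFINED encoding 0xf is excluded here so the standalone check stays conservative):

#include <stdbool.h>
#include <stdint.h>

#define PMUVER_SHIFT    8       /* ID_AA64DFR0_EL1.PMUVer, bits [11:8] */
#define PMUVER_MASK     0xfULL
#define PMUVER_V3P5     6       /* FEAT_PMUv3p5 */
#define PMUVER_IMP_DEF  0xf     /* IMPLEMENTATION DEFINED PMU */

static bool pmu_is_3p5(uint64_t id_aa64dfr0)
{
    unsigned int pmuver = (id_aa64dfr0 >> PMUVER_SHIFT) & PMUVER_MASK;

    /* Treat an IMPLEMENTATION DEFINED PMU as "not PMUv3p5 or later". */
    return pmuver != PMUVER_IMP_DEF && pmuver >= PMUVER_V3P5;
}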

@@ -94,6 +94,14 @@
 */
#define FFA_PAGE_SIZE SZ_4K

/*
 * Minimum buffer size/alignment encodings returned by an FFA_FEATURES
 * query for FFA_RXTX_MAP.
 */
#define FFA_FEAT_RXTX_MIN_SZ_4K 0
#define FFA_FEAT_RXTX_MIN_SZ_64K 1
#define FFA_FEAT_RXTX_MIN_SZ_16K 2

/* FFA Bus/Device/Driver related */
struct ffa_device {
    u32 id;

@@ -991,6 +991,8 @@ static inline bool kvm_memslots_empty(struct kvm_memslots *slots)
    return RB_EMPTY_ROOT(&slots->gfn_tree);
}

bool kvm_are_all_memslots_empty(struct kvm *kvm);

#define kvm_for_each_memslot(memslot, bkt, slots) \
    hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \
        if (WARN_ON_ONCE(!memslot->npages)) { \

@@ -1190,6 +1190,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
#define KVM_CAP_COUNTER_OFFSET 227
#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
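These two additions are the userspace-visible half of the eager page splitting work: KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES reports (as a bitmap) which stage-2 block sizes can be used as the split chunk, and KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE enables splitting with a chosen chunk size. A hypothetical VMM sketch, based on my reading of the 6.5 API (vm_fd is an already-created VM file descriptor):

#include <string.h>
#include <strings.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: enable eager page splitting using the smallest advertised size. */
static int enable_eager_split(int vm_fd)
{
    int sizes = ioctl(vm_fd, KVM_CHECK_EXTENSION,
                      KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES);
    struct kvm_enable_cap cap;

    if (sizes <= 0)
        return -1;              /* not supported on this host/VM */

    memset(&cap, 0, sizeof(cap));
    cap.cap = KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE;
    cap.args[0] = 1ULL << (ffs(sizes) - 1);     /* smallest supported block size */

    return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}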

#ifdef KVM_CAP_IRQ_ROUTING

@@ -4620,7 +4620,7 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
    return -EINVAL;
}

static bool kvm_are_all_memslots_empty(struct kvm *kvm)
bool kvm_are_all_memslots_empty(struct kvm *kvm)
{
    int i;

@@ -4633,6 +4633,7 @@ static bool kvm_are_all_memslots_empty(struct kvm *kvm)

    return true;
}
EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty);
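kvm_are_all_memslots_empty() is un-staticed and exported so that arch code (here, the arm64 eager-split capability handling) can refuse VM-wide changes once memslots exist; callers are expected to hold kvm->slots_lock. A hypothetical caller sketch, not taken from the patch:

/* Sketch: a VM-wide setting that may only change while the VM has no memslots. */
static int enable_vm_wide_setting(struct kvm *kvm)
{
    int ret = -EBUSY;

    mutex_lock(&kvm->slots_lock);
    if (kvm_are_all_memslots_empty(kvm)) {
        /* Safe to reconfigure state that later memslots will depend on. */
        ret = 0;
    }
    mutex_unlock(&kvm->slots_lock);

    return ret;
}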

static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
                       struct kvm_enable_cap *cap)