Merge branch kvm-arm64/mmu/MMIO-block-mapping into kvmarm-master/next
MMIO block mapping support from Keqian Zhu, allowing larger (and lazy)
mappings for devices assigned to guests.

* kvm-arm64/mmu/MMIO-block-mapping:
  KVM: arm64: Try stage2 block mapping for host device MMIO
  KVM: arm64: Remove the creation time's mapping of MMIO regions
commit 32ab5a5e97
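For context, here is a minimal userspace sketch (not part of this merge; the helper name, descriptors and addresses are illustrative) of how a device MMIO BAR typically becomes a VM_PFNMAP memslot: the BAR is mmap()ed from a VFIO device fd and handed to KVM with KVM_SET_USER_MEMORY_REGION. With the two patches merged here, KVM no longer ioremaps such a region when the memslot is registered; stage-2 mappings are created lazily in user_mem_abort(), and get_vma_page_shift() lets them use PMD or PUD blocks when the HVA/PA alignment allows.

/*
 * Illustrative sketch only: register a VFIO BAR mapping as a KVM memslot.
 * The bar_offset/bar_size values would come from VFIO_DEVICE_GET_REGION_INFO.
 */
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int map_bar_into_guest(int vm_fd, int device_fd, off_t bar_offset,
                              size_t bar_size, __u64 gpa, __u32 slot)
{
        struct kvm_userspace_memory_region region;
        void *hva;

        /* mmap() of a VFIO BAR gives userspace a VM_PFNMAP VMA. */
        hva = mmap(NULL, bar_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                   device_fd, bar_offset);
        if (hva == MAP_FAILED)
                return -1;

        region.slot = slot;
        region.flags = 0;       /* dirty logging is not allowed on such slots */
        region.guest_phys_addr = gpa;
        region.memory_size = bar_size;
        region.userspace_addr = (unsigned long)hva;

        /* Stage-2 entries for this slot are now built on demand, at fault time. */
        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}

Leaving region.flags clear matters: the KVM_MEM_LOG_DIRTY_PAGES check kept in kvm_arch_prepare_memory_region() below still rejects dirty logging for PFNMAP slots.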
arch/arm64/kvm/mmu.c

@@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
         return PAGE_SIZE;
 }
 
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+        unsigned long pa;
+
+        if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+                return huge_page_shift(hstate_vma(vma));
+
+        if (!(vma->vm_flags & VM_PFNMAP))
+                return PAGE_SHIFT;
+
+        VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+        pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+        if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+            ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+            ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+                return PUD_SHIFT;
+#endif
+
+        if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+            ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+            ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+                return PMD_SHIFT;
+
+        return PAGE_SHIFT;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                           struct kvm_memory_slot *memslot, unsigned long hva,
                           unsigned long fault_status)
@@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 return -EFAULT;
         }
 
-        /* Let's check if we will get back a huge page backed by hugetlbfs */
+        /*
+         * Let's check if we will get back a huge page backed by hugetlbfs, or
+         * get block mapping for device MMIO region.
+         */
         mmap_read_lock(current->mm);
         vma = find_vma_intersection(current->mm, hva, hva + 1);
         if (unlikely(!vma)) {
@@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 return -EFAULT;
         }
 
-        if (is_vm_hugetlb_page(vma))
-                vma_shift = huge_page_shift(hstate_vma(vma));
-        else
-                vma_shift = PAGE_SHIFT;
-
-        if (logging_active ||
-            (vma->vm_flags & VM_PFNMAP)) {
+        /*
+         * logging_active is guaranteed to never be true for VM_PFNMAP
+         * memslots.
+         */
+        if (logging_active) {
                 force_pte = true;
                 vma_shift = PAGE_SHIFT;
+        } else {
+                vma_shift = get_vma_page_shift(vma, hva);
         }
 
         switch (vma_shift) {
@@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 return -EFAULT;
 
         if (kvm_is_device_pfn(pfn)) {
+                /*
+                 * If the page was identified as device early by looking at
+                 * the VMA flags, vma_pagesize is already representing the
+                 * largest quantity we can map. If instead it was mapped
+                 * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+                 * and must not be upgraded.
+                 *
+                 * In both cases, we don't let transparent_hugepage_adjust()
+                 * change things at the last minute.
+                 */
                 device = true;
-                force_pte = true;
         } else if (logging_active && !write_fault) {
                 /*
                  * Only actually map the page as writable if this was a write
@@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
          * If we are not forced to use page mapping, check if we are
          * backed by a THP and thus use block mapping if possible.
          */
-        if (vma_pagesize == PAGE_SIZE && !force_pte)
+        if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
                 vma_pagesize = transparent_hugepage_adjust(memslot, hva,
                                                            &pfn, &fault_ipa);
         if (writable)
@@ -1346,7 +1387,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
         hva_t hva = mem->userspace_addr;
         hva_t reg_end = hva + mem->memory_size;
-        bool writable = !(mem->flags & KVM_MEM_READONLY);
         int ret = 0;
 
         if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         mmap_read_lock(current->mm);
         /*
          * A memory region could potentially cover multiple VMAs, and any holes
-         * between them, so iterate over all of them to find out if we can map
-         * any of them right now.
+         * between them, so iterate over all of them.
          *
          *     +--------------------------------------------+
          * +---------------+----------------+   +----------------+
@@ -1375,51 +1414,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
          */
         do {
                 struct vm_area_struct *vma;
-                hva_t vm_start, vm_end;
 
                 vma = find_vma_intersection(current->mm, hva, reg_end);
                 if (!vma)
                         break;
 
-                /*
-                 * Take the intersection of this VMA with the memory region
-                 */
-                vm_start = max(hva, vma->vm_start);
-                vm_end = min(reg_end, vma->vm_end);
-
                 if (vma->vm_flags & VM_PFNMAP) {
-                        gpa_t gpa = mem->guest_phys_addr +
-                                    (vm_start - mem->userspace_addr);
-                        phys_addr_t pa;
-
-                        pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-                        pa += vm_start - vma->vm_start;
-
                         /* IO region dirty page logging not allowed */
                         if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                                 ret = -EINVAL;
-                                goto out;
-                        }
-
-                        ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-                                                    vm_end - vm_start,
-                                                    writable);
-                        if (ret)
                                 break;
+                        }
                 }
-                hva = vm_end;
+                hva = min(reg_end, vma->vm_end);
         } while (hva < reg_end);
 
-        if (change == KVM_MR_FLAGS_ONLY)
-                goto out;
-
-        spin_lock(&kvm->mmu_lock);
-        if (ret)
-                unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
-        else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
-                stage2_flush_memslot(kvm, memslot);
-        spin_unlock(&kvm->mmu_lock);
-out:
         mmap_read_unlock(current->mm);
         return ret;
 }