KVM PPC update for 4.17
- Improvements for the radix page fault handler for HV KVM on POWER9. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJavHlMAAoJEJ2a6ncsY3GfWTAIANbY6b8xvhUY6c3WM1dt6o78 6MWJW1vCcsDYuM5yyIjYTds2xjY6vm7oWbo9thDdLIE9sWP2uKBDbdem8KxZb6JL t/tdzuLOYkB60BfwQL0z77UmLlHSQYF5RfJjbYVe5oXt7OU5TCe43udkHsT9QtLH HTpxvl7ebf2TIoex1+XqrD0eJ93tSOVFWB7Ay7WRUQu08CMEQRcHaszyMqdNfHfs LHoPwLgxyWf+7/zt2T4++ebfysQDFpQgsEuEBugXkaHkw6pSGi6R+BOrZwVVmcGm jiHq5+mdho3wL+B47Rt7UTjkpsMLyFbWR6TrhMD7y84/CislizKhEdnEYymKwH4= =0igD -----END PGP SIGNATURE----- Merge tag 'kvm-ppc-next-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc KVM PPC update for 4.17 - Improvements for the radix page fault handler for HV KVM on POWER9.
This commit is contained in:
commit
27aa896281
@ -60,7 +60,6 @@
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
|
||||
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
||||
extern int kvm_unmap_hva_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
|
@ -295,7 +295,6 @@ struct kvmppc_ops {
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new);
|
||||
int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
|
||||
int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
|
@ -819,12 +819,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
|
||||
}
|
||||
|
||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_unmap_hva);
|
||||
|
||||
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
{
|
||||
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
|
||||
|
@ -14,7 +14,6 @@
|
||||
|
||||
extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot);
|
||||
extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
|
||||
extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
|
||||
|
@ -877,15 +877,6 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
hva_handler_fn handler;
|
||||
|
||||
handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
|
||||
kvm_handle_hva(kvm, hva, handler);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
{
|
||||
hva_handler_fn handler;
|
||||
|
@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
||||
{
|
||||
int psize = MMU_BASE_PSIZE;
|
||||
|
||||
if (pshift >= PMD_SHIFT)
|
||||
if (pshift >= PUD_SHIFT)
|
||||
psize = MMU_PAGE_1G;
|
||||
else if (pshift >= PMD_SHIFT)
|
||||
psize = MMU_PAGE_2M;
|
||||
addr &= ~0xfffUL;
|
||||
addr |= mmu_psize_defs[psize].ap << 5;
|
||||
@ -160,6 +162,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
||||
asm volatile("ptesync": : :"memory");
|
||||
}
|
||||
|
||||
static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
|
||||
{
|
||||
unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
|
||||
|
||||
asm volatile("ptesync": : :"memory");
|
||||
/* RIC=1 PRS=0 R=1 IS=2 */
|
||||
asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
|
||||
: : "r" (rb), "r" (kvm->arch.lpid) : "memory");
|
||||
asm volatile("ptesync": : :"memory");
|
||||
}
|
||||
|
||||
unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
|
||||
unsigned long clr, unsigned long set,
|
||||
unsigned long addr, unsigned int shift)
|
||||
@ -195,6 +208,12 @@ static void kvmppc_pte_free(pte_t *ptep)
|
||||
kmem_cache_free(kvm_pte_cache, ptep);
|
||||
}
|
||||
|
||||
/* Like pmd_huge() and pmd_large(), but works regardless of config options */
|
||||
static inline int pmd_is_leaf(pmd_t pmd)
|
||||
{
|
||||
return !!(pmd_val(pmd) & _PAGE_PTE);
|
||||
}
|
||||
|
||||
static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
|
||||
unsigned int level, unsigned long mmu_seq)
|
||||
{
|
||||
@ -214,12 +233,12 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
|
||||
new_pud = pud_alloc_one(kvm->mm, gpa);
|
||||
|
||||
pmd = NULL;
|
||||
if (pud && pud_present(*pud))
|
||||
if (pud && pud_present(*pud) && !pud_huge(*pud))
|
||||
pmd = pmd_offset(pud, gpa);
|
||||
else
|
||||
else if (level <= 1)
|
||||
new_pmd = pmd_alloc_one(kvm->mm, gpa);
|
||||
|
||||
if (level == 0 && !(pmd && pmd_present(*pmd)))
|
||||
if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
|
||||
new_ptep = kvmppc_pte_alloc();
|
||||
|
||||
/* Check if we might have been invalidated; let the guest retry if so */
|
||||
@ -237,6 +256,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
|
||||
new_pud = NULL;
|
||||
}
|
||||
pud = pud_offset(pgd, gpa);
|
||||
if (pud_huge(*pud)) {
|
||||
unsigned long hgpa = gpa & PUD_MASK;
|
||||
|
||||
/*
|
||||
* If we raced with another CPU which has just put
|
||||
* a 1GB pte in after we saw a pmd page, try again.
|
||||
*/
|
||||
if (level <= 1 && !new_pmd) {
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock;
|
||||
}
|
||||
/* Check if we raced and someone else has set the same thing */
|
||||
if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
/* Valid 1GB page here already, remove it */
|
||||
old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
|
||||
~0UL, 0, hgpa, PUD_SHIFT);
|
||||
kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
|
||||
if (old & _PAGE_DIRTY) {
|
||||
unsigned long gfn = hgpa >> PAGE_SHIFT;
|
||||
struct kvm_memory_slot *memslot;
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
if (memslot && memslot->dirty_bitmap)
|
||||
kvmppc_update_dirty_map(memslot,
|
||||
gfn, PUD_SIZE);
|
||||
}
|
||||
}
|
||||
if (level == 2) {
|
||||
if (!pud_none(*pud)) {
|
||||
/*
|
||||
* There's a page table page here, but we wanted to
|
||||
* install a large page, so remove and free the page
|
||||
* table page. new_pmd will be NULL since level == 2.
|
||||
*/
|
||||
new_pmd = pmd_offset(pud, 0);
|
||||
pud_clear(pud);
|
||||
kvmppc_radix_flush_pwc(kvm, gpa);
|
||||
}
|
||||
kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (pud_none(*pud)) {
|
||||
if (!new_pmd)
|
||||
goto out_unlock;
|
||||
@ -244,40 +307,71 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
|
||||
new_pmd = NULL;
|
||||
}
|
||||
pmd = pmd_offset(pud, gpa);
|
||||
if (pmd_large(*pmd)) {
|
||||
/* Someone else has instantiated a large page here; retry */
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (level == 1 && !pmd_none(*pmd)) {
|
||||
if (pmd_is_leaf(*pmd)) {
|
||||
unsigned long lgpa = gpa & PMD_MASK;
|
||||
|
||||
/*
|
||||
* There's a page table page here, but we wanted
|
||||
* to install a large page. Tell the caller and let
|
||||
* it try installing a normal page if it wants.
|
||||
* If we raced with another CPU which has just put
|
||||
* a 2MB pte in after we saw a pte page, try again.
|
||||
*/
|
||||
ret = -EBUSY;
|
||||
if (level == 0 && !new_ptep) {
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock;
|
||||
}
|
||||
/* Check if we raced and someone else has set the same thing */
|
||||
if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
/* Valid 2MB page here already, remove it */
|
||||
old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
|
||||
~0UL, 0, lgpa, PMD_SHIFT);
|
||||
kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
|
||||
if (old & _PAGE_DIRTY) {
|
||||
unsigned long gfn = lgpa >> PAGE_SHIFT;
|
||||
struct kvm_memory_slot *memslot;
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
if (memslot && memslot->dirty_bitmap)
|
||||
kvmppc_update_dirty_map(memslot,
|
||||
gfn, PMD_SIZE);
|
||||
}
|
||||
}
|
||||
if (level == 1) {
|
||||
if (!pmd_none(*pmd)) {
|
||||
/*
|
||||
* There's a page table page here, but we wanted to
|
||||
* install a large page, so remove and free the page
|
||||
* table page. new_ptep will be NULL since level == 1.
|
||||
*/
|
||||
new_ptep = pte_offset_kernel(pmd, 0);
|
||||
pmd_clear(pmd);
|
||||
kvmppc_radix_flush_pwc(kvm, gpa);
|
||||
}
|
||||
kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (level == 0) {
|
||||
if (pmd_none(*pmd)) {
|
||||
if (!new_ptep)
|
||||
goto out_unlock;
|
||||
pmd_populate(kvm->mm, pmd, new_ptep);
|
||||
new_ptep = NULL;
|
||||
}
|
||||
ptep = pte_offset_kernel(pmd, gpa);
|
||||
if (pte_present(*ptep)) {
|
||||
/* PTE was previously valid, so invalidate it */
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
|
||||
0, gpa, 0);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, 0);
|
||||
if (old & _PAGE_DIRTY)
|
||||
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
|
||||
}
|
||||
kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
|
||||
} else {
|
||||
kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
|
||||
if (pmd_none(*pmd)) {
|
||||
if (!new_ptep)
|
||||
goto out_unlock;
|
||||
pmd_populate(kvm->mm, pmd, new_ptep);
|
||||
new_ptep = NULL;
|
||||
}
|
||||
ptep = pte_offset_kernel(pmd, gpa);
|
||||
if (pte_present(*ptep)) {
|
||||
/* Check if someone else set the same thing */
|
||||
if (pte_raw(*ptep) == pte_raw(pte)) {
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
/* PTE was previously valid, so invalidate it */
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
|
||||
0, gpa, 0);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, 0);
|
||||
if (old & _PAGE_DIRTY)
|
||||
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
|
||||
}
|
||||
kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
|
||||
ret = 0;
|
||||
|
||||
out_unlock:
|
||||
@ -298,11 +392,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned long mmu_seq, pte_size;
|
||||
unsigned long gpa, gfn, hva, pfn;
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct page *page = NULL, *pages[1];
|
||||
long ret, npages, ok;
|
||||
unsigned int writing;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long flags;
|
||||
struct page *page = NULL;
|
||||
long ret;
|
||||
bool writing;
|
||||
bool upgrade_write = false;
|
||||
bool *upgrade_p = &upgrade_write;
|
||||
pte_t pte, *ptep;
|
||||
unsigned long pgflags;
|
||||
unsigned int shift, level;
|
||||
@ -342,135 +436,137 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
dsisr & DSISR_ISSTORE);
|
||||
}
|
||||
|
||||
writing = (dsisr & DSISR_ISSTORE) != 0;
|
||||
if (memslot->flags & KVM_MEM_READONLY) {
|
||||
if (writing) {
|
||||
/* give the guest a DSI */
|
||||
dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
|
||||
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
upgrade_p = NULL;
|
||||
}
|
||||
|
||||
if (dsisr & DSISR_SET_RC) {
|
||||
/*
|
||||
* Need to set an R or C bit in the 2nd-level tables;
|
||||
* since we are just helping out the hardware here,
|
||||
* it is sufficient to do what the hardware does.
|
||||
*/
|
||||
pgflags = _PAGE_ACCESSED;
|
||||
if (writing)
|
||||
pgflags |= _PAGE_DIRTY;
|
||||
/*
|
||||
* We are walking the secondary page table here. We can do this
|
||||
* without disabling irq.
|
||||
*/
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable,
|
||||
gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) &&
|
||||
(!writing || pte_write(*ptep))) {
|
||||
kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
|
||||
gpa, shift);
|
||||
dsisr &= ~DSISR_SET_RC;
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
|
||||
DSISR_PROTFAULT | DSISR_SET_RC)))
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
|
||||
/* used to check for invalidations in progress */
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
smp_rmb();
|
||||
|
||||
writing = (dsisr & DSISR_ISSTORE) != 0;
|
||||
/*
|
||||
* Do a fast check first, since __gfn_to_pfn_memslot doesn't
|
||||
* do it with !atomic && !async, which is how we call it.
|
||||
* We always ask for write permission since the common case
|
||||
* is that the page is writable.
|
||||
*/
|
||||
hva = gfn_to_hva_memslot(memslot, gfn);
|
||||
if (dsisr & DSISR_SET_RC) {
|
||||
/*
|
||||
* Need to set an R or C bit in the 2nd-level tables;
|
||||
* if the relevant bits aren't already set in the linux
|
||||
* page tables, fall through to do the gup_fast to
|
||||
* set them in the linux page tables too.
|
||||
*/
|
||||
ok = 0;
|
||||
pgflags = _PAGE_ACCESSED;
|
||||
if (writing)
|
||||
pgflags |= _PAGE_DIRTY;
|
||||
local_irq_save(flags);
|
||||
ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
|
||||
if (ptep) {
|
||||
pte = READ_ONCE(*ptep);
|
||||
if (pte_present(pte) &&
|
||||
(pte_val(pte) & pgflags) == pgflags)
|
||||
ok = 1;
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
if (ok) {
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
/*
|
||||
* We are walking the secondary page table here. We can do this
|
||||
* without disabling irq.
|
||||
*/
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable,
|
||||
gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep)) {
|
||||
kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
|
||||
gpa, shift);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
return RESUME_GUEST;
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
|
||||
pfn = page_to_pfn(page);
|
||||
upgrade_write = true;
|
||||
} else {
|
||||
/* Call KVM generic code to do the slow-path check */
|
||||
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
|
||||
writing, upgrade_p);
|
||||
if (is_error_noslot_pfn(pfn))
|
||||
return -EFAULT;
|
||||
page = NULL;
|
||||
if (pfn_valid(pfn)) {
|
||||
page = pfn_to_page(pfn);
|
||||
if (PageReserved(page))
|
||||
page = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
ret = -EFAULT;
|
||||
pfn = 0;
|
||||
pte_size = PAGE_SIZE;
|
||||
pgflags = _PAGE_READ | _PAGE_EXEC;
|
||||
/* See if we can insert a 1GB or 2MB large PTE here */
|
||||
level = 0;
|
||||
npages = get_user_pages_fast(hva, 1, writing, pages);
|
||||
if (npages < 1) {
|
||||
/* Check if it's an I/O mapping */
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, hva);
|
||||
if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
|
||||
(vma->vm_flags & VM_PFNMAP)) {
|
||||
pfn = vma->vm_pgoff +
|
||||
((hva - vma->vm_start) >> PAGE_SHIFT);
|
||||
pgflags = pgprot_val(vma->vm_page_prot);
|
||||
}
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
if (!pfn)
|
||||
return -EFAULT;
|
||||
} else {
|
||||
page = pages[0];
|
||||
pfn = page_to_pfn(page);
|
||||
if (PageHuge(page)) {
|
||||
page = compound_head(page);
|
||||
pte_size <<= compound_order(page);
|
||||
/* See if we can insert a 2MB large-page PTE here */
|
||||
if (pte_size >= PMD_SIZE &&
|
||||
(gpa & PMD_MASK & PAGE_MASK) ==
|
||||
(hva & PMD_MASK & PAGE_MASK)) {
|
||||
level = 1;
|
||||
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
|
||||
}
|
||||
}
|
||||
/* See if we can provide write access */
|
||||
if (writing) {
|
||||
/*
|
||||
* We assume gup_fast has set dirty on the host PTE.
|
||||
*/
|
||||
pgflags |= _PAGE_WRITE;
|
||||
} else {
|
||||
local_irq_save(flags);
|
||||
ptep = find_current_mm_pte(current->mm->pgd,
|
||||
hva, NULL, NULL);
|
||||
if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
|
||||
pgflags |= _PAGE_WRITE;
|
||||
local_irq_restore(flags);
|
||||
if (page && PageCompound(page)) {
|
||||
pte_size = PAGE_SIZE << compound_order(compound_head(page));
|
||||
if (pte_size >= PUD_SIZE &&
|
||||
(gpa & (PUD_SIZE - PAGE_SIZE)) ==
|
||||
(hva & (PUD_SIZE - PAGE_SIZE))) {
|
||||
level = 2;
|
||||
pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
|
||||
} else if (pte_size >= PMD_SIZE &&
|
||||
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
|
||||
(hva & (PMD_SIZE - PAGE_SIZE))) {
|
||||
level = 1;
|
||||
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the PTE value that we need to insert.
|
||||
*/
|
||||
pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
|
||||
if (pgflags & _PAGE_WRITE)
|
||||
pgflags |= _PAGE_DIRTY;
|
||||
pte = pfn_pte(pfn, __pgprot(pgflags));
|
||||
if (page) {
|
||||
pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
|
||||
_PAGE_ACCESSED;
|
||||
if (writing || upgrade_write)
|
||||
pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
|
||||
pte = pfn_pte(pfn, __pgprot(pgflags));
|
||||
} else {
|
||||
/*
|
||||
* Read the PTE from the process' radix tree and use that
|
||||
* so we get the attribute bits.
|
||||
*/
|
||||
local_irq_disable();
|
||||
ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
|
||||
pte = *ptep;
|
||||
local_irq_enable();
|
||||
if (shift == PUD_SHIFT &&
|
||||
(gpa & (PUD_SIZE - PAGE_SIZE)) ==
|
||||
(hva & (PUD_SIZE - PAGE_SIZE))) {
|
||||
level = 2;
|
||||
} else if (shift == PMD_SHIFT &&
|
||||
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
|
||||
(hva & (PMD_SIZE - PAGE_SIZE))) {
|
||||
level = 1;
|
||||
} else if (shift && shift != PAGE_SHIFT) {
|
||||
/* Adjust PFN */
|
||||
unsigned long mask = (1ul << shift) - PAGE_SIZE;
|
||||
pte = __pte(pte_val(pte) | (hva & mask));
|
||||
}
|
||||
if (!(writing || upgrade_write))
|
||||
pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
|
||||
pte = __pte(pte_val(pte) | _PAGE_EXEC);
|
||||
}
|
||||
|
||||
/* Allocate space in the tree and write the PTE */
|
||||
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
|
||||
if (ret == -EBUSY) {
|
||||
/*
|
||||
* There's already a PMD where wanted to install a large page;
|
||||
* for now, fall back to installing a small page.
|
||||
*/
|
||||
level = 0;
|
||||
pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
|
||||
pte = pfn_pte(pfn, __pgprot(pgflags));
|
||||
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
|
||||
}
|
||||
if (ret == 0 || ret == -EAGAIN)
|
||||
ret = RESUME_GUEST;
|
||||
|
||||
if (page) {
|
||||
/*
|
||||
* We drop pages[0] here, not page because page might
|
||||
* have been set to the head page of a compound, but
|
||||
* we have to drop the reference on the correct tail
|
||||
* page to match the get inside gup()
|
||||
*/
|
||||
put_page(pages[0]);
|
||||
if (!ret && (pte_val(pte) & _PAGE_WRITE))
|
||||
set_page_dirty_lock(page);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
if (ret == 0 || ret == -EAGAIN)
|
||||
ret = RESUME_GUEST;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -642,9 +738,13 @@ void kvmppc_free_radix(struct kvm *kvm)
|
||||
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
|
||||
if (!pud_present(*pud))
|
||||
continue;
|
||||
if (pud_huge(*pud)) {
|
||||
pud_clear(pud);
|
||||
continue;
|
||||
}
|
||||
pmd = pmd_offset(pud, 0);
|
||||
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
|
||||
if (pmd_huge(*pmd)) {
|
||||
if (pmd_is_leaf(*pmd)) {
|
||||
pmd_clear(pmd);
|
||||
continue;
|
||||
}
|
||||
|
@ -450,7 +450,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
|
||||
|
||||
/*
|
||||
* Synchronize with the MMU notifier callbacks in
|
||||
* book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
|
||||
* book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.).
|
||||
* While we have the rmap lock, code running on other CPUs
|
||||
* cannot finish unmapping the host real page that backs
|
||||
* this guest real page, so we are OK to access the host
|
||||
|
@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
*/
|
||||
trace_hardirqs_on();
|
||||
|
||||
guest_enter();
|
||||
guest_enter_irqoff();
|
||||
|
||||
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
|
||||
|
||||
@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
|
||||
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
|
||||
|
||||
guest_exit();
|
||||
|
||||
trace_hardirqs_off();
|
||||
set_irq_happened(trap);
|
||||
|
||||
@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
kvmppc_set_host_core(pcpu);
|
||||
|
||||
local_irq_enable();
|
||||
guest_exit();
|
||||
|
||||
/* Let secondaries go back to the offline loop */
|
||||
for (i = 0; i < controlled_threads; ++i) {
|
||||
@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
|
||||
goto up_out;
|
||||
|
||||
psize = vma_kernel_pagesize(vma);
|
||||
porder = __ilog2(psize);
|
||||
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
||||
/* We can handle 4k, 64k or 16M pages in the VRMA */
|
||||
err = -EINVAL;
|
||||
if (!(psize == 0x1000 || psize == 0x10000 ||
|
||||
psize == 0x1000000))
|
||||
goto out_srcu;
|
||||
if (psize >= 0x1000000)
|
||||
psize = 0x1000000;
|
||||
else if (psize >= 0x10000)
|
||||
psize = 0x10000;
|
||||
else
|
||||
psize = 0x1000;
|
||||
porder = __ilog2(psize);
|
||||
|
||||
senc = slb_pgsize_encoding(psize);
|
||||
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
|
||||
@ -4350,7 +4351,6 @@ static struct kvmppc_ops kvm_ops_hv = {
|
||||
.flush_memslot = kvmppc_core_flush_memslot_hv,
|
||||
.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
|
||||
.commit_memory_region = kvmppc_core_commit_memory_region_hv,
|
||||
.unmap_hva = kvm_unmap_hva_hv,
|
||||
.unmap_hva_range = kvm_unmap_hva_range_hv,
|
||||
.age_hva = kvm_age_hva_hv,
|
||||
.test_age_hva = kvm_test_age_hva_hv,
|
||||
|
@ -320,7 +320,6 @@ kvm_novcpu_exit:
|
||||
stw r12, STACK_SLOT_TRAP(r1)
|
||||
bl kvmhv_commence_exit
|
||||
nop
|
||||
lwz r12, STACK_SLOT_TRAP(r1)
|
||||
b kvmhv_switch_to_host
|
||||
|
||||
/*
|
||||
@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
|
||||
secondary_too_late:
|
||||
li r12, 0
|
||||
stw r12, STACK_SLOT_TRAP(r1)
|
||||
cmpdi r4, 0
|
||||
beq 11f
|
||||
stw r12, VCPU_TRAP(r4)
|
||||
@ -1558,12 +1558,12 @@ mc_cont:
|
||||
3: stw r5,VCPU_SLB_MAX(r9)
|
||||
|
||||
guest_bypass:
|
||||
stw r12, STACK_SLOT_TRAP(r1)
|
||||
mr r3, r12
|
||||
/* Increment exit count, poke other threads to exit */
|
||||
bl kvmhv_commence_exit
|
||||
nop
|
||||
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
lwz r12, VCPU_TRAP(r9)
|
||||
|
||||
/* Stop others sending VCPU interrupts to this physical CPU */
|
||||
li r0, -1
|
||||
@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
|
||||
* POWER7/POWER8 guest -> host partition switch code.
|
||||
* We don't have to lock against tlbies but we do
|
||||
* have to coordinate the hardware threads.
|
||||
* Here STACK_SLOT_TRAP(r1) contains the trap number.
|
||||
*/
|
||||
kvmhv_switch_to_host:
|
||||
/* Secondary threads wait for primary to do partition switch */
|
||||
@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
|
||||
/* If HMI, call kvmppc_realmode_hmi_handler() */
|
||||
lwz r12, STACK_SLOT_TRAP(r1)
|
||||
cmpwi r12, BOOK3S_INTERRUPT_HMI
|
||||
bne 27f
|
||||
bl kvmppc_realmode_hmi_handler
|
||||
nop
|
||||
cmpdi r3, 0
|
||||
li r12, BOOK3S_INTERRUPT_HMI
|
||||
/*
|
||||
* At this point kvmppc_realmode_hmi_handler may have resync-ed
|
||||
* the TB, and if it has, we must not subtract the guest timebase
|
||||
@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION
|
||||
lwz r8, KVM_SPLIT_DO_RESTORE(r3)
|
||||
cmpwi r8, 0
|
||||
beq 47f
|
||||
stw r12, STACK_SLOT_TRAP(r1)
|
||||
bl kvmhv_p9_restore_lpcr
|
||||
nop
|
||||
lwz r12, STACK_SLOT_TRAP(r1)
|
||||
b 48f
|
||||
47:
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
|
||||
li r0, KVM_GUEST_MODE_NONE
|
||||
stb r0, HSTATE_IN_GUEST(r13)
|
||||
|
||||
lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */
|
||||
ld r0, SFS+PPC_LR_STKOFF(r1)
|
||||
addi r1, r1, SFS
|
||||
mtlr r0
|
||||
|
@ -277,15 +277,6 @@ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
|
||||
}
|
||||
}
|
||||
|
||||
static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
trace_kvm_unmap_hva(hva);
|
||||
|
||||
do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
@ -1773,7 +1764,6 @@ static struct kvmppc_ops kvm_ops_pr = {
|
||||
.flush_memslot = kvmppc_core_flush_memslot_pr,
|
||||
.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
|
||||
.commit_memory_region = kvmppc_core_commit_memory_region_pr,
|
||||
.unmap_hva = kvm_unmap_hva_pr,
|
||||
.unmap_hva_range = kvm_unmap_hva_range_pr,
|
||||
.age_hva = kvm_age_hva_pr,
|
||||
.test_age_hva = kvm_test_age_hva_pr,
|
||||
|
@ -724,7 +724,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
|
||||
|
||||
/************* MMU Notifiers *************/
|
||||
|
||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
||||
static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
trace_kvm_unmap_hva(hva);
|
||||
|
||||
|
@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
|
||||
int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
unsigned int rt, int is_default_endian)
|
||||
{
|
||||
enum emulation_result emulated;
|
||||
enum emulation_result emulated = EMULATE_DONE;
|
||||
|
||||
while (vcpu->arch.mmio_vmx_copy_nums) {
|
||||
emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
|
||||
@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
|
||||
kvm_sigset_deactivate(vcpu);
|
||||
|
||||
#ifdef CONFIG_ALTIVEC
|
||||
out:
|
||||
#endif
|
||||
vcpu_put(vcpu);
|
||||
return r;
|
||||
}
|
||||
|
@ -254,21 +254,6 @@ TRACE_EVENT(kvm_exit,
|
||||
)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_unmap_hva,
|
||||
TP_PROTO(unsigned long hva),
|
||||
TP_ARGS(hva),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, hva )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->hva = hva;
|
||||
),
|
||||
|
||||
TP_printk("unmap hva 0x%lx\n", __entry->hva)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
Loading…
Reference in New Issue
Block a user