mirror of
https://github.com/torvalds/linux.git
synced 2024-11-27 06:31:52 +00:00
KVM: PPC: Avoid marking DMA-mapped pages dirty in real mode
At the moment the real mode handler of H_PUT_TCE calls iommu_tce_xchg_rm() which in turn reads the old TCE and if it was a valid entry, marks the physical page dirty if it was mapped for writing. Since it is in real mode, realmode_pfn_to_page() is used instead of pfn_to_page() to get the page struct. However SetPageDirty() itself reads the compound page head and returns a virtual address for the head page struct and setting dirty bit for that kills the system. This adds additional dirty bit tracking into the MM/IOMMU API for use in the real mode. Note that this does not change how VFIO and KVM (in virtual mode) set this bit. The KVM (real mode) changes include: - use the lowest bit of the cached host phys address to carry the dirty bit; - mark pages dirty when they are unpinned which happens when the preregistered memory is released which always happens in virtual mode; - add mm_iommu_ua_mark_dirty_rm() helper to set delayed dirty bit; - change iommu_tce_xchg_rm() to take the kvm struct for the mm to use in the new mm_iommu_ua_mark_dirty_rm() helper; - move iommu_tce_xchg_rm() to book3s_64_vio_hv.c (which is the only caller anyway) to reduce the real mode KVM and IOMMU knowledge across different subsystems. This removes realmode_pfn_to_page() as it is not used anymore. While we at it, remove some EXPORT_SYMBOL_GPL() as that code is for the real mode only and modules cannot call it anyway. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
This commit is contained in:
parent
bdf7ffc899
commit
425333bf3a
@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
|
||||
return hash__vmemmap_remove_mapping(start, page_size);
|
||||
}
|
||||
#endif
|
||||
struct page *realmode_pfn_to_page(unsigned long pfn);
|
||||
|
||||
static inline pte_t pmd_pte(pmd_t pmd)
|
||||
{
|
||||
|
@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev);
|
||||
extern int __init tce_iommu_bus_notifier_init(void);
|
||||
extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
|
||||
unsigned long *hpa, enum dma_data_direction *direction);
|
||||
extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
|
||||
unsigned long *hpa, enum dma_data_direction *direction);
|
||||
#else
|
||||
static inline void iommu_register_group(struct iommu_table_group *table_group,
|
||||
int pci_domain_number,
|
||||
|
@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
|
||||
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
|
||||
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
|
||||
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
|
||||
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
|
||||
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
|
||||
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
|
||||
#endif
|
||||
|
@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
|
||||
unsigned long *hpa, enum dma_data_direction *direction)
|
||||
{
|
||||
long ret;
|
||||
|
||||
ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
|
||||
|
||||
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
|
||||
(*direction == DMA_BIDIRECTIONAL))) {
|
||||
struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
|
||||
|
||||
if (likely(pg)) {
|
||||
SetPageDirty(pg);
|
||||
} else {
|
||||
tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
|
||||
ret = -EFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
|
||||
#endif
|
||||
|
||||
int iommu_take_ownership(struct iommu_table *tbl)
|
||||
{
|
||||
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
|
||||
|
@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
|
||||
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
|
||||
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
|
||||
unsigned long entry, unsigned long *hpa,
|
||||
enum dma_data_direction *direction)
|
||||
{
|
||||
long ret;
|
||||
|
||||
ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
|
||||
|
||||
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
|
||||
(*direction == DMA_BIDIRECTIONAL))) {
|
||||
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
|
||||
/*
|
||||
* kvmppc_rm_tce_iommu_do_map() updates the UA cache after
|
||||
* calling this so we still get here a valid UA.
|
||||
*/
|
||||
if (pua && *pua)
|
||||
mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
|
||||
unsigned long entry)
|
||||
{
|
||||
unsigned long hpa = 0;
|
||||
enum dma_data_direction dir = DMA_NONE;
|
||||
|
||||
iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
|
||||
iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
|
||||
}
|
||||
|
||||
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
|
||||
@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
|
||||
unsigned long hpa = 0;
|
||||
long ret;
|
||||
|
||||
if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
|
||||
if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
|
||||
/*
|
||||
* real mode xchg can fail if struct page crosses
|
||||
* a page boundary
|
||||
@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
|
||||
|
||||
ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
|
||||
if (ret)
|
||||
iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
|
||||
iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
|
||||
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
|
||||
return H_CLOSED;
|
||||
|
||||
ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
|
||||
ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
|
||||
if (ret) {
|
||||
mm_iommu_mapped_dec(mem);
|
||||
/*
|
||||
@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
|
||||
return ret;
|
||||
|
||||
WARN_ON_ONCE_RM(1);
|
||||
kvmppc_rm_clear_tce(stit->tbl, entry);
|
||||
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
|
||||
}
|
||||
|
||||
kvmppc_tce_put(stt, entry, tce);
|
||||
@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
|
||||
goto unlock_exit;
|
||||
|
||||
WARN_ON_ONCE_RM(1);
|
||||
kvmppc_rm_clear_tce(stit->tbl, entry);
|
||||
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
|
||||
}
|
||||
|
||||
kvmppc_tce_put(stt, entry + i, tce);
|
||||
@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
|
||||
return ret;
|
||||
|
||||
WARN_ON_ONCE_RM(1);
|
||||
kvmppc_rm_clear_tce(stit->tbl, entry);
|
||||
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr,
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not have access to the sparsemem vmemmap, so we fallback to
|
||||
* walking the list of sparsemem blocks which we already maintain for
|
||||
* the sake of crashdump. In the long run, we might want to maintain
|
||||
* a tree if performance of that linear walk becomes a problem.
|
||||
*
|
||||
* realmode_pfn_to_page functions can fail due to:
|
||||
* 1) As real sparsemem blocks do not lay in RAM continously (they
|
||||
* are in virtual address space which is not available in the real mode),
|
||||
* the requested page struct can be split between blocks so get_page/put_page
|
||||
* may fail.
|
||||
* 2) When huge pages are used, the get_page/put_page API will fail
|
||||
* in real mode as the linked addresses in the page struct are virtual
|
||||
* too.
|
||||
*/
|
||||
struct page *realmode_pfn_to_page(unsigned long pfn)
|
||||
{
|
||||
struct vmemmap_backing *vmem_back;
|
||||
struct page *page;
|
||||
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
|
||||
unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
|
||||
|
||||
for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
|
||||
if (pg_va < vmem_back->virt_addr)
|
||||
continue;
|
||||
|
||||
/* After vmemmap_list entry free is possible, need check all */
|
||||
if ((pg_va + sizeof(struct page)) <=
|
||||
(vmem_back->virt_addr + page_size)) {
|
||||
page = (struct page *) (vmem_back->phys + pg_va -
|
||||
vmem_back->virt_addr);
|
||||
return page;
|
||||
}
|
||||
}
|
||||
|
||||
/* Probably that page struct is split between real pages */
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
|
||||
|
||||
#else
|
||||
|
||||
struct page *realmode_pfn_to_page(unsigned long pfn)
|
||||
{
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
return page;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
|
||||
|
||||
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
|
@ -18,11 +18,15 @@
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pte-walk.h>
|
||||
|
||||
static DEFINE_MUTEX(mem_list_mutex);
|
||||
|
||||
#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1
|
||||
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1)
|
||||
|
||||
struct mm_iommu_table_group_mem_t {
|
||||
struct list_head next;
|
||||
struct rcu_head rcu;
|
||||
@ -263,6 +267,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
|
||||
if (!page)
|
||||
continue;
|
||||
|
||||
if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
|
||||
SetPageDirty(page);
|
||||
|
||||
put_page(page);
|
||||
mem->hpas[i] = 0;
|
||||
}
|
||||
@ -360,7 +367,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
|
||||
|
||||
struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
|
||||
unsigned long ua, unsigned long entries)
|
||||
@ -390,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
|
||||
if (pageshift > mem->pageshift)
|
||||
return -EFAULT;
|
||||
|
||||
*hpa = *va | (ua & ~PAGE_MASK);
|
||||
*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -413,11 +419,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
|
||||
if (!pa)
|
||||
return -EFAULT;
|
||||
|
||||
*hpa = *pa | (ua & ~PAGE_MASK);
|
||||
*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
|
||||
|
||||
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
|
||||
{
|
||||
struct mm_iommu_table_group_mem_t *mem;
|
||||
long entry;
|
||||
void *va;
|
||||
unsigned long *pa;
|
||||
|
||||
mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
|
||||
if (!mem)
|
||||
return;
|
||||
|
||||
entry = (ua - mem->ua) >> PAGE_SHIFT;
|
||||
va = &mem->hpas[entry];
|
||||
|
||||
pa = (void *) vmalloc_to_phys(va);
|
||||
if (!pa)
|
||||
return;
|
||||
|
||||
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
|
||||
}
|
||||
|
||||
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user