vfio/type1: Batch page pinning
Pinning one 4K page at a time is inefficient, so do it in batches of 512
instead.  This is just an optimization with no functional change intended,
and in particular the driver still calls iommu_map() with the largest
physically contiguous range possible.

Add two fields in vfio_batch to remember where to start between calls to
vfio_pin_pages_remote(), and use vfio_batch_unpin() to handle remaining
pages in the batch in case of error.

qemu pins pages for guests around 8% faster on my test system, a two-node
Broadwell server with 128G memory per node.  The qemu process was bound to
one node with its allocations constrained there as well.

                                base               test
           guest            ----------------   ----------------
         mem (GB)  speedup   avg sec (std)      avg sec (std)
                1     7.4%     0.61 (0.00)        0.56 (0.00)
                2     8.3%     0.93 (0.00)        0.85 (0.00)
                4     8.4%     1.46 (0.00)        1.34 (0.00)
                8     8.6%     2.54 (0.01)        2.32 (0.00)
               16     8.3%     4.66 (0.00)        4.27 (0.01)
               32     8.3%     8.94 (0.01)        8.20 (0.01)
               64     8.2%    17.47 (0.01)       16.04 (0.03)
              120     8.5%    32.45 (0.13)       29.69 (0.01)

perf diff confirms less time spent in pup.  Here are the top ten functions:

             Baseline  Delta Abs  Symbol

               78.63%     +6.64%  clear_page_erms
                1.50%     -1.50%  __gup_longterm_locked
                1.27%     -0.78%  __get_user_pages
                          +0.76%  kvm_zap_rmapp.constprop.0
                0.54%     -0.53%  vmacache_find
                0.55%     -0.51%  get_pfnblock_flags_mask
                0.48%     -0.48%  __get_user_pages_remote
                          +0.39%  slot_rmap_walk_next
                          +0.32%  vfio_pin_map_dma
                          +0.26%  kvm_handle_hva_range
                ...

Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 4b6c33b322
commit 4d83de6da2
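To make the batching idea concrete before the diff, here is a minimal userspace sketch of the pattern the commit message describes; it is not the kernel code. The 512-entry capacity mirrors the batch size from the commit message, and fake_pin_range() is a hypothetical stand-in for vaddr_get_pfns()/pin_user_pages_remote(). The point is only that a size/offset cursor lets one expensive pinning call feed many per-page iterations, with leftovers carried from one call to the next.

#include <stdio.h>

#define BATCH_CAPACITY 512	/* mirrors VFIO_BATCH_MAX_CAPACITY in the patch */

struct batch {
	unsigned long pfns[BATCH_CAPACITY];
	int size;	/* entries not yet consumed */
	int offset;	/* index of the next unconsumed entry */
};

/* Hypothetical stand-in for vaddr_get_pfns(): "pins" up to req pages at once. */
static long fake_pin_range(unsigned long vaddr, long req, unsigned long *pfns)
{
	for (long i = 0; i < req; i++)
		pfns[i] = (vaddr >> 12) + i;	/* identity vaddr->pfn for the demo */
	return req;
}

int main(void)
{
	struct batch batch = { .size = 0, .offset = 0 };
	unsigned long vaddr = 0x100000;
	long npage = 1200, pinned = 0, pin_calls = 0;

	while (npage) {
		if (!batch.size) {		/* empty batch, so refill it */
			long req = npage < BATCH_CAPACITY ? npage : BATCH_CAPACITY;

			batch.size = fake_pin_range(vaddr, req, batch.pfns);
			batch.offset = 0;
			pin_calls++;
		}
		/* Consume one entry per page; the kernel also checks contiguity here. */
		batch.offset++;
		batch.size--;
		pinned++;
		npage--;
		vaddr += 4096;
	}
	printf("pinned %ld pages with %ld pin calls instead of %ld\n",
	       pinned, pin_calls, pinned);
	return 0;
}

With 1200 pages this performs 3 refill calls instead of 1200 single-page calls, which is the effect the measurements in the commit message attribute to the patch.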
@@ -107,6 +107,8 @@ struct vfio_batch {
 	struct page		**pages;	/* for pin_user_pages_remote */
 	struct page		*fallback_page; /* if pages alloc fails */
 	int			capacity;	/* length of pages array */
+	int			size;		/* of batch currently */
+	int			offset;		/* of next entry in pages */
 };
 
 struct vfio_group {
@@ -469,6 +471,9 @@ static int put_pfn(unsigned long pfn, int prot)
 
 static void vfio_batch_init(struct vfio_batch *batch)
 {
+	batch->size = 0;
+	batch->offset = 0;
+
 	if (unlikely(disable_hugepages))
 		goto fallback;
 
@@ -484,6 +489,17 @@ fallback:
 	batch->capacity = 1;
 }
 
+static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
+{
+	while (batch->size) {
+		unsigned long pfn = page_to_pfn(batch->pages[batch->offset]);
+
+		put_pfn(pfn, dma->prot);
+		batch->offset++;
+		batch->size--;
+	}
+}
+
 static void vfio_batch_fini(struct vfio_batch *batch)
 {
 	if (batch->capacity == VFIO_BATCH_MAX_CAPACITY)
@@ -635,65 +651,88 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 				  long npage, unsigned long *pfn_base,
 				  unsigned long limit, struct vfio_batch *batch)
 {
-	unsigned long pfn = 0;
+	unsigned long pfn;
+	struct mm_struct *mm = current->mm;
 	long ret, pinned = 0, lock_acct = 0;
 	bool rsvd;
 	dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
 
 	/* This code path is only user initiated */
-	if (!current->mm)
+	if (!mm)
 		return -ENODEV;
 
-	ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, pfn_base,
-			     batch->pages);
-	if (ret < 0)
-		return ret;
-
-	pinned++;
-	rsvd = is_invalid_reserved_pfn(*pfn_base);
-
-	/*
-	 * Reserved pages aren't counted against the user, externally pinned
-	 * pages are already counted against the user.
-	 */
-	if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-		if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
-			put_pfn(*pfn_base, dma->prot);
-			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
-					limit << PAGE_SHIFT);
-			return -ENOMEM;
-		}
-		lock_acct++;
+	if (batch->size) {
+		/* Leftover pages in batch from an earlier call. */
+		*pfn_base = page_to_pfn(batch->pages[batch->offset]);
+		pfn = *pfn_base;
+		rsvd = is_invalid_reserved_pfn(*pfn_base);
+	} else {
+		*pfn_base = 0;
 	}
 
-	if (unlikely(disable_hugepages))
-		goto out;
-
-	/* Lock all the consecutive pages from pfn_base */
-	for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
-	     pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-		ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, &pfn,
-				     batch->pages);
-		if (ret < 0)
-			break;
-
-		if (pfn != *pfn_base + pinned ||
-		    rsvd != is_invalid_reserved_pfn(pfn)) {
-			put_pfn(pfn, dma->prot);
-			break;
-		}
-
-		if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-			if (!dma->lock_cap &&
-			    current->mm->locked_vm + lock_acct + 1 > limit) {
-				put_pfn(pfn, dma->prot);
-				pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
-					__func__, limit << PAGE_SHIFT);
-				ret = -ENOMEM;
-				goto unpin_out;
+	while (npage) {
+		if (!batch->size) {
+			/* Empty batch, so refill it. */
+			long req_pages = min_t(long, npage, batch->capacity);
+
+			ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
+					     &pfn, batch->pages);
+			if (ret < 0)
+				goto unpin_out;
+
+			batch->size = ret;
+			batch->offset = 0;
+
+			if (!*pfn_base) {
+				*pfn_base = pfn;
+				rsvd = is_invalid_reserved_pfn(*pfn_base);
 			}
-			lock_acct++;
 		}
+
+		/*
+		 * pfn is preset for the first iteration of this inner loop and
+		 * updated at the end to handle a VM_PFNMAP pfn.  In that case,
+		 * batch->pages isn't valid (there's no struct page), so allow
+		 * batch->pages to be touched only when there's more than one
+		 * pfn to check, which guarantees the pfns are from a
+		 * !VM_PFNMAP vma.
+		 */
+		while (true) {
+			if (pfn != *pfn_base + pinned ||
+			    rsvd != is_invalid_reserved_pfn(pfn))
+				goto out;
+
+			/*
+			 * Reserved pages aren't counted against the user,
+			 * externally pinned pages are already counted against
+			 * the user.
+			 */
+			if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+				if (!dma->lock_cap &&
+				    mm->locked_vm + lock_acct + 1 > limit) {
+					pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
+						__func__, limit << PAGE_SHIFT);
+					ret = -ENOMEM;
+					goto unpin_out;
+				}
+				lock_acct++;
+			}
+
+			pinned++;
+			npage--;
+			vaddr += PAGE_SIZE;
+			iova += PAGE_SIZE;
+			batch->offset++;
+			batch->size--;
+
+			if (!batch->size)
+				break;
+
+			pfn = page_to_pfn(batch->pages[batch->offset]);
+		}
+
+		if (unlikely(disable_hugepages))
+			break;
 	}
 
 out:
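For context on the `pfn != *pfn_base + pinned` test kept in the hunk above: the commit message stresses that iommu_map() is still called with the largest physically contiguous range possible, so the inner loop stops as soon as a pfn no longer extends the run that began at *pfn_base. A small, hedged userspace illustration of that cut-off (the pfn values are made up):

#include <stdio.h>

/* Count how many leading entries extend a contiguous run starting at pfn_base. */
static long count_contiguous(const unsigned long *pfns, long n, unsigned long pfn_base)
{
	long pinned = 0;

	while (pinned < n && pfns[pinned] == pfn_base + pinned)
		pinned++;
	return pinned;
}

int main(void)
{
	/* A pinned batch whose fourth entry breaks physical contiguity. */
	unsigned long pfns[] = { 0x1000, 0x1001, 0x1002, 0x2000, 0x2001 };
	long run = count_contiguous(pfns, 5, pfns[0]);

	/* A caller would map this run in one go, then start a new run at pfns[run]. */
	printf("contiguous run: %ld pages starting at pfn 0x%lx\n", run, pfns[0]);
	return 0;
}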
@@ -701,10 +740,11 @@ out:
 
 unpin_out:
 	if (ret < 0) {
-		if (!rsvd) {
+		if (pinned && !rsvd) {
 			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
 				put_pfn(pfn, dma->prot);
 		}
+		vfio_batch_unpin(batch, dma);
 
 		return ret;
 	}
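The error path above now has two populations of pages to release: the `pinned` pages already accepted into the contiguous run starting at *pfn_base, and any pages still sitting unconsumed in the batch, which vfio_batch_unpin() drops. A hedged userspace sketch of that two-part unwind, with a hypothetical drop_page_ref() standing in for put_pfn():

#include <stdio.h>

struct batch {
	unsigned long *pfns;
	int size;	/* unconsumed entries */
	int offset;	/* next unconsumed entry */
};

/* Hypothetical stand-in for put_pfn(): just log the release here. */
static void drop_page_ref(unsigned long pfn)
{
	printf("release pfn 0x%lx\n", pfn);
}

/* Pages already accepted into the run: pfn_base, pfn_base + 1, ... */
static void unwind_accepted(unsigned long pfn_base, long pinned)
{
	for (unsigned long pfn = pfn_base; pinned; pfn++, pinned--)
		drop_page_ref(pfn);
}

/* Pages still sitting in the batch, mirroring what vfio_batch_unpin() does. */
static void unwind_batch(struct batch *batch)
{
	while (batch->size) {
		drop_page_ref(batch->pfns[batch->offset]);
		batch->offset++;
		batch->size--;
	}
}

int main(void)
{
	unsigned long leftover[] = { 0x1003, 0x1004 };
	struct batch batch = { .pfns = leftover, .size = 2, .offset = 0 };

	unwind_accepted(0x1000, 3);	/* three pages already in the contiguous run */
	unwind_batch(&batch);		/* two pages pinned but not yet consumed */
	return 0;
}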
@@ -1463,6 +1503,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		if (ret) {
 			vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
 						npage, true);
+			vfio_batch_unpin(&batch, dma);
 			break;
 		}
 
@@ -1726,11 +1767,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			ret = iommu_map(domain->domain, iova, phys,
 					size, dma->prot | domain->prot);
 			if (ret) {
-				if (!dma->iommu_mapped)
+				if (!dma->iommu_mapped) {
 					vfio_unpin_pages_remote(dma, iova,
 							phys >> PAGE_SHIFT,
 							size >> PAGE_SHIFT,
 							true);
+					vfio_batch_unpin(&batch, dma);
+				}
 				goto unwind;
 			}
 