iommu/iova: Extend rbtree node caching
The cached node mechanism provides a significant performance benefit for
allocations using a 32-bit DMA mask, but in the case of non-PCI devices
or where the 32-bit space is full, the loss of this benefit can be
significant - on large systems there can be many thousands of entries in
the tree, such that walking all the way down to find free space every
time becomes increasingly awful. Maintain a similar cached node for the
whole IOVA space as a superset of the 32-bit space so that performance
can remain much more consistent.

Inspired by work by Zhen Lei <thunder.leizhen@huawei.com>.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Zhen Lei <thunder.leizhen@huawei.com>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
commit e60aa7b538 (parent 086c83acb7)
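Before the diff itself, here is a minimal sketch of the lookup order the commit message describes: prefer the 32-bit cache for 32-bit-limited requests, fall back to the whole-space cache, and only walk from the top of the tree when neither cache is populated. This is an illustrative user-space model, not the kernel code; the toy_domain/toy_node/pick_start names, the 0x100000 boundary, and the printed strings are invented for the example.

/*
 * Illustrative user-space model only -- NOT the kernel code. The
 * decision logic mirrors the shape of the patched __get_cached_rbnode():
 * try the 32-bit cache, fall back to the whole-space cache, clamp the
 * limit to the cached node, else start from the top of the tree.
 */
#include <stddef.h>
#include <stdio.h>

struct toy_node {
	unsigned long pfn_lo;		/* lowest pfn of the cached allocation */
};

struct toy_domain {
	struct toy_node *cached_node;	/* last allocation anywhere in the space */
	struct toy_node *cached32_node;	/* last allocation below the 32-bit line */
	unsigned long dma_32bit_pfn;
};

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/* Pick a starting point for the backwards walk and clamp the limit to it. */
static const char *pick_start(struct toy_domain *d, unsigned long *limit_pfn)
{
	struct toy_node *cached = NULL;

	if (*limit_pfn <= d->dma_32bit_pfn)
		cached = d->cached32_node;
	if (!cached)
		cached = d->cached_node;
	if (!cached)
		return "rb_last(): walk down from the very top of the tree";

	*limit_pfn = min_ul(*limit_pfn, cached->pfn_lo);
	return "rb_prev(cached): walk from just below the cached node";
}

int main(void)
{
	struct toy_node low  = { .pfn_lo = 0x80000 };	/* below the boundary */
	struct toy_node high = { .pfn_lo = 0x900000 };	/* above the boundary */
	struct toy_domain d = {
		.cached_node	= &high,
		.cached32_node	= &low,
		.dma_32bit_pfn	= 0x100000,
	};
	unsigned long limit;

	limit = 0x100000;	/* request constrained to the 32-bit space */
	printf("32-bit request: %s, limit clamped to %#lx\n",
	       pick_start(&d, &limit), limit);

	limit = ~0UL;		/* request allowed to use the whole space */
	printf("64-bit request: %s, limit clamped to %#lx\n",
	       pick_start(&d, &limit), limit);
	return 0;
}

The same fallback is what keeps 64-bit allocations from repeatedly walking past thousands of 32-bit entries once that part of the space has filled up.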
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -48,6 +48,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 
 	spin_lock_init(&iovad->iova_rbtree_lock);
 	iovad->rbroot = RB_ROOT;
+	iovad->cached_node = NULL;
 	iovad->cached32_node = NULL;
 	iovad->granule = granule;
 	iovad->start_pfn = start_pfn;
@@ -110,48 +111,44 @@ EXPORT_SYMBOL_GPL(init_iova_flush_queue);
 static struct rb_node *
 __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
 {
-	if ((*limit_pfn > iovad->dma_32bit_pfn) ||
-		(iovad->cached32_node == NULL))
+	struct rb_node *cached_node = NULL;
+	struct iova *curr_iova;
+
+	if (*limit_pfn <= iovad->dma_32bit_pfn)
+		cached_node = iovad->cached32_node;
+	if (!cached_node)
+		cached_node = iovad->cached_node;
+	if (!cached_node)
 		return rb_last(&iovad->rbroot);
-	else {
-		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
-		struct iova *curr_iova =
-			rb_entry(iovad->cached32_node, struct iova, node);
-		*limit_pfn = curr_iova->pfn_lo;
-		return prev_node;
-	}
+
+	curr_iova = rb_entry(cached_node, struct iova, node);
+	*limit_pfn = min(*limit_pfn, curr_iova->pfn_lo);
+
+	return rb_prev(cached_node);
 }
 
 static void
-__cached_rbnode_insert_update(struct iova_domain *iovad,
-	unsigned long limit_pfn, struct iova *new)
+__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
 {
-	if (limit_pfn != iovad->dma_32bit_pfn)
-		return;
-	iovad->cached32_node = &new->node;
+	if (new->pfn_hi < iovad->dma_32bit_pfn)
+		iovad->cached32_node = &new->node;
+	else
+		iovad->cached_node = &new->node;
 }
 
 static void
 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 {
 	struct iova *cached_iova;
-	struct rb_node *curr;
 
-	if (!iovad->cached32_node)
-		return;
-	curr = iovad->cached32_node;
-	cached_iova = rb_entry(curr, struct iova, node);
+	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
+	if (free->pfn_hi < iovad->dma_32bit_pfn &&
+	    iovad->cached32_node && free->pfn_lo >= cached_iova->pfn_lo)
+		iovad->cached32_node = rb_next(&free->node);
 
-	if (free->pfn_lo >= cached_iova->pfn_lo) {
-		struct rb_node *node = rb_next(&free->node);
-		struct iova *iova = rb_entry(node, struct iova, node);
-
-		/* only cache if it's below 32bit pfn */
-		if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
-			iovad->cached32_node = node;
-		else
-			iovad->cached32_node = NULL;
-	}
+	cached_iova = rb_entry(iovad->cached_node, struct iova, node);
+	if (iovad->cached_node && free->pfn_lo >= cached_iova->pfn_lo)
+		iovad->cached_node = rb_next(&free->node);
 }
 
 /* Insert the iova into domain rbtree by holding writer lock */
@@ -188,7 +185,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 {
 	struct rb_node *prev, *curr = NULL;
 	unsigned long flags;
-	unsigned long saved_pfn, new_pfn;
+	unsigned long new_pfn;
 	unsigned long align_mask = ~0UL;
 
 	if (size_aligned)
@@ -196,7 +193,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 
 	/* Walk the tree backwards */
 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-	saved_pfn = limit_pfn;
 	curr = __get_cached_rbnode(iovad, &limit_pfn);
 	prev = curr;
 	while (curr) {
@@ -226,7 +222,7 @@ move_left:
 
 	/* If we have 'prev', it's a valid place to start the insertion. */
 	iova_insert_rbtree(&iovad->rbroot, new, prev);
-	__cached_rbnode_insert_update(iovad, saved_pfn, new);
+	__cached_rbnode_insert_update(iovad, new);
 
 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
 
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -70,7 +70,8 @@ struct iova_fq {
 struct iova_domain {
 	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
 	struct rb_root	rbroot;		/* iova domain rbtree root */
-	struct rb_node	*cached32_node; /* Save last alloced node */
+	struct rb_node	*cached_node;	/* Save last alloced node */
+	struct rb_node	*cached32_node; /* Save last 32-bit alloced node */
 	unsigned long	granule;	/* pfn granularity for this domain */
 	unsigned long	start_pfn;	/* Lower limit for this domain */
 	unsigned long	dma_32bit_pfn;