xen/grants: support allocating consecutive grants

For support of virtio via grant mappings in rare cases larger mappings
using consecutive grants are needed. Support those by adding a bitmap
of free grants.

As consecutive grants will be needed only in very rare cases (e.g. when
configuring a virtio device with a multi-page ring), optimize for the
normal case of non-consecutive allocations.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/1654197833-25362-3-git-send-email-olekstysh@gmail.com
Signed-off-by: Juergen Gross <jgross@suse.com>
This commit is contained in:
Juergen Gross 2022-06-02 22:23:47 +03:00
parent 9bf22421dc
commit 02a9e681a3
2 changed files with 221 additions and 38 deletions

View File

@ -33,6 +33,7 @@
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
#include <linux/bitmap.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/mm.h> #include <linux/mm.h>
@ -70,9 +71,32 @@
static grant_ref_t **gnttab_list; static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames; static unsigned int nr_grant_frames;
/*
* Handling of free grants:
*
* Free grants are in a simple list anchored in gnttab_free_head. They are
* linked by grant ref, the last element contains GNTTAB_LIST_END. The number
* of free entries is stored in gnttab_free_count.
* Additionally there is a bitmap of free entries anchored in
* gnttab_free_bitmap. This is being used for simplifying allocation of
* multiple consecutive grants, which is needed e.g. for support of virtio.
* gnttab_last_free is used to add free entries of new frames at the end of
* the free list.
* gnttab_free_tail_ptr specifies the variable which references the start
* of consecutive free grants ending with gnttab_last_free. This pointer is
* updated in a rather defensive way, in order to avoid performance hits in
* hot paths.
* All those variables are protected by gnttab_list_lock.
*/
static int gnttab_free_count; static int gnttab_free_count;
static grant_ref_t gnttab_free_head; static unsigned int gnttab_size;
static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
static grant_ref_t *gnttab_free_tail_ptr;
static unsigned long *gnttab_free_bitmap;
static DEFINE_SPINLOCK(gnttab_list_lock); static DEFINE_SPINLOCK(gnttab_list_lock);
struct grant_frames xen_auto_xlat_grant_frames; struct grant_frames xen_auto_xlat_grant_frames;
static unsigned int xen_gnttab_version; static unsigned int xen_gnttab_version;
module_param_named(version, xen_gnttab_version, uint, 0); module_param_named(version, xen_gnttab_version, uint, 0);
@ -168,16 +192,116 @@ static int get_free_entries(unsigned count)
ref = head = gnttab_free_head; ref = head = gnttab_free_head;
gnttab_free_count -= count; gnttab_free_count -= count;
while (count-- > 1) while (count--) {
head = gnttab_entry(head); bitmap_clear(gnttab_free_bitmap, head, 1);
if (gnttab_free_tail_ptr == __gnttab_entry(head))
gnttab_free_tail_ptr = &gnttab_free_head;
if (count)
head = gnttab_entry(head);
}
gnttab_free_head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head);
gnttab_entry(head) = GNTTAB_LIST_END; gnttab_entry(head) = GNTTAB_LIST_END;
if (!gnttab_free_count) {
gnttab_last_free = GNTTAB_LIST_END;
gnttab_free_tail_ptr = NULL;
}
spin_unlock_irqrestore(&gnttab_list_lock, flags); spin_unlock_irqrestore(&gnttab_list_lock, flags);
return ref; return ref;
} }
static int get_seq_entry_count(void)
{
if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
*gnttab_free_tail_ptr == GNTTAB_LIST_END)
return 0;
return gnttab_last_free - *gnttab_free_tail_ptr + 1;
}
/* Rebuilds the free grant list and tries to find count consecutive entries. */
static int get_free_seq(unsigned int count)
{
int ret = -ENOSPC;
unsigned int from, to;
grant_ref_t *last;
gnttab_free_tail_ptr = &gnttab_free_head;
last = &gnttab_free_head;
for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
from < gnttab_size;
from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
from + 1);
if (ret < 0 && to - from >= count) {
ret = from;
bitmap_clear(gnttab_free_bitmap, ret, count);
from += count;
gnttab_free_count -= count;
if (from == to)
continue;
}
/*
* Recreate the free list in order to have it properly sorted.
* This is needed to make sure that the free tail has the maximum
* possible size.
*/
while (from < to) {
*last = from;
last = __gnttab_entry(from);
gnttab_last_free = from;
from++;
}
if (to < gnttab_size)
gnttab_free_tail_ptr = __gnttab_entry(to - 1);
}
*last = GNTTAB_LIST_END;
if (gnttab_last_free != gnttab_size - 1)
gnttab_free_tail_ptr = NULL;
return ret;
}
static int get_free_entries_seq(unsigned int count)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&gnttab_list_lock, flags);
if (gnttab_free_count < count) {
ret = gnttab_expand(count - gnttab_free_count);
if (ret < 0)
goto out;
}
if (get_seq_entry_count() < count) {
ret = get_free_seq(count);
if (ret >= 0)
goto out;
ret = gnttab_expand(count - get_seq_entry_count());
if (ret < 0)
goto out;
}
ret = *gnttab_free_tail_ptr;
*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
gnttab_free_count -= count;
if (!gnttab_free_count)
gnttab_free_tail_ptr = NULL;
bitmap_clear(gnttab_free_bitmap, ret, count);
out:
spin_unlock_irqrestore(&gnttab_list_lock, flags);
return ret;
}
static void do_free_callbacks(void) static void do_free_callbacks(void)
{ {
struct gnttab_free_callback *callback, *next; struct gnttab_free_callback *callback, *next;
@ -204,21 +328,51 @@ static inline void check_free_callbacks(void)
do_free_callbacks(); do_free_callbacks();
} }
static void put_free_entry_locked(grant_ref_t ref)
{
if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
return;
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = ref;
if (!gnttab_free_count)
gnttab_last_free = ref;
if (gnttab_free_tail_ptr == &gnttab_free_head)
gnttab_free_tail_ptr = __gnttab_entry(ref);
gnttab_free_count++;
bitmap_set(gnttab_free_bitmap, ref, 1);
}
static void put_free_entry(grant_ref_t ref) static void put_free_entry(grant_ref_t ref)
{ {
unsigned long flags; unsigned long flags;
if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
return;
spin_lock_irqsave(&gnttab_list_lock, flags); spin_lock_irqsave(&gnttab_list_lock, flags);
gnttab_entry(ref) = gnttab_free_head; put_free_entry_locked(ref);
gnttab_free_head = ref;
gnttab_free_count++;
check_free_callbacks(); check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags); spin_unlock_irqrestore(&gnttab_list_lock, flags);
} }
static void gnttab_set_free(unsigned int start, unsigned int n)
{
unsigned int i;
for (i = start; i < start + n - 1; i++)
gnttab_entry(i) = i + 1;
gnttab_entry(i) = GNTTAB_LIST_END;
if (!gnttab_free_count) {
gnttab_free_head = start;
gnttab_free_tail_ptr = &gnttab_free_head;
} else {
gnttab_entry(gnttab_last_free) = start;
}
gnttab_free_count += n;
gnttab_last_free = i;
bitmap_set(gnttab_free_bitmap, start, n);
}
/* /*
* Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2. * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
* Introducing a valid entry into the grant table: * Introducing a valid entry into the grant table:
@ -450,23 +604,31 @@ void gnttab_free_grant_references(grant_ref_t head)
{ {
grant_ref_t ref; grant_ref_t ref;
unsigned long flags; unsigned long flags;
int count = 1;
if (head == GNTTAB_LIST_END)
return;
spin_lock_irqsave(&gnttab_list_lock, flags); spin_lock_irqsave(&gnttab_list_lock, flags);
ref = head; while (head != GNTTAB_LIST_END) {
while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(head);
ref = gnttab_entry(ref); put_free_entry_locked(head);
count++; head = ref;
} }
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = head;
gnttab_free_count += count;
check_free_callbacks(); check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags); spin_unlock_irqrestore(&gnttab_list_lock, flags);
} }
EXPORT_SYMBOL_GPL(gnttab_free_grant_references); EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
{
unsigned long flags;
unsigned int i;
spin_lock_irqsave(&gnttab_list_lock, flags);
for (i = count; i > 0; i--)
put_free_entry_locked(head + i - 1);
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{ {
int h = get_free_entries(count); int h = get_free_entries(count);
@ -480,6 +642,24 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
} }
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
{
int h;
if (count == 1)
h = get_free_entries(1);
else
h = get_free_entries_seq(count);
if (h < 0)
return -ENOSPC;
*first = h;
return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
int gnttab_empty_grant_references(const grant_ref_t *private_head) int gnttab_empty_grant_references(const grant_ref_t *private_head)
{ {
return (*private_head == GNTTAB_LIST_END); return (*private_head == GNTTAB_LIST_END);
@ -572,16 +752,13 @@ static int grow_gnttab_list(unsigned int more_frames)
goto grow_nomem; goto grow_nomem;
} }
gnttab_set_free(gnttab_size, extra_entries);
for (i = grefs_per_frame * nr_grant_frames; if (!gnttab_free_tail_ptr)
i < grefs_per_frame * new_nr_grant_frames - 1; i++) gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
gnttab_entry(i) = i + 1;
gnttab_entry(i) = gnttab_free_head;
gnttab_free_head = grefs_per_frame * nr_grant_frames;
gnttab_free_count += extra_entries;
nr_grant_frames = new_nr_grant_frames; nr_grant_frames = new_nr_grant_frames;
gnttab_size += extra_entries;
check_free_callbacks(); check_free_callbacks();
@ -1424,20 +1601,20 @@ static int gnttab_expand(unsigned int req_entries)
int gnttab_init(void) int gnttab_init(void)
{ {
int i; int i;
unsigned long max_nr_grant_frames; unsigned long max_nr_grant_frames, max_nr_grefs;
unsigned int max_nr_glist_frames, nr_glist_frames; unsigned int max_nr_glist_frames, nr_glist_frames;
unsigned int nr_init_grefs;
int ret; int ret;
gnttab_request_version(); gnttab_request_version();
max_nr_grant_frames = gnttab_max_grant_frames(); max_nr_grant_frames = gnttab_max_grant_frames();
max_nr_grefs = max_nr_grant_frames *
gnttab_interface->grefs_per_grant_frame;
nr_grant_frames = 1; nr_grant_frames = 1;
/* Determine the maximum number of frames required for the /* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor. * grant reference free list on the current hypervisor.
*/ */
max_nr_glist_frames = (max_nr_grant_frames * max_nr_glist_frames = max_nr_grefs / RPP;
gnttab_interface->grefs_per_grant_frame / RPP);
gnttab_list = kmalloc_array(max_nr_glist_frames, gnttab_list = kmalloc_array(max_nr_glist_frames,
sizeof(grant_ref_t *), sizeof(grant_ref_t *),
@ -1454,6 +1631,12 @@ int gnttab_init(void)
} }
} }
gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
if (!gnttab_free_bitmap) {
ret = -ENOMEM;
goto ini_nomem;
}
ret = arch_gnttab_init(max_nr_grant_frames, ret = arch_gnttab_init(max_nr_grant_frames,
nr_status_frames(max_nr_grant_frames)); nr_status_frames(max_nr_grant_frames));
if (ret < 0) if (ret < 0)
@ -1464,15 +1647,10 @@ int gnttab_init(void)
goto ini_nomem; goto ini_nomem;
} }
nr_init_grefs = nr_grant_frames * gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
gnttab_interface->grefs_per_grant_frame;
for (i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES,
gnttab_entry(i) = i + 1; gnttab_size - GNTTAB_NR_RESERVED_ENTRIES);
gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
gnttab_free_count = nr_init_grefs - GNTTAB_NR_RESERVED_ENTRIES;
gnttab_free_head = GNTTAB_NR_RESERVED_ENTRIES;
printk("Grant table initialized\n"); printk("Grant table initialized\n");
return 0; return 0;
@ -1481,6 +1659,7 @@ int gnttab_init(void)
for (i--; i >= 0; i--) for (i--; i >= 0; i--)
free_page((unsigned long)gnttab_list[i]); free_page((unsigned long)gnttab_list[i]);
kfree(gnttab_list); kfree(gnttab_list);
bitmap_free(gnttab_free_bitmap);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(gnttab_init); EXPORT_SYMBOL_GPL(gnttab_init);

View File

@ -127,10 +127,14 @@ int gnttab_try_end_foreign_access(grant_ref_t ref);
*/ */
int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head); int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first);
void gnttab_free_grant_reference(grant_ref_t ref); void gnttab_free_grant_reference(grant_ref_t ref);
void gnttab_free_grant_references(grant_ref_t head); void gnttab_free_grant_references(grant_ref_t head);
void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count);
int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);