virtio, vhost, balloon: bugfixes

A couple of last minute bugfixes. And a revert of a failed attempt at
 metadata access optimization - we'll try again in the next cycle.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJdb6MSAAoJECgfDbjSjVRpWq0IALJqn4RTQJiFUg4pa6qV1Uxb
 DJtHCmYhW+m9VB+5gmKJ9ugFcBbdbSEy81kwrc6lKywTttevk+whrlRry49ufbMx
 htoRFGG4gm2RgmXNkV92RQwrz0ajtG0hjm3/Gaxi2OzOudpB4/DJnUcXJKEa2UvD
 qAH4n9SN6QXQ6zfU20EvNyA0++RwIkg9xx0r5IZ8eddOlS5tqFasr7TkMBr7Tj9V
 a1QkCVGfCDUBpthMwrOuJpYkTWf2vRyarqWUvxsJbFqyECossHIYM7EWGu8apFYW
 pbQbn8bXVNNJoA8ERmCkiptHQALK8qeONu0MOarnDVRXvGni4OHTuXJfMYyCkEY=
 =RqIf
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael Tsirkin:
 "virtio, vhost, and balloon bugfixes.

  A couple of last minute bugfixes. And a revert of a failed attempt at
  metadata access optimization - we'll try again in the next cycle"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  mm/balloon_compaction: suppress allocation warnings
  Revert "vhost: access vq metadata through kernel virtual address"
  vhost: Remove unnecessary variable
  virtio-net: lower min ring num_free for efficiency
  vhost/test: fix build for vhost test
  vhost/test: fix build for vhost test
Merge commit 9d098a6234 by Linus Torvalds, 2019-09-06 08:56:06 -07:00
5 changed files with 17 additions and 562 deletions

drivers/net/virtio_net.c

@@ -1331,7 +1331,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
                 }
         }
 
-        if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
+        if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
                 if (!try_fill_recv(vi, rq, GFP_ATOMIC))
                         schedule_delayed_work(&vi->refill, 0);
         }
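The hunk above caps the RX refill threshold by the NAPI budget as well as by the ring size. The standalone sketch below is illustrative only (plain C; the budget and ring-size values are assumptions, not taken from the patch) and simply compares the old and new thresholds:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
        unsigned int vring_size = 256;  /* example RX ring size */
        unsigned int budget     = 64;   /* typical NAPI poll budget */

        /* Old rule: refill once more than half of the whole ring is free. */
        unsigned int old_thresh = vring_size / 2;               /* 128 */

        /* New rule: also cap the threshold at the NAPI budget. */
        unsigned int new_thresh = MIN(budget, vring_size) / 2;  /*  32 */

        printf("refill when num_free > %u (old) vs > %u (new)\n",
               old_thresh, new_thresh);
        return 0;
}

With a 256-entry ring the old rule waited until 128 slots had drained before attempting a refill; the new rule attempts it after about half a polling budget, so the ring is replenished sooner under load.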

drivers/vhost/test.c

@@ -22,6 +22,12 @@
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_TEST_WEIGHT 0x80000
 
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with
+ * pkts.
+ */
+#define VHOST_TEST_PKT_WEIGHT 256
+
 enum {
         VHOST_TEST_VQ = 0,
         VHOST_TEST_VQ_MAX = 1,
@@ -80,11 +86,9 @@ static void handle_vq(struct vhost_test *n)
                 }
                 vhost_add_used_and_signal(&n->dev, vq, head, 0);
                 total_len += len;
-                if (unlikely(total_len >= VHOST_TEST_WEIGHT)) {
-                        vhost_poll_queue(&vq->poll);
+                if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
                         break;
-                }
         }
 
         mutex_unlock(&vq->mutex);
 }
@@ -115,7 +119,8 @@ static int vhost_test_open(struct inode *inode, struct file *f)
         dev = &n->dev;
         vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
         n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
-        vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX);
+        vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
+                       VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
 
         f->private_data = n;
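The handle_vq() hunk above replaces the open-coded byte-count check, and its explicit vhost_poll_queue() call, with vhost_exceeds_weight(); the packet and byte limits are now handed to vhost_dev_init() as shown. Below is a minimal, self-contained model of that weight check. It is a sketch under assumptions: exceeds_weight() and struct weights are made-up illustrative names, and the real helper in drivers/vhost/vhost.c additionally requeues the virtqueue poll when a limit is hit, which is why the explicit requeue above could be dropped.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model only -- not the kernel's vhost_exceeds_weight(). */
struct weights {
        int pkt_weight;   /* max packets per run of the handler */
        int byte_weight;  /* max bytes per run of the handler */
};

static bool exceeds_weight(const struct weights *w, int pkts, int total_len)
{
        /* Stop (and, in the kernel, requeue the work) once either budget is hit. */
        return (w->byte_weight && total_len >= w->byte_weight) ||
               pkts >= w->pkt_weight;
}

int main(void)
{
        /* Limits taken from the vhost/test change above. */
        struct weights w = { .pkt_weight = 256, .byte_weight = 0x80000 };

        printf("%d\n", exceeds_weight(&w, 0, 0x80000)); /* 1: byte limit hit */
        printf("%d\n", exceeds_weight(&w, 300, 1000));  /* 1: packet limit hit */
        printf("%d\n", exceeds_weight(&w, 10, 1500));   /* 0: keep processing */
        return 0;
}

Note that the test device calls the helper with pkts fixed at 0, so in practice only the byte weight (VHOST_TEST_WEIGHT) bounds a single run of the handler.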

drivers/vhost/vhost.c

@@ -203,7 +203,6 @@ EXPORT_SYMBOL_GPL(vhost_poll_init);
 int vhost_poll_start(struct vhost_poll *poll, struct file *file)
 {
         __poll_t mask;
-        int ret = 0;
 
         if (poll->wqh)
                 return 0;
@@ -213,10 +212,10 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
                 vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
         if (mask & EPOLLERR) {
                 vhost_poll_stop(poll);
-                ret = -EINVAL;
+                return -EINVAL;
         }
 
-        return ret;
+        return 0;
 }
 EXPORT_SYMBOL_GPL(vhost_poll_start);
@@ -298,160 +297,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
                 __vhost_vq_meta_reset(d->vqs[i]);
 }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_map_unprefetch(struct vhost_map *map)
-{
-        kfree(map->pages);
-        map->pages = NULL;
-        map->npages = 0;
-        map->addr = NULL;
-}
-
-static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
-{
-        struct vhost_map *map[VHOST_NUM_ADDRS];
-        int i;
-
-        spin_lock(&vq->mmu_lock);
-        for (i = 0; i < VHOST_NUM_ADDRS; i++) {
-                map[i] = rcu_dereference_protected(vq->maps[i],
-                                                   lockdep_is_held(&vq->mmu_lock));
-                if (map[i])
-                        rcu_assign_pointer(vq->maps[i], NULL);
-        }
-        spin_unlock(&vq->mmu_lock);
-
-        synchronize_rcu();
-
-        for (i = 0; i < VHOST_NUM_ADDRS; i++)
-                if (map[i])
-                        vhost_map_unprefetch(map[i]);
-}
-
-static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
-{
-        int i;
-
-        vhost_uninit_vq_maps(vq);
-        for (i = 0; i < VHOST_NUM_ADDRS; i++)
-                vq->uaddrs[i].size = 0;
-}
-
-static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
-                                    unsigned long start,
-                                    unsigned long end)
-{
-        if (unlikely(!uaddr->size))
-                return false;
-
-        return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
-}
-
-static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
-                                      int index,
-                                      unsigned long start,
-                                      unsigned long end)
-{
-        struct vhost_uaddr *uaddr = &vq->uaddrs[index];
-        struct vhost_map *map;
-        int i;
-
-        if (!vhost_map_range_overlap(uaddr, start, end))
-                return;
-
-        spin_lock(&vq->mmu_lock);
-        ++vq->invalidate_count;
-
-        map = rcu_dereference_protected(vq->maps[index],
-                                        lockdep_is_held(&vq->mmu_lock));
-        if (map) {
-                if (uaddr->write) {
-                        for (i = 0; i < map->npages; i++)
-                                set_page_dirty(map->pages[i]);
-                }
-                rcu_assign_pointer(vq->maps[index], NULL);
-        }
-        spin_unlock(&vq->mmu_lock);
-
-        if (map) {
-                synchronize_rcu();
-                vhost_map_unprefetch(map);
-        }
-}
-
-static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
-                                    int index,
-                                    unsigned long start,
-                                    unsigned long end)
-{
-        if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
-                return;
-
-        spin_lock(&vq->mmu_lock);
-        --vq->invalidate_count;
-        spin_unlock(&vq->mmu_lock);
-}
-
-static int vhost_invalidate_range_start(struct mmu_notifier *mn,
-                                        const struct mmu_notifier_range *range)
-{
-        struct vhost_dev *dev = container_of(mn, struct vhost_dev,
-                                             mmu_notifier);
-        int i, j;
-
-        if (!mmu_notifier_range_blockable(range))
-                return -EAGAIN;
-
-        for (i = 0; i < dev->nvqs; i++) {
-                struct vhost_virtqueue *vq = dev->vqs[i];
-
-                for (j = 0; j < VHOST_NUM_ADDRS; j++)
-                        vhost_invalidate_vq_start(vq, j,
-                                                  range->start,
-                                                  range->end);
-        }
-
-        return 0;
-}
-
-static void vhost_invalidate_range_end(struct mmu_notifier *mn,
-                                       const struct mmu_notifier_range *range)
-{
-        struct vhost_dev *dev = container_of(mn, struct vhost_dev,
-                                             mmu_notifier);
-        int i, j;
-
-        for (i = 0; i < dev->nvqs; i++) {
-                struct vhost_virtqueue *vq = dev->vqs[i];
-
-                for (j = 0; j < VHOST_NUM_ADDRS; j++)
-                        vhost_invalidate_vq_end(vq, j,
-                                                range->start,
-                                                range->end);
-        }
-}
-
-static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
-        .invalidate_range_start = vhost_invalidate_range_start,
-        .invalidate_range_end = vhost_invalidate_range_end,
-};
-
-static void vhost_init_maps(struct vhost_dev *dev)
-{
-        struct vhost_virtqueue *vq;
-        int i, j;
-
-        dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
-        for (i = 0; i < dev->nvqs; ++i) {
-                vq = dev->vqs[i];
-                for (j = 0; j < VHOST_NUM_ADDRS; j++)
-                        RCU_INIT_POINTER(vq->maps[j], NULL);
-        }
-}
-#endif
-
 static void vhost_vq_reset(struct vhost_dev *dev,
                            struct vhost_virtqueue *vq)
 {
@@ -480,11 +325,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
         vq->busyloop_timeout = 0;
         vq->umem = NULL;
         vq->iotlb = NULL;
-        vq->invalidate_count = 0;
         __vhost_vq_meta_reset(vq);
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        vhost_reset_vq_maps(vq);
-#endif
 }
 
 static int vhost_worker(void *data)
@@ -634,9 +475,7 @@ void vhost_dev_init(struct vhost_dev *dev,
         INIT_LIST_HEAD(&dev->read_list);
         INIT_LIST_HEAD(&dev->pending_list);
         spin_lock_init(&dev->iotlb_lock);
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        vhost_init_maps(dev);
-#endif
 
         for (i = 0; i < dev->nvqs; ++i) {
                 vq = dev->vqs[i];
@@ -645,7 +484,6 @@ void vhost_dev_init(struct vhost_dev *dev,
                 vq->heads = NULL;
                 vq->dev = dev;
                 mutex_init(&vq->mutex);
-                spin_lock_init(&vq->mmu_lock);
                 vhost_vq_reset(dev, vq);
                 if (vq->handle_kick)
                         vhost_poll_init(&vq->poll, vq->handle_kick,
@@ -725,18 +563,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
         if (err)
                 goto err_cgroup;
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
-        if (err)
-                goto err_mmu_notifier;
-#endif
-
         return 0;
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-err_mmu_notifier:
-        vhost_dev_free_iovecs(dev);
-#endif
 err_cgroup:
         kthread_stop(worker);
         dev->worker = NULL;
@@ -827,107 +654,6 @@ static void vhost_clear_msg(struct vhost_dev *dev)
         spin_unlock(&dev->iotlb_lock);
 }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
-                              int index, unsigned long uaddr,
-                              size_t size, bool write)
-{
-        struct vhost_uaddr *addr = &vq->uaddrs[index];
-
-        addr->uaddr = uaddr;
-        addr->size = size;
-        addr->write = write;
-}
-
-static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
-{
-        vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
-                          (unsigned long)vq->desc,
-                          vhost_get_desc_size(vq, vq->num),
-                          false);
-        vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
-                          (unsigned long)vq->avail,
-                          vhost_get_avail_size(vq, vq->num),
-                          false);
-        vhost_setup_uaddr(vq, VHOST_ADDR_USED,
-                          (unsigned long)vq->used,
-                          vhost_get_used_size(vq, vq->num),
-                          true);
-}
-
-static int vhost_map_prefetch(struct vhost_virtqueue *vq,
-                              int index)
-{
-        struct vhost_map *map;
-        struct vhost_uaddr *uaddr = &vq->uaddrs[index];
-        struct page **pages;
-        int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
-        int npinned;
-        void *vaddr, *v;
-        int err;
-        int i;
-
-        spin_lock(&vq->mmu_lock);
-
-        err = -EFAULT;
-        if (vq->invalidate_count)
-                goto err;
-
-        err = -ENOMEM;
-        map = kmalloc(sizeof(*map), GFP_ATOMIC);
-        if (!map)
-                goto err;
-
-        pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
-        if (!pages)
-                goto err_pages;
-
-        err = EFAULT;
-        npinned = __get_user_pages_fast(uaddr->uaddr, npages,
-                                        uaddr->write, pages);
-        if (npinned > 0)
-                release_pages(pages, npinned);
-        if (npinned != npages)
-                goto err_gup;
-
-        for (i = 0; i < npinned; i++)
-                if (PageHighMem(pages[i]))
-                        goto err_gup;
-
-        vaddr = v = page_address(pages[0]);
-
-        /* For simplicity, fallback to userspace address if VA is not
-         * contigious.
-         */
-        for (i = 1; i < npinned; i++) {
-                v += PAGE_SIZE;
-                if (v != page_address(pages[i]))
-                        goto err_gup;
-        }
-
-        map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
-        map->npages = npages;
-        map->pages = pages;
-
-        rcu_assign_pointer(vq->maps[index], map);
-        /* No need for a synchronize_rcu(). This function should be
-         * called by dev->worker so we are serialized with all
-         * readers.
-         */
-        spin_unlock(&vq->mmu_lock);
-
-        return 0;
-
-err_gup:
-        kfree(pages);
-err_pages:
-        kfree(map);
-err:
-        spin_unlock(&vq->mmu_lock);
-        return err;
-}
-#endif
-
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
         int i;
@@ -957,16 +683,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
                 kthread_stop(dev->worker);
                 dev->worker = NULL;
         }
-        if (dev->mm) {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-                mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
-#endif
+        if (dev->mm)
                 mmput(dev->mm);
-        }
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        for (i = 0; i < dev->nvqs; i++)
-                vhost_uninit_vq_maps(dev->vqs[i]);
-#endif
         dev->mm = NULL;
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -1195,26 +913,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 
 static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_used *used;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-                if (likely(map)) {
-                        used = map->addr;
-                        *((__virtio16 *)&used->ring[vq->num]) =
-                                cpu_to_vhost16(vq, vq->avail_idx);
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
                               vhost_avail_event(vq));
 }
@@ -1223,27 +921,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
                                  struct vring_used_elem *head, int idx,
                                  int count)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_used *used;
-        size_t size;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-                if (likely(map)) {
-                        used = map->addr;
-                        size = count * sizeof(*head);
-                        memcpy(used->ring + idx, head, size);
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_copy_to_user(vq, vq->used->ring + idx, head,
                                   count * sizeof(*head));
 }
@@ -1251,25 +928,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 
 static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_used *used;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-                if (likely(map)) {
-                        used = map->addr;
-                        used->flags = cpu_to_vhost16(vq, vq->used_flags);
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
                               &vq->used->flags);
 }
@@ -1277,25 +935,6 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 
 static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_used *used;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-                if (likely(map)) {
-                        used = map->addr;
-                        used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
                               &vq->used->idx);
 }
@@ -1341,50 +980,12 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
 static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
                                       __virtio16 *idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_avail *avail;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-                if (likely(map)) {
-                        avail = map->addr;
-                        *idx = avail->idx;
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_get_avail(vq, *idx, &vq->avail->idx);
 }
 
 static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
                                        __virtio16 *head, int idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_avail *avail;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-                if (likely(map)) {
-                        avail = map->addr;
-                        *head = avail->ring[idx & (vq->num - 1)];
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_get_avail(vq, *head,
                                &vq->avail->ring[idx & (vq->num - 1)]);
 }
@@ -1392,98 +993,24 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
                                         __virtio16 *flags)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_avail *avail;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-                if (likely(map)) {
-                        avail = map->addr;
-                        *flags = avail->flags;
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_get_avail(vq, *flags, &vq->avail->flags);
 }
 
 static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
                                        __virtio16 *event)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_avail *avail;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
-                if (likely(map)) {
-                        avail = map->addr;
-                        *event = (__virtio16)avail->ring[vq->num];
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_get_avail(vq, *event, vhost_used_event(vq));
 }
 
 static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
                                      __virtio16 *idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_used *used;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
-                if (likely(map)) {
-                        used = map->addr;
-                        *idx = used->idx;
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_get_used(vq, *idx, &vq->used->idx);
 }
 
 static inline int vhost_get_desc(struct vhost_virtqueue *vq,
                                  struct vring_desc *desc, int idx)
 {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        struct vhost_map *map;
-        struct vring_desc *d;
-
-        if (!vq->iotlb) {
-                rcu_read_lock();
-
-                map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
-                if (likely(map)) {
-                        d = map->addr;
-                        *desc = *(d + idx);
-                        rcu_read_unlock();
-                        return 0;
-                }
-
-                rcu_read_unlock();
-        }
-#endif
-
         return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
 }
@@ -1824,32 +1351,12 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
         return true;
 }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
-{
-        struct vhost_map __rcu *map;
-        int i;
-
-        for (i = 0; i < VHOST_NUM_ADDRS; i++) {
-                rcu_read_lock();
-                map = rcu_dereference(vq->maps[i]);
-                rcu_read_unlock();
-                if (unlikely(!map))
-                        vhost_map_prefetch(vq, i);
-        }
-}
-#endif
-
 int vq_meta_prefetch(struct vhost_virtqueue *vq)
 {
         unsigned int num = vq->num;
 
-        if (!vq->iotlb) {
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-                vhost_vq_map_prefetch(vq);
-#endif
+        if (!vq->iotlb)
                 return 1;
-        }
 
         return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
                                vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
@@ -2060,16 +1567,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
 
         mutex_lock(&vq->mutex);
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        /* Unregister MMU notifer to allow invalidation callback
-         * can access vq->uaddrs[] without holding a lock.
-         */
-        if (d->mm)
-                mmu_notifier_unregister(&d->mmu_notifier, d->mm);
-
-        vhost_uninit_vq_maps(vq);
-#endif
-
         switch (ioctl) {
         case VHOST_SET_VRING_NUM:
                 r = vhost_vring_set_num(d, vq, argp);
@@ -2081,13 +1578,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
                 BUG();
         }
 
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        vhost_setup_vq_uaddr(vq);
-
-        if (d->mm)
-                mmu_notifier_register(&d->mmu_notifier, d->mm);
-#endif
-
         mutex_unlock(&vq->mutex);
 
         return r;

drivers/vhost/vhost.h

@@ -12,9 +12,6 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/atomic.h>
-#include <linux/pagemap.h>
-#include <linux/mmu_notifier.h>
-#include <asm/cacheflush.h>
 
 struct vhost_work;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -83,24 +80,6 @@ enum vhost_uaddr_type {
         VHOST_NUM_ADDRS = 3,
 };
 
-struct vhost_map {
-        int npages;
-        void *addr;
-        struct page **pages;
-};
-
-struct vhost_uaddr {
-        unsigned long uaddr;
-        size_t size;
-        bool write;
-};
-
-#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
-#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
-#else
-#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
-#endif
-
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
         struct vhost_dev *dev;
@@ -111,22 +90,7 @@ struct vhost_virtqueue {
         struct vring_desc __user *desc;
         struct vring_avail __user *avail;
         struct vring_used __user *used;
-
-#if VHOST_ARCH_CAN_ACCEL_UACCESS
-        /* Read by memory accessors, modified by meta data
-         * prefetching, MMU notifier and vring ioctl().
-         * Synchonrized through mmu_lock (writers) and RCU (writers
-         * and readers).
-         */
-        struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
-        /* Read by MMU notifier, modified by vring ioctl(),
-         * synchronized through MMU notifier
-         * registering/unregistering.
-         */
-        struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
-#endif
-
         const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
         struct file *kick;
         struct eventfd_ctx *call_ctx;
         struct eventfd_ctx *error_ctx;
@@ -181,8 +145,6 @@ struct vhost_virtqueue {
         bool user_be;
 #endif
         u32 busyloop_timeout;
-        spinlock_t mmu_lock;
-        int invalidate_count;
 };
 
 struct vhost_msg_node {
@@ -196,9 +158,6 @@ struct vhost_msg_node {
 
 struct vhost_dev {
         struct mm_struct *mm;
-#ifdef CONFIG_MMU_NOTIFIER
-        struct mmu_notifier mmu_notifier;
-#endif
         struct mutex mutex;
         struct vhost_virtqueue **vqs;
         int nvqs;

mm/balloon_compaction.c

@@ -124,7 +124,8 @@ EXPORT_SYMBOL_GPL(balloon_page_list_dequeue);
 struct page *balloon_page_alloc(void)
 {
         struct page *page = alloc_page(balloon_mapping_gfp_mask() |
-                                       __GFP_NOMEMALLOC | __GFP_NORETRY);
+                                       __GFP_NOMEMALLOC | __GFP_NORETRY |
+                                       __GFP_NOWARN);
         return page;
 }
 EXPORT_SYMBOL_GPL(balloon_page_alloc);
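The balloon_compaction hunk above adds __GFP_NOWARN because a failed balloon-page allocation is an expected, retryable event under memory pressure, not an error worth a page-allocation-failure splat. The fragment below is a hedged sketch of the usual caller pattern, loosely modelled on the virtio_balloon inflate path; try_inflate() is a made-up name, the code assumes the usual kernel headers for the balloon and scheduler APIs, and it is illustrative rather than verbatim kernel source.

/* Illustrative sketch only -- not actual virtio_balloon code. */
static unsigned int try_inflate(struct balloon_dev_info *b_dev_info,
                                unsigned int nr_wanted)
{
        unsigned int nr_done;

        for (nr_done = 0; nr_done < nr_wanted; nr_done++) {
                /* May fail; with __GFP_NOWARN the failure is now silent. */
                struct page *page = balloon_page_alloc();

                if (!page) {
                        /* Expected under pressure: back off, let the caller retry. */
                        schedule_timeout_uninterruptible(HZ / 5);
                        break;
                }
                balloon_page_enqueue(b_dev_info, page);
        }

        return nr_done;
}

The design point is that the caller already treats a NULL return as "stop inflating for now", so suppressing the allocation warning removes log noise without changing behaviour.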