From 93d2c4de8d8129b97ee1e1a222aedb0719d2fcd9 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 28 Aug 2019 13:36:59 +0800 Subject: [PATCH 1/6] vhost/test: fix build for vhost test Since below commit, callers need to specify the iov_limit in vhost_dev_init() explicitly. Fixes: b46a0bf78ad7 ("vhost: fix OOB in get_rx_bufs()") Cc: stable@vger.kernel.org Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 9e90e969af55..ac4f762c4f65 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -115,7 +115,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) dev = &n->dev; vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX); + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV); f->private_data = n; From 264b563b8675771834419057cbe076c1a41fb666 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 28 Aug 2019 13:37:00 +0800 Subject: [PATCH 2/6] vhost/test: fix build for vhost test Since vhost_exceeds_weight() was introduced, callers need to specify the packet weight and byte weight in vhost_dev_init(). Note that, the packet weight isn't counted in this patch to keep the original behavior unchanged. Fixes: e82b9b0727ff ("vhost: introduce vhost_exceeds_weight()") Cc: stable@vger.kernel.org Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/test.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index ac4f762c4f65..7804869c6a31 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -22,6 +22,12 @@ * Using this limit prevents one virtqueue from starving others. */ #define VHOST_TEST_WEIGHT 0x80000 +/* Max number of packets transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * pkts. + */ +#define VHOST_TEST_PKT_WEIGHT 256 + enum { VHOST_TEST_VQ = 0, VHOST_TEST_VQ_MAX = 1, @@ -80,10 +86,8 @@ static void handle_vq(struct vhost_test *n) } vhost_add_used_and_signal(&n->dev, vq, head, 0); total_len += len; - if (unlikely(total_len >= VHOST_TEST_WEIGHT)) { - vhost_poll_queue(&vq->poll); + if (unlikely(vhost_exceeds_weight(vq, 0, total_len))) break; - } } mutex_unlock(&vq->mutex); @@ -115,7 +119,8 @@ static int vhost_test_open(struct inode *inode, struct file *f) dev = &n->dev; vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV); + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT); f->private_data = n; From 718be6bab26fb01fa0147a2b614a3eefd96a2957 Mon Sep 17 00:00:00 2001 From: ? jiang Date: Tue, 20 Aug 2019 02:51:23 +0000 Subject: [PATCH 3/6] virtio-net: lower min ring num_free for efficiency This change lowers ring buffer reclaim threshold from 1/2*queue to budget for better performance. According to our test with qemu + dpdk, packet dropping happens when the guest is not able to provide free buffer in avail ring timely with default 1/2*queue. The value in the patch has been tested and does show better performance. Test setup: iperf3 to generate packets to guest (total 30mins, pps 400k, UDP) avg packets drop before: 2842 avg packets drop after: 360(-87.3%) Further, current code suffers from a starvation problem: the amount of work done by try_fill_recv is not bounded by the budget parameter, thus (with large queues) once in a while userspace gets blocked for a long time while queue is being refilled. Trigger refills earlier to make sure the amount of work to do is limited. Signed-off-by: jiangkidd Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4f3de0ac8b0b..ba98e0971b84 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1331,7 +1331,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget, } } - if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) { + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { if (!try_fill_recv(vi, rq, GFP_ATOMIC)) schedule_delayed_work(&vi->refill, 0); } From 896fc242bc1d261c1178838487a0a54b260625cc Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 20 Aug 2019 20:36:32 +0800 Subject: [PATCH 4/6] vhost: Remove unnecessary variable It is unnecessary to use ret variable to return the error code, just return the error code directly. Signed-off-by: Yunsheng Lin Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 0536f8526359..1ac9de250319 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -203,7 +203,6 @@ EXPORT_SYMBOL_GPL(vhost_poll_init); int vhost_poll_start(struct vhost_poll *poll, struct file *file) { __poll_t mask; - int ret = 0; if (poll->wqh) return 0; @@ -213,10 +212,10 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); if (mask & EPOLLERR) { vhost_poll_stop(poll); - ret = -EINVAL; + return -EINVAL; } - return ret; + return 0; } EXPORT_SYMBOL_GPL(vhost_poll_start); From 3d2c7d37047557175fb41de044091050b5f0d73b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 10 Aug 2019 13:53:21 -0400 Subject: [PATCH 5/6] Revert "vhost: access vq metadata through kernel virtual address" This reverts commit 7f466032dc ("vhost: access vq metadata through kernel virtual address"). The commit caused a bunch of issues, and while commit 73f628ec9e ("vhost: disable metadata prefetch optimization") disabled the optimization it's not nice to keep lots of dead code around. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 515 +----------------------------------------- drivers/vhost/vhost.h | 41 ---- 2 files changed, 3 insertions(+), 553 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1ac9de250319..5dc174ac8cac 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -297,160 +297,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_map_unprefetch(struct vhost_map *map) -{ - kfree(map->pages); - map->pages = NULL; - map->npages = 0; - map->addr = NULL; -} - -static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) -{ - struct vhost_map *map[VHOST_NUM_ADDRS]; - int i; - - spin_lock(&vq->mmu_lock); - for (i = 0; i < VHOST_NUM_ADDRS; i++) { - map[i] = rcu_dereference_protected(vq->maps[i], - lockdep_is_held(&vq->mmu_lock)); - if (map[i]) - rcu_assign_pointer(vq->maps[i], NULL); - } - spin_unlock(&vq->mmu_lock); - - synchronize_rcu(); - - for (i = 0; i < VHOST_NUM_ADDRS; i++) - if (map[i]) - vhost_map_unprefetch(map[i]); - -} - -static void vhost_reset_vq_maps(struct vhost_virtqueue *vq) -{ - int i; - - vhost_uninit_vq_maps(vq); - for (i = 0; i < VHOST_NUM_ADDRS; i++) - vq->uaddrs[i].size = 0; -} - -static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr, - unsigned long start, - unsigned long end) -{ - if (unlikely(!uaddr->size)) - return false; - - return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size); -} - -static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - struct vhost_uaddr *uaddr = &vq->uaddrs[index]; - struct vhost_map *map; - int i; - - if (!vhost_map_range_overlap(uaddr, start, end)) - return; - - spin_lock(&vq->mmu_lock); - ++vq->invalidate_count; - - map = rcu_dereference_protected(vq->maps[index], - lockdep_is_held(&vq->mmu_lock)); - if (map) { - if (uaddr->write) { - for (i = 0; i < map->npages; i++) - set_page_dirty(map->pages[i]); - } - rcu_assign_pointer(vq->maps[index], NULL); - } - spin_unlock(&vq->mmu_lock); - - if (map) { - synchronize_rcu(); - vhost_map_unprefetch(map); - } -} - -static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end)) - return; - - spin_lock(&vq->mmu_lock); - --vq->invalidate_count; - spin_unlock(&vq->mmu_lock); -} - -static int vhost_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, - mmu_notifier); - int i, j; - - if (!mmu_notifier_range_blockable(range)) - return -EAGAIN; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_start(vq, j, - range->start, - range->end); - } - - return 0; -} - -static void vhost_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, - mmu_notifier); - int i, j; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_end(vq, j, - range->start, - range->end); - } -} - -static const struct mmu_notifier_ops vhost_mmu_notifier_ops = { - .invalidate_range_start = vhost_invalidate_range_start, - .invalidate_range_end = vhost_invalidate_range_end, -}; - -static void vhost_init_maps(struct vhost_dev *dev) -{ - struct vhost_virtqueue *vq; - int i, j; - - dev->mmu_notifier.ops = &vhost_mmu_notifier_ops; - - for (i = 0; i < dev->nvqs; ++i) { - vq = dev->vqs[i]; - for (j = 0; j < VHOST_NUM_ADDRS; j++) - RCU_INIT_POINTER(vq->maps[j], NULL); - } -} -#endif - static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -479,11 +325,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; - vq->invalidate_count = 0; __vhost_vq_meta_reset(vq); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_reset_vq_maps(vq); -#endif } static int vhost_worker(void *data) @@ -633,9 +475,7 @@ void vhost_dev_init(struct vhost_dev *dev, INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); spin_lock_init(&dev->iotlb_lock); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_init_maps(dev); -#endif + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -644,7 +484,6 @@ void vhost_dev_init(struct vhost_dev *dev, vq->heads = NULL; vq->dev = dev; mutex_init(&vq->mutex); - spin_lock_init(&vq->mmu_lock); vhost_vq_reset(dev, vq); if (vq->handle_kick) vhost_poll_init(&vq->poll, vq->handle_kick, @@ -724,18 +563,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) if (err) goto err_cgroup; -#if VHOST_ARCH_CAN_ACCEL_UACCESS - err = mmu_notifier_register(&dev->mmu_notifier, dev->mm); - if (err) - goto err_mmu_notifier; -#endif - return 0; - -#if VHOST_ARCH_CAN_ACCEL_UACCESS -err_mmu_notifier: - vhost_dev_free_iovecs(dev); -#endif err_cgroup: kthread_stop(worker); dev->worker = NULL; @@ -826,107 +654,6 @@ static void vhost_clear_msg(struct vhost_dev *dev) spin_unlock(&dev->iotlb_lock); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_setup_uaddr(struct vhost_virtqueue *vq, - int index, unsigned long uaddr, - size_t size, bool write) -{ - struct vhost_uaddr *addr = &vq->uaddrs[index]; - - addr->uaddr = uaddr; - addr->size = size; - addr->write = write; -} - -static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq) -{ - vhost_setup_uaddr(vq, VHOST_ADDR_DESC, - (unsigned long)vq->desc, - vhost_get_desc_size(vq, vq->num), - false); - vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL, - (unsigned long)vq->avail, - vhost_get_avail_size(vq, vq->num), - false); - vhost_setup_uaddr(vq, VHOST_ADDR_USED, - (unsigned long)vq->used, - vhost_get_used_size(vq, vq->num), - true); -} - -static int vhost_map_prefetch(struct vhost_virtqueue *vq, - int index) -{ - struct vhost_map *map; - struct vhost_uaddr *uaddr = &vq->uaddrs[index]; - struct page **pages; - int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE); - int npinned; - void *vaddr, *v; - int err; - int i; - - spin_lock(&vq->mmu_lock); - - err = -EFAULT; - if (vq->invalidate_count) - goto err; - - err = -ENOMEM; - map = kmalloc(sizeof(*map), GFP_ATOMIC); - if (!map) - goto err; - - pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC); - if (!pages) - goto err_pages; - - err = EFAULT; - npinned = __get_user_pages_fast(uaddr->uaddr, npages, - uaddr->write, pages); - if (npinned > 0) - release_pages(pages, npinned); - if (npinned != npages) - goto err_gup; - - for (i = 0; i < npinned; i++) - if (PageHighMem(pages[i])) - goto err_gup; - - vaddr = v = page_address(pages[0]); - - /* For simplicity, fallback to userspace address if VA is not - * contigious. - */ - for (i = 1; i < npinned; i++) { - v += PAGE_SIZE; - if (v != page_address(pages[i])) - goto err_gup; - } - - map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1)); - map->npages = npages; - map->pages = pages; - - rcu_assign_pointer(vq->maps[index], map); - /* No need for a synchronize_rcu(). This function should be - * called by dev->worker so we are serialized with all - * readers. - */ - spin_unlock(&vq->mmu_lock); - - return 0; - -err_gup: - kfree(pages); -err_pages: - kfree(map); -err: - spin_unlock(&vq->mmu_lock); - return err; -} -#endif - void vhost_dev_cleanup(struct vhost_dev *dev) { int i; @@ -956,16 +683,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) kthread_stop(dev->worker); dev->worker = NULL; } - if (dev->mm) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - mmu_notifier_unregister(&dev->mmu_notifier, dev->mm); -#endif + if (dev->mm) mmput(dev->mm); - } -#if VHOST_ARCH_CAN_ACCEL_UACCESS - for (i = 0; i < dev->nvqs; i++) - vhost_uninit_vq_maps(dev->vqs[i]); -#endif dev->mm = NULL; } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); @@ -1194,26 +913,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - *((__virtio16 *)&used->ring[vq->num]) = - cpu_to_vhost16(vq, vq->avail_idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq)); } @@ -1222,27 +921,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq, struct vring_used_elem *head, int idx, int count) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - size_t size; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - size = count * sizeof(*head); - memcpy(used->ring + idx, head, size); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_copy_to_user(vq, vq->used->ring + idx, head, count * sizeof(*head)); } @@ -1250,25 +928,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq, static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - used->flags = cpu_to_vhost16(vq, vq->used_flags); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags); } @@ -1276,25 +935,6 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - used->idx = cpu_to_vhost16(vq, vq->last_used_idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx); } @@ -1340,50 +980,12 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d) static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *idx = avail->idx; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *idx, &vq->avail->idx); } static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, __virtio16 *head, int idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *head = avail->ring[idx & (vq->num - 1)]; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *head, &vq->avail->ring[idx & (vq->num - 1)]); } @@ -1391,98 +993,24 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, __virtio16 *flags) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *flags = avail->flags; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *flags, &vq->avail->flags); } static inline int vhost_get_used_event(struct vhost_virtqueue *vq, __virtio16 *event) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *event = (__virtio16)avail->ring[vq->num]; - rcu_read_unlock(); - return 0; - } - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *event, vhost_used_event(vq)); } static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - *idx = used->idx; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_used(vq, *idx, &vq->used->idx); } static inline int vhost_get_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, int idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_desc *d; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]); - if (likely(map)) { - d = map->addr; - *desc = *(d + idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); } @@ -1823,32 +1351,12 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq, return true; } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq) -{ - struct vhost_map __rcu *map; - int i; - - for (i = 0; i < VHOST_NUM_ADDRS; i++) { - rcu_read_lock(); - map = rcu_dereference(vq->maps[i]); - rcu_read_unlock(); - if (unlikely(!map)) - vhost_map_prefetch(vq, i); - } -} -#endif - int vq_meta_prefetch(struct vhost_virtqueue *vq) { unsigned int num = vq->num; - if (!vq->iotlb) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_vq_map_prefetch(vq); -#endif + if (!vq->iotlb) return 1; - } return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && @@ -2059,16 +1567,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d, mutex_lock(&vq->mutex); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - /* Unregister MMU notifer to allow invalidation callback - * can access vq->uaddrs[] without holding a lock. - */ - if (d->mm) - mmu_notifier_unregister(&d->mmu_notifier, d->mm); - - vhost_uninit_vq_maps(vq); -#endif - switch (ioctl) { case VHOST_SET_VRING_NUM: r = vhost_vring_set_num(d, vq, argp); @@ -2080,13 +1578,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d, BUG(); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_setup_vq_uaddr(vq); - - if (d->mm) - mmu_notifier_register(&d->mmu_notifier, d->mm); -#endif - mutex_unlock(&vq->mutex); return r; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 42a8c2a13ab1..e9ed2722b633 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); @@ -83,24 +80,6 @@ enum vhost_uaddr_type { VHOST_NUM_ADDRS = 3, }; -struct vhost_map { - int npages; - void *addr; - struct page **pages; -}; - -struct vhost_uaddr { - unsigned long uaddr; - size_t size; - bool write; -}; - -#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0 -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0 -#else -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0 -#endif - /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -111,22 +90,7 @@ struct vhost_virtqueue { struct vring_desc __user *desc; struct vring_avail __user *avail; struct vring_used __user *used; - -#if VHOST_ARCH_CAN_ACCEL_UACCESS - /* Read by memory accessors, modified by meta data - * prefetching, MMU notifier and vring ioctl(). - * Synchonrized through mmu_lock (writers) and RCU (writers - * and readers). - */ - struct vhost_map __rcu *maps[VHOST_NUM_ADDRS]; - /* Read by MMU notifier, modified by vring ioctl(), - * synchronized through MMU notifier - * registering/unregistering. - */ - struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS]; -#endif const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; - struct file *kick; struct eventfd_ctx *call_ctx; struct eventfd_ctx *error_ctx; @@ -181,8 +145,6 @@ struct vhost_virtqueue { bool user_be; #endif u32 busyloop_timeout; - spinlock_t mmu_lock; - int invalidate_count; }; struct vhost_msg_node { @@ -196,9 +158,6 @@ struct vhost_msg_node { struct vhost_dev { struct mm_struct *mm; -#ifdef CONFIG_MMU_NOTIFIER - struct mmu_notifier mmu_notifier; -#endif struct mutex mutex; struct vhost_virtqueue **vqs; int nvqs; From 02fa5d7b17a761f53ef1eedfc254e1f33bd226b0 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 20 Aug 2019 02:16:46 -0700 Subject: [PATCH 6/6] mm/balloon_compaction: suppress allocation warnings There is no reason to print warnings when balloon page allocation fails, as they are expected and can be handled gracefully. Since VMware balloon now uses balloon-compaction infrastructure, and suppressed these warnings before, it is also beneficial to suppress these warnings to keep the same behavior that the balloon had before. Cc: Jason Wang Signed-off-by: Nadav Amit Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand --- mm/balloon_compaction.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 798275a51887..26de020aae7b 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -124,7 +124,8 @@ EXPORT_SYMBOL_GPL(balloon_page_list_dequeue); struct page *balloon_page_alloc(void) { struct page *page = alloc_page(balloon_mapping_gfp_mask() | - __GFP_NOMEMALLOC | __GFP_NORETRY); + __GFP_NOMEMALLOC | __GFP_NORETRY | + __GFP_NOWARN); return page; } EXPORT_SYMBOL_GPL(balloon_page_alloc);