From e8dbf19508a112d125190df77ee0464b7ba56192 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Mon, 26 Oct 2020 17:53:25 +0000
Subject: [PATCH] kvm/eventfd: Use priority waitqueue to catch events before
 userspace

As far as I can tell, when we use posted interrupts we silently cut off
the events from userspace, if it's listening on the same eventfd that
feeds the irqfd.

I like that behaviour. Let's do it all the time, even without posted
interrupts. It makes it much easier to handle IRQ remapping invalidation
without having to constantly add/remove the fd from the userspace poll
set. We can just leave userspace polling on it, and the bypass will...
well... bypass it.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20201026175325.585623-2-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c2323c27a28b..efa8a5ae7a95 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -191,6 +191,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 	struct kvm *kvm = irqfd->kvm;
 	unsigned seq;
 	int idx;
+	int ret = 0;
 
 	if (flags & EPOLLIN) {
 		idx = srcu_read_lock(&kvm->irq_srcu);
@@ -204,6 +205,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 					      false) == -EWOULDBLOCK)
 			schedule_work(&irqfd->inject);
 		srcu_read_unlock(&kvm->irq_srcu, idx);
+		ret = 1;
 	}
 
 	if (flags & EPOLLHUP) {
@@ -227,7 +229,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 		spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void
@@ -236,7 +238,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(pt, struct kvm_kernel_irqfd, pt);
-	add_wait_queue(wqh, &irqfd->wait);
+	add_wait_queue_priority(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */