kvm, rt: change async pagefault code locking for PREEMPT_RT

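The async pagefault wake code can run from the idle task in exception
context, so everything here needs to be made non-preemptible. On
PREEMPT_RT a regular spinlock_t is a sleeping lock, and a regular wait
queue is protected by one, so neither can be used on this path.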

Converting the wait queue to a simple wait queue (swait) and the spinlock
to a raw spinlock does the trick.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Rik van Riel 2016-03-21 15:13:27 +01:00 committed by Paolo Bonzini
parent 489153c746
commit 9db284f303

View File

@ -36,6 +36,7 @@
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/nmi.h> #include <linux/nmi.h>
#include <linux/swait.h>
#include <asm/timer.h> #include <asm/timer.h>
#include <asm/cpu.h> #include <asm/cpu.h>
#include <asm/traps.h> #include <asm/traps.h>
@ -91,14 +92,14 @@ static void kvm_io_delay(void)
struct kvm_task_sleep_node { struct kvm_task_sleep_node {
struct hlist_node link; struct hlist_node link;
wait_queue_head_t wq; struct swait_queue_head wq;
u32 token; u32 token;
int cpu; int cpu;
bool halted; bool halted;
}; };
static struct kvm_task_sleep_head { static struct kvm_task_sleep_head {
spinlock_t lock; raw_spinlock_t lock;
struct hlist_head list; struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
@ -122,17 +123,17 @@ void kvm_async_pf_task_wait(u32 token)
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
struct kvm_task_sleep_node n, *e; struct kvm_task_sleep_node n, *e;
DEFINE_WAIT(wait); DECLARE_SWAITQUEUE(wait);
rcu_irq_enter(); rcu_irq_enter();
spin_lock(&b->lock); raw_spin_lock(&b->lock);
e = _find_apf_task(b, token); e = _find_apf_task(b, token);
if (e) { if (e) {
/* dummy entry exist -> wake up was delivered ahead of PF */ /* dummy entry exist -> wake up was delivered ahead of PF */
hlist_del(&e->link); hlist_del(&e->link);
kfree(e); kfree(e);
spin_unlock(&b->lock); raw_spin_unlock(&b->lock);
rcu_irq_exit(); rcu_irq_exit();
return; return;
@ -141,13 +142,13 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token; n.token = token;
n.cpu = smp_processor_id(); n.cpu = smp_processor_id();
n.halted = is_idle_task(current) || preempt_count() > 1; n.halted = is_idle_task(current) || preempt_count() > 1;
init_waitqueue_head(&n.wq); init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list); hlist_add_head(&n.link, &b->list);
spin_unlock(&b->lock); raw_spin_unlock(&b->lock);
for (;;) { for (;;) {
if (!n.halted) if (!n.halted)
prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link)) if (hlist_unhashed(&n.link))
break; break;
@ -166,7 +167,7 @@ void kvm_async_pf_task_wait(u32 token)
} }
} }
if (!n.halted) if (!n.halted)
finish_wait(&n.wq, &wait); finish_swait(&n.wq, &wait);
rcu_irq_exit(); rcu_irq_exit();
return; return;
@ -178,8 +179,8 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
hlist_del_init(&n->link); hlist_del_init(&n->link);
if (n->halted) if (n->halted)
smp_send_reschedule(n->cpu); smp_send_reschedule(n->cpu);
else if (waitqueue_active(&n->wq)) else if (swait_active(&n->wq))
wake_up(&n->wq); swake_up(&n->wq);
} }
static void apf_task_wake_all(void) static void apf_task_wake_all(void)
@ -189,14 +190,14 @@ static void apf_task_wake_all(void)
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
struct hlist_node *p, *next; struct hlist_node *p, *next;
struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
spin_lock(&b->lock); raw_spin_lock(&b->lock);
hlist_for_each_safe(p, next, &b->list) { hlist_for_each_safe(p, next, &b->list) {
struct kvm_task_sleep_node *n = struct kvm_task_sleep_node *n =
hlist_entry(p, typeof(*n), link); hlist_entry(p, typeof(*n), link);
if (n->cpu == smp_processor_id()) if (n->cpu == smp_processor_id())
apf_task_wake_one(n); apf_task_wake_one(n);
} }
spin_unlock(&b->lock); raw_spin_unlock(&b->lock);
} }
} }
@ -212,7 +213,7 @@ void kvm_async_pf_task_wake(u32 token)
} }
again: again:
spin_lock(&b->lock); raw_spin_lock(&b->lock);
n = _find_apf_task(b, token); n = _find_apf_task(b, token);
if (!n) { if (!n) {
/* /*
@ -225,17 +226,17 @@ again:
* Allocation failed! Busy wait while other cpu * Allocation failed! Busy wait while other cpu
* handles async PF. * handles async PF.
*/ */
spin_unlock(&b->lock); raw_spin_unlock(&b->lock);
cpu_relax(); cpu_relax();
goto again; goto again;
} }
n->token = token; n->token = token;
n->cpu = smp_processor_id(); n->cpu = smp_processor_id();
init_waitqueue_head(&n->wq); init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list); hlist_add_head(&n->link, &b->list);
} else } else
apf_task_wake_one(n); apf_task_wake_one(n);
spin_unlock(&b->lock); raw_spin_unlock(&b->lock);
return; return;
} }
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
@ -486,7 +487,7 @@ void __init kvm_guest_init(void)
paravirt_ops_setup(); paravirt_ops_setup();
register_reboot_notifier(&kvm_pv_reboot_nb); register_reboot_notifier(&kvm_pv_reboot_nb);
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
spin_lock_init(&async_pf_sleepers[i].lock); raw_spin_lock_init(&async_pf_sleepers[i].lock);
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
x86_init.irqs.trap_init = kvm_apf_trap_init; x86_init.irqs.trap_init = kvm_apf_trap_init;