diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b404e4d7dd8..b967c1c774a1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1782,6 +1782,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
 
 bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
 {
+	xfer_to_guest_mode_prepare();
 	return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
 		xfer_to_guest_mode_work_pending();
 }
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 9b93f8584ff7..8b2b1d68b954 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -46,6 +46,20 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu,
  */
 int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu);
 
+/**
+ * xfer_to_guest_mode_prepare - Perform last minute preparation work that
+ *				 needs to be handled while IRQs are disabled
+ *				 upon entering the guest.
+ *
+ * Has to be invoked with interrupts disabled before the last call
+ * to xfer_to_guest_mode_work_pending().
+ */
+static inline void xfer_to_guest_mode_prepare(void)
+{
+	lockdep_assert_irqs_disabled();
+	rcu_nocb_flush_deferred_wakeup();
+}
+
 /**
  * __xfer_to_guest_mode_work_pending - Check if work is pending
  *
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2ebc211fffcb..ce17b8477442 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -678,9 +678,10 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_NO_HZ_FULL
 
+#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)
 /*
  * An empty function that will trigger a reschedule on
- * IRQ tail once IRQs get re-enabled on userspace resume.
+ * IRQ tail once IRQs get re-enabled on userspace/guest resume.
  */
 static void late_wakeup_func(struct irq_work *work)
 {
@@ -689,6 +690,37 @@ static void late_wakeup_func(struct irq_work *work)
 static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
 	IRQ_WORK_INIT(late_wakeup_func);
 
+/*
+ * If either:
+ *
+ * 1) the task is about to enter guest mode and $ARCH doesn't support KVM generic work, or
+ * 2) the task is about to enter user mode and $ARCH doesn't support generic entry,
+ *
+ * then late RCU wakeups aren't supported in the resched loops and our
+ * last resort is to fire a local irq_work that will trigger a reschedule once IRQs
+ * get re-enabled again.
+ */
+noinstr static void rcu_irq_work_resched(void)
+{
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+	if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU))
+		return;
+
+	if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU))
+		return;
+
+	instrumentation_begin();
+	if (do_nocb_deferred_wakeup(rdp) && need_resched()) {
+		irq_work_queue(this_cpu_ptr(&late_wakeup_work));
+	}
+	instrumentation_end();
+}
+
+#else
+static inline void rcu_irq_work_resched(void) { }
+#endif
+
 /**
  * rcu_user_enter - inform RCU that we are resuming userspace.
  *
@@ -702,8 +734,6 @@
  */
 noinstr void rcu_user_enter(void)
 {
-	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-
 	lockdep_assert_irqs_disabled();
 
 	/*
@@ -711,13 +741,7 @@ noinstr void rcu_user_enter(void)
 	 * rescheduling opportunity in the entry code. Trigger a self IPI
	 * that will fire and reschedule once we resume in user/guest mode.
 	 */
-	instrumentation_begin();
-	if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {
-		if (do_nocb_deferred_wakeup(rdp) && need_resched())
-			irq_work_queue(this_cpu_ptr(&late_wakeup_work));
-	}
-	instrumentation_end();
-
+	rcu_irq_work_resched();
 	rcu_eqs_enter(true);
 }
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 384856e4d13e..cdc1b7651c03 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2197,6 +2197,7 @@ void rcu_nocb_flush_deferred_wakeup(void)
 {
 	do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
 }
+EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
 
 void __init rcu_init_nohz(void)
 {
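
For context on the ordering the x86.c hunk establishes: flushing the deferred rcuog wakeup may itself raise need_resched(), so the flush has to happen with IRQs disabled and before the final pending-work check on the guest-entry path, otherwise the resulting reschedule request could go unnoticed until the next exit. The sketch below is illustrative only, not the actual x86 entry code (vcpu_enter_guest_sketch() and its control flow are assumptions); it just shows where kvm_vcpu_exit_request(), which now starts with xfer_to_guest_mode_prepare(), sits relative to the last rescheduling point.

#include <linux/kvm_host.h>
#include <linux/entry-kvm.h>

/*
 * Illustrative sketch only -- not the real arch entry loop.  The deferred
 * rcuog wakeup is flushed while IRQs are disabled, *before* the last
 * pending-work check, so any need_resched() it raises still forces a
 * bail-out to the xfer_to_guest_mode_handle_work() loop.
 */
static int vcpu_enter_guest_sketch(struct kvm_vcpu *vcpu)
{
	int r;

	/* Last rescheduling/work-handling opportunity with IRQs enabled. */
	r = xfer_to_guest_mode_handle_work(vcpu);
	if (r)
		return r;

	local_irq_disable();
	vcpu->mode = IN_GUEST_MODE;

	/*
	 * kvm_vcpu_exit_request() now begins with xfer_to_guest_mode_prepare(),
	 * i.e. rcu_nocb_flush_deferred_wakeup() under the IRQs-off assertion,
	 * before it evaluates xfer_to_guest_mode_work_pending().
	 */
	if (kvm_vcpu_exit_request(vcpu)) {
		vcpu->mode = OUTSIDE_GUEST_MODE;
		local_irq_enable();
		return 1;	/* go back and handle the pending work */
	}

	/* ... switch to guest mode ... */
	return 0;
}

The tree.c side is the complement: once both the generic entry loop (CONFIG_GENERIC_ENTRY) and the KVM transfer-to-guest work loop (CONFIG_KVM_XFER_TO_GUEST_WORK) flush the wakeup themselves, rcu_user_enter()'s irq_work fallback is only needed for configurations lacking the corresponding loop, which is what the new #if/#else around rcu_irq_work_resched() and its IS_ENABLED()/PF_VCPU early returns express.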