KVM: x86/xen: Use fast path for Xen timer delivery
Most of the time there's no need to kick the vCPU and deliver the timer event through kvm_xen_inject_timer_irqs(). Use kvm_xen_set_evtchn_fast() directly from the timer callback, and only fall back to the slow path if delivering the timer would block, i.e. if kvm_xen_set_evtchn_fast() returns -EWOULDBLOCK. If delivery fails for any other reason, do nothing and just let it fail silently, as that is what the slow path would end up doing anyways.

This gives a significant improvement in timer latency testing (using nanosleep() for various periods and then measuring the actual time elapsed).

However, there was a reason[1] the fast path was dropped when this support was first added. The current code holds vcpu->mutex for all operations on the kvm->arch.timer_expires field, and the fast path introduces a potential race condition. Avoid that race by ensuring the hrtimer is (temporarily) cancelled before making changes in kvm_xen_start_timer(), and also when reading the values out for KVM_XEN_VCPU_ATTR_TYPE_TIMER.

[1] https://lore.kernel.org/kvm/846caa99-2e42-4443-1070-84e49d2f11d2@redhat.com

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Link: https://lore.kernel.org/r/f21ee3bd852761e7808240d4ecaec3013c649dc7.camel@infradead.org
[sean: massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
commit 77c9b9dea4
parent ee11ab6bb0
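The "timer latency testing" mentioned in the changelog can be reproduced with a small user-space harness: sleep with nanosleep() for various periods and report how far the actual elapsed time overshoots the request. The sketch below is an assumption about that methodology, not the author's actual test:

#include <stdio.h>
#include <time.h>

int main(void)
{
        /* Illustrative sleep periods: 100us, 1ms, 10ms. */
        static const long periods_ns[] = { 100000, 1000000, 10000000 };

        for (unsigned int i = 0; i < sizeof(periods_ns) / sizeof(periods_ns[0]); i++) {
                struct timespec req = { 0, periods_ns[i] };
                struct timespec t0, t1;
                long long took;

                clock_gettime(CLOCK_MONOTONIC, &t0);
                nanosleep(&req, NULL);
                clock_gettime(CLOCK_MONOTONIC, &t1);

                took = (t1.tv_sec - t0.tv_sec) * 1000000000LL +
                       (t1.tv_nsec - t0.tv_nsec);
                printf("asked for %9ld ns, slept %12lld ns (latency %lld ns)\n",
                       periods_ns[i], took, took - periods_ns[i]);
        }
        return 0;
}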
@@ -134,9 +134,23 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
 {
         struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
                                              arch.xen.timer);
+        struct kvm_xen_evtchn e;
+        int rc;
+
         if (atomic_read(&vcpu->arch.xen.timer_pending))
                 return HRTIMER_NORESTART;
 
+        e.vcpu_id = vcpu->vcpu_id;
+        e.vcpu_idx = vcpu->vcpu_idx;
+        e.port = vcpu->arch.xen.timer_virq;
+        e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+        rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm);
+        if (rc != -EWOULDBLOCK) {
+                vcpu->arch.xen.timer_expires = 0;
+                return HRTIMER_NORESTART;
+        }
+
         atomic_inc(&vcpu->arch.xen.timer_pending);
         kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
         kvm_vcpu_kick(vcpu);
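This hunk is the core of the change: kvm_xen_set_evtchn_fast() is tried directly from the hrtimer callback, and only a return of -EWOULDBLOCK defers to the kicked-vCPU slow path. The stand-alone sketch below illustrates that dispatch shape; deliver_fast() and defer_to_slow_path() are illustrative stand-ins, not kernel APIs:

#include <errno.h>
#include <stdio.h>

/* Stand-in for kvm_xen_set_evtchn_fast(): succeeds unless it would have
 * had to block (simulated here for odd port numbers). */
static int deliver_fast(int port)
{
        return (port & 1) ? -EWOULDBLOCK : 0;
}

/* Stand-in for the kick + deferred kvm_xen_inject_timer_irqs() slow path. */
static void defer_to_slow_path(int port)
{
        printf("port %d: deferred to slow path\n", port);
}

static void timer_fired(int port)
{
        int rc = deliver_fast(port);

        /* Anything other than -EWOULDBLOCK is final: either the event was
         * delivered, or it failed in a way the slow path could not fix. */
        if (rc != -EWOULDBLOCK)
                return;

        defer_to_slow_path(port);
}

int main(void)
{
        timer_fired(2);         /* fast path succeeds, nothing printed */
        timer_fired(3);         /* would block, so it is deferred */
        return 0;
}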
@@ -146,6 +160,14 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
 
 static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns)
 {
+        /*
+         * Avoid races with the old timer firing. Checking timer_expires
+         * to avoid calling hrtimer_cancel() will only have false positives
+         * so is fine.
+         */
+        if (vcpu->arch.xen.timer_expires)
+                hrtimer_cancel(&vcpu->arch.xen.timer);
+
         atomic_set(&vcpu->arch.xen.timer_pending, 0);
         vcpu->arch.xen.timer_expires = guest_abs;
 
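The pattern in kvm_xen_start_timer() is cancel-before-modify: quiesce the timer so its callback cannot race with updates to timer_expires, and use the flag only to skip a pointless cancel. A rough user-space analogue using a POSIX timer is sketched below; note that unlike hrtimer_cancel(), timer_settime() does not wait for an in-flight handler, so this approximates the idiom rather than matching it exactly:

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static timer_t timer;
static volatile uint64_t timer_expires;        /* 0 => timer not armed */

static void on_expiry(union sigval sv)
{
        (void)sv;
        timer_expires = 0;
        puts("timer fired");
}

static void start_timer(uint64_t abs_ns)
{
        struct itimerspec arm = {
                .it_value.tv_sec  = (time_t)(abs_ns / 1000000000ULL),
                .it_value.tv_nsec = (long)(abs_ns % 1000000000ULL),
        };
        struct itimerspec disarm = { 0 };

        /*
         * Disarm the old timer before rewriting the shared state below,
         * mirroring the hrtimer_cancel() added to kvm_xen_start_timer().
         * (hrtimer_cancel() also waits for a running callback to finish,
         * which timer_settime() does not.)  The timer_expires check only
         * skips a pointless disarm; a false positive costs one extra call.
         */
        if (timer_expires)
                timer_settime(timer, 0, &disarm, NULL);

        timer_expires = abs_ns;
        timer_settime(timer, TIMER_ABSTIME, &arm, NULL);
}

int main(void)
{
        struct sigevent sev = {
                .sigev_notify = SIGEV_THREAD,
                .sigev_notify_function = on_expiry,
        };
        struct timespec now;

        timer_create(CLOCK_MONOTONIC, &sev, &timer);
        clock_gettime(CLOCK_MONOTONIC, &now);
        start_timer((uint64_t)now.tv_sec * 1000000000ULL + now.tv_nsec +
                    100 * 1000 * 1000);         /* fire in ~100ms */
        sleep(1);                               /* let it fire */
        return 0;
}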
@@ -1019,9 +1041,36 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                 break;
 
         case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
+                /*
+                 * Ensure a consistent snapshot of state is captured, with a
+                 * timer either being pending, or the event channel delivered
+                 * to the corresponding bit in the shared_info. Not still
+                 * lurking in the timer_pending flag for deferred delivery.
+                 * Purely as an optimisation, if the timer_expires field is
+                 * zero, that means the timer isn't active (or even in the
+                 * timer_pending flag) and there is no need to cancel it.
+                 */
+                if (vcpu->arch.xen.timer_expires) {
+                        hrtimer_cancel(&vcpu->arch.xen.timer);
+                        kvm_xen_inject_timer_irqs(vcpu);
+                }
+
                 data->u.timer.port = vcpu->arch.xen.timer_virq;
                 data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
                 data->u.timer.expires_ns = vcpu->arch.xen.timer_expires;
+
+                /*
+                 * The hrtimer may trigger and raise the IRQ immediately,
+                 * while the returned state causes it to be set up and
+                 * raised again on the destination system after migration.
+                 * That's fine, as the guest won't even have had a chance
+                 * to run and handle the interrupt. Asserting an already
+                 * pending event channel is idempotent.
+                 */
+                if (vcpu->arch.xen.timer_expires)
+                        hrtimer_start_expires(&vcpu->arch.xen.timer,
+                                              HRTIMER_MODE_ABS_HARD);
+
                 r = 0;
                 break;
 