Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.4.

  s390:
     A bunch of fixes and optimizations for interrupt and time handling.

  PPC:
     Mostly bug fixes.

  ARM:
     No big features, but many small fixes and prerequisites including:

      - a number of fixes for the arch-timer

      - introducing proper level-triggered semantics for the arch-timers

      - a series of patches to synchronously halt a guest (prerequisite
        for IRQ forwarding)

      - some tracepoint improvements

      - a tweak for the EL2 panic handlers

      - some more VGIC cleanups getting rid of redundant state

  x86:
     Quite a few changes:

      - support for VT-d posted interrupts (i.e. PCI devices can inject
        interrupts directly into vCPUs).  This introduces a new
        component (in virt/lib/) that connects VFIO and KVM together.
        The same infrastructure will be used for ARM interrupt
        forwarding as well.

      - more Hyper-V features, though the main one Hyper-V synthetic
        interrupt controller will have to wait for 4.5.  These will let
        KVM expose Hyper-V devices.

      - nested virtualization now supports VPID (same as PCID but for
        vCPUs) which makes it quite a bit faster

      - for future hardware that supports NVDIMM, there is support for
        clflushopt, clwb, pcommit

      - support for "split irqchip", i.e.  LAPIC in kernel +
        IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
        the hypervisor

      - obligatory smattering of SMM fixes

      - on the guest side, stable scheduler clock support was rewritten
        to not require help from the hypervisor"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
  KVM: VMX: Fix commit which broke PML
  KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
  KVM: x86: allow RSM from 64-bit mode
  KVM: VMX: fix SMEP and SMAP without EPT
  KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
  KVM: device assignment: remove pointless #ifdefs
  KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
  KVM: x86: zero apic_arb_prio on reset
  drivers/hv: share Hyper-V SynIC constants with userspace
  KVM: x86: handle SMBASE as physical address in RSM
  KVM: x86: add read_phys to x86_emulate_ops
  KVM: x86: removing unused variable
  KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
  KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
  KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
  KVM: arm/arm64: Optimize away redundant LR tracking
  KVM: s390: use simple switch statement as multiplexer
  KVM: s390: drop useless newline in debugging data
  KVM: s390: SCA must not cross page boundaries
  KVM: arm: Do not indent the arguments of DECLARE_BITMAP
  ...
This commit is contained in:
Linus Torvalds
2015-11-05 16:26:26 -08:00
89 changed files with 2956 additions and 1029 deletions

View File

@@ -51,7 +51,7 @@ struct arch_timer_cpu {
bool armed;
/* Timer IRQ */
const struct kvm_irq_level *irq;
struct kvm_irq_level irq;
/* VGIC mapping */
struct irq_phys_map *map;
@@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
void kvm_timer_schedule(struct kvm_vcpu *vcpu);
void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
#endif

View File

@@ -112,7 +112,6 @@ struct vgic_vmcr {
struct vgic_ops {
struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int);
void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
void (*clear_eisr)(struct kvm_vcpu *vcpu);
@@ -159,7 +158,6 @@ struct irq_phys_map {
u32 virt_irq;
u32 phys_irq;
u32 irq;
bool active;
};
struct irq_phys_map_entry {
@@ -296,22 +294,16 @@ struct vgic_v3_cpu_if {
};
struct vgic_cpu {
/* per IRQ to LR mapping */
u8 *vgic_irq_lr_map;
/* Pending/active/both interrupts on this VCPU */
DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS);
DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS);
DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
/* Pending/active/both shared interrupts, dynamically sized */
unsigned long *pending_shared;
unsigned long *active_shared;
unsigned long *pend_act_shared;
/* Bitmap of used/free list registers */
DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
/* Number of list registers on this CPU */
int nr_lr;
@@ -354,8 +346,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
int virt_irq, int irq);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))

View File

@@ -26,6 +26,7 @@
#define _HYPERV_H
#include <uapi/linux/hyperv.h>
#include <uapi/asm/hyperv.h>
#include <linux/types.h>
#include <linux/scatterlist.h>

90
include/linux/irqbypass.h Normal file
View File

@@ -0,0 +1,90 @@
/*
* IRQ offload/bypass manager
*
* Copyright (C) 2015 Red Hat, Inc.
* Copyright (c) 2015 Linaro Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef IRQBYPASS_H
#define IRQBYPASS_H
#include <linux/list.h>
struct irq_bypass_consumer;
/*
* Theory of operation
*
* The IRQ bypass manager is a simple set of lists and callbacks that allows
* IRQ producers (ex. physical interrupt sources) to be matched to IRQ
* consumers (ex. virtualization hardware that allows IRQ bypass or offload)
* via a shared token (ex. eventfd_ctx). Producers and consumers register
* independently. When a token match is found, the optional @stop callback
* will be called for each participant. The pair will then be connected via
* the @add_* callbacks, and finally the optional @start callback will allow
* any final coordination. When either participant is unregistered, the
* process is repeated using the @del_* callbacks in place of the @add_*
* callbacks. Match tokens must be unique per producer/consumer, 1:N pairings
* are not supported.
*/
/**
* struct irq_bypass_producer - IRQ bypass producer definition
* @node: IRQ bypass manager private list management
* @token: opaque token to match between producer and consumer
* @irq: Linux IRQ number for the producer device
* @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
* @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
* @stop: Perform any quiesce operations necessary prior to add/del (optional)
* @start: Perform any startup operations necessary after add/del (optional)
*
* The IRQ bypass producer structure represents an interrupt source for
* participation in possible host bypass, for instance an interrupt vector
* for a physical device assigned to a VM.
*/
struct irq_bypass_producer {
struct list_head node;
void *token;
int irq;
int (*add_consumer)(struct irq_bypass_producer *,
struct irq_bypass_consumer *);
void (*del_consumer)(struct irq_bypass_producer *,
struct irq_bypass_consumer *);
void (*stop)(struct irq_bypass_producer *);
void (*start)(struct irq_bypass_producer *);
};
/**
* struct irq_bypass_consumer - IRQ bypass consumer definition
* @node: IRQ bypass manager private list management
* @token: opaque token to match between producer and consumer
* @add_producer: Connect the IRQ consumer to an IRQ producer
* @del_producer: Disconnect the IRQ consumer from an IRQ producer
* @stop: Perform any quiesce operations necessary prior to add/del (optional)
* @start: Perform any startup operations necessary after add/del (optional)
*
* The IRQ bypass consumer structure represents an interrupt sink for
* participation in possible host bypass, for instance a hypervisor may
* support offloads to allow bypassing the host entirely or offload
* portions of the interrupt handling to the VM.
*/
struct irq_bypass_consumer {
struct list_head node;
void *token;
int (*add_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void (*del_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void (*stop)(struct irq_bypass_consumer *);
void (*start)(struct irq_bypass_consumer *);
};
int irq_bypass_register_producer(struct irq_bypass_producer *);
void irq_bypass_unregister_producer(struct irq_bypass_producer *);
int irq_bypass_register_consumer(struct irq_bypass_consumer *);
void irq_bypass_unregister_consumer(struct irq_bypass_consumer *);
#endif /* IRQBYPASS_H */

View File

@@ -24,6 +24,7 @@
#include <linux/err.h>
#include <linux/irqflags.h>
#include <linux/context_tracking.h>
#include <linux/irqbypass.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -140,6 +141,8 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_APIC_PAGE_RELOAD 25
#define KVM_REQ_SMI 26
#define KVM_REQ_HV_CRASH 27
#define KVM_REQ_IOAPIC_EOI_EXIT 28
#define KVM_REQ_HV_RESET 29
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -231,6 +234,9 @@ struct kvm_vcpu {
unsigned long requests;
unsigned long guest_debug;
int pre_pcpu;
struct list_head blocked_vcpu_list;
struct mutex mutex;
struct kvm_run *run;
@@ -329,6 +335,18 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
struct kvm_irq_routing_table {
int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
u32 nr_rt_entries;
/*
* Array indexed by gsi. Each entry contains list of irq chips
* the gsi is connected to.
*/
struct hlist_head map[0];
};
#endif
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif
@@ -455,10 +473,14 @@ void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_IOAPIC
void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
void kvm_arch_irq_routing_update(struct kvm *kvm);
#else
static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
{
}
static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
{
}
#endif
#ifdef CONFIG_HAVE_KVM_IRQFD
@@ -625,6 +647,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
@@ -803,10 +827,13 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status);
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
int irq_source_id, int level, bool line_status);
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id,
int level, bool line_status);
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_notify_acked_gsi(struct kvm *kvm, int gsi);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
@@ -1002,6 +1029,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
#endif
int kvm_setup_default_irq_routing(struct kvm *kvm);
int kvm_setup_empty_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
@@ -1144,5 +1172,15 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
#endif

71
include/linux/kvm_irqfd.h Normal file
View File

@@ -0,0 +1,71 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* irqfd: Allows an fd to be used to inject an interrupt to the guest
* Credit goes to Avi Kivity for the original idea.
*/
#ifndef __LINUX_KVM_IRQFD_H
#define __LINUX_KVM_IRQFD_H
#include <linux/kvm_host.h>
#include <linux/poll.h>
/*
* Resampling irqfds are a special variety of irqfds used to emulate
* level triggered interrupts. The interrupt is asserted on eventfd
* trigger. On acknowledgment through the irq ack notifier, the
* interrupt is de-asserted and userspace is notified through the
* resamplefd. All resamplers on the same gsi are de-asserted
* together, so we don't need to track the state of each individual
* user. We can also therefore share the same irq source ID.
*/
struct kvm_kernel_irqfd_resampler {
struct kvm *kvm;
/*
* List of resampling struct _irqfd objects sharing this gsi.
* RCU list modified under kvm->irqfds.resampler_lock
*/
struct list_head list;
struct kvm_irq_ack_notifier notifier;
/*
* Entry in list of kvm->irqfd.resampler_list. Use for sharing
* resamplers among irqfds on the same gsi.
* Accessed and modified under kvm->irqfds.resampler_lock
*/
struct list_head link;
};
struct kvm_kernel_irqfd {
/* Used for MSI fast-path */
struct kvm *kvm;
wait_queue_t wait;
/* Update side is protected by irqfds.lock */
struct kvm_kernel_irq_routing_entry irq_entry;
seqcount_t irq_entry_sc;
/* Used for level IRQ fast-path */
int gsi;
struct work_struct inject;
/* The resampler used by this irqfd (resampler-only) */
struct kvm_kernel_irqfd_resampler *resampler;
/* Eventfd notified on resample (resampler-only) */
struct eventfd_ctx *resamplefd;
/* Entry in list of irqfds for a resampler (resampler-only) */
struct list_head resampler_link;
/* Used for setup/shutdown */
struct eventfd_ctx *eventfd;
struct list_head list;
poll_table pt;
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
};
#endif /* __LINUX_KVM_IRQFD_H */

View File

@@ -183,6 +183,7 @@ struct kvm_s390_skeys {
#define KVM_EXIT_EPR 23
#define KVM_EXIT_SYSTEM_EVENT 24
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -333,6 +334,10 @@ struct kvm_run {
__u8 sel1;
__u16 sel2;
} s390_stsi;
/* KVM_EXIT_IOAPIC_EOI */
struct {
__u8 vector;
} eoi;
/* Fix the size of the union. */
char padding[256];
};
@@ -824,6 +829,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_MULTI_ADDRESS_SPACE 118
#define KVM_CAP_GUEST_DEBUG_HW_BPS 119
#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
#define KVM_CAP_SPLIT_IRQCHIP 121
#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
#ifdef KVM_CAP_IRQ_ROUTING