Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
  KVM: Prevent overflow in largepages calculation
  KVM: Disable large pages on misaligned memory slots
  KVM: Add VT-x machine check support
  KVM: VMX: Rename rmode.active to rmode.vm86_active
  KVM: Move "exit due to NMI" handling into vmx_complete_interrupts()
  KVM: Disable CR8 intercept if tpr patching is active
  KVM: Do not migrate pending software interrupts.
  KVM: inject NMI after IRET from a previous NMI, not before.
  KVM: Always request IRQ/NMI window if an interrupt is pending
  KVM: Do not re-execute INTn instruction.
  KVM: skip_emulated_instruction() decode instruction if size is not known
  KVM: Remove irq_pending bitmap
  KVM: Do not allow interrupt injection from userspace if there is a pending event.
  KVM: Unprotect a page if #PF happens during NMI injection.
  KVM: s390: Verify memory in kvm run
  KVM: s390: Sanity check on validity intercept
  KVM: s390: Unlink vcpu on destroy - v2
  KVM: s390: optimize float int lock: spin_lock_bh --> spin_lock
  KVM: s390: use hrtimer for clock wakeup from idle - v2
  KVM: s390: Fix memory slot versus run - v3
  ...
commit 6cd8e300b4
Author: Linus Torvalds
Date:   2009-06-11 10:03:30 -07:00

55 changed files with 2489 additions and 1662 deletions


@@ -371,6 +371,7 @@ struct kvm_vcpu_arch {
     int last_run_cpu;
     int vmm_tr_slot;
     int vm_tr_slot;
+    int sn_rtc_tr_slot;

 #define KVM_MP_STATE_RUNNABLE          0
 #define KVM_MP_STATE_UNINITIALIZED     1
@@ -465,6 +466,7 @@ struct kvm_arch {
     unsigned long vmm_init_rr;

     int online_vcpus;
+    int is_sn2;

     struct kvm_ioapic *vioapic;
     struct kvm_vm_stat stat;
@@ -472,6 +474,7 @@ struct kvm_arch {
     struct list_head assigned_dev_head;
     struct iommu_domain *iommu_domain;
+    int iommu_flags;
     struct hlist_head irq_ack_notifier_list;

     unsigned long irq_sources_bitmap;
@@ -578,6 +581,8 @@ struct kvm_vmm_info{
     kvm_vmm_entry *vmm_entry;
     kvm_tramp_entry *tramp_entry;
     unsigned long vmm_ivt;
+    unsigned long patch_mov_ar;
+    unsigned long patch_mov_ar_sn2;
 };

 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
@@ -585,7 +590,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 void kvm_sal_emul(struct kvm_vcpu *vcpu);

-static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}

 #endif /* __ASSEMBLY__*/
 #endif


@@ -146,6 +146,8 @@
 #define PAGE_GATE   __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
 #define PAGE_KERNEL __pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
 #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX | \
+                                _PAGE_MA_UC)

 # ifndef __ASSEMBLY__


@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = {
     .name =     "IPI"
 };

+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 static struct irqaction resched_irqaction = {
     .handler =  dummy_handler,
     .flags =    IRQF_DISABLED,


@@ -23,7 +23,7 @@ if VIRTUALIZATION

 config KVM
     tristate "Kernel-based Virtual Machine (KVM) support"
-    depends on HAVE_KVM && EXPERIMENTAL
+    depends on HAVE_KVM && MODULES && EXPERIMENTAL
     # for device assignment:
     depends on PCI
     select PREEMPT_NOTIFIERS


@@ -41,6 +41,9 @@
 #include <asm/div64.h>
 #include <asm/tlb.h>
 #include <asm/elf.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>

 #include "misc.h"
 #include "vti.h"
@@ -65,6 +68,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
     { NULL }
 };

+static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+    if (vcpu->kvm->arch.is_sn2)
+        return rtc_time();
+    else
+#endif
+        return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
 static void kvm_flush_icache(unsigned long start, unsigned long len)
 {
     int l;
@@ -119,8 +132,7 @@ void kvm_arch_hardware_enable(void *garbage)
     unsigned long saved_psr;
     int slot;

-    pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-                PAGE_KERNEL));
+    pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
     local_irq_save(saved_psr);
     slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
     local_irq_restore(saved_psr);
@@ -283,6 +295,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

 }

+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+    if (!test_and_set_bit(vector, &vpd->irr[0])) {
+        vcpu->arch.irq_new_pending = 1;
+        kvm_vcpu_kick(vcpu);
+        return 1;
+    }
+    return 0;
+}
+
 /*
  *  offset: address offset to IPI space.
  *  value:  deliver value.
@@ -292,20 +316,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
 {
     switch (dm) {
     case SAPIC_FIXED:
-        kvm_apic_set_irq(vcpu, vector, 0);
         break;
     case SAPIC_NMI:
-        kvm_apic_set_irq(vcpu, 2, 0);
+        vector = 2;
         break;
     case SAPIC_EXTINT:
-        kvm_apic_set_irq(vcpu, 0, 0);
+        vector = 0;
         break;
     case SAPIC_INIT:
     case SAPIC_PMI:
     default:
        printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-       break;
+       return;
     }
+    __apic_accept_irq(vcpu, vector);
 }

 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
@@ -413,6 +437,23 @@ static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
     return 1;
 }

+static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
+{
+    unsigned long pte, rtc_phys_addr, map_addr;
+    int slot;
+
+    map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
+    rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
+    pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
+    slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
+    vcpu->arch.sn_rtc_tr_slot = slot;
+    if (slot < 0) {
+        printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
+        slot = 0;
+    }
+    return slot;
+}
+
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
@@ -426,7 +467,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)

     if (irqchip_in_kernel(vcpu->kvm)) {

-        vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+        vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;

         if (time_after(vcpu_now_itc, vpd->itm)) {
             vcpu->arch.timer_check = 1;
@@ -447,10 +488,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
         hrtimer_cancel(p_ht);
         vcpu->arch.ht_active = 0;

-        if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+        if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
+                kvm_cpu_has_pending_timer(vcpu))
             if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-                vcpu->arch.mp_state =
-                    KVM_MP_STATE_RUNNABLE;
+                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

         if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
             return -EINTR;
@@ -551,22 +592,35 @@ static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
     if (r < 0)
         goto out;
     vcpu->arch.vm_tr_slot = r;
+
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+    if (kvm->arch.is_sn2) {
+        r = kvm_sn2_setup_mappings(vcpu);
+        if (r < 0)
+            goto out;
+    }
+#endif
     r = 0;
 out:
     return r;
 }

 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
 {
+    struct kvm *kvm = vcpu->kvm;
     ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
     ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+    if (kvm->arch.is_sn2)
+        ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
+#endif
 }

 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
 {
+    unsigned long psr;
+    int r;
     int cpu = smp_processor_id();

     if (vcpu->arch.last_run_cpu != cpu ||
@@ -578,36 +632,27 @@ static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
     vcpu->arch.host_rr6 = ia64_get_rr(RR6);
     vti_set_rr6(vcpu->arch.vmm_rr);
-    return kvm_insert_vmm_mapping(vcpu);
+    local_irq_save(psr);
+    r = kvm_insert_vmm_mapping(vcpu);
+    local_irq_restore(psr);
+    return r;
 }

 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
 {
     kvm_purge_vmm_mapping(vcpu);
     vti_set_rr6(vcpu->arch.host_rr6);
 }

-static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
     union context *host_ctx, *guest_ctx;
     int r;

-    /*Get host and guest context with guest address space.*/
-    host_ctx = kvm_get_host_context(vcpu);
-    guest_ctx = kvm_get_guest_context(vcpu);
-
-    r = kvm_vcpu_pre_transition(vcpu);
-    if (r < 0)
-        goto out;
-    kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-    kvm_vcpu_post_transition(vcpu);
-    r = 0;
-out:
-    return r;
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-    int r;
+    /*
+     * down_read() may sleep and return with interrupts enabled
+     */
+    down_read(&vcpu->kvm->slots_lock);

 again:
     if (signal_pending(current)) {
@@ -616,26 +661,31 @@ again:
         goto out;
     }

-    /*
-     * down_read() may sleep and return with interrupts enabled
-     */
-    down_read(&vcpu->kvm->slots_lock);
-
     preempt_disable();
     local_irq_disable();

-    vcpu->guest_mode = 1;
+    /*Get host and guest context with guest address space.*/
+    host_ctx = kvm_get_host_context(vcpu);
+    guest_ctx = kvm_get_guest_context(vcpu);
+
+    clear_bit(KVM_REQ_KICK, &vcpu->requests);
+
+    r = kvm_vcpu_pre_transition(vcpu);
+    if (r < 0)
+        goto vcpu_run_fail;
+
+    up_read(&vcpu->kvm->slots_lock);
     kvm_guest_enter();
-    r = vti_vcpu_run(vcpu, kvm_run);
-    if (r < 0) {
-        local_irq_enable();
-        preempt_enable();
-        kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-        goto out;
-    }
+
+    /*
+     * Transition to the guest
+     */
+    kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+
+    kvm_vcpu_post_transition(vcpu);

     vcpu->arch.launched = 1;
-    vcpu->guest_mode = 0;
+    set_bit(KVM_REQ_KICK, &vcpu->requests);
     local_irq_enable();

     /*
@@ -646,9 +696,10 @@ again:
      */
     barrier();
     kvm_guest_exit();
-    up_read(&vcpu->kvm->slots_lock);
     preempt_enable();

+    down_read(&vcpu->kvm->slots_lock);
+
     r = kvm_handle_exit(kvm_run, vcpu);

     if (r > 0) {
@@ -657,12 +708,20 @@ again:
     }

 out:
+    up_read(&vcpu->kvm->slots_lock);
     if (r > 0) {
         kvm_resched(vcpu);
+        down_read(&vcpu->kvm->slots_lock);
         goto again;
     }

     return r;
+
+vcpu_run_fail:
+    local_irq_enable();
+    preempt_enable();
+    kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+    goto out;
 }

 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
@@ -788,6 +847,9 @@ struct kvm *kvm_arch_create_vm(void)
     if (IS_ERR(kvm))
         return ERR_PTR(-ENOMEM);
+
+    kvm->arch.is_sn2 = ia64_platform_is("sn2");
+
     kvm_init_vm(kvm);

     kvm->arch.online_vcpus = 0;
@@ -884,7 +946,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
     RESTORE_REGS(saved_gp);

     vcpu->arch.irq_new_pending = 1;
-    vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+    vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
     set_bit(KVM_REQ_RESUME, &vcpu->requests);

     vcpu_put(vcpu);
@@ -1043,10 +1105,6 @@ static void kvm_free_vmm_area(void)
     }
 }

-static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 static int vti_init_vpd(struct kvm_vcpu *vcpu)
 {
     int i;
@@ -1165,7 +1223,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
     regs->cr_iip = PALE_RESET_ENTRY;

     /*Initialize itc offset for vcpus*/
-    itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
+    itc_offset = 0UL - kvm_get_itc(vcpu);
     for (i = 0; i < kvm->arch.online_vcpus; i++) {
         v = (struct kvm_vcpu *)((char *)vcpu +
                 sizeof(struct kvm_vcpu_data) * i);
@@ -1237,6 +1295,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)

     local_irq_save(psr);
     r = kvm_insert_vmm_mapping(vcpu);
+    local_irq_restore(psr);
     if (r)
         goto fail;

     r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
@@ -1254,13 +1313,11 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
         goto uninit;

     kvm_purge_vmm_mapping(vcpu);
-    local_irq_restore(psr);

     return 0;
 uninit:
     kvm_vcpu_uninit(vcpu);
 fail:
-    local_irq_restore(psr);
     return r;
 }

@@ -1291,7 +1348,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
     vcpu->kvm = kvm;

     cpu = get_cpu();
-    vti_vcpu_load(vcpu, cpu);
     r = vti_vcpu_setup(vcpu, id);
     put_cpu();

@@ -1427,7 +1483,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
     }
     for (i = 0; i < 4; i++)
         regs->insvc[i] = vcpu->arch.insvc[i];
-    regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+    regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
     SAVE_REGS(xtp);
     SAVE_REGS(metaphysical_rr0);
     SAVE_REGS(metaphysical_rr4);
@@ -1574,6 +1630,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,

 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
+    kvm_flush_remote_tlbs(kvm);
 }

 long kvm_arch_dev_ioctl(struct file *filp,
@@ -1616,8 +1673,37 @@ out:
     return 0;
 }

+/*
+ * On SN2, the ITC isn't stable, so copy in fast path code to use the
+ * SN2 RTC, replacing the ITC based default verion.
+ */
+static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
+              struct module *module)
+{
+    unsigned long new_ar, new_ar_sn2;
+    unsigned long module_base;
+
+    if (!ia64_platform_is("sn2"))
+        return;
+
+    module_base = (unsigned long)module->module_core;
+
+    new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
+    new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
+
+    printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
+           "as source\n");
+
+    /*
+     * Copy the SN2 version of mov_ar into place. They are both
+     * the same size, so 6 bundles is sufficient (6 * 0x10).
+     */
+    memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
+}
+
 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
             struct module *module)
 {
     unsigned long module_base;
     unsigned long vmm_size;
@@ -1639,6 +1725,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
         return -EFAULT;

     memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+    kvm_patch_vmm(vmm_info, module);
     kvm_flush_icache(kvm_vmm_base, vmm_size);

     /*Recalculate kvm_vmm_info based on new VMM*/
@@ -1792,38 +1879,24 @@ void kvm_arch_hardware_unsetup(void)
 {
 }

-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-    struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-    printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-    int ipi_pcpu = vcpu->cpu;
-    int cpu = get_cpu();
+    int me;
+    int cpu = vcpu->cpu;

     if (waitqueue_active(&vcpu->wq))
         wake_up_interruptible(&vcpu->wq);

-    if (vcpu->guest_mode && cpu != ipi_pcpu)
-        smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+    me = get_cpu();
+    if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+        if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+            smp_send_reschedule(cpu);
     put_cpu();
 }

-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
-
-    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-    if (!test_and_set_bit(vec, &vpd->irr[0])) {
-        vcpu->arch.irq_new_pending = 1;
-        kvm_vcpu_kick(vcpu);
-        return 1;
-    }
-    return 0;
+    return __apic_accept_irq(vcpu, irq->vector);
 }

 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
@@ -1836,20 +1909,18 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
     return 0;
 }

-struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-                unsigned long bitmap)
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 {
-    struct kvm_vcpu *lvcpu = kvm->vcpus[0];
-    int i;
-
-    for (i = 1; i < kvm->arch.online_vcpus; i++) {
-        if (!kvm->vcpus[i])
-            continue;
-        if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
-            lvcpu = kvm->vcpus[i];
-    }
+    return vcpu1->arch.xtp - vcpu2->arch.xtp;
+}

-    return lvcpu;
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+        int short_hand, int dest, int dest_mode)
+{
+    struct kvm_lapic *target = vcpu->arch.apic;
+    return (dest_mode == 0) ?
+        kvm_apic_match_physical_addr(target, dest) :
+        kvm_apic_match_logical_addr(target, dest);
 }

 static int find_highest_bits(int *dat)
@@ -1888,6 +1959,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
     return 0;
 }

+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+    /* do real check here */
+    return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
     return vcpu->arch.timer_fired;
@@ -1918,6 +1995,7 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
     long psr;
     local_irq_save(psr);
     r = kvm_insert_vmm_mapping(vcpu);
+    local_irq_restore(psr);
     if (r)
         goto fail;
@@ -1930,7 +2008,6 @@ static int vcpu_reset(struct kvm_vcpu *vcpu)
     kvm_purge_vmm_mapping(vcpu);
     r = 0;
 fail:
-    local_irq_restore(psr);
     return r;
 }
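
A note on the kick rework in this file: the old code tracked vcpu->guest_mode by hand and unconditionally paid for a cross-CPU call, while the new code folds this into a KVM_REQ_KICK request bit that the vcpu clears on guest entry, so only the first kicker after an entry actually sends the reschedule IPI. A minimal userspace sketch of the same idea using C11 atomics (illustrative names, not the kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_ulong requests;          /* stand-in for vcpu->requests */
#define KVM_REQ_KICK 0                 /* bit number, as in the patch */

static bool test_and_set_bit(int nr, atomic_ulong *addr)
{
    unsigned long mask = 1UL << nr;
    return atomic_fetch_or(addr, mask) & mask;
}

/* Guest entry: announce "in guest mode" by clearing the bit. */
static void vcpu_enter_guest(void)
{
    atomic_fetch_and(&requests, ~(1UL << KVM_REQ_KICK));
}

/* Kicker: only the first caller after an entry pays for the IPI. */
static void vcpu_kick(void)
{
    if (!test_and_set_bit(KVM_REQ_KICK, &requests))
        printf("send reschedule IPI\n");   /* smp_send_reschedule() */
}

int main(void)
{
    vcpu_enter_guest();
    vcpu_kick();   /* sends the IPI */
    vcpu_kick();   /* bit already set: IPI suppressed */
    return 0;
}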


@@ -21,6 +21,9 @@
 #include <linux/kvm_host.h>
 #include <linux/smp.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/shub_mmr.h>

 #include "vti.h"
 #include "misc.h"
@@ -188,12 +191,35 @@ static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
     return result;
 }

+/*
+ * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
+ * RTC is used instead. This function patches the ratios from SAL
+ * to match the RTC before providing them to the guest.
+ */
+static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
+{
+    struct pal_freq_ratio *ratio;
+    unsigned long sal_freq, sal_drift, factor;
+
+    result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+                        &sal_freq, &sal_drift);
+    ratio = (struct pal_freq_ratio *)&result->v2;
+    factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
+        sn_rtc_cycles_per_second;
+
+    ratio->num = 3;
+    ratio->den = factor;
+}
+
 static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
 {
     struct ia64_pal_retval result;

     PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+    if (vcpu->kvm->arch.is_sn2)
+        sn2_patch_itc_freq_ratios(&result);
+
     return result;
 }
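
To see what the ratio patching accomplishes, plug in round numbers: with a hypothetical 200 MHz platform base clock and a 25 MHz SN2 RTC, factor = (3*200e6 + 12.5e6)/25e6 = 24, so the guest is told num/den = 3/24 and derives an ITC rate of 200 MHz * 3/24 = 25 MHz, exactly the RTC rate that now backs the emulated ITC. The numerator 3 merely buys resolution for the integer division. A standalone sketch (hypothetical frequencies, not measured SN2 values):

#include <stdio.h>

int main(void)
{
    /* Hypothetical figures: 200 MHz platform base, 25 MHz SN2 RTC. */
    unsigned long sal_freq = 200000000UL;
    unsigned long rtc = 25000000UL;

    /* Same rounded division as sn2_patch_itc_freq_ratios(). */
    unsigned long factor = (sal_freq * 3 + rtc / 2) / rtc;

    /* The guest derives: itc_freq = base_freq * num / den. */
    printf("num/den = 3/%lu -> guest ITC = %lu Hz\n",
           factor, sal_freq * 3 / factor);   /* 25000000, the RTC rate */
    return 0;
}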


@@ -20,6 +20,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);

 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+        int short_hand, int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+#define kvm_apic_present(x) (true)

 #endif


@@ -11,6 +11,7 @@

 #include <asm/asmmacro.h>
 #include <asm/processor.h>
+#include <asm/kvm_host.h>

 #include "vti.h"
 #include "asm-offsets.h"
@@ -140,6 +141,35 @@ GLOBAL_ENTRY(kvm_asm_mov_from_ar)
     ;;
 END(kvm_asm_mov_from_ar)

+/*
+ * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
+ * clock as it's source for emulating the ITC. This version will be
+ * copied on top of the original version if the host is determined to
+ * be an SN2.
+ */
+GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
+    add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+    movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
+
+    add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+    extr.u r17=r25,6,7
+    mov r24=b0
+    ;;
+    ld8 r18=[r18]
+    ld8 r19=[r19]
+    addl r20=@gprel(asm_mov_to_reg),gp
+    ;;
+    add r19=r19,r18
+    shladd r17=r17,4,r20
+    ;;
+    adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+    st8 [r16] = r19
+    mov b0=r17
+    br.sptk.few b0
+    ;;
+END(kvm_asm_mov_from_ar_sn2)
+
 // mov r1=rr[r3]
 GLOBAL_ENTRY(kvm_asm_mov_from_rr)


@@ -652,20 +652,25 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
         unsigned long isr, unsigned long iim)
 {
     struct kvm_vcpu *v = current_vcpu;
+    long psr;

     if (ia64_psr(regs)->cpl == 0) {
         /* Allow hypercalls only when cpl = 0.  */
         if (iim == DOMN_PAL_REQUEST) {
+            local_irq_save(psr);
             set_pal_call_data(v);
             vmm_transition(v);
             get_pal_call_result(v);
             vcpu_increment_iip(v);
+            local_irq_restore(psr);
             return;
         } else if (iim == DOMN_SAL_REQUEST) {
+            local_irq_save(psr);
             set_sal_call_data(v);
             vmm_transition(v);
             get_sal_call_result(v);
             vcpu_increment_iip(v);
+            local_irq_restore(psr);
             return;
         }
     }


@@ -788,13 +788,29 @@ void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
     setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
 }

+/*
+ * The Altix RTC is mapped specially here for the vmm module
+ */
+#define SN_RTC_BASE    (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
+static long kvm_get_itc(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+    struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
+
+    if (kvm->arch.is_sn2)
+        return (*SN_RTC_BASE);
+    else
+#endif
+        return ia64_getreg(_IA64_REG_AR_ITC);
+}
+
 /************************************************************************
  * lsapic timer
  ***********************************************************************/
 u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
 {
     unsigned long guest_itc;
-    guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
+    guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);

     if (guest_itc >= VMX(vcpu, last_itc)) {
         VMX(vcpu, last_itc) = guest_itc;
@@ -809,7 +825,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
     struct kvm_vcpu *v;
     struct kvm *kvm;
     int i;
-    long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
+    long itc_offset = val - kvm_get_itc(vcpu);
     unsigned long vitv = VCPU(vcpu, itv);

     kvm = (struct kvm *)KVM_VM_BASE;


@@ -30,15 +30,19 @@ MODULE_AUTHOR("Intel");
 MODULE_LICENSE("GPL");

 extern char kvm_ia64_ivt;
+extern char kvm_asm_mov_from_ar;
+extern char kvm_asm_mov_from_ar_sn2;
 extern fpswa_interface_t *vmm_fpswa_interface;

 long vmm_sanity = 1;

 struct kvm_vmm_info vmm_info = {
     .module            = THIS_MODULE,
     .vmm_entry         = vmm_entry,
     .tramp_entry       = vmm_trampoline,
     .vmm_ivt           = (unsigned long)&kvm_ia64_ivt,
+    .patch_mov_ar      = (unsigned long)&kvm_asm_mov_from_ar,
+    .patch_mov_ar_sn2  = (unsigned long)&kvm_asm_mov_from_ar_sn2,
 };

 static int __init kvm_vmm_init(void)


@@ -95,7 +95,7 @@ GLOBAL_ENTRY(kvm_vmm_panic)
     ;;
     srlz.i    // guarantee that interruption collection is on
     ;;
-    //(p15) ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.
     addl r14=@gprel(ia64_leave_hypervisor),gp
     ;;
     KVM_SAVE_REST
@@ -249,7 +249,7 @@ ENTRY(kvm_break_fault)
     ;;
     srlz.i         // guarantee that interruption collection is on
     ;;
-    //(p15)ssm psr.i               // restore psr.i
+    (p15)ssm psr.i               // restore psr.i
     addl r14=@gprel(ia64_leave_hypervisor),gp
     ;;
     KVM_SAVE_REST
@@ -439,7 +439,7 @@ kvm_dispatch_vexirq:
     ;;
     srlz.i    // guarantee that interruption collection is on
     ;;
-    //(p15) ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     adds r3=8,r2                // set up second base pointer
     ;;
     KVM_SAVE_REST
@@ -819,7 +819,7 @@ ENTRY(kvm_dtlb_miss_dispatch)
     ;;
     srlz.i     // guarantee that interruption collection is on
     ;;
-    //(p15) ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
     ;;
     KVM_SAVE_REST
@@ -842,7 +842,7 @@ ENTRY(kvm_itlb_miss_dispatch)
     ;;
     srlz.i   // guarantee that interruption collection is on
     ;;
-    //(p15) ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     addl r14=@gprel(ia64_leave_hypervisor),gp
     ;;
     KVM_SAVE_REST
@@ -871,7 +871,7 @@ ENTRY(kvm_dispatch_reflection)
     ;;
     srlz.i    // guarantee that interruption collection is on
     ;;
-    //(p15) ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     addl r14=@gprel(ia64_leave_hypervisor),gp
     ;;
     KVM_SAVE_REST
@@ -898,7 +898,7 @@ ENTRY(kvm_dispatch_virtualization_fault)
     ;;
     srlz.i     // guarantee that interruption collection is on
     ;;
-    //(p15) ssm psr.i               // restore psr.i
+    (p15) ssm psr.i               // restore psr.i
     addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
     ;;
     KVM_SAVE_REST
@@ -920,7 +920,7 @@ ENTRY(kvm_dispatch_interrupt)
     ;;
     srlz.i
     ;;
-    //(p15) ssm psr.i
+    (p15) ssm psr.i
     addl r14=@gprel(ia64_leave_hypervisor),gp
     ;;
     KVM_SAVE_REST
@@ -1333,7 +1333,7 @@ hostret = r24
     ;;
 (p7) srlz.i
     ;;
-//(p6) ssm psr.i
+(p6) ssm psr.i
     ;;
     mov rp=rpsave
     mov ar.pfs=pfssave


@@ -254,7 +254,8 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
             "(p7) st8 [%2]=r9;;"
             "ssm psr.ic;;"
             "srlz.d;;"
-            /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
+            "ssm psr.i;;"
+            "srlz.d;;"
             : "=r"(ret) : "r"(iha), "r"(pte):"memory");

     return ret;


@@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
     return !!(v->arch.pending_exceptions);
 }

+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+    /* do real check here */
+    return 1;
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
     return !(v->arch.msr & MSR_WE);


@@ -13,6 +13,8 @@

 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
 #include <asm/cpuid.h>
@@ -210,7 +212,8 @@ struct kvm_vcpu_arch {
     s390_fp_regs      guest_fpregs;
     unsigned int      guest_acrs[NUM_ACRS];
     struct kvm_s390_local_interrupt local_int;
-    struct timer_list ckc_timer;
+    struct hrtimer    ckc_timer;
+    struct tasklet_struct tasklet;
     union  {
         cpuid_t       cpu_id;
         u64           stidp_data;


@@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 static int handle_validity(struct kvm_vcpu *vcpu)
 {
     int viwhy = vcpu->arch.sie_block->ipb >> 16;
+    int rc;
+
     vcpu->stat.exit_validity++;
-    if (viwhy == 0x37) {
-        fault_in_pages_writeable((char __user *)
-            vcpu->kvm->arch.guest_origin +
-            vcpu->arch.sie_block->prefix,
-            PAGE_SIZE);
-        return 0;
-    }
-    VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
-           viwhy);
-    return -ENOTSUPP;
+    if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
+                <= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
+        rc = fault_in_pages_writeable((char __user *)
+            vcpu->kvm->arch.guest_origin +
+            vcpu->arch.sie_block->prefix,
+            2*PAGE_SIZE);
+        if (rc)
+            /* user will receive sigsegv, exit to user */
+            rc = -ENOTSUPP;
+    } else
+        rc = -ENOTSUPP;
+
+    if (rc)
+        VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+               viwhy);
+    return rc;
 }

 static int handle_instruction(struct kvm_vcpu *vcpu)
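
Background for the bounds check above: the z/Architecture prefix area spans two 4 KB pages (8 KB), which is why the handler now faults in 2*PAGE_SIZE and first verifies that the guest-set prefix leaves room for both pages inside the registered guest memory. A small standalone sketch of the same arithmetic (illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* The two-page prefix area must lie entirely inside guest memory. */
static bool prefix_in_bounds(unsigned long prefix, unsigned long memsize)
{
    return prefix <= memsize - 2 * PAGE_SIZE;
}

int main(void)
{
    printf("%d\n", prefix_in_bounds(0, 1UL << 20));            /* 1: fits */
    printf("%d\n", prefix_in_bounds((1UL << 20) - PAGE_SIZE,
                    1UL << 20));                               /* 0: spills */
    return 0;
}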


@@ -12,6 +12,8 @@

 #include <asm/lowcore.h>
 #include <asm/uaccess.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/signal.h>
 #include "kvm-s390.h"
@@ -299,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
     }

     if ((!rc) && atomic_read(&fi->active)) {
-        spin_lock_bh(&fi->lock);
+        spin_lock(&fi->lock);
         list_for_each_entry(inti, &fi->list, list)
             if (__interrupt_is_deliverable(vcpu, inti)) {
                 rc = 1;
                 break;
             }
-        spin_unlock_bh(&fi->lock);
+        spin_unlock(&fi->lock);
     }

     if ((!rc) && (vcpu->arch.sie_block->ckc <
@@ -318,6 +320,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
     return rc;
 }

+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+    /* do real check here */
+    return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
     return 0;
@@ -355,14 +363,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
         return 0;
     }

-    sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+    sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9;

-    vcpu->arch.ckc_timer.expires = jiffies + sltime;
-
-    add_timer(&vcpu->arch.ckc_timer);
-    VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
+    hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
+    VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
-    spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+    spin_lock(&vcpu->arch.local_int.float_int->lock);
     spin_lock_bh(&vcpu->arch.local_int.lock);
     add_wait_queue(&vcpu->arch.local_int.wq, &wait);
     while (list_empty(&vcpu->arch.local_int.list) &&
@@ -371,33 +377,46 @@ no_timer:
         !signal_pending(current)) {
         set_current_state(TASK_INTERRUPTIBLE);
         spin_unlock_bh(&vcpu->arch.local_int.lock);
-        spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+        spin_unlock(&vcpu->arch.local_int.float_int->lock);
         vcpu_put(vcpu);
         schedule();
         vcpu_load(vcpu);
-        spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+        spin_lock(&vcpu->arch.local_int.float_int->lock);
         spin_lock_bh(&vcpu->arch.local_int.lock);
     }
     __unset_cpu_idle(vcpu);
     __set_current_state(TASK_RUNNING);
     remove_wait_queue(&vcpu->wq, &wait);
     spin_unlock_bh(&vcpu->arch.local_int.lock);
-    spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
-    del_timer(&vcpu->arch.ckc_timer);
+    spin_unlock(&vcpu->arch.local_int.float_int->lock);
+    hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
     return 0;
 }

-void kvm_s390_idle_wakeup(unsigned long data)
+void kvm_s390_tasklet(unsigned long parm)
 {
-    struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+    struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;

-    spin_lock_bh(&vcpu->arch.local_int.lock);
+    spin_lock(&vcpu->arch.local_int.lock);
     vcpu->arch.local_int.timer_due = 1;
     if (waitqueue_active(&vcpu->arch.local_int.wq))
         wake_up_interruptible(&vcpu->arch.local_int.wq);
-    spin_unlock_bh(&vcpu->arch.local_int.lock);
+    spin_unlock(&vcpu->arch.local_int.lock);
 }

+/*
+ * low level hrtimer wake routine. Because this runs in hardirq context
+ * we schedule a tasklet to do the real work.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+    struct kvm_vcpu *vcpu;
+
+    vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+    tasklet_schedule(&vcpu->arch.tasklet);
+
+    return HRTIMER_NORESTART;
+}
+
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
@@ -436,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
     if (atomic_read(&fi->active)) {
         do {
             deliver = 0;
-            spin_lock_bh(&fi->lock);
+            spin_lock(&fi->lock);
             list_for_each_entry_safe(inti, n, &fi->list, list) {
                 if (__interrupt_is_deliverable(vcpu, inti)) {
                     list_del(&inti->list);
@@ -447,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
             }
             if (list_empty(&fi->list))
                 atomic_set(&fi->active, 0);
-            spin_unlock_bh(&fi->lock);
+            spin_unlock(&fi->lock);
             if (deliver) {
                 __do_deliver_interrupt(vcpu, inti);
                 kfree(inti);
@@ -512,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,

     mutex_lock(&kvm->lock);
     fi = &kvm->arch.float_int;
-    spin_lock_bh(&fi->lock);
+    spin_lock(&fi->lock);
     list_add_tail(&inti->list, &fi->list);
     atomic_set(&fi->active, 1);
     sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
@@ -529,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
         if (waitqueue_active(&li->wq))
             wake_up_interruptible(&li->wq);
         spin_unlock_bh(&li->lock);
-    spin_unlock_bh(&fi->lock);
+    spin_unlock(&fi->lock);
     mutex_unlock(&kvm->lock);
     return 0;
 }
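
One detail worth unpacking in the hunk above: the clock-comparator delta is in TOD clock units, where bit 51 of the TOD ticks once per microsecond, so one TOD unit equals 1/4096 us = 1000/4096 ns = 125/512 ns. Multiplying by 125 and shifting right by 9 therefore converts TOD units to the nanoseconds that hrtimer_start() expects:

#include <stdio.h>

/* Convert an s390 TOD-clock delta to nanoseconds: 1 TOD unit = 125/512 ns. */
static unsigned long long tod_to_ns(unsigned long long tod)
{
    return (tod * 125) >> 9;   /* same expression as in the patch */
}

int main(void)
{
    /* 4096 TOD units = 1 microsecond = 1000 ns. */
    printf("%llu ns\n", tod_to_ns(4096));   /* prints 1000 */
    return 0;
}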


@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/hrtimer.h>
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
@@ -195,6 +196,10 @@ out_nokvm:
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
     VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+    if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+        (__u64) vcpu->arch.sie_block)
+        vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+    smp_mb();
     free_page((unsigned long)(vcpu->arch.sie_block));
     kvm_vcpu_uninit(vcpu);
     kfree(vcpu);
@@ -283,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
     vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
     vcpu->arch.sie_block->ecb   = 2;
     vcpu->arch.sie_block->eca   = 0xC1002001U;
-    setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
-         (unsigned long) vcpu);
+    hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+    tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
+             (unsigned long) vcpu);
+    vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
     get_cpu_id(&vcpu->arch.cpu_id);
     vcpu->arch.cpu_id.version = 0xff;
     return 0;
@@ -307,19 +314,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,

     vcpu->arch.sie_block->icpua = id;
     BUG_ON(!kvm->arch.sca);
-    BUG_ON(kvm->arch.sca->cpu[id].sda);
+    if (!kvm->arch.sca->cpu[id].sda)
         kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+    else
+        BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
     vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
     vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;

     spin_lock_init(&vcpu->arch.local_int.lock);
     INIT_LIST_HEAD(&vcpu->arch.local_int.list);
     vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-    spin_lock_bh(&kvm->arch.float_int.lock);
+    spin_lock(&kvm->arch.float_int.lock);
     kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
     init_waitqueue_head(&vcpu->arch.local_int.wq);
     vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-    spin_unlock_bh(&kvm->arch.float_int.lock);
+    spin_unlock(&kvm->arch.float_int.lock);

     rc = kvm_vcpu_init(vcpu, kvm, id);
     if (rc)
@@ -478,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

     vcpu_load(vcpu);

+    /* verify, that memory has been registered */
+    if (!vcpu->kvm->arch.guest_memsize) {
+        vcpu_put(vcpu);
+        return -EINVAL;
+    }
+
     if (vcpu->sigset_active)
         sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@ -657,6 +672,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
                 struct kvm_memory_slot old,
                 int user_alloc)
 {
+    int i;
+
     /* A few sanity checks. We can have exactly one memory slot which has
        to start at guest virtual zero and which has to be located at a
        page boundary in userland and which has to end at a page boundary.
@@ -664,7 +681,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
        vmas. It is okay to mmap() and munmap() stuff in this slot after
        doing this call at any time */

-    if (mem->slot)
+    if (mem->slot || kvm->arch.guest_memsize)
         return -EINVAL;

     if (mem->guest_phys_addr)
@@ -676,15 +693,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
     if (mem->memory_size & (PAGE_SIZE - 1))
         return -EINVAL;

+    if (!user_alloc)
+        return -EINVAL;
+
+    /* lock all vcpus */
+    for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+        if (!kvm->vcpus[i])
+            continue;
+        if (!mutex_trylock(&kvm->vcpus[i]->mutex))
+            goto fail_out;
+    }
+
     kvm->arch.guest_origin = mem->userspace_addr;
     kvm->arch.guest_memsize = mem->memory_size;

-    /* FIXME: we do want to interrupt running CPUs and update their memory
-       configuration now to avoid race conditions. But hey, changing the
-       memory layout while virtual CPUs are running is usually bad
-       programming practice. */
+    /* update sie control blocks, and unlock all vcpus */
+    for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+        if (kvm->vcpus[i]) {
+            kvm->vcpus[i]->arch.sie_block->gmsor =
+                kvm->arch.guest_origin;
+            kvm->vcpus[i]->arch.sie_block->gmslm =
+                kvm->arch.guest_memsize +
+                kvm->arch.guest_origin +
+                VIRTIODESCSPACE - 1ul;
+            mutex_unlock(&kvm->vcpus[i]->mutex);
+        }
+    }

     return 0;
+
+fail_out:
+    for (; i >= 0; i--)
+        mutex_unlock(&kvm->vcpus[i]->mutex);
+    return -EINVAL;
 }

 void kvm_arch_flush_shadow(struct kvm *kvm)
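
One design note on the slot-update path above: every vcpu mutex is taken with mutex_trylock() rather than mutex_lock(), so the ioctl can never deadlock against a vcpu that is already running under its own mutex; if any vcpu is busy, the locks taken so far are rolled back and the call fails so userspace may retry. A minimal pthreads sketch of that pattern (illustrative, not the kernel API):

#include <pthread.h>
#include <stdio.h>

#define NVCPUS 4
static pthread_mutex_t vcpu_mutex[NVCPUS] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* Try to stop the world: grab every vcpu lock without blocking. */
static int lock_all_vcpus(void)
{
    int i;

    for (i = 0; i < NVCPUS; i++) {
        if (pthread_mutex_trylock(&vcpu_mutex[i])) {
            while (--i >= 0)          /* roll back on failure */
                pthread_mutex_unlock(&vcpu_mutex[i]);
            return -1;                /* caller may retry */
        }
    }
    return 0;
}

int main(void)
{
    int i;

    if (lock_all_vcpus() == 0) {
        puts("all vcpus quiesced; safe to update the memory layout");
        for (i = 0; i < NVCPUS; i++)
            pthread_mutex_unlock(&vcpu_mutex[i]);
    }
    return 0;
}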


@@ -14,6 +14,7 @@
 #ifndef ARCH_S390_KVM_S390_H
 #define ARCH_S390_KVM_S390_H

+#include <linux/hrtimer.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>

@@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
 }

 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
-void kvm_s390_idle_wakeup(unsigned long data);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+void kvm_s390_tasklet(unsigned long parm);
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
 int kvm_s390_inject_vm(struct kvm *kvm,
         struct kvm_s390_interrupt *s390int);


@@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
     int cpus = 0;
     int n;

-    spin_lock_bh(&fi->lock);
+    spin_lock(&fi->lock);
     for (n = 0; n < KVM_MAX_VCPUS; n++)
         if (fi->local_int[n])
             cpus++;
-    spin_unlock_bh(&fi->lock);
+    spin_unlock(&fi->lock);

     /* deal with other level 3 hypervisors */
     if (stsi(mem, 3, 2, 2) == -ENOSYS)


@@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
     if (cpu_addr >= KVM_MAX_VCPUS)
         return 3; /* not operational */

-    spin_lock_bh(&fi->lock);
+    spin_lock(&fi->lock);
     if (fi->local_int[cpu_addr] == NULL)
         rc = 3; /* not operational */
     else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
@@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
         *reg |= SIGP_STAT_STOPPED;
         rc = 1; /* status stored */
     }
-    spin_unlock_bh(&fi->lock);
+    spin_unlock(&fi->lock);

     VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
     return rc;
@@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)

     inti->type = KVM_S390_INT_EMERGENCY;

-    spin_lock_bh(&fi->lock);
+    spin_lock(&fi->lock);
     li = fi->local_int[cpu_addr];
     if (li == NULL) {
         rc = 3; /* not operational */
@@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
     spin_unlock_bh(&li->lock);
     rc = 0; /* order accepted */
 unlock:
-    spin_unlock_bh(&fi->lock);
+    spin_unlock(&fi->lock);
     VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
     return rc;
 }
@@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)

     inti->type = KVM_S390_SIGP_STOP;

-    spin_lock_bh(&fi->lock);
+    spin_lock(&fi->lock);
     li = fi->local_int[cpu_addr];
     if (li == NULL) {
         rc = 3; /* not operational */
@@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
     spin_unlock_bh(&li->lock);
     rc = 0; /* order accepted */
 unlock:
-    spin_unlock_bh(&fi->lock);
+    spin_unlock(&fi->lock);
     VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
     return rc;
 }
@@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
     if (!inti)
         return 2; /* busy */

-    spin_lock_bh(&fi->lock);
+    spin_lock(&fi->lock);
     li = fi->local_int[cpu_addr];

     if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
@@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 out_li:
     spin_unlock_bh(&li->lock);
 out_fi:
-    spin_unlock_bh(&fi->lock);
+    spin_unlock(&fi->lock);
     return rc;
 }


@@ -116,6 +116,8 @@
 #define X86_FEATURE_XMM4_1  (4*32+19) /* "sse4_1" SSE-4.1 */
 #define X86_FEATURE_XMM4_2  (4*32+20) /* "sse4_2" SSE-4.2 */
 #define X86_FEATURE_X2APIC  (4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE   (4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT  (4*32+23) /* POPCNT instruction */
 #define X86_FEATURE_AES     (4*32+25) /* AES instructions */
 #define X86_FEATURE_XSAVE   (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */


@@ -16,6 +16,7 @@
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_MSIX

 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256


@ -185,6 +185,7 @@ union kvm_mmu_page_role {
unsigned access:3; unsigned access:3;
unsigned invalid:1; unsigned invalid:1;
unsigned cr4_pge:1; unsigned cr4_pge:1;
unsigned nxe:1;
}; };
}; };
@ -212,7 +213,6 @@ struct kvm_mmu_page {
int multimapped; /* More than one parent_pte? */ int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */ int root_count; /* Currently serving as active root */
bool unsync; bool unsync;
bool global;
unsigned int unsync_children; unsigned int unsync_children;
union { union {
u64 *parent_pte; /* !multimapped */ u64 *parent_pte; /* !multimapped */
@ -261,13 +261,11 @@ struct kvm_mmu {
union kvm_mmu_page_role base_role; union kvm_mmu_page_role base_role;
u64 *pae_root; u64 *pae_root;
u64 rsvd_bits_mask[2][4];
}; };
struct kvm_vcpu_arch { struct kvm_vcpu_arch {
u64 host_tsc; u64 host_tsc;
int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
/* /*
* rip and regs accesses must go through * rip and regs accesses must go through
* kvm_{register,rip}_{read,write} functions. * kvm_{register,rip}_{read,write} functions.
@ -286,6 +284,7 @@ struct kvm_vcpu_arch {
u64 shadow_efer; u64 shadow_efer;
u64 apic_base; u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */ struct kvm_lapic *apic; /* kernel irqchip context */
int32_t apic_arb_prio;
int mp_state; int mp_state;
int sipi_vector; int sipi_vector;
u64 ia32_misc_enable_msr; u64 ia32_misc_enable_msr;
@ -320,6 +319,8 @@ struct kvm_vcpu_arch {
struct kvm_pio_request pio; struct kvm_pio_request pio;
void *pio_data; void *pio_data;
u8 event_exit_inst_len;
struct kvm_queued_exception { struct kvm_queued_exception {
bool pending; bool pending;
bool has_error_code; bool has_error_code;
@ -329,11 +330,12 @@ struct kvm_vcpu_arch {
struct kvm_queued_interrupt { struct kvm_queued_interrupt {
bool pending; bool pending;
bool soft;
u8 nr; u8 nr;
} interrupt; } interrupt;
struct { struct {
int active; int vm86_active;
u8 save_iopl; u8 save_iopl;
struct kvm_save_segment { struct kvm_save_segment {
u16 selector; u16 selector;
@ -356,9 +358,9 @@ struct kvm_vcpu_arch {
unsigned int time_offset; unsigned int time_offset;
struct page *time_page; struct page *time_page;
bool singlestep; /* guest is single stepped by KVM */
bool nmi_pending; bool nmi_pending;
bool nmi_injected; bool nmi_injected;
bool nmi_window_open;
struct mtrr_state_type mtrr_state; struct mtrr_state_type mtrr_state;
u32 pat; u32 pat;
@ -392,15 +394,14 @@ struct kvm_arch{
*/ */
struct list_head active_mmu_pages; struct list_head active_mmu_pages;
struct list_head assigned_dev_head; struct list_head assigned_dev_head;
struct list_head oos_global_pages;
struct iommu_domain *iommu_domain; struct iommu_domain *iommu_domain;
int iommu_flags;
struct kvm_pic *vpic; struct kvm_pic *vpic;
struct kvm_ioapic *vioapic; struct kvm_ioapic *vioapic;
struct kvm_pit *vpit; struct kvm_pit *vpit;
struct hlist_head irq_ack_notifier_list; struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode; int vapics_in_nmi_mode;
int round_robin_prev_vcpu;
unsigned int tss_addr; unsigned int tss_addr;
struct page *apic_access_page; struct page *apic_access_page;
@ -423,7 +424,6 @@ struct kvm_vm_stat {
u32 mmu_recycled; u32 mmu_recycled;
u32 mmu_cache_miss; u32 mmu_cache_miss;
u32 mmu_unsync; u32 mmu_unsync;
u32 mmu_unsync_global;
u32 remote_tlb_flush; u32 remote_tlb_flush;
u32 lpages; u32 lpages;
}; };
@ -443,7 +443,6 @@ struct kvm_vcpu_stat {
u32 halt_exits; u32 halt_exits;
u32 halt_wakeup; u32 halt_wakeup;
u32 request_irq_exits; u32 request_irq_exits;
u32 request_nmi_exits;
u32 irq_exits; u32 irq_exits;
u32 host_state_reload; u32 host_state_reload;
u32 efer_reload; u32 efer_reload;
@ -511,20 +510,22 @@ struct kvm_x86_ops {
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
void (*patch_hypercall)(struct kvm_vcpu *vcpu, void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr); unsigned char *hypercall_addr);
int (*get_irq)(struct kvm_vcpu *vcpu); void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_irq)(struct kvm_vcpu *vcpu, int vec); void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code); bool has_error_code, u32 error_code);
bool (*exception_injected)(struct kvm_vcpu *vcpu); int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
void (*inject_pending_irq)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu);
void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
struct kvm_run *run); void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void); int (*get_tdp_level)(void);
int (*get_mt_mask_shift)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
}; };
extern struct kvm_x86_ops *kvm_x86_ops; extern struct kvm_x86_ops *kvm_x86_ops;
@ -538,7 +539,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); u64 dirty_mask, u64 nx_mask, u64 x_mask);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@ -552,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes); const void *val, int bytes);
int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret); gpa_t addr, unsigned long *ret);
u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled; extern bool tdp_enabled;
@ -563,6 +565,7 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long cr2, u16 error_code, int emulation_type); unsigned long cr2, u16 error_code, int emulation_type);
void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context); void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context);
@ -638,7 +641,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
@ -769,6 +771,8 @@ enum {
#define HF_GIF_MASK (1 << 0) #define HF_GIF_MASK (1 << 0)
#define HF_HIF_MASK (1 << 1) #define HF_HIF_MASK (1 << 1)
#define HF_VINTR_MASK (1 << 2) #define HF_VINTR_MASK (1 << 2)
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
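The SVM code that consumes these two new flags is not part of this hunk; as a hedged sketch reconstructed from the svm.c changes elsewhere in this series (names assumed), NMI injection pairs HF_NMI_MASK with an IRET intercept so a second NMI is held back until the guest's NMI handler returns:

static void svm_inject_nmi(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* Queue the NMI through the ordinary event-injection field. */
	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;

	/* Mask further NMIs and trap the IRET that ends the handler. */
	vcpu->arch.hflags |= HF_NMI_MASK;
	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
}

static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
	/* The IRET has not retired yet; note it so the pending NMI is
	 * injected after the IRET completes rather than before. */
	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
	svm->vcpu.arch.hflags |= HF_IRET_MASK;
	return 1;
}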
/* /*
* Hardware virtualization extension instructions may fault if a * Hardware virtualization extension instructions may fault if a
@ -791,5 +795,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER #define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
#endif /* _ASM_X86_KVM_HOST_H */ #endif /* _ASM_X86_KVM_HOST_H */


@ -143,6 +143,9 @@ struct decode_cache {
struct fetch_cache fetch; struct fetch_cache fetch;
}; };
#define X86_SHADOW_INT_MOV_SS 1
#define X86_SHADOW_INT_STI 2
struct x86_emulate_ctxt { struct x86_emulate_ctxt {
/* Register state before/after emulation. */ /* Register state before/after emulation. */
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
@ -152,6 +155,9 @@ struct x86_emulate_ctxt {
int mode; int mode;
u32 cs_base; u32 cs_base;
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
/* decode cache */ /* decode cache */
struct decode_cache decode; struct decode_cache decode;
}; };
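How the new interruptibility field gets back into the vcpu is outside this header; a minimal sketch of a consumer, assuming the set_interrupt_shadow/get_interrupt_shadow ops added to kvm_x86_ops above (the helper name here is hypothetical):

static void sync_interrupt_shadow(struct x86_emulate_ctxt *ctxt)
{
	int mask = X86_SHADOW_INT_MOV_SS | X86_SHADOW_INT_STI;

	/* STI and MOV SS suppress interrupts for exactly one instruction;
	 * emulation records that in ctxt->interruptibility, and the vendor
	 * hook mirrors it into the hardware interrupt-shadow state. */
	kvm_x86_ops->set_interrupt_shadow(ctxt->vcpu,
					  ctxt->interruptibility & mask);
}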


@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EVTINJ_VALID_ERR (1 << 11) #define SVM_EVTINJ_VALID_ERR (1 << 11)
#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI


@ -247,6 +247,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36 #define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44 #define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_VIOLATION 48


@ -420,6 +420,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
out2: out2:
atomic_dec(&mce_entry); atomic_dec(&mce_entry);
} }
EXPORT_SYMBOL_GPL(do_machine_check);
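The export is presumably here so that a module can reach the host MCE handler: the intended caller is kvm-intel, which needs to feed machine checks raised in guest context (see the new EXIT_REASON_MCE_DURING_VMENTRY above) into do_machine_check() rather than dropping them.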
#ifdef CONFIG_X86_MCE_INTEL #ifdef CONFIG_X86_MCE_INTEL
/*** /***


@ -27,6 +27,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <asm/timer.h>
#define MMU_QUEUE_SIZE 1024 #define MMU_QUEUE_SIZE 1024
@ -230,6 +231,9 @@ static void paravirt_ops_setup(void)
pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
} }
#ifdef CONFIG_X86_IO_APIC
no_timer_check = 1;
#endif
} }
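The no_timer_check assignment presumably exists because the boot-time IRQ0 delivery test is both unnecessary under KVM and prone to spurious failure when the guest is scheduled out mid-test, so the paravirt setup path opts out whenever an IO-APIC is configured.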
void __init kvm_guest_init(void) void __init kvm_guest_init(void)


@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
{ {
ack_APIC_irq(); ack_APIC_irq();
inc_irq_stat(irq_resched_count); inc_irq_stat(irq_resched_count);
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
} }
void smp_call_function_interrupt(struct pt_regs *regs) void smp_call_function_interrupt(struct pt_regs *regs)


@ -50,6 +50,9 @@ config KVM_INTEL
Provides support for KVM on Intel processors equipped with the VT Provides support for KVM on Intel processors equipped with the VT
extensions. extensions.
To compile this as a module, choose M here: the module
will be called kvm-intel.
config KVM_AMD config KVM_AMD
tristate "KVM for AMD processors support" tristate "KVM for AMD processors support"
depends on KVM depends on KVM
@ -57,6 +60,9 @@ config KVM_AMD
Provides support for KVM on AMD processors equipped with the AMD-V Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions. (SVM) extensions.
To compile this as a module, choose M here: the module
will be called kvm-amd.
config KVM_TRACE config KVM_TRACE
bool "KVM trace support" bool "KVM trace support"
depends on KVM && SYSFS depends on KVM && SYSFS


@ -14,7 +14,7 @@ endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
i8254.o i8254.o timer.o
obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o


@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel)
return kvm->arch.vpit->pit_state.channels[channel].gate; return kvm->arch.vpit->pit_state.channels[channel].gate;
} }
static s64 __kpit_elapsed(struct kvm *kvm)
{
s64 elapsed;
ktime_t remaining;
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
/*
* The Counter does not stop when it reaches zero. In
* Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
* the highest count, either FFFF hex for binary counting
* or 9999 for BCD counting, and continues counting.
* Modes 2 and 3 are periodic; the Counter reloads
* itself with the initial count and continues counting
* from there.
*/
remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
elapsed = mod_64(elapsed, ps->pit_timer.period);
return elapsed;
}
static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
int channel)
{
if (channel == 0)
return __kpit_elapsed(kvm);
return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
}
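To make the arithmetic concrete (values purely illustrative): with channel 0 programmed for a 10 ms period and 3 ms left on the hrtimer,

	elapsed = mod_64(10000000 - 3000000, 10000000) = 7000000 ns

and pit_get_count() below scales that back into 8254 ticks:

	d = muldiv64(7000000, KVM_PIT_FREQ, NSEC_PER_SEC)
	  = 7000000 * 1193182 / 1000000000
	  = 8352 ticks (truncated)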
static int pit_get_count(struct kvm *kvm, int channel) static int pit_get_count(struct kvm *kvm, int channel)
{ {
struct kvm_kpit_channel_state *c = struct kvm_kpit_channel_state *c =
@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel)
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) { switch (c->mode) {
@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel)
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) { switch (c->mode) {
@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel)
} }
} }
static int __pit_timer_fn(struct kvm_kpit_state *ps)
{
struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
struct kvm_kpit_timer *pt = &ps->pit_timer;
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
if (!pt->reinject)
atomic_set(&pt->pending, 1);
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
hrtimer_add_expires_ns(&pt->timer, pt->period);
pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
if (pt->period)
ps->channels[0].count_load_time = ktime_get();
return (pt->period == 0 ? 0 : 1);
}
int pit_has_pending_timer(struct kvm_vcpu *vcpu) int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{ {
struct kvm_pit *pit = vcpu->kvm->arch.vpit; struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
spin_unlock(&ps->inject_lock); spin_unlock(&ps->inject_lock);
} }
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
struct kvm_kpit_state *ps;
int restart_timer = 0;
ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
restart_timer = __pit_timer_fn(ps);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
{ {
struct kvm_pit *pit = vcpu->kvm->arch.vpit; struct kvm_pit *pit = vcpu->kvm->arch.vpit;
@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
hrtimer_start_expires(timer, HRTIMER_MODE_ABS); hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
} }
static void destroy_pit_timer(struct kvm_kpit_timer *pt) static void destroy_pit_timer(struct kvm_timer *pt)
{ {
pr_debug("pit: execute del timer!\n"); pr_debug("pit: execute del timer!\n");
hrtimer_cancel(&pt->timer); hrtimer_cancel(&pt->timer);
} }
static bool kpit_is_periodic(struct kvm_timer *ktimer)
{
struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
pit_timer);
return ps->is_periodic;
}
static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
};
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
{ {
struct kvm_kpit_timer *pt = &ps->pit_timer; struct kvm_timer *pt = &ps->pit_timer;
s64 interval; s64 interval;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
/* TODO The new value only affected after the retriggered */ /* TODO The new value only affected after the retriggered */
hrtimer_cancel(&pt->timer); hrtimer_cancel(&pt->timer);
pt->period = (is_period == 0) ? 0 : interval; pt->period = interval;
pt->timer.function = pit_timer_fn; ps->is_periodic = is_period;
pt->timer.function = kvm_timer_fn;
pt->t_ops = &kpit_ops;
pt->kvm = ps->pit->kvm;
pt->vcpu_id = 0;
atomic_set(&pt->pending, 0); atomic_set(&pt->pending, 0);
ps->irq_ack = 1; ps->irq_ack = 1;
@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
/* /*
* Though spec said the state of 8254 is undefined after power-up, * The largest possible initial count is 0; this is equivalent
* seems some tricky OS like Windows XP depends on IRQ0 interrupt * to 2^16 for binary counting and 10^4 for BCD counting.
* when booting up.
* So here setting initialize rate for it, and not a specific number
*/ */
if (val == 0) if (val == 0)
val = 0x10000; val = 0x10000;
ps->channels[channel].count_load_time = ktime_get();
ps->channels[channel].count = val; ps->channels[channel].count = val;
if (channel != 0) if (channel != 0) {
ps->channels[channel].count_load_time = ktime_get();
return; return;
}
/* Two types of timer /* Two types of timer
* mode 1 is one shot, mode 2 is period, otherwise del timer */ * mode 1 is one shot, mode 2 is period, otherwise del timer */
switch (ps->channels[0].mode) { switch (ps->channels[0].mode) {
case 0:
case 1: case 1:
/* FIXME: enhance mode 4 precision */ /* FIXME: enhance mode 4 precision */
case 4: case 4:


@ -3,15 +3,6 @@
#include "iodev.h" #include "iodev.h"
struct kvm_kpit_timer {
struct hrtimer timer;
int irq;
s64 period; /* unit: ns */
s64 scheduled;
atomic_t pending;
bool reinject;
};
struct kvm_kpit_channel_state { struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */ u32 count; /* can be 65536 */
u16 latched_count; u16 latched_count;
@ -30,7 +21,8 @@ struct kvm_kpit_channel_state {
struct kvm_kpit_state { struct kvm_kpit_state {
struct kvm_kpit_channel_state channels[3]; struct kvm_kpit_channel_state channels[3];
struct kvm_kpit_timer pit_timer; struct kvm_timer pit_timer;
bool is_periodic;
u32 speaker_data_on; u32 speaker_data_on;
struct mutex lock; struct mutex lock;
struct kvm_pit *pit; struct kvm_pit *pit;


@ -24,6 +24,7 @@
#include "irq.h" #include "irq.h"
#include "i8254.h" #include "i8254.h"
#include "x86.h"
/* /*
* check if there are pending timer events * check if there are pending timer events
@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{ {
struct kvm_pic *s; struct kvm_pic *s;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.pending;
if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
if (kvm_apic_accept_pic_intr(v)) { if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm); /* PIC */ s = pic_irqchip(v->kvm); /* PIC */
@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
struct kvm_pic *s; struct kvm_pic *s;
int vector; int vector;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.nr;
vector = kvm_get_apic_interrupt(v); /* APIC */ vector = kvm_get_apic_interrupt(v); /* APIC */
if (vector == -1) { if (vector == -1) {
if (kvm_apic_accept_pic_intr(v)) { if (kvm_apic_accept_pic_intr(v)) {

arch/x86/kvm/kvm_timer.h (new file, 18 lines)

@ -0,0 +1,18 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
atomic_t pending; /* accumulated triggered timers */
bool reinject;
struct kvm_timer_ops *t_ops;
struct kvm *kvm;
int vcpu_id;
};
struct kvm_timer_ops {
bool (*is_periodic)(struct kvm_timer *);
};
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
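The matching arch/x86/kvm/timer.c (added to the Makefile above) is not shown in this view. A plausible reconstruction of kvm_timer_fn(), folding together the deleted __pit_timer_fn() above and the __apic_timer_fn() removed from lapic.c below:

enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_vcpu *vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];

	if (!vcpu)
		return HRTIMER_NORESTART;

	/* Accumulate the tick and tell the vcpu it has timer work. */
	if (!atomic_inc_and_test(&ktimer->pending))
		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);

	/* Without reinjection, never let more than one tick pile up. */
	if (!ktimer->reinject)
		atomic_set(&ktimer->pending, 1);

	if (waitqueue_active(&vcpu->wq))
		wake_up_interruptible(&vcpu->wq);

	/* The ops table is what lets the PIT and the LAPIC share this
	 * function: each device only answers "am I periodic?". */
	if (ktimer->t_ops->is_periodic(ktimer)) {
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	}
	return HRTIMER_NORESTART;
}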

View File

@ -196,20 +196,15 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
} }
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{ {
struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic_test_and_set_irr(vec, apic)) { return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
/* a new pending irq is set in IRR */ irq->level, irq->trig_mode);
if (trig)
apic_set_vector(vec, apic->regs + APIC_TMR);
else
apic_clear_vector(vec, apic->regs + APIC_TMR);
kvm_vcpu_kick(apic->vcpu);
return 1;
}
return 0;
} }
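struct kvm_lapic_irq itself is declared in a hunk not shown here; from the fields consumed in this function and filled in by apic_send_ipi() below, it is plausibly:

struct kvm_lapic_irq {
	u32 vector;
	u32 delivery_mode;
	u32 dest_mode;
	u32 level;
	u32 trig_mode;
	u32 shorthand;
	u32 dest_id;
};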
static inline int apic_find_highest_isr(struct kvm_lapic *apic) static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@ -250,7 +245,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
{ {
return kvm_apic_id(apic) == dest; return dest == 0xff || kvm_apic_id(apic) == dest;
} }
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
@ -279,37 +274,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
return result; return result;
} }
static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode) int short_hand, int dest, int dest_mode)
{ {
int result = 0; int result = 0;
struct kvm_lapic *target = vcpu->arch.apic; struct kvm_lapic *target = vcpu->arch.apic;
apic_debug("target %p, source %p, dest 0x%x, " apic_debug("target %p, source %p, dest 0x%x, "
"dest_mode 0x%x, short_hand 0x%x", "dest_mode 0x%x, short_hand 0x%x\n",
target, source, dest, dest_mode, short_hand); target, source, dest, dest_mode, short_hand);
ASSERT(!target); ASSERT(!target);
switch (short_hand) { switch (short_hand) {
case APIC_DEST_NOSHORT: case APIC_DEST_NOSHORT:
if (dest_mode == 0) { if (dest_mode == 0)
/* Physical mode. */ /* Physical mode. */
if ((dest == 0xFF) || (dest == kvm_apic_id(target))) result = kvm_apic_match_physical_addr(target, dest);
result = 1; else
} else
/* Logical mode. */ /* Logical mode. */
result = kvm_apic_match_logical_addr(target, dest); result = kvm_apic_match_logical_addr(target, dest);
break; break;
case APIC_DEST_SELF: case APIC_DEST_SELF:
if (target == source) result = (target == source);
result = 1;
break; break;
case APIC_DEST_ALLINC: case APIC_DEST_ALLINC:
result = 1; result = 1;
break; break;
case APIC_DEST_ALLBUT: case APIC_DEST_ALLBUT:
if (target != source) result = (target != source);
result = 1;
break; break;
default: default:
printk(KERN_WARNING "Bad dest shorthand value %x\n", printk(KERN_WARNING "Bad dest shorthand value %x\n",
@ -327,20 +319,22 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode) int vector, int level, int trig_mode)
{ {
int orig_irr, result = 0; int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu; struct kvm_vcpu *vcpu = apic->vcpu;
switch (delivery_mode) { switch (delivery_mode) {
case APIC_DM_FIXED:
case APIC_DM_LOWEST: case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
case APIC_DM_FIXED:
/* FIXME add logic for vcpu on reset */ /* FIXME add logic for vcpu on reset */
if (unlikely(!apic_enabled(apic))) if (unlikely(!apic_enabled(apic)))
break; break;
orig_irr = apic_test_and_set_irr(vector, apic); result = !apic_test_and_set_irr(vector, apic);
if (orig_irr && trig_mode) { if (!result) {
apic_debug("level trig mode repeatedly for vector %d", if (trig_mode)
vector); apic_debug("level trig mode repeatedly for "
"vector %d", vector);
break; break;
} }
@ -349,10 +343,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic_set_vector(vector, apic->regs + APIC_TMR); apic_set_vector(vector, apic->regs + APIC_TMR);
} else } else
apic_clear_vector(vector, apic->regs + APIC_TMR); apic_clear_vector(vector, apic->regs + APIC_TMR);
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
result = (orig_irr == 0);
break; break;
case APIC_DM_REMRD: case APIC_DM_REMRD:
@ -364,12 +355,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
break; break;
case APIC_DM_NMI: case APIC_DM_NMI:
result = 1;
kvm_inject_nmi(vcpu); kvm_inject_nmi(vcpu);
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
break; break;
case APIC_DM_INIT: case APIC_DM_INIT:
if (level) { if (level) {
result = 1;
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
printk(KERN_DEBUG printk(KERN_DEBUG
"INIT on a runnable vcpu %d\n", "INIT on a runnable vcpu %d\n",
@ -386,6 +379,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic_debug("SIPI to vcpu %d vector 0x%02x\n", apic_debug("SIPI to vcpu %d vector 0x%02x\n",
vcpu->vcpu_id, vector); vcpu->vcpu_id, vector);
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
result = 1;
vcpu->arch.sipi_vector = vector; vcpu->arch.sipi_vector = vector;
vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
@ -408,43 +402,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
return result; return result;
} }
static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
unsigned long bitmap)
{ {
int last; return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
int next;
struct kvm_lapic *apic = NULL;
last = kvm->arch.round_robin_prev_vcpu;
next = last;
do {
if (++next == KVM_MAX_VCPUS)
next = 0;
if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
continue;
apic = kvm->vcpus[next]->arch.apic;
if (apic && apic_enabled(apic))
break;
apic = NULL;
} while (next != last);
kvm->arch.round_robin_prev_vcpu = next;
if (!apic)
printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
return apic;
}
struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
unsigned long bitmap)
{
struct kvm_lapic *apic;
apic = kvm_apic_round_robin(kvm, vector, bitmap);
if (apic)
return apic->vcpu;
return NULL;
} }
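The round-robin arbiter deleted above gives way to common code that walks the vcpus itself; a hedged sketch of that consumer (the function lives in virt/kvm, details assumed) shows where kvm_apic_compare_prio() fits:

int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
			     struct kvm_lapic_irq *irq)
{
	int i, r = -1;
	struct kvm_vcpu *vcpu, *lowest = NULL;

	for (i = 0; i < KVM_MAX_VCPUS; i++) {
		vcpu = kvm->vcpus[i];
		if (!vcpu || !kvm_apic_present(vcpu))
			continue;
		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
					 irq->dest_id, irq->dest_mode))
			continue;

		if (irq->delivery_mode != APIC_DM_LOWEST) {
			/* Fixed and friends: deliver to every match. */
			if (r < 0)
				r = 0;
			r += kvm_apic_set_irq(vcpu, irq);
		} else if (!lowest ||
			   kvm_apic_compare_prio(vcpu, lowest) < 0) {
			/* Lowest priority: smallest apic_arb_prio wins;
			 * the counter bumped in __apic_accept_irq() makes
			 * the arbitration rotate over time. */
			lowest = vcpu;
		}
	}

	if (lowest)
		r = kvm_apic_set_irq(lowest, irq);
	return r;
}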
static void apic_set_eoi(struct kvm_lapic *apic) static void apic_set_eoi(struct kvm_lapic *apic)
@ -472,47 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic)
{ {
u32 icr_low = apic_get_reg(apic, APIC_ICR); u32 icr_low = apic_get_reg(apic, APIC_ICR);
u32 icr_high = apic_get_reg(apic, APIC_ICR2); u32 icr_high = apic_get_reg(apic, APIC_ICR2);
struct kvm_lapic_irq irq;
unsigned int dest = GET_APIC_DEST_FIELD(icr_high); irq.vector = icr_low & APIC_VECTOR_MASK;
unsigned int short_hand = icr_low & APIC_SHORT_MASK; irq.delivery_mode = icr_low & APIC_MODE_MASK;
unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; irq.dest_mode = icr_low & APIC_DEST_MASK;
unsigned int level = icr_low & APIC_INT_ASSERT; irq.level = icr_low & APIC_INT_ASSERT;
unsigned int dest_mode = icr_low & APIC_DEST_MASK; irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
unsigned int delivery_mode = icr_low & APIC_MODE_MASK; irq.shorthand = icr_low & APIC_SHORT_MASK;
unsigned int vector = icr_low & APIC_VECTOR_MASK; irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
struct kvm_vcpu *target;
struct kvm_vcpu *vcpu;
unsigned long lpr_map = 0;
int i;
apic_debug("icr_high 0x%x, icr_low 0x%x, " apic_debug("icr_high 0x%x, icr_low 0x%x, "
"short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
"dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
icr_high, icr_low, short_hand, dest, icr_high, icr_low, irq.shorthand, irq.dest_id,
trig_mode, level, dest_mode, delivery_mode, vector); irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector);
for (i = 0; i < KVM_MAX_VCPUS; i++) { kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
vcpu = apic->vcpu->kvm->vcpus[i];
if (!vcpu)
continue;
if (vcpu->arch.apic &&
apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
if (delivery_mode == APIC_DM_LOWEST)
set_bit(vcpu->vcpu_id, &lpr_map);
else
__apic_accept_irq(vcpu->arch.apic, delivery_mode,
vector, level, trig_mode);
}
}
if (delivery_mode == APIC_DM_LOWEST) {
target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
if (target != NULL)
__apic_accept_irq(target->arch.apic, delivery_mode,
vector, level, trig_mode);
}
} }
static u32 apic_get_tmcct(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic)
@ -527,12 +464,13 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
if (apic_get_reg(apic, APIC_TMICT) == 0) if (apic_get_reg(apic, APIC_TMICT) == 0)
return 0; return 0;
remaining = hrtimer_expires_remaining(&apic->timer.dev); remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
if (ktime_to_ns(remaining) < 0) if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0); remaining = ktime_set(0, 0);
ns = mod_64(ktime_to_ns(remaining), apic->timer.period); ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); tmcct = div64_u64(ns,
(APIC_BUS_CYCLE_NS * apic->divide_count));
return tmcct; return tmcct;
} }
@ -619,25 +557,25 @@ static void update_divide_count(struct kvm_lapic *apic)
tdcr = apic_get_reg(apic, APIC_TDCR); tdcr = apic_get_reg(apic, APIC_TDCR);
tmp1 = tdcr & 0xf; tmp1 = tdcr & 0xf;
tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
apic->timer.divide_count = 0x1 << (tmp2 & 0x7); apic->divide_count = 0x1 << (tmp2 & 0x7);
apic_debug("timer divide count is 0x%x\n", apic_debug("timer divide count is 0x%x\n",
apic->timer.divide_count); apic->divide_count);
} }
static void start_apic_timer(struct kvm_lapic *apic) static void start_apic_timer(struct kvm_lapic *apic)
{ {
ktime_t now = apic->timer.dev.base->get_time(); ktime_t now = apic->lapic_timer.timer.base->get_time();
apic->timer.period = apic_get_reg(apic, APIC_TMICT) * apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->timer.divide_count; APIC_BUS_CYCLE_NS * apic->divide_count;
atomic_set(&apic->timer.pending, 0); atomic_set(&apic->lapic_timer.pending, 0);
if (!apic->timer.period) if (!apic->lapic_timer.period)
return; return;
hrtimer_start(&apic->timer.dev, hrtimer_start(&apic->lapic_timer.timer,
ktime_add_ns(now, apic->timer.period), ktime_add_ns(now, apic->lapic_timer.period),
HRTIMER_MODE_ABS); HRTIMER_MODE_ABS);
apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
@ -646,9 +584,9 @@ static void start_apic_timer(struct kvm_lapic *apic)
"expire @ 0x%016" PRIx64 ".\n", __func__, "expire @ 0x%016" PRIx64 ".\n", __func__,
APIC_BUS_CYCLE_NS, ktime_to_ns(now), APIC_BUS_CYCLE_NS, ktime_to_ns(now),
apic_get_reg(apic, APIC_TMICT), apic_get_reg(apic, APIC_TMICT),
apic->timer.period, apic->lapic_timer.period,
ktime_to_ns(ktime_add_ns(now, ktime_to_ns(ktime_add_ns(now,
apic->timer.period))); apic->lapic_timer.period)));
} }
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@ -730,7 +668,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
apic_set_reg(apic, APIC_LVTT + 0x10 * i, apic_set_reg(apic, APIC_LVTT + 0x10 * i,
lvt_val | APIC_LVT_MASKED); lvt_val | APIC_LVT_MASKED);
} }
atomic_set(&apic->timer.pending, 0); atomic_set(&apic->lapic_timer.pending, 0);
} }
break; break;
@ -762,7 +700,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
break; break;
case APIC_TMICT: case APIC_TMICT:
hrtimer_cancel(&apic->timer.dev); hrtimer_cancel(&apic->lapic_timer.timer);
apic_set_reg(apic, APIC_TMICT, val); apic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic); start_apic_timer(apic);
return; return;
@ -802,7 +740,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
if (!vcpu->arch.apic) if (!vcpu->arch.apic)
return; return;
hrtimer_cancel(&vcpu->arch.apic->timer.dev); hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
if (vcpu->arch.apic->regs_page) if (vcpu->arch.apic->regs_page)
__free_page(vcpu->arch.apic->regs_page); __free_page(vcpu->arch.apic->regs_page);
@ -880,7 +818,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
ASSERT(apic != NULL); ASSERT(apic != NULL);
/* Stop the timer in case it's a reset to an active apic */ /* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->timer.dev); hrtimer_cancel(&apic->lapic_timer.timer);
apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
apic_set_reg(apic, APIC_LVR, APIC_VERSION); apic_set_reg(apic, APIC_LVR, APIC_VERSION);
@ -905,11 +843,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
} }
update_divide_count(apic); update_divide_count(apic);
atomic_set(&apic->timer.pending, 0); atomic_set(&apic->lapic_timer.pending, 0);
if (vcpu->vcpu_id == 0) if (vcpu->vcpu_id == 0)
vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
apic_update_ppr(apic); apic_update_ppr(apic);
vcpu->arch.apic_arb_prio = 0;
apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
"0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
vcpu, kvm_apic_id(apic), vcpu, kvm_apic_id(apic),
@ -917,16 +857,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
} }
EXPORT_SYMBOL_GPL(kvm_lapic_reset); EXPORT_SYMBOL_GPL(kvm_lapic_reset);
bool kvm_apic_present(struct kvm_vcpu *vcpu)
{
return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
}
int kvm_lapic_enabled(struct kvm_vcpu *vcpu) int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = vcpu->arch.apic; return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
int ret = 0;
if (!apic)
return 0;
ret = apic_enabled(apic);
return ret;
} }
EXPORT_SYMBOL_GPL(kvm_lapic_enabled); EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
@ -936,22 +874,11 @@ EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
*---------------------------------------------------------------------- *----------------------------------------------------------------------
*/ */
/* TODO: make sure __apic_timer_fn runs in current pCPU */ static bool lapic_is_periodic(struct kvm_timer *ktimer)
static int __apic_timer_fn(struct kvm_lapic *apic)
{ {
int result = 0; struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
wait_queue_head_t *q = &apic->vcpu->wq; lapic_timer);
return apic_lvtt_period(apic);
if(!atomic_inc_and_test(&apic->timer.pending))
set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
if (waitqueue_active(q))
wake_up_interruptible(q);
if (apic_lvtt_period(apic)) {
result = 1;
hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period);
}
return result;
} }
int apic_has_pending_timer(struct kvm_vcpu *vcpu) int apic_has_pending_timer(struct kvm_vcpu *vcpu)
@ -959,7 +886,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
struct kvm_lapic *lapic = vcpu->arch.apic; struct kvm_lapic *lapic = vcpu->arch.apic;
if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
return atomic_read(&lapic->timer.pending); return atomic_read(&lapic->lapic_timer.pending);
return 0; return 0;
} }
@ -986,20 +913,9 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
kvm_apic_local_deliver(apic, APIC_LVT0); kvm_apic_local_deliver(apic, APIC_LVT0);
} }
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) static struct kvm_timer_ops lapic_timer_ops = {
{ .is_periodic = lapic_is_periodic,
struct kvm_lapic *apic; };
int restart_timer = 0;
apic = container_of(data, struct kvm_lapic, timer.dev);
restart_timer = __apic_timer_fn(apic);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}
int kvm_create_lapic(struct kvm_vcpu *vcpu) int kvm_create_lapic(struct kvm_vcpu *vcpu)
{ {
@ -1024,8 +940,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
memset(apic->regs, 0, PAGE_SIZE); memset(apic->regs, 0, PAGE_SIZE);
apic->vcpu = vcpu; apic->vcpu = vcpu;
hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
apic->timer.dev.function = apic_timer_fn; HRTIMER_MODE_ABS);
apic->lapic_timer.timer.function = kvm_timer_fn;
apic->lapic_timer.t_ops = &lapic_timer_ops;
apic->lapic_timer.kvm = vcpu->kvm;
apic->lapic_timer.vcpu_id = vcpu->vcpu_id;
apic->base_address = APIC_DEFAULT_PHYS_BASE; apic->base_address = APIC_DEFAULT_PHYS_BASE;
vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
@ -1078,9 +999,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_lapic *apic = vcpu->arch.apic;
if (apic && atomic_read(&apic->timer.pending) > 0) { if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
if (kvm_apic_local_deliver(apic, APIC_LVTT)) if (kvm_apic_local_deliver(apic, APIC_LVTT))
atomic_dec(&apic->timer.pending); atomic_dec(&apic->lapic_timer.pending);
} }
} }
@ -1106,7 +1027,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
MSR_IA32_APICBASE_BASE; MSR_IA32_APICBASE_BASE;
apic_set_reg(apic, APIC_LVR, APIC_VERSION); apic_set_reg(apic, APIC_LVR, APIC_VERSION);
apic_update_ppr(apic); apic_update_ppr(apic);
hrtimer_cancel(&apic->timer.dev); hrtimer_cancel(&apic->lapic_timer.timer);
update_divide_count(apic); update_divide_count(apic);
start_apic_timer(apic); start_apic_timer(apic);
} }
@ -1119,7 +1040,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
if (!apic) if (!apic)
return; return;
timer = &apic->timer.dev; timer = &apic->lapic_timer.timer;
if (hrtimer_cancel(timer)) if (hrtimer_cancel(timer))
hrtimer_start_expires(timer, HRTIMER_MODE_ABS); hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
} }


@ -2,18 +2,15 @@
#define __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H
#include "iodev.h" #include "iodev.h"
#include "kvm_timer.h"
#include <linux/kvm_host.h> #include <linux/kvm_host.h>
struct kvm_lapic { struct kvm_lapic {
unsigned long base_address; unsigned long base_address;
struct kvm_io_device dev; struct kvm_io_device dev;
struct { struct kvm_timer lapic_timer;
atomic_t pending; u32 divide_count;
s64 period; /* unit: ns */
u32 divide_count;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
struct page *regs_page; struct page *regs_page;
void *regs; void *regs;
@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
bool kvm_apic_present(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);


@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
#define PFERR_PRESENT_MASK (1U << 0) #define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1) #define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2) #define PFERR_USER_MASK (1U << 2)
#define PFERR_RSVD_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4) #define PFERR_FETCH_MASK (1U << 4)
#define PT_DIRECTORY_LEVEL 2 #define PT_DIRECTORY_LEVEL 2
@ -177,7 +178,11 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mt_mask;
static inline u64 rsvd_bits(int s, int e)
{
return ((1ULL << (e - s + 1)) - 1) << s;
}
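Two quick sanity checks of the helper (values illustrative):

	rsvd_bits(7, 8)   == ((1ULL << 2)  - 1) << 7  == 0x180
	rsvd_bits(40, 51) == ((1ULL << 12) - 1) << 40 == 0x000fff0000000000

so a CPU with maxphyaddr == 40 treats physical-address bits 40..51 of a PTE as reserved, and a guest entry with any of them set now faults with PFERR_RSVD_MASK set in the error code.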
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{ {
@ -193,14 +198,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) u64 dirty_mask, u64 nx_mask, u64 x_mask)
{ {
shadow_user_mask = user_mask; shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask; shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask; shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask; shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask; shadow_x_mask = x_mask;
shadow_mt_mask = mt_mask;
} }
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@ -219,11 +223,6 @@ static int is_nx(struct kvm_vcpu *vcpu)
return vcpu->arch.shadow_efer & EFER_NX; return vcpu->arch.shadow_efer & EFER_NX;
} }
static int is_present_pte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
}
static int is_shadow_present_pte(u64 pte) static int is_shadow_present_pte(u64 pte)
{ {
return pte != shadow_trap_nonpresent_pte return pte != shadow_trap_nonpresent_pte
@ -1074,18 +1073,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
return NULL; return NULL;
} }
static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
{
list_del(&sp->oos_link);
--kvm->stat.mmu_unsync_global;
}
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{ {
WARN_ON(!sp->unsync); WARN_ON(!sp->unsync);
sp->unsync = 0; sp->unsync = 0;
if (sp->global)
kvm_unlink_unsync_global(kvm, sp);
--kvm->stat.mmu_unsync; --kvm->stat.mmu_unsync;
} }
@ -1248,7 +1239,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn; sp->gfn = gfn;
sp->role = role; sp->role = role;
sp->global = 0;
hlist_add_head(&sp->hash_link, bucket); hlist_add_head(&sp->hash_link, bucket);
if (!direct) { if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn)) if (rmap_write_protect(vcpu->kvm, gfn))
@ -1616,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
return mtrr_state->def_type; return mtrr_state->def_type;
} }
static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{ {
u8 mtrr; u8 mtrr;
@ -1626,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
mtrr = MTRR_TYPE_WRBACK; mtrr = MTRR_TYPE_WRBACK;
return mtrr; return mtrr;
} }
EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{ {
@ -1646,11 +1637,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
++vcpu->kvm->stat.mmu_unsync; ++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1; sp->unsync = 1;
if (sp->global) { kvm_mmu_mark_parents_unsync(vcpu, sp);
list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
++vcpu->kvm->stat.mmu_unsync_global;
} else
kvm_mmu_mark_parents_unsync(vcpu, sp);
mmu_convert_notrap(sp); mmu_convert_notrap(sp);
return 0; return 0;
@ -1677,21 +1664,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pte_access, int user_fault, unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage, int write_fault, int dirty, int largepage,
int global, gfn_t gfn, pfn_t pfn, bool speculative, gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync) bool can_unsync)
{ {
u64 spte; u64 spte;
int ret = 0; int ret = 0;
u64 mt_mask = shadow_mt_mask;
struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
if (!global && sp->global) {
sp->global = 0;
if (sp->unsync) {
kvm_unlink_unsync_global(vcpu->kvm, sp);
kvm_mmu_mark_parents_unsync(vcpu, sp);
}
}
/* /*
* We don't set the accessed bit, since we sometimes want to see * We don't set the accessed bit, since we sometimes want to see
@ -1711,16 +1688,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= shadow_user_mask; spte |= shadow_user_mask;
if (largepage) if (largepage)
spte |= PT_PAGE_SIZE_MASK; spte |= PT_PAGE_SIZE_MASK;
if (mt_mask) { if (tdp_enabled)
if (!kvm_is_mmio_pfn(pfn)) { spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
mt_mask = get_memory_type(vcpu, gfn) << kvm_is_mmio_pfn(pfn));
kvm_x86_ops->get_mt_mask_shift();
mt_mask |= VMX_EPT_IGMT_BIT;
} else
mt_mask = MTRR_TYPE_UNCACHABLE <<
kvm_x86_ops->get_mt_mask_shift();
spte |= mt_mask;
}
spte |= (u64)pfn << PAGE_SHIFT; spte |= (u64)pfn << PAGE_SHIFT;
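The VMX implementation of the new get_mt_mask hook lives in vmx.c, outside this view; reconstructed from the logic deleted here, it plausibly reads:

static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
	/* MMIO must stay uncached; RAM takes the guest MTRR type, with
	 * IGMT set so the guest cannot override it through PAT. */
	if (is_mmio)
		return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;

	return (kvm_get_guest_memory_type(vcpu, gfn) <<
		VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT;
}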
@ -1765,8 +1735,8 @@ set_pte:
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access, unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty, int user_fault, int write_fault, int dirty,
int *ptwrite, int largepage, int global, int *ptwrite, int largepage, gfn_t gfn,
gfn_t gfn, pfn_t pfn, bool speculative) pfn_t pfn, bool speculative)
{ {
int was_rmapped = 0; int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte); int was_writeble = is_writeble_pte(*shadow_pte);
@ -1795,7 +1765,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
was_rmapped = 1; was_rmapped = 1;
} }
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
dirty, largepage, global, gfn, pfn, speculative, true)) { dirty, largepage, gfn, pfn, speculative, true)) {
if (write_fault) if (write_fault)
*ptwrite = 1; *ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu); kvm_x86_ops->tlb_flush(vcpu);
@ -1843,7 +1813,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
|| (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write, 0, write, 1, &pt_write,
largepage, 0, gfn, pfn, false); largepage, gfn, pfn, false);
++vcpu->stat.pf_fixed; ++vcpu->stat.pf_fixed;
break; break;
} }
@ -1942,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.root_hpa = INVALID_PAGE; vcpu->arch.mmu.root_hpa = INVALID_PAGE;
} }
static void mmu_alloc_roots(struct kvm_vcpu *vcpu) static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
{
int ret = 0;
if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
ret = 1;
}
return ret;
}
static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
{ {
int i; int i;
gfn_t root_gfn; gfn_t root_gfn;
@ -1957,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root)); ASSERT(!VALID_PAGE(root));
if (tdp_enabled) if (tdp_enabled)
direct = 1; direct = 1;
if (mmu_check_root(vcpu, root_gfn))
return 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
PT64_ROOT_LEVEL, direct, PT64_ROOT_LEVEL, direct,
ACC_ALL, NULL); ACC_ALL, NULL);
root = __pa(sp->spt); root = __pa(sp->spt);
++sp->root_count; ++sp->root_count;
vcpu->arch.mmu.root_hpa = root; vcpu->arch.mmu.root_hpa = root;
return; return 0;
} }
direct = !is_paging(vcpu); direct = !is_paging(vcpu);
if (tdp_enabled) if (tdp_enabled)
@ -1980,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
} else if (vcpu->arch.mmu.root_level == 0) } else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0; root_gfn = 0;
if (mmu_check_root(vcpu, root_gfn))
return 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, direct, PT32_ROOT_LEVEL, direct,
ACC_ALL, NULL); ACC_ALL, NULL);
@ -1988,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
} }
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
return 0;
} }
static void mmu_sync_roots(struct kvm_vcpu *vcpu) static void mmu_sync_roots(struct kvm_vcpu *vcpu)
@ -2006,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i]; hpa_t root = vcpu->arch.mmu.pae_root[i];
if (root) { if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK; root &= PT64_BASE_ADDR_MASK;
sp = page_header(root); sp = page_header(root);
mmu_sync_children(vcpu, sp); mmu_sync_children(vcpu, sp);
@ -2014,15 +2001,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
} }
} }
static void mmu_sync_global(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_page *sp, *n;
list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
kvm_sync_page(vcpu, sp);
}
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{ {
spin_lock(&vcpu->kvm->mmu_lock); spin_lock(&vcpu->kvm->mmu_lock);
@ -2030,13 +2008,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock); spin_unlock(&vcpu->kvm->mmu_lock);
} }
void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
mmu_sync_global(vcpu);
spin_unlock(&vcpu->kvm->mmu_lock);
}
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{ {
return vaddr; return vaddr;
@ -2151,6 +2122,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu); nonpaging_free(vcpu);
} }
static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
{
int bit7;
bit7 = (gpte >> 7) & 1;
return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
}
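Bit 7 of a guest directory entry is PS (page size), which is why it indexes the first dimension: rsvd_bits_mask[1][level-1] holds the mask for a large page at that level, rsvd_bits_mask[0][level-1] the mask for an ordinary entry, as initialized in reset_rsvds_bits_mask() below.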
#define PTTYPE 64 #define PTTYPE 64
#include "paging_tmpl.h" #include "paging_tmpl.h"
#undef PTTYPE #undef PTTYPE
@ -2159,6 +2138,59 @@ static void paging_free(struct kvm_vcpu *vcpu)
#include "paging_tmpl.h" #include "paging_tmpl.h"
#undef PTTYPE #undef PTTYPE
static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
int maxphyaddr = cpuid_maxphyaddr(vcpu);
u64 exb_bit_rsvd = 0;
if (!is_nx(vcpu))
exb_bit_rsvd = rsvd_bits(63, 63);
switch (level) {
case PT32_ROOT_LEVEL:
/* no rsvd bits for 2 level 4K page table entries */
context->rsvd_bits_mask[0][1] = 0;
context->rsvd_bits_mask[0][0] = 0;
if (is_cpuid_PSE36())
/* 36bits PSE 4MB page */
context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
else
/* 32 bits PSE 4MB page */
context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
context->rsvd_bits_mask[1][0] = ~0ull;
break;
case PT32E_ROOT_LEVEL:
context->rsvd_bits_mask[0][2] =
rsvd_bits(maxphyaddr, 63) |
rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */
context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62); /* PDE */
context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62); /* PTE */
context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62) |
rsvd_bits(13, 20); /* large page */
context->rsvd_bits_mask[1][0] = ~0ull;
break;
case PT64_ROOT_LEVEL:
context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51) |
rsvd_bits(13, 20); /* large page */
context->rsvd_bits_mask[1][0] = ~0ull;
break;
}
}
static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
{ {
struct kvm_mmu *context = &vcpu->arch.mmu; struct kvm_mmu *context = &vcpu->arch.mmu;
@ -2179,6 +2211,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
static int paging64_init_context(struct kvm_vcpu *vcpu) static int paging64_init_context(struct kvm_vcpu *vcpu)
{ {
reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
} }
@ -2186,6 +2219,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
{ {
struct kvm_mmu *context = &vcpu->arch.mmu; struct kvm_mmu *context = &vcpu->arch.mmu;
reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
context->new_cr3 = paging_new_cr3; context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault; context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa; context->gva_to_gpa = paging32_gva_to_gpa;
@ -2201,6 +2235,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
static int paging32E_init_context(struct kvm_vcpu *vcpu) static int paging32E_init_context(struct kvm_vcpu *vcpu)
{ {
reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
} }
@ -2221,12 +2256,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->gva_to_gpa = nonpaging_gva_to_gpa; context->gva_to_gpa = nonpaging_gva_to_gpa;
context->root_level = 0; context->root_level = 0;
} else if (is_long_mode(vcpu)) { } else if (is_long_mode(vcpu)) {
reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
context->gva_to_gpa = paging64_gva_to_gpa; context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT64_ROOT_LEVEL; context->root_level = PT64_ROOT_LEVEL;
} else if (is_pae(vcpu)) { } else if (is_pae(vcpu)) {
reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
context->gva_to_gpa = paging64_gva_to_gpa; context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT32E_ROOT_LEVEL; context->root_level = PT32E_ROOT_LEVEL;
} else { } else {
reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
context->gva_to_gpa = paging32_gva_to_gpa; context->gva_to_gpa = paging32_gva_to_gpa;
context->root_level = PT32_ROOT_LEVEL; context->root_level = PT32_ROOT_LEVEL;
} }
@ -2290,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
goto out; goto out;
spin_lock(&vcpu->kvm->mmu_lock); spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
mmu_alloc_roots(vcpu); r = mmu_alloc_roots(vcpu);
mmu_sync_roots(vcpu); mmu_sync_roots(vcpu);
spin_unlock(&vcpu->kvm->mmu_lock); spin_unlock(&vcpu->kvm->mmu_lock);
if (r)
goto out;
kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
kvm_mmu_flush_tlb(vcpu); kvm_mmu_flush_tlb(vcpu);
out: out:
@ -2638,14 +2678,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
static void free_mmu_pages(struct kvm_vcpu *vcpu) static void free_mmu_pages(struct kvm_vcpu *vcpu)
{ {
struct kvm_mmu_page *sp;
while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
struct kvm_mmu_page, link);
kvm_mmu_zap_page(vcpu->kvm, sp);
cond_resched();
}
free_page((unsigned long)vcpu->arch.mmu.pae_root); free_page((unsigned long)vcpu->arch.mmu.pae_root);
} }
@ -2710,7 +2742,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{ {
struct kvm_mmu_page *sp; struct kvm_mmu_page *sp;
spin_lock(&kvm->mmu_lock);
list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
int i; int i;
u64 *pt; u64 *pt;
@ -2725,7 +2756,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
pt[i] &= ~PT_WRITABLE_MASK; pt[i] &= ~PT_WRITABLE_MASK;
} }
kvm_flush_remote_tlbs(kvm); kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
} }
void kvm_mmu_zap_all(struct kvm *kvm) void kvm_mmu_zap_all(struct kvm *kvm)
@ -3007,11 +3037,13 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
" in nonleaf level: levels %d gva %lx" " in nonleaf level: levels %d gva %lx"
" level %d pte %llx\n", audit_msg, " level %d pte %llx\n", audit_msg,
vcpu->arch.mmu.root_level, va, level, ent); vcpu->arch.mmu.root_level, va, level, ent);
else
audit_mappings_page(vcpu, ent, va, level - 1); audit_mappings_page(vcpu, ent, va, level - 1);
} else { } else {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; gfn_t gfn = gpa >> PAGE_SHIFT;
pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
if (is_shadow_present_pte(ent) if (is_shadow_present_pte(ent)
&& (ent & PT64_BASE_ADDR_MASK) != hpa) && (ent & PT64_BASE_ADDR_MASK) != hpa)


@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
return vcpu->arch.cr0 & X86_CR0_PG; return vcpu->arch.cr0 & X86_CR0_PG;
} }
static inline int is_present_pte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
}
#endif #endif


@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
gfn_t table_gfn; gfn_t table_gfn;
unsigned index, pt_access, pte_access; unsigned index, pt_access, pte_access;
gpa_t pte_gpa; gpa_t pte_gpa;
int rsvd_fault = 0;
pgprintk("%s: addr %lx\n", __func__, addr); pgprintk("%s: addr %lx\n", __func__, addr);
walk: walk:
@ -157,6 +158,10 @@ walk:
if (!is_present_pte(pte)) if (!is_present_pte(pte))
goto not_present; goto not_present;
rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
if (rsvd_fault)
goto access_error;
if (write_fault && !is_writeble_pte(pte)) if (write_fault && !is_writeble_pte(pte))
if (user_fault || is_write_protection(vcpu)) if (user_fault || is_write_protection(vcpu))
goto access_error; goto access_error;
@ -209,7 +214,6 @@ walk:
if (ret) if (ret)
goto walk; goto walk;
pte |= PT_DIRTY_MASK; pte |= PT_DIRTY_MASK;
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
walker->ptes[walker->level - 1] = pte; walker->ptes[walker->level - 1] = pte;
} }
@ -233,6 +237,8 @@ err:
walker->error_code |= PFERR_USER_MASK; walker->error_code |= PFERR_USER_MASK;
if (fetch_fault) if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK; walker->error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker->error_code |= PFERR_RSVD_MASK;
return 0; return 0;
} }
@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
kvm_get_pfn(pfn); kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, largepage, gpte & PT_DIRTY_MASK, NULL, largepage,
gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), gpte_to_gfn(gpte), pfn, true);
pfn, true);
} }
/* /*
@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
user_fault, write_fault, user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK, gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, largepage, ptwrite, largepage,
gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
gw->gfn, pfn, false); gw->gfn, pfn, false);
break; break;
} }
@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
return r; return r;
/* /*
* Look up the shadow pte for the faulting address. * Look up the guest pte for the faulting address.
*/ */
r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
fetch_fault); fetch_fault);
@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++; nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, is_dirty_pte(gpte), 0, gfn,
spte_to_pfn(sp->spt[i]), true, false); spte_to_pfn(sp->spt[i]), true, false);
} }


@ -19,6 +19,7 @@
#include "irq.h" #include "irq.h"
#include "mmu.h" #include "mmu.h"
#include "kvm_cache_regs.h" #include "kvm_cache_regs.h"
#include "x86.h"
#include <linux/module.h> #include <linux/module.h>
#include <linux/kernel.h> #include <linux/kernel.h>
@ -69,7 +70,6 @@ module_param(npt, int, S_IRUGO);
static int nested = 0; static int nested = 0;
module_param(nested, int, S_IRUGO); module_param(nested, int, S_IRUGO);
static void kvm_reput_irq(struct vcpu_svm *svm);
static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
@ -132,24 +132,6 @@ static inline u32 svm_has(u32 feat)
return svm_features & feat; return svm_features & feat;
} }
static inline u8 pop_irq(struct kvm_vcpu *vcpu)
{
int word_index = __ffs(vcpu->arch.irq_summary);
int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
int irq = word_index * BITS_PER_LONG + bit_index;
clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
if (!vcpu->arch.irq_pending[word_index])
clear_bit(word_index, &vcpu->arch.irq_summary);
return irq;
}
static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
{
set_bit(irq, vcpu->arch.irq_pending);
set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
}
static inline void clgi(void) static inline void clgi(void)
{ {
asm volatile (__ex(SVM_CLGI)); asm volatile (__ex(SVM_CLGI));
@ -214,25 +196,41 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
svm->vmcb->control.event_inj_err = error_code; svm->vmcb->control.event_inj_err = error_code;
} }
static bool svm_exception_injected(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
}
static int is_external_interrupt(u32 info) static int is_external_interrupt(u32 info)
{ {
info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
} }
static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 ret = 0;
if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
return ret & mask;
}
static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (mask == 0)
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
else
svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
}
static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
if (!svm->next_rip) { if (!svm->next_rip) {
printk(KERN_DEBUG "%s: NOP\n", __func__); if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return; return;
} }
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@ -240,9 +238,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
__func__, kvm_rip_read(vcpu), svm->next_rip); __func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip); kvm_rip_write(vcpu, svm->next_rip);
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; svm_set_interrupt_shadow(vcpu, 0);
vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
} }
static int has_svm(void) static int has_svm(void)
@ -830,6 +826,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
if (!var->unusable) if (!var->unusable)
var->type |= 0x1; var->type |= 0x1;
break; break;
case VCPU_SREG_SS:
/* On AMD CPUs sometimes the DB bit in the segment
* descriptor is left as 1, although the whole segment has
* been made unusable. Clear it here to pass an Intel VMX
* entry check when cross vendor migrating.
*/
if (var->unusable)
var->db = 0;
break;
} }
} }
@ -960,15 +965,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
} }
static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) static void update_db_intercept(struct kvm_vcpu *vcpu)
{ {
int old_debug = vcpu->guest_debug;
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
vcpu->guest_debug = dbg->control;
svm->vmcb->control.intercept_exceptions &= svm->vmcb->control.intercept_exceptions &=
~((1 << DB_VECTOR) | (1 << BP_VECTOR)); ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
if (vcpu->arch.singlestep)
svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
if (vcpu->guest_debug & if (vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@ -979,6 +985,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1 << BP_VECTOR; 1 << BP_VECTOR;
} else } else
vcpu->guest_debug = 0; vcpu->guest_debug = 0;
}
static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
int old_debug = vcpu->guest_debug;
struct vcpu_svm *svm = to_svm(vcpu);
vcpu->guest_debug = dbg->control;
update_db_intercept(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@ -993,16 +1009,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
return 0; return 0;
} }
static int svm_get_irq(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 exit_int_info = svm->vmcb->control.exit_int_info;
if (is_external_interrupt(exit_int_info))
return exit_int_info & SVM_EVTINJ_VEC_MASK;
return -1;
}
static void load_host_msrs(struct kvm_vcpu *vcpu) static void load_host_msrs(struct kvm_vcpu *vcpu)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
@ -1107,17 +1113,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
u32 exit_int_info = svm->vmcb->control.exit_int_info;
struct kvm *kvm = svm->vcpu.kvm;
u64 fault_address; u64 fault_address;
u32 error_code; u32 error_code;
bool event_injection = false;
if (!irqchip_in_kernel(kvm) &&
is_external_interrupt(exit_int_info)) {
event_injection = true;
push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
}
fault_address = svm->vmcb->control.exit_info_2; fault_address = svm->vmcb->control.exit_info_2;
error_code = svm->vmcb->control.exit_info_1; error_code = svm->vmcb->control.exit_info_1;
@ -1137,23 +1134,40 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
*/ */
if (npt_enabled) if (npt_enabled)
svm_flush_tlb(&svm->vcpu); svm_flush_tlb(&svm->vcpu);
else {
if (!npt_enabled && event_injection) if (kvm_event_needs_reinjection(&svm->vcpu))
kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
}
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
} }
static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
if (!(svm->vcpu.guest_debug & if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
!svm->vcpu.arch.singlestep) {
kvm_queue_exception(&svm->vcpu, DB_VECTOR); kvm_queue_exception(&svm->vcpu, DB_VECTOR);
return 1; return 1;
} }
kvm_run->exit_reason = KVM_EXIT_DEBUG;
kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; if (svm->vcpu.arch.singlestep) {
kvm_run->debug.arch.exception = DB_VECTOR; svm->vcpu.arch.singlestep = false;
return 0; if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
svm->vmcb->save.rflags &=
~(X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_intercept(&svm->vcpu);
}
if (svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
kvm_run->exit_reason = KVM_EXIT_DEBUG;
kvm_run->debug.arch.pc =
svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = DB_VECTOR;
return 0;
}
return 1;
} }
static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@ -1842,17 +1856,51 @@ static int task_switch_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run) struct kvm_run *kvm_run)
{ {
u16 tss_selector; u16 tss_selector;
int reason;
int int_type = svm->vmcb->control.exit_int_info &
SVM_EXITINTINFO_TYPE_MASK;
int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
uint32_t type =
svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
uint32_t idt_v =
svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
tss_selector = (u16)svm->vmcb->control.exit_info_1; tss_selector = (u16)svm->vmcb->control.exit_info_1;
if (svm->vmcb->control.exit_info_2 & if (svm->vmcb->control.exit_info_2 &
(1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
return kvm_task_switch(&svm->vcpu, tss_selector, reason = TASK_SWITCH_IRET;
TASK_SWITCH_IRET); else if (svm->vmcb->control.exit_info_2 &
if (svm->vmcb->control.exit_info_2 & (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
(1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) reason = TASK_SWITCH_JMP;
return kvm_task_switch(&svm->vcpu, tss_selector, else if (idt_v)
TASK_SWITCH_JMP); reason = TASK_SWITCH_GATE;
return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); else
reason = TASK_SWITCH_CALL;
if (reason == TASK_SWITCH_GATE) {
switch (type) {
case SVM_EXITINTINFO_TYPE_NMI:
svm->vcpu.arch.nmi_injected = false;
break;
case SVM_EXITINTINFO_TYPE_EXEPT:
kvm_clear_exception_queue(&svm->vcpu);
break;
case SVM_EXITINTINFO_TYPE_INTR:
kvm_clear_interrupt_queue(&svm->vcpu);
break;
default:
break;
}
}
if (reason != TASK_SWITCH_GATE ||
int_type == SVM_EXITINTINFO_TYPE_SOFT ||
(int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
skip_emulated_instruction(&svm->vcpu);
return kvm_task_switch(&svm->vcpu, tss_selector, reason);
} }
static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@ -1862,6 +1910,14 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1; return 1;
} }
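/* The guest executed IRET while NMIs were masked: drop the IRET intercept and set HF_IRET_MASK so the NMI mask is lifted once the IRET retires. */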
static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
++svm->vcpu.stat.nmi_window_exits;
svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
return 1;
}
static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
@ -1879,8 +1935,14 @@ static int emulate_on_interception(struct vcpu_svm *svm,
static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
/* instruction emulation calls kvm_set_cr8() */
emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
if (irqchip_in_kernel(svm->vcpu.kvm)) if (irqchip_in_kernel(svm->vcpu.kvm)) {
svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
return 1;
}
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
return 1; return 1;
kvm_run->exit_reason = KVM_EXIT_SET_TPR; kvm_run->exit_reason = KVM_EXIT_SET_TPR;
return 0; return 0;
@ -2090,8 +2152,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
* If the user space waits to inject interrupts, exit as soon as * If the user space waits to inject interrupts, exit as soon as
* possible * possible
*/ */
if (kvm_run->request_interrupt_window && if (!irqchip_in_kernel(svm->vcpu.kvm) &&
!svm->vcpu.arch.irq_summary) { kvm_run->request_interrupt_window &&
!kvm_cpu_has_interrupt(&svm->vcpu)) {
++svm->vcpu.stat.irq_window_exits; ++svm->vcpu.stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0; return 0;
@ -2134,6 +2197,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_VINTR] = interrupt_window_interception, [SVM_EXIT_VINTR] = interrupt_window_interception,
/* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
[SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
[SVM_EXIT_INVD] = emulate_on_interception, [SVM_EXIT_INVD] = emulate_on_interception,
[SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception, [SVM_EXIT_INVLPG] = invlpg_interception,
@ -2194,7 +2258,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
} }
} }
kvm_reput_irq(svm);
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@ -2205,7 +2268,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (is_external_interrupt(svm->vmcb->control.exit_int_info) && if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
exit_code != SVM_EXIT_NPF) exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
"exit_code 0x%x\n", "exit_code 0x%x\n",
__func__, svm->vmcb->control.exit_int_info, __func__, svm->vmcb->control.exit_int_info,
@ -2242,6 +2305,15 @@ static void pre_svm_run(struct vcpu_svm *svm)
new_asid(svm, svm_data); new_asid(svm, svm_data);
} }
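/* Queue an NMI through event_inj, mask further NMIs via HF_NMI_MASK, and intercept IRET to learn when the guest unmasks them again. */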
static void svm_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
vcpu->arch.hflags |= HF_NMI_MASK;
svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
++vcpu->stat.nmi_injections;
}
static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
{ {
@ -2257,134 +2329,71 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
} }
static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.event_inj = nr |
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}
static void svm_set_irq(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
nested_svm_intr(svm); nested_svm_intr(svm);
svm_inject_irq(svm, irq); svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
} }
static void update_cr8_intercept(struct kvm_vcpu *vcpu) static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (irr == -1)
return;
if (tpr >= irr)
svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
}
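/* An NMI may be injected only outside the interrupt shadow and while no earlier NMI is still being handled. */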
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb; struct vmcb *vmcb = svm->vmcb;
int max_irr, tpr; return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
!(svm->vcpu.arch.hflags & HF_NMI_MASK);
if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
return;
vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
max_irr = kvm_lapic_find_highest_irr(vcpu);
if (max_irr == -1)
return;
tpr = kvm_lapic_get_cr8(vcpu) << 4;
if (tpr >= (max_irr & 0xf0))
vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
} }
static void svm_intr_assist(struct kvm_vcpu *vcpu) static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb; struct vmcb *vmcb = svm->vmcb;
int intr_vector = -1; return (vmcb->save.rflags & X86_EFLAGS_IF) &&
!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && (svm->vcpu.arch.hflags & HF_GIF_MASK);
((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
intr_vector = vmcb->control.exit_int_info &
SVM_EVTINJ_VEC_MASK;
vmcb->control.exit_int_info = 0;
svm_inject_irq(svm, intr_vector);
goto out;
}
if (vmcb->control.int_ctl & V_IRQ_MASK)
goto out;
if (!kvm_cpu_has_interrupt(vcpu))
goto out;
if (nested_svm_intr(svm))
goto out;
if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
goto out;
if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
(vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
/* unable to deliver irq, set pending irq */
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
goto out;
}
/* Okay, we can deliver the interrupt: grab it and update PIC state. */
intr_vector = kvm_cpu_get_interrupt(vcpu);
svm_inject_irq(svm, intr_vector);
out:
update_cr8_intercept(vcpu);
} }
static void kvm_reput_irq(struct vcpu_svm *svm) static void enable_irq_window(struct kvm_vcpu *vcpu)
{ {
struct vmcb_control_area *control = &svm->vmcb->control; svm_set_vintr(to_svm(vcpu));
svm_inject_irq(to_svm(vcpu), 0x0);
if ((control->int_ctl & V_IRQ_MASK)
&& !irqchip_in_kernel(svm->vcpu.kvm)) {
control->int_ctl &= ~V_IRQ_MASK;
push_irq(&svm->vcpu, control->int_vector);
}
svm->vcpu.arch.interrupt_window_open =
!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
(svm->vcpu.arch.hflags & HF_GIF_MASK);
} }
static void svm_do_inject_vector(struct vcpu_svm *svm) static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
int word_index = __ffs(vcpu->arch.irq_summary);
int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
int irq = word_index * BITS_PER_LONG + bit_index;
clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
if (!vcpu->arch.irq_pending[word_index])
clear_bit(word_index, &vcpu->arch.irq_summary);
svm_inject_irq(svm, irq);
}
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
if (nested_svm_intr(svm)) if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
return; == HF_NMI_MASK)
return; /* IRET will cause a vm exit */
svm->vcpu.arch.interrupt_window_open = /* Something prevents NMI from being injected. Single step over
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && the possible problem (IRET or exception injection or interrupt
(svm->vmcb->save.rflags & X86_EFLAGS_IF) && shadow) */
(svm->vcpu.arch.hflags & HF_GIF_MASK)); vcpu->arch.singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) update_db_intercept(vcpu);
/*
* If interrupts enabled, and not blocked by sti or mov ss. Good.
*/
svm_do_inject_vector(svm);
/*
* Interrupts blocked. Wait for unblock.
*/
if (!svm->vcpu.arch.interrupt_window_open &&
(svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
svm_set_vintr(svm);
else
svm_clear_vintr(svm);
} }
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@ -2407,7 +2416,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
kvm_lapic_set_tpr(vcpu, cr8); kvm_set_cr8(vcpu, cr8);
} }
} }
@ -2416,14 +2425,54 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
u64 cr8; u64 cr8;
if (!irqchip_in_kernel(vcpu->kvm))
return;
cr8 = kvm_get_cr8(vcpu); cr8 = kvm_get_cr8(vcpu);
svm->vmcb->control.int_ctl &= ~V_TPR_MASK; svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
} }
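/* After #VMEXIT, fold exit_int_info back into the generic exception/interrupt/NMI queues so a partially delivered event is reinjected on the next entry. */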
static void svm_complete_interrupts(struct vcpu_svm *svm)
{
u8 vector;
int type;
u32 exitintinfo = svm->vmcb->control.exit_int_info;
if (svm->vcpu.arch.hflags & HF_IRET_MASK)
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
svm->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
return;
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
switch (type) {
case SVM_EXITINTINFO_TYPE_NMI:
svm->vcpu.arch.nmi_injected = true;
break;
case SVM_EXITINTINFO_TYPE_EXEPT:
/* In case of a software exception do not reinject the exception
vector, but re-execute the instruction instead */
if (kvm_exception_is_soft(vector))
break;
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
u32 err = svm->vmcb->control.exit_int_info_err;
kvm_queue_exception_e(&svm->vcpu, vector, err);
} else
kvm_queue_exception(&svm->vcpu, vector);
break;
case SVM_EXITINTINFO_TYPE_INTR:
kvm_queue_interrupt(&svm->vcpu, vector, false);
break;
default:
break;
}
}
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#define R "r" #define R "r"
#else #else
@ -2552,6 +2601,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
sync_cr8_to_lapic(vcpu); sync_cr8_to_lapic(vcpu);
svm->next_rip = 0; svm->next_rip = 0;
svm_complete_interrupts(svm);
} }
#undef R #undef R
@ -2617,7 +2668,7 @@ static int get_npt_level(void)
#endif #endif
} }
static int svm_get_mt_mask_shift(void) static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{ {
return 0; return 0;
} }
@ -2667,17 +2718,21 @@ static struct kvm_x86_ops svm_x86_ops = {
.run = svm_vcpu_run, .run = svm_vcpu_run,
.handle_exit = handle_exit, .handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction, .skip_emulated_instruction = skip_emulated_instruction,
.set_interrupt_shadow = svm_set_interrupt_shadow,
.get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall, .patch_hypercall = svm_patch_hypercall,
.get_irq = svm_get_irq,
.set_irq = svm_set_irq, .set_irq = svm_set_irq,
.set_nmi = svm_inject_nmi,
.queue_exception = svm_queue_exception, .queue_exception = svm_queue_exception,
.exception_injected = svm_exception_injected, .interrupt_allowed = svm_interrupt_allowed,
.inject_pending_irq = svm_intr_assist, .nmi_allowed = svm_nmi_allowed,
.inject_pending_vectors = do_interrupt_requests, .enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_tss_addr = svm_set_tss_addr, .set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level, .get_tdp_level = get_npt_level,
.get_mt_mask_shift = svm_get_mt_mask_shift, .get_mt_mask = svm_get_mt_mask,
}; };
static int __init svm_init(void) static int __init svm_init(void)

arch/x86/kvm/timer.c (new file, 46 lines)

@ -0,0 +1,46 @@
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hrtimer.h>
#include <asm/atomic.h>
#include "kvm_timer.h"
static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
{
int restart_timer = 0;
wait_queue_head_t *q = &vcpu->wq;
/* FIXME: this code should not know anything about vcpus */
if (!atomic_inc_and_test(&ktimer->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
if (!ktimer->reinject)
atomic_set(&ktimer->pending, 1);
if (waitqueue_active(q))
wake_up_interruptible(q);
if (ktimer->t_ops->is_periodic(ktimer)) {
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
restart_timer = 1;
}
return restart_timer;
}
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
{
int restart_timer;
struct kvm_vcpu *vcpu;
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
if (!vcpu)
return HRTIMER_NORESTART;
restart_timer = __kvm_timer_fn(vcpu, ktimer);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}

File diff suppressed because it is too large


@ -91,7 +91,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) }, { "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) }, { "request_irq", VCPU_STAT(request_irq_exits) },
{ "request_nmi", VCPU_STAT(request_nmi_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) }, { "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) }, { "host_state_reload", VCPU_STAT(host_state_reload) },
{ "efer_reload", VCPU_STAT(efer_reload) }, { "efer_reload", VCPU_STAT(efer_reload) },
@ -108,7 +107,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) }, { "mmu_unsync", VM_STAT(mmu_unsync) },
{ "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) }, { "largepages", VM_STAT(lpages) },
{ NULL } { NULL }
@ -234,7 +232,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
goto out; goto out;
} }
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { if (is_present_pte(pdpte[i]) &&
(pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
ret = 0; ret = 0;
goto out; goto out;
} }
@ -321,7 +320,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
kvm_x86_ops->set_cr0(vcpu, cr0); kvm_x86_ops->set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0; vcpu->arch.cr0 = cr0;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu); kvm_mmu_reset_context(vcpu);
return; return;
} }
@ -370,7 +368,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
kvm_x86_ops->set_cr4(vcpu, cr4); kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4; vcpu->arch.cr4 = cr4;
vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu); kvm_mmu_reset_context(vcpu);
} }
EXPORT_SYMBOL_GPL(kvm_set_cr4); EXPORT_SYMBOL_GPL(kvm_set_cr4);
@ -523,6 +520,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
efer |= vcpu->arch.shadow_efer & EFER_LMA; efer |= vcpu->arch.shadow_efer & EFER_LMA;
vcpu->arch.shadow_efer = efer; vcpu->arch.shadow_efer = efer;
vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
kvm_mmu_reset_context(vcpu);
} }
void kvm_enable_efer_bits(u64 mask) void kvm_enable_efer_bits(u64 mask)
@ -630,14 +630,17 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
unsigned long flags; unsigned long flags;
struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_vcpu_arch *vcpu = &v->arch;
void *shared_kaddr; void *shared_kaddr;
unsigned long this_tsc_khz;
if ((!vcpu->time_page)) if ((!vcpu->time_page))
return; return;
if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { this_tsc_khz = get_cpu_var(cpu_tsc_khz);
kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
vcpu->hv_clock_tsc_khz = this_tsc_khz;
} }
put_cpu_var(cpu_tsc_khz);
/* Keep irq disabled to prevent changes to the clock */ /* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags); local_irq_save(flags);
@ -893,6 +896,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTFROMIP:
case MSR_IA32_LASTINTTOIP: case MSR_IA32_LASTINTTOIP:
case MSR_VM_HSAVE_PA: case MSR_VM_HSAVE_PA:
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
data = 0; data = 0;
break; break;
case MSR_MTRRcap: case MSR_MTRRcap:
@ -1024,6 +1029,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SYNC_MMU: case KVM_CAP_SYNC_MMU:
case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
r = 1; r = 1;
break; break;
case KVM_CAP_COALESCED_MMIO: case KVM_CAP_COALESCED_MMIO:
@ -1241,41 +1247,53 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags = 0; entry->flags = 0;
} }
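/* F(X) expands to the cpuid bit for X86_FEATURE_X; the masks below spell out, bit by bit, which features KVM is willing to expose to guests. */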
#define F(x) bit(X86_FEATURE_##x)
static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index, int *nent, int maxnent) u32 index, int *nent, int maxnent)
{ {
const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | unsigned f_nx = is_efer_nx() ? F(NX) : 0;
bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
bit(X86_FEATURE_PGE) |
bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
bit(X86_FEATURE_SYSCALL) |
(is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
bit(X86_FEATURE_LM) | unsigned f_lm = F(LM);
#else
unsigned f_lm = 0;
#endif #endif
bit(X86_FEATURE_FXSR_OPT) |
bit(X86_FEATURE_MMXEXT) | /* cpuid 1.edx */
bit(X86_FEATURE_3DNOWEXT) | const u32 kvm_supported_word0_x86_features =
bit(X86_FEATURE_3DNOW); F(FPU) | F(VME) | F(DE) | F(PSE) |
const u32 kvm_supported_word3_x86_features = F(TSC) | F(MSR) | F(PAE) | F(MCE) |
bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
0 /* Reserved, DS, ACPI */ | F(MMX) |
F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
0 /* HTT, TM, Reserved, PBE */;
/* cpuid 0x80000001.edx */
const u32 kvm_supported_word1_x86_features =
F(FPU) | F(VME) | F(DE) | F(PSE) |
F(TSC) | F(MSR) | F(PAE) | F(MCE) |
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* Reserved */ |
f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
/* cpuid 1.ecx */
const u32 kvm_supported_word4_x86_features =
F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
0 /* DS-CPL, VMX, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
0 /* Reserved, DCA */ | F(XMM4_1) |
F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
0 /* Reserved, XSAVE, OSXSAVE */;
/* cpuid 0x80000001.ecx */
const u32 kvm_supported_word6_x86_features = const u32 kvm_supported_word6_x86_features =
bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
bit(X86_FEATURE_SVM); F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
0 /* SKINIT */ | 0 /* WDT */;
/* all calls to cpuid_count() should be made on the same cpu */ /* all calls to cpuid_count() should be made on the same cpu */
get_cpu(); get_cpu();
@ -1288,7 +1306,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break; break;
case 1: case 1:
entry->edx &= kvm_supported_word0_x86_features; entry->edx &= kvm_supported_word0_x86_features;
entry->ecx &= kvm_supported_word3_x86_features; entry->ecx &= kvm_supported_word4_x86_features;
break; break;
/* function 2 entries are STATEFUL. That is, repeated cpuid commands /* function 2 entries are STATEFUL. That is, repeated cpuid commands
* may return different values. This forces us to get_cpu() before * may return different values. This forces us to get_cpu() before
@ -1350,6 +1368,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
put_cpu(); put_cpu();
} }
#undef F
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries) struct kvm_cpuid_entry2 __user *entries)
{ {
@ -1421,8 +1441,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return -ENXIO; return -ENXIO;
vcpu_load(vcpu); vcpu_load(vcpu);
set_bit(irq->irq, vcpu->arch.irq_pending); kvm_queue_interrupt(vcpu, irq->irq, false);
set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
vcpu_put(vcpu); vcpu_put(vcpu);
@ -1584,8 +1603,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL; r = -EINVAL;
} }
out: out:
if (lapic) kfree(lapic);
kfree(lapic);
return r; return r;
} }
@ -1606,10 +1624,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
return -EINVAL; return -EINVAL;
down_write(&kvm->slots_lock); down_write(&kvm->slots_lock);
spin_lock(&kvm->mmu_lock);
kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
spin_unlock(&kvm->mmu_lock);
up_write(&kvm->slots_lock); up_write(&kvm->slots_lock);
return 0; return 0;
} }
@ -1785,7 +1805,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
/* If nothing is dirty, don't bother messing with page tables. */ /* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) { if (is_dirty) {
spin_lock(&kvm->mmu_lock);
kvm_mmu_slot_remove_write_access(kvm, log->slot); kvm_mmu_slot_remove_write_access(kvm, log->slot);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm); kvm_flush_remote_tlbs(kvm);
memslot = &kvm->memslots[log->slot]; memslot = &kvm->memslots[log->slot];
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@ -2360,7 +2382,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
u16 error_code, u16 error_code,
int emulation_type) int emulation_type)
{ {
int r; int r, shadow_mask;
struct decode_cache *c; struct decode_cache *c;
kvm_clear_exception_queue(vcpu); kvm_clear_exception_queue(vcpu);
@ -2408,7 +2430,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
} }
} }
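/* EMULTYPE_SKIP: the caller only needs rip advanced past the decoded instruction, so skip execution entirely. */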
if (emulation_type & EMULTYPE_SKIP) {
kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
return EMULATE_DONE;
}
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
if (r == 0)
kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
if (vcpu->arch.pio.string) if (vcpu->arch.pio.string)
return EMULATE_DO_MMIO; return EMULATE_DO_MMIO;
@ -2761,7 +2792,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_nonpresent_ptes(0ull, 0ull); kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); PT_DIRTY_MASK, PT64_NX_MASK, 0);
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
per_cpu(cpu_tsc_khz, cpu) = tsc_khz; per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
@ -3012,6 +3043,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
return best; return best;
} }
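/* Guest physical address width from cpuid leaf 0x80000008, falling back to the architectural minimum of 36 bits when the leaf is absent. */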
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
if (best)
return best->eax & 0xff;
return 36;
}
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{ {
u32 function, index; u32 function, index;
@ -3048,10 +3089,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) struct kvm_run *kvm_run)
{ {
return (!vcpu->arch.irq_summary && return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
kvm_run->request_interrupt_window && kvm_run->request_interrupt_window &&
vcpu->arch.interrupt_window_open && kvm_arch_interrupt_allowed(vcpu));
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
} }
static void post_kvm_run_save(struct kvm_vcpu *vcpu, static void post_kvm_run_save(struct kvm_vcpu *vcpu,
@ -3064,8 +3104,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
kvm_run->ready_for_interrupt_injection = 1; kvm_run->ready_for_interrupt_injection = 1;
else else
kvm_run->ready_for_interrupt_injection = kvm_run->ready_for_interrupt_injection =
(vcpu->arch.interrupt_window_open && kvm_arch_interrupt_allowed(vcpu) &&
vcpu->arch.irq_summary == 0); !kvm_cpu_has_interrupt(vcpu) &&
!kvm_event_needs_reinjection(vcpu);
} }
static void vapic_enter(struct kvm_vcpu *vcpu) static void vapic_enter(struct kvm_vcpu *vcpu)
@ -3094,9 +3135,63 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
up_read(&vcpu->kvm->slots_lock); up_read(&vcpu->kvm->slots_lock);
} }
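/* Hand the current TPR and the highest pending IRR to vendor code so it can decide whether CR8 writes still need to be intercepted. */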
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
{
int max_irr, tpr;
if (!kvm_x86_ops->update_cr8_intercept)
return;
if (!vcpu->arch.apic->vapic_addr)
max_irr = kvm_lapic_find_highest_irr(vcpu);
else
max_irr = -1;
if (max_irr != -1)
max_irr >>= 4;
tpr = kvm_lapic_get_cr8(vcpu);
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
}
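/* Reinject whatever the last exit left pending before considering a fresh NMI or external interrupt. */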
static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
/* try to reinject previous events if any */
if (vcpu->arch.nmi_injected) {
kvm_x86_ops->set_nmi(vcpu);
return;
}
if (vcpu->arch.interrupt.pending) {
kvm_x86_ops->set_irq(vcpu);
return;
}
/* try to inject new event if pending */
if (vcpu->arch.nmi_pending) {
if (kvm_x86_ops->nmi_allowed(vcpu)) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
kvm_x86_ops->set_nmi(vcpu);
}
} else if (kvm_cpu_has_interrupt(vcpu)) {
if (kvm_x86_ops->interrupt_allowed(vcpu)) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
false);
kvm_x86_ops->set_irq(vcpu);
}
}
}
static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
int r; int r;
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
kvm_run->request_interrupt_window;
if (vcpu->requests) if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@ -3128,9 +3223,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} }
} }
clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
kvm_inject_pending_timer_irqs(vcpu);
preempt_disable(); preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu); kvm_x86_ops->prepare_guest_switch(vcpu);
@ -3138,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
local_irq_disable(); local_irq_disable();
clear_bit(KVM_REQ_KICK, &vcpu->requests);
smp_mb__after_clear_bit();
if (vcpu->requests || need_resched() || signal_pending(current)) { if (vcpu->requests || need_resched() || signal_pending(current)) {
local_irq_enable(); local_irq_enable();
preempt_enable(); preempt_enable();
@ -3145,21 +3240,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out; goto out;
} }
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
* testing the pending IRQ vector bitmap.
*/
smp_wmb();
if (vcpu->arch.exception.pending) if (vcpu->arch.exception.pending)
__queue_exception(vcpu); __queue_exception(vcpu);
else if (irqchip_in_kernel(vcpu->kvm))
kvm_x86_ops->inject_pending_irq(vcpu);
else else
kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); inject_pending_irq(vcpu, kvm_run);
kvm_lapic_sync_to_vapic(vcpu); /* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
kvm_x86_ops->enable_nmi_window(vcpu);
else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
kvm_lapic_sync_to_vapic(vcpu);
}
up_read(&vcpu->kvm->slots_lock); up_read(&vcpu->kvm->slots_lock);
@ -3193,7 +3288,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
set_debugreg(vcpu->arch.host_dr6, 6); set_debugreg(vcpu->arch.host_dr6, 6);
set_debugreg(vcpu->arch.host_dr7, 7); set_debugreg(vcpu->arch.host_dr7, 7);
vcpu->guest_mode = 0; set_bit(KVM_REQ_KICK, &vcpu->requests);
local_irq_enable(); local_irq_enable();
++vcpu->stat.exits; ++vcpu->stat.exits;
@ -3220,8 +3315,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
profile_hit(KVM_PROFILING, (void *)rip); profile_hit(KVM_PROFILING, (void *)rip);
} }
if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
vcpu->arch.exception.pending = false;
kvm_lapic_sync_from_vapic(vcpu); kvm_lapic_sync_from_vapic(vcpu);
@ -3230,6 +3323,7 @@ out:
return r; return r;
} }
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
int r; int r;
@ -3256,29 +3350,42 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_vcpu_block(vcpu); kvm_vcpu_block(vcpu);
down_read(&vcpu->kvm->slots_lock); down_read(&vcpu->kvm->slots_lock);
if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
vcpu->arch.mp_state = vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE; KVM_MP_STATE_RUNNABLE;
if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) case KVM_MP_STATE_RUNNABLE:
r = -EINTR; break;
case KVM_MP_STATE_SIPI_RECEIVED:
default:
r = -EINTR;
break;
}
}
} }
if (r > 0) { if (r <= 0)
if (dm_request_for_irq_injection(vcpu, kvm_run)) { break;
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR; clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
++vcpu->stat.request_irq_exits; if (kvm_cpu_has_pending_timer(vcpu))
} kvm_inject_pending_timer_irqs(vcpu);
if (signal_pending(current)) {
r = -EINTR; if (dm_request_for_irq_injection(vcpu, kvm_run)) {
kvm_run->exit_reason = KVM_EXIT_INTR; r = -EINTR;
++vcpu->stat.signal_exits; kvm_run->exit_reason = KVM_EXIT_INTR;
} ++vcpu->stat.request_irq_exits;
if (need_resched()) { }
up_read(&vcpu->kvm->slots_lock); if (signal_pending(current)) {
kvm_resched(vcpu); r = -EINTR;
down_read(&vcpu->kvm->slots_lock); kvm_run->exit_reason = KVM_EXIT_INTR;
} ++vcpu->stat.signal_exits;
}
if (need_resched()) {
up_read(&vcpu->kvm->slots_lock);
kvm_resched(vcpu);
down_read(&vcpu->kvm->slots_lock);
} }
} }
@ -3442,7 +3549,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs) struct kvm_sregs *sregs)
{ {
struct descriptor_table dt; struct descriptor_table dt;
int pending_vec;
vcpu_load(vcpu); vcpu_load(vcpu);
@ -3472,16 +3578,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
sregs->efer = vcpu->arch.shadow_efer; sregs->efer = vcpu->arch.shadow_efer;
sregs->apic_base = kvm_get_apic_base(vcpu); sregs->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm)) { memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
memset(sregs->interrupt_bitmap, 0,
sizeof sregs->interrupt_bitmap); if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
pending_vec = kvm_x86_ops->get_irq(vcpu); set_bit(vcpu->arch.interrupt.nr,
if (pending_vec >= 0) (unsigned long *)sregs->interrupt_bitmap);
set_bit(pending_vec,
(unsigned long *)sregs->interrupt_bitmap);
} else
memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
sizeof sregs->interrupt_bitmap);
vcpu_put(vcpu); vcpu_put(vcpu);
@ -3688,7 +3789,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
} }
static int load_state_from_tss32(struct kvm_vcpu *vcpu, static int load_state_from_tss32(struct kvm_vcpu *vcpu,
@ -3785,8 +3885,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
} }
static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
u32 old_tss_base, u16 old_tss_sel, u32 old_tss_base,
struct desc_struct *nseg_desc) struct desc_struct *nseg_desc)
{ {
struct tss_segment_16 tss_segment_16; struct tss_segment_16 tss_segment_16;
int ret = 0; int ret = 0;
@ -3805,6 +3905,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
&tss_segment_16, sizeof tss_segment_16)) &tss_segment_16, sizeof tss_segment_16))
goto out; goto out;
if (old_tss_sel != 0xffff) {
tss_segment_16.prev_task_link = old_tss_sel;
if (kvm_write_guest(vcpu->kvm,
get_tss_base_addr(vcpu, nseg_desc),
&tss_segment_16.prev_task_link,
sizeof tss_segment_16.prev_task_link))
goto out;
}
if (load_state_from_tss16(vcpu, &tss_segment_16)) if (load_state_from_tss16(vcpu, &tss_segment_16))
goto out; goto out;
@ -3814,7 +3924,7 @@ out:
} }
static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
u32 old_tss_base, u16 old_tss_sel, u32 old_tss_base,
struct desc_struct *nseg_desc) struct desc_struct *nseg_desc)
{ {
struct tss_segment_32 tss_segment_32; struct tss_segment_32 tss_segment_32;
@ -3834,6 +3944,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
&tss_segment_32, sizeof tss_segment_32)) &tss_segment_32, sizeof tss_segment_32))
goto out; goto out;
if (old_tss_sel != 0xffff) {
tss_segment_32.prev_task_link = old_tss_sel;
if (kvm_write_guest(vcpu->kvm,
get_tss_base_addr(vcpu, nseg_desc),
&tss_segment_32.prev_task_link,
sizeof tss_segment_32.prev_task_link))
goto out;
}
if (load_state_from_tss32(vcpu, &tss_segment_32)) if (load_state_from_tss32(vcpu, &tss_segment_32))
goto out; goto out;
@ -3887,14 +4007,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
} }
kvm_x86_ops->skip_emulated_instruction(vcpu); /* set back link to prev task only if NT bit is set in eflags
note that old_tss_sel is not used after this point */
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
old_tss_sel = 0xffff;
if (nseg_desc.type & 8) if (nseg_desc.type & 8)
ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
&nseg_desc); old_tss_base, &nseg_desc);
else else
ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
&nseg_desc); old_tss_base, &nseg_desc);
if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
u32 eflags = kvm_x86_ops->get_rflags(vcpu); u32 eflags = kvm_x86_ops->get_rflags(vcpu);
@ -3920,7 +4048,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs) struct kvm_sregs *sregs)
{ {
int mmu_reset_needed = 0; int mmu_reset_needed = 0;
int i, pending_vec, max_bits; int pending_vec, max_bits;
struct descriptor_table dt; struct descriptor_table dt;
vcpu_load(vcpu); vcpu_load(vcpu);
@ -3934,7 +4062,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
vcpu->arch.cr2 = sregs->cr2; vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
down_read(&vcpu->kvm->slots_lock);
if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
vcpu->arch.cr3 = sregs->cr3;
else
set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
up_read(&vcpu->kvm->slots_lock);
kvm_set_cr8(vcpu, sregs->cr8); kvm_set_cr8(vcpu, sregs->cr8);
@ -3956,25 +4090,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (mmu_reset_needed) if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu); kvm_mmu_reset_context(vcpu);
if (!irqchip_in_kernel(vcpu->kvm)) { max_bits = (sizeof sregs->interrupt_bitmap) << 3;
memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap, pending_vec = find_first_bit(
sizeof vcpu->arch.irq_pending); (const unsigned long *)sregs->interrupt_bitmap, max_bits);
vcpu->arch.irq_summary = 0; if (pending_vec < max_bits) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i) kvm_queue_interrupt(vcpu, pending_vec, false);
if (vcpu->arch.irq_pending[i]) pr_debug("Set back pending irq %d\n", pending_vec);
__set_bit(i, &vcpu->arch.irq_summary); if (irqchip_in_kernel(vcpu->kvm))
} else { kvm_pic_clear_isr_ack(vcpu->kvm);
max_bits = (sizeof sregs->interrupt_bitmap) << 3;
pending_vec = find_first_bit(
(const unsigned long *)sregs->interrupt_bitmap,
max_bits);
/* Only pending external irq is handled here */
if (pending_vec < max_bits) {
kvm_x86_ops->set_irq(vcpu, pending_vec);
pr_debug("Set back pending irq %d\n",
pending_vec);
}
kvm_pic_clear_isr_ack(vcpu->kvm);
} }
kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
@ -4308,7 +4431,6 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@ -4411,12 +4533,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
} }
} }
spin_lock(&kvm->mmu_lock);
if (!kvm->arch.n_requested_mmu_pages) { if (!kvm->arch.n_requested_mmu_pages) {
unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
} }
kvm_mmu_slot_remove_write_access(kvm, mem->slot); kvm_mmu_slot_remove_write_access(kvm, mem->slot);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm); kvm_flush_remote_tlbs(kvm);
return 0; return 0;
@ -4425,6 +4549,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
void kvm_arch_flush_shadow(struct kvm *kvm) void kvm_arch_flush_shadow(struct kvm *kvm)
{ {
kvm_mmu_zap_all(kvm); kvm_mmu_zap_all(kvm);
kvm_reload_remote_mmus(kvm);
} }
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
@ -4434,28 +4559,24 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
|| vcpu->arch.nmi_pending; || vcpu->arch.nmi_pending;
} }
static void vcpu_kick_intr(void *info)
{
#ifdef DEBUG
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
#endif
}
void kvm_vcpu_kick(struct kvm_vcpu *vcpu) void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{ {
int ipi_pcpu = vcpu->cpu; int me;
int cpu = get_cpu(); int cpu = vcpu->cpu;
if (waitqueue_active(&vcpu->wq)) { if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq); wake_up_interruptible(&vcpu->wq);
++vcpu->stat.halt_wakeup; ++vcpu->stat.halt_wakeup;
} }
/*
* We may be called synchronously with irqs disabled in guest mode, me = get_cpu();
* So need not to call smp_call_function_single() in that case. if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
*/ if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
if (vcpu->guest_mode && vcpu->cpu != cpu) smp_send_reschedule(cpu);
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
put_cpu(); put_cpu();
} }
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
return kvm_x86_ops->interrupt_allowed(vcpu);
}


@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
vcpu->arch.exception.pending = false; vcpu->arch.exception.pending = false;
} }
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
bool soft)
{ {
vcpu->arch.interrupt.pending = true; vcpu->arch.interrupt.pending = true;
vcpu->arch.interrupt.soft = soft;
vcpu->arch.interrupt.nr = vector; vcpu->arch.interrupt.nr = vector;
} }
@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
vcpu->arch.interrupt.pending = false; vcpu->arch.interrupt.pending = false;
} }
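/* True when an event from the previous exit (exception, interrupt or NMI) still has to be redelivered. */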
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
vcpu->arch.nmi_injected;
}
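/* INT3 (#BP) and INTO (#OF) are raised by instructions, so they are re-executed rather than reinjected. */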
static inline bool kvm_exception_is_soft(unsigned int nr)
{
return (nr == BP_VECTOR) || (nr == OF_VECTOR);
}
#endif #endif


@ -59,13 +59,14 @@
#define SrcImm (5<<4) /* Immediate operand. */ #define SrcImm (5<<4) /* Immediate operand. */
#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
#define SrcOne (7<<4) /* Implied '1' */ #define SrcOne (7<<4) /* Implied '1' */
#define SrcMask (7<<4) #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */
#define SrcMask (0xf<<4)
/* Generic ModRM decode. */ /* Generic ModRM decode. */
#define ModRM (1<<7) #define ModRM (1<<8)
/* Destination is only written; never read. */ /* Destination is only written; never read. */
#define Mov (1<<8) #define Mov (1<<9)
#define BitOp (1<<9) #define BitOp (1<<10)
#define MemAbs (1<<10) /* Memory operand is absolute displacement */ #define MemAbs (1<<11) /* Memory operand is absolute displacement */
#define String (1<<12) /* String instruction (rep capable) */ #define String (1<<12) /* String instruction (rep capable) */
#define Stack (1<<13) /* Stack instruction (push/pop) */ #define Stack (1<<13) /* Stack instruction (push/pop) */
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
@ -76,6 +77,7 @@
#define Src2CL (1<<29) #define Src2CL (1<<29)
#define Src2ImmByte (2<<29) #define Src2ImmByte (2<<29)
#define Src2One (3<<29) #define Src2One (3<<29)
#define Src2Imm16 (4<<29)
#define Src2Mask (7<<29) #define Src2Mask (7<<29)
enum { enum {
@ -135,11 +137,11 @@ static u32 opcode_table[256] = {
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
	/* 0x70 - 0x77 */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x78 - 0x7F */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x80 - 0x87 */
	Group | Group1_80, Group | Group1_81,
	Group | Group1_82, Group | Group1_83,
@ -153,7 +155,8 @@ static u32 opcode_table[256] = {
	/* 0x90 - 0x97 */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x98 - 0x9F */
	0, 0, SrcImm | Src2Imm16, 0,
	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
	/* 0xA0 - 0xA7 */
	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
@ -178,7 +181,8 @@ static u32 opcode_table[256] = {
	0, ImplicitOps | Stack, 0, 0,
	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
	/* 0xC8 - 0xCF */
	0, 0, 0, ImplicitOps | Stack,
	ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
	/* 0xD0 - 0xD7 */
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@ -187,11 +191,11 @@ static u32 opcode_table[256] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xE7 */
	0, 0, 0, 0,
	ByteOp | SrcImmUByte, SrcImmUByte,
	ByteOp | SrcImmUByte, SrcImmUByte,
	/* 0xE8 - 0xEF */
	SrcImm | Stack, SrcImm | ImplicitOps,
	SrcImm | Src2Imm16, SrcImmByte | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	/* 0xF0 - 0xF7 */
@ -230,10 +234,8 @@ static u32 twobyte_table[256] = {
	/* 0x70 - 0x7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	/* 0x90 - 0x9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xA7 */
@ -1044,10 +1046,14 @@ done_prefixes:
		}
		break;
	case SrcImmByte:
	case SrcImmUByte:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = 1;
		if ((c->d & SrcMask) == SrcImmByte)
			c->src.val = insn_fetch(s8, 1, c->eip);
		else
			c->src.val = insn_fetch(u8, 1, c->eip);
		break;
	case SrcOne:
		c->src.bytes = 1;
@ -1072,6 +1078,12 @@ done_prefixes:
		c->src2.bytes = 1;
		c->src2.val = insn_fetch(u8, 1, c->eip);
		break;
case Src2Imm16:
c->src2.type = OP_IMM;
c->src2.ptr = (unsigned long *)c->eip;
c->src2.bytes = 2;
c->src2.val = insn_fetch(u16, 2, c->eip);
break;
	case Src2One:
		c->src2.bytes = 1;
		c->src2.val = 1;
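
To illustrate what the new Src2Imm16 case consumes: a small stand-alone sketch of decoding the offset:selector immediates of a far jump, little-endian, offset first. fetch_u16 is a stand-in for what insn_fetch(u16, 2, c->eip) does in the emulator:

#include <stdint.h>
#include <stdio.h>

/* Fetch a 16-bit little-endian immediate at *eip, advancing it. */
static uint16_t fetch_u16(const uint8_t **eip)
{
        uint16_t v = (uint16_t)((*eip)[0] | ((*eip)[1] << 8));
        *eip += 2;
        return v;
}

int main(void)
{
        /* ljmp 0x1234:0x5678 (16-bit operand size) encodes offset, then selector */
        const uint8_t insn[] = { 0x78, 0x56, 0x34, 0x12 };
        const uint8_t *eip = insn;
        uint16_t off = fetch_u16(&eip); /* the SrcImm part */
        uint16_t sel = fetch_u16(&eip); /* the Src2Imm16 part */
        printf("sel=%04x off=%04x\n", sel, off);
        return 0;
}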
@ -1349,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
	return 0;
}
void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
{
u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
	/*
	 * An "sti; sti" sequence only disables interrupts for the first
	 * instruction. So, if the last instruction, be it emulated or
	 * not, left the system with the INT_STI flag enabled, it
	 * means that the last instruction was an sti. We should not
	 * leave the flag on in this case. The same goes for mov ss.
	 */
if (!(int_shadow & mask))
ctxt->interruptibility = mask;
}
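
The shadow logic can be exercised in isolation. A toy model in plain C (not kernel code) showing why back-to-back sti does not re-arm the shadow:

#include <stdint.h>
#include <stdio.h>

#define SHADOW_INT_STI    0x1
#define SHADOW_INT_MOV_SS 0x2

static uint32_t shadow;                 /* stands in for the CPU-reported shadow */
static uint32_t interruptibility;       /* stands in for ctxt->interruptibility */

/* Request a shadow only if the previous instruction didn't already set it. */
static void toy_toggle(uint32_t mask)
{
        if (!(shadow & mask))
                interruptibility = mask;
}

int main(void)
{
        toy_toggle(SHADOW_INT_STI);     /* first sti: shadow requested */
        printf("after 1st sti: %x\n", interruptibility);
        shadow = SHADOW_INT_STI;        /* CPU now reports the shadow active */
        interruptibility = 0;
        toy_toggle(SHADOW_INT_STI);     /* second sti in a row: no new shadow */
        printf("after 2nd sti: %x\n", interruptibility);
        return 0;
}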
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@ -1360,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
	int io_dir_in;
	int rc = 0;

	ctxt->interruptibility = 0;

	/* Shadow copy of register state. Committed on successful emulation.
	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
	 * modify them.
@ -1531,13 +1559,10 @@ special_insn:
			return -1;
		}
		return 0;
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		break;
	case 0x80 ... 0x83: /* Grp1 */
		switch (c->modrm_reg) {
		case 0:
@ -1609,6 +1634,9 @@ special_insn:
		int err;

		sel = c->src.val;

		if (c->modrm_reg == VCPU_SREG_SS)
			toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);

		if (c->modrm_reg <= 5) {
			type_bits = (c->modrm_reg == 1) ? 9 : 1;
			err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
@ -1769,59 +1797,32 @@ special_insn:
		break;
	case 0xe4: 	/* inb */
	case 0xe5: 	/* in */
		port = c->src.val;
		io_dir_in = 1;
		goto do_io;
	case 0xe6: /* outb */
	case 0xe7: /* out */
		port = c->src.val;
		io_dir_in = 0;
		goto do_io;
	case 0xe8: /* call (near) */ {
		long int rel = c->src.val;
		c->src.val = (unsigned long) c->eip;
		jmp_rel(c, rel);
		emulate_push(ctxt);
		break;
	}
	case 0xe9: /* jmp rel */
		goto jmp;
	case 0xea: /* jmp far */
		if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
					VCPU_SREG_CS) < 0) {
			DPRINTF("jmp far: Failed to load CS descriptor\n");
			goto cannot_emulate;
		}
		c->eip = c->src.val;
		break;
	case 0xeb:
	      jmp:		/* jmp rel short */
		jmp_rel(c, c->src.val);
@ -1865,6 +1866,7 @@ special_insn:
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xfb: /* sti */
		toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
		ctxt->eflags |= X86_EFLAGS_IF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
@ -2039,28 +2041,11 @@ twobyte_insn:
		if (!test_cc(c->b, ctxt->eflags))
			c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE;
		break;
	case 0xa3:
	      bt:		/* bt */
		c->dst.type = OP_NONE;

View File

@ -119,7 +119,7 @@ struct kvm_run {
			__u32 error_code;
		} ex;
		/* KVM_EXIT_IO */
		struct {
#define KVM_EXIT_IO_IN  0
#define KVM_EXIT_IO_OUT 1
			__u8 direction;
@ -224,10 +224,10 @@ struct kvm_interrupt {
/* for KVM_GET_DIRTY_LOG */
struct kvm_dirty_log {
	__u32 slot;
	__u32 padding1;
	union {
		void __user *dirty_bitmap; /* one bit per page */
		__u64 padding2;
	};
};
@ -409,6 +409,10 @@ struct kvm_trace_rec {
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
#define KVM_CAP_DEVICE_DEASSIGNMENT 27
#endif
#ifdef __KVM_HAVE_MSIX
#define KVM_CAP_DEVICE_MSIX 28
#endif
#define KVM_CAP_ASSIGN_DEV_IRQ 29
/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
@ -482,11 +486,18 @@ struct kvm_irq_routing {
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
				   struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
			    struct kvm_assigned_irq)
#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
				     struct kvm_assigned_pci_dev)
#define KVM_ASSIGN_SET_MSIX_NR \
			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
#define KVM_ASSIGN_SET_MSIX_ENTRY \
			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
#define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
/*
 * ioctls for vcpu fds
@ -577,6 +588,8 @@ struct kvm_debug_guest {
#define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
#define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)

#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)

struct kvm_assigned_pci_dev {
	__u32 assigned_dev_id;
	__u32 busnr;
@ -587,6 +600,17 @@ struct kvm_assigned_pci_dev {
	};
};
#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
#define KVM_DEV_IRQ_HOST_MASK 0x00ff
#define KVM_DEV_IRQ_GUEST_MASK 0xff00
struct kvm_assigned_irq {
	__u32 assigned_dev_id;
	__u32 host_irq;
@ -602,9 +626,19 @@ struct kvm_assigned_irq {
	};
};

struct kvm_assigned_msix_nr {
	__u32 assigned_dev_id;
	__u16 entry_nr;
	__u16 padding;
};
#define KVM_MAX_MSIX_PER_DEV 512
struct kvm_assigned_msix_entry {
__u32 assigned_dev_id;
__u32 gsi;
__u16 entry; /* The index of entry in the MSI-X table */
__u16 padding[3];
};
#endif
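
A sketch of how userspace is expected to drive the two new MSI-X ioctls defined above, assuming a vm fd from KVM_CREATE_VM, a device already registered via KVM_ASSIGN_PCI_DEVICE, and a recent enough <linux/kvm.h>; the device id and GSI numbers here are hypothetical:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int setup_msix(int vm_fd, unsigned dev_id, unsigned nr)
{
        struct kvm_assigned_msix_nr msix_nr;
        struct kvm_assigned_msix_entry e;
        unsigned i;

        memset(&msix_nr, 0, sizeof(msix_nr));
        msix_nr.assigned_dev_id = dev_id;
        msix_nr.entry_nr = nr;          /* may be set only once per device */
        if (ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_NR, &msix_nr) < 0)
                return -1;

        for (i = 0; i < nr; i++) {
                memset(&e, 0, sizeof(e));
                e.assigned_dev_id = dev_id;
                e.entry = i;            /* index into the device's MSI-X table */
                e.gsi = 40 + i;         /* hypothetical guest-visible GSIs */
                if (ioctl(vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, &e) < 0)
                        return -1;
        }
        return 0;
}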

View File

@ -38,6 +38,7 @@
#define KVM_REQ_UNHALT             6
#define KVM_REQ_MMU_SYNC           7
#define KVM_REQ_KVMCLOCK_UPDATE    8
#define KVM_REQ_KICK               9

#define KVM_USERSPACE_IRQ_SOURCE_ID	0
@ -72,7 +73,6 @@ struct kvm_vcpu {
	struct mutex mutex;
	int   cpu;
	struct kvm_run *run;
	unsigned long requests;
	unsigned long guest_debug;
	int fpu_active;
@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);

void kvm_free_physmem(struct kvm *kvm);
@ -319,6 +320,13 @@ struct kvm_irq_ack_notifier {
	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};
#define KVM_ASSIGNED_MSIX_PENDING 0x1
struct kvm_guest_msix_entry {
u32 vector;
u16 entry;
u16 flags;
};
struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct work_struct interrupt_work;
@ -326,18 +334,18 @@ struct kvm_assigned_dev_kernel {
	int assigned_dev_id;
	int host_busnr;
	int host_devfn;
	unsigned int entries_nr;
	int host_irq;
	bool host_irq_disabled;
	struct msix_entry *host_msix_entries;
	int guest_irq;
	struct kvm_guest_msix_entry *guest_msix_entries;
	unsigned long irq_requested_type;
	int irq_source_id;
	int flags;
	struct pci_dev *dev;
	struct kvm *kvm;
	spinlock_t assigned_dev_lock;
};

struct kvm_irq_mask_notifier {
@ -360,6 +368,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);

/* For vcpu->arch.iommu_flags */
#define KVM_IOMMU_CACHE_COHERENCY	0x1

#ifdef CONFIG_IOMMU_API
int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
			unsigned long npages);

View File

@ -40,4 +40,31 @@ typedef unsigned long hfn_t;
typedef hfn_t pfn_t;
union kvm_ioapic_redirect_entry {
u64 bits;
struct {
u8 vector;
u8 delivery_mode:3;
u8 dest_mode:1;
u8 delivery_status:1;
u8 polarity:1;
u8 remote_irr:1;
u8 trig_mode:1;
u8 mask:1;
u8 reserve:7;
u8 reserved[4];
u8 dest_id;
} fields;
};
struct kvm_lapic_irq {
u32 vector;
u32 delivery_mode;
u32 dest_mode;
u32 level;
u32 trig_mode;
u32 shorthand;
u32 dest_id;
};
#endif /* __KVM_TYPES_H__ */
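
The union above overlays named fields on the raw 64-bit redirection register. A reduced stand-alone model of the same pattern (relying, as the kernel does, on GCC's little-endian bitfield layout; bitfield packing is otherwise implementation-defined):

#include <stdint.h>
#include <stdio.h>

union toy_redirect_entry {
        uint64_t bits;
        struct {
                uint8_t vector;                 /* bits 7:0 */
                uint8_t delivery_mode:3;        /* bits 10:8 */
                uint8_t dest_mode:1;
                uint8_t delivery_status:1;
                uint8_t polarity:1;
                uint8_t remote_irr:1;
                uint8_t trig_mode:1;            /* bit 15 */
                uint8_t mask:1;                 /* bit 16 */
        } fields;
};

int main(void)
{
        union toy_redirect_entry e = { .bits = 0 };
        e.fields.vector = 0x31;
        e.fields.mask = 1;      /* sets bit 16 of the raw register */
        printf("raw = %#llx\n", (unsigned long long)e.bits);   /* 0x10031 */
        return 0;
}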

View File

@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
{
	union kvm_ioapic_redirect_entry *pent;
	int injected = -1;

	pent = &ioapic->redirtbl[idx];
@ -142,149 +142,40 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
	}
}
static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
struct kvm_vcpu *vcpu,
u8 vector, u8 trig_mode, u8 delivery_mode)
{
ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
delivery_mode);
ASSERT((delivery_mode == IOAPIC_FIXED) ||
(delivery_mode == IOAPIC_LOWEST_PRIORITY));
return kvm_apic_set_irq(vcpu, vector, trig_mode);
}
static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
{
kvm_inject_nmi(vcpu);
kvm_vcpu_kick(vcpu);
}
u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
u8 dest_mode)
{
u32 mask = 0;
int i;
struct kvm *kvm = ioapic->kvm;
struct kvm_vcpu *vcpu;
ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
if (dest_mode == 0) { /* Physical mode. */
if (dest == 0xFF) { /* Broadcast. */
for (i = 0; i < KVM_MAX_VCPUS; ++i)
if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
mask |= 1 << i;
return mask;
}
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
if (vcpu->arch.apic)
mask = 1 << i;
break;
}
}
} else if (dest != 0) /* Logical mode, MDA non-zero. */
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (vcpu->arch.apic &&
kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
mask |= 1 << vcpu->vcpu_id;
}
ioapic_debug("mask %x\n", mask);
return mask;
}
static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
{
	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
	struct kvm_lapic_irq irqe;

	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
		     "vector=%x trig_mode=%x\n",
		     entry->fields.dest, entry->fields.dest_mode,
		     entry->fields.delivery_mode, entry->fields.vector,
		     entry->fields.trig_mode);

	irqe.dest_id = entry->fields.dest_id;
	irqe.vector = entry->fields.vector;
	irqe.dest_mode = entry->fields.dest_mode;
	irqe.trig_mode = entry->fields.trig_mode;
	irqe.delivery_mode = entry->fields.delivery_mode << 8;
	irqe.level = 1;
	irqe.shorthand = 0;

#ifdef CONFIG_X86
	/* Always deliver the PIT interrupt to vcpu 0 */
	if (irq == 0) {
		irqe.dest_mode = 0; /* Physical mode. */
		irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
	}
#endif
	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
}
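
The "<< 8" when filling irqe.delivery_mode lines the ioapic's 3-bit field (bits 10:8 of the redirection entry) up with the APIC ICR encoding that kvm_irq_delivery_to_apic() expects. A one-line worked example:

#include <stdio.h>

int main(void)
{
        unsigned ioapic_dm = 1;                 /* "lowest priority" as a 3-bit field value */
        unsigned icr_style = ioapic_dm << 8;    /* 0x100, i.e. APIC_DM_LOWEST */
        printf("%#x\n", icr_style);
        return 0;
}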
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
{
	u32 old_irr = ioapic->irr;
	u32 mask = 1 << irq;
	union kvm_ioapic_redirect_entry entry;
	int ret = 1;

	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
@ -305,7 +196,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
				    int trigger_mode)
{
	union kvm_ioapic_redirect_entry *ent;

	ent = &ioapic->redirtbl[pin];

View File

@ -40,22 +40,7 @@ struct kvm_ioapic {
	u32 id;
	u32 irr;
	u32 pad;
	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
	struct kvm_io_device dev;
	struct kvm *kvm;
	void (*ack_notifier)(void *opaque, int irq);
@ -79,13 +64,13 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
	return kvm->arch.vioapic;
}

int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
		int short_hand, int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq);

#endif

View File

@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
	pfn_t pfn;
	int i, r = 0;
	struct iommu_domain *domain = kvm->arch.iommu_domain;
	int flags;

	/* check if iommu exists and in use */
	if (!domain)
		return 0;

	flags = IOMMU_READ | IOMMU_WRITE;
	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
		flags |= IOMMU_CACHE;

	for (i = 0; i < npages; i++) {
		/* check if already mapped */
		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
		r = iommu_map_range(domain,
				    gfn_to_gpa(gfn),
				    pfn_to_hpa(pfn),
				    PAGE_SIZE, flags);
		if (r) {
			printk(KERN_ERR "kvm_iommu_map_address:"
			       "iommu failed to map pfn=%lx\n", pfn);
@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
{
	struct pci_dev *pdev = NULL;
	struct iommu_domain *domain = kvm->arch.iommu_domain;
	int r, last_flags;

	/* check if iommu exists and in use */
	if (!domain)
@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
		return r;
	}
last_flags = kvm->arch.iommu_flags;
if (iommu_domain_has_cap(kvm->arch.iommu_domain,
IOMMU_CAP_CACHE_COHERENCY))
kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
/* Check if need to update IOMMU page table for guest memory */
if ((last_flags ^ kvm->arch.iommu_flags) ==
KVM_IOMMU_CACHE_COHERENCY) {
kvm_iommu_unmap_memslots(kvm);
r = kvm_iommu_map_memslots(kvm);
if (r)
goto out_unmap;
}
printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
assigned_dev->host_busnr, assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn), PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn)); PCI_FUNC(assigned_dev->host_devfn));
return 0; return 0;
out_unmap:
kvm_iommu_unmap_memslots(kvm);
return r;
} }
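
The flag handling above reduces to two bit tests: OR IOMMU_CACHE into the mapping flags when the domain is coherent, and remap everything exactly when the coherency bit is the only thing that flipped. A worked stand-alone example (the IOMMU_* values mirror linux/iommu.h but are restated here as assumptions):

#include <stdio.h>

#define IOMMU_READ  1
#define IOMMU_WRITE 2
#define IOMMU_CACHE 4
#define KVM_IOMMU_CACHE_COHERENCY 0x1

int main(void)
{
        unsigned last = 0, now = KVM_IOMMU_CACHE_COHERENCY;
        unsigned map_flags = IOMMU_READ | IOMMU_WRITE;

        if (now & KVM_IOMMU_CACHE_COHERENCY)
                map_flags |= IOMMU_CACHE;
        /* XOR isolates exactly which flags changed since the last device */
        if ((last ^ now) == KVM_IOMMU_CACHE_COHERENCY)
                printf("remap needed, flags=%#x\n", map_flags);
        return 0;
}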
int kvm_deassign_device(struct kvm *kvm, int kvm_deassign_device(struct kvm *kvm,

View File

@ -22,6 +22,9 @@
#include <linux/kvm_host.h>

#include <asm/msidef.h>
#ifdef CONFIG_IA64
#include <asm/iosapic.h>
#endif

#include "irq.h"
@ -43,55 +46,71 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
}
inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
{
#ifdef CONFIG_IA64
return irq->delivery_mode ==
(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
#else
return irq->delivery_mode == APIC_DM_LOWEST;
#endif
}
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq)
{
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_is_dm_lowest_prio(irq))
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
for (i = 0; i < KVM_MAX_VCPUS; i++) {
vcpu = kvm->vcpus[i];
if (!vcpu || !kvm_apic_present(vcpu))
continue;
if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
irq->dest_id, irq->dest_mode))
continue;
if (!kvm_is_dm_lowest_prio(irq)) {
if (r < 0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq);
} else {
if (!lowest)
lowest = vcpu;
else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
lowest = vcpu;
}
}
if (lowest)
r = kvm_apic_set_irq(lowest, irq);
return r;
}
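
The loop above treats the two delivery disciplines differently: fixed mode accumulates a result for every matching vcpu, while lowest-priority mode scans for the single best candidate and delivers once after the scan. A reduced stand-alone model of that control flow:

#include <stdio.h>

struct toy_vcpu { int present; int prio; };

static int deliver(struct toy_vcpu *v, int n, int lowest_prio_mode)
{
        int i, r = -1;
        struct toy_vcpu *lowest = NULL;

        for (i = 0; i < n; i++) {
                if (!v[i].present)
                        continue;
                if (!lowest_prio_mode) {
                        if (r < 0)
                                r = 0;
                        r += 1;         /* stands in for kvm_apic_set_irq(vcpu, irq) */
                } else if (!lowest || v[i].prio < lowest->prio)
                        lowest = &v[i];
        }
        if (lowest)
                r = 1;                  /* deliver once, to the winner */
        return r;
}

int main(void)
{
        struct toy_vcpu v[3] = { {1, 5}, {0, 0}, {1, 2} };
        printf("fixed: %d, lowest: %d\n", deliver(v, 3, 0), deliver(v, 3, 1));
        return 0;
}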
static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
		       struct kvm *kvm, int level)
{
	struct kvm_lapic_irq irq;

	irq.dest_id = (e->msi.address_lo &
			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
	irq.vector = (e->msi.data &
			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
	irq.delivery_mode = e->msi.data & 0x700;
	irq.level = 1;
	irq.shorthand = 0;

	/* TODO Deal with RH bit of MSI message address */
	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
}
/* This should be called with the kvm->lock mutex held
@ -252,7 +271,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
		delta = 8;
		break;
	case KVM_IRQCHIP_IOAPIC:
		e->set = kvm_set_ioapic_irq;
		break;
	default:
		goto out;

View File

@ -41,6 +41,8 @@
#include <linux/pagemap.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>

#include <asm/processor.h>
#include <asm/io.h>
@ -60,9 +62,6 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);
@ -95,38 +94,96 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
	return NULL;
}
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
*assigned_dev, int irq)
{
int i, index;
struct msix_entry *host_msix_entries;
host_msix_entries = assigned_dev->host_msix_entries;
index = -1;
for (i = 0; i < assigned_dev->entries_nr; i++)
if (irq == host_msix_entries[i].vector) {
index = i;
break;
}
if (index < 0) {
printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
return 0;
}
return index;
}
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
	struct kvm_assigned_dev_kernel *assigned_dev;
	struct kvm *kvm;
	int irq, i;

	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
				    interrupt_work);
	kvm = assigned_dev->kvm;

	/* This is taken to safely inject irq inside the guest. When
	 * the interrupt injection (or the ioapic code) uses a
	 * finer-grained lock, update this
	 */
	mutex_lock(&kvm->lock);
	spin_lock_irq(&assigned_dev->assigned_dev_lock);
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		struct kvm_guest_msix_entry *guest_entries =
			assigned_dev->guest_msix_entries;
		for (i = 0; i < assigned_dev->entries_nr; i++) {
			if (!(guest_entries[i].flags &
					KVM_ASSIGNED_MSIX_PENDING))
				continue;
			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id,
				    guest_entries[i].vector, 1);
			irq = assigned_dev->host_msix_entries[i].vector;
			if (irq != 0)
				enable_irq(irq);
			assigned_dev->host_irq_disabled = false;
		}
	} else {
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    assigned_dev->guest_irq, 1);
		if (assigned_dev->irq_requested_type &
				KVM_DEV_IRQ_GUEST_MSI) {
			enable_irq(assigned_dev->host_irq);
			assigned_dev->host_irq_disabled = false;
		}
	}
	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
	mutex_unlock(&assigned_dev->kvm->lock);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
	unsigned long flags;
	struct kvm_assigned_dev_kernel *assigned_dev =
		(struct kvm_assigned_dev_kernel *) dev_id;

	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int index = find_index_from_host_irq(assigned_dev, irq);
		if (index < 0)
			goto out;
		assigned_dev->guest_msix_entries[index].flags |=
			KVM_ASSIGNED_MSIX_PENDING;
	}

	schedule_work(&assigned_dev->interrupt_work);

	disable_irq_nosync(irq);
	assigned_dev->host_irq_disabled = true;

out:
	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
	return IRQ_HANDLED;
}
@ -134,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev;
	unsigned long flags;

	if (kian->gsi == -1)
		return;
@ -146,28 +204,30 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
	/* The guest irq may be shared so this ack may be
	 * from another device.
	 */
	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
	if (dev->host_irq_disabled) {
		enable_irq(dev->host_irq);
		dev->host_irq_disabled = false;
	}
	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
}
static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
	assigned_dev->ack_notifier.gsi = -1;

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * In kvm_free_device_irq, cancel_work_sync returns true if:
	 * 1. work is scheduled, and then cancelled.
@ -184,17 +244,64 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
	 * now, the kvm state is still legal; probably we also have to wait
	 * for interrupt_work to be done.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq_nosync(assigned_dev->
					   host_msix_entries[i].vector);

		cancel_work_sync(&assigned_dev->interrupt_work);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 (void *)assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		disable_irq_nosync(assigned_dev->host_irq);
		cancel_work_sync(&assigned_dev->interrupt_work);

		free_irq(assigned_dev->host_irq, (void *)assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}
static int kvm_deassign_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev,
unsigned long irq_requested_type)
{
unsigned long guest_irq_type, host_irq_type;
if (!irqchip_in_kernel(kvm))
return -EINVAL;
/* no irq assignment to deassign */
if (!assigned_dev->irq_requested_type)
return -ENXIO;
host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
if (host_irq_type)
deassign_host_irq(kvm, assigned_dev);
if (guest_irq_type)
deassign_guest_irq(kvm, assigned_dev);
return 0;
}
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}
static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
@ -226,191 +333,245 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
	}
}
static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	dev->host_irq = dev->dev->irq;
	/* Even though this is PCI, we don't want to use shared
	 * interrupts. Sharing host devices with guest-assigned devices
	 * on the same interrupt line is not a happy situation: there
	 * are going to be long delays in accepting, acking, etc.
	 */
	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
			0, "kvm_assigned_intx_device", (void *)dev))
		return -EIO;
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
			"kvm_assigned_msi_device", (void *)dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}
#endif
#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_irq(dev->host_msix_entries[i].vector,
				kvm_assigned_dev_intr, 0,
				"kvm_assigned_msix_device",
				(void *)dev);
		/* FIXME: free requested_irq's on failure */
		if (r)
			return r;
	}

	return 0;
}
#endif
static int assigned_device_enable_guest_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = irq->guest_irq;
return 0;
}
#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = -1;
return 0;
}
#endif
#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq)
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = -1;
return 0;
}
#endif
static int assign_host_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
__u32 host_irq_type)
{
int r = -EEXIST;
if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
return r;
switch (host_irq_type) {
case KVM_DEV_IRQ_HOST_INTX:
r = assigned_device_enable_host_intx(kvm, dev);
break;
#ifdef __KVM_HAVE_MSI
case KVM_DEV_IRQ_HOST_MSI:
r = assigned_device_enable_host_msi(kvm, dev);
break;
#endif
#ifdef __KVM_HAVE_MSIX
case KVM_DEV_IRQ_HOST_MSIX:
r = assigned_device_enable_host_msix(kvm, dev);
break;
#endif
default:
r = -EINVAL;
}
if (!r)
dev->irq_requested_type |= host_irq_type;
return r;
}
static int assign_guest_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev,
struct kvm_assigned_irq *irq,
unsigned long guest_irq_type)
{
int id;
int r = -EEXIST;
if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
return r;
id = kvm_request_irq_source_id(kvm);
if (id < 0)
return id;
dev->irq_source_id = id;
switch (guest_irq_type) {
case KVM_DEV_IRQ_GUEST_INTX:
r = assigned_device_enable_guest_intx(kvm, dev, irq);
break;
#ifdef __KVM_HAVE_MSI
case KVM_DEV_IRQ_GUEST_MSI:
r = assigned_device_enable_guest_msi(kvm, dev, irq);
break;
#endif
#ifdef __KVM_HAVE_MSIX
case KVM_DEV_IRQ_GUEST_MSIX:
r = assigned_device_enable_guest_msix(kvm, dev, irq);
break;
#endif
default:
r = -EINVAL;
}
if (!r) {
dev->irq_requested_type |= guest_irq_type;
kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
} else
kvm_free_irq_source_id(kvm, dev->irq_source_id);
return r;
}
/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq *assigned_irq)
{
int r = -EINVAL;
struct kvm_assigned_dev_kernel *match;
unsigned long host_irq_type, guest_irq_type;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
if (!irqchip_in_kernel(kvm))
return r;
mutex_lock(&kvm->lock);
r = -ENODEV;
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_irq->assigned_dev_id);
if (!match)
goto out;
host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
r = -EINVAL;
/* can only assign one type at a time */
if (hweight_long(host_irq_type) > 1)
goto out;
if (hweight_long(guest_irq_type) > 1)
goto out;
if (host_irq_type == 0 && guest_irq_type == 0)
goto out;
r = 0;
if (host_irq_type)
r = assign_host_irq(kvm, match, host_irq_type);
if (r)
goto out;
if (guest_irq_type)
r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
mutex_unlock(&kvm->lock);
return r;
}
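
The flags sanity check above relies on population counts over the two byte-wide masks: at most one host type and one guest type per call, and at least one of the two. A stand-alone model of the same check (popcount stands in for the kernel's hweight_long()):

#include <stdio.h>

#define HOST_MASK  0x00ff
#define GUEST_MASK 0xff00

static int popcount(unsigned long x)
{
        int n = 0;
        for (; x; x &= x - 1)
                n++;
        return n;
}

static int flags_ok(unsigned long flags)
{
        unsigned long host = flags & HOST_MASK, guest = flags & GUEST_MASK;
        if (popcount(host) > 1 || popcount(guest) > 1)
                return 0;       /* can only assign one type at a time */
        return host || guest;   /* something must actually be requested */
}

int main(void)
{
        printf("%d %d %d\n",
               flags_ok(0x001 | 0x100), /* INTx host + INTx guest: ok */
               flags_ok(0x003),         /* two host types: rejected */
               flags_ok(0));            /* nothing requested: rejected */
        return 0;
}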
static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
{
int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
out:
	mutex_unlock(&kvm->lock);
	return r;
}
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
@ -427,7 +588,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}
@ -464,8 +625,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->assigned_dev_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
	INIT_WORK(&match->interrupt_work,
		  kvm_assigned_dev_interrupt_work_handler);

	list_add(&match->list, &kvm->arch.assigned_dev_head);
@ -878,6 +1043,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
#endif
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
	kvm_arch_flush_shadow(kvm);
#endif
	kvm_arch_destroy_vm(kvm);
	mmdrop(mm);
@ -919,9 +1086,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
{
	int r;
	gfn_t base_gfn;
	unsigned long npages, ugfn;
	unsigned long largepages, i;
	struct kvm_memory_slot *memslot;
	struct kvm_memory_slot old, new;
@ -1010,6 +1176,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
			new.lpage_info[0].write_count = 1;
		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
			new.lpage_info[largepages-1].write_count = 1;
		ugfn = new.userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, disable large page support for this slot
		 */
		if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1))
			for (i = 0; i < largepages; ++i)
				new.lpage_info[i].write_count = 1;
	}
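
The alignment test is pure arithmetic: the guest frame number and the host userspace page offset must agree modulo the huge-page size, and XOR exposes any differing low bits. A worked example, assuming 512 4 KB pages per 2 MB huge page (the x86 case):

#include <stdio.h>

#define PAGES_PER_HPAGE 512

static unsigned long misaligned(unsigned long base_gfn, unsigned long ugfn)
{
        return (base_gfn ^ ugfn) & (PAGES_PER_HPAGE - 1);
}

int main(void)
{
        /* both 0 mod 512: large pages allowed; 0 vs 3 mod 512: disabled */
        printf("%lu %lu\n", misaligned(512, 1024), misaligned(512, 515));
        return 0;
}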
	/* Allocate page dirty bitmap if needed */
@ -1043,8 +1217,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
	kvm_free_physmem_slot(&old, npages ? &new : NULL);
	/* Slot deletion case: we have to update the current slot */
	spin_lock(&kvm->mmu_lock);
	if (!npages)
		*memslot = old;
	spin_unlock(&kvm->mmu_lock);
#ifdef CONFIG_DMAR
	/* map the pages in iommu page table */
	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@ -1454,12 +1630,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
	for (;;) {
		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

		if ((kvm_arch_interrupt_allowed(vcpu) &&
					kvm_cpu_has_interrupt(vcpu)) ||
				kvm_arch_vcpu_runnable(vcpu)) {
			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
			break;
		}
		if (kvm_cpu_has_pending_timer(vcpu))
			break;
		if (signal_pending(current))
			break;
@ -1593,6 +1771,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
	return 0;
}
#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
struct kvm_assigned_msix_nr *entry_nr)
{
int r = 0;
struct kvm_assigned_dev_kernel *adev;
mutex_lock(&kvm->lock);
adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
entry_nr->assigned_dev_id);
if (!adev) {
r = -EINVAL;
goto msix_nr_out;
}
if (adev->entries_nr == 0) {
adev->entries_nr = entry_nr->entry_nr;
if (adev->entries_nr == 0 ||
adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
r = -EINVAL;
goto msix_nr_out;
}
adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
entry_nr->entry_nr,
GFP_KERNEL);
if (!adev->host_msix_entries) {
r = -ENOMEM;
goto msix_nr_out;
}
adev->guest_msix_entries = kzalloc(
sizeof(struct kvm_guest_msix_entry) *
entry_nr->entry_nr, GFP_KERNEL);
if (!adev->guest_msix_entries) {
kfree(adev->host_msix_entries);
r = -ENOMEM;
goto msix_nr_out;
}
	} else /* Setting the MSI-X entry count twice is not allowed */
r = -EINVAL;
msix_nr_out:
mutex_unlock(&kvm->lock);
return r;
}
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
struct kvm_assigned_msix_entry *entry)
{
int r = 0, i;
struct kvm_assigned_dev_kernel *adev;
mutex_lock(&kvm->lock);
adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
entry->assigned_dev_id);
if (!adev) {
r = -EINVAL;
goto msix_entry_out;
}
for (i = 0; i < adev->entries_nr; i++)
if (adev->guest_msix_entries[i].vector == 0 ||
adev->guest_msix_entries[i].entry == entry->entry) {
adev->guest_msix_entries[i].entry = entry->entry;
adev->guest_msix_entries[i].vector = entry->gsi;
adev->host_msix_entries[i].entry = entry->entry;
break;
}
if (i == adev->entries_nr) {
r = -ENOSPC;
goto msix_entry_out;
}
msix_entry_out:
mutex_unlock(&kvm->lock);
return r;
}
#endif
static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
{
@ -1864,6 +2124,11 @@ static long kvm_vm_ioctl(struct file *filp,
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
@ -1874,6 +2139,18 @@ static long kvm_vm_ioctl(struct file *filp,
			goto out;
		break;
	}
case KVM_DEASSIGN_DEV_IRQ: {
struct kvm_assigned_irq assigned_irq;
r = -EFAULT;
if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
goto out;
r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
if (r)
goto out;
break;
}
#endif
#endif
#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
	case KVM_DEASSIGN_PCI_DEVICE: {
@ -1917,7 +2194,29 @@ static long kvm_vm_ioctl(struct file *filp,
		vfree(entries);
		break;
	}
#ifdef __KVM_HAVE_MSIX
case KVM_ASSIGN_SET_MSIX_NR: {
struct kvm_assigned_msix_nr entry_nr;
r = -EFAULT;
if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
goto out;
r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
if (r)
goto out;
break;
}
case KVM_ASSIGN_SET_MSIX_ENTRY: {
struct kvm_assigned_msix_entry entry;
r = -EFAULT;
if (copy_from_user(&entry, argp, sizeof entry))
goto out;
r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
if (r)
goto out;
break;
}
#endif
#endif /* KVM_CAP_IRQ_ROUTING */
	default:
		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
	}
@ -2112,15 +2411,15 @@ EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
		      void *v)
{
	/*
	 * Some (well, at least mine) BIOSes hang on reboot if
	 * in vmx root mode.
	 *
	 * And Intel TXT requires VMX to be off on all cpus at system shutdown.
	 */
	printk(KERN_INFO "kvm: exiting hardware virtualization\n");
	kvm_rebooting = true;
	on_each_cpu(hardware_disable, NULL, 1);
	return NOTIFY_OK;
}
@ -2354,9 +2653,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
	kvm_preempt_ops.sched_in = kvm_sched_in;
	kvm_preempt_ops.sched_out = kvm_sched_out;

	return 0;