forked from Minki/linux
Merge commit 'origin/next' into kvm-ppc-next
commit dd92d6f274

@@ -219,19 +219,6 @@ allocation of vcpu ids. For example, if userspace wants
single-threaded guest vcpus, it should make all vcpu ids be a multiple
of the number of vcpus per vcore.

On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
threads in one or more virtual CPU cores. (This is because the
hardware requires all the hardware threads in a CPU core to be in the
same partition.) The KVM_CAP_PPC_SMT capability indicates the number
of vcpus per virtual core (vcore). The vcore id is obtained by
dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
given vcore will always be in the same physical core as each other
(though that might be a different physical core from time to time).
Userspace can control the threading (SMT) mode of the guest by its
allocation of vcpu ids. For example, if userspace wants
single-threaded guest vcpus, it should make all vcpu ids be a multiple
of the number of vcpus per vcore.

For virtual cpus that have been created with S390 user controlled virtual
machines, the resulting vcpu fd can be memory mapped at page offset
KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual

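As a rough illustration of the vcpu id rule in the hunk above (this sketch is
not part of the patch, and the helper name is made up), userspace on a
book3s_hv host can query KVM_CAP_PPC_SMT and hand out vcpu ids in vcore-sized
strides:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Create nr_vcpus guest vcpus, one per vcore (single-threaded guest). */
static int create_single_threaded_vcpus(int kvm_fd, int vm_fd, int nr_vcpus)
{
	int threads_per_vcore, i, vcpu_fd;

	/* KVM_CAP_PPC_SMT reports the number of vcpus per vcore (<= 0: N/A). */
	threads_per_vcore = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
	if (threads_per_vcore <= 0)
		threads_per_vcore = 1;

	for (i = 0; i < nr_vcpus; i++) {
		/* Each vcpu id is a multiple of the vcore size. */
		vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, i * threads_per_vcore);
		if (vcpu_fd < 0)
			return -1;
	}
	return 0;
}

With KVM_CAP_PPC_SMT == 4 this creates vcpus 0, 4, 8, ..., so each guest vcpu
ends up alone in its vcore.
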
@@ -874,12 +861,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
be identical. This allows large pages in the guest to be backed by large
pages in the host.

The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which instructs
kvm to keep track of writes to memory within the slot. See KVM_GET_DIRTY_LOG
ioctl. The KVM_CAP_READONLY_MEM capability indicates the availability of the
KVM_MEM_READONLY flag. When this flag is set for a memory region, KVM only
allows read accesses. Writes will be posted to userspace as KVM_EXIT_MMIO
exits.
The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
KVM_MEM_READONLY. The former can be set to instruct KVM to keep track of
writes to memory within the slot. See KVM_GET_DIRTY_LOG ioctl to know how to
use it. The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
to make a new slot read-only. In this case, writes to this memory will be
posted to userspace as KVM_EXIT_MMIO exits.

When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
the memory region are automatically reflected into the guest. For example, an

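The flags described above can be exercised from userspace roughly as follows
(an illustrative sketch, not part of the patch; error handling is omitted and
the function name is invented):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Map host_mem read-only into the guest at gpa, if the kernel allows it. */
static int set_rom_slot(int kvm_fd, int vm_fd, void *host_mem,
			__u64 gpa, __u64 size, __u32 slot)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (__u64)(unsigned long)host_mem,
	};

	/* Only request the read-only flag when the kernel advertises it. */
	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) > 0)
		region.flags = KVM_MEM_READONLY;

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}

When the flag is honoured, guest writes to the region come back to userspace
as KVM_EXIT_MMIO exits, exactly as the new documentation text describes.
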
@@ -27,4 +27,10 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
#define kvm_apic_present(x) (true)
#define kvm_lapic_enabled(x) (true)

static inline bool kvm_apic_vid_enabled(void)
{
/* IA64 has no apicv supporting, do nothing here */
return false;
}

#endif

@@ -770,6 +770,14 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
} else
prefix = 0;

/*
* The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
* copying in vcpu load/put. Lets update our copies before we save
* it into the save area
*/
save_fp_regs(&vcpu->arch.guest_fpregs);
save_access_regs(vcpu->run->s.regs.acrs);

if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128, prefix))
return -EFAULT;

@@ -67,8 +67,8 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)

static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
{
int base2 = vcpu->arch.sie_block->ipb >> 28;
int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);

return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
@@ -76,10 +76,10 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
u64 *address1, u64 *address2)
{
int base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
int disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;

*address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
@@ -87,17 +87,20 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,

static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
{
int base2 = vcpu->arch.sie_block->ipb >> 28;
int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
((vcpu->arch.sie_block->ipb & 0xff00) << 4);
/* The displacement is a 20bit _SIGNED_ value */
if (disp2 & 0x80000)
disp2+=0xfff00000;

return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
}

static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
{
int base2 = vcpu->arch.sie_block->ipb >> 28;
int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);

return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}

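The RSY helper above relies on 20-bit sign extension of the displacement; the
same arithmetic can be written as a standalone sketch (a hypothetical helper,
not taken from the kernel):

#include <stdint.h>

static inline int64_t sign_extend_disp20(uint32_t disp2)
{
	if (disp2 & 0x80000)		/* bit 19 set: the value is negative */
		disp2 += 0xfff00000;	/* fill bits 20..31 with ones */
	return (int64_t)(int32_t)disp2;	/* widen to 64 bits, keeping the sign */
}

For example, a raw displacement of 0xfffff becomes -1 while 0x7ffff stays
+524287, which is what the (long)(int) cast in the patched return statement
achieves.
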
@@ -699,6 +699,11 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*vm_has_apicv)(struct kvm *kvm);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -993,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);

@@ -62,10 +62,12 @@
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EOI_INDUCED 45
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
#define EXIT_REASON_INVPCID 58

#define VMX_EXIT_REASONS \
@@ -103,7 +105,12 @@
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_WBINVD, "WBINVD" }
{ EXIT_REASON_WBINVD, "WBINVD" }, \
{ EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
{ EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }

#ifdef __KERNEL__

@@ -138,9 +145,12 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000

@@ -178,6 +188,7 @@ enum vmcs_field {
GUEST_GS_SELECTOR = 0x0000080a,
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
GUEST_INTR_STATUS = 0x00000810,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,
@@ -205,6 +216,14 @@ enum vmcs_field {
APIC_ACCESS_ADDR_HIGH = 0x00002015,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
EOI_EXIT_BITMAP0 = 0x0000201c,
EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
EOI_EXIT_BITMAP1 = 0x0000201e,
EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
EOI_EXIT_BITMAP2 = 0x00002020,
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,

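The two new SECONDARY_EXEC_* bits defined above can only be turned on when the
processor advertises them; a minimal sketch of that capability check, assuming
the caller has already read MSR_IA32_VMX_PROCBASED_CTLS2 (whose high 32 bits
are the allowed-1 settings of the secondary controls), might look like this:

#include <stdint.h>

#define SECONDARY_EXEC_APIC_REGISTER_VIRT    0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200

/* Return non-zero if both APIC-register virtualization and virtual
 * interrupt delivery may be enabled in the secondary exec controls. */
static int cpu_supports_apic_reg_vid(uint64_t procbased_ctls2_msr)
{
	uint32_t allowed1 = procbased_ctls2_msr >> 32;

	return (allowed1 & SECONDARY_EXEC_APIC_REGISTER_VIRT) &&
	       (allowed1 & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
}

This mirrors the check that hardware_setup() performs later in this series
before leaving enable_apicv_reg_vid set.
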
@@ -1013,7 +1013,7 @@ static u8 test_cc(unsigned int condition, unsigned long flags)
void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);

flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
asm("pushq %[flags]; popf; call *%[fastop]"
asm("push %[flags]; popf; call *%[fastop]"
: "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
return rc;
}

@@ -37,6 +37,38 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);

/*
* check if there is pending interrupt from
* non-APIC source without intack.
*/
static int kvm_cpu_has_extint(struct kvm_vcpu *v)
{
if (kvm_apic_accept_pic_intr(v))
return pic_irqchip(v->kvm)->output; /* PIC */
else
return 0;
}

/*
* check if there is injectable interrupt:
* when virtual interrupt delivery enabled,
* interrupt from apic will handled by hardware,
* we don't need to check it here.
*/
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
{
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.pending;

if (kvm_cpu_has_extint(v))
return 1;

if (kvm_apic_vid_enabled(v->kvm))
return 0;

return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
}

/*
* check if there is pending interrupt without
* intack.
@@ -46,27 +78,41 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.pending;

if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output)
return pic_irqchip(v->kvm)->output; /* PIC */
if (kvm_cpu_has_extint(v))
return 1;

return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);

/*
* Read pending interrupt(from non-APIC source)
* vector and intack.
*/
static int kvm_cpu_get_extint(struct kvm_vcpu *v)
{
if (kvm_cpu_has_extint(v))
return kvm_pic_read_irq(v->kvm); /* PIC */
return -1;
}

/*
* Read pending interrupt vector and intack.
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
int vector;

if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.nr;

if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output)
return kvm_pic_read_irq(v->kvm); /* PIC */
vector = kvm_cpu_get_extint(v);

if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
return vector; /* PIC */

return kvm_get_apic_interrupt(v); /* APIC */
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);

void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{

@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic)
|
||||
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
|
||||
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
|
||||
|
||||
static inline int apic_x2apic_mode(struct kvm_lapic *apic)
|
||||
{
|
||||
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
|
||||
}
|
||||
|
||||
static inline int kvm_apic_id(struct kvm_lapic *apic)
|
||||
{
|
||||
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
|
||||
}
|
||||
|
||||
static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
|
||||
void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
|
||||
struct kvm_lapic_irq *irq,
|
||||
u64 *eoi_exit_bitmap)
|
||||
{
|
||||
u16 cid;
|
||||
ldr >>= 32 - map->ldr_bits;
|
||||
cid = (ldr >> map->cid_shift) & map->cid_mask;
|
||||
struct kvm_lapic **dst;
|
||||
struct kvm_apic_map *map;
|
||||
unsigned long bitmap = 1;
|
||||
int i;
|
||||
|
||||
BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
|
||||
rcu_read_lock();
|
||||
map = rcu_dereference(vcpu->kvm->arch.apic_map);
|
||||
|
||||
return cid;
|
||||
}
|
||||
if (unlikely(!map)) {
|
||||
__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
|
||||
{
|
||||
ldr >>= (32 - map->ldr_bits);
|
||||
return ldr & map->lid_mask;
|
||||
if (irq->dest_mode == 0) { /* physical mode */
|
||||
if (irq->delivery_mode == APIC_DM_LOWEST ||
|
||||
irq->dest_id == 0xff) {
|
||||
__set_bit(irq->vector,
|
||||
(unsigned long *)eoi_exit_bitmap);
|
||||
goto out;
|
||||
}
|
||||
dst = &map->phys_map[irq->dest_id & 0xff];
|
||||
} else {
|
||||
u32 mda = irq->dest_id << (32 - map->ldr_bits);
|
||||
|
||||
dst = map->logical_map[apic_cluster_id(map, mda)];
|
||||
|
||||
bitmap = apic_logical_id(map, mda);
|
||||
}
|
||||
|
||||
for_each_set_bit(i, &bitmap, 16) {
|
||||
if (!dst[i])
|
||||
continue;
|
||||
if (dst[i]->vcpu == vcpu) {
|
||||
__set_bit(irq->vector,
|
||||
(unsigned long *)eoi_exit_bitmap);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void recalculate_apic_map(struct kvm *kvm)
|
||||
@ -230,6 +255,8 @@ out:
|
||||
|
||||
if (old)
|
||||
kfree_rcu(old, rcu);
|
||||
|
||||
kvm_ioapic_make_eoibitmap_request(kvm);
|
||||
}
|
||||
|
||||
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
|
||||
@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
|
||||
{
|
||||
int result;
|
||||
|
||||
/*
|
||||
* Note that irr_pending is just a hint. It will be always
|
||||
* true with virtual interrupt delivery enabled.
|
||||
*/
|
||||
if (!apic->irr_pending)
|
||||
return -1;
|
||||
|
||||
@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
|
||||
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
|
||||
{
|
||||
int result;
|
||||
|
||||
/* Note that isr_count is always 1 with vid enabled */
|
||||
if (!apic->isr_count)
|
||||
return -1;
|
||||
if (likely(apic->highest_isr_cache != -1))
|
||||
@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
|
||||
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
|
||||
}
|
||||
|
||||
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
|
||||
{
|
||||
if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
|
||||
kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
|
||||
int trigger_mode;
|
||||
if (apic_test_vector(vector, apic->regs + APIC_TMR))
|
||||
trigger_mode = IOAPIC_LEVEL_TRIG;
|
||||
else
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
|
||||
}
|
||||
}
|
||||
|
||||
static int apic_set_eoi(struct kvm_lapic *apic)
|
||||
{
|
||||
int vector = apic_find_highest_isr(apic);
|
||||
@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
|
||||
apic_clear_isr(vector, apic);
|
||||
apic_update_ppr(apic);
|
||||
|
||||
if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
|
||||
kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
|
||||
int trigger_mode;
|
||||
if (apic_test_vector(vector, apic->regs + APIC_TMR))
|
||||
trigger_mode = IOAPIC_LEVEL_TRIG;
|
||||
else
|
||||
trigger_mode = IOAPIC_EDGE_TRIG;
|
||||
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
|
||||
}
|
||||
kvm_ioapic_send_eoi(apic, vector);
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
return vector;
|
||||
}
|
||||
|
||||
/*
|
||||
* this interface assumes a trap-like exit, which has already finished
|
||||
* desired side effect including vISR and vPPR update.
|
||||
*/
|
||||
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
trace_kvm_eoi(apic, vector);
|
||||
|
||||
kvm_ioapic_send_eoi(apic, vector);
|
||||
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
|
||||
|
||||
static void apic_send_ipi(struct kvm_lapic *apic)
|
||||
{
|
||||
u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
|
||||
@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
|
||||
|
||||
/* emulate APIC access in a trap manner */
|
||||
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
|
||||
{
|
||||
u32 val = 0;
|
||||
|
||||
/* hw has done the conditional check and inst decode */
|
||||
offset &= 0xff0;
|
||||
|
||||
apic_reg_read(vcpu->arch.apic, offset, 4, &val);
|
||||
|
||||
/* TODO: optimize to just emulate side effect w/o one more write */
|
||||
apic_reg_write(vcpu->arch.apic, offset, val);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
|
||||
|
||||
void kvm_free_lapic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
{
|
||||
u64 old_value = vcpu->arch.apic_base;
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
if (!apic) {
|
||||
@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
value &= ~MSR_IA32_APICBASE_BSP;
|
||||
|
||||
vcpu->arch.apic_base = value;
|
||||
if (apic_x2apic_mode(apic)) {
|
||||
u32 id = kvm_apic_id(apic);
|
||||
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
|
||||
kvm_apic_set_ldr(apic, ldr);
|
||||
if ((old_value ^ value) & X2APIC_ENABLE) {
|
||||
if (value & X2APIC_ENABLE) {
|
||||
u32 id = kvm_apic_id(apic);
|
||||
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
|
||||
kvm_apic_set_ldr(apic, ldr);
|
||||
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
|
||||
} else
|
||||
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
|
||||
}
|
||||
|
||||
apic->base_address = apic->vcpu->arch.apic_base &
|
||||
MSR_IA32_APICBASE_BASE;
|
||||
|
||||
@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
|
||||
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
|
||||
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
|
||||
}
|
||||
apic->irr_pending = false;
|
||||
apic->isr_count = 0;
|
||||
apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
|
||||
apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
|
||||
apic->highest_isr_cache = -1;
|
||||
update_divide_count(apic);
|
||||
atomic_set(&apic->lapic_timer.pending, 0);
|
||||
@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
|
||||
update_divide_count(apic);
|
||||
start_apic_timer(apic);
|
||||
apic->irr_pending = true;
|
||||
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
|
||||
apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
|
||||
1 : count_vectors(apic->regs + APIC_ISR);
|
||||
apic->highest_isr_cache = -1;
|
||||
kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
}
|
||||
|
||||
|
@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
|
||||
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
|
||||
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
|
||||
|
||||
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
|
||||
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
|
||||
|
||||
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
|
||||
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
|
||||
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
|
||||
@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
|
||||
return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
|
||||
}
|
||||
|
||||
static inline int apic_x2apic_mode(struct kvm_lapic *apic)
|
||||
{
|
||||
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
|
||||
}
|
||||
|
||||
static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
|
||||
{
|
||||
return kvm_x86_ops->vm_has_apicv(kvm);
|
||||
}
|
||||
|
||||
static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
|
||||
{
|
||||
u16 cid;
|
||||
ldr >>= 32 - map->ldr_bits;
|
||||
cid = (ldr >> map->cid_shift) & map->cid_mask;
|
||||
|
||||
BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
|
||||
|
||||
return cid;
|
||||
}
|
||||
|
||||
static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
|
||||
{
|
||||
ldr >>= (32 - map->ldr_bits);
|
||||
return ldr & map->lid_mask;
|
||||
}
|
||||
|
||||
void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
|
||||
struct kvm_lapic_irq *irq,
|
||||
u64 *eoi_bitmap);
|
||||
|
||||
#endif
|
||||
|
@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
|
||||
|
||||
static bool spte_is_locklessly_modifiable(u64 spte)
|
||||
{
|
||||
return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
|
||||
return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
|
||||
(SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
|
||||
}
|
||||
|
||||
static bool spte_has_volatile_bits(u64 spte)
|
||||
@ -1460,28 +1461,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
|
||||
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the sp from shadow page cache, after call it,
|
||||
* we can not find this sp from the cache, and the shadow
|
||||
* page table is still valid.
|
||||
* It should be under the protection of mmu lock.
|
||||
*/
|
||||
static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)
|
||||
static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
|
||||
{
|
||||
ASSERT(is_empty_shadow_page(sp->spt));
|
||||
hlist_del(&sp->hash_link);
|
||||
if (!sp->role.direct)
|
||||
free_page((unsigned long)sp->gfns);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free the shadow page table and the sp, we can do it
|
||||
* out of the protection of mmu lock.
|
||||
*/
|
||||
static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
|
||||
{
|
||||
list_del(&sp->link);
|
||||
free_page((unsigned long)sp->spt);
|
||||
if (!sp->role.direct)
|
||||
free_page((unsigned long)sp->gfns);
|
||||
kmem_cache_free(mmu_page_header_cache, sp);
|
||||
}
|
||||
|
||||
@ -2125,7 +2112,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
|
||||
do {
|
||||
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
|
||||
WARN_ON(!sp->role.invalid || sp->root_count);
|
||||
kvm_mmu_isolate_page(sp);
|
||||
kvm_mmu_free_page(sp);
|
||||
} while (!list_empty(invalid_list));
|
||||
}
|
||||
@ -2327,9 +2313,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
|
||||
if (s->role.level != PT_PAGE_TABLE_LEVEL)
|
||||
return 1;
|
||||
|
||||
if (!need_unsync && !s->unsync) {
|
||||
if (!s->unsync)
|
||||
need_unsync = true;
|
||||
}
|
||||
}
|
||||
if (need_unsync)
|
||||
kvm_unsync_pages(vcpu, gfn);
|
||||
@ -3687,6 +3672,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
|
||||
else
|
||||
r = paging32_init_context(vcpu, context);
|
||||
|
||||
vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
|
||||
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
|
||||
vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
|
||||
vcpu->arch.mmu.base_role.smep_andnot_wp
|
||||
@ -3853,7 +3839,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
|
||||
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
|
||||
*gpa &= ~(gpa_t)7;
|
||||
*bytes = 8;
|
||||
r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
|
||||
r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
|
||||
if (r)
|
||||
gentry = 0;
|
||||
new = (const u8 *)&gentry;
|
||||
@ -4007,7 +3993,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
|
||||
& mask.word) && rmap_can_add(vcpu))
|
||||
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
|
||||
if (!remote_flush && need_remote_flush(entry, *spte))
|
||||
if (need_remote_flush(entry, *spte))
|
||||
remote_flush = true;
|
||||
++spte;
|
||||
}
|
||||
|
@ -409,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
unsigned direct_access, access = gw->pt_access;
|
||||
int top_level, emulate = 0;
|
||||
|
||||
if (!is_present_gpte(gw->ptes[gw->level - 1]))
|
||||
return 0;
|
||||
|
||||
direct_access = gw->pte_access;
|
||||
|
||||
top_level = vcpu->arch.mmu.root_level;
|
||||
|
@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
|
||||
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
|
||||
}
|
||||
|
||||
static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static int svm_vm_has_apicv(struct kvm *kvm)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.enable_nmi_window = enable_nmi_window,
|
||||
.enable_irq_window = enable_irq_window,
|
||||
.update_cr8_intercept = update_cr8_intercept,
|
||||
.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
|
||||
.vm_has_apicv = svm_vm_has_apicv,
|
||||
.load_eoi_exitmap = svm_load_eoi_exitmap,
|
||||
.hwapic_isr_update = svm_hwapic_isr_update,
|
||||
|
||||
.set_tss_addr = svm_set_tss_addr,
|
||||
.get_tdp_level = get_npt_level,
|
||||
|
@ -84,6 +84,9 @@ module_param(vmm_exclusive, bool, S_IRUGO);
|
||||
static bool __read_mostly fasteoi = 1;
|
||||
module_param(fasteoi, bool, S_IRUGO);
|
||||
|
||||
static bool __read_mostly enable_apicv_reg_vid = 1;
|
||||
module_param(enable_apicv_reg_vid, bool, S_IRUGO);
|
||||
|
||||
/*
|
||||
* If nested=1, nested virtualization is supported, i.e., guests may use
|
||||
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
|
||||
@ -640,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a;
|
||||
static unsigned long *vmx_io_bitmap_b;
|
||||
static unsigned long *vmx_msr_bitmap_legacy;
|
||||
static unsigned long *vmx_msr_bitmap_longmode;
|
||||
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
|
||||
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
|
||||
|
||||
static bool cpu_has_load_ia32_efer;
|
||||
static bool cpu_has_load_perf_global_ctrl;
|
||||
@ -764,6 +769,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
|
||||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_apic_register_virt(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_virtual_intr_delivery(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_flexpriority(void)
|
||||
{
|
||||
return cpu_has_vmx_tpr_shadow() &&
|
||||
@ -1821,6 +1844,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
|
||||
vmx->guest_msrs[from] = tmp;
|
||||
}
|
||||
|
||||
static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long *msr_bitmap;
|
||||
|
||||
if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
|
||||
if (is_long_mode(vcpu))
|
||||
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
|
||||
else
|
||||
msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
|
||||
} else {
|
||||
if (is_long_mode(vcpu))
|
||||
msr_bitmap = vmx_msr_bitmap_longmode;
|
||||
else
|
||||
msr_bitmap = vmx_msr_bitmap_legacy;
|
||||
}
|
||||
|
||||
vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up the vmcs to automatically save and restore system
|
||||
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
|
||||
@ -1829,7 +1871,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
|
||||
static void setup_msrs(struct vcpu_vmx *vmx)
|
||||
{
|
||||
int save_nmsrs, index;
|
||||
unsigned long *msr_bitmap;
|
||||
|
||||
save_nmsrs = 0;
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -1861,14 +1902,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
|
||||
|
||||
vmx->save_nmsrs = save_nmsrs;
|
||||
|
||||
if (cpu_has_vmx_msr_bitmap()) {
|
||||
if (is_long_mode(&vmx->vcpu))
|
||||
msr_bitmap = vmx_msr_bitmap_longmode;
|
||||
else
|
||||
msr_bitmap = vmx_msr_bitmap_legacy;
|
||||
|
||||
vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
|
||||
}
|
||||
if (cpu_has_vmx_msr_bitmap())
|
||||
vmx_set_msr_bitmap(&vmx->vcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2534,13 +2569,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
|
||||
min2 = 0;
|
||||
opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
||||
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
||||
SECONDARY_EXEC_WBINVD_EXITING |
|
||||
SECONDARY_EXEC_ENABLE_VPID |
|
||||
SECONDARY_EXEC_ENABLE_EPT |
|
||||
SECONDARY_EXEC_UNRESTRICTED_GUEST |
|
||||
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
|
||||
SECONDARY_EXEC_RDTSCP |
|
||||
SECONDARY_EXEC_ENABLE_INVPCID;
|
||||
SECONDARY_EXEC_ENABLE_INVPCID |
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
|
||||
if (adjust_vmx_controls(min2, opt2,
|
||||
MSR_IA32_VMX_PROCBASED_CTLS2,
|
||||
&_cpu_based_2nd_exec_control) < 0)
|
||||
@ -2551,6 +2589,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
||||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
|
||||
_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
|
||||
#endif
|
||||
|
||||
if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
|
||||
_cpu_based_2nd_exec_control &= ~(
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
|
||||
if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
|
||||
/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
|
||||
enabled */
|
||||
@ -2748,6 +2793,15 @@ static __init int hardware_setup(void)
|
||||
if (!cpu_has_vmx_ple())
|
||||
ple_gap = 0;
|
||||
|
||||
if (!cpu_has_vmx_apic_register_virt() ||
|
||||
!cpu_has_vmx_virtual_intr_delivery())
|
||||
enable_apicv_reg_vid = 0;
|
||||
|
||||
if (enable_apicv_reg_vid)
|
||||
kvm_x86_ops->update_cr8_intercept = NULL;
|
||||
else
|
||||
kvm_x86_ops->hwapic_irr_update = NULL;
|
||||
|
||||
if (nested)
|
||||
nested_vmx_setup_ctls_msrs();
|
||||
|
||||
@ -3173,6 +3227,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
|
||||
if (!is_paging(vcpu)) {
|
||||
hw_cr4 &= ~X86_CR4_PAE;
|
||||
hw_cr4 |= X86_CR4_PSE;
|
||||
/*
|
||||
* SMEP is disabled if CPU is in non-paging mode in
|
||||
* hardware. However KVM always uses paging mode to
|
||||
* emulate guest non-paging mode with TDP.
|
||||
* To emulate this behavior, SMEP needs to be manually
|
||||
* disabled when guest switches to non-paging mode.
|
||||
*/
|
||||
hw_cr4 &= ~X86_CR4_SMEP;
|
||||
} else if (!(cr4 & X86_CR4_PAE)) {
|
||||
hw_cr4 &= ~X86_CR4_PAE;
|
||||
}
|
||||
@ -3707,7 +3769,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
|
||||
spin_unlock(&vmx_vpid_lock);
|
||||
}
|
||||
|
||||
static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
|
||||
#define MSR_TYPE_R 1
|
||||
#define MSR_TYPE_W 2
|
||||
static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
|
||||
u32 msr, int type)
|
||||
{
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
@ -3720,20 +3785,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
|
||||
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
|
||||
*/
|
||||
if (msr <= 0x1fff) {
|
||||
__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
|
||||
__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
|
||||
if (type & MSR_TYPE_R)
|
||||
/* read-low */
|
||||
__clear_bit(msr, msr_bitmap + 0x000 / f);
|
||||
|
||||
if (type & MSR_TYPE_W)
|
||||
/* write-low */
|
||||
__clear_bit(msr, msr_bitmap + 0x800 / f);
|
||||
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
msr &= 0x1fff;
|
||||
__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
|
||||
__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
|
||||
if (type & MSR_TYPE_R)
|
||||
/* read-high */
|
||||
__clear_bit(msr, msr_bitmap + 0x400 / f);
|
||||
|
||||
if (type & MSR_TYPE_W)
|
||||
/* write-high */
|
||||
__clear_bit(msr, msr_bitmap + 0xc00 / f);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
|
||||
u32 msr, int type)
|
||||
{
|
||||
int f = sizeof(unsigned long);
|
||||
|
||||
if (!cpu_has_vmx_msr_bitmap())
|
||||
return;
|
||||
|
||||
/*
|
||||
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
|
||||
* have the write-low and read-high bitmap offsets the wrong way round.
|
||||
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
|
||||
*/
|
||||
if (msr <= 0x1fff) {
|
||||
if (type & MSR_TYPE_R)
|
||||
/* read-low */
|
||||
__set_bit(msr, msr_bitmap + 0x000 / f);
|
||||
|
||||
if (type & MSR_TYPE_W)
|
||||
/* write-low */
|
||||
__set_bit(msr, msr_bitmap + 0x800 / f);
|
||||
|
||||
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
||||
msr &= 0x1fff;
|
||||
if (type & MSR_TYPE_R)
|
||||
/* read-high */
|
||||
__set_bit(msr, msr_bitmap + 0x400 / f);
|
||||
|
||||
if (type & MSR_TYPE_W)
|
||||
/* write-high */
|
||||
__set_bit(msr, msr_bitmap + 0xc00 / f);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
|
||||
{
|
||||
if (!longmode_only)
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
|
||||
msr, MSR_TYPE_R | MSR_TYPE_W);
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
|
||||
msr, MSR_TYPE_R | MSR_TYPE_W);
|
||||
}
|
||||
|
||||
static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
|
||||
{
|
||||
__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
|
||||
msr, MSR_TYPE_R);
|
||||
__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
|
||||
msr, MSR_TYPE_R);
|
||||
}
|
||||
|
||||
static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
|
||||
{
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
|
||||
msr, MSR_TYPE_R);
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
|
||||
msr, MSR_TYPE_R);
|
||||
}
|
||||
|
||||
static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
|
||||
{
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
|
||||
msr, MSR_TYPE_W);
|
||||
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
|
||||
msr, MSR_TYPE_W);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3812,6 +3950,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
|
||||
return exec_control;
|
||||
}
|
||||
|
||||
static int vmx_vm_has_apicv(struct kvm *kvm)
|
||||
{
|
||||
return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
|
||||
}
|
||||
|
||||
static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
|
||||
@ -3829,6 +3972,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
|
||||
if (!ple_gap)
|
||||
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
|
||||
if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
|
||||
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||||
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
|
||||
return exec_control;
|
||||
}
|
||||
|
||||
@ -3873,6 +4020,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
vmx_secondary_exec_control(vmx));
|
||||
}
|
||||
|
||||
if (enable_apicv_reg_vid) {
|
||||
vmcs_write64(EOI_EXIT_BITMAP0, 0);
|
||||
vmcs_write64(EOI_EXIT_BITMAP1, 0);
|
||||
vmcs_write64(EOI_EXIT_BITMAP2, 0);
|
||||
vmcs_write64(EOI_EXIT_BITMAP3, 0);
|
||||
|
||||
vmcs_write16(GUEST_INTR_STATUS, 0);
|
||||
}
|
||||
|
||||
if (ple_gap) {
|
||||
vmcs_write32(PLE_GAP, ple_gap);
|
||||
vmcs_write32(PLE_WINDOW, ple_window);
|
||||
@ -4787,6 +4943,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
|
||||
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
|
||||
}
|
||||
|
||||
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
||||
int vector = exit_qualification & 0xff;
|
||||
|
||||
/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
|
||||
kvm_apic_set_eoi_accelerated(vcpu, vector);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int handle_apic_write(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
||||
u32 offset = exit_qualification & 0xfff;
|
||||
|
||||
/* APIC-write VM exit is trap-like and thus no need to adjust IP */
|
||||
kvm_apic_write_nodecode(vcpu, offset);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int handle_task_switch(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@ -5721,6 +5897,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
||||
[EXIT_REASON_VMON] = handle_vmon,
|
||||
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
|
||||
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
|
||||
[EXIT_REASON_APIC_WRITE] = handle_apic_write,
|
||||
[EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
|
||||
[EXIT_REASON_WBINVD] = handle_wbinvd,
|
||||
[EXIT_REASON_XSETBV] = handle_xsetbv,
|
||||
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
|
||||
@ -6070,6 +6248,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
|
||||
vmcs_write32(TPR_THRESHOLD, irr);
|
||||
}
|
||||
|
||||
static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
|
||||
{
|
||||
u32 sec_exec_control;
|
||||
|
||||
/*
|
||||
* There is not point to enable virtualize x2apic without enable
|
||||
* apicv
|
||||
*/
|
||||
if (!cpu_has_vmx_virtualize_x2apic_mode() ||
|
||||
!vmx_vm_has_apicv(vcpu->kvm))
|
||||
return;
|
||||
|
||||
if (!vm_need_tpr_shadow(vcpu->kvm))
|
||||
return;
|
||||
|
||||
sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
|
||||
|
||||
if (set) {
|
||||
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
||||
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
|
||||
} else {
|
||||
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
|
||||
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
||||
}
|
||||
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
|
||||
|
||||
vmx_set_msr_bitmap(vcpu);
|
||||
}
|
||||
|
||||
static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
|
||||
{
|
||||
u16 status;
|
||||
u8 old;
|
||||
|
||||
if (!vmx_vm_has_apicv(kvm))
|
||||
return;
|
||||
|
||||
if (isr == -1)
|
||||
isr = 0;
|
||||
|
||||
status = vmcs_read16(GUEST_INTR_STATUS);
|
||||
old = status >> 8;
|
||||
if (isr != old) {
|
||||
status &= 0xff;
|
||||
status |= isr << 8;
|
||||
vmcs_write16(GUEST_INTR_STATUS, status);
|
||||
}
|
||||
}
|
||||
|
||||
static void vmx_set_rvi(int vector)
|
||||
{
|
||||
u16 status;
|
||||
u8 old;
|
||||
|
||||
status = vmcs_read16(GUEST_INTR_STATUS);
|
||||
old = (u8)status & 0xff;
|
||||
if ((u8)vector != old) {
|
||||
status &= ~0xff;
|
||||
status |= (u8)vector;
|
||||
vmcs_write16(GUEST_INTR_STATUS, status);
|
||||
}
|
||||
}
|
||||
|
||||
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
|
||||
{
|
||||
if (max_irr == -1)
|
||||
return;
|
||||
|
||||
vmx_set_rvi(max_irr);
|
||||
}
|
||||
|
||||
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
|
||||
{
|
||||
vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
|
||||
vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
|
||||
vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
|
||||
vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
|
||||
}
|
||||
|
||||
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
|
||||
{
|
||||
u32 exit_intr_info;
|
||||
@ -7333,6 +7590,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.enable_nmi_window = enable_nmi_window,
|
||||
.enable_irq_window = enable_irq_window,
|
||||
.update_cr8_intercept = update_cr8_intercept,
|
||||
.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
|
||||
.vm_has_apicv = vmx_vm_has_apicv,
|
||||
.load_eoi_exitmap = vmx_load_eoi_exitmap,
|
||||
.hwapic_irr_update = vmx_hwapic_irr_update,
|
||||
.hwapic_isr_update = vmx_hwapic_isr_update,
|
||||
|
||||
.set_tss_addr = vmx_set_tss_addr,
|
||||
.get_tdp_level = get_ept_level,
|
||||
@ -7365,7 +7627,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
|
||||
static int __init vmx_init(void)
|
||||
{
|
||||
int r, i;
|
||||
int r, i, msr;
|
||||
|
||||
rdmsrl_safe(MSR_EFER, &host_efer);
|
||||
|
||||
@ -7386,11 +7648,19 @@ static int __init vmx_init(void)
|
||||
if (!vmx_msr_bitmap_legacy)
|
||||
goto out1;
|
||||
|
||||
vmx_msr_bitmap_legacy_x2apic =
|
||||
(unsigned long *)__get_free_page(GFP_KERNEL);
|
||||
if (!vmx_msr_bitmap_legacy_x2apic)
|
||||
goto out2;
|
||||
|
||||
vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
|
||||
if (!vmx_msr_bitmap_longmode)
|
||||
goto out2;
|
||||
goto out3;
|
||||
|
||||
vmx_msr_bitmap_longmode_x2apic =
|
||||
(unsigned long *)__get_free_page(GFP_KERNEL);
|
||||
if (!vmx_msr_bitmap_longmode_x2apic)
|
||||
goto out4;
|
||||
|
||||
/*
|
||||
* Allow direct access to the PC debug port (it is often used for I/O
|
||||
@ -7422,6 +7692,28 @@ static int __init vmx_init(void)
|
||||
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
|
||||
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
|
||||
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
|
||||
memcpy(vmx_msr_bitmap_legacy_x2apic,
|
||||
vmx_msr_bitmap_legacy, PAGE_SIZE);
|
||||
memcpy(vmx_msr_bitmap_longmode_x2apic,
|
||||
vmx_msr_bitmap_longmode, PAGE_SIZE);
|
||||
|
||||
if (enable_apicv_reg_vid) {
|
||||
for (msr = 0x800; msr <= 0x8ff; msr++)
|
||||
vmx_disable_intercept_msr_read_x2apic(msr);
|
||||
|
||||
/* According SDM, in x2apic mode, the whole id reg is used.
|
||||
* But in KVM, it only use the highest eight bits. Need to
|
||||
* intercept it */
|
||||
vmx_enable_intercept_msr_read_x2apic(0x802);
|
||||
/* TMCCT */
|
||||
vmx_enable_intercept_msr_read_x2apic(0x839);
|
||||
/* TPR */
|
||||
vmx_disable_intercept_msr_write_x2apic(0x808);
|
||||
/* EOI */
|
||||
vmx_disable_intercept_msr_write_x2apic(0x80b);
|
||||
/* SELF-IPI */
|
||||
vmx_disable_intercept_msr_write_x2apic(0x83f);
|
||||
}
|
||||
|
||||
if (enable_ept) {
|
||||
kvm_mmu_set_mask_ptes(0ull,
|
||||
@ -7435,8 +7727,10 @@ static int __init vmx_init(void)
|
||||
|
||||
return 0;
|
||||
|
||||
out3:
|
||||
out4:
|
||||
free_page((unsigned long)vmx_msr_bitmap_longmode);
|
||||
out3:
|
||||
free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
|
||||
out2:
|
||||
free_page((unsigned long)vmx_msr_bitmap_legacy);
|
||||
out1:
|
||||
@ -7448,6 +7742,8 @@ out:
|
||||
|
||||
static void __exit vmx_exit(void)
|
||||
{
|
||||
free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
|
||||
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
|
||||
free_page((unsigned long)vmx_msr_bitmap_legacy);
|
||||
free_page((unsigned long)vmx_msr_bitmap_longmode);
|
||||
free_page((unsigned long)vmx_io_bitmap_b);
|
||||
|
@ -870,8 +870,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
|
||||
kvm_x86_ops->set_efer(vcpu, efer);
|
||||
|
||||
vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
|
||||
|
||||
/* Update reserved bits */
|
||||
if ((efer ^ old_efer) & EFER_NX)
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
@ -5565,7 +5563,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.nmi_injected = true;
|
||||
kvm_x86_ops->set_nmi(vcpu);
|
||||
}
|
||||
} else if (kvm_cpu_has_interrupt(vcpu)) {
|
||||
} else if (kvm_cpu_has_injectable_intr(vcpu)) {
|
||||
if (kvm_x86_ops->interrupt_allowed(vcpu)) {
|
||||
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
|
||||
false);
|
||||
@ -5633,6 +5631,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
|
||||
#endif
|
||||
}
|
||||
|
||||
static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 eoi_exit_bitmap[4];
|
||||
|
||||
memset(eoi_exit_bitmap, 0, 32);
|
||||
|
||||
kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
|
||||
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
|
||||
}
|
||||
|
||||
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
@ -5686,6 +5694,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
kvm_handle_pmu_event(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_PMI, vcpu))
|
||||
kvm_deliver_pmi(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
|
||||
update_eoi_exitmap(vcpu);
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
|
||||
@ -5694,10 +5704,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
/* enable NMI/IRQ window open exits if needed */
|
||||
if (vcpu->arch.nmi_pending)
|
||||
kvm_x86_ops->enable_nmi_window(vcpu);
|
||||
else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
|
||||
else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
|
||||
kvm_x86_ops->enable_irq_window(vcpu);
|
||||
|
||||
if (kvm_lapic_enabled(vcpu)) {
|
||||
/*
|
||||
* Update architecture specific hints for APIC
|
||||
* virtual interrupt delivery.
|
||||
*/
|
||||
if (kvm_x86_ops->hwapic_irr_update)
|
||||
kvm_x86_ops->hwapic_irr_update(vcpu,
|
||||
kvm_lapic_find_highest_irr(vcpu));
|
||||
update_cr8_intercept(vcpu);
|
||||
kvm_lapic_sync_to_vapic(vcpu);
|
||||
}
|
||||
|
@ -244,9 +244,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
|
||||
{
|
||||
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
|
||||
int err;
|
||||
struct virtqueue *vq;
|
||||
struct virtqueue *vq = NULL;
|
||||
struct virtio_ccw_vq_info *info;
|
||||
unsigned long size;
|
||||
unsigned long size = 0; /* silence the compiler */
|
||||
unsigned long flags;
|
||||
|
||||
/* Allocate queue. */
|
||||
@ -279,11 +279,8 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
|
||||
/* For now, we fail if we can't get the requested size. */
|
||||
dev_warn(&vcdev->cdev->dev, "no vq\n");
|
||||
err = -ENOMEM;
|
||||
free_pages_exact(info->queue, size);
|
||||
goto out_err;
|
||||
}
|
||||
info->vq = vq;
|
||||
vq->priv = info;
|
||||
|
||||
/* Register it with the host. */
|
||||
info->info_block->queue = (__u64)info->queue;
|
||||
@ -297,12 +294,12 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
|
||||
err = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | i);
|
||||
if (err) {
|
||||
dev_warn(&vcdev->cdev->dev, "SET_VQ failed\n");
|
||||
free_pages_exact(info->queue, size);
|
||||
info->vq = NULL;
|
||||
vq->priv = NULL;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
info->vq = vq;
|
||||
vq->priv = info;
|
||||
|
||||
/* Save it to our list. */
|
||||
spin_lock_irqsave(&vcdev->lock, flags);
|
||||
list_add(&info->node, &vcdev->virtqueues);
|
||||
@ -311,8 +308,13 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
|
||||
return vq;
|
||||
|
||||
out_err:
|
||||
if (info)
|
||||
if (vq)
|
||||
vring_del_virtqueue(vq);
|
||||
if (info) {
|
||||
if (info->queue)
|
||||
free_pages_exact(info->queue, size);
|
||||
kfree(info->info_block);
|
||||
}
|
||||
kfree(info);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
@ -123,6 +123,7 @@ static inline bool is_error_page(struct page *page)
|
||||
#define KVM_REQ_MASTERCLOCK_UPDATE 19
|
||||
#define KVM_REQ_MCLOCK_INPROGRESS 20
|
||||
#define KVM_REQ_EPR_EXIT 21
|
||||
#define KVM_REQ_EOIBITMAP 22
|
||||
|
||||
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
|
||||
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
|
||||
@ -538,6 +539,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
|
||||
void kvm_flush_remote_tlbs(struct kvm *kvm);
|
||||
void kvm_reload_remote_mmus(struct kvm *kvm);
|
||||
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
|
||||
void kvm_make_update_eoibitmap_request(struct kvm *kvm);
|
||||
|
||||
long kvm_arch_dev_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg);
|
||||
@ -691,6 +693,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
|
||||
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
|
||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
|
||||
int irq_source_id, int level);
|
||||
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
|
||||
void kvm_register_irq_ack_notifier(struct kvm *kvm,
|
||||
struct kvm_irq_ack_notifier *kian);
|
||||
|
@ -4316,7 +4316,10 @@ EXPORT_SYMBOL(yield);
|
||||
* It's the caller's job to ensure that the target task struct
|
||||
* can't go away on us before we can do any checks.
|
||||
*
|
||||
* Returns true if we indeed boosted the target task.
|
||||
* Returns:
|
||||
* true (>0) if we indeed boosted the target task.
|
||||
* false (0) if we failed to boost the target.
|
||||
* -ESRCH if there's no task to yield to.
|
||||
*/
|
||||
bool __sched yield_to(struct task_struct *p, bool preempt)
|
||||
{
|
||||
@ -4330,6 +4333,15 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
|
||||
|
||||
again:
|
||||
p_rq = task_rq(p);
|
||||
/*
|
||||
* If we're the only runnable task on the rq and target rq also
|
||||
* has only one task, there's absolutely no point in yielding.
|
||||
*/
|
||||
if (rq->nr_running == 1 && p_rq->nr_running == 1) {
|
||||
yielded = -ESRCH;
|
||||
goto out_irq;
|
||||
}
|
||||
|
||||
double_rq_lock(rq, p_rq);
|
||||
while (task_rq(p) != p_rq) {
|
||||
double_rq_unlock(rq, p_rq);
|
||||
@ -4337,13 +4349,13 @@ again:
|
||||
}
|
||||
|
||||
if (!curr->sched_class->yield_to_task)
|
||||
goto out;
|
||||
goto out_unlock;
|
||||
|
||||
if (curr->sched_class != p->sched_class)
|
||||
goto out;
|
||||
goto out_unlock;
|
||||
|
||||
if (task_running(p_rq, p) || p->state)
|
||||
goto out;
|
||||
goto out_unlock;
|
||||
|
||||
yielded = curr->sched_class->yield_to_task(rq, p, preempt);
|
||||
if (yielded) {
|
||||
@ -4356,11 +4368,12 @@ again:
|
||||
resched_task(p_rq->curr);
|
||||
}
|
||||
|
||||
out:
|
||||
out_unlock:
|
||||
double_rq_unlock(rq, p_rq);
|
||||
out_irq:
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (yielded)
|
||||
if (yielded > 0)
|
||||
schedule();
|
||||
|
||||
return yielded;
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/export.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/current.h>
|
||||
@ -115,6 +116,42 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic)
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
|
||||
u64 *eoi_exit_bitmap)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
|
||||
union kvm_ioapic_redirect_entry *e;
|
||||
struct kvm_lapic_irq irqe;
|
||||
int index;
|
||||
|
||||
spin_lock(&ioapic->lock);
|
||||
/* traverse ioapic entry to set eoi exit bitmap*/
|
||||
for (index = 0; index < IOAPIC_NUM_PINS; index++) {
|
||||
e = &ioapic->redirtbl[index];
|
||||
if (!e->fields.mask &&
|
||||
(e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
|
||||
kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
|
||||
index))) {
|
||||
irqe.dest_id = e->fields.dest_id;
|
||||
irqe.vector = e->fields.vector;
|
||||
irqe.dest_mode = e->fields.dest_mode;
|
||||
irqe.delivery_mode = e->fields.delivery_mode << 8;
|
||||
kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap);
|
||||
}
|
||||
}
|
||||
spin_unlock(&ioapic->lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap);
|
||||
|
||||
void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||
|
||||
if (!kvm_apic_vid_enabled(kvm) || !ioapic)
|
||||
return;
|
||||
kvm_make_update_eoibitmap_request(kvm);
|
||||
}
|
||||
|
||||
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
||||
{
|
||||
unsigned index;
|
||||
@ -156,6 +193,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
||||
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
|
||||
&& ioapic->irr & (1 << index))
|
||||
ioapic_service(ioapic, index);
|
||||
kvm_ioapic_make_eoibitmap_request(ioapic->kvm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -455,6 +493,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
|
||||
spin_lock(&ioapic->lock);
|
||||
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
|
||||
update_handled_vectors(ioapic);
|
||||
kvm_ioapic_make_eoibitmap_request(kvm);
|
||||
spin_unlock(&ioapic->lock);
|
||||
return 0;
|
||||
}
|
||||
|
@ -82,5 +82,9 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||
struct kvm_lapic_irq *irq);
|
||||
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||
void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm);
|
||||
void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
|
||||
u64 *eoi_exit_bitmap);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -76,7 +76,9 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
|
||||
gfn = slot->base_gfn;
|
||||
end_gfn = gfn + slot->npages;
|
||||
|
||||
flags = IOMMU_READ | IOMMU_WRITE;
|
||||
flags = IOMMU_READ;
|
||||
if (!(slot->flags & KVM_MEM_READONLY))
|
||||
flags |= IOMMU_WRITE;
|
||||
if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
|
||||
flags |= IOMMU_CACHE;
|
||||
|
||||
|
@ -22,6 +22,7 @@

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <trace/events/kvm.h>

#include <asm/msidef.h>
@ -237,6 +238,28 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
        return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        struct kvm_irq_ack_notifier *kian;
        struct hlist_node *n;
        int gsi;

        rcu_read_lock();
        gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
        if (gsi != -1)
                hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
                                         link)
                        if (kian->gsi == gsi) {
                                rcu_read_unlock();
                                return true;
                        }

        rcu_read_unlock();

        return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        struct kvm_irq_ack_notifier *kian;
@ -261,6 +284,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
        mutex_lock(&kvm->irq_lock);
        hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
        mutex_unlock(&kvm->irq_lock);
        kvm_ioapic_make_eoibitmap_request(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@ -270,6 +294,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
        hlist_del_init_rcu(&kian->link);
        mutex_unlock(&kvm->irq_lock);
        synchronize_rcu();
        kvm_ioapic_make_eoibitmap_request(kvm);
}

int kvm_request_irq_source_id(struct kvm *kvm)

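kvm_irq_has_notifier() above answers "does anyone want to be told about EOIs on this irqchip pin?", and the register/unregister paths now kick an EOI-exit-bitmap update so a newly interesting pin starts causing exits. A minimal sketch of how a caller would register such interest, assuming the existing in-kernel ack-notifier API (my_ack and setup_ack_notifier are made-up names for illustration):

#include <linux/kvm_host.h>

/* Illustration only: a hypothetical user of the ack-notifier API. */
static void my_ack(struct kvm_irq_ack_notifier *kian)
{
        /* the guest has EOI'd the interrupt routed to kian->gsi */
}

static void setup_ack_notifier(struct kvm *kvm, unsigned gsi,
                               struct kvm_irq_ack_notifier *kian)
{
        kian->gsi = gsi;
        kian->irq_acked = my_ack;
        kvm_register_irq_ack_notifier(kvm, kian);
        /* registration now also requests an EOI-exit-bitmap update, see above */
}
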
@ -217,6 +217,11 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm)
        make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
}

void kvm_make_update_eoibitmap_request(struct kvm *kvm)
{
        make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP);
}

int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
        struct page *page;
@ -713,6 +718,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
        return old_memslots;
}

/*
 * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
 * - create a new memory slot
 * - delete an existing memory slot
 * - modify an existing memory slot
 *   -- move it in the guest physical memory space
 *   -- just change its flags
 *
 * Since flags can be changed by some of these operations, the following
 * differentiation is the best we can do for __kvm_set_memory_region():
 */
enum kvm_mr_change {
        KVM_MR_CREATE,
        KVM_MR_DELETE,
        KVM_MR_MOVE,
        KVM_MR_FLAGS_ONLY,
};

/*
 * Allocate some memory and give it an address in the guest physical address
 * space.

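The comment and enum above describe KVM_SET_USER_MEMORY_REGION from the kernel's side. For orientation, a rough userspace-side sketch of the same four cases, assuming a VM fd from KVM_CREATE_VM and a host buffer from mmap(); it is illustrative only and not part of this patch:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns 0 on success, -1 with errno set otherwise. */
static int set_region(int vm_fd, __u32 slot, __u32 flags,
                      __u64 gpa, __u64 size, void *hva)
{
        struct kvm_userspace_memory_region r;

        memset(&r, 0, sizeof(r));
        r.slot = slot;
        r.flags = flags;               /* e.g. KVM_MEM_LOG_DIRTY_PAGES */
        r.guest_phys_addr = gpa;
        r.memory_size = size;          /* 0 means "delete this slot" */
        r.userspace_addr = (__u64)(unsigned long)hva;

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &r);
}

/*
 * unused slot, size > 0            -> KVM_MR_CREATE
 * existing slot, size == 0         -> KVM_MR_DELETE
 * same slot, new guest_phys_addr   -> KVM_MR_MOVE
 * same slot, only flags changed    -> KVM_MR_FLAGS_ONLY
 */
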
@ -731,6 +754,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        struct kvm_memory_slot *slot;
        struct kvm_memory_slot old, new;
        struct kvm_memslots *slots = NULL, *old_memslots;
        enum kvm_mr_change change;

        r = check_memory_region_flags(mem);
        if (r)
@ -772,17 +796,31 @@ int __kvm_set_memory_region(struct kvm *kvm,
        new.npages = npages;
        new.flags = mem->flags;

        /*
         * Disallow changing a memory slot's size or changing anything about
         * zero sized slots that doesn't involve making them non-zero.
         */
        r = -EINVAL;
        if (npages && old.npages && npages != old.npages)
                goto out;
        if (!npages && !old.npages)
        if (npages) {
                if (!old.npages)
                        change = KVM_MR_CREATE;
                else { /* Modify an existing slot. */
                        if ((mem->userspace_addr != old.userspace_addr) ||
                            (npages != old.npages) ||
                            ((new.flags ^ old.flags) & KVM_MEM_READONLY))
                                goto out;

                        if (base_gfn != old.base_gfn)
                                change = KVM_MR_MOVE;
                        else if (new.flags != old.flags)
                                change = KVM_MR_FLAGS_ONLY;
                        else { /* Nothing to change. */
                                r = 0;
                                goto out;
                        }
                }
        } else if (old.npages) {
                change = KVM_MR_DELETE;
        } else /* Modify a non-existent slot: disallowed. */
                goto out;

        if ((npages && !old.npages) || (base_gfn != old.base_gfn)) {
        if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
                /* Check for overlaps */
                r = -EEXIST;
                kvm_for_each_memslot(slot, kvm->memslots) {
@ -800,20 +838,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
        new.dirty_bitmap = NULL;

        r = -ENOMEM;

        /*
         * Allocate if a slot is being created. If modifying a slot,
         * the userspace_addr cannot change.
         */
        if (!old.npages) {
        if (change == KVM_MR_CREATE) {
                new.user_alloc = user_alloc;
                new.userspace_addr = mem->userspace_addr;

                if (kvm_arch_create_memslot(&new, npages))
                        goto out_free;
        } else if (npages && mem->userspace_addr != old.userspace_addr) {
                r = -EINVAL;
                goto out_free;
        }

        /* Allocate page dirty bitmap if needed */
@ -822,7 +852,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
                goto out_free;
        }

        if (!npages || base_gfn != old.base_gfn) {
        if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
                r = -ENOMEM;
                slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
                                GFP_KERNEL);
@ -863,15 +893,23 @@ int __kvm_set_memory_region(struct kvm *kvm,
                goto out_free;
        }

        /* map new memory slot into the iommu */
        if (npages) {
        /*
         * IOMMU mapping: New slots need to be mapped. Old slots need to be
         * un-mapped and re-mapped if their base changes. Since base change
         * unmapping is handled above with slot deletion, mapping alone is
         * needed here. Anything else the iommu might care about for existing
         * slots (size changes, userspace addr changes and read-only flag
         * changes) is disallowed above, so any other attribute changes getting
         * here can be skipped.
         */
        if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
                r = kvm_iommu_map_pages(kvm, &new);
                if (r)
                        goto out_slots;
        }

        /* actual memory is freed via old in kvm_free_physmem_slot below */
        if (!npages) {
        if (change == KVM_MR_DELETE) {
                new.dirty_bitmap = NULL;
                memset(&new.arch, 0, sizeof(new.arch));
        }
@ -1669,6 +1707,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
{
        struct pid *pid;
        struct task_struct *task = NULL;
        bool ret = false;

        rcu_read_lock();
        pid = rcu_dereference(target->pid);
@ -1676,17 +1715,15 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
                task = get_pid_task(target->pid, PIDTYPE_PID);
        rcu_read_unlock();
        if (!task)
                return false;
                return ret;
        if (task->flags & PF_VCPU) {
                put_task_struct(task);
                return false;
        }
        if (yield_to(task, 1)) {
                put_task_struct(task);
                return true;
                return ret;
        }
        ret = yield_to(task, 1);
        put_task_struct(task);
        return false;

        return ret;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);

@ -1727,12 +1764,14 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
        return eligible;
}
#endif

void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
        struct kvm *kvm = me->kvm;
        struct kvm_vcpu *vcpu;
        int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
        int yielded = 0;
        int try = 3;
        int pass;
        int i;

@ -1744,7 +1783,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
         * VCPU is holding the lock that we need and will release it.
         * We approximate round-robin by starting at the last boosted VCPU.
         */
        for (pass = 0; pass < 2 && !yielded; pass++) {
        for (pass = 0; pass < 2 && !yielded && try; pass++) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (!pass && i <= last_boosted_vcpu) {
                                i = last_boosted_vcpu;
@ -1757,10 +1796,15 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                                continue;
                        if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                continue;
                        if (kvm_vcpu_yield_to(vcpu)) {

                        yielded = kvm_vcpu_yield_to(vcpu);
                        if (yielded > 0) {
                                kvm->last_boosted_vcpu = i;
                                yielded = 1;
                                break;
                        } else if (yielded < 0) {
                                try--;
                                if (!try)
                                        break;
                        }
                }
        }

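The last hunks make kvm_vcpu_yield_to() return the underlying yield_to() result and give kvm_vcpu_on_spin() a small retry budget (try = 3), so a PLE-exiting vcpu stops scanning candidates early when yields keep failing hard, which is the likely undercommit case. The control-flow pattern in isolation, as a hedged sketch with made-up names rather than the kernel's exact types:

/* Illustration only: scan candidates with a bounded number of hard failures.
 * try_yield() stands in for kvm_vcpu_yield_to(): > 0 boosted the target,
 * 0 could not boost it, < 0 suggests further attempts are unlikely to help. */
static int directed_yield_sketch(int (*try_yield)(int candidate),
                                 int nr_candidates, int budget)
{
        int i, ret;

        for (i = 0; i < nr_candidates && budget; i++) {
                ret = try_yield(i);
                if (ret > 0)
                        return i;       /* success, stop scanning */
                if (ret < 0)
                        budget--;       /* a few of these and we give up */
        }
        return -1;                      /* nobody was boosted */
}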