Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "PPC:
   - Better machine check handling for HV KVM
   - Ability to support guests with threads=2, 4 or 8 on POWER9
   - Fix for a race that could cause delayed recognition of signals
   - Fix for a bug where POWER9 guests could sleep with interrupts pending

  ARM:
   - VCPU request overhaul
   - allow timer and PMU to have their interrupt number selected from userspace
   - workaround for Cavium erratum 30115
   - handling of memory poisoning
   - the usual crop of fixes and cleanups

  s390:
   - initial machine check forwarding
   - migration support for the CMMA page hinting information
   - cleanups and fixes

  x86:
   - nested VMX bugfixes and improvements
   - more reliable NMI window detection on AMD
   - APIC timer optimizations

  Generic:
   - VCPU request overhaul + documentation of common code patterns
   - kvm_stat improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
  Update my email address
  kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
  x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
  kvm: x86: mmu: allow A/D bits to be disabled in an mmu
  x86: kvm: mmu: make spte mmio mask more explicit
  x86: kvm: mmu: dead code thanks to access tracking
  KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
  KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
  KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
  KVM: x86: remove ignored type attribute
  KVM: LAPIC: Fix lapic timer injection delay
  KVM: lapic: reorganize restart_apic_timer
  KVM: lapic: reorganize start_hv_timer
  kvm: nVMX: Check memory operand to INVVPID
  KVM: s390: Inject machine check into the nested guest
  KVM: s390: Inject machine check into the guest
  tools/kvm_stat: add new interactive command 'b'
  tools/kvm_stat: add new command line switch '-i'
  tools/kvm_stat: fix error on interactive command 'g'
  KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
  ...
@@ -30,6 +30,7 @@
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
		r = 1;
		break;
@@ -749,6 +751,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
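(Not part of the diff: an illustrative userspace sketch of how migration mode is switched on through the VM device-attribute interface this patch wires up. vm_fd is assumed to be an open KVM VM file descriptor, and <sys/ioctl.h> plus <linux/kvm.h> are assumed to be included.)

	/* Hypothetical userspace snippet: enable migration mode on a VM fd. */
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_MIGRATION,
		.attr  = KVM_S390_VM_MIGRATION_START,
	};
	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
		perror("enable migration mode");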

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
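(Not part of the diff: the matching status query, a sketch under the same assumptions; as implemented above, the kernel writes the u64 status through attr.addr.)

	/* Hypothetical userspace snippet: query migration mode. */
	__u64 status = 0;
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_MIGRATION,
		.attr  = KVM_S390_VM_MIGRATION_STATUS,
		.addr  = (__u64)(unsigned long)&status,
	};
	if (!ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
		printf("migration mode: %llu\n", (unsigned long long)status);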

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;
@@ -1089,6 +1214,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
@@ -1111,6 +1239,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
@@ -1178,6 +1309,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
@@ -1285,6 +1419,182 @@ out:
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
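(Not part of the diff, a worked instance of the comment above: on 64-bit s390, KVM_S390_MAX_BIT_DISTANCE evaluates to 2 * 8 = 16 bytes. Announcing a new block costs a base gfn plus a length, i.e. two 8-byte words, while each page's CMMA value costs one byte in the stream, so transmitting up to 16 clean values in-line is never more expensive than closing the block and paying the 16-byte header again.)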

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x3;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}
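(Not part of the diff: a sketch of the source-side loop draining the CMMA log through the new KVM_S390_GET_CMMA_BITS VM ioctl. vm_fd and the buffer size are assumptions; struct kvm_s390_cmma_log is the uapi structure this series introduces, and wrap-around of start_gfn is ignored for brevity.)

	/* Hypothetical userspace snippet: fetch CMMA values until clean. */
	__u8 values[4096];
	struct kvm_s390_cmma_log log = {
		.flags = 0,	/* consume; KVM_S390_CMMA_PEEK would leave bits set */
		.values = (__u64)(unsigned long)values,
	};
	do {
		log.count = sizeof(values);
		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
			break;
		/*
		 * The kernel moved start_gfn to the first dirty page it found
		 * and set count to the number of values actually returned.
		 */
		log.start_gfn += log.count;
	} while (log.remaining);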

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
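(Not part of the diff: the destination-side counterpart, a sketch applying n received values starting at gfn; per the code above, the kernel ANDs the supplied mask with _PGSTE_GPS_USAGE_MASK, so only the usage-state bits can actually be changed. gfn, n and values are assumptions.)

	/* Hypothetical userspace snippet: apply received CMMA values. */
	struct kvm_s390_cmma_log log = {
		.start_gfn = gfn,	/* assumed: first gfn of this block */
		.count = n,		/* assumed: number of received values */
		.flags = 0,		/* no flags are defined for SET */
		.mask = ~0ULL,
		.values = (__u64)(unsigned long)values,
	};
	if (ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log) < 0)
		perror("set cmma bits");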

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
@@ -1363,6 +1673,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}
@@ -1631,6 +1964,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
@@ -1975,7 +2312,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

-	vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}
@@ -2439,7 +2775,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
-	if (!vcpu->requests)
+	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
@@ -2488,6 +2824,27 @@ retry:
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * was used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
@@ -2682,6 +3039,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
@@ -2692,6 +3052,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);