2020-03-24 09:41:54 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
/*
|
|
|
|
* Kernel-based Virtual Machine driver for Linux
|
|
|
|
*
|
|
|
|
* AMD SVM-SEV support
|
|
|
|
*
|
|
|
|
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kvm_types.h>
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/highmem.h>
|
|
|
|
#include <linux/psp-sev.h>
|
2020-04-11 16:09:27 +00:00
|
|
|
#include <linux/pagemap.h>
|
2020-03-24 09:41:54 +00:00
|
|
|
#include <linux/swap.h>
|
2021-03-30 04:42:06 +00:00
|
|
|
#include <linux/misc_cgroup.h>
|
2020-12-10 17:09:40 +00:00
|
|
|
#include <linux/processor.h>
|
2020-12-10 17:09:48 +00:00
|
|
|
#include <linux/trace_events.h>
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-06-23 12:02:05 +00:00
|
|
|
#include <asm/pkru.h>
|
2020-12-15 17:44:07 +00:00
|
|
|
#include <asm/trapnr.h>
|
2021-10-15 01:16:31 +00:00
|
|
|
#include <asm/fpu/xcr.h>
|
2020-12-15 17:44:07 +00:00
|
|
|
|
2022-08-03 22:49:57 +00:00
|
|
|
#include "mmu.h"
|
2020-03-24 09:41:54 +00:00
|
|
|
#include "x86.h"
|
|
|
|
#include "svm.h"
|
2020-12-31 00:27:00 +00:00
|
|
|
#include "svm_ops.h"
|
2020-12-10 17:09:47 +00:00
|
|
|
#include "cpuid.h"
|
2020-12-10 17:09:48 +00:00
|
|
|
#include "trace.h"
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-03-30 04:42:06 +00:00
|
|
|
#ifndef CONFIG_KVM_AMD_SEV
/*
 * Without CONFIG_KVM_AMD_SEV the SEV feature is unsupported and none of the
 * APIs in this file are used, yet the file is still compiled into the KVM AMD
 * module.
 *
 * In that configuration enum misc_res_type {} (linux/misc_cgroup.h) has no
 * MISC_CG_RES_SEV or MISC_CG_RES_SEV_ES entries, so alias both names to
 * MISC_CG_RES_TYPES purely to let compilation succeed.
 */
#define MISC_CG_RES_SEV MISC_CG_RES_TYPES
#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
#endif
|
|
|
|
|
2021-04-22 02:11:18 +00:00
|
|
|
#ifdef CONFIG_KVM_AMD_SEV
/* "sev" module parameter (mode 0444): enable/disable SEV support. */
static bool sev_enabled = true;
module_param_named(sev, sev_enabled, bool, 0444);

/* "sev_es" module parameter (mode 0444): enable/disable SEV-ES support. */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
#else
/* SEV support is compiled out: both switches are constant false. */
#define sev_enabled false
#define sev_es_enabled false
#endif /* CONFIG_KVM_AMD_SEV */
|
2021-04-22 02:11:14 +00:00
|
|
|
|
2020-12-10 17:09:49 +00:00
|
|
|
static u8 sev_enc_bit;

/*
 * Held for write around WBINVD + DF_FLUSH (ASID recycling) and for read
 * around DEACTIVATE, so the two can never interleave.
 */
static DECLARE_RWSEM(sev_deactivate_lock);

/* Protects the two ASID bitmaps declared below. */
static DEFINE_MUTEX(sev_bitmap_lock);

/*
 * ASID partitioning: plain SEV guests use [min_sev_asid, max_sev_asid],
 * SEV-ES guests use [1, min_sev_asid - 1].
 */
unsigned int max_sev_asid;
static unsigned int min_sev_asid;

static unsigned long sev_me_mask;

/* Number of bits in each of the ASID bitmaps. */
static unsigned int nr_asids;

/* Bit set => ASID is currently allocated. */
static unsigned long *sev_asid_bitmap;

/* Bit set => ASID was freed and is waiting for a flush before reuse. */
static unsigned long *sev_reclaim_asid_bitmap;
|
|
|
|
|
|
|
|
struct enc_region {
|
|
|
|
struct list_head list;
|
|
|
|
unsigned long npages;
|
|
|
|
struct page **pages;
|
|
|
|
unsigned long uaddr;
|
|
|
|
unsigned long size;
|
|
|
|
};
|
|
|
|
|
2021-04-22 02:11:25 +00:00
|
|
|
/* Called with the sev_bitmap_lock held, or on shutdown */
|
|
|
|
static int sev_flush_asids(int min_asid, int max_asid)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
2021-08-02 18:09:03 +00:00
|
|
|
int ret, asid, error = 0;
|
2021-04-22 02:11:25 +00:00
|
|
|
|
|
|
|
/* Check if there are any ASIDs to reclaim before performing a flush */
|
2021-08-02 18:09:03 +00:00
|
|
|
asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
|
|
|
|
if (asid > max_asid)
|
2021-04-22 02:11:25 +00:00
|
|
|
return -EBUSY;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
|
|
|
|
* so it must be guarded.
|
|
|
|
*/
|
|
|
|
down_write(&sev_deactivate_lock);
|
|
|
|
|
|
|
|
wbinvd_on_all_cpus();
|
|
|
|
ret = sev_guest_df_flush(&error);
|
|
|
|
|
|
|
|
up_write(&sev_deactivate_lock);
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-08 22:32:14 +00:00
|
|
|
static inline bool is_mirroring_enc_context(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
return !!to_kvm_svm(kvm)->sev_info.enc_context_owner;
|
|
|
|
}
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
/* Must be called with the sev_bitmap_lock held */
|
2020-12-10 17:10:05 +00:00
|
|
|
static bool __sev_recycle_asids(int min_asid, int max_asid)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
2021-04-22 02:11:25 +00:00
|
|
|
if (sev_flush_asids(min_asid, max_asid))
|
2020-03-24 09:41:54 +00:00
|
|
|
return false;
|
|
|
|
|
2020-12-10 17:10:05 +00:00
|
|
|
/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
|
2020-03-24 09:41:54 +00:00
|
|
|
bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
|
2021-08-02 18:09:03 +00:00
|
|
|
nr_asids);
|
|
|
|
bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-11-11 15:02:26 +00:00
|
|
|
static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
|
|
|
|
{
|
|
|
|
enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
|
|
|
|
return misc_cg_try_charge(type, sev->misc_cg, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
|
|
|
|
{
|
|
|
|
enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
|
|
|
|
misc_cg_uncharge(type, sev->misc_cg, 1);
|
|
|
|
}
|
|
|
|
|
2020-12-10 17:10:05 +00:00
|
|
|
static int sev_asid_new(struct kvm_sev_info *sev)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
2021-08-02 18:09:03 +00:00
|
|
|
int asid, min_asid, max_asid, ret;
|
2020-03-24 09:41:54 +00:00
|
|
|
bool retry = true;
|
2021-03-30 04:42:06 +00:00
|
|
|
|
|
|
|
WARN_ON(sev->misc_cg);
|
|
|
|
sev->misc_cg = get_current_misc_cg();
|
2021-11-11 15:02:26 +00:00
|
|
|
ret = sev_misc_cg_try_charge(sev);
|
2021-03-30 04:42:06 +00:00
|
|
|
if (ret) {
|
|
|
|
put_misc_cg(sev->misc_cg);
|
|
|
|
sev->misc_cg = NULL;
|
|
|
|
return ret;
|
|
|
|
}
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
mutex_lock(&sev_bitmap_lock);
|
|
|
|
|
|
|
|
/*
|
2020-12-10 17:10:05 +00:00
|
|
|
* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
|
|
|
|
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
|
2020-03-24 09:41:54 +00:00
|
|
|
*/
|
2021-08-02 18:09:03 +00:00
|
|
|
min_asid = sev->es_active ? 1 : min_sev_asid;
|
2020-12-10 17:10:05 +00:00
|
|
|
max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
|
2020-03-24 09:41:54 +00:00
|
|
|
again:
|
2021-08-02 18:09:03 +00:00
|
|
|
asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
|
|
|
|
if (asid > max_asid) {
|
2020-12-10 17:10:05 +00:00
|
|
|
if (retry && __sev_recycle_asids(min_asid, max_asid)) {
|
2020-03-24 09:41:54 +00:00
|
|
|
retry = false;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
mutex_unlock(&sev_bitmap_lock);
|
2021-03-30 04:42:06 +00:00
|
|
|
ret = -EBUSY;
|
|
|
|
goto e_uncharge;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2021-08-02 18:09:03 +00:00
|
|
|
__set_bit(asid, sev_asid_bitmap);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
mutex_unlock(&sev_bitmap_lock);
|
|
|
|
|
2021-08-02 18:09:03 +00:00
|
|
|
return asid;
|
2021-03-30 04:42:06 +00:00
|
|
|
e_uncharge:
|
2021-11-11 15:02:26 +00:00
|
|
|
sev_misc_cg_uncharge(sev);
|
2021-03-30 04:42:06 +00:00
|
|
|
put_misc_cg(sev->misc_cg);
|
|
|
|
sev->misc_cg = NULL;
|
|
|
|
return ret;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_get_asid(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
|
|
|
|
return sev->asid;
|
|
|
|
}
|
|
|
|
|
2021-03-30 04:42:06 +00:00
|
|
|
static void sev_asid_free(struct kvm_sev_info *sev)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
|
|
|
struct svm_cpu_data *sd;
|
2021-08-02 18:09:03 +00:00
|
|
|
int cpu;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
mutex_lock(&sev_bitmap_lock);
|
|
|
|
|
2021-08-02 18:09:03 +00:00
|
|
|
__set_bit(sev->asid, sev_reclaim_asid_bitmap);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
for_each_possible_cpu(cpu) {
|
2022-11-09 14:07:55 +00:00
|
|
|
sd = per_cpu_ptr(&svm_data, cpu);
|
KVM: SVM: Fix off-by-one indexing when nullifying last used SEV VMCB
Use the raw ASID, not ASID-1, when nullifying the last used VMCB when
freeing an SEV ASID. The consumer, pre_sev_run(), indexes the array by
the raw ASID, thus KVM could get a false negative when checking for a
different VMCB if KVM manages to reallocate the same ASID+VMCB combo for
a new VM.
Note, this cannot cause a functional issue _in the current code_, as
pre_sev_run() also checks which pCPU last did VMRUN for the vCPU, and
last_vmentry_cpu is initialized to -1 during vCPU creation, i.e. is
guaranteed to mismatch on the first VMRUN. However, prior to commit
8a14fe4f0c54 ("kvm: x86: Move last_cpu into kvm_vcpu_arch as
last_vmentry_cpu"), SVM tracked pCPU on its own and zero-initialized the
last_cpu variable. Thus it's theoretically possible that older versions
of KVM could miss a TLB flush if the first VMRUN is on pCPU0 and the ASID
and VMCB exactly match those of a prior VM.
Fixes: 70cd94e60c73 ("KVM: SVM: VMRUN should use associated ASID when SEV is enabled")
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-08-03 16:27:46 +00:00
|
|
|
sd->sev_vmcbs[sev->asid] = NULL;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
mutex_unlock(&sev_bitmap_lock);
|
2021-03-30 04:42:06 +00:00
|
|
|
|
2021-11-11 15:02:26 +00:00
|
|
|
sev_misc_cg_uncharge(sev);
|
2021-03-30 04:42:06 +00:00
|
|
|
put_misc_cg(sev->misc_cg);
|
|
|
|
sev->misc_cg = NULL;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2021-06-10 17:46:04 +00:00
|
|
|
static void sev_decommission(unsigned int handle)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_decommission decommission;
|
2021-06-10 17:46:04 +00:00
|
|
|
|
|
|
|
if (!handle)
|
|
|
|
return;
|
|
|
|
|
|
|
|
decommission.handle = handle;
|
|
|
|
sev_guest_decommission(&decommission, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
|
|
|
|
{
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_deactivate deactivate;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
if (!handle)
|
|
|
|
return;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
deactivate.handle = handle;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
|
|
|
|
down_read(&sev_deactivate_lock);
|
2021-04-06 22:49:52 +00:00
|
|
|
sev_guest_deactivate(&deactivate, NULL);
|
2020-03-24 09:41:54 +00:00
|
|
|
up_read(&sev_deactivate_lock);
|
|
|
|
|
2021-06-10 17:46:04 +00:00
|
|
|
sev_decommission(handle);
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
int asid, ret;
|
|
|
|
|
2021-03-31 03:19:36 +00:00
|
|
|
if (kvm->created_vcpus)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
ret = -EBUSY;
|
|
|
|
if (unlikely(sev->active))
|
|
|
|
return ret;
|
|
|
|
|
2021-11-09 21:50:58 +00:00
|
|
|
sev->active = true;
|
|
|
|
sev->es_active = argp->id == KVM_SEV_ES_INIT;
|
2020-12-10 17:10:05 +00:00
|
|
|
asid = sev_asid_new(sev);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (asid < 0)
|
2021-04-22 06:39:48 +00:00
|
|
|
goto e_no_asid;
|
2021-03-30 04:42:06 +00:00
|
|
|
sev->asid = asid;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
ret = sev_platform_init(&argp->error);
|
|
|
|
if (ret)
|
|
|
|
goto e_free;
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&sev->regions_list);
|
2022-02-11 19:36:34 +00:00
|
|
|
INIT_LIST_HEAD(&sev->mirror_vms);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2022-04-08 13:37:10 +00:00
|
|
|
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV);
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
e_free:
|
2021-03-30 04:42:06 +00:00
|
|
|
sev_asid_free(sev);
|
|
|
|
sev->asid = 0;
|
2021-04-22 06:39:48 +00:00
|
|
|
e_no_asid:
|
|
|
|
sev->es_active = false;
|
2021-11-09 21:50:58 +00:00
|
|
|
sev->active = false;
|
2020-03-24 09:41:54 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
|
|
|
|
{
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_activate activate;
|
2020-03-24 09:41:54 +00:00
|
|
|
int asid = sev_get_asid(kvm);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* activate ASID on the given handle */
|
2021-04-06 22:49:52 +00:00
|
|
|
activate.handle = handle;
|
|
|
|
activate.asid = asid;
|
|
|
|
ret = sev_guest_activate(&activate, error);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __sev_issue_cmd(int fd, int id, void *data, int *error)
|
|
|
|
{
|
|
|
|
struct fd f;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
f = fdget(fd);
|
|
|
|
if (!f.file)
|
|
|
|
return -EBADF;
|
|
|
|
|
|
|
|
ret = sev_issue_cmd_external_user(f.file, id, data, error);
|
|
|
|
|
|
|
|
fdput(f);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
|
|
|
|
return __sev_issue_cmd(sev->fd, id, data, error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_launch_start start;
|
2020-03-24 09:41:54 +00:00
|
|
|
struct kvm_sev_launch_start params;
|
|
|
|
void *dh_blob, *session_blob;
|
|
|
|
int *error = &argp->error;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&start, 0, sizeof(start));
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
dh_blob = NULL;
|
|
|
|
if (params.dh_uaddr) {
|
|
|
|
dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
|
2021-04-06 22:49:52 +00:00
|
|
|
if (IS_ERR(dh_blob))
|
|
|
|
return PTR_ERR(dh_blob);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
start.dh_cert_address = __sme_set(__pa(dh_blob));
|
|
|
|
start.dh_cert_len = params.dh_len;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
session_blob = NULL;
|
|
|
|
if (params.session_uaddr) {
|
|
|
|
session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
|
|
|
|
if (IS_ERR(session_blob)) {
|
|
|
|
ret = PTR_ERR(session_blob);
|
|
|
|
goto e_free_dh;
|
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
start.session_address = __sme_set(__pa(session_blob));
|
|
|
|
start.session_len = params.session_len;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
start.handle = params.handle;
|
|
|
|
start.policy = params.policy;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/* create memory encryption context */
|
2021-04-06 22:49:52 +00:00
|
|
|
ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (ret)
|
|
|
|
goto e_free_session;
|
|
|
|
|
|
|
|
/* Bind ASID to this guest */
|
2021-04-06 22:49:52 +00:00
|
|
|
ret = sev_bind_asid(kvm, start.handle, error);
|
2021-06-10 17:46:04 +00:00
|
|
|
if (ret) {
|
|
|
|
sev_decommission(start.handle);
|
2020-03-24 09:41:54 +00:00
|
|
|
goto e_free_session;
|
2021-06-10 17:46:04 +00:00
|
|
|
}
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/* return handle to userspace */
|
2021-04-06 22:49:52 +00:00
|
|
|
params.handle = start.handle;
|
2020-03-24 09:41:54 +00:00
|
|
|
if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) {
|
2021-04-06 22:49:52 +00:00
|
|
|
sev_unbind_asid(kvm, start.handle);
|
2020-03-24 09:41:54 +00:00
|
|
|
ret = -EFAULT;
|
|
|
|
goto e_free_session;
|
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
sev->handle = start.handle;
|
2020-03-24 09:41:54 +00:00
|
|
|
sev->fd = argp->sev_fd;
|
|
|
|
|
|
|
|
e_free_session:
|
|
|
|
kfree(session_blob);
|
|
|
|
e_free_dh:
|
|
|
|
kfree(dh_blob);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
|
|
|
|
unsigned long ulen, unsigned long *n,
|
|
|
|
int write)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2020-05-26 06:22:06 +00:00
|
|
|
unsigned long npages, size;
|
|
|
|
int npinned;
|
2020-03-24 09:41:54 +00:00
|
|
|
unsigned long locked, lock_limit;
|
|
|
|
struct page **pages;
|
|
|
|
unsigned long first, last;
|
2020-07-14 14:23:51 +00:00
|
|
|
int ret;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-01-27 16:15:24 +00:00
|
|
|
lockdep_assert_held(&kvm->lock);
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
if (ulen == 0 || uaddr + ulen < uaddr)
|
2020-06-23 09:12:24 +00:00
|
|
|
return ERR_PTR(-EINVAL);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/* Calculate number of pages. */
|
|
|
|
first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
|
|
|
|
last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
|
|
|
|
npages = (last - first + 1);
|
|
|
|
|
|
|
|
locked = sev->pages_locked + npages;
|
|
|
|
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
|
|
|
if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
|
|
|
|
pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
|
2020-06-23 09:12:24 +00:00
|
|
|
return ERR_PTR(-ENOMEM);
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 06:22:06 +00:00
|
|
|
if (WARN_ON_ONCE(npages > INT_MAX))
|
2020-06-23 09:12:24 +00:00
|
|
|
return ERR_PTR(-EINVAL);
|
2020-05-26 06:22:06 +00:00
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
/* Avoid using vmalloc for smaller buffers. */
|
|
|
|
size = npages * sizeof(struct page *);
|
|
|
|
if (size > PAGE_SIZE)
|
2020-06-02 04:51:40 +00:00
|
|
|
pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
2020-03-24 09:41:54 +00:00
|
|
|
else
|
|
|
|
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
|
|
|
|
|
|
|
|
if (!pages)
|
2020-06-23 09:12:24 +00:00
|
|
|
return ERR_PTR(-ENOMEM);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/* Pin the user virtual address. */
|
2020-05-26 06:22:07 +00:00
|
|
|
npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (npinned != npages) {
|
|
|
|
pr_err("SEV: Failure locking %lu pages.\n", npages);
|
2020-07-14 14:23:51 +00:00
|
|
|
ret = -ENOMEM;
|
2020-03-24 09:41:54 +00:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
*n = npages;
|
|
|
|
sev->pages_locked = locked;
|
|
|
|
|
|
|
|
return pages;
|
|
|
|
|
|
|
|
err:
|
2020-07-14 14:23:51 +00:00
|
|
|
if (npinned > 0)
|
2020-05-26 06:22:07 +00:00
|
|
|
unpin_user_pages(pages, npinned);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
kvfree(pages);
|
2020-07-14 14:23:51 +00:00
|
|
|
return ERR_PTR(ret);
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
|
|
|
|
unsigned long npages)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
|
2020-05-26 06:22:07 +00:00
|
|
|
unpin_user_pages(pages, npages);
|
2020-03-24 09:41:54 +00:00
|
|
|
kvfree(pages);
|
|
|
|
sev->pages_locked -= npages;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sev_clflush_pages(struct page *pages[], unsigned long npages)
|
|
|
|
{
|
|
|
|
uint8_t *page_virtual;
|
|
|
|
unsigned long i;
|
|
|
|
|
2020-09-17 21:20:38 +00:00
|
|
|
if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
|
|
|
|
pages == NULL)
|
2020-03-24 09:41:54 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < npages; i++) {
|
2022-09-28 09:27:48 +00:00
|
|
|
page_virtual = kmap_local_page(pages[i]);
|
2020-03-24 09:41:54 +00:00
|
|
|
clflush_cache_range(page_virtual, PAGE_SIZE);
|
2022-09-28 09:27:48 +00:00
|
|
|
kunmap_local(page_virtual);
|
2022-03-30 16:43:06 +00:00
|
|
|
cond_resched();
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned long get_num_contig_pages(unsigned long idx,
|
|
|
|
struct page **inpages, unsigned long npages)
|
|
|
|
{
|
|
|
|
unsigned long paddr, next_paddr;
|
|
|
|
unsigned long i = idx + 1, pages = 1;
|
|
|
|
|
|
|
|
/* find the number of contiguous pages starting from idx */
|
|
|
|
paddr = __sme_page_pa(inpages[idx]);
|
|
|
|
while (i < npages) {
|
|
|
|
next_paddr = __sme_page_pa(inpages[i++]);
|
|
|
|
if ((paddr + PAGE_SIZE) == next_paddr) {
|
|
|
|
pages++;
|
|
|
|
paddr = next_paddr;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return pages;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
struct kvm_sev_launch_update_data params;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_launch_update_data data;
|
2020-03-24 09:41:54 +00:00
|
|
|
struct page **inpages;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
vaddr = params.uaddr;
|
|
|
|
size = params.len;
|
|
|
|
vaddr_end = vaddr + size;
|
|
|
|
|
|
|
|
/* Lock the user memory. */
|
|
|
|
inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
|
2021-04-06 22:49:52 +00:00
|
|
|
if (IS_ERR(inpages))
|
|
|
|
return PTR_ERR(inpages);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/*
|
2020-09-23 17:01:33 +00:00
|
|
|
* Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
|
|
|
|
* place; the cache may contain the data that was written unencrypted.
|
2020-03-24 09:41:54 +00:00
|
|
|
*/
|
|
|
|
sev_clflush_pages(inpages, npages);
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.reserved = 0;
|
|
|
|
data.handle = sev->handle;
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
|
|
|
|
int offset, len;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the user buffer is not page-aligned, calculate the offset
|
|
|
|
* within the page.
|
|
|
|
*/
|
|
|
|
offset = vaddr & (PAGE_SIZE - 1);
|
|
|
|
|
|
|
|
/* Calculate the number of pages that can be encrypted in one go. */
|
|
|
|
pages = get_num_contig_pages(i, inpages, npages);
|
|
|
|
|
|
|
|
len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.len = len;
|
|
|
|
data.address = __sme_page_pa(inpages[i]) + offset;
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (ret)
|
|
|
|
goto e_unpin;
|
|
|
|
|
|
|
|
size -= len;
|
|
|
|
next_vaddr = vaddr + len;
|
|
|
|
}
|
|
|
|
|
|
|
|
e_unpin:
|
|
|
|
/* content of memory is updated, mark pages dirty */
|
|
|
|
for (i = 0; i < npages; i++) {
|
|
|
|
set_page_dirty_lock(inpages[i]);
|
|
|
|
mark_page_accessed(inpages[i]);
|
|
|
|
}
|
|
|
|
/* unlock the user pages */
|
|
|
|
sev_unpin_memory(kvm, inpages, npages);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-12-10 17:10:09 +00:00
|
|
|
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
|
|
|
|
{
|
2022-04-05 18:27:43 +00:00
|
|
|
struct sev_es_save_area *save = svm->sev_es.vmsa;
|
2020-12-10 17:10:09 +00:00
|
|
|
|
|
|
|
/* Check some debug related fields before encrypting the VMSA */
|
2022-04-05 18:27:43 +00:00
|
|
|
if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1))
|
2020-12-10 17:10:09 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2022-04-05 18:27:43 +00:00
|
|
|
/*
|
|
|
|
* SEV-ES will use a VMSA that is pointed to by the VMCB, not
|
|
|
|
* the traditional VMSA that is part of the VMCB. Copy the
|
|
|
|
* traditional VMSA as it has been built so far (in prep
|
|
|
|
* for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
|
|
|
|
*/
|
|
|
|
memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save));
|
|
|
|
|
2020-12-10 17:10:09 +00:00
|
|
|
/* Sync registgers */
|
|
|
|
save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
|
|
|
|
save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
|
|
|
|
save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
|
|
|
|
save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
|
|
|
|
save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
|
|
|
|
save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
|
|
|
|
save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
|
|
|
|
save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
|
2020-12-16 18:08:21 +00:00
|
|
|
#ifdef CONFIG_X86_64
|
2020-12-10 17:10:09 +00:00
|
|
|
save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8];
|
|
|
|
save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9];
|
|
|
|
save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
|
|
|
|
save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
|
|
|
|
save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
|
|
|
|
save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
|
|
|
|
save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
|
|
|
|
save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
|
2020-12-16 18:08:21 +00:00
|
|
|
#endif
|
2020-12-10 17:10:09 +00:00
|
|
|
save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
|
|
|
|
|
|
|
|
/* Sync some non-GPR registers before encrypting */
|
|
|
|
save->xcr0 = svm->vcpu.arch.xcr0;
|
|
|
|
save->pkru = svm->vcpu.arch.pkru;
|
|
|
|
save->xss = svm->vcpu.arch.ia32_xss;
|
2021-07-13 16:33:10 +00:00
|
|
|
save->dr6 = svm->vcpu.arch.dr6;
|
2020-12-10 17:10:09 +00:00
|
|
|
|
2022-07-28 05:09:19 +00:00
|
|
|
pr_debug("Virtual Machine Save Area (VMSA):\n");
|
2022-11-04 14:22:20 +00:00
|
|
|
print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
|
2022-07-28 05:09:19 +00:00
|
|
|
|
2020-12-10 17:10:09 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-09-15 17:17:55 +00:00
|
|
|
static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
|
|
|
int *error)
|
2020-12-10 17:10:09 +00:00
|
|
|
{
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_launch_update_vmsa vmsa;
|
2021-09-15 17:17:55 +00:00
|
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Perform some pre-encryption checks against the VMSA */
|
|
|
|
ret = sev_es_sync_vmsa(svm);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
|
|
|
|
* the VMSA memory content (i.e it will write the same memory region
|
|
|
|
* with the guest's key), so invalidate it first.
|
|
|
|
*/
|
2021-10-21 17:42:59 +00:00
|
|
|
clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
|
2021-09-15 17:17:55 +00:00
|
|
|
|
|
|
|
vmsa.reserved = 0;
|
|
|
|
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
|
2021-10-21 17:42:59 +00:00
|
|
|
vmsa.address = __sme_pa(svm->sev_es.vmsa);
|
2021-09-15 17:17:55 +00:00
|
|
|
vmsa.len = PAGE_SIZE;
|
2021-10-15 17:32:22 +00:00
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
vcpu->arch.guest_state_protected = true;
|
|
|
|
return 0;
|
2021-09-15 17:17:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
2021-03-31 03:19:34 +00:00
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
|
|
|
int ret;
|
2020-12-10 17:10:09 +00:00
|
|
|
|
|
|
|
if (!sev_es_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
2021-03-31 03:19:34 +00:00
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
2021-09-15 17:17:55 +00:00
|
|
|
ret = mutex_lock_killable(&vcpu->mutex);
|
2020-12-10 17:10:09 +00:00
|
|
|
if (ret)
|
2021-04-06 22:49:52 +00:00
|
|
|
return ret;
|
2020-12-10 17:10:09 +00:00
|
|
|
|
2021-09-15 17:17:55 +00:00
|
|
|
ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
|
2020-12-10 17:10:09 +00:00
|
|
|
|
2021-09-15 17:17:55 +00:00
|
|
|
mutex_unlock(&vcpu->mutex);
|
2020-12-10 17:10:09 +00:00
|
|
|
if (ret)
|
2021-04-06 22:49:52 +00:00
|
|
|
return ret;
|
2020-12-10 17:10:09 +00:00
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
return 0;
|
2020-12-10 17:10:09 +00:00
|
|
|
}
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
void __user *measure = (void __user *)(uintptr_t)argp->data;
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_launch_measure data;
|
2020-03-24 09:41:54 +00:00
|
|
|
struct kvm_sev_launch_measure params;
|
|
|
|
void __user *p = NULL;
|
|
|
|
void *blob = NULL;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, measure, sizeof(params)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/* User wants to query the blob length */
|
|
|
|
if (!params.len)
|
|
|
|
goto cmd;
|
|
|
|
|
|
|
|
p = (void __user *)(uintptr_t)params.uaddr;
|
|
|
|
if (p) {
|
2021-04-06 22:49:52 +00:00
|
|
|
if (params.len > SEV_FW_BLOB_MAX_SIZE)
|
|
|
|
return -EINVAL;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2022-05-16 15:43:10 +00:00
|
|
|
blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (!blob)
|
2021-04-06 22:49:52 +00:00
|
|
|
return -ENOMEM;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.address = __psp_pa(blob);
|
|
|
|
data.len = params.len;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cmd:
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we query the session length, FW responded with expected data.
|
|
|
|
*/
|
|
|
|
if (!params.len)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
goto e_free_blob;
|
|
|
|
|
|
|
|
if (blob) {
|
|
|
|
if (copy_to_user(p, blob, params.len))
|
|
|
|
ret = -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
2021-04-06 22:49:52 +00:00
|
|
|
params.len = data.len;
|
2020-03-24 09:41:54 +00:00
|
|
|
if (copy_to_user(measure, ¶ms, sizeof(params)))
|
|
|
|
ret = -EFAULT;
|
|
|
|
e_free_blob:
|
|
|
|
kfree(blob);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_launch_finish data;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
struct kvm_sev_guest_status params;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_guest_status data;
|
2020-03-24 09:41:54 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (ret)
|
2021-04-06 22:49:52 +00:00
|
|
|
return ret;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
params.policy = data.policy;
|
|
|
|
params.state = data.state;
|
|
|
|
params.handle = data.handle;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params)))
|
|
|
|
ret = -EFAULT;
|
2021-04-06 22:49:52 +00:00
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Build a sev_data_dbg descriptor and issue either SEV_CMD_DBG_ENCRYPT
 * (@enc true) or SEV_CMD_DBG_DECRYPT (@enc false) for this VM's handle.
 *
 * @src/@dst are passed through as the descriptor's source and destination
 * addresses and @size as its length.  Returns the sev_issue_cmd() result;
 * @error is forwarded to it.
 */
static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
			       unsigned long dst, int size,
			       int *error, bool enc)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_dbg data;
	int cmd;

	data.reserved = 0;
	data.handle = sev->handle;
	data.dst_addr = dst;
	data.src_addr = src;
	data.len = size;

	if (enc)
		cmd = SEV_CMD_DBG_ENCRYPT;
	else
		cmd = SEV_CMD_DBG_DECRYPT;

	return sev_issue_cmd(kvm, cmd, &data, error);
}
|
|
|
|
|
|
|
|
static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
|
|
|
|
unsigned long dst_paddr, int sz, int *err)
|
|
|
|
{
|
|
|
|
int offset;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Its safe to read more than we are asked, caller should ensure that
|
|
|
|
* destination has enough space.
|
|
|
|
*/
|
|
|
|
offset = src_paddr & 15;
|
2020-11-10 22:42:05 +00:00
|
|
|
src_paddr = round_down(src_paddr, 16);
|
2020-03-24 09:41:54 +00:00
|
|
|
sz = round_up(sz + offset, 16);
|
|
|
|
|
|
|
|
return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
|
2021-05-06 23:15:42 +00:00
|
|
|
void __user *dst_uaddr,
|
2020-03-24 09:41:54 +00:00
|
|
|
unsigned long dst_paddr,
|
|
|
|
int size, int *err)
|
|
|
|
{
|
|
|
|
struct page *tpage = NULL;
|
|
|
|
int ret, offset;
|
|
|
|
|
|
|
|
/* if inputs are not 16-byte then use intermediate buffer */
|
|
|
|
if (!IS_ALIGNED(dst_paddr, 16) ||
|
|
|
|
!IS_ALIGNED(paddr, 16) ||
|
|
|
|
!IS_ALIGNED(size, 16)) {
|
2022-05-16 15:43:10 +00:00
|
|
|
tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (!tpage)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
dst_paddr = __sme_page_pa(tpage);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
|
|
|
|
if (ret)
|
|
|
|
goto e_free;
|
|
|
|
|
|
|
|
if (tpage) {
|
|
|
|
offset = paddr & 15;
|
2021-05-06 23:15:42 +00:00
|
|
|
if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
|
2020-03-24 09:41:54 +00:00
|
|
|
ret = -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
e_free:
|
|
|
|
if (tpage)
|
|
|
|
__free_page(tpage);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
|
2021-05-06 23:15:42 +00:00
|
|
|
void __user *vaddr,
|
2020-03-24 09:41:54 +00:00
|
|
|
unsigned long dst_paddr,
|
2021-05-06 23:15:42 +00:00
|
|
|
void __user *dst_vaddr,
|
2020-03-24 09:41:54 +00:00
|
|
|
int size, int *error)
|
|
|
|
{
|
|
|
|
struct page *src_tpage = NULL;
|
|
|
|
struct page *dst_tpage = NULL;
|
|
|
|
int ret, len = size;
|
|
|
|
|
|
|
|
/* If source buffer is not aligned then use an intermediate buffer */
|
2021-05-06 23:15:42 +00:00
|
|
|
if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
|
2022-06-23 17:18:58 +00:00
|
|
|
src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (!src_tpage)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2021-05-06 23:15:42 +00:00
|
|
|
if (copy_from_user(page_address(src_tpage), vaddr, size)) {
|
2020-03-24 09:41:54 +00:00
|
|
|
__free_page(src_tpage);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
paddr = __sme_page_pa(src_tpage);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If destination buffer or length is not aligned then do read-modify-write:
|
|
|
|
* - decrypt destination in an intermediate buffer
|
|
|
|
* - copy the source buffer in an intermediate buffer
|
|
|
|
* - use the intermediate buffer as source buffer
|
|
|
|
*/
|
2021-05-06 23:15:42 +00:00
|
|
|
if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
|
2020-03-24 09:41:54 +00:00
|
|
|
int dst_offset;
|
|
|
|
|
2022-06-23 17:18:58 +00:00
|
|
|
dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
|
2020-03-24 09:41:54 +00:00
|
|
|
if (!dst_tpage) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto e_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = __sev_dbg_decrypt(kvm, dst_paddr,
|
|
|
|
__sme_page_pa(dst_tpage), size, error);
|
|
|
|
if (ret)
|
|
|
|
goto e_free;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If source is kernel buffer then use memcpy() otherwise
|
|
|
|
* copy_from_user().
|
|
|
|
*/
|
|
|
|
dst_offset = dst_paddr & 15;
|
|
|
|
|
|
|
|
if (src_tpage)
|
|
|
|
memcpy(page_address(dst_tpage) + dst_offset,
|
|
|
|
page_address(src_tpage), size);
|
|
|
|
else {
|
|
|
|
if (copy_from_user(page_address(dst_tpage) + dst_offset,
|
2021-05-06 23:15:42 +00:00
|
|
|
vaddr, size)) {
|
2020-03-24 09:41:54 +00:00
|
|
|
ret = -EFAULT;
|
|
|
|
goto e_free;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
paddr = __sme_page_pa(dst_tpage);
|
|
|
|
dst_paddr = round_down(dst_paddr, 16);
|
|
|
|
len = round_up(size, 16);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
|
|
|
|
|
|
|
|
e_free:
|
|
|
|
if (src_tpage)
|
|
|
|
__free_page(src_tpage);
|
|
|
|
if (dst_tpage)
|
|
|
|
__free_page(dst_tpage);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * KVM_SEV_DBG_DECRYPT / KVM_SEV_DBG_ENCRYPT handler (@dec selects decrypt).
 *
 * Reads a struct kvm_sev_dbg from argp->data and processes the request in
 * chunks.  For each chunk one source page and one destination page are
 * pinned, flushed, handed to the decrypt/encrypt helper and unpinned again.
 *
 * Returns -ENOTTY if @kvm is not an SEV guest, -EFAULT if the request cannot
 * be read, -EINVAL for a zero length, a wrapping source range or a NULL
 * destination, a pinning error, or the first error reported by a helper.
 */
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
	unsigned long vaddr, vaddr_end, next_vaddr;
	unsigned long dst_vaddr;
	struct page **src_p, **dst_p;
	struct kvm_sev_dbg debug;
	unsigned long n;
	unsigned int size;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
		return -EINVAL;
	if (!debug.dst_uaddr)
		return -EINVAL;

	vaddr = debug.src_uaddr;
	size = debug.len;
	vaddr_end = vaddr + size;
	dst_vaddr = debug.dst_uaddr;

	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
		int len, s_off, d_off;

		/* lock userspace source and destination page */
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (IS_ERR(src_p))
			return PTR_ERR(src_p);

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (IS_ERR(dst_p)) {
			sev_unpin_memory(kvm, src_p, n);
			return PTR_ERR(dst_p);
		}

		/*
		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
		 * the pages; flush the destination too so that future accesses do not
		 * see stale data.
		 */
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);

		/*
		 * Since user buffer may not be page aligned, calculate the
		 * offset within the page.
		 */
		s_off = vaddr & ~PAGE_MASK;
		d_off = dst_vaddr & ~PAGE_MASK;

		/*
		 * Only one source page and one destination page are pinned
		 * per iteration, so the chunk must end at whichever page
		 * boundary comes first.  Bounding it by the source offset
		 * alone lets the write run past the pinned destination page
		 * when d_off > s_off.
		 */
		len = min_t(size_t, (PAGE_SIZE - s_off), size);
		len = min_t(size_t, (PAGE_SIZE - d_off), len);

		if (dec)
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     (void __user *)dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}
|
|
|
|
|
|
|
|
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_launch_secret data;
|
2020-03-24 09:41:54 +00:00
|
|
|
struct kvm_sev_launch_secret params;
|
|
|
|
struct page **pages;
|
|
|
|
void *blob, *hdr;
|
2020-08-08 00:37:46 +00:00
|
|
|
unsigned long n, i;
|
2020-03-24 09:41:54 +00:00
|
|
|
int ret, offset;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
|
2020-06-23 09:12:24 +00:00
|
|
|
if (IS_ERR(pages))
|
|
|
|
return PTR_ERR(pages);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2020-08-08 00:37:46 +00:00
|
|
|
/*
|
2020-09-23 17:01:33 +00:00
|
|
|
* Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
|
|
|
|
* place; the cache may contain the data that was written unencrypted.
|
2020-08-08 00:37:46 +00:00
|
|
|
*/
|
|
|
|
sev_clflush_pages(pages, n);
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
/*
|
|
|
|
* The secret must be copied into contiguous memory region, lets verify
|
|
|
|
* that userspace memory pages are contiguous before we issue command.
|
|
|
|
*/
|
|
|
|
if (get_num_contig_pages(0, pages, n) != n) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto e_unpin_memory;
|
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
offset = params.guest_uaddr & (PAGE_SIZE - 1);
|
2021-04-06 22:49:52 +00:00
|
|
|
data.guest_address = __sme_page_pa(pages[0]) + offset;
|
|
|
|
data.guest_len = params.guest_len;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
|
|
|
|
if (IS_ERR(blob)) {
|
|
|
|
ret = PTR_ERR(blob);
|
2021-04-06 22:49:52 +00:00
|
|
|
goto e_unpin_memory;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.trans_address = __psp_pa(blob);
|
|
|
|
data.trans_len = params.trans_len;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
|
|
|
|
if (IS_ERR(hdr)) {
|
|
|
|
ret = PTR_ERR(hdr);
|
|
|
|
goto e_free_blob;
|
|
|
|
}
|
2021-04-06 22:49:52 +00:00
|
|
|
data.hdr_address = __psp_pa(hdr);
|
|
|
|
data.hdr_len = params.hdr_len;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
kfree(hdr);
|
|
|
|
|
|
|
|
e_free_blob:
|
|
|
|
kfree(blob);
|
|
|
|
e_unpin_memory:
|
2020-08-08 00:37:46 +00:00
|
|
|
/* content of memory is updated, mark pages dirty */
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
set_page_dirty_lock(pages[i]);
|
|
|
|
mark_page_accessed(pages[i]);
|
|
|
|
}
|
2020-03-24 09:41:54 +00:00
|
|
|
sev_unpin_memory(kvm, pages, n);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-01-04 15:17:49 +00:00
|
|
|
static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
void __user *report = (void __user *)(uintptr_t)argp->data;
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_attestation_report data;
|
2021-01-04 15:17:49 +00:00
|
|
|
struct kvm_sev_attestation_report params;
|
|
|
|
void __user *p;
|
|
|
|
void *blob = NULL;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
2021-01-04 15:17:49 +00:00
|
|
|
|
|
|
|
/* User wants to query the blob length */
|
|
|
|
if (!params.len)
|
|
|
|
goto cmd;
|
|
|
|
|
|
|
|
p = (void __user *)(uintptr_t)params.uaddr;
|
|
|
|
if (p) {
|
2021-04-06 22:49:52 +00:00
|
|
|
if (params.len > SEV_FW_BLOB_MAX_SIZE)
|
|
|
|
return -EINVAL;
|
2021-01-04 15:17:49 +00:00
|
|
|
|
2022-05-16 15:43:10 +00:00
|
|
|
blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
|
2021-01-04 15:17:49 +00:00
|
|
|
if (!blob)
|
2021-04-06 22:49:52 +00:00
|
|
|
return -ENOMEM;
|
2021-01-04 15:17:49 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.address = __psp_pa(blob);
|
|
|
|
data.len = params.len;
|
|
|
|
memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
|
2021-01-04 15:17:49 +00:00
|
|
|
}
|
|
|
|
cmd:
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
|
2021-01-04 15:17:49 +00:00
|
|
|
/*
|
|
|
|
* If we query the session length, FW responded with expected data.
|
|
|
|
*/
|
|
|
|
if (!params.len)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
goto e_free_blob;
|
|
|
|
|
|
|
|
if (blob) {
|
|
|
|
if (copy_to_user(p, blob, params.len))
|
|
|
|
ret = -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
2021-04-06 22:49:52 +00:00
|
|
|
params.len = data.len;
|
2021-01-04 15:17:49 +00:00
|
|
|
if (copy_to_user(report, ¶ms, sizeof(params)))
|
|
|
|
ret = -EFAULT;
|
|
|
|
e_free_blob:
|
|
|
|
kfree(blob);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-15 15:53:14 +00:00
|
|
|
/* Userspace wants to query session length. */
|
|
|
|
static int
|
|
|
|
__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
|
|
|
|
struct kvm_sev_send_start *params)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_send_start data;
|
2021-04-15 15:53:14 +00:00
|
|
|
int ret;
|
|
|
|
|
2021-06-07 06:15:32 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
|
2021-04-15 15:53:14 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
params->session_len = data.session_len;
|
2021-04-15 15:53:14 +00:00
|
|
|
if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
|
|
|
|
sizeof(struct kvm_sev_send_start)))
|
|
|
|
ret = -EFAULT;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_send_start data;
|
2021-04-15 15:53:14 +00:00
|
|
|
struct kvm_sev_send_start params;
|
|
|
|
void *amd_certs, *session_data;
|
|
|
|
void *pdh_cert, *plat_certs;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data,
|
|
|
|
sizeof(struct kvm_sev_send_start)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* if session_len is zero, userspace wants to query the session length */
|
|
|
|
if (!params.session_len)
|
|
|
|
return __sev_send_start_query_session_length(kvm, argp,
|
|
|
|
¶ms);
|
|
|
|
|
|
|
|
/* some sanity checks */
|
|
|
|
if (!params.pdh_cert_uaddr || !params.pdh_cert_len ||
|
|
|
|
!params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* allocate the memory to hold the session data blob */
|
2022-05-16 15:43:10 +00:00
|
|
|
session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
|
2021-04-15 15:53:14 +00:00
|
|
|
if (!session_data)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
/* copy the certificate blobs from userspace */
|
|
|
|
pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr,
|
|
|
|
params.pdh_cert_len);
|
|
|
|
if (IS_ERR(pdh_cert)) {
|
|
|
|
ret = PTR_ERR(pdh_cert);
|
|
|
|
goto e_free_session;
|
|
|
|
}
|
|
|
|
|
|
|
|
plat_certs = psp_copy_user_blob(params.plat_certs_uaddr,
|
|
|
|
params.plat_certs_len);
|
|
|
|
if (IS_ERR(plat_certs)) {
|
|
|
|
ret = PTR_ERR(plat_certs);
|
|
|
|
goto e_free_pdh;
|
|
|
|
}
|
|
|
|
|
|
|
|
amd_certs = psp_copy_user_blob(params.amd_certs_uaddr,
|
|
|
|
params.amd_certs_len);
|
|
|
|
if (IS_ERR(amd_certs)) {
|
|
|
|
ret = PTR_ERR(amd_certs);
|
|
|
|
goto e_free_plat_cert;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* populate the FW SEND_START field with system physical address */
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
|
|
|
data.pdh_cert_address = __psp_pa(pdh_cert);
|
|
|
|
data.pdh_cert_len = params.pdh_cert_len;
|
|
|
|
data.plat_certs_address = __psp_pa(plat_certs);
|
|
|
|
data.plat_certs_len = params.plat_certs_len;
|
|
|
|
data.amd_certs_address = __psp_pa(amd_certs);
|
|
|
|
data.amd_certs_len = params.amd_certs_len;
|
|
|
|
data.session_address = __psp_pa(session_data);
|
|
|
|
data.session_len = params.session_len;
|
|
|
|
data.handle = sev->handle;
|
|
|
|
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
|
2021-04-15 15:53:14 +00:00
|
|
|
|
|
|
|
if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
|
|
|
|
session_data, params.session_len)) {
|
|
|
|
ret = -EFAULT;
|
2021-04-06 22:49:52 +00:00
|
|
|
goto e_free_amd_cert;
|
2021-04-15 15:53:14 +00:00
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
params.policy = data.policy;
|
|
|
|
params.session_len = data.session_len;
|
2021-04-15 15:53:14 +00:00
|
|
|
if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms,
|
|
|
|
sizeof(struct kvm_sev_send_start)))
|
|
|
|
ret = -EFAULT;
|
|
|
|
|
|
|
|
e_free_amd_cert:
|
|
|
|
kfree(amd_certs);
|
|
|
|
e_free_plat_cert:
|
|
|
|
kfree(plat_certs);
|
|
|
|
e_free_pdh:
|
|
|
|
kfree(pdh_cert);
|
|
|
|
e_free_session:
|
|
|
|
kfree(session_data);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-15 15:53:55 +00:00
|
|
|
/* Userspace wants to query either header or trans length. */
|
|
|
|
static int
|
|
|
|
__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
|
|
|
|
struct kvm_sev_send_update_data *params)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_send_update_data data;
|
2021-04-15 15:53:55 +00:00
|
|
|
int ret;
|
|
|
|
|
2021-06-07 06:15:32 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
|
2021-04-15 15:53:55 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
params->hdr_len = data.hdr_len;
|
|
|
|
params->trans_len = data.trans_len;
|
2021-04-15 15:53:55 +00:00
|
|
|
|
|
|
|
if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
|
|
|
|
sizeof(struct kvm_sev_send_update_data)))
|
|
|
|
ret = -EFAULT;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_send_update_data data;
|
2021-04-15 15:53:55 +00:00
|
|
|
struct kvm_sev_send_update_data params;
|
|
|
|
void *hdr, *trans_data;
|
|
|
|
struct page **guest_page;
|
|
|
|
unsigned long n;
|
|
|
|
int ret, offset;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data,
|
|
|
|
sizeof(struct kvm_sev_send_update_data)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* userspace wants to query either header or trans length */
|
|
|
|
if (!params.trans_len || !params.hdr_len)
|
|
|
|
return __sev_send_update_data_query_lengths(kvm, argp, ¶ms);
|
|
|
|
|
|
|
|
if (!params.trans_uaddr || !params.guest_uaddr ||
|
|
|
|
!params.guest_len || !params.hdr_uaddr)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Check if we are crossing the page boundary */
|
|
|
|
offset = params.guest_uaddr & (PAGE_SIZE - 1);
|
|
|
|
if ((params.guest_len + offset > PAGE_SIZE))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Pin guest memory */
|
|
|
|
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
|
|
|
|
PAGE_SIZE, &n, 0);
|
2021-05-06 17:58:26 +00:00
|
|
|
if (IS_ERR(guest_page))
|
|
|
|
return PTR_ERR(guest_page);
|
2021-04-15 15:53:55 +00:00
|
|
|
|
|
|
|
/* allocate memory for header and transport buffer */
|
|
|
|
ret = -ENOMEM;
|
2022-05-16 15:43:10 +00:00
|
|
|
hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
|
2021-04-15 15:53:55 +00:00
|
|
|
if (!hdr)
|
|
|
|
goto e_unpin;
|
|
|
|
|
2022-05-16 15:43:10 +00:00
|
|
|
trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
|
2021-04-15 15:53:55 +00:00
|
|
|
if (!trans_data)
|
|
|
|
goto e_free_hdr;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
|
|
|
data.hdr_address = __psp_pa(hdr);
|
|
|
|
data.hdr_len = params.hdr_len;
|
|
|
|
data.trans_address = __psp_pa(trans_data);
|
|
|
|
data.trans_len = params.trans_len;
|
2021-04-15 15:53:55 +00:00
|
|
|
|
|
|
|
/* The SEND_UPDATE_DATA command requires C-bit to be always set. */
|
2021-04-06 22:49:52 +00:00
|
|
|
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
|
|
|
|
data.guest_address |= sev_me_mask;
|
|
|
|
data.guest_len = params.guest_len;
|
|
|
|
data.handle = sev->handle;
|
2021-04-15 15:53:55 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
|
2021-04-15 15:53:55 +00:00
|
|
|
|
|
|
|
if (ret)
|
2021-04-06 22:49:52 +00:00
|
|
|
goto e_free_trans_data;
|
2021-04-15 15:53:55 +00:00
|
|
|
|
|
|
|
/* copy transport buffer to user space */
|
|
|
|
if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
|
|
|
|
trans_data, params.trans_len)) {
|
|
|
|
ret = -EFAULT;
|
2021-04-06 22:49:52 +00:00
|
|
|
goto e_free_trans_data;
|
2021-04-15 15:53:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Copy packet header to userspace. */
|
2021-05-06 17:58:25 +00:00
|
|
|
if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
|
|
|
|
params.hdr_len))
|
|
|
|
ret = -EFAULT;
|
2021-04-15 15:53:55 +00:00
|
|
|
|
|
|
|
e_free_trans_data:
|
|
|
|
kfree(trans_data);
|
|
|
|
e_free_hdr:
|
|
|
|
kfree(hdr);
|
|
|
|
e_unpin:
|
|
|
|
sev_unpin_memory(kvm, guest_page, n);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-15 15:54:15 +00:00
|
|
|
static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_send_finish data;
|
2021-04-15 15:54:15 +00:00
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
|
2021-04-15 15:54:15 +00:00
|
|
|
}
|
|
|
|
|
2021-04-20 09:01:20 +00:00
|
|
|
static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_send_cancel data;
|
2021-04-20 09:01:20 +00:00
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
|
2021-04-20 09:01:20 +00:00
|
|
|
}
|
|
|
|
|
2021-04-15 15:54:50 +00:00
|
|
|
static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_receive_start start;
|
2021-04-15 15:54:50 +00:00
|
|
|
struct kvm_sev_receive_start params;
|
|
|
|
int *error = &argp->error;
|
|
|
|
void *session_data;
|
|
|
|
void *pdh_data;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
/* Get parameter from the userspace */
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data,
|
|
|
|
sizeof(struct kvm_sev_receive_start)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* some sanity checks */
|
|
|
|
if (!params.pdh_uaddr || !params.pdh_len ||
|
|
|
|
!params.session_uaddr || !params.session_len)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len);
|
|
|
|
if (IS_ERR(pdh_data))
|
|
|
|
return PTR_ERR(pdh_data);
|
|
|
|
|
|
|
|
session_data = psp_copy_user_blob(params.session_uaddr,
|
|
|
|
params.session_len);
|
|
|
|
if (IS_ERR(session_data)) {
|
|
|
|
ret = PTR_ERR(session_data);
|
|
|
|
goto e_free_pdh;
|
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&start, 0, sizeof(start));
|
|
|
|
start.handle = params.handle;
|
|
|
|
start.policy = params.policy;
|
|
|
|
start.pdh_cert_address = __psp_pa(pdh_data);
|
|
|
|
start.pdh_cert_len = params.pdh_len;
|
|
|
|
start.session_address = __psp_pa(session_data);
|
|
|
|
start.session_len = params.session_len;
|
2021-04-15 15:54:50 +00:00
|
|
|
|
|
|
|
/* create memory encryption context */
|
2021-04-06 22:49:52 +00:00
|
|
|
ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start,
|
2021-04-15 15:54:50 +00:00
|
|
|
error);
|
|
|
|
if (ret)
|
2021-04-06 22:49:52 +00:00
|
|
|
goto e_free_session;
|
2021-04-15 15:54:50 +00:00
|
|
|
|
|
|
|
/* Bind ASID to this guest */
|
2021-04-06 22:49:52 +00:00
|
|
|
ret = sev_bind_asid(kvm, start.handle, error);
|
2021-09-12 18:18:15 +00:00
|
|
|
if (ret) {
|
|
|
|
sev_decommission(start.handle);
|
2021-04-06 22:49:52 +00:00
|
|
|
goto e_free_session;
|
2021-09-12 18:18:15 +00:00
|
|
|
}
|
2021-04-15 15:54:50 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
params.handle = start.handle;
|
2021-04-15 15:54:50 +00:00
|
|
|
if (copy_to_user((void __user *)(uintptr_t)argp->data,
|
|
|
|
¶ms, sizeof(struct kvm_sev_receive_start))) {
|
|
|
|
ret = -EFAULT;
|
2021-04-06 22:49:52 +00:00
|
|
|
sev_unbind_asid(kvm, start.handle);
|
|
|
|
goto e_free_session;
|
2021-04-15 15:54:50 +00:00
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
sev->handle = start.handle;
|
2021-04-15 15:54:50 +00:00
|
|
|
sev->fd = argp->sev_fd;
|
|
|
|
|
|
|
|
e_free_session:
|
|
|
|
kfree(session_data);
|
|
|
|
e_free_pdh:
|
|
|
|
kfree(pdh_data);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-15 15:55:17 +00:00
|
|
|
static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
struct kvm_sev_receive_update_data params;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_receive_update_data data;
|
2021-04-15 15:55:17 +00:00
|
|
|
void *hdr = NULL, *trans = NULL;
|
|
|
|
struct page **guest_page;
|
|
|
|
unsigned long n;
|
|
|
|
int ret, offset;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data,
|
|
|
|
sizeof(struct kvm_sev_receive_update_data)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (!params.hdr_uaddr || !params.hdr_len ||
|
|
|
|
!params.guest_uaddr || !params.guest_len ||
|
|
|
|
!params.trans_uaddr || !params.trans_len)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Check if we are crossing the page boundary */
|
|
|
|
offset = params.guest_uaddr & (PAGE_SIZE - 1);
|
|
|
|
if ((params.guest_len + offset > PAGE_SIZE))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
|
|
|
|
if (IS_ERR(hdr))
|
|
|
|
return PTR_ERR(hdr);
|
|
|
|
|
|
|
|
trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
|
|
|
|
if (IS_ERR(trans)) {
|
|
|
|
ret = PTR_ERR(trans);
|
|
|
|
goto e_free_hdr;
|
|
|
|
}
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
memset(&data, 0, sizeof(data));
|
|
|
|
data.hdr_address = __psp_pa(hdr);
|
|
|
|
data.hdr_len = params.hdr_len;
|
|
|
|
data.trans_address = __psp_pa(trans);
|
|
|
|
data.trans_len = params.trans_len;
|
2021-04-15 15:55:17 +00:00
|
|
|
|
|
|
|
/* Pin guest memory */
|
|
|
|
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
|
2021-09-14 21:09:50 +00:00
|
|
|
PAGE_SIZE, &n, 1);
|
2021-05-06 17:58:26 +00:00
|
|
|
if (IS_ERR(guest_page)) {
|
|
|
|
ret = PTR_ERR(guest_page);
|
2021-04-06 22:49:52 +00:00
|
|
|
goto e_free_trans;
|
2021-05-06 17:58:26 +00:00
|
|
|
}
|
2021-04-15 15:55:17 +00:00
|
|
|
|
2021-09-14 21:09:51 +00:00
|
|
|
/*
|
|
|
|
* Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
|
|
|
|
* encrypts the written data with the guest's key, and the cache may
|
|
|
|
* contain dirty, unencrypted data.
|
|
|
|
*/
|
|
|
|
sev_clflush_pages(guest_page, n);
|
|
|
|
|
2021-04-15 15:55:17 +00:00
|
|
|
/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
|
2021-04-06 22:49:52 +00:00
|
|
|
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
|
|
|
|
data.guest_address |= sev_me_mask;
|
|
|
|
data.guest_len = params.guest_len;
|
|
|
|
data.handle = sev->handle;
|
2021-04-15 15:55:17 +00:00
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
|
2021-04-15 15:55:17 +00:00
|
|
|
&argp->error);
|
|
|
|
|
|
|
|
sev_unpin_memory(kvm, guest_page, n);
|
|
|
|
|
|
|
|
e_free_trans:
|
|
|
|
kfree(trans);
|
|
|
|
e_free_hdr:
|
|
|
|
kfree(hdr);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-15 15:55:40 +00:00
|
|
|
static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
2021-04-06 22:49:52 +00:00
|
|
|
struct sev_data_receive_finish data;
|
2021-04-15 15:55:40 +00:00
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
2021-04-06 22:49:52 +00:00
|
|
|
data.handle = sev->handle;
|
|
|
|
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
|
2021-04-15 15:55:40 +00:00
|
|
|
}
|
|
|
|
|
2021-11-09 21:51:01 +00:00
|
|
|
/*
 * Return true if @cmd_id may be issued from a mirror VM.
 *
 * Mirror VMs are allowed to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
 * active mirror VMs, plus the debugging and status commands.  Everything
 * else is rejected.
 */
static bool is_cmd_allowed_from_mirror(u32 cmd_id)
{
	switch (cmd_id) {
	case KVM_SEV_LAUNCH_UPDATE_VMSA:
	case KVM_SEV_GUEST_STATUS:
	case KVM_SEV_DBG_DECRYPT:
	case KVM_SEV_DBG_ENCRYPT:
		return true;
	default:
		return false;
	}
}
|
|
|
|
|
2021-11-23 00:50:29 +00:00
|
|
|
static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
|
2021-10-21 17:43:00 +00:00
|
|
|
{
|
2021-11-23 00:50:29 +00:00
|
|
|
struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
|
|
|
|
struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
|
2021-11-23 00:50:36 +00:00
|
|
|
int r = -EBUSY;
|
2021-11-23 00:50:29 +00:00
|
|
|
|
|
|
|
if (dst_kvm == src_kvm)
|
|
|
|
return -EINVAL;
|
2021-10-21 17:43:00 +00:00
|
|
|
|
|
|
|
/*
|
2021-11-23 00:50:29 +00:00
|
|
|
* Bail if these VMs are already involved in a migration to avoid
|
|
|
|
* deadlock between two VMs trying to migrate to/from each other.
|
2021-10-21 17:43:00 +00:00
|
|
|
*/
|
2021-11-23 00:50:29 +00:00
|
|
|
if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
|
2021-10-21 17:43:00 +00:00
|
|
|
return -EBUSY;
|
|
|
|
|
2021-11-23 00:50:36 +00:00
|
|
|
if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
|
|
|
|
goto release_dst;
|
2021-10-21 17:43:00 +00:00
|
|
|
|
2021-11-23 00:50:36 +00:00
|
|
|
r = -EINTR;
|
|
|
|
if (mutex_lock_killable(&dst_kvm->lock))
|
|
|
|
goto release_src;
|
2022-01-05 06:41:03 +00:00
|
|
|
if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING))
|
2021-11-23 00:50:36 +00:00
|
|
|
goto unlock_dst;
|
2021-10-21 17:43:00 +00:00
|
|
|
return 0;
|
2021-11-23 00:50:36 +00:00
|
|
|
|
|
|
|
unlock_dst:
|
|
|
|
mutex_unlock(&dst_kvm->lock);
|
|
|
|
release_src:
|
|
|
|
atomic_set_release(&src_sev->migration_in_progress, 0);
|
|
|
|
release_dst:
|
|
|
|
atomic_set_release(&dst_sev->migration_in_progress, 0);
|
|
|
|
return r;
|
2021-10-21 17:43:00 +00:00
|
|
|
}
|
|
|
|
|
2021-11-23 00:50:29 +00:00
|
|
|
static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
|
2021-10-21 17:43:00 +00:00
|
|
|
{
|
2021-11-23 00:50:29 +00:00
|
|
|
struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
|
|
|
|
struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
|
2021-10-21 17:43:00 +00:00
|
|
|
|
2021-11-23 00:50:29 +00:00
|
|
|
mutex_unlock(&dst_kvm->lock);
|
|
|
|
mutex_unlock(&src_kvm->lock);
|
|
|
|
atomic_set_release(&dst_sev->migration_in_progress, 0);
|
|
|
|
atomic_set_release(&src_sev->migration_in_progress, 0);
|
2021-10-21 17:43:00 +00:00
|
|
|
}
|
|
|
|
|
2022-05-02 16:58:07 +00:00
|
|
|
/*
 * vCPU mutex subclasses.
 *
 * The value is passed as the lockdep subclass when a vCPU mutex is taken in
 * sev_lock_vcpus_for_migration().  SEV_NR_MIGRATION_ROLES doubles as the
 * subclass used for every vCPU mutex after the first one.
 */
enum sev_migration_role {
	SEV_MIGRATION_SOURCE	= 0,
	SEV_MIGRATION_TARGET	= 1,
	SEV_NR_MIGRATION_ROLES	= 2,
};
|
2021-10-21 17:43:00 +00:00
|
|
|
|
2022-05-02 16:58:07 +00:00
|
|
|
static int sev_lock_vcpus_for_migration(struct kvm *kvm,
|
|
|
|
enum sev_migration_role role)
|
2021-10-21 17:43:00 +00:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i, j;
|
2021-10-21 17:43:00 +00:00
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
2022-05-02 16:58:07 +00:00
|
|
|
if (mutex_lock_killable_nested(&vcpu->mutex, role))
|
2021-10-21 17:43:00 +00:00
|
|
|
goto out_unlock;
|
2022-05-02 16:58:07 +00:00
|
|
|
|
2022-06-13 21:42:37 +00:00
|
|
|
#ifdef CONFIG_PROVE_LOCKING
|
|
|
|
if (!i)
|
2022-05-02 16:58:07 +00:00
|
|
|
/*
|
|
|
|
* Reset the role to one that avoids colliding with
|
|
|
|
* the role used for the first vcpu mutex.
|
|
|
|
*/
|
|
|
|
role = SEV_NR_MIGRATION_ROLES;
|
2022-06-13 21:42:37 +00:00
|
|
|
else
|
2022-05-02 16:58:07 +00:00
|
|
|
mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
|
2022-06-13 21:42:37 +00:00
|
|
|
#endif
|
2021-10-21 17:43:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_unlock:
|
2022-05-02 16:58:07 +00:00
|
|
|
|
2021-10-21 17:43:00 +00:00
|
|
|
kvm_for_each_vcpu(j, vcpu, kvm) {
|
|
|
|
if (i == j)
|
|
|
|
break;
|
|
|
|
|
2022-06-13 21:42:37 +00:00
|
|
|
#ifdef CONFIG_PROVE_LOCKING
|
|
|
|
if (j)
|
2022-05-02 16:58:07 +00:00
|
|
|
mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
|
2022-06-13 21:42:37 +00:00
|
|
|
#endif
|
2022-05-02 16:58:07 +00:00
|
|
|
|
2021-10-21 17:43:00 +00:00
|
|
|
mutex_unlock(&vcpu->mutex);
|
|
|
|
}
|
|
|
|
return -EINTR;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
2021-11-16 16:04:02 +00:00
|
|
|
unsigned long i;
|
2022-05-02 16:58:07 +00:00
|
|
|
bool first = true;
|
2021-10-21 17:43:00 +00:00
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
2022-05-02 16:58:07 +00:00
|
|
|
if (first)
|
|
|
|
first = false;
|
|
|
|
else
|
|
|
|
mutex_acquire(&vcpu->mutex.dep_map,
|
|
|
|
SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
|
|
|
|
|
2021-10-21 17:43:00 +00:00
|
|
|
mutex_unlock(&vcpu->mutex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-11 19:36:34 +00:00
|
|
|
static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
|
2021-10-21 17:43:00 +00:00
|
|
|
{
|
2022-02-11 19:36:34 +00:00
|
|
|
struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
|
|
|
|
struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
|
2022-06-23 17:34:06 +00:00
|
|
|
struct kvm_vcpu *dst_vcpu, *src_vcpu;
|
|
|
|
struct vcpu_svm *dst_svm, *src_svm;
|
2022-02-11 19:36:34 +00:00
|
|
|
struct kvm_sev_info *mirror;
|
2022-06-23 17:34:06 +00:00
|
|
|
unsigned long i;
|
2022-02-11 19:36:34 +00:00
|
|
|
|
2021-10-21 17:43:00 +00:00
|
|
|
dst->active = true;
|
|
|
|
dst->asid = src->asid;
|
|
|
|
dst->handle = src->handle;
|
|
|
|
dst->pages_locked = src->pages_locked;
|
2021-11-23 00:50:31 +00:00
|
|
|
dst->enc_context_owner = src->enc_context_owner;
|
2022-06-23 17:34:06 +00:00
|
|
|
dst->es_active = src->es_active;
|
2021-10-21 17:43:00 +00:00
|
|
|
|
|
|
|
src->asid = 0;
|
|
|
|
src->active = false;
|
|
|
|
src->handle = 0;
|
|
|
|
src->pages_locked = 0;
|
2021-11-23 00:50:31 +00:00
|
|
|
src->enc_context_owner = NULL;
|
2022-06-23 17:34:06 +00:00
|
|
|
src->es_active = false;
|
2021-10-21 17:43:00 +00:00
|
|
|
|
KVM: SEV: do not use list_replace_init on an empty list
list_replace_init cannot be used if the source is an empty list,
because "new->next->prev = new" will overwrite "old->next":
new old
prev = new, next = new prev = old, next = old
new->next = old->next prev = new, next = old prev = old, next = old
new->next->prev = new prev = new, next = old prev = old, next = new
new->prev = old->prev prev = old, next = old prev = old, next = old
new->next->prev = new prev = old, next = old prev = new, next = new
The desired outcome instead would be to leave both old and new the same
as they were (two empty circular lists). Use list_cut_before, which
already has the necessary check and is documented to discard the
previous contents of the list that will hold the result.
Fixes: b56639318bb2 ("KVM: SEV: Add support for SEV intra host migration")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20211123005036.2954379-5-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-11-23 00:50:28 +00:00
|
|
|
list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
|
2022-02-11 19:36:34 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If this VM has mirrors, "transfer" each mirror's refcount of the
|
|
|
|
* source to the destination (this KVM). The caller holds a reference
|
|
|
|
* to the source, so there's no danger of use-after-free.
|
|
|
|
*/
|
|
|
|
list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms);
|
|
|
|
list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) {
|
|
|
|
kvm_get_kvm(dst_kvm);
|
|
|
|
kvm_put_kvm(src_kvm);
|
|
|
|
mirror->enc_context_owner = dst_kvm;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this VM is a mirror, remove the old mirror from the owners list
|
|
|
|
* and add the new mirror to the list.
|
|
|
|
*/
|
|
|
|
if (is_mirroring_enc_context(dst_kvm)) {
|
|
|
|
struct kvm_sev_info *owner_sev_info =
|
|
|
|
&to_kvm_svm(dst->enc_context_owner)->sev_info;
|
|
|
|
|
|
|
|
list_del(&src->mirror_entry);
|
|
|
|
list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms);
|
|
|
|
}
|
2021-10-21 17:43:00 +00:00
|
|
|
|
2022-06-23 17:34:06 +00:00
|
|
|
kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
|
|
|
|
dst_svm = to_svm(dst_vcpu);
|
2021-10-21 17:43:01 +00:00
|
|
|
|
2022-06-23 17:34:06 +00:00
|
|
|
sev_init_vmcb(dst_svm);
|
2021-10-21 17:43:01 +00:00
|
|
|
|
2022-06-23 17:34:06 +00:00
|
|
|
if (!dst->es_active)
|
|
|
|
continue;
|
2021-10-21 17:43:01 +00:00
|
|
|
|
2022-06-23 17:34:06 +00:00
|
|
|
/*
|
|
|
|
* Note, the source is not required to have the same number of
|
|
|
|
* vCPUs as the destination when migrating a vanilla SEV VM.
|
|
|
|
*/
|
|
|
|
src_vcpu = kvm_get_vcpu(dst_kvm, i);
|
2021-10-21 17:43:01 +00:00
|
|
|
src_svm = to_svm(src_vcpu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transfer VMSA and GHCB state to the destination. Nullify and
|
|
|
|
* clear source fields as appropriate, the state now belongs to
|
|
|
|
* the destination.
|
|
|
|
*/
|
|
|
|
memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
|
|
|
|
dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
|
|
|
|
dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
|
|
|
|
dst_vcpu->arch.guest_state_protected = true;
|
|
|
|
|
|
|
|
memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
|
|
|
|
src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
|
|
|
|
src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
|
|
|
|
src_vcpu->arch.guest_state_protected = false;
|
|
|
|
}
|
2022-06-23 17:34:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *src_vcpu;
|
|
|
|
unsigned long i;
|
|
|
|
|
|
|
|
if (!sev_es_guest(src))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, src_vcpu, src) {
|
|
|
|
if (!src_vcpu->arch.guest_state_protected)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2021-10-21 17:43:01 +00:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-01-28 00:52:06 +00:00
|
|
|
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
|
2021-10-21 17:43:00 +00:00
|
|
|
{
|
|
|
|
struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
|
2021-11-12 09:02:24 +00:00
|
|
|
struct kvm_sev_info *src_sev, *cg_cleanup_sev;
|
2021-10-21 17:43:00 +00:00
|
|
|
struct file *source_kvm_file;
|
|
|
|
struct kvm *source_kvm;
|
2021-11-12 09:02:24 +00:00
|
|
|
bool charged = false;
|
2021-10-21 17:43:00 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
source_kvm_file = fget(source_fd);
|
|
|
|
if (!file_is_kvm(source_kvm_file)) {
|
|
|
|
ret = -EBADF;
|
|
|
|
goto out_fput;
|
|
|
|
}
|
|
|
|
|
|
|
|
source_kvm = source_kvm_file->private_data;
|
2021-11-23 00:50:29 +00:00
|
|
|
ret = sev_lock_two_vms(kvm, source_kvm);
|
2021-10-21 17:43:00 +00:00
|
|
|
if (ret)
|
|
|
|
goto out_fput;
|
|
|
|
|
2021-11-23 00:50:29 +00:00
|
|
|
if (sev_guest(kvm) || !sev_guest(source_kvm)) {
|
2021-10-21 17:43:00 +00:00
|
|
|
ret = -EINVAL;
|
2021-11-23 00:50:29 +00:00
|
|
|
goto out_unlock;
|
2021-10-21 17:43:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
src_sev = &to_kvm_svm(source_kvm)->sev_info;
|
2021-11-23 00:50:34 +00:00
|
|
|
|
2021-10-21 17:43:00 +00:00
|
|
|
dst_sev->misc_cg = get_current_misc_cg();
|
2021-11-12 09:02:24 +00:00
|
|
|
cg_cleanup_sev = dst_sev;
|
2021-10-21 17:43:00 +00:00
|
|
|
if (dst_sev->misc_cg != src_sev->misc_cg) {
|
|
|
|
ret = sev_misc_cg_try_charge(dst_sev);
|
|
|
|
if (ret)
|
2021-11-12 09:02:24 +00:00
|
|
|
goto out_dst_cgroup;
|
|
|
|
charged = true;
|
2021-10-21 17:43:00 +00:00
|
|
|
}
|
|
|
|
|
2022-05-02 16:58:07 +00:00
|
|
|
ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
|
2021-10-21 17:43:00 +00:00
|
|
|
if (ret)
|
|
|
|
goto out_dst_cgroup;
|
2022-05-02 16:58:07 +00:00
|
|
|
ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
|
2021-10-21 17:43:00 +00:00
|
|
|
if (ret)
|
|
|
|
goto out_dst_vcpu;
|
|
|
|
|
2022-06-23 17:34:06 +00:00
|
|
|
ret = sev_check_source_vcpus(kvm, source_kvm);
|
|
|
|
if (ret)
|
|
|
|
goto out_source_vcpu;
|
2022-02-11 19:36:34 +00:00
|
|
|
|
|
|
|
sev_migrate_from(kvm, source_kvm);
|
2021-10-21 17:43:00 +00:00
|
|
|
kvm_vm_dead(source_kvm);
|
2021-11-12 09:02:24 +00:00
|
|
|
cg_cleanup_sev = src_sev;
|
2021-10-21 17:43:00 +00:00
|
|
|
ret = 0;
|
|
|
|
|
2021-10-21 17:43:01 +00:00
|
|
|
out_source_vcpu:
|
2021-10-21 17:43:00 +00:00
|
|
|
sev_unlock_vcpus_for_migration(source_kvm);
|
|
|
|
out_dst_vcpu:
|
|
|
|
sev_unlock_vcpus_for_migration(kvm);
|
|
|
|
out_dst_cgroup:
|
2021-11-12 09:02:24 +00:00
|
|
|
/* Operates on the source on success, on the destination on failure. */
|
|
|
|
if (charged)
|
|
|
|
sev_misc_cg_uncharge(cg_cleanup_sev);
|
|
|
|
put_misc_cg(cg_cleanup_sev->misc_cg);
|
|
|
|
cg_cleanup_sev->misc_cg = NULL;
|
2021-11-23 00:50:29 +00:00
|
|
|
out_unlock:
|
|
|
|
sev_unlock_two_vms(kvm, source_kvm);
|
2021-10-21 17:43:00 +00:00
|
|
|
out_fput:
|
|
|
|
if (source_kvm_file)
|
|
|
|
fput(source_kvm_file);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-01-28 00:52:06 +00:00
|
|
|
/*
 * Dispatch a memory-encryption command read from userspace.
 *
 * The struct kvm_sev_cmd at @argp is copied in, handled under kvm->lock by
 * the function matching its id, and copied back out afterwards.
 *
 * Returns -ENOTTY if SEV is disabled (or SEV-ES is disabled for
 * KVM_SEV_ES_INIT), 0 if @argp is NULL, -EFAULT if the command cannot be
 * copied from or back to userspace, -EINVAL for an unknown command or one a
 * mirror VM is not allowed to issue, and otherwise the handler's result.
 */
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
{
	struct kvm_sev_cmd sev_cmd;
	int ret;

	if (!sev_enabled)
		return -ENOTTY;

	if (!argp)
		return 0;

	if (copy_from_user(&sev_cmd, argp, sizeof(sev_cmd)))
		return -EFAULT;

	mutex_lock(&kvm->lock);

	/* Only the enc_context_owner handles some memory enc operations. */
	if (is_mirroring_enc_context(kvm) &&
	    !is_cmd_allowed_from_mirror(sev_cmd.id)) {
		ret = -EINVAL;
		goto out;
	}

	switch (sev_cmd.id) {
	case KVM_SEV_ES_INIT:
		if (!sev_es_enabled) {
			ret = -ENOTTY;
			goto out;
		}
		fallthrough;
	case KVM_SEV_INIT:
		ret = sev_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_START:
		ret = sev_launch_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_DATA:
		ret = sev_launch_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_VMSA:
		ret = sev_launch_update_vmsa(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_MEASURE:
		ret = sev_launch_measure(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_FINISH:
		ret = sev_launch_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_GUEST_STATUS:
		ret = sev_guest_status(kvm, &sev_cmd);
		break;
	case KVM_SEV_DBG_DECRYPT:
		ret = sev_dbg_crypt(kvm, &sev_cmd, true);
		break;
	case KVM_SEV_DBG_ENCRYPT:
		ret = sev_dbg_crypt(kvm, &sev_cmd, false);
		break;
	case KVM_SEV_LAUNCH_SECRET:
		ret = sev_launch_secret(kvm, &sev_cmd);
		break;
	case KVM_SEV_GET_ATTESTATION_REPORT:
		ret = sev_get_attestation_report(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_START:
		ret = sev_send_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_UPDATE_DATA:
		ret = sev_send_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_FINISH:
		ret = sev_send_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_CANCEL:
		ret = sev_send_cancel(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_START:
		ret = sev_receive_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_UPDATE_DATA:
		ret = sev_receive_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_FINISH:
		ret = sev_receive_finish(kvm, &sev_cmd);
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	/* A failed copy-out overrides whatever the handler returned. */
	if (copy_to_user(argp, &sev_cmd, sizeof(sev_cmd)))
		ret = -EFAULT;

out:
	mutex_unlock(&kvm->lock);
	return ret;
}
|
|
|
|
|
2022-01-28 00:52:06 +00:00
|
|
|
int sev_mem_enc_register_region(struct kvm *kvm,
|
|
|
|
struct kvm_enc_region *range)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
struct enc_region *region;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return -ENOTTY;
|
|
|
|
|
2021-04-08 22:32:14 +00:00
|
|
|
/* If kvm is mirroring encryption context it isn't responsible for it */
|
|
|
|
if (is_mirroring_enc_context(kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
|
|
|
|
if (!region)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2021-01-27 16:15:24 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
2020-03-24 09:41:54 +00:00
|
|
|
region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1);
|
2020-06-23 09:12:24 +00:00
|
|
|
if (IS_ERR(region->pages)) {
|
|
|
|
ret = PTR_ERR(region->pages);
|
2021-01-27 16:15:24 +00:00
|
|
|
mutex_unlock(&kvm->lock);
|
2020-03-24 09:41:54 +00:00
|
|
|
goto e_free;
|
|
|
|
}
|
|
|
|
|
2021-01-27 16:15:24 +00:00
|
|
|
region->uaddr = range->addr;
|
|
|
|
region->size = range->size;
|
|
|
|
|
|
|
|
list_add_tail(®ion->list, &sev->regions_list);
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
/*
|
|
|
|
* The guest may change the memory encryption attribute from C=0 -> C=1
|
|
|
|
* or vice versa for this memory range. Lets make sure caches are
|
|
|
|
* flushed to ensure that guest data gets written into memory with
|
|
|
|
* correct C-bit.
|
|
|
|
*/
|
|
|
|
sev_clflush_pages(region->pages, region->npages);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
e_free:
|
|
|
|
kfree(region);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct enc_region *
|
|
|
|
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
struct list_head *head = &sev->regions_list;
|
|
|
|
struct enc_region *i;
|
|
|
|
|
|
|
|
list_for_each_entry(i, head, list) {
|
|
|
|
if (i->uaddr == range->addr &&
|
|
|
|
i->size == range->size)
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __unregister_enc_region_locked(struct kvm *kvm,
|
|
|
|
struct enc_region *region)
|
|
|
|
{
|
|
|
|
sev_unpin_memory(kvm, region->pages, region->npages);
|
|
|
|
list_del(®ion->list);
|
|
|
|
kfree(region);
|
|
|
|
}
|
|
|
|
|
2022-01-28 00:52:06 +00:00
|
|
|
int sev_mem_enc_unregister_region(struct kvm *kvm,
|
|
|
|
struct kvm_enc_region *range)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
|
|
|
struct enc_region *region;
|
|
|
|
int ret;
|
|
|
|
|
2021-04-08 22:32:14 +00:00
|
|
|
/* If kvm is mirroring encryption context it isn't responsible for it */
|
|
|
|
if (is_mirroring_enc_context(kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
|
|
|
|
if (!sev_guest(kvm)) {
|
|
|
|
ret = -ENOTTY;
|
|
|
|
goto failed;
|
|
|
|
}
|
|
|
|
|
|
|
|
region = find_enc_region(kvm, range);
|
|
|
|
if (!region) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto failed;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure that all guest tagged cache entries are flushed before
|
|
|
|
* releasing the pages back to the system for use. CLFLUSH will
|
|
|
|
* not do this, so issue a WBINVD.
|
|
|
|
*/
|
|
|
|
wbinvd_on_all_cpus();
|
|
|
|
|
|
|
|
__unregister_enc_region_locked(kvm, region);
|
|
|
|
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
failed:
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2022-01-28 00:52:06 +00:00
|
|
|
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
|
2021-04-08 22:32:14 +00:00
|
|
|
{
|
|
|
|
struct file *source_kvm_file;
|
|
|
|
struct kvm *source_kvm;
|
2021-11-23 00:50:33 +00:00
|
|
|
struct kvm_sev_info *source_sev, *mirror_sev;
|
2021-04-08 22:32:14 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
source_kvm_file = fget(source_fd);
|
|
|
|
if (!file_is_kvm(source_kvm_file)) {
|
|
|
|
ret = -EBADF;
|
2021-11-23 00:50:33 +00:00
|
|
|
goto e_source_fput;
|
2021-04-08 22:32:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
source_kvm = source_kvm_file->private_data;
|
2021-11-23 00:50:33 +00:00
|
|
|
ret = sev_lock_two_vms(kvm, source_kvm);
|
|
|
|
if (ret)
|
|
|
|
goto e_source_fput;
|
2021-04-08 22:32:14 +00:00
|
|
|
|
2021-11-23 00:50:33 +00:00
|
|
|
/*
|
|
|
|
* Mirrors of mirrors should work, but let's not get silly. Also
|
|
|
|
* disallow out-of-band SEV/SEV-ES init if the target is already an
|
|
|
|
* SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
|
|
|
|
* created after SEV/SEV-ES initialization, e.g. to init intercepts.
|
|
|
|
*/
|
|
|
|
if (sev_guest(kvm) || !sev_guest(source_kvm) ||
|
|
|
|
is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
|
2021-04-08 22:32:14 +00:00
|
|
|
ret = -EINVAL;
|
2021-11-23 00:50:33 +00:00
|
|
|
goto e_unlock;
|
2021-04-08 22:32:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The mirror kvm holds an enc_context_owner ref so its asid can't
|
|
|
|
* disappear until we're done with it
|
|
|
|
*/
|
2021-11-23 00:50:33 +00:00
|
|
|
source_sev = &to_kvm_svm(source_kvm)->sev_info;
|
2021-04-08 22:32:14 +00:00
|
|
|
kvm_get_kvm(source_kvm);
|
2022-02-11 19:36:34 +00:00
|
|
|
mirror_sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
|
2021-04-08 22:32:14 +00:00
|
|
|
|
|
|
|
/* Set enc_context_owner and copy its encryption context over */
|
|
|
|
mirror_sev->enc_context_owner = source_kvm;
|
|
|
|
mirror_sev->active = true;
|
2021-11-23 00:50:33 +00:00
|
|
|
mirror_sev->asid = source_sev->asid;
|
|
|
|
mirror_sev->fd = source_sev->fd;
|
|
|
|
mirror_sev->es_active = source_sev->es_active;
|
|
|
|
mirror_sev->handle = source_sev->handle;
|
2021-11-23 00:50:30 +00:00
|
|
|
INIT_LIST_HEAD(&mirror_sev->regions_list);
|
2022-02-11 19:36:34 +00:00
|
|
|
INIT_LIST_HEAD(&mirror_sev->mirror_vms);
|
2021-11-23 00:50:33 +00:00
|
|
|
ret = 0;
|
|
|
|
|
2021-09-21 15:03:44 +00:00
|
|
|
/*
|
|
|
|
* Do not copy ap_jump_table. Since the mirror does not share the same
|
|
|
|
* KVM contexts as the original, and they may have different
|
|
|
|
* memory-views.
|
|
|
|
*/
|
2021-04-08 22:32:14 +00:00
|
|
|
|
2021-11-23 00:50:33 +00:00
|
|
|
e_unlock:
|
|
|
|
sev_unlock_two_vms(kvm, source_kvm);
|
|
|
|
e_source_fput:
|
2021-04-30 17:03:03 +00:00
|
|
|
if (source_kvm_file)
|
|
|
|
fput(source_kvm_file);
|
2021-04-08 22:32:14 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
void sev_vm_destroy(struct kvm *kvm)
|
|
|
|
{
|
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
|
|
|
struct list_head *head = &sev->regions_list;
|
|
|
|
struct list_head *pos, *q;
|
|
|
|
|
|
|
|
if (!sev_guest(kvm))
|
|
|
|
return;
|
|
|
|
|
2022-02-11 19:36:34 +00:00
|
|
|
WARN_ON(!list_empty(&sev->mirror_vms));
|
|
|
|
|
2021-04-08 22:32:14 +00:00
|
|
|
/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
|
|
|
|
if (is_mirroring_enc_context(kvm)) {
|
2021-11-23 00:50:34 +00:00
|
|
|
struct kvm *owner_kvm = sev->enc_context_owner;
|
|
|
|
|
|
|
|
mutex_lock(&owner_kvm->lock);
|
2022-02-11 19:36:34 +00:00
|
|
|
list_del(&sev->mirror_entry);
|
2021-11-23 00:50:34 +00:00
|
|
|
mutex_unlock(&owner_kvm->lock);
|
|
|
|
kvm_put_kvm(owner_kvm);
|
2021-04-08 22:32:14 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
/*
|
|
|
|
* Ensure that all guest tagged cache entries are flushed before
|
|
|
|
* releasing the pages back to the system for use. CLFLUSH will
|
|
|
|
* not do this, so issue a WBINVD.
|
|
|
|
*/
|
|
|
|
wbinvd_on_all_cpus();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* if userspace was terminated before unregistering the memory regions
|
|
|
|
* then lets unpin all the registered memory.
|
|
|
|
*/
|
|
|
|
if (!list_empty(head)) {
|
|
|
|
list_for_each_safe(pos, q, head) {
|
|
|
|
__unregister_enc_region_locked(kvm,
|
|
|
|
list_entry(pos, struct enc_region, list));
|
2020-08-25 19:56:28 +00:00
|
|
|
cond_resched();
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sev_unbind_asid(kvm, sev->handle);
|
2021-03-30 04:42:06 +00:00
|
|
|
sev_asid_free(sev);
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2021-04-22 02:11:15 +00:00
|
|
|
/*
 * Clear the SEV and SEV-ES KVM CPU capabilities for whichever of the two
 * features is not enabled.
 */
void __init sev_set_cpu_caps(void)
{
	if (!sev_enabled) {
		kvm_cpu_cap_clear(X86_FEATURE_SEV);
	}

	if (!sev_es_enabled) {
		kvm_cpu_cap_clear(X86_FEATURE_SEV_ES);
	}
}
|
|
|
|
|
2020-12-10 17:09:38 +00:00
|
|
|
/*
 * Probe the hardware for SEV and SEV-ES support and set up the global ASID
 * bookkeeping.
 *
 * On exit sev_enabled and sev_es_enabled reflect what was actually found to
 * be usable; any failed check or allocation along the way leaves the
 * corresponding feature disabled.  The whole body is compiled out without
 * CONFIG_KVM_AMD_SEV.
 */
void __init sev_hardware_setup(void)
{
#ifdef CONFIG_KVM_AMD_SEV
	unsigned int eax, ebx, ecx, edx, nr_sev_asids, nr_sev_es_asids;
	bool have_sev_es = false;
	bool have_sev = false;

	if (!(sev_enabled && npt_enabled))
		goto out;

	/*
	 * SEV must obviously be supported in hardware.  Sanity check that the
	 * CPU supports decode assists, which is mandatory for SEV guests to
	 * support instruction emulation.
	 */
	if (!boot_cpu_has(X86_FEATURE_SEV) ||
	    WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
		goto out;

	/* Retrieve SEV CPUID information */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	/* Set encryption bit location for SEV-ES guests */
	sev_enc_bit = ebx & 0x3f;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = ecx;
	if (!max_sev_asid)
		goto out;

	/* Minimum ASID value that should be used for SEV guest */
	min_sev_asid = edx;
	sev_me_mask = 1UL << (ebx & 0x3f);

	/*
	 * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
	 * even though it's never used, so that the bitmap is indexed by the
	 * actual ASID.
	 */
	nr_asids = max_sev_asid + 1;

	sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
	if (!sev_asid_bitmap)
		goto out;

	sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
	if (!sev_reclaim_asid_bitmap) {
		/* Don't leave a half-initialised pair of bitmaps behind. */
		bitmap_free(sev_asid_bitmap);
		sev_asid_bitmap = NULL;
		goto out;
	}

	nr_sev_asids = max_sev_asid - min_sev_asid + 1;
	if (misc_cg_set_capacity(MISC_CG_RES_SEV, nr_sev_asids))
		goto out;

	pr_info("SEV supported: %u ASIDs\n", nr_sev_asids);
	have_sev = true;

	/* SEV-ES support requested? */
	if (!sev_es_enabled)
		goto out;

	/*
	 * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
	 * instruction stream, i.e. can't emulate in response to a #NPF and
	 * instead relies on #NPF(RSVD) being reflected into the guest as #VC
	 * (the guest can then do a #VMGEXIT to request MMIO emulation).
	 */
	if (!enable_mmio_caching)
		goto out;

	/* Does the CPU support SEV-ES? */
	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
		goto out;

	/* Has the system been allocated ASIDs for SEV-ES? */
	if (min_sev_asid == 1)
		goto out;

	nr_sev_es_asids = min_sev_asid - 1;
	if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, nr_sev_es_asids))
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", nr_sev_es_asids);
	have_sev_es = true;

out:
	sev_enabled = have_sev;
	sev_es_enabled = have_sev_es;
#endif
}
|
|
|
|
|
2022-01-28 00:52:07 +00:00
|
|
|
void sev_hardware_unsetup(void)
|
2020-03-24 09:41:54 +00:00
|
|
|
{
|
2021-04-22 02:11:23 +00:00
|
|
|
if (!sev_enabled)
|
2020-04-13 07:20:06 +00:00
|
|
|
return;
|
|
|
|
|
2021-04-22 02:11:25 +00:00
|
|
|
/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
|
2021-08-02 18:09:03 +00:00
|
|
|
sev_flush_asids(1, max_sev_asid);
|
2021-04-22 02:11:25 +00:00
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
bitmap_free(sev_asid_bitmap);
|
|
|
|
bitmap_free(sev_reclaim_asid_bitmap);
|
2021-04-22 02:11:25 +00:00
|
|
|
|
2021-03-30 04:42:06 +00:00
|
|
|
misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
|
|
|
|
misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2021-04-22 02:11:22 +00:00
|
|
|
int sev_cpu_init(struct svm_cpu_data *sd)
|
|
|
|
{
|
2021-04-22 02:11:23 +00:00
|
|
|
if (!sev_enabled)
|
2021-04-22 02:11:22 +00:00
|
|
|
return 0;
|
|
|
|
|
2021-08-02 18:09:03 +00:00
|
|
|
sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
|
2021-04-22 02:11:22 +00:00
|
|
|
if (!sd->sev_vmcbs)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
return 0;
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
|
|
|
|
2020-12-10 17:09:40 +00:00
|
|
|
/*
|
|
|
|
* Pages used by hardware to hold guest encrypted state must be flushed before
|
|
|
|
* returning them to the system.
|
|
|
|
*/
|
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
Rework sev_flush_guest_memory() to explicitly handle only a single page,
and harden it to fall back to WBINVD if VM_PAGE_FLUSH fails. Per-page
flushing is currently used only to flush the VMSA, and in its current
form, the helper is completely broken with respect to flushing actual
guest memory, i.e. won't work correctly for an arbitrary memory range.
VM_PAGE_FLUSH takes a host virtual address, and is subject to normal page
walks, i.e. will fault if the address is not present in the host page
tables or does not have the correct permissions. Current AMD CPUs also
do not honor SMAP overrides (undocumented in kernel versions of the APM),
so passing in a userspace address is completely out of the question. In
other words, KVM would need to manually walk the host page tables to get
the pfn, ensure the pfn is stable, and then use the direct map to invoke
VM_PAGE_FLUSH. And the latter might not even work, e.g. if userspace is
particularly evil/clever and backs the guest with Secret Memory (which
unmaps memory from the direct map).
Signed-off-by: Sean Christopherson <seanjc@google.com>
Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
Reported-by: Mingwei Zhang <mizhang@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-2-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-04-21 03:14:05 +00:00
|
|
|
static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
|
2020-12-10 17:09:40 +00:00
|
|
|
{
|
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
Rework sev_flush_guest_memory() to explicitly handle only a single page,
and harden it to fall back to WBINVD if VM_PAGE_FLUSH fails. Per-page
flushing is currently used only to flush the VMSA, and in its current
form, the helper is completely broken with respect to flushing actual
guest memory, i.e. won't work correctly for an arbitrary memory range.
VM_PAGE_FLUSH takes a host virtual address, and is subject to normal page
walks, i.e. will fault if the address is not present in the host page
tables or does not have the correct permissions. Current AMD CPUs also
do not honor SMAP overrides (undocumented in kernel versions of the APM),
so passing in a userspace address is completely out of the question. In
other words, KVM would need to manually walk the host page tables to get
the pfn, ensure the pfn is stable, and then use the direct map to invoke
VM_PAGE_FLUSH. And the latter might not even work, e.g. if userspace is
particularly evil/clever and backs the guest with Secret Memory (which
unmaps memory from the direct map).
Signed-off-by: Sean Christopherson <seanjc@google.com>
Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
Reported-by: Mingwei Zhang <mizhang@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-2-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-04-21 03:14:05 +00:00
|
|
|
int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Note! The address must be a kernel address, as regular page walk
|
|
|
|
* checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
|
|
|
|
* address is non-deterministic and unsafe. This function deliberately
|
|
|
|
* takes a pointer to deter passing in a user address.
|
|
|
|
*/
|
|
|
|
unsigned long addr = (unsigned long)va;
|
|
|
|
|
2020-12-10 17:09:40 +00:00
|
|
|
/*
|
2022-04-21 03:14:06 +00:00
|
|
|
* If CPU enforced cache coherency for encrypted mappings of the
|
|
|
|
* same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
|
|
|
|
* flush is still needed in order to work properly with DMA devices.
|
2020-12-10 17:09:40 +00:00
|
|
|
*/
|
2022-04-21 03:14:06 +00:00
|
|
|
if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
|
|
|
|
clflush_cache_range(va, PAGE_SIZE);
|
2020-12-10 17:09:40 +00:00
|
|
|
return;
|
2022-04-21 03:14:06 +00:00
|
|
|
}
|
2020-12-10 17:09:40 +00:00
|
|
|
|
|
|
|
/*
|
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
Rework sev_flush_guest_memory() to explicitly handle only a single page,
and harden it to fall back to WBINVD if VM_PAGE_FLUSH fails. Per-page
flushing is currently used only to flush the VMSA, and in its current
form, the helper is completely broken with respect to flushing actual
guest memory, i.e. won't work correctly for an arbitrary memory range.
VM_PAGE_FLUSH takes a host virtual address, and is subject to normal page
walks, i.e. will fault if the address is not present in the host page
tables or does not have the correct permissions. Current AMD CPUs also
do not honor SMAP overrides (undocumented in kernel versions of the APM),
so passing in a userspace address is completely out of the question. In
other words, KVM would need to manually walk the host page tables to get
the pfn, ensure the pfn is stable, and then use the direct map to invoke
VM_PAGE_FLUSH. And the latter might not even work, e.g. if userspace is
particularly evil/clever and backs the guest with Secret Memory (which
unmaps memory from the direct map).
Signed-off-by: Sean Christopherson <seanjc@google.com>
Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
Reported-by: Mingwei Zhang <mizhang@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-2-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-04-21 03:14:05 +00:00
|
|
|
* VM Page Flush takes a host virtual address and a guest ASID. Fall
|
|
|
|
* back to WBINVD if this faults so as not to make any problems worse
|
|
|
|
* by leaving stale encrypted data in the cache.
|
2020-12-10 17:09:40 +00:00
|
|
|
*/
|
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
Rework sev_flush_guest_memory() to explicitly handle only a single page,
and harden it to fall back to WBINVD if VM_PAGE_FLUSH fails. Per-page
flushing is currently used only to flush the VMSA, and in its current
form, the helper is completely broken with respect to flushing actual
guest memory, i.e. won't work correctly for an arbitrary memory range.
VM_PAGE_FLUSH takes a host virtual address, and is subject to normal page
walks, i.e. will fault if the address is not present in the host page
tables or does not have the correct permissions. Current AMD CPUs also
do not honor SMAP overrides (undocumented in kernel versions of the APM),
so passing in a userspace address is completely out of the question. In
other words, KVM would need to manually walk the host page tables to get
the pfn, ensure the pfn is stable, and then use the direct map to invoke
VM_PAGE_FLUSH. And the latter might not even work, e.g. if userspace is
particularly evil/clever and backs the guest with Secret Memory (which
unmaps memory from the direct map).
Signed-off-by: Sean Christopherson <seanjc@google.com>
Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
Reported-by: Mingwei Zhang <mizhang@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-2-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-04-21 03:14:05 +00:00
|
|
|
if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
|
|
|
|
goto do_wbinvd;
|
2020-12-10 17:09:40 +00:00
|
|
|
|
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
Rework sev_flush_guest_memory() to explicitly handle only a single page,
and harden it to fall back to WBINVD if VM_PAGE_FLUSH fails. Per-page
flushing is currently used only to flush the VMSA, and in its current
form, the helper is completely broken with respect to flushing actual
guest memory, i.e. won't work correctly for an arbitrary memory range.
VM_PAGE_FLUSH takes a host virtual address, and is subject to normal page
walks, i.e. will fault if the address is not present in the host page
tables or does not have the correct permissions. Current AMD CPUs also
do not honor SMAP overrides (undocumented in kernel versions of the APM),
so passing in a userspace address is completely out of the question. In
other words, KVM would need to manually walk the host page tables to get
the pfn, ensure the pfn is stable, and then use the direct map to invoke
VM_PAGE_FLUSH. And the latter might not even work, e.g. if userspace is
particularly evil/clever and backs the guest with Secret Memory (which
unmaps memory from the direct map).
Signed-off-by: Sean Christopherson <seanjc@google.com>
Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
Reported-by: Mingwei Zhang <mizhang@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-2-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-04-21 03:14:05 +00:00
|
|
|
return;
|
2020-12-10 17:09:40 +00:00
|
|
|
|
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
Rework sev_flush_guest_memory() to explicitly handle only a single page,
and harden it to fall back to WBINVD if VM_PAGE_FLUSH fails. Per-page
flushing is currently used only to flush the VMSA, and in its current
form, the helper is completely broken with respect to flushing actual
guest memory, i.e. won't work correctly for an arbitrary memory range.
VM_PAGE_FLUSH takes a host virtual address, and is subject to normal page
walks, i.e. will fault if the address is not present in the host page
tables or does not have the correct permissions. Current AMD CPUs also
do not honor SMAP overrides (undocumented in kernel versions of the APM),
so passing in a userspace address is completely out of the question. In
other words, KVM would need to manually walk the host page tables to get
the pfn, ensure the pfn is stable, and then use the direct map to invoke
VM_PAGE_FLUSH. And the latter might not even work, e.g. if userspace is
particularly evil/clever and backs the guest with Secret Memory (which
unmaps memory from the direct map).
Signed-off-by: Sean Christopherson <seanjc@google.com>
Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
Reported-by: Mingwei Zhang <mizhang@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-2-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-04-21 03:14:05 +00:00
|
|
|
do_wbinvd:
|
2020-12-10 17:09:40 +00:00
|
|
|
wbinvd_on_all_cpus();
|
|
|
|
}
|
|
|
|
|
2022-04-21 03:14:07 +00:00
|
|
|
/*
 * Write back and invalidate the caches on every CPU when @kvm is an SEV
 * guest; does nothing for any other VM.
 */
void sev_guest_memory_reclaimed(struct kvm *kvm)
{
	if (sev_guest(kvm))
		wbinvd_on_all_cpus();
}
|
|
|
|
|
2020-12-10 17:09:40 +00:00
|
|
|
void sev_free_vcpu(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct vcpu_svm *svm;
|
|
|
|
|
|
|
|
if (!sev_es_guest(vcpu->kvm))
|
|
|
|
return;
|
|
|
|
|
|
|
|
svm = to_svm(vcpu);
|
|
|
|
|
|
|
|
if (vcpu->arch.guest_state_protected)
|
KVM: SVM: Simplify and harden helper to flush SEV guest page(s)
Rework sev_flush_guest_memory() to explicitly handle only a single page,
and harden it to fall back to WBINVD if VM_PAGE_FLUSH fails. Per-page
flushing is currently used only to flush the VMSA, and in its current
form, the helper is completely broken with respect to flushing actual
guest memory, i.e. won't work correctly for an arbitrary memory range.
VM_PAGE_FLUSH takes a host virtual address, and is subject to normal page
walks, i.e. will fault if the address is not present in the host page
tables or does not have the correct permissions. Current AMD CPUs also
do not honor SMAP overrides (undocumented in kernel versions of the APM),
so passing in a userspace address is completely out of the question. In
other words, KVM would need to manually walk the host page tables to get
the pfn, ensure the pfn is stable, and then use the direct map to invoke
VM_PAGE_FLUSH. And the latter might not even work, e.g. if userspace is
particularly evil/clever and backs the guest with Secret Memory (which
unmaps memory from the direct map).
Signed-off-by: Sean Christopherson <seanjc@google.com>
Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
Reported-by: Mingwei Zhang <mizhang@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220421031407.2516575-2-mizhang@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-04-21 03:14:05 +00:00
|
|
|
sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
|
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
__free_page(virt_to_page(svm->sev_es.vmsa));
|
2020-12-10 17:09:53 +00:00
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
if (svm->sev_es.ghcb_sa_free)
|
2021-11-09 22:23:50 +00:00
|
|
|
kvfree(svm->sev_es.ghcb_sa);
|
2020-12-10 17:09:40 +00:00
|
|
|
}
|
|
|
|
|
2020-12-10 17:09:47 +00:00
|
|
|
static void dump_ghcb(struct vcpu_svm *svm)
|
|
|
|
{
|
2021-10-21 17:42:59 +00:00
|
|
|
struct ghcb *ghcb = svm->sev_es.ghcb;
|
2020-12-10 17:09:47 +00:00
|
|
|
unsigned int nbits;
|
|
|
|
|
|
|
|
/* Re-use the dump_invalid_vmcb module parameter */
|
|
|
|
if (!dump_invalid_vmcb) {
|
|
|
|
pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
nbits = sizeof(ghcb->save.valid_bitmap) * 8;
|
|
|
|
|
|
|
|
pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
|
|
|
|
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
|
|
|
|
ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
|
|
|
|
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
|
|
|
|
ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
|
|
|
|
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
|
|
|
|
ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
|
|
|
|
pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
|
|
|
|
ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
|
|
|
|
pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu = &svm->vcpu;
|
2021-10-21 17:42:59 +00:00
|
|
|
struct ghcb *ghcb = svm->sev_es.ghcb;
|
2020-12-10 17:09:47 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The GHCB protocol so far allows for the following data
|
|
|
|
* to be returned:
|
|
|
|
* GPRs RAX, RBX, RCX, RDX
|
|
|
|
*
|
2021-01-22 23:50:47 +00:00
|
|
|
* Copy their values, even if they may not have been written during the
|
|
|
|
* VM-Exit. It's the guest's responsibility to not consume random data.
|
2020-12-10 17:09:47 +00:00
|
|
|
*/
|
2021-01-22 23:50:47 +00:00
|
|
|
ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
|
|
|
|
ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
|
|
|
|
ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
|
|
|
|
ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
|
2020-12-10 17:09:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
|
|
|
|
{
|
|
|
|
struct vmcb_control_area *control = &svm->vmcb->control;
|
|
|
|
struct kvm_vcpu *vcpu = &svm->vcpu;
|
2021-10-21 17:42:59 +00:00
|
|
|
struct ghcb *ghcb = svm->sev_es.ghcb;
|
2020-12-10 17:09:47 +00:00
|
|
|
u64 exit_code;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The GHCB protocol so far allows for the following data
|
|
|
|
* to be supplied:
|
|
|
|
* GPRs RAX, RBX, RCX, RDX
|
|
|
|
* XCR0
|
|
|
|
* CPL
|
|
|
|
*
|
|
|
|
* VMMCALL allows the guest to provide extra registers. KVM also
|
|
|
|
* expects RSI for hypercalls, so include that, too.
|
|
|
|
*
|
|
|
|
* Copy their values to the appropriate location if supplied.
|
|
|
|
*/
|
|
|
|
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
|
|
|
|
|
|
|
|
vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
|
|
|
|
vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
|
|
|
|
vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
|
|
|
|
vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
|
|
|
|
vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
|
|
|
|
|
|
|
|
svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
|
|
|
|
|
|
|
|
if (ghcb_xcr0_is_valid(ghcb)) {
|
|
|
|
vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
|
|
|
|
kvm_update_cpuid_runtime(vcpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Copy the GHCB exit information into the VMCB fields */
|
|
|
|
exit_code = ghcb_get_sw_exit_code(ghcb);
|
|
|
|
control->exit_code = lower_32_bits(exit_code);
|
|
|
|
control->exit_code_hi = upper_32_bits(exit_code);
|
|
|
|
control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
|
|
|
|
control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
|
|
|
|
|
|
|
|
/* Clear the valid entries fields */
|
|
|
|
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
|
|
|
|
}
|
|
|
|
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for that a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
|
2020-12-10 17:09:47 +00:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
struct ghcb *ghcb;
|
2021-12-02 18:52:05 +00:00
|
|
|
u64 exit_code;
|
|
|
|
u64 reason;
|
2020-12-10 17:09:47 +00:00
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
ghcb = svm->sev_es.ghcb;
|
2020-12-10 17:09:47 +00:00
|
|
|
|
|
|
|
/*
|
2021-12-02 18:52:05 +00:00
|
|
|
* Retrieve the exit code now even though it may not be marked valid
|
2020-12-10 17:09:47 +00:00
|
|
|
* as it could help with debugging.
|
|
|
|
*/
|
|
|
|
exit_code = ghcb_get_sw_exit_code(ghcb);
|
|
|
|
|
2021-12-02 18:52:05 +00:00
|
|
|
/* Only GHCB Usage code 0 is supported */
|
|
|
|
if (ghcb->ghcb_usage) {
|
|
|
|
reason = GHCB_ERR_INVALID_USAGE;
|
|
|
|
goto vmgexit_err;
|
|
|
|
}
|
|
|
|
|
|
|
|
reason = GHCB_ERR_MISSING_INPUT;
|
|
|
|
|
2020-12-10 17:09:47 +00:00
|
|
|
if (!ghcb_sw_exit_code_is_valid(ghcb) ||
|
|
|
|
!ghcb_sw_exit_info_1_is_valid(ghcb) ||
|
|
|
|
!ghcb_sw_exit_info_2_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
|
|
|
|
switch (ghcb_get_sw_exit_code(ghcb)) {
|
|
|
|
case SVM_EXIT_READ_DR7:
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_WRITE_DR7:
|
|
|
|
if (!ghcb_rax_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_RDTSC:
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_RDPMC:
|
|
|
|
if (!ghcb_rcx_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_CPUID:
|
|
|
|
if (!ghcb_rax_is_valid(ghcb) ||
|
|
|
|
!ghcb_rcx_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
if (ghcb_get_rax(ghcb) == 0xd)
|
|
|
|
if (!ghcb_xcr0_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_INVD:
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_IOIO:
|
2020-12-10 17:09:54 +00:00
|
|
|
if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
|
|
|
|
if (!ghcb_sw_scratch_is_valid(ghcb))
|
2020-12-10 17:09:47 +00:00
|
|
|
goto vmgexit_err;
|
2020-12-10 17:09:54 +00:00
|
|
|
} else {
|
|
|
|
if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
|
|
|
|
if (!ghcb_rax_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
}
|
2020-12-10 17:09:47 +00:00
|
|
|
break;
|
|
|
|
case SVM_EXIT_MSR:
|
|
|
|
if (!ghcb_rcx_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
if (ghcb_get_sw_exit_info_1(ghcb)) {
|
|
|
|
if (!ghcb_rax_is_valid(ghcb) ||
|
|
|
|
!ghcb_rdx_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_VMMCALL:
|
|
|
|
if (!ghcb_rax_is_valid(ghcb) ||
|
|
|
|
!ghcb_cpl_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_RDTSCP:
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_WBINVD:
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_MONITOR:
|
|
|
|
if (!ghcb_rax_is_valid(ghcb) ||
|
|
|
|
!ghcb_rcx_is_valid(ghcb) ||
|
|
|
|
!ghcb_rdx_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
break;
|
|
|
|
case SVM_EXIT_MWAIT:
|
|
|
|
if (!ghcb_rax_is_valid(ghcb) ||
|
|
|
|
!ghcb_rcx_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
break;
|
2020-12-10 17:09:53 +00:00
|
|
|
case SVM_VMGEXIT_MMIO_READ:
|
|
|
|
case SVM_VMGEXIT_MMIO_WRITE:
|
|
|
|
if (!ghcb_sw_scratch_is_valid(ghcb))
|
|
|
|
goto vmgexit_err;
|
|
|
|
break;
|
2020-12-14 16:16:03 +00:00
|
|
|
case SVM_VMGEXIT_NMI_COMPLETE:
|
KVM: SVM: Add support for booting APs in an SEV-ES guest
Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.
Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.
First AP boot (first INIT-SIPI-SIPI sequence):
Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
support. It is up to the guest to transfer control of the AP to the
proper location.
Subsequent AP boot:
KVM will expect to receive an AP Reset Hold exit event indicating that
the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
awaken it. When the AP Reset Hold exit event is received, KVM will place
the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
sequence, KVM will make the vCPU runnable. It is again up to the guest
to then transfer control of the AP to the proper location.
To differentiate between an actual HLT and an AP Reset Hold, a new MP
state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
placed in upon receiving the AP Reset Hold exit event. Additionally, to
communicate the AP Reset Hold exit event up to userspace (if needed), a
new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.
A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <e8fbebe8eb161ceaabdad7c01a5859a78b424d5e.1609791600.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-01-04 20:20:01 +00:00
|
|
|
case SVM_VMGEXIT_AP_HLT_LOOP:
|
2020-12-15 17:44:07 +00:00
|
|
|
case SVM_VMGEXIT_AP_JUMP_TABLE:
|
2020-12-10 17:09:47 +00:00
|
|
|
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
|
|
|
|
break;
|
|
|
|
default:
|
2021-12-02 18:52:05 +00:00
|
|
|
reason = GHCB_ERR_INVALID_EVENT;
|
2020-12-10 17:09:47 +00:00
|
|
|
goto vmgexit_err;
|
|
|
|
}
|
|
|
|
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for that a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
return 0;
|
2020-12-10 17:09:47 +00:00
|
|
|
|
|
|
|
vmgexit_err:
|
|
|
|
vcpu = &svm->vcpu;
|
|
|
|
|
2021-12-02 18:52:05 +00:00
|
|
|
if (reason == GHCB_ERR_INVALID_USAGE) {
|
2020-12-10 17:09:47 +00:00
|
|
|
vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
|
|
|
|
ghcb->ghcb_usage);
|
2021-12-02 18:52:05 +00:00
|
|
|
} else if (reason == GHCB_ERR_INVALID_EVENT) {
|
|
|
|
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
|
|
|
|
exit_code);
|
2020-12-10 17:09:47 +00:00
|
|
|
} else {
|
2021-12-02 18:52:05 +00:00
|
|
|
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
|
2020-12-10 17:09:47 +00:00
|
|
|
exit_code);
|
|
|
|
dump_ghcb(svm);
|
|
|
|
}
|
|
|
|
|
2021-12-02 18:52:05 +00:00
|
|
|
/* Clear the valid entries fields */
|
|
|
|
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
|
|
|
|
|
|
|
|
ghcb_set_sw_exit_info_1(ghcb, 2);
|
|
|
|
ghcb_set_sw_exit_info_2(ghcb, reason);
|
2020-12-10 17:09:47 +00:00
|
|
|
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for that a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
/* Resume the guest to "return" the error code. */
|
|
|
|
return 1;
|
2020-12-10 17:09:47 +00:00
|
|
|
}
|
|
|
|
|
2021-05-06 20:14:41 +00:00
|
|
|
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
|
2020-12-10 17:09:47 +00:00
|
|
|
{
|
2021-10-21 17:42:59 +00:00
|
|
|
if (!svm->sev_es.ghcb)
|
2020-12-10 17:09:47 +00:00
|
|
|
return;
|
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
if (svm->sev_es.ghcb_sa_free) {
|
2020-12-10 17:09:53 +00:00
|
|
|
/*
|
|
|
|
* The scratch area lives outside the GHCB, so there is a
|
|
|
|
* buffer that, depending on the operation performed, may
|
|
|
|
* need to be synced, then freed.
|
|
|
|
*/
|
2021-10-21 17:42:59 +00:00
|
|
|
if (svm->sev_es.ghcb_sa_sync) {
|
2020-12-10 17:09:53 +00:00
|
|
|
kvm_write_guest(svm->vcpu.kvm,
|
2021-10-21 17:42:59 +00:00
|
|
|
ghcb_get_sw_scratch(svm->sev_es.ghcb),
|
|
|
|
svm->sev_es.ghcb_sa,
|
|
|
|
svm->sev_es.ghcb_sa_len);
|
|
|
|
svm->sev_es.ghcb_sa_sync = false;
|
2020-12-10 17:09:53 +00:00
|
|
|
}
|
|
|
|
|
2021-11-09 22:23:50 +00:00
|
|
|
kvfree(svm->sev_es.ghcb_sa);
|
2021-10-21 17:42:59 +00:00
|
|
|
svm->sev_es.ghcb_sa = NULL;
|
|
|
|
svm->sev_es.ghcb_sa_free = false;
|
2020-12-10 17:09:53 +00:00
|
|
|
}
|
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
|
2020-12-10 17:09:48 +00:00
|
|
|
|
2020-12-10 17:09:47 +00:00
|
|
|
sev_es_sync_to_ghcb(svm);
|
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
|
|
|
|
svm->sev_es.ghcb = NULL;
|
2020-12-10 17:09:47 +00:00
|
|
|
}
|
|
|
|
|
2020-03-24 09:41:54 +00:00
|
|
|
void pre_sev_run(struct vcpu_svm *svm, int cpu)
|
|
|
|
{
|
2022-11-09 14:07:55 +00:00
|
|
|
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
|
2020-03-24 09:41:54 +00:00
|
|
|
int asid = sev_get_asid(svm->vcpu.kvm);
|
|
|
|
|
|
|
|
/* Assign the asid allocated with this SEV guest */
|
2020-11-30 14:39:59 +00:00
|
|
|
svm->asid = asid;
|
2020-03-24 09:41:54 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Flush guest TLB:
|
|
|
|
*
|
|
|
|
* 1) when different VMCB for the same ASID is to be run on the same host CPU.
|
|
|
|
* 2) or this VMCB was executed on different host CPU in previous VMRUNs.
|
|
|
|
*/
|
|
|
|
if (sd->sev_vmcbs[asid] == svm->vmcb &&
|
2020-06-03 23:56:22 +00:00
|
|
|
svm->vcpu.arch.last_vmentry_cpu == cpu)
|
2020-03-24 09:41:54 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
sd->sev_vmcbs[asid] = svm->vmcb;
|
|
|
|
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
|
2020-06-25 08:03:23 +00:00
|
|
|
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
|
2020-03-24 09:41:54 +00:00
|
|
|
}
|
2020-12-10 17:09:47 +00:00
|
|
|
|
2020-12-10 17:09:53 +00:00
|
|
|
/*
 * A size of 16 pages, expressed in bytes.
 * NOTE(review): presumably the upper bound on the guest-supplied scratch
 * area length accepted by setup_vmgexit_scratch() - confirm at the use site.
 */
#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE)
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for that a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
/*
 * Resolve the scratch area named by the guest in the GHCB for the current
 * VMGEXIT and publish it in svm->sev_es.ghcb_sa / ghcb_sa_len.
 *
 * @svm:  vCPU whose GHCB (svm->sev_es.ghcb) is being processed
 * @sync: stored in ghcb_sa_sync when the scratch area lies outside the GHCB,
 *        i.e. whether the kernel copy must later be written back to the guest
 * @len:  number of bytes the guest claims for the scratch area
 *
 * A scratch area that starts inside the GHCB page is used in place and must
 * fit entirely within the shared buffer.  Any other scratch area is copied
 * from guest memory into a freshly allocated kernel buffer, which is flagged
 * via ghcb_sa_free.
 *
 * Return: 0 on success, -ENOMEM if the kernel buffer cannot be allocated,
 * -EFAULT if the guest memory cannot be read, or 1 after reporting
 * GHCB_ERR_INVALID_SCRATCH_AREA to the guest through the GHCB exit-info
 * fields.
 */
static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct ghcb *ghcb = svm->sev_es.ghcb;
	u64 sa_gpa_first, sa_gpa_limit;
	void *sa_buf;

	sa_gpa_first = ghcb_get_sw_scratch(ghcb);
	if (!sa_gpa_first) {
		pr_err("vmgexit: scratch gpa not provided\n");
		goto e_scratch;
	}

	/* Reject a length that makes the end address wrap around. */
	sa_gpa_limit = sa_gpa_first + len;
	if (sa_gpa_limit < sa_gpa_first) {
		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
		       len, sa_gpa_first);
		goto e_scratch;
	}

	if ((sa_gpa_first & PAGE_MASK) != control->ghcb_gpa) {
		/*
		 * The scratch area lives outside the GHCB page.  It has to be
		 * pulled into a kernel buffer, so cap how much the guest may
		 * request.
		 */
		if (len > GHCB_SCRATCH_AREA_LIMIT) {
			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
			       len, GHCB_SCRATCH_AREA_LIMIT);
			goto e_scratch;
		}

		sa_buf = kvzalloc(len, GFP_KERNEL_ACCOUNT);
		if (!sa_buf)
			return -ENOMEM;

		if (kvm_read_guest(svm->vcpu.kvm, sa_gpa_first, sa_buf, len)) {
			/* Could not copy the scratch area out of the guest */
			pr_err("vmgexit: kvm_read_guest for scratch area failed\n");

			kvfree(sa_buf);
			return -EFAULT;
		}

		/*
		 * Remember whether the operation needs the buffer written
		 * back to guest memory before the vCPU runs again (a read
		 * was requested), and that the buffer must be freed.
		 */
		svm->sev_es.ghcb_sa_sync = sync;
		svm->sev_es.ghcb_sa_free = true;
	} else {
		/* The scratch area starts somewhere inside the GHCB page. */
		u64 shared_first = control->ghcb_gpa +
				   offsetof(struct ghcb, shared_buffer);
		u64 shared_limit = control->ghcb_gpa +
				   offsetof(struct ghcb, reserved_0xff0);

		/*
		 * In that case it must sit completely within the GHCB shared
		 * buffer area.
		 */
		if (sa_gpa_first < shared_first ||
		    sa_gpa_limit > shared_limit) {
			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
			       sa_gpa_first, sa_gpa_limit);
			goto e_scratch;
		}

		/* Point straight into the mapped GHCB at the same offset. */
		sa_buf = (void *)ghcb + (sa_gpa_first - control->ghcb_gpa);
	}

	svm->sev_es.ghcb_sa_len = len;
	svm->sev_es.ghcb_sa = sa_buf;

	return 0;

e_scratch:
	/* Hand the failure back to the guest via the GHCB exit-info fields. */
	ghcb_set_sw_exit_info_1(ghcb, 2);
	ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);

	return 1;
}
|
|
|
|
|
2020-12-10 17:09:50 +00:00
|
|
|
/*
 * Replace one bit-field of the GHCB MSR value held in the VMCB control area
 * (control.ghcb_gpa): the bits selected by @mask shifted left by @pos are
 * cleared and then loaded with @value & @mask.
 */
static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
			      unsigned int pos)
{
	u64 msr = svm->vmcb->control.ghcb_gpa;

	msr &= ~(mask << pos);
	msr |= (value & mask) << pos;

	svm->vmcb->control.ghcb_gpa = msr;
}
|
|
|
|
|
|
|
|
/*
 * Extract one bit-field from the GHCB MSR value held in the VMCB control
 * area (control.ghcb_gpa): shift right by @pos, then apply @mask.
 */
static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
{
	u64 shifted = svm->vmcb->control.ghcb_gpa >> pos;

	return shifted & mask;
}
|
|
|
|
|
2020-12-10 17:09:49 +00:00
|
|
|
/* Overwrite the whole GHCB MSR value (control.ghcb_gpa) with @value. */
static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
{
	struct vmcb_control_area *control = &svm->vmcb->control;

	control->ghcb_gpa = value;
}
|
|
|
|
|
2020-12-10 17:09:47 +00:00
|
|
|
static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
|
|
|
|
{
|
2020-12-10 17:09:49 +00:00
|
|
|
struct vmcb_control_area *control = &svm->vmcb->control;
|
2020-12-10 17:09:50 +00:00
|
|
|
struct kvm_vcpu *vcpu = &svm->vcpu;
|
2020-12-10 17:09:49 +00:00
|
|
|
u64 ghcb_info;
|
2020-12-10 17:09:50 +00:00
|
|
|
int ret = 1;
|
2020-12-10 17:09:49 +00:00
|
|
|
|
|
|
|
ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
|
|
|
|
|
2020-12-10 17:09:52 +00:00
|
|
|
trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
|
|
|
|
control->ghcb_gpa);
|
|
|
|
|
2020-12-10 17:09:49 +00:00
|
|
|
switch (ghcb_info) {
|
|
|
|
case GHCB_MSR_SEV_INFO_REQ:
|
|
|
|
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
|
|
|
|
GHCB_VERSION_MIN,
|
|
|
|
sev_enc_bit));
|
|
|
|
break;
|
2020-12-10 17:09:50 +00:00
|
|
|
case GHCB_MSR_CPUID_REQ: {
|
|
|
|
u64 cpuid_fn, cpuid_reg, cpuid_value;
|
|
|
|
|
|
|
|
cpuid_fn = get_ghcb_msr_bits(svm,
|
|
|
|
GHCB_MSR_CPUID_FUNC_MASK,
|
|
|
|
GHCB_MSR_CPUID_FUNC_POS);
|
|
|
|
|
|
|
|
/* Initialize the registers needed by the CPUID intercept */
|
|
|
|
vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
|
|
|
|
vcpu->arch.regs[VCPU_REGS_RCX] = 0;
|
|
|
|
|
2021-03-02 19:40:39 +00:00
|
|
|
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
|
2020-12-10 17:09:50 +00:00
|
|
|
if (!ret) {
|
2021-12-02 18:52:05 +00:00
|
|
|
/* Error, keep GHCB MSR value as-is */
|
2020-12-10 17:09:50 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
cpuid_reg = get_ghcb_msr_bits(svm,
|
|
|
|
GHCB_MSR_CPUID_REG_MASK,
|
|
|
|
GHCB_MSR_CPUID_REG_POS);
|
|
|
|
if (cpuid_reg == 0)
|
|
|
|
cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
|
|
|
|
else if (cpuid_reg == 1)
|
|
|
|
cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
|
|
|
|
else if (cpuid_reg == 2)
|
|
|
|
cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
|
|
|
|
else
|
|
|
|
cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
|
|
|
|
|
|
|
|
set_ghcb_msr_bits(svm, cpuid_value,
|
|
|
|
GHCB_MSR_CPUID_VALUE_MASK,
|
|
|
|
GHCB_MSR_CPUID_VALUE_POS);
|
|
|
|
|
|
|
|
set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
|
|
|
|
GHCB_MSR_INFO_MASK,
|
|
|
|
GHCB_MSR_INFO_POS);
|
|
|
|
break;
|
|
|
|
}
|
2020-12-10 17:09:51 +00:00
|
|
|
case GHCB_MSR_TERM_REQ: {
|
|
|
|
u64 reason_set, reason_code;
|
|
|
|
|
|
|
|
reason_set = get_ghcb_msr_bits(svm,
|
|
|
|
GHCB_MSR_TERM_REASON_SET_MASK,
|
|
|
|
GHCB_MSR_TERM_REASON_SET_POS);
|
|
|
|
reason_code = get_ghcb_msr_bits(svm,
|
|
|
|
GHCB_MSR_TERM_REASON_MASK,
|
|
|
|
GHCB_MSR_TERM_REASON_POS);
|
|
|
|
pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
|
|
|
|
reason_set, reason_code);
|
2021-12-02 18:52:05 +00:00
|
|
|
|
2022-04-07 21:02:33 +00:00
|
|
|
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
|
2022-04-29 10:38:56 +00:00
|
|
|
vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM;
|
2022-04-07 21:02:33 +00:00
|
|
|
vcpu->run->system_event.ndata = 1;
|
2022-04-29 10:38:56 +00:00
|
|
|
vcpu->run->system_event.data[0] = control->ghcb_gpa;
|
2022-04-07 21:02:33 +00:00
|
|
|
|
|
|
|
return 0;
|
2020-12-10 17:09:51 +00:00
|
|
|
}
|
2020-12-10 17:09:49 +00:00
|
|
|
default:
|
2021-12-02 18:52:05 +00:00
|
|
|
/* Error, keep GHCB MSR value as-is */
|
|
|
|
break;
|
2020-12-10 17:09:49 +00:00
|
|
|
}
|
|
|
|
|
2020-12-10 17:09:52 +00:00
|
|
|
trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
|
|
|
|
control->ghcb_gpa, ret);
|
|
|
|
|
2020-12-10 17:09:50 +00:00
|
|
|
return ret;
|
2020-12-10 17:09:47 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 19:40:39 +00:00
|
|
|
int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
|
2020-12-10 17:09:47 +00:00
|
|
|
{
|
2021-03-02 19:40:39 +00:00
|
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
2020-12-10 17:09:47 +00:00
|
|
|
struct vmcb_control_area *control = &svm->vmcb->control;
|
|
|
|
u64 ghcb_gpa, exit_code;
|
|
|
|
struct ghcb *ghcb;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Validate the GHCB */
|
|
|
|
ghcb_gpa = control->ghcb_gpa;
|
|
|
|
if (ghcb_gpa & GHCB_MSR_INFO_MASK)
|
|
|
|
return sev_handle_vmgexit_msr_protocol(svm);
|
|
|
|
|
|
|
|
if (!ghcb_gpa) {
|
2021-03-02 19:40:39 +00:00
|
|
|
vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
|
2021-12-02 18:52:05 +00:00
|
|
|
|
|
|
|
/* Without a GHCB, just return right back to the guest */
|
|
|
|
return 1;
|
2020-12-10 17:09:47 +00:00
|
|
|
}
|
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
|
2020-12-10 17:09:47 +00:00
|
|
|
/* Unable to map GHCB from guest */
|
2021-03-02 19:40:39 +00:00
|
|
|
vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
|
2020-12-10 17:09:47 +00:00
|
|
|
ghcb_gpa);
|
2021-12-02 18:52:05 +00:00
|
|
|
|
|
|
|
/* Without a GHCB, just return right back to the guest */
|
|
|
|
return 1;
|
2020-12-10 17:09:47 +00:00
|
|
|
}
|
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
|
|
|
|
ghcb = svm->sev_es.ghcb_map.hva;
|
2020-12-10 17:09:47 +00:00
|
|
|
|
2021-03-02 19:40:39 +00:00
|
|
|
trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
|
2020-12-10 17:09:48 +00:00
|
|
|
|
2020-12-10 17:09:47 +00:00
|
|
|
exit_code = ghcb_get_sw_exit_code(ghcb);
|
|
|
|
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
ret = sev_es_validate_vmgexit(svm);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2020-12-10 17:09:47 +00:00
|
|
|
|
|
|
|
sev_es_sync_from_ghcb(svm);
|
|
|
|
ghcb_set_sw_exit_info_1(ghcb, 0);
|
|
|
|
ghcb_set_sw_exit_info_2(ghcb, 0);
|
|
|
|
|
|
|
|
switch (exit_code) {
|
2020-12-10 17:09:53 +00:00
|
|
|
case SVM_VMGEXIT_MMIO_READ:
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
|
|
|
|
if (ret)
|
2020-12-10 17:09:53 +00:00
|
|
|
break;
|
|
|
|
|
2021-03-02 19:40:39 +00:00
|
|
|
ret = kvm_sev_es_mmio_read(vcpu,
|
2020-12-10 17:09:53 +00:00
|
|
|
control->exit_info_1,
|
|
|
|
control->exit_info_2,
|
2021-10-21 17:42:59 +00:00
|
|
|
svm->sev_es.ghcb_sa);
|
2020-12-10 17:09:53 +00:00
|
|
|
break;
|
|
|
|
case SVM_VMGEXIT_MMIO_WRITE:
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
ret = setup_vmgexit_scratch(svm, false, control->exit_info_2);
|
|
|
|
if (ret)
|
2020-12-10 17:09:53 +00:00
|
|
|
break;
|
|
|
|
|
2021-03-02 19:40:39 +00:00
|
|
|
ret = kvm_sev_es_mmio_write(vcpu,
|
2020-12-10 17:09:53 +00:00
|
|
|
control->exit_info_1,
|
|
|
|
control->exit_info_2,
|
2021-10-21 17:42:59 +00:00
|
|
|
svm->sev_es.ghcb_sa);
|
2020-12-10 17:09:53 +00:00
|
|
|
break;
|
2020-12-14 16:16:03 +00:00
|
|
|
case SVM_VMGEXIT_NMI_COMPLETE:
|
2021-03-02 19:40:39 +00:00
|
|
|
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
|
2020-12-14 16:16:03 +00:00
|
|
|
break;
|
KVM: SVM: Add support for booting APs in an SEV-ES guest
Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.
Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.
First AP boot (first INIT-SIPI-SIPI sequence):
Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
support. It is up to the guest to transfer control of the AP to the
proper location.
Subsequent AP boot:
KVM will expect to receive an AP Reset Hold exit event indicating that
the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
awaken it. When the AP Reset Hold exit event is received, KVM will place
the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
sequence, KVM will make the vCPU runnable. It is again up to the guest
to then transfer control of the AP to the proper location.
To differentiate between an actual HLT and an AP Reset Hold, a new MP
state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
placed in upon receiving the AP Reset Hold exit event. Additionally, to
communicate the AP Reset Hold exit event up to userspace (if needed), a
new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.
A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <e8fbebe8eb161ceaabdad7c01a5859a78b424d5e.1609791600.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-01-04 20:20:01 +00:00
|
|
|
case SVM_VMGEXIT_AP_HLT_LOOP:
|
2021-03-02 19:40:39 +00:00
|
|
|
ret = kvm_emulate_ap_reset_hold(vcpu);
|
KVM: SVM: Add support for booting APs in an SEV-ES guest
Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.
Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.
First AP boot (first INIT-SIPI-SIPI sequence):
Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
support. It is up to the guest to transfer control of the AP to the
proper location.
Subsequent AP boot:
KVM will expect to receive an AP Reset Hold exit event indicating that
the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
awaken it. When the AP Reset Hold exit event is received, KVM will place
the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
sequence, KVM will make the vCPU runnable. It is again up to the guest
to then transfer control of the AP to the proper location.
To differentiate between an actual HLT and an AP Reset Hold, a new MP
state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
placed in upon receiving the AP Reset Hold exit event. Additionally, to
communicate the AP Reset Hold exit event up to userspace (if needed), a
new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.
A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <e8fbebe8eb161ceaabdad7c01a5859a78b424d5e.1609791600.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-01-04 20:20:01 +00:00
|
|
|
break;
|
2020-12-15 17:44:07 +00:00
|
|
|
case SVM_VMGEXIT_AP_JUMP_TABLE: {
|
2021-03-02 19:40:39 +00:00
|
|
|
struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
|
2020-12-15 17:44:07 +00:00
|
|
|
|
|
|
|
switch (control->exit_info_1) {
|
|
|
|
case 0:
|
|
|
|
/* Set AP jump table address */
|
|
|
|
sev->ap_jump_table = control->exit_info_2;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
/* Get AP jump table address */
|
|
|
|
ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
|
|
|
|
control->exit_info_1);
|
2021-12-02 18:52:05 +00:00
|
|
|
ghcb_set_sw_exit_info_1(ghcb, 2);
|
|
|
|
ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
|
2020-12-15 17:44:07 +00:00
|
|
|
}
|
|
|
|
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" from a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
ret = 1;
|
2020-12-15 17:44:07 +00:00
|
|
|
break;
|
|
|
|
}
|
2020-12-10 17:09:47 +00:00
|
|
|
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
|
2021-03-02 19:40:39 +00:00
|
|
|
vcpu_unimpl(vcpu,
|
2020-12-10 17:09:47 +00:00
|
|
|
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
|
|
|
|
control->exit_info_1, control->exit_info_2);
|
2021-11-09 22:23:49 +00:00
|
|
|
ret = -EINVAL;
|
2020-12-10 17:09:47 +00:00
|
|
|
break;
|
|
|
|
default:
|
2021-03-02 19:40:39 +00:00
|
|
|
ret = svm_invoke_exit_handler(vcpu, exit_code);
|
2020-12-10 17:09:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2020-12-10 17:09:54 +00:00
|
|
|
|
|
|
|
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
|
|
|
|
{
|
2021-10-25 16:14:31 +00:00
|
|
|
int count;
|
|
|
|
int bytes;
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for that a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
int r;
|
2021-10-25 16:14:31 +00:00
|
|
|
|
|
|
|
if (svm->vmcb->control.exit_info_2 > INT_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
count = svm->vmcb->control.exit_info_2;
|
|
|
|
if (unlikely(check_mul_overflow(count, size, &bytes)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
KVM: SVM: Exit to userspace on ENOMEM/EFAULT GHCB errors
Exit to userspace if setup_vmgexit_scratch() fails due to OOM or because
copying data from guest (userspace) memory failed/faulted. The OOM
scenario is clearcut, it's userspace's decision as to whether it should
terminate the guest, free memory, etc...
As for -EFAULT, arguably, any guest issue is a violation of the guest's
contract with userspace, and thus userspace needs to decide how to
proceed. E.g. userspace defines what is RAM vs. MMIO and communicates
that directly to the guest, KVM is not involved in deciding what is/isn't
RAM nor in communicating that information to the guest. If the scratch
GPA doesn't resolve to a memslot, then the guest is not honoring the
memory configuration as defined by userspace.
And if userspace unmaps an hva for whatever reason, then exiting to
userspace with -EFAULT is absolutely the right thing to do. KVM's ABI
currently sucks and doesn't provide enough information to act on the
-EFAULT, but that will hopefully be remedied in the future as there are
multiple use cases, e.g. uffd and virtiofs truncation, that shouldn't
require any work in KVM beyond returning -EFAULT with a small amount of
metadata.
KVM could define its ABI such that failure to access the scratch area is
reflected into the guest, i.e. establish a contract with userspace, but
that's undesirable as it limits KVM's options in the future, e.g. in the
potential uffd case any failure on a uaccess needs to kick out to
userspace. KVM does have several cases where it reflects these errors
into the guest, e.g. kvm_pv_clock_pairing() and Hyper-V emulation, but
KVM would preferably "fix" those instead of propagating the falsehood
that any memory failure is the guest's fault.
Lastly, returning a boolean as an "error" for that a helper that isn't
named accordingly never works out well.
Fixes: ad5b353240c8 ("KVM: SVM: Do not terminate SEV-ES guests on GHCB validation failure")
Cc: Alper Gun <alpergun@google.com>
Cc: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220225205209.3881130-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2022-02-25 20:52:09 +00:00
|
|
|
r = setup_vmgexit_scratch(svm, in, bytes);
|
|
|
|
if (r)
|
|
|
|
return r;
|
2020-12-10 17:09:54 +00:00
|
|
|
|
2021-10-21 17:42:59 +00:00
|
|
|
return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
|
2021-11-11 15:52:26 +00:00
|
|
|
count, in);
|
2020-12-10 17:09:54 +00:00
|
|
|
}
|
2020-12-10 17:10:06 +00:00
|
|
|
|
2022-06-23 17:34:06 +00:00
|
|
|
static void sev_es_init_vmcb(struct vcpu_svm *svm)
|
2020-12-10 17:10:06 +00:00
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu = &svm->vcpu;
|
|
|
|
|
|
|
|
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
|
|
|
|
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* An SEV-ES guest requires a VMSA area that is a separate from the
|
|
|
|
* VMCB page. Do not include the encryption mask on the VMSA physical
|
|
|
|
* address since hardware will access it using the guest key.
|
|
|
|
*/
|
2021-10-21 17:42:59 +00:00
|
|
|
svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
|
2020-12-10 17:10:06 +00:00
|
|
|
|
|
|
|
/* Can't intercept CR register access, HV can't modify CR registers */
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_CR4_READ);
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_CR8_READ);
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
|
|
|
|
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
|
|
|
|
|
|
|
|
/* Track EFER/CR register changes */
|
|
|
|
svm_set_intercept(svm, TRAP_EFER_WRITE);
|
|
|
|
svm_set_intercept(svm, TRAP_CR0_WRITE);
|
|
|
|
svm_set_intercept(svm, TRAP_CR4_WRITE);
|
|
|
|
svm_set_intercept(svm, TRAP_CR8_WRITE);
|
|
|
|
|
|
|
|
/* No support for enable_vmware_backdoor */
|
|
|
|
clr_exception_intercept(svm, GP_VECTOR);
|
|
|
|
|
|
|
|
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_XSETBV);
|
|
|
|
|
|
|
|
/* Clear intercepts on selected MSRs */
|
|
|
|
set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
|
|
|
|
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
|
|
|
|
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
|
|
|
|
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
|
|
|
|
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
|
|
|
|
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
|
2022-04-19 20:54:44 +00:00
|
|
|
|
|
|
|
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
|
|
|
|
(guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
|
|
|
|
guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
|
|
|
|
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
|
|
|
|
if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
|
|
|
|
svm_clr_intercept(svm, INTERCEPT_RDTSCP);
|
|
|
|
}
|
2020-12-10 17:10:06 +00:00
|
|
|
}
|
|
|
|
|
2022-06-23 17:34:06 +00:00
|
|
|
void sev_init_vmcb(struct vcpu_svm *svm)
|
|
|
|
{
|
|
|
|
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
|
|
|
|
clr_exception_intercept(svm, UD_VECTOR);
|
|
|
|
|
|
|
|
if (sev_es_guest(svm->vcpu.kvm))
|
|
|
|
sev_es_init_vmcb(svm);
|
|
|
|
}
|
|
|
|
|
2021-09-21 00:03:02 +00:00
|
|
|
void sev_es_vcpu_reset(struct vcpu_svm *svm)
|
2020-12-10 17:10:06 +00:00
|
|
|
{
|
|
|
|
/*
|
2021-09-21 00:03:02 +00:00
|
|
|
* Set the GHCB MSR value as per the GHCB specification when emulating
|
|
|
|
* vCPU RESET for an SEV-ES guest.
|
2020-12-10 17:10:06 +00:00
|
|
|
*/
|
|
|
|
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
|
|
|
|
GHCB_VERSION_MIN,
|
|
|
|
sev_enc_bit));
|
|
|
|
}
|
2020-12-10 17:10:07 +00:00
|
|
|
|
2022-04-05 18:27:43 +00:00
|
|
|
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
|
2020-12-10 17:10:07 +00:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
|
2022-01-25 16:11:30 +00:00
|
|
|
* of which one step is to perform a VMLOAD. KVM performs the
|
|
|
|
* corresponding VMSAVE in svm_prepare_guest_switch for both
|
|
|
|
* traditional and SEV-ES guests.
|
2020-12-10 17:10:07 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/* XCR0 is restored on VMEXIT, save the current host value */
|
|
|
|
hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
|
|
|
|
2021-03-18 14:28:01 +00:00
|
|
|
/* PKRU is restored on VMEXIT, save the current host value */
|
2020-12-10 17:10:07 +00:00
|
|
|
hostsa->pkru = read_pkru();
|
|
|
|
|
|
|
|
/* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
|
|
|
|
hostsa->xss = host_xss;
|
|
|
|
}
|
|
|
|
|
KVM: SVM: Add support for booting APs in an SEV-ES guest
Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.
Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.
First AP boot (first INIT-SIPI-SIPI sequence):
Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
support. It is up to the guest to transfer control of the AP to the
proper location.
Subsequent AP boot:
KVM will expect to receive an AP Reset Hold exit event indicating that
the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
awaken it. When the AP Reset Hold exit event is received, KVM will place
the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
sequence, KVM will make the vCPU runnable. It is again up to the guest
to then transfer control of the AP to the proper location.
To differentiate between an actual HLT and an AP Reset Hold, a new MP
state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
placed in upon receiving the AP Reset Hold exit event. Additionally, to
communicate the AP Reset Hold exit event up to userspace (if needed), a
new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.
A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <e8fbebe8eb161ceaabdad7c01a5859a78b424d5e.1609791600.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-01-04 20:20:01 +00:00
|
|
|
/*
 * Deliver a SIPI to an SEV-ES vCPU.
 *
 * @vcpu:   target vCPU
 * @vector: SIPI vector; not used here
 *
 * The first SIPI only records that it was received, so the vCPU runs with the
 * values initially set by the VMM.  Any later SIPI is treated as the return
 * from an AP Reset Hold VMGEXIT, where the guest sets CS and RIP itself: the
 * vCPU's GHCB, if there is one, gets a non-zero SW_EXIT_INFO_2.
 */
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->sev_es.received_first_sipi) {
		/* Subsequent SIPI: signal the parked AP through its GHCB. */
		if (svm->sev_es.ghcb)
			ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
		return;
	}

	/* First SIPI: nothing to deliver, just remember it was seen. */
	svm->sev_es.received_first_sipi = true;
}
|