Add support for SEV-ES guests booting through the 32-bit boot path, along with

cleanups, fixes and improvements.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmCGkbQACgkQEsHwGGHe
 VUrJ4w/+M9TbCppcILNvtaHn0mgpcDVmKvRSDdLl/MWcW1kuzczcdFYAK+OFFD0E
 TYKSEmkJUz3Tm0YBkO9PSPTBk+hnduPunXRk2Mzse1Uv3LxPuWEN3q6ZAfP1rOZ1
 3nlEnzHCWZdf4d7uz49qCXj96bfv98+zU2DaCoVoNUImp8jzo6hMtTPI4N31Tply
 Rb0b0acIkdmy0eaADilMciimZevs9EF3KgiwSd0AUAJE1aRtRpPKtv2F1OraJPkH
 T7AunJvoO8Sb2vpHfaW8iZrx2HKE8KZ4QOfM+dAXurjadlPVBLN34MC8FIw4tIS+
 m2dc/CMaVy1QpyHKOTZqY9ZsCndunrMJXsolhCyBjA6fAZ1aFZswxRWUeGrOkCJ2
 ZGJetB0tADi0gIRZerwyPXOKLiJBo8BSmIr8FzHq8CYYoxKH9D1dqEZVj9kBcGLJ
 SYbgUIKNuw54RzE00S8i2s625RG5A7qn6GrRMvnkVyJnKoD01na0trND2AbufBJz
 oDhBXfvP5SwswEt4YYZ1rn3JO1nRZzn4WGfiUQ4ElOEFYuUEZOJtcw1LHwDJ0LcQ
 bfOs0mmDFajFH1DyILyHfji4rdqHGWIpGIHfmYs98Njtfa8dtximU/csr69by/xV
 dcycXbPaw5psDe4Acw2vb7DM7h7T9fHNG+VgRJb25gXeywGutac=
 =AUGR
 -----END PGP SIGNATURE-----

Merge tag 'x86_seves_for_v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 AMD secure virtualization (SEV-ES) updates from Borislav Petkov:
 "Add support for SEV-ES guests booting through the 32-bit boot path,
  along with cleanups, fixes and improvements"

* tag 'x86_seves_for_v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sev-es: Optimize __sev_es_ist_enter() for better readability
  x86/sev-es: Replace open-coded hlt-loops with sev_es_terminate()
  x86/boot/compressed/64: Check SEV encryption in the 32-bit boot-path
  x86/boot/compressed/64: Add CPUID sanity check to 32-bit boot-path
  x86/boot/compressed/64: Add 32-bit boot #VC handler
  x86/boot/compressed/64: Setup IDT in startup_32 boot path
  x86/boot/compressed/64: Reload CS in startup_32
  x86/sev: Do not require Hypervisor CPUID bit for SEV guests
  x86/boot/compressed/64: Cleanup exception handling before booting kernel
  x86/virtio: Have SEV guests enforce restricted virtio memory access
  x86/sev-es: Remove subtraction of res variable
This commit is contained in:
Linus Torvalds 2021-04-26 09:11:10 -07:00
commit 26a4ef7e48
11 changed files with 372 additions and 63 deletions

View File

@ -1519,6 +1519,7 @@ config AMD_MEM_ENCRYPT
select ARCH_USE_MEMREMAP_PROT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select INSTRUCTION_DECODER
select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
help
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory

View File

@ -34,6 +34,7 @@
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
#include <asm/trapnr.h>
#include "pgtable.h"
/*
@ -107,9 +108,19 @@ SYM_FUNC_START(startup_32)
movl %eax, %gs
movl %eax, %ss
/* setup a stack and make sure cpu supports long mode. */
/* Setup a stack and load CS from current GDT */
leal rva(boot_stack_end)(%ebp), %esp
pushl $__KERNEL32_CS
leal rva(1f)(%ebp), %eax
pushl %eax
lretl
1:
/* Setup Exception handling for SEV-ES */
call startup32_load_idt
/* Make sure cpu supports long mode. */
call verify_cpu
testl %eax, %eax
jnz .Lno_longmode
@ -172,11 +183,21 @@ SYM_FUNC_START(startup_32)
*/
call get_sev_encryption_bit
xorl %edx, %edx
#ifdef CONFIG_AMD_MEM_ENCRYPT
testl %eax, %eax
jz 1f
subl $32, %eax /* Encryption bit is always above bit 31 */
bts %eax, %edx /* Set encryption mask for page tables */
/*
* Mark SEV as active in sev_status so that startup32_check_sev_cbit()
* will do a check. The sev_status memory will be fully initialized
* with the contents of MSR_AMD_SEV_STATUS later in
* set_sev_encryption_mask(). For now it is sufficient to know that SEV
* is active.
*/
movl $1, rva(sev_status)(%ebp)
1:
#endif
/* Initialize Page tables to 0 */
leal rva(pgtable)(%ebx), %edi
@ -261,6 +282,9 @@ SYM_FUNC_START(startup_32)
movl %esi, %edx
1:
#endif
/* Check if the C-bit position is correct when SEV is active */
call startup32_check_sev_cbit
pushl $__KERNEL_CS
pushl %eax
@ -694,6 +718,19 @@ SYM_DATA_START(boot_idt)
.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA_START(boot32_idt_desc)
.word boot32_idt_end - boot32_idt - 1
.long 0
SYM_DATA_END(boot32_idt_desc)
.balign 8
SYM_DATA_START(boot32_idt)
.rept 32
.quad 0
.endr
SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
#endif
#ifdef CONFIG_EFI_STUB
SYM_DATA(image_offset, .long 0)
#endif
@ -786,6 +823,137 @@ SYM_DATA_START_LOCAL(loaded_image_proto)
SYM_DATA_END(loaded_image_proto)
#endif
#ifdef CONFIG_AMD_MEM_ENCRYPT
__HEAD
.code32
/*
* Write an IDT entry into boot32_idt
*
* Parameters:
*
* %eax: Handler address
* %edx: Vector number
*
* Physical offset is expected in %ebp
*/
SYM_FUNC_START(startup32_set_idt_entry)
push %ebx
push %ecx
/* IDT entry address to %ebx */
leal rva(boot32_idt)(%ebp), %ebx
shl $3, %edx
addl %edx, %ebx
/* Build IDT entry, lower 4 bytes */
movl %eax, %edx
andl $0x0000ffff, %edx # Target code segment offset [15:0]
movl $__KERNEL32_CS, %ecx # Target code segment selector
shl $16, %ecx
orl %ecx, %edx
/* Store lower 4 bytes to IDT */
movl %edx, (%ebx)
/* Build IDT entry, upper 4 bytes */
movl %eax, %edx
andl $0xffff0000, %edx # Target code segment offset [31:16]
orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
/* Store upper 4 bytes to IDT */
movl %edx, 4(%ebx)
pop %ecx
pop %ebx
ret
SYM_FUNC_END(startup32_set_idt_entry)
#endif
SYM_FUNC_START(startup32_load_idt)
#ifdef CONFIG_AMD_MEM_ENCRYPT
/* #VC handler */
leal rva(startup32_vc_handler)(%ebp), %eax
movl $X86_TRAP_VC, %edx
call startup32_set_idt_entry
/* Load IDT */
leal rva(boot32_idt)(%ebp), %eax
movl %eax, rva(boot32_idt_desc+2)(%ebp)
lidt rva(boot32_idt_desc)(%ebp)
#endif
ret
SYM_FUNC_END(startup32_load_idt)
/*
* Check for the correct C-bit position when the startup_32 boot-path is used.
*
* The check makes use of the fact that all memory is encrypted when paging is
* disabled. The function creates 64 bits of random data using the RDRAND
* instruction. RDRAND is mandatory for SEV guests, so always available. If the
* hypervisor violates that the kernel will crash right here.
*
* The 64 bits of random data are stored to a memory location and at the same
* time kept in the %eax and %ebx registers. Since encryption is always active
* when paging is off the random data will be stored encrypted in main memory.
*
* Then paging is enabled. When the C-bit position is correct all memory is
* still mapped encrypted and comparing the register values with memory will
* succeed. An incorrect C-bit position will map all memory unencrypted, so that
* the compare will use the encrypted random data and fail.
*/
SYM_FUNC_START(startup32_check_sev_cbit)
#ifdef CONFIG_AMD_MEM_ENCRYPT
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
/* Check for non-zero sev_status */
movl rva(sev_status)(%ebp), %eax
testl %eax, %eax
jz 4f
/*
* Get two 32-bit random values - Don't bail out if RDRAND fails
* because it is better to prevent forward progress if no random value
* can be gathered.
*/
1: rdrand %eax
jnc 1b
2: rdrand %ebx
jnc 2b
/* Store to memory and keep it in the registers */
movl %eax, rva(sev_check_data)(%ebp)
movl %ebx, rva(sev_check_data+4)(%ebp)
/* Enable paging to see if encryption is active */
movl %cr0, %edx /* Backup %cr0 in %edx */
movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
movl %ecx, %cr0
cmpl %eax, rva(sev_check_data)(%ebp)
jne 3f
cmpl %ebx, rva(sev_check_data+4)(%ebp)
jne 3f
movl %edx, %cr0 /* Restore previous %cr0 */
jmp 4f
3: /* Check failed - hlt the machine */
hlt
jmp 3b
4:
popl %edx
popl %ecx
popl %ebx
popl %eax
#endif
ret
SYM_FUNC_END(startup32_check_sev_cbit)
/*
* Stack and heap for uncompression
*/

View File

@ -52,3 +52,17 @@ void load_stage2_idt(void)
load_boot_idt(&boot_idt_desc);
}
void cleanup_exception_handling(void)
{
/*
* Flush GHCB from cache and map it encrypted again when running as
* SEV-ES guest.
*/
sev_es_shutdown_ghcb();
/* Set a null-idt, disabling #PF and #VC handling */
boot_idt_desc.size = 0;
boot_idt_desc.address = 0;
load_boot_idt(&boot_idt_desc);
}

View File

@ -23,12 +23,6 @@ SYM_FUNC_START(get_sev_encryption_bit)
push %ecx
push %edx
/* Check if running under a hypervisor */
movl $1, %eax
cpuid
bt $31, %ecx /* Check the hypervisor bit */
jnc .Lno_sev
movl $0x80000000, %eax /* CPUID to check the highest leaf */
cpuid
cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
@ -67,10 +61,132 @@ SYM_FUNC_START(get_sev_encryption_bit)
ret
SYM_FUNC_END(get_sev_encryption_bit)
/**
* sev_es_req_cpuid - Request a CPUID value from the Hypervisor using
* the GHCB MSR protocol
*
* @%eax: Register to request (0=EAX, 1=EBX, 2=ECX, 3=EDX)
* @%edx: CPUID Function
*
* Returns 0 in %eax on success, non-zero on failure
* %edx returns CPUID value on success
*/
SYM_CODE_START_LOCAL(sev_es_req_cpuid)
shll $30, %eax
orl $0x00000004, %eax
movl $MSR_AMD64_SEV_ES_GHCB, %ecx
wrmsr
rep; vmmcall # VMGEXIT
rdmsr
/* Check response */
movl %eax, %ecx
andl $0x3ffff000, %ecx # Bits [12-29] MBZ
jnz 2f
/* Check return code */
andl $0xfff, %eax
cmpl $5, %eax
jne 2f
/* All good - return success */
xorl %eax, %eax
1:
ret
2:
movl $-1, %eax
jmp 1b
SYM_CODE_END(sev_es_req_cpuid)
SYM_CODE_START(startup32_vc_handler)
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
/* Keep CPUID function in %ebx */
movl %eax, %ebx
/* Check if error-code == SVM_EXIT_CPUID */
cmpl $0x72, 16(%esp)
jne .Lfail
movl $0, %eax # Request CPUID[fn].EAX
movl %ebx, %edx # CPUID fn
call sev_es_req_cpuid # Call helper
testl %eax, %eax # Check return code
jnz .Lfail
movl %edx, 12(%esp) # Store result
movl $1, %eax # Request CPUID[fn].EBX
movl %ebx, %edx # CPUID fn
call sev_es_req_cpuid # Call helper
testl %eax, %eax # Check return code
jnz .Lfail
movl %edx, 8(%esp) # Store result
movl $2, %eax # Request CPUID[fn].ECX
movl %ebx, %edx # CPUID fn
call sev_es_req_cpuid # Call helper
testl %eax, %eax # Check return code
jnz .Lfail
movl %edx, 4(%esp) # Store result
movl $3, %eax # Request CPUID[fn].EDX
movl %ebx, %edx # CPUID fn
call sev_es_req_cpuid # Call helper
testl %eax, %eax # Check return code
jnz .Lfail
movl %edx, 0(%esp) # Store result
/*
* Sanity check CPUID results from the Hypervisor. See comment in
* do_vc_no_ghcb() for more details on why this is necessary.
*/
/* Fail if SEV leaf not available in CPUID[0x80000000].EAX */
cmpl $0x80000000, %ebx
jne .Lcheck_sev
cmpl $0x8000001f, 12(%esp)
jb .Lfail
jmp .Ldone
.Lcheck_sev:
/* Fail if SEV bit not set in CPUID[0x8000001f].EAX[1] */
cmpl $0x8000001f, %ebx
jne .Ldone
btl $1, 12(%esp)
jnc .Lfail
.Ldone:
popl %edx
popl %ecx
popl %ebx
popl %eax
/* Remove error code */
addl $4, %esp
/* Jump over CPUID instruction */
addl $2, (%esp)
iret
.Lfail:
/* Send terminate request to Hypervisor */
movl $0x100, %eax
xorl %edx, %edx
movl $MSR_AMD64_SEV_ES_GHCB, %ecx
wrmsr
rep; vmmcall
/* If request fails, go to hlt loop */
hlt
jmp .Lfail
SYM_CODE_END(startup32_vc_handler)
.code64
#include "../../kernel/sev_verify_cbit.S"
SYM_FUNC_START(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT
push %rbp

View File

@ -443,11 +443,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
handle_relocations(output, output_len, virt_addr);
debug_putstr("done.\nBooting the kernel.\n");
/*
* Flush GHCB from cache and map it encrypted again when running as
* SEV-ES guest.
*/
sev_es_shutdown_ghcb();
/* Disable exception handling before booting the kernel */
cleanup_exception_handling();
return output;
}

View File

@ -155,6 +155,12 @@ extern pteval_t __default_kernel_pte_mask;
extern gate_desc boot_idt[BOOT_IDT_ENTRIES];
extern struct desc_ptr boot_idt_desc;
#ifdef CONFIG_X86_64
void cleanup_exception_handling(void);
#else
static inline void cleanup_exception_handling(void) { }
#endif
/* IDT Entry Points */
void boot_page_fault(void);
void boot_stage1_vc(void);

View File

@ -200,14 +200,8 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
}
finish:
if (result == ES_OK) {
if (result == ES_OK)
vc_finish_insn(&ctxt);
} else if (result != ES_RETRY) {
/*
* For now, just halt the machine. That makes debugging easier,
* later we just call sev_es_terminate() here.
*/
while (true)
asm volatile("hlt\n");
}
else if (result != ES_RETRY)
sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
}

View File

@ -24,7 +24,7 @@ static bool __init sev_es_check_cpu_features(void)
return true;
}
static void sev_es_terminate(unsigned int reason)
static void __noreturn sev_es_terminate(unsigned int reason)
{
u64 val = GHCB_SEV_TERMINATE;
@ -186,7 +186,6 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
* make it accessible to the hypervisor.
*
* In particular, check for:
* - Hypervisor CPUID bit
* - Availability of CPUID leaf 0x8000001f
* - SEV CPUID bit.
*
@ -194,10 +193,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
* can't be checked here.
*/
if ((fn == 1 && !(regs->cx & BIT(31))))
/* Hypervisor bit */
goto fail;
else if (fn == 0x80000000 && (regs->ax < 0x8000001f))
if (fn == 0x80000000 && (regs->ax < 0x8000001f))
/* SEV leaf check */
goto fail;
else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
@ -210,12 +206,8 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
return;
fail:
sev_es_wr_ghcb_msr(GHCB_SEV_TERMINATE);
VMGEXIT();
/* Shouldn't get here - if we do halt the machine */
while (true)
asm volatile("hlt\n");
/* Terminate the guest */
sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
}
static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,

View File

@ -137,29 +137,41 @@ static __always_inline bool on_vc_stack(struct pt_regs *regs)
}
/*
* This function handles the case when an NMI is raised in the #VC exception
* handler entry code. In this case, the IST entry for #VC must be adjusted, so
* that any subsequent #VC exception will not overwrite the stack contents of the
* interrupted #VC handler.
* This function handles the case when an NMI is raised in the #VC
* exception handler entry code, before the #VC handler has switched off
* its IST stack. In this case, the IST entry for #VC must be adjusted,
* so that any nested #VC exception will not overwrite the stack
* contents of the interrupted #VC handler.
*
* The IST entry is adjusted unconditionally so that it can be also be
* unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
* sev_es_ist_exit() call may adjust back the IST entry too early.
* unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
* nested sev_es_ist_exit() call may adjust back the IST entry too
* early.
*
* The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
* on the NMI IST stack, as they are only called from NMI handling code
* right now.
*/
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
unsigned long old_ist, new_ist;
/* Read old IST entry */
old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
/* Make room on the IST stack */
/*
* If NMI happened while on the #VC IST stack, set the new IST
* value below regs->sp, so that the interrupted stack frame is
* not overwritten by subsequent #VC exceptions.
*/
if (on_vc_stack(regs))
new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
else
new_ist = old_ist - sizeof(old_ist);
new_ist = regs->sp;
/* Store old IST entry */
/*
* Reserve additional 8 bytes and store old IST value so this
* adjustment can be unrolled in __sev_es_ist_exit().
*/
new_ist -= sizeof(old_ist);
*(unsigned long *)new_ist = old_ist;
/* Set new IST entry */
@ -277,7 +289,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
return ES_EXCEPTION;
}
insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
insn_get_length(&ctxt->insn);
}

View File

@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/virtio_config.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
@ -484,3 +485,8 @@ void __init mem_encrypt_init(void)
print_mem_encrypt_feature_info();
}
int arch_has_restricted_virtio_memory_access(void)
{
return sev_active();
}
EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access);

View File

@ -503,14 +503,10 @@ void __init sme_enable(struct boot_params *bp)
#define AMD_SME_BIT BIT(0)
#define AMD_SEV_BIT BIT(1)
/*
* Set the feature mask (SME or SEV) based on whether we are
* running under a hypervisor.
*/
eax = 1;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
/* Check the SEV MSR whether SEV or SME is enabled */
sev_status = __rdmsr(MSR_AMD64_SEV);
feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
/*
* Check for the SME/SEV feature:
@ -530,19 +526,26 @@ void __init sme_enable(struct boot_params *bp)
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
/*
* No SME if Hypervisor bit is set. This check is here to
* prevent a guest from trying to enable SME. For running as a
* KVM guest the MSR_K8_SYSCFG will be sufficient, but there
* might be other hypervisors which emulate that MSR as non-zero
* or even pass it through to the guest.
* A malicious hypervisor can still trick a guest into this
* path, but there is no way to protect against that.
*/
eax = 1;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (ecx & BIT(31))
return;
/* For SME, check the SYSCFG MSR */
msr = __rdmsr(MSR_K8_SYSCFG);
if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
return;
} else {
/* For SEV, check the SEV MSR */
msr = __rdmsr(MSR_AMD64_SEV);
if (!(msr & MSR_AMD64_SEV_ENABLED))
return;
/* Save SEV_STATUS to avoid reading MSR again */
sev_status = msr;
/* SEV state cannot be controlled by a command line option */
sme_me_mask = me_mask;
sev_enabled = true;