diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index fa5cd05d3b5b..ec060c433589 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -53,6 +53,7 @@ static inline u64 lower_bits(u64 val, unsigned int bits) struct real_mode_header; enum stack_type; +struct ghcb; /* Early IDT entry points for #VC handler */ extern void vc_no_ghcb(void); @@ -81,6 +82,11 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, + bool set_ghcb_msr, + struct es_em_ctxt *ctxt, + u64 exit_code, u64 exit_info_1, + u64 exit_info_2); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index ff1e82ff52d9..787dc5f568b5 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -94,25 +94,15 @@ static void vc_finish_insn(struct es_em_ctxt *ctxt) ctxt->regs->ip += ctxt->insn.length; } -static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, - struct es_em_ctxt *ctxt, - u64 exit_code, u64 exit_info_1, - u64 exit_info_2) +static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { - enum es_result ret; + u32 ret; - /* Fill in protocol and format specifiers */ - ghcb->protocol_version = GHCB_PROTOCOL_MAX; - ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0); + if (!ret) + return ES_OK; - ghcb_set_sw_exit_code(ghcb, exit_code); - ghcb_set_sw_exit_info_1(ghcb, exit_info_1); - ghcb_set_sw_exit_info_2(ghcb, exit_info_2); - - sev_es_wr_ghcb_msr(__pa(ghcb)); - VMGEXIT(); - - if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) { + if (ret == 1) { u64 info = ghcb->save.sw_exit_info_2; unsigned long v; @@ -124,19 +114,40 @@ static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) && ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) { ctxt->fi.vector = v; + if (info & SVM_EVTINJ_VALID_ERR) ctxt->fi.error_code = info >> 32; - ret = ES_EXCEPTION; - } else { - ret = ES_VMM_ERROR; + + return ES_EXCEPTION; } - } else if (ghcb->save.sw_exit_info_1 & 0xffffffff) { - ret = ES_VMM_ERROR; - } else { - ret = ES_OK; } - return ret; + return ES_VMM_ERROR; +} + +enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr, + struct es_em_ctxt *ctxt, u64 exit_code, + u64 exit_info_1, u64 exit_info_2) +{ + /* Fill in protocol and format specifiers */ + ghcb->protocol_version = GHCB_PROTOCOL_MAX; + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, exit_code); + ghcb_set_sw_exit_info_1(ghcb, exit_info_1); + ghcb_set_sw_exit_info_2(ghcb, exit_info_2); + + /* + * Hyper-V unenlightened guests use a paravisor for communicating and + * GHCB pages are being allocated and set up by that paravisor. Linux + * should not change the GHCB page's physical address. + */ + if (set_ghcb_msr) + sev_es_wr_ghcb_msr(__pa(ghcb)); + + VMGEXIT(); + + return verify_exception_info(ghcb, ctxt); } /* @@ -413,7 +424,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) */ sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer); ghcb_set_sw_scratch(ghcb, sw_scratch); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO, exit_info_1, exit_info_2); if (ret != ES_OK) return ret; @@ -455,7 +466,8 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) ghcb_set_rax(ghcb, rax); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0); + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, + SVM_EXIT_IOIO, exit_info_1, 0); if (ret != ES_OK) return ret; @@ -486,7 +498,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, /* xgetbv will cause #GP - use reset value for xcr0 */ ghcb_set_xcr0(ghcb, 1); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0); if (ret != ES_OK) return ret; @@ -511,7 +523,7 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, bool rdtscp = (exit_code == SVM_EXIT_RDTSCP); enum es_result ret; - ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0); + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0); if (ret != ES_OK) return ret; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 88c46136bbda..c195ffe58049 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -648,7 +648,8 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) ghcb_set_rdx(ghcb, regs->dx); } - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR, + exit_info_1, 0); if ((ret == ES_OK) && (!exit_info_1)) { regs->ax = ghcb->save.rax; @@ -867,7 +868,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); - return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); + return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2); } static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb, @@ -1117,7 +1118,7 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, /* Using a value of 0 for ExitInfo1 means RAX holds the value */ ghcb_set_rax(ghcb, val); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); if (ret != ES_OK) return ret; @@ -1147,7 +1148,7 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, static enum es_result vc_handle_wbinvd(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { - return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0); + return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0); } static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) @@ -1156,7 +1157,7 @@ static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt ghcb_set_rcx(ghcb, ctxt->regs->cx); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0); if (ret != ES_OK) return ret; @@ -1197,7 +1198,7 @@ static enum es_result vc_handle_vmmcall(struct ghcb *ghcb, if (x86_platform.hyper.sev_es_hcall_prepare) x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0); + ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0); if (ret != ES_OK) return ret; @@ -1319,13 +1320,26 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) } } -static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) +static __always_inline bool is_vc2_stack(unsigned long sp) { - unsigned long sp = (unsigned long)regs; - return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); } +static __always_inline bool vc_from_invalid_context(struct pt_regs *regs) +{ + unsigned long sp, prev_sp; + + sp = (unsigned long)regs; + prev_sp = regs->sp; + + /* + * If the code was already executing on the VC2 stack when the #VC + * happened, let it proceed to the normal handling routine. This way the + * code executing on the VC2 stack can cause #VC exceptions to get handled. + */ + return is_vc2_stack(sp) && !is_vc2_stack(prev_sp); +} + static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) { struct ghcb_state state; @@ -1406,7 +1420,7 @@ DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) * But keep this here in case the noinstr annotations are violated due * to bug elsewhere. */ - if (unlikely(on_vc_fallback_stack(regs))) { + if (unlikely(vc_from_invalid_context(regs))) { instrumentation_begin(); panic("Can't handle #VC exception from unsupported context\n"); instrumentation_end(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6ca1454a65d4..43f70bc01762 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -709,7 +709,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r stack = (unsigned long *)sp; if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY || - info.type >= STACK_TYPE_EXCEPTION_LAST) + info.type > STACK_TYPE_EXCEPTION_LAST) sp = __this_cpu_ist_top_va(VC2); sync: diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index f8c612902038..3f0abb403340 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -27,6 +27,15 @@ #undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS +/* + * This code runs before CPU feature bits are set. By default, the + * pgtable_l5_enabled() function uses bit X86_FEATURE_LA57 to determine if + * 5-level paging is active, so that won't work here. USE_EARLY_PGTABLE_L5 + * is provided to handle this situation and, instead, use a variable that + * has been set by the early boot code. + */ +#define USE_EARLY_PGTABLE_L5 + #include #include #include