mirror of
https://github.com/torvalds/linux.git
synced 2024-11-24 21:21:41 +00:00
- Unrelated x86/cc changes queued here to avoid ugly cross-merges and
conflicts: - Carve out CPU hotplug function declarations into a separate header with the goal to be able to use the lockdep assertions in a more flexible manner - As a result, refactor cacheinfo code after carving out a function to return the cache ID associated with a given cache level - Cleanups - Add support to be able to kexec TDX guests. For that - Expand ACPI MADT CPU offlining support - Add machinery to prepare CoCo guests memory before kexec-ing into a new kernel - Cleanup, readjust and massage related code -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmaVCYoACgkQEsHwGGHe VUoi6g//Up/4vMzcjqzrndXfl0aP+NpK4zNud+ZPP4Qza2yPhKydniMvkWVQ8DTx jQaGk/tJDeFG6ofOzGkmBGyuZzuO4D7E0XFyXZZeVgSvdk2Af5vaWu1D3e4i4MiM Ox4H8NtWnC4MozP0hos4qB0vtYaBWVJkNvIXDVF6162zLwEmbuyrpFe3glscwIxv hMZR/C47RHcEeOb7yA4m/gJ+AqMe9OKradoNJkkfDpnYr6CYsbmpY09or2WYuvoI 0gevkIe6Q9HMcq3CQl6/pR8IgbA5VmGi7iCiE1ihgTPwR3AaU8llzBqYdSgezFrk 68A7oGeUZQeifQgjwkreZclMtsGEeGWVOB0Bh3Jgr6uaWGFXtpydi/hc73wbTz+F IazKQcKQYjaPW/9UG+0+cFTQlCgQ+WxwqAsN1uqzL6gMgmC9B+TM//xzk5nVxpOd ouf8T85tyceIPCKepGE/bWEHYYCjfbqBMyQT6RHmxUKbb1/PIsbzN26cenkZmPXT cpwurWVG7mRQJRqTrsS+D+opP1h/jOdkpwGlBfl1s0sX6RZuMFBk+7TlMMs61Cyo PWtrLV7Dr369cuXE72wIgfBAao2AS8kFshc7Atokq7/XfL9cCWHeqIcu7yvParP5 WY43YQv8XPGI7ZnPqULByTY0Wxg8TFk8whamx97kEp8uy2HmbQU= =k+T+ -----END PGP SIGNATURE----- Merge tag 'x86_cc_for_v6.11_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 confidential computing updates from Borislav Petkov: "Unrelated x86/cc changes queued here to avoid ugly cross-merges and conflicts: - Carve out CPU hotplug function declarations into a separate header with the goal to be able to use the lockdep assertions in a more flexible manner - As a result, refactor cacheinfo code after carving out a function to return the cache ID associated with a given cache level - Cleanups Add support to be able to kexec TDX guests: - Expand ACPI MADT CPU offlining support - Add machinery to prepare CoCo guests 
memory before kexec-ing into a new kernel - Cleanup, readjust and massage related code" * tag 'x86_cc_for_v6.11_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits) ACPI: tables: Print MULTIPROC_WAKEUP when MADT is parsed x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method x86/mm: Introduce kernel_ident_mapping_free() x86/smp: Add smp_ops.stop_this_cpu() callback x86/acpi: Do not attempt to bring up secondary CPUs in the kexec case x86/acpi: Rename fields in the acpi_madt_multiproc_wakeup structure x86/mm: Do not zap page table entries mapping unaccepted memory table during kdump x86/mm: Make e820__end_ram_pfn() cover E820_TYPE_ACPI ranges x86/tdx: Convert shared memory back to private on kexec x86/mm: Add callbacks to prepare encrypted memory for kexec x86/tdx: Account shared memory x86/mm: Return correct level from lookup_address() if pte is none x86/mm: Make x86_platform.guest.enc_status_change_*() return an error x86/kexec: Keep CR4.MCE set during kexec for TDX guest x86/relocate_kernel: Use named labels for less confusion cpu/hotplug, x86/acpi: Disable CPU offlining for ACPI MADT wakeup cpu/hotplug: Add support for declaring CPU offlining not supported x86/apic: Mark acpi_mp_wake_* variables as __ro_after_init x86/acpi: Extract ACPI MADT wakeup code into a separate file x86/kexec: Remove spurious unconditional JMP from from identity_mapped() ...
This commit is contained in:
commit
98896d8795
@ -1118,6 +1118,13 @@ config X86_LOCAL_APIC
|
||||
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
|
||||
select IRQ_DOMAIN_HIERARCHY
|
||||
|
||||
config ACPI_MADT_WAKEUP
|
||||
def_bool y
|
||||
depends on X86_64
|
||||
depends on ACPI
|
||||
depends on SMP
|
||||
depends on X86_LOCAL_APIC
|
||||
|
||||
config X86_IO_APIC
|
||||
def_bool y
|
||||
depends on X86_LOCAL_APIC || X86_UP_IOAPIC
|
||||
|
@ -29,7 +29,6 @@ static bool noinstr intel_cc_platform_has(enum cc_attr attr)
|
||||
{
|
||||
switch (attr) {
|
||||
case CC_ATTR_GUEST_UNROLL_STRING_IO:
|
||||
case CC_ATTR_HOTPLUG_DISABLED:
|
||||
case CC_ATTR_GUEST_MEM_ENCRYPT:
|
||||
case CC_ATTR_MEM_ENCRYPT:
|
||||
return true;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <asm/coco.h>
|
||||
#include <asm/tdx.h>
|
||||
#include <asm/vmx.h>
|
||||
@ -14,6 +15,7 @@
|
||||
#include <asm/insn.h>
|
||||
#include <asm/insn-eval.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/set_memory.h>
|
||||
|
||||
/* MMIO direction */
|
||||
#define EPT_READ 0
|
||||
@ -38,6 +40,8 @@
|
||||
|
||||
#define TDREPORT_SUBTYPE_0 0
|
||||
|
||||
static atomic_long_t nr_shared;
|
||||
|
||||
/* Called from __tdx_hypercall() for unrecoverable failure */
|
||||
noinstr void __noreturn __tdx_hypercall_failed(void)
|
||||
{
|
||||
@ -798,28 +802,124 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
|
||||
bool enc)
|
||||
static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
|
||||
bool enc)
|
||||
{
|
||||
/*
|
||||
* Only handle shared->private conversion here.
|
||||
* See the comment in tdx_early_init().
|
||||
*/
|
||||
if (enc)
|
||||
return tdx_enc_status_changed(vaddr, numpages, enc);
|
||||
return true;
|
||||
if (enc && !tdx_enc_status_changed(vaddr, numpages, enc))
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
|
||||
static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
|
||||
bool enc)
|
||||
{
|
||||
/*
|
||||
* Only handle private->shared conversion here.
|
||||
* See the comment in tdx_early_init().
|
||||
*/
|
||||
if (!enc)
|
||||
return tdx_enc_status_changed(vaddr, numpages, enc);
|
||||
return true;
|
||||
if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc))
|
||||
return -EIO;
|
||||
|
||||
if (enc)
|
||||
atomic_long_sub(numpages, &nr_shared);
|
||||
else
|
||||
atomic_long_add(numpages, &nr_shared);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stop new private<->shared conversions */
|
||||
static void tdx_kexec_begin(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Crash kernel reaches here with interrupts disabled: can't wait for
|
||||
* conversions to finish.
|
||||
*
|
||||
* If race happened, just report and proceed.
|
||||
*/
|
||||
if (!set_memory_enc_stop_conversion())
|
||||
pr_warn("Failed to stop shared<->private conversions\n");
|
||||
}
|
||||
|
||||
/* Walk direct mapping and convert all shared memory back to private */
|
||||
static void tdx_kexec_finish(void)
|
||||
{
|
||||
unsigned long addr, end;
|
||||
long found = 0, shared;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
|
||||
return;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
addr = PAGE_OFFSET;
|
||||
end = PAGE_OFFSET + get_max_mapped();
|
||||
|
||||
while (addr < end) {
|
||||
unsigned long size;
|
||||
unsigned int level;
|
||||
pte_t *pte;
|
||||
|
||||
pte = lookup_address(addr, &level);
|
||||
size = page_level_size(level);
|
||||
|
||||
if (pte && pte_decrypted(*pte)) {
|
||||
int pages = size / PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* Touching memory with shared bit set triggers implicit
|
||||
* conversion to shared.
|
||||
*
|
||||
* Make sure nobody touches the shared range from
|
||||
* now on.
|
||||
*/
|
||||
set_pte(pte, __pte(0));
|
||||
|
||||
/*
|
||||
* Memory encryption state persists across kexec.
|
||||
* If tdx_enc_status_changed() fails in the first
|
||||
* kernel, it leaves memory in an unknown state.
|
||||
*
|
||||
* If that memory remains shared, accessing it in the
|
||||
* *next* kernel through a private mapping will result
|
||||
* in an unrecoverable guest shutdown.
|
||||
*
|
||||
* The kdump kernel boot is not impacted as it uses
|
||||
* a pre-reserved memory range that is always private.
|
||||
* However, gathering crash information could lead to
|
||||
* a crash if it accesses unconverted memory through
|
||||
* a private mapping which is possible when accessing
|
||||
* that memory through /proc/vmcore, for example.
|
||||
*
|
||||
* In all cases, print error info in order to leave
|
||||
* enough bread crumbs for debugging.
|
||||
*/
|
||||
if (!tdx_enc_status_changed(addr, pages, true)) {
|
||||
pr_err("Failed to unshare range %#lx-%#lx\n",
|
||||
addr, addr + size);
|
||||
}
|
||||
|
||||
found += pages;
|
||||
}
|
||||
|
||||
addr += size;
|
||||
}
|
||||
|
||||
__flush_tlb_all();
|
||||
|
||||
shared = atomic_long_read(&nr_shared);
|
||||
if (shared != found) {
|
||||
pr_err("shared page accounting is off\n");
|
||||
pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found);
|
||||
}
|
||||
}
|
||||
|
||||
void __init tdx_early_init(void)
|
||||
@ -881,6 +981,9 @@ void __init tdx_early_init(void)
|
||||
x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
|
||||
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
|
||||
|
||||
x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
|
||||
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
|
||||
|
||||
/*
|
||||
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
|
||||
* bringup low level code. That raises #VE which cannot be handled
|
||||
|
@ -523,9 +523,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
|
||||
* transition is complete, hv_vtom_set_host_visibility() marks the pages
|
||||
* as "present" again.
|
||||
*/
|
||||
static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
|
||||
static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
|
||||
{
|
||||
return !set_memory_np(kbuffer, pagecount);
|
||||
return set_memory_np(kbuffer, pagecount);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -536,20 +536,19 @@ static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc
|
||||
* with host. This function works as wrap of hv_mark_gpa_visibility()
|
||||
* with memory base and size.
|
||||
*/
|
||||
static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
|
||||
static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
|
||||
{
|
||||
enum hv_mem_host_visibility visibility = enc ?
|
||||
VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
|
||||
u64 *pfn_array;
|
||||
phys_addr_t paddr;
|
||||
int i, pfn, err;
|
||||
void *vaddr;
|
||||
int ret = 0;
|
||||
bool result = true;
|
||||
int i, pfn;
|
||||
|
||||
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
|
||||
if (!pfn_array) {
|
||||
result = false;
|
||||
ret = -ENOMEM;
|
||||
goto err_set_memory_p;
|
||||
}
|
||||
|
||||
@ -568,10 +567,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
|
||||
if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
|
||||
ret = hv_mark_gpa_visibility(pfn, pfn_array,
|
||||
visibility);
|
||||
if (ret) {
|
||||
result = false;
|
||||
if (ret)
|
||||
goto err_free_pfn_array;
|
||||
}
|
||||
pfn = 0;
|
||||
}
|
||||
}
|
||||
@ -586,10 +583,11 @@ err_set_memory_p:
|
||||
* order to avoid leaving the memory range in a "broken" state. Setting
|
||||
* the PRESENT bits shouldn't fail, but return an error if it does.
|
||||
*/
|
||||
if (set_memory_p(kbuffer, pagecount))
|
||||
result = false;
|
||||
err = set_memory_p(kbuffer, pagecount);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
return result;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool hv_vtom_tlb_flush_required(bool private)
|
||||
|
@ -78,6 +78,13 @@ static inline bool acpi_skip_set_wakeup_address(void)
|
||||
|
||||
#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
|
||||
|
||||
union acpi_subtable_headers;
|
||||
|
||||
int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
|
||||
const unsigned long end);
|
||||
|
||||
void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
|
||||
|
||||
/*
|
||||
* Check if the CPU can handle C2 and deeper
|
||||
*/
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
struct x86_mapping_info {
|
||||
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
|
||||
void (*free_pgt_page)(void *, void *); /* free buf for page table */
|
||||
void *context; /* context for alloc_pgt_page */
|
||||
unsigned long page_flag; /* page flag for PMD or PUD entry */
|
||||
unsigned long offset; /* ident mapping offset */
|
||||
@ -16,4 +17,6 @@ struct x86_mapping_info {
|
||||
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
|
||||
unsigned long pstart, unsigned long pend);
|
||||
|
||||
void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
|
||||
|
||||
#endif /* _ASM_X86_INIT_H */
|
||||
|
@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte)
|
||||
return pte_flags(pte) & _PAGE_ACCESSED;
|
||||
}
|
||||
|
||||
/*
 * Report whether a PTE is already in its "decrypted" form, i.e. whether
 * running its value through cc_mkdec() leaves the value unchanged.
 */
static inline bool pte_decrypted(pte_t pte)
{
	return pte_val(pte) == cc_mkdec(pte_val(pte));
}
|
||||
|
||||
#define pmd_dirty pmd_dirty
|
||||
static inline bool pmd_dirty(pmd_t pmd)
|
||||
{
|
||||
|
@ -549,6 +549,7 @@ enum pg_level {
|
||||
PG_LEVEL_2M,
|
||||
PG_LEVEL_1G,
|
||||
PG_LEVEL_512G,
|
||||
PG_LEVEL_256T,
|
||||
PG_LEVEL_NUM
|
||||
};
|
||||
|
||||
|
@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages);
|
||||
int set_memory_np(unsigned long addr, int numpages);
|
||||
int set_memory_p(unsigned long addr, int numpages);
|
||||
int set_memory_4k(unsigned long addr, int numpages);
|
||||
|
||||
bool set_memory_enc_stop_conversion(void);
|
||||
int set_memory_encrypted(unsigned long addr, int numpages);
|
||||
int set_memory_decrypted(unsigned long addr, int numpages);
|
||||
|
||||
int set_memory_np_noalias(unsigned long addr, int numpages);
|
||||
int set_memory_nonglobal(unsigned long addr, int numpages);
|
||||
int set_memory_global(unsigned long addr, int numpages);
|
||||
|
@ -35,6 +35,7 @@ struct smp_ops {
|
||||
int (*cpu_disable)(void);
|
||||
void (*cpu_die)(unsigned int cpu);
|
||||
void (*play_dead)(void);
|
||||
void (*stop_this_cpu)(void);
|
||||
|
||||
void (*send_call_func_ipi)(const struct cpumask *mask);
|
||||
void (*send_call_func_single_ipi)(int cpu);
|
||||
|
@ -149,12 +149,22 @@ struct x86_init_acpi {
|
||||
* @enc_status_change_finish Notify HV after the encryption status of a range is changed
|
||||
* @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status
|
||||
* @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
|
||||
* @enc_kexec_begin Begin the two-step process of converting shared memory back
|
||||
* to private. It stops the new conversions from being started
|
||||
* and waits for in-flight conversions to finish, if possible.
|
||||
* @enc_kexec_finish Finish the two-step process of converting shared memory to
|
||||
* private. All memory is private after the call when
|
||||
* the function returns.
|
||||
* It is called on only one CPU while the others are shut down
|
||||
* and with interrupts disabled.
|
||||
*/
|
||||
struct x86_guest {
|
||||
bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
|
||||
bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
|
||||
int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
|
||||
int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
|
||||
bool (*enc_tlb_flush_required)(bool enc);
|
||||
bool (*enc_cache_flush_required)(void);
|
||||
void (*enc_kexec_begin)(void);
|
||||
void (*enc_kexec_finish)(void);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o
|
||||
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
|
||||
obj-$(CONFIG_ACPI_APEI) += apei.o
|
||||
obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o
|
||||
obj-$(CONFIG_ACPI_MADT_WAKEUP) += madt_wakeup.o madt_playdead.o
|
||||
|
||||
ifneq ($(CONFIG_ACPI_PROCESSOR),)
|
||||
obj-y += cstate.o
|
||||
|
@ -67,13 +67,6 @@ static bool has_lapic_cpus __initdata;
|
||||
static bool acpi_support_online_capable;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Physical address of the Multiprocessor Wakeup Structure mailbox */
|
||||
static u64 acpi_mp_wake_mailbox_paddr;
|
||||
/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
|
||||
static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
/*
|
||||
* Locks related to IOAPIC hotplug
|
||||
@ -341,60 +334,6 @@ acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long e
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
|
||||
{
|
||||
/*
|
||||
* Remap mailbox memory only for the first call to acpi_wakeup_cpu().
|
||||
*
|
||||
* Wakeup of secondary CPUs is fully serialized in the core code.
|
||||
* No need to protect acpi_mp_wake_mailbox from concurrent accesses.
|
||||
*/
|
||||
if (!acpi_mp_wake_mailbox) {
|
||||
acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
|
||||
sizeof(*acpi_mp_wake_mailbox),
|
||||
MEMREMAP_WB);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mailbox memory is shared between the firmware and OS. Firmware will
|
||||
* listen on mailbox command address, and once it receives the wakeup
|
||||
* command, the CPU associated with the given apicid will be booted.
|
||||
*
|
||||
* The value of 'apic_id' and 'wakeup_vector' must be visible to the
|
||||
* firmware before the wakeup command is visible. smp_store_release()
|
||||
* ensures ordering and visibility.
|
||||
*/
|
||||
acpi_mp_wake_mailbox->apic_id = apicid;
|
||||
acpi_mp_wake_mailbox->wakeup_vector = start_ip;
|
||||
smp_store_release(&acpi_mp_wake_mailbox->command,
|
||||
ACPI_MP_WAKE_COMMAND_WAKEUP);
|
||||
|
||||
/*
|
||||
* Wait for the CPU to wake up.
|
||||
*
|
||||
* The CPU being woken up is essentially in a spin loop waiting to be
|
||||
* woken up. It should not take long for it wake up and acknowledge by
|
||||
* zeroing out ->command.
|
||||
*
|
||||
* ACPI specification doesn't provide any guidance on how long kernel
|
||||
* has to wait for a wake up acknowledgement. It also doesn't provide
|
||||
* a way to cancel a wake up request if it takes too long.
|
||||
*
|
||||
* In TDX environment, the VMM has control over how long it takes to
|
||||
* wake up secondary. It can postpone scheduling secondary vCPU
|
||||
* indefinitely. Giving up on wake up request and reporting error opens
|
||||
* possible attack vector for VMM: it can wake up a secondary CPU when
|
||||
* kernel doesn't expect it. Wait until positive result of the wake up
|
||||
* request.
|
||||
*/
|
||||
while (READ_ONCE(acpi_mp_wake_mailbox->command))
|
||||
cpu_relax();
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
#endif /* CONFIG_X86_LOCAL_APIC */
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
@ -1124,29 +1063,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
|
||||
const unsigned long end)
|
||||
{
|
||||
struct acpi_madt_multiproc_wakeup *mp_wake;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_SMP))
|
||||
return -ENODEV;
|
||||
|
||||
mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
|
||||
if (BAD_MADT_ENTRY(mp_wake, end))
|
||||
return -EINVAL;
|
||||
|
||||
acpi_table_print_madt_entry(&header->common);
|
||||
|
||||
acpi_mp_wake_mailbox_paddr = mp_wake->base_address;
|
||||
|
||||
apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
#endif /* CONFIG_X86_LOCAL_APIC */
|
||||
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
@ -1343,7 +1259,7 @@ static void __init acpi_process_madt(void)
|
||||
smp_found_config = 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_ACPI_MADT_WAKEUP
|
||||
/*
|
||||
* Parse MADT MP Wake entry.
|
||||
*/
|
||||
|
28
arch/x86/kernel/acpi/madt_playdead.S
Normal file
28
arch/x86/kernel/acpi/madt_playdead.S
Normal file
@ -0,0 +1,28 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/processor-flags.h>
|
||||
|
||||
.text
|
||||
.align PAGE_SIZE
|
||||
|
||||
/*
|
||||
* asm_acpi_mp_play_dead() - Hand over control of the CPU to the BIOS
|
||||
*
|
||||
* rdi: Address of the ACPI MADT MPWK ResetVector
|
||||
* rsi: PGD of the identity mapping
|
||||
*/
|
||||
SYM_FUNC_START(asm_acpi_mp_play_dead)
|
||||
/* On entry: %rdi = reset vector address, %rsi = PGD of the identity mapping. */
/* Turn off global entries. Following CR3 write will flush them. */
|
||||
movq %cr4, %rdx
|
||||
andq $~(X86_CR4_PGE), %rdx
|
||||
movq %rdx, %cr4
|
||||
|
||||
/* Switch to identity mapping by loading the PGD passed in %rsi into CR3 */
|
||||
movq %rsi, %cr3
|
||||
|
||||
/* Jump to reset vector held in %rdi; nothing after this jump returns to the caller */
/* NOTE(review): ANNOTATE_RETPOLINE_SAFE presumably marks the indirect jump below as intentionally not using a retpoline thunk - confirm in asm/nospec-branch.h */
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
jmp *%rdi
|
||||
SYM_FUNC_END(asm_acpi_mp_play_dead)
|
292
arch/x86/kernel/acpi/madt_wakeup.c
Normal file
292
arch/x86/kernel/acpi/madt_wakeup.c
Normal file
@ -0,0 +1,292 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/sched/hotplug.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/init.h>
|
||||
#include <asm/intel_pt.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/reboot.h>
|
||||
|
||||
/* Physical address of the Multiprocessor Wakeup Structure mailbox */
|
||||
static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;
|
||||
|
||||
/*
 * Virtual address of the Multiprocessor Wakeup Structure mailbox.
 *
 * Deliberately NOT __ro_after_init: acpi_wakeup_cpu() is not an __init
 * function and maps the mailbox lazily on its first call, storing the
 * result here. If the first secondary CPU is only brought up after init
 * data has been write-protected, a store to a __ro_after_init variable
 * would fault.
 */
static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
|
||||
|
||||
static u64 acpi_mp_pgd __ro_after_init;
|
||||
static u64 acpi_mp_reset_vector_paddr __ro_after_init;
|
||||
|
||||
static void acpi_mp_stop_this_cpu(void)
|
||||
{
|
||||
asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
|
||||
}
|
||||
|
||||
static void acpi_mp_play_dead(void)
|
||||
{
|
||||
play_dead_common();
|
||||
asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
|
||||
}
|
||||
|
||||
static void acpi_mp_cpu_die(unsigned int cpu)
|
||||
{
|
||||
u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
|
||||
unsigned long timeout;
|
||||
|
||||
/*
|
||||
* Use TEST mailbox command to prove that BIOS got control over
|
||||
* the CPU before declaring it dead.
|
||||
*
|
||||
* BIOS has to clear 'command' field of the mailbox.
|
||||
*/
|
||||
acpi_mp_wake_mailbox->apic_id = apicid;
|
||||
smp_store_release(&acpi_mp_wake_mailbox->command,
|
||||
ACPI_MP_WAKE_COMMAND_TEST);
|
||||
|
||||
/* Don't wait longer than a second. */
|
||||
timeout = USEC_PER_SEC;
|
||||
while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
|
||||
udelay(1);
|
||||
|
||||
if (!timeout)
|
||||
pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
|
||||
}
|
||||
|
||||
/* The argument is required to match type of x86_mapping_info::alloc_pgt_page */
|
||||
static void __init *alloc_pgt_page(void *dummy)
|
||||
{
|
||||
return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
|
||||
}
|
||||
|
||||
/*
 * Page-table page release hook for x86_mapping_info::free_pgt_page.
 *
 * @pgt:   page previously handed out by alloc_pgt_page().
 * @dummy: unused; present only so the signature matches the callback type.
 *
 * The page is given back with a plain call: a void function must not
 * "return" an expression, even one of type void.
 */
static void __init free_pgt_page(void *pgt, void *dummy)
{
	memblock_free(pgt, PAGE_SIZE);
}
|
||||
|
||||
/*
|
||||
* Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
|
||||
* the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches
|
||||
* to the identity mapping and the function has be present at the same spot in
|
||||
* the virtual address space before and after switching page tables.
|
||||
*/
|
||||
static int __init init_transition_pgtable(pgd_t *pgd)
|
||||
{
|
||||
pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
|
||||
unsigned long vaddr, paddr;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
vaddr = (unsigned long)asm_acpi_mp_play_dead;
|
||||
pgd += pgd_index(vaddr);
|
||||
if (!pgd_present(*pgd)) {
|
||||
p4d = (p4d_t *)alloc_pgt_page(NULL);
|
||||
if (!p4d)
|
||||
return -ENOMEM;
|
||||
set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
|
||||
}
|
||||
p4d = p4d_offset(pgd, vaddr);
|
||||
if (!p4d_present(*p4d)) {
|
||||
pud = (pud_t *)alloc_pgt_page(NULL);
|
||||
if (!pud)
|
||||
return -ENOMEM;
|
||||
set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
|
||||
}
|
||||
pud = pud_offset(p4d, vaddr);
|
||||
if (!pud_present(*pud)) {
|
||||
pmd = (pmd_t *)alloc_pgt_page(NULL);
|
||||
if (!pmd)
|
||||
return -ENOMEM;
|
||||
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
|
||||
}
|
||||
pmd = pmd_offset(pud, vaddr);
|
||||
if (!pmd_present(*pmd)) {
|
||||
pte = (pte_t *)alloc_pgt_page(NULL);
|
||||
if (!pte)
|
||||
return -ENOMEM;
|
||||
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
|
||||
}
|
||||
pte = pte_offset_kernel(pmd, vaddr);
|
||||
|
||||
paddr = __pa(vaddr);
|
||||
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Prepare everything needed to hand a CPU back to the firmware.
 *
 * @reset_vector: address of the reset vector taken from the MADT wakeup
 *                structure.
 *
 * Builds an identity mapping that covers every range in pfn_mapped[] plus
 * the page containing @reset_vector, adds the transition mapping for
 * asm_acpi_mp_play_dead(), and then installs the play_dead, stop_this_cpu
 * and cpu_die callbacks in smp_ops.
 *
 * Return: 0 on success, -ENOMEM if any page-table setup step fails.
 */
static int __init acpi_mp_setup_reset(u64 reset_vector)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page = alloc_pgt_page,
		.free_pgt_page	= free_pgt_page,
		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
		.kernpg_flag	= _KERNPG_TABLE_NOENC,
	};
	pgd_t *pgd;

	pgd = alloc_pgt_page(NULL);
	if (!pgd)
		return -ENOMEM;

	/* Identity-map every range that is currently mapped. */
	for (int i = 0; i < nr_pfn_mapped; i++) {
		unsigned long mstart, mend;

		mstart = pfn_mapped[i].start << PAGE_SHIFT;
		mend   = pfn_mapped[i].end << PAGE_SHIFT;
		if (kernel_ident_mapping_init(&info, pgd, mstart, mend))
			goto err_free_mapping;
	}

	/* Identity-map the page that holds the reset vector. */
	if (kernel_ident_mapping_init(&info, pgd,
				      PAGE_ALIGN_DOWN(reset_vector),
				      PAGE_ALIGN(reset_vector + 1)))
		goto err_free_mapping;

	if (init_transition_pgtable(pgd))
		goto err_free_mapping;

	smp_ops.play_dead = acpi_mp_play_dead;
	smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
	smp_ops.cpu_die = acpi_mp_cpu_die;

	acpi_mp_reset_vector_paddr = reset_vector;
	acpi_mp_pgd = __pa(pgd);

	return 0;

err_free_mapping:
	kernel_ident_mapping_free(&info, pgd);
	return -ENOMEM;
}
|
||||
|
||||
/*
 * Wake up a secondary CPU through the ACPI MADT wakeup mailbox.
 *
 * @apicid:   APIC ID written to the mailbox 'apic_id' field.
 * @start_ip: address written to the mailbox 'wakeup_vector' field.
 *
 * Return: 0 once the mailbox 'command' field has been cleared,
 * -EOPNOTSUPP if no mailbox address is known, -ENOMEM if the mailbox
 * cannot be mapped.
 */
static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
{
	if (!acpi_mp_wake_mailbox_paddr) {
		pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
	 *
	 * Wakeup of secondary CPUs is fully serialized in the core code.
	 * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
	 */
	if (!acpi_mp_wake_mailbox) {
		acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
						sizeof(*acpi_mp_wake_mailbox),
						MEMREMAP_WB);
		/*
		 * The mapping can fail. Bail out instead of dereferencing a
		 * NULL mailbox below; the pointer stays NULL, so a later call
		 * retries the mapping.
		 */
		if (!acpi_mp_wake_mailbox) {
			pr_err("Failed to map MADT wakeup mailbox\n");
			return -ENOMEM;
		}
	}

	/*
	 * Mailbox memory is shared between the firmware and OS. Firmware will
	 * listen on mailbox command address, and once it receives the wakeup
	 * command, the CPU associated with the given apicid will be booted.
	 *
	 * The value of 'apic_id' and 'wakeup_vector' must be visible to the
	 * firmware before the wakeup command is visible. smp_store_release()
	 * ensures ordering and visibility.
	 */
	acpi_mp_wake_mailbox->apic_id = apicid;
	acpi_mp_wake_mailbox->wakeup_vector = start_ip;
	smp_store_release(&acpi_mp_wake_mailbox->command,
			  ACPI_MP_WAKE_COMMAND_WAKEUP);

	/*
	 * Wait for the CPU to wake up.
	 *
	 * The CPU being woken up is essentially in a spin loop waiting to be
	 * woken up. It should not take long for it to wake up and acknowledge
	 * by zeroing out ->command.
	 *
	 * ACPI specification doesn't provide any guidance on how long kernel
	 * has to wait for a wake up acknowledgment. It also doesn't provide
	 * a way to cancel a wake up request if it takes too long.
	 *
	 * In TDX environment, the VMM has control over how long it takes to
	 * wake up secondary. It can postpone scheduling secondary vCPU
	 * indefinitely. Giving up on wake up request and reporting error opens
	 * possible attack vector for VMM: it can wake up a secondary CPU when
	 * kernel doesn't expect it. Wait until positive result of the wake up
	 * request.
	 */
	while (READ_ONCE(acpi_mp_wake_mailbox->command))
		cpu_relax();

	return 0;
}
|
||||
|
||||
static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake)
|
||||
{
|
||||
cpu_hotplug_disable_offlining();
|
||||
|
||||
/*
|
||||
* ACPI MADT doesn't allow to offline a CPU after it was onlined. This
|
||||
* limits kexec: the second kernel won't be able to use more than one CPU.
|
||||
*
|
||||
* To prevent a kexec kernel from onlining secondary CPUs invalidate the
|
||||
* mailbox address in the ACPI MADT wakeup structure which prevents a
|
||||
* kexec kernel to use it.
|
||||
*
|
||||
* This is safe as the booting kernel has the mailbox address cached
|
||||
* already and acpi_wakeup_cpu() uses the cached value to bring up the
|
||||
* secondary CPUs.
|
||||
*
|
||||
* Note: This is a Linux specific convention and not covered by the
|
||||
* ACPI specification.
|
||||
*/
|
||||
mp_wake->mailbox_address = 0;
|
||||
}
|
||||
|
||||
/*
 * Parse the MADT multiprocessor wakeup entry.
 *
 * @header: MADT subtable header of the entry.
 * @end:    end address used to bounds-check the entry.
 *
 * Caches the mailbox address, sets up CPU offlining support when the entry
 * carries a version 1 reset vector (and disables offlining otherwise), and
 * installs acpi_wakeup_cpu() as the wakeup_secondary_cpu_64 APIC callback.
 *
 * Return: 0 on success, -EINVAL if the entry is missing or too short.
 */
int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
			      const unsigned long end)
{
	struct acpi_madt_multiproc_wakeup *mp_wake;

	mp_wake = (struct acpi_madt_multiproc_wakeup *)header;

	/*
	 * The standard BAD_MADT_ENTRY() check cannot be used for @mp_wake:
	 * 'sizeof (struct acpi_madt_multiproc_wakeup)' may exceed the real
	 * size of the entry in the ACPI table, because 'reset_vector' only
	 * exists in the V1 MP wakeup structure. Validate against the V0 size.
	 */
	if (!mp_wake ||
	    end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0 ||
	    mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
		return -EINVAL;

	acpi_table_print_madt_entry(&header->common);

	acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;

	if (mp_wake->version < ACPI_MADT_MP_WAKEUP_VERSION_V1 ||
	    mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V1) {
		/*
		 * CPU offlining needs version 1 of the ACPI MADT wakeup
		 * structure.
		 */
		acpi_mp_disable_offlining(mp_wake);
	} else if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
		pr_warn("Failed to setup MADT reset vector\n");
		acpi_mp_disable_offlining(mp_wake);
	}

	apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);

	return 0;
}
|
@ -292,9 +292,8 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
|
||||
*/
|
||||
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
|
||||
{
|
||||
struct cpu_cacheinfo *ci;
|
||||
struct cacheinfo *ci;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
/* Pick the first cpu we find that is associated with the cache. */
|
||||
plr->cpu = cpumask_first(&plr->d->cpu_mask);
|
||||
@ -306,15 +305,11 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
|
||||
goto out_region;
|
||||
}
|
||||
|
||||
ci = get_cpu_cacheinfo(plr->cpu);
|
||||
|
||||
plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
|
||||
|
||||
for (i = 0; i < ci->num_leaves; i++) {
|
||||
if (ci->info_list[i].level == plr->s->res->cache_level) {
|
||||
plr->line_size = ci->info_list[i].coherency_line_size;
|
||||
return 0;
|
||||
}
|
||||
ci = get_cpu_cacheinfo_level(plr->cpu, plr->s->res->cache_level);
|
||||
if (ci) {
|
||||
plr->line_size = ci->coherency_line_size;
|
||||
plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = -1;
|
||||
|
@ -1450,18 +1450,14 @@ out:
|
||||
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
|
||||
struct rdt_domain *d, unsigned long cbm)
|
||||
{
|
||||
struct cpu_cacheinfo *ci;
|
||||
unsigned int size = 0;
|
||||
int num_b, i;
|
||||
struct cacheinfo *ci;
|
||||
int num_b;
|
||||
|
||||
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
|
||||
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
|
||||
for (i = 0; i < ci->num_leaves; i++) {
|
||||
if (ci->info_list[i].level == r->cache_level) {
|
||||
size = ci->info_list[i].size / r->cache.cbm_len * num_b;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ci = get_cpu_cacheinfo_level(cpumask_any(&d->cpu_mask), r->cache_level);
|
||||
if (ci)
|
||||
size = ci->size / r->cache.cbm_len * num_b;
|
||||
|
||||
return size;
|
||||
}
|
||||
|
@ -128,6 +128,18 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
|
||||
#ifdef CONFIG_HPET_TIMER
|
||||
hpet_disable();
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Non-crash kexec calls enc_kexec_begin() while scheduling is still
|
||||
* active. This allows the callback to wait until all in-flight
|
||||
* shared<->private conversions are complete. In a crash scenario,
|
||||
* enc_kexec_begin() gets called after all but one CPU have been shut
|
||||
* down and interrupts have been disabled. This allows the callback to
|
||||
* detect a race with the conversion and report it.
|
||||
*/
|
||||
x86_platform.guest.enc_kexec_begin();
|
||||
x86_platform.guest.enc_kexec_finish();
|
||||
|
||||
crash_save_cpu(regs, safe_smp_processor_id());
|
||||
}
|
||||
|
||||
|
@ -828,7 +828,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
|
||||
/*
|
||||
* Find the highest page frame number we have available
|
||||
*/
|
||||
static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type type)
|
||||
static unsigned long __init e820__end_ram_pfn(unsigned long limit_pfn)
|
||||
{
|
||||
int i;
|
||||
unsigned long last_pfn = 0;
|
||||
@ -839,7 +839,8 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type
|
||||
unsigned long start_pfn;
|
||||
unsigned long end_pfn;
|
||||
|
||||
if (entry->type != type)
|
||||
if (entry->type != E820_TYPE_RAM &&
|
||||
entry->type != E820_TYPE_ACPI)
|
||||
continue;
|
||||
|
||||
start_pfn = entry->addr >> PAGE_SHIFT;
|
||||
@ -865,12 +866,12 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type
|
||||
|
||||
unsigned long __init e820__end_of_ram_pfn(void)
|
||||
{
|
||||
return e820_end_pfn(MAX_ARCH_PFN, E820_TYPE_RAM);
|
||||
return e820__end_ram_pfn(MAX_ARCH_PFN);
|
||||
}
|
||||
|
||||
unsigned long __init e820__end_of_low_ram_pfn(void)
|
||||
{
|
||||
return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_TYPE_RAM);
|
||||
return e820__end_ram_pfn(1UL << (32 - PAGE_SHIFT));
|
||||
}
|
||||
|
||||
static void __init early_panic(char *msg)
|
||||
|
@ -835,6 +835,13 @@ void __noreturn stop_this_cpu(void *dummy)
|
||||
*/
|
||||
cpumask_clear_cpu(cpu, &cpus_stop_mask);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
if (smp_ops.stop_this_cpu) {
|
||||
smp_ops.stop_this_cpu();
|
||||
unreachable();
|
||||
}
|
||||
#endif
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
* Use native_halt() so that memory contents don't change
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <linux/delay.h>
|
||||
#include <linux/objtool.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <acpi/reboot.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/apic.h>
|
||||
@ -716,6 +717,14 @@ static void native_machine_emergency_restart(void)
|
||||
|
||||
void native_machine_shutdown(void)
|
||||
{
|
||||
/*
|
||||
* Call enc_kexec_begin() while all CPUs are still active and
|
||||
* interrupts are enabled. This will allow all in-flight memory
|
||||
* conversions to finish cleanly.
|
||||
*/
|
||||
if (kexec_in_progress)
|
||||
x86_platform.guest.enc_kexec_begin();
|
||||
|
||||
/* Stop the cpus and apics */
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
/*
|
||||
@ -752,6 +761,9 @@ void native_machine_shutdown(void)
|
||||
#ifdef CONFIG_X86_64
|
||||
x86_platform.iommu_shutdown();
|
||||
#endif
|
||||
|
||||
if (kexec_in_progress)
|
||||
x86_platform.guest.enc_kexec_finish();
|
||||
}
|
||||
|
||||
static void __machine_emergency_restart(int emergency)
|
||||
@ -868,6 +880,12 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
|
||||
cpu_emergency_disable_virtualization();
|
||||
|
||||
atomic_dec(&waiting_for_crash_ipi);
|
||||
|
||||
if (smp_ops.stop_this_cpu) {
|
||||
smp_ops.stop_this_cpu();
|
||||
unreachable();
|
||||
}
|
||||
|
||||
/* Assume hlt works */
|
||||
halt();
|
||||
for (;;)
|
||||
|
@ -5,6 +5,8 @@
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/stringify.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/kexec.h>
|
||||
#include <asm/processor-flags.h>
|
||||
@ -145,16 +147,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
* Set cr4 to a known state:
|
||||
* - physical address extension enabled
|
||||
* - 5-level paging, if it was enabled before
|
||||
* - Machine check exception on TDX guest, if it was enabled before.
|
||||
* Clearing MCE might not be allowed in TDX guests, depending on setup.
|
||||
*
|
||||
* Use R13 that contains the original CR4 value, read in relocate_kernel().
|
||||
* PAE is always set in the original CR4.
|
||||
*/
|
||||
movl $X86_CR4_PAE, %eax
|
||||
testq $X86_CR4_LA57, %r13
|
||||
jz 1f
|
||||
orl $X86_CR4_LA57, %eax
|
||||
1:
|
||||
movq %rax, %cr4
|
||||
|
||||
jmp 1f
|
||||
1:
|
||||
andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d
|
||||
ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
|
||||
movq %r13, %cr4
|
||||
|
||||
/* Flush the TLB (needed?) */
|
||||
movq %r9, %cr3
|
||||
@ -165,9 +166,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
* used by kexec. Flush the caches before copying the kernel.
|
||||
*/
|
||||
testq %r12, %r12
|
||||
jz 1f
|
||||
jz .Lsme_off
|
||||
wbinvd
|
||||
1:
|
||||
.Lsme_off:
|
||||
|
||||
movq %rcx, %r11
|
||||
call swap_pages
|
||||
@ -187,7 +188,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
*/
|
||||
|
||||
testq %r11, %r11
|
||||
jnz 1f
|
||||
jnz .Lrelocate
|
||||
xorl %eax, %eax
|
||||
xorl %ebx, %ebx
|
||||
xorl %ecx, %ecx
|
||||
@ -208,7 +209,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
ret
|
||||
int3
|
||||
|
||||
1:
|
||||
.Lrelocate:
|
||||
popq %rdx
|
||||
leaq PAGE_SIZE(%r10), %rsp
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
|
@ -134,10 +134,12 @@ struct x86_cpuinit_ops x86_cpuinit = {
|
||||
|
||||
static void default_nmi_init(void) { };
|
||||
|
||||
static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
|
||||
static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
|
||||
static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
|
||||
static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
|
||||
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
|
||||
static bool enc_cache_flush_required_noop(void) { return false; }
|
||||
static void enc_kexec_begin_noop(void) {}
|
||||
static void enc_kexec_finish_noop(void) {}
|
||||
static bool is_private_mmio_noop(u64 addr) {return false; }
|
||||
|
||||
struct x86_platform_ops x86_platform __ro_after_init = {
|
||||
@ -161,6 +163,8 @@ struct x86_platform_ops x86_platform __ro_after_init = {
|
||||
.enc_status_change_finish = enc_status_change_finish_noop,
|
||||
.enc_tlb_flush_required = enc_tlb_flush_required_noop,
|
||||
.enc_cache_flush_required = enc_cache_flush_required_noop,
|
||||
.enc_kexec_begin = enc_kexec_begin_noop,
|
||||
.enc_kexec_finish = enc_kexec_finish_noop,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -4,6 +4,79 @@
|
||||
* included by both the compressed kernel and the regular kernel.
|
||||
*/
|
||||
|
||||
/* Hand the PTE page referenced by @pmd back through info->free_pgt_page(). */
static void free_pte(struct x86_mapping_info *info, pmd_t *pmd)
{
	info->free_pgt_page(pte_offset_kernel(pmd, 0), info->context);
}
|
||||
|
||||
/*
 * Free the PMD page referenced by @pud, after first freeing every PTE page
 * hanging off its present, non-leaf entries.
 */
static void free_pmd(struct x86_mapping_info *info, pud_t *pud)
{
	pmd_t *table = pmd_offset(pud, 0);
	pmd_t *entry;

	for (entry = table; entry < table + PTRS_PER_PMD; entry++) {
		/* Absent and leaf entries have no lower-level page to free. */
		if (pmd_present(*entry) && !pmd_leaf(*entry))
			free_pte(info, entry);
	}

	info->free_pgt_page(table, info->context);
}
|
||||
|
||||
/*
 * Free the PUD page referenced by @p4d, after first freeing every PMD page
 * hanging off its present, non-leaf entries.
 */
static void free_pud(struct x86_mapping_info *info, p4d_t *p4d)
{
	pud_t *table = pud_offset(p4d, 0);
	pud_t *entry;

	for (entry = table; entry < table + PTRS_PER_PUD; entry++) {
		/* Absent and leaf entries have no lower-level page to free. */
		if (pud_present(*entry) && !pud_leaf(*entry))
			free_pmd(info, entry);
	}

	info->free_pgt_page(table, info->context);
}
|
||||
|
||||
/*
 * Free everything below the P4D table referenced by @pgd. The P4D page
 * itself is only released when 5-level paging is enabled.
 */
static void free_p4d(struct x86_mapping_info *info, pgd_t *pgd)
{
	p4d_t *table = p4d_offset(pgd, 0);
	p4d_t *entry;

	for (entry = table; entry < table + PTRS_PER_P4D; entry++) {
		if (p4d_present(*entry))
			free_pud(info, entry);
	}

	/*
	 * NOTE(review): presumably the P4D level is folded into the PGD when
	 * 5-level paging is off, so there is no separate page to free then -
	 * confirm against p4d_offset().
	 */
	if (pgtable_l5_enabled())
		info->free_pgt_page(table, info->context);
}
|
||||
|
||||
/*
 * Tear down an identity mapping: walk every present PGD entry, free the
 * lower-level tables beneath it, then free the PGD page itself. All pages
 * are released through info->free_pgt_page() with info->context.
 */
void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd)
{
	pgd_t *entry;

	for (entry = pgd; entry < pgd + PTRS_PER_PGD; entry++) {
		if (pgd_present(*entry))
			free_p4d(info, entry);
	}

	info->free_pgt_page(pgd, info->context);
}
|
||||
|
||||
static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
|
||||
unsigned long addr, unsigned long end)
|
||||
{
|
||||
|
@ -469,7 +469,9 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
|
||||
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_pte_init(pte, __pte(0), init);
|
||||
continue;
|
||||
}
|
||||
@ -524,7 +526,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
|
||||
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_pmd_init(pmd, __pmd(0), init);
|
||||
continue;
|
||||
}
|
||||
@ -611,7 +615,9 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
|
||||
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_pud_init(pud, __pud(0), init);
|
||||
continue;
|
||||
}
|
||||
@ -698,7 +704,9 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
|
||||
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
|
||||
E820_TYPE_RAM) &&
|
||||
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
|
||||
E820_TYPE_RESERVED_KERN))
|
||||
E820_TYPE_RESERVED_KERN) &&
|
||||
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
|
||||
E820_TYPE_ACPI))
|
||||
set_p4d_init(p4d, __p4d(0), init);
|
||||
continue;
|
||||
}
|
||||
|
@ -283,7 +283,7 @@ static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
|
||||
static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
|
||||
{
|
||||
/*
|
||||
* To maintain the security guarantees of SEV-SNP guests, make sure
|
||||
@ -292,11 +292,11 @@ static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
|
||||
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
|
||||
snp_set_memory_shared(vaddr, npages);
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return 0 unconditionally: return value doesn't matter for the SEV side */
|
||||
static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
|
||||
static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
|
||||
{
|
||||
/*
|
||||
* After memory is mapped encrypted in the page table, validate it
|
||||
@ -308,7 +308,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e
|
||||
if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
|
||||
enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
|
||||
|
@ -662,8 +662,9 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
|
||||
|
||||
/*
|
||||
* Lookup the page table entry for a virtual address in a specific pgd.
|
||||
* Return a pointer to the entry, the level of the mapping, and the effective
|
||||
* NX and RW bits of all page table levels.
|
||||
* Return a pointer to the entry (or NULL if the entry does not exist),
|
||||
* the level of the entry, and the effective NX and RW bits of all
|
||||
* page table levels.
|
||||
*/
|
||||
pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
|
||||
unsigned int *level, bool *nx, bool *rw)
|
||||
@ -672,13 +673,14 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
*level = PG_LEVEL_NONE;
|
||||
*level = PG_LEVEL_256T;
|
||||
*nx = false;
|
||||
*rw = true;
|
||||
|
||||
if (pgd_none(*pgd))
|
||||
return NULL;
|
||||
|
||||
*level = PG_LEVEL_512G;
|
||||
*nx |= pgd_flags(*pgd) & _PAGE_NX;
|
||||
*rw &= pgd_flags(*pgd) & _PAGE_RW;
|
||||
|
||||
@ -686,10 +688,10 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
|
||||
if (p4d_none(*p4d))
|
||||
return NULL;
|
||||
|
||||
*level = PG_LEVEL_512G;
|
||||
if (p4d_leaf(*p4d) || !p4d_present(*p4d))
|
||||
return (pte_t *)p4d;
|
||||
|
||||
*level = PG_LEVEL_1G;
|
||||
*nx |= p4d_flags(*p4d) & _PAGE_NX;
|
||||
*rw &= p4d_flags(*p4d) & _PAGE_RW;
|
||||
|
||||
@ -697,10 +699,10 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
|
||||
if (pud_none(*pud))
|
||||
return NULL;
|
||||
|
||||
*level = PG_LEVEL_1G;
|
||||
if (pud_leaf(*pud) || !pud_present(*pud))
|
||||
return (pte_t *)pud;
|
||||
|
||||
*level = PG_LEVEL_2M;
|
||||
*nx |= pud_flags(*pud) & _PAGE_NX;
|
||||
*rw &= pud_flags(*pud) & _PAGE_RW;
|
||||
|
||||
@ -708,15 +710,13 @@ pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
|
||||
if (pmd_none(*pmd))
|
||||
return NULL;
|
||||
|
||||
*level = PG_LEVEL_2M;
|
||||
if (pmd_leaf(*pmd) || !pmd_present(*pmd))
|
||||
return (pte_t *)pmd;
|
||||
|
||||
*level = PG_LEVEL_4K;
|
||||
*nx |= pmd_flags(*pmd) & _PAGE_NX;
|
||||
*rw &= pmd_flags(*pmd) & _PAGE_RW;
|
||||
|
||||
*level = PG_LEVEL_4K;
|
||||
|
||||
return pte_offset_kernel(pmd, address);
|
||||
}
|
||||
|
||||
@ -736,9 +736,8 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
|
||||
* Lookup the page table entry for a virtual address. Return a pointer
|
||||
* to the entry and the level of the mapping.
|
||||
*
|
||||
* Note: We return pud and pmd either when the entry is marked large
|
||||
* or when the present bit is not set. Otherwise we would return a
|
||||
* pointer to a nonexisting mapping.
|
||||
* Note: the function returns p4d, pud or pmd either when the entry is marked
|
||||
* large or when the present bit is not set. Otherwise it returns NULL.
|
||||
*/
|
||||
pte_t *lookup_address(unsigned long address, unsigned int *level)
|
||||
{
|
||||
@ -2196,7 +2195,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
|
||||
cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
|
||||
|
||||
/* Notify hypervisor that we are about to set/clr encryption attribute. */
|
||||
if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
|
||||
ret = x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
|
||||
if (ret)
|
||||
goto vmm_fail;
|
||||
|
||||
ret = __change_page_attr_set_clr(&cpa, 1);
|
||||
@ -2214,24 +2214,61 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
|
||||
return ret;
|
||||
|
||||
/* Notify hypervisor that we have successfully set/clr encryption attribute. */
|
||||
if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
|
||||
ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc);
|
||||
if (ret)
|
||||
goto vmm_fail;
|
||||
|
||||
return 0;
|
||||
|
||||
vmm_fail:
|
||||
WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s.\n",
|
||||
(void *)addr, numpages, enc ? "private" : "shared");
|
||||
WARN_ONCE(1, "CPA VMM failure to convert memory (addr=%p, numpages=%d) to %s: %d\n",
|
||||
(void *)addr, numpages, enc ? "private" : "shared", ret);
|
||||
|
||||
return -EIO;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The lock serializes conversions between private and shared memory.
|
||||
*
|
||||
* It is taken for read on conversion. A write lock guarantees that no
|
||||
* concurrent conversions are in progress.
|
||||
*/
|
||||
static DECLARE_RWSEM(mem_enc_lock);
|
||||
|
||||
/*
|
||||
* Stop new private<->shared conversions.
|
||||
*
|
||||
* Taking the exclusive mem_enc_lock waits for in-flight conversions to complete.
|
||||
* The lock is not released to prevent new conversions from being started.
|
||||
*/
|
||||
bool set_memory_enc_stop_conversion(void)
|
||||
{
|
||||
/*
|
||||
* In a crash scenario, sleep is not allowed. Try to take the lock.
|
||||
* Failure indicates that there is a race with the conversion.
|
||||
*/
|
||||
if (oops_in_progress)
|
||||
return down_write_trylock(&mem_enc_lock);
|
||||
|
||||
down_write(&mem_enc_lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Change the encryption attribute of @numpages pages starting at @addr.
 *
 * Does nothing and returns 0 when the platform has no memory encryption.
 * Otherwise the conversion runs with mem_enc_lock held for read; if the
 * lock cannot be taken (a writer holds it, see
 * set_memory_enc_stop_conversion()), -EBUSY is returned without touching
 * the page tables. Any other value is the result of
 * __set_memory_enc_pgtable().
 */
static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
	int ret;

	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
		return 0;

	/* Never block here: a held write lock means conversions are stopped. */
	if (!down_read_trylock(&mem_enc_lock))
		return -EBUSY;

	ret = __set_memory_enc_pgtable(addr, numpages, enc);

	up_read(&mem_enc_lock);

	return ret;
}
|
||||
|
||||
int set_memory_encrypted(unsigned long addr, int numpages)
|
||||
|
@ -198,6 +198,20 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
|
||||
}
|
||||
break;
|
||||
|
||||
case ACPI_MADT_TYPE_MULTIPROC_WAKEUP:
|
||||
{
|
||||
struct acpi_madt_multiproc_wakeup *p =
|
||||
(struct acpi_madt_multiproc_wakeup *)header;
|
||||
u64 reset_vector = 0;
|
||||
|
||||
if (p->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1)
|
||||
reset_vector = p->reset_vector;
|
||||
|
||||
pr_debug("MP Wakeup (version[%d], mailbox[%#llx], reset[%#llx])\n",
|
||||
p->version, p->mailbox_address, reset_vector);
|
||||
}
|
||||
break;
|
||||
|
||||
case ACPI_MADT_TYPE_CORE_PIC:
|
||||
{
|
||||
struct acpi_madt_core_pic *p = (struct acpi_madt_core_pic *)header;
|
||||
|
@ -1194,11 +1194,23 @@ struct acpi_madt_generic_translator {
|
||||
|
||||
struct acpi_madt_multiproc_wakeup {
|
||||
struct acpi_subtable_header header;
|
||||
u16 mailbox_version;
|
||||
u16 version;
|
||||
u32 reserved; /* reserved - must be zero */
|
||||
u64 base_address;
|
||||
u64 mailbox_address;
|
||||
u64 reset_vector;
|
||||
};
|
||||
|
||||
/* Values for Version field above */
|
||||
|
||||
enum acpi_madt_multiproc_wakeup_version {
|
||||
ACPI_MADT_MP_WAKEUP_VERSION_NONE = 0,
|
||||
ACPI_MADT_MP_WAKEUP_VERSION_V1 = 1,
|
||||
ACPI_MADT_MP_WAKEUP_VERSION_RESERVED = 2, /* 2 and greater are reserved */
|
||||
};
|
||||
|
||||
#define ACPI_MADT_MP_WAKEUP_SIZE_V0 16
|
||||
#define ACPI_MADT_MP_WAKEUP_SIZE_V1 24
|
||||
|
||||
#define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE 2032
|
||||
#define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE 2048
|
||||
|
||||
@ -1211,7 +1223,8 @@ struct acpi_madt_multiproc_wakeup_mailbox {
|
||||
u8 reserved_firmware[ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE]; /* reserved for firmware use */
|
||||
};
|
||||
|
||||
#define ACPI_MP_WAKE_COMMAND_WAKEUP 1
|
||||
#define ACPI_MP_WAKE_COMMAND_WAKEUP 1
|
||||
#define ACPI_MP_WAKE_COMMAND_TEST 2
|
||||
|
||||
/* 17: CPU Core Interrupt Controller (ACPI 6.5) */
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define _LINUX_CACHEINFO_H
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/cpuhplock.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
@ -112,24 +113,38 @@ int acpi_get_cache_info(unsigned int cpu,
|
||||
|
||||
const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf);
|
||||
|
||||
/*
|
||||
* Get the cacheinfo structure for the cache associated with @cpu at
|
||||
* level @level.
|
||||
* cpuhp lock must be held.
|
||||
*/
|
||||
static inline struct cacheinfo *get_cpu_cacheinfo_level(int cpu, int level)
|
||||
{
|
||||
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
|
||||
int i;
|
||||
|
||||
lockdep_assert_cpus_held();
|
||||
|
||||
for (i = 0; i < ci->num_leaves; i++) {
|
||||
if (ci->info_list[i].level == level) {
|
||||
if (ci->info_list[i].attributes & CACHE_ID)
|
||||
return &ci->info_list[i];
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the id of the cache associated with @cpu at level @level.
|
||||
* cpuhp lock must be held.
|
||||
*/
|
||||
static inline int get_cpu_cacheinfo_id(int cpu, int level)
|
||||
{
|
||||
struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
|
||||
int i;
|
||||
struct cacheinfo *ci = get_cpu_cacheinfo_level(cpu, level);
|
||||
|
||||
for (i = 0; i < ci->num_leaves; i++) {
|
||||
if (ci->info_list[i].level == level) {
|
||||
if (ci->info_list[i].attributes & CACHE_ID)
|
||||
return ci->info_list[i].id;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
return ci ? ci->id : -1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64
|
||||
|
@ -81,16 +81,6 @@ enum cc_attr {
|
||||
*/
|
||||
CC_ATTR_GUEST_SEV_SNP,
|
||||
|
||||
/**
|
||||
* @CC_ATTR_HOTPLUG_DISABLED: Hotplug is not supported or disabled.
|
||||
*
|
||||
* The platform/OS is running as a guest/virtual machine does not
|
||||
* support CPU hotplug feature.
|
||||
*
|
||||
* Examples include TDX Guest.
|
||||
*/
|
||||
CC_ATTR_HOTPLUG_DISABLED,
|
||||
|
||||
/**
|
||||
* @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
|
||||
*
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cpuhotplug.h>
|
||||
#include <linux/cpuhplock.h>
|
||||
#include <linux/cpu_smt.h>
|
||||
|
||||
struct device;
|
||||
@ -132,38 +133,6 @@ static inline int add_cpu(unsigned int cpu) { return 0;}
|
||||
#endif /* CONFIG_SMP */
|
||||
extern const struct bus_type cpu_subsys;
|
||||
|
||||
extern int lockdep_is_cpus_held(void);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
extern void cpus_write_lock(void);
|
||||
extern void cpus_write_unlock(void);
|
||||
extern void cpus_read_lock(void);
|
||||
extern void cpus_read_unlock(void);
|
||||
extern int cpus_read_trylock(void);
|
||||
extern void lockdep_assert_cpus_held(void);
|
||||
extern void cpu_hotplug_disable(void);
|
||||
extern void cpu_hotplug_enable(void);
|
||||
void clear_tasks_mm_cpumask(int cpu);
|
||||
int remove_cpu(unsigned int cpu);
|
||||
int cpu_device_down(struct device *dev);
|
||||
extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu);
|
||||
|
||||
#else /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static inline void cpus_write_lock(void) { }
|
||||
static inline void cpus_write_unlock(void) { }
|
||||
static inline void cpus_read_lock(void) { }
|
||||
static inline void cpus_read_unlock(void) { }
|
||||
static inline int cpus_read_trylock(void) { return true; }
|
||||
static inline void lockdep_assert_cpus_held(void) { }
|
||||
static inline void cpu_hotplug_disable(void) { }
|
||||
static inline void cpu_hotplug_enable(void) { }
|
||||
static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
|
||||
static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
|
||||
#endif /* !CONFIG_HOTPLUG_CPU */
|
||||
|
||||
DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock())
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP_SMP
|
||||
extern int freeze_secondary_cpus(int primary);
|
||||
extern void thaw_secondary_cpus(void);
|
||||
|
49
include/linux/cpuhplock.h
Normal file
49
include/linux/cpuhplock.h
Normal file
@ -0,0 +1,49 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* include/linux/cpuhplock.h - CPU hotplug locking
|
||||
*
|
||||
* Locking functions for CPU hotplug.
|
||||
*/
|
||||
#ifndef _LINUX_CPUHPLOCK_H_
|
||||
#define _LINUX_CPUHPLOCK_H_
|
||||
|
||||
#include <linux/cleanup.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
struct device;
|
||||
|
||||
extern int lockdep_is_cpus_held(void);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
void cpus_write_lock(void);
|
||||
void cpus_write_unlock(void);
|
||||
void cpus_read_lock(void);
|
||||
void cpus_read_unlock(void);
|
||||
int cpus_read_trylock(void);
|
||||
void lockdep_assert_cpus_held(void);
|
||||
void cpu_hotplug_disable_offlining(void);
|
||||
void cpu_hotplug_disable(void);
|
||||
void cpu_hotplug_enable(void);
|
||||
void clear_tasks_mm_cpumask(int cpu);
|
||||
int remove_cpu(unsigned int cpu);
|
||||
int cpu_device_down(struct device *dev);
|
||||
void smp_shutdown_nonboot_cpus(unsigned int primary_cpu);
|
||||
|
||||
#else /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static inline void cpus_write_lock(void) { }
|
||||
static inline void cpus_write_unlock(void) { }
|
||||
static inline void cpus_read_lock(void) { }
|
||||
static inline void cpus_read_unlock(void) { }
|
||||
static inline int cpus_read_trylock(void) { return true; }
|
||||
static inline void lockdep_assert_cpus_held(void) { }
|
||||
static inline void cpu_hotplug_disable_offlining(void) { }
|
||||
static inline void cpu_hotplug_disable(void) { }
|
||||
static inline void cpu_hotplug_enable(void) { }
|
||||
static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
|
||||
static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
|
||||
#endif /* !CONFIG_HOTPLUG_CPU */
|
||||
|
||||
DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock())
|
||||
|
||||
#endif /* _LINUX_CPUHPLOCK_H_ */
|
12
kernel/cpu.c
12
kernel/cpu.c
@ -483,6 +483,8 @@ static int cpu_hotplug_disabled;
|
||||
|
||||
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
|
||||
|
||||
static bool cpu_hotplug_offline_disabled __ro_after_init;
|
||||
|
||||
void cpus_read_lock(void)
|
||||
{
|
||||
percpu_down_read(&cpu_hotplug_lock);
|
||||
@ -542,6 +544,14 @@ static void lockdep_release_cpus_lock(void)
|
||||
rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
|
||||
}
|
||||
|
||||
/* Declare CPU offlining not supported */
|
||||
void cpu_hotplug_disable_offlining(void)
|
||||
{
|
||||
cpu_maps_update_begin();
|
||||
cpu_hotplug_offline_disabled = true;
|
||||
cpu_maps_update_done();
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for currently running CPU hotplug operations to complete (if any) and
|
||||
* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
|
||||
@ -1471,7 +1481,7 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
|
||||
* If the platform does not support hotplug, report it explicitly to
|
||||
* differentiate it from a transient offlining failure.
|
||||
*/
|
||||
if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
|
||||
if (cpu_hotplug_offline_disabled)
|
||||
return -EOPNOTSUPP;
|
||||
if (cpu_hotplug_disabled)
|
||||
return -EBUSY;
|
||||
|
Loading…
Reference in New Issue
Block a user