mirror of
https://github.com/torvalds/linux.git
synced 2024-12-02 17:11:33 +00:00
bfd20f1cc8
IOMMU harms performance significantly when we run very fast networking workloads. It's 40GB networking doing XDP test. Software overhead is almost unnoticeable, but it's the IOTLB miss (based on our analysis) which kills the performance. We observed the same performance issue even with software passthrough (identity mapping), only the hardware passthrough survives. The pps with iommu (with software passthrough) is only about ~30% of that without it. This is a limitation in hardware based on our observation, so we'd like to disable the IOMMU force on, but we do want to use TBOOT and we can sacrifice the DMA security bought by IOMMU. I must admit I know nothing about TBOOT, but TBOOT guys (cc-ed) think not enabling IOMMU is totally ok. So introduce a new boot option to disable the force on. It's kind of silly we need to run into intel_iommu_init even without force on, but we need to disable TBOOT PMR registers. For system without the boot option, nothing is changed. Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
527 lines
13 KiB
C
527 lines
13 KiB
C
/*
|
|
* tboot.c: main implementation of helper functions used by kernel for
|
|
* runtime support of Intel(R) Trusted Execution Technology
|
|
*
|
|
* Copyright (c) 2006-2009, Intel Corporation
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with
|
|
* this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
*/
|
|
|
|
#include <linux/dma_remapping.h>
|
|
#include <linux/init_task.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/export.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/init.h>
|
|
#include <linux/dmar.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/pfn.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/tboot.h>
|
|
#include <linux/debugfs.h>
|
|
|
|
#include <asm/realmode.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/bootparam.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/swiotlb.h>
|
|
#include <asm/fixmap.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/e820.h>
|
|
#include <asm/io.h>
|
|
|
|
#include "../realmode/rm/wakeup.h"
|
|
|
|
/* Global pointer to shared data; NULL means no measured launch. */
|
|
struct tboot *tboot __read_mostly;
|
|
EXPORT_SYMBOL(tboot);
|
|
|
|
/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
|
|
#define AP_WAIT_TIMEOUT 1
|
|
|
|
#undef pr_fmt
|
|
#define pr_fmt(fmt) "tboot: " fmt
|
|
|
|
static u8 tboot_uuid[16] __initdata = TBOOT_UUID;
|
|
|
|
/*
 * tboot_probe - find and validate the tboot shared page at boot.
 *
 * Reads the physical address handed over in boot_params.tboot_addr, maps it
 * through the FIX_TBOOT_BASE fixmap slot and publishes it in the global
 * 'tboot' pointer.  The pointer is left/reset to NULL when the address is
 * missing, is not inside an E820_RESERVED range, carries the wrong UUID, or
 * reports a version below 5.
 */
void __init tboot_probe(void)
{
	const u64 shared_pa = boot_params.tboot_addr;

	/* No address handed over: nothing to probe. */
	if (!shared_pa)
		return;

	/*
	 * Check the address sits in a reserved e820 range before touching
	 * the fixmap, so a garbage value is less likely to crash us.
	 */
	if (!e820_any_mapped(shared_pa, shared_pa, E820_RESERVED)) {
		pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n");
		return;
	}

	/* Map the page, then verify the UUID and the version. */
	set_fixmap(FIX_TBOOT_BASE, shared_pa);
	tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);

	if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid)) != 0) {
		pr_warning("tboot at 0x%llx is invalid\n", shared_pa);
		goto reject;
	}

	if (tboot->version < 5) {
		pr_warning("tboot version is invalid: %u\n", tboot->version);
		goto reject;
	}

	pr_info("found shared page at phys addr 0x%llx:\n", shared_pa);
	pr_debug("version: %d\n", tboot->version);
	pr_debug("log_addr: 0x%08x\n", tboot->log_addr);
	pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry);
	pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base);
	pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
	return;

reject:
	/* Validation failed: make tboot_enabled()-style checks see "absent". */
	tboot = NULL;
}
|
|
|
|
static pgd_t *tboot_pg_dir;
|
|
static struct mm_struct tboot_mm = {
|
|
.mm_rb = RB_ROOT,
|
|
.pgd = swapper_pg_dir,
|
|
.mm_users = ATOMIC_INIT(2),
|
|
.mm_count = ATOMIC_INIT(1),
|
|
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
|
|
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
|
|
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
|
|
};
|
|
|
|
static inline void switch_to_tboot_pt(void)
|
|
{
|
|
write_cr3(virt_to_phys(tboot_pg_dir));
|
|
}
|
|
|
|
static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
|
|
pgprot_t prot)
|
|
{
|
|
pgd_t *pgd;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
|
|
pgd = pgd_offset(&tboot_mm, vaddr);
|
|
pud = pud_alloc(&tboot_mm, pgd, vaddr);
|
|
if (!pud)
|
|
return -1;
|
|
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
|
|
if (!pmd)
|
|
return -1;
|
|
pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
|
|
if (!pte)
|
|
return -1;
|
|
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
|
|
pte_unmap(pte);
|
|
return 0;
|
|
}
|
|
|
|
static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
|
|
unsigned long nr)
|
|
{
|
|
/* Reuse the original kernel mapping */
|
|
tboot_pg_dir = pgd_alloc(&tboot_mm);
|
|
if (!tboot_pg_dir)
|
|
return -1;
|
|
|
|
for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
|
|
if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void tboot_create_trampoline(void)
|
|
{
|
|
u32 map_base, map_size;
|
|
|
|
/* Create identity map for tboot shutdown code. */
|
|
map_base = PFN_DOWN(tboot->tboot_base);
|
|
map_size = PFN_UP(tboot->tboot_size);
|
|
if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
|
|
panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
|
|
map_base, map_size);
|
|
}
|
|
|
|
#ifdef CONFIG_ACPI_SLEEP
|
|
|
|
/*
 * add_mac_region - append one region to tboot->mac_regions.
 * @start: physical start of the region
 * @size:  length of the region in bytes
 *
 * The recorded region is widened to page boundaries.  Regions with a zero
 * start or zero size are ignored.  Panics when the table is already full,
 * and that check runs before the zero test.
 */
static void add_mac_region(phys_addr_t start, unsigned long size)
{
	struct tboot_mac_region *slot;
	phys_addr_t end = start + size;

	if (tboot->num_mac_regions >= MAX_TB_MAC_REGIONS)
		panic("tboot: Too many MAC regions\n");

	if (!start || !size)
		return;

	slot = &tboot->mac_regions[tboot->num_mac_regions++];
	slot->start = round_down(start, PAGE_SIZE);
	slot->size = round_up(end, PAGE_SIZE) - slot->start;
}
|
|
|
|
static int tboot_setup_sleep(void)
|
|
{
|
|
int i;
|
|
|
|
tboot->num_mac_regions = 0;
|
|
|
|
for (i = 0; i < e820->nr_map; i++) {
|
|
if ((e820->map[i].type != E820_RAM)
|
|
&& (e820->map[i].type != E820_RESERVED_KERN))
|
|
continue;
|
|
|
|
add_mac_region(e820->map[i].addr, e820->map[i].size);
|
|
}
|
|
|
|
tboot->acpi_sinfo.kernel_s3_resume_vector =
|
|
real_mode_header->wakeup_start;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#else /* no CONFIG_ACPI_SLEEP */
|
|
|
|
/*
 * Variant built without CONFIG_ACPI_SLEEP: being asked to set up an S3
 * shutdown when the kernel has no S3 support is treated as a bug.
 */
static int tboot_setup_sleep(void)
{
	BUG();

	/* only reached if BUG() returns */
	return -1;
}
|
|
|
|
#endif
|
|
|
|
/*
 * tboot_shutdown - hand the shutdown over to tboot.
 * @shutdown_type: TB_SHUTDOWN_* value stored in the shared page
 *
 * Returns to the caller, so the normal shutdown path continues, when tboot
 * is not enabled, when the 1:1 mapping has not been created yet (expected
 * only for a very early panic()), or when S3 preparation fails.  Otherwise
 * it switches to the tboot page tables and jumps to tboot's shutdown entry,
 * which is not expected to return.
 */
void tboot_shutdown(u32 shutdown_type)
{
	void (*tboot_entry)(void);

	if (!tboot_enabled() || !tboot_pg_dir)
		return;

	/* For S3 the regions to MAC have to be set up first. */
	if (shutdown_type == TB_SHUTDOWN_S3 && tboot_setup_sleep())
		return;

	tboot->shutdown_type = shutdown_type;

	switch_to_tboot_pt();

	tboot_entry = (void(*)(void))(unsigned long)tboot->shutdown_entry;
	tboot_entry();

	/* should not reach here */
	for (;;)
		halt();
}
|
|
|
|
static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
|
|
{
|
|
#define TB_COPY_GAS(tbg, g) \
|
|
tbg.space_id = g.space_id; \
|
|
tbg.bit_width = g.bit_width; \
|
|
tbg.bit_offset = g.bit_offset; \
|
|
tbg.access_width = g.access_width; \
|
|
tbg.address = g.address;
|
|
|
|
TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block);
|
|
TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block);
|
|
TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block);
|
|
TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block);
|
|
|
|
/*
|
|
* We need phys addr of waking vector, but can't use virt_to_phys() on
|
|
* &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys
|
|
* addr.
|
|
*/
|
|
tboot->acpi_sinfo.wakeup_vector = fadt->facs +
|
|
offsetof(struct acpi_table_facs, firmware_waking_vector);
|
|
}
|
|
|
|
/*
 * tboot_sleep - prepare-sleep hook registered with ACPI by tboot_late_init().
 * @sleep_state:  ACPI sleep state being entered
 * @pm1a_control: value stored as the PM1a control value for tboot
 * @pm1b_control: value stored as the PM1b control value for tboot
 *
 * Fills the ACPI sleep info in the shared page and asks tboot to perform
 * the matching shutdown.
 *
 * Returns 0 when tboot is not enabled or after tboot_shutdown() returned,
 * -1 when the sleep state has no tboot shutdown type.
 */
static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
{
	/* ACPI S-state -> tboot shutdown type; -1 marks "not supported". */
	static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
		[0] = -1,		/* S0 */
		[1] = -1,		/* S1 */
		[2] = -1,		/* S2 */
		[3] = TB_SHUTDOWN_S3,	/* S3 */
		[4] = TB_SHUTDOWN_S4,	/* S4 */
		[5] = TB_SHUTDOWN_S5,	/* S5 */
	};
	u32 shutdown_type;

	if (!tboot_enabled())
		return 0;

	tboot_copy_fadt(&acpi_gbl_FADT);
	tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
	tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control;
	/* the 32-bit wakeup vector is always used */
	tboot->acpi_sinfo.vector_width = 32;

	shutdown_type = (sleep_state < ACPI_S_STATE_COUNT) ?
			acpi_shutdown_map[sleep_state] : (u32)-1;
	if (shutdown_type == (u32)-1) {
		pr_warning("unsupported sleep state 0x%x\n", sleep_state);
		return -1;
	}

	tboot_shutdown(shutdown_type);
	return 0;
}
|
|
|
|
/*
 * tboot_extended_sleep - extended prepare-sleep hook registered with ACPI by
 * tboot_late_init().
 * @sleep_state: ACPI sleep state being entered (unused)
 * @val_a:       unused
 * @val_b:       unused
 *
 * tboot cannot suspend platforms that use the reduced-hardware sleep
 * interface, so the request is refused whenever tboot is active.
 *
 * Returns 0 when tboot is not enabled, -ENODEV otherwise.
 */
static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b)
{
	if (!tboot_enabled())
		return 0;

	/* Terminate the line like every other message in this file. */
	pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)\n");
	return -ENODEV;
}
|
|
|
|
/*
 * Incremented by tboot_dying_cpu() on every invocation and reset to 0 in
 * tboot_late_init(); its value is what tboot_wait_for_aps() compares
 * tboot->num_in_wfs against.
 */
static atomic_t ap_wfs_count;
|
|
|
|
static int tboot_wait_for_aps(int num_aps)
|
|
{
|
|
unsigned long timeout;
|
|
|
|
timeout = AP_WAIT_TIMEOUT*HZ;
|
|
while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps &&
|
|
timeout) {
|
|
mdelay(1);
|
|
timeout--;
|
|
}
|
|
|
|
if (timeout)
|
|
pr_warning("tboot wait for APs timeout\n");
|
|
|
|
return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
|
|
}
|
|
|
|
static int tboot_dying_cpu(unsigned int cpu)
|
|
{
|
|
atomic_inc(&ap_wfs_count);
|
|
if (num_online_cpus() == 1) {
|
|
if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
|
|
return -EBUSY;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_DEBUG_FS
|
|
|
|
#define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \
|
|
0x4c, 0x84, 0xa3, 0xe9, 0x53, 0xb8, 0x81, 0x74 }
|
|
|
|
#define TBOOT_SERIAL_LOG_ADDR 0x60000
|
|
#define TBOOT_SERIAL_LOG_SIZE 0x08000
|
|
#define LOG_MAX_SIZE_OFF 16
|
|
#define LOG_BUF_OFF 24
|
|
|
|
static uint8_t tboot_log_uuid[16] = TBOOT_LOG_UUID;
|
|
|
|
/*
 * tboot_log_read - read handler of the "tboot_log" debugfs file.
 * @file:     unused
 * @user_buf: user buffer to fill
 * @count:    number of bytes requested
 * @ppos:     file position; advanced by the number of bytes copied
 *
 * Maps the tboot serial log window, checks its UUID and copies up to @count
 * bytes of log data starting at *@ppos to user space through a bounce
 * buffer.
 *
 * Returns the number of bytes copied, 0 at or past the end of the log,
 * -ENOMEM if the bounce buffer cannot be allocated, or -EFAULT if the
 * window cannot be mapped, the UUID does not match, or the copy to user
 * space fails.
 */
static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
{
	void __iomem *log_base;
	u8 log_uuid[16];
	u32 max_size;
	void *kbuf;
	int ret = -EFAULT;

	log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE);
	if (!log_base)
		return ret;

	memcpy_fromio(log_uuid, log_base, sizeof(log_uuid));
	if (memcmp(&tboot_log_uuid, log_uuid, sizeof(log_uuid)))
		goto err_iounmap;

	max_size = readl(log_base + LOG_MAX_SIZE_OFF);
	/*
	 * The size comes from the log header itself; never let it reach
	 * beyond the part of the window that was actually mapped.
	 */
	if (max_size > TBOOT_SERIAL_LOG_SIZE - LOG_BUF_OFF)
		max_size = TBOOT_SERIAL_LOG_SIZE - LOG_BUF_OFF;

	if (*ppos >= max_size) {
		/* at or past the end of the log: report EOF */
		ret = 0;
		goto err_iounmap;
	}

	/* Trim the request so it ends at the end of the log. */
	if (*ppos + count > max_size)
		count = max_size - *ppos;

	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf) {
		ret = -ENOMEM;
		goto err_iounmap;
	}

	/* Bounce through kernel memory: the source is I/O memory. */
	memcpy_fromio(kbuf, log_base + LOG_BUF_OFF + *ppos, count);
	if (copy_to_user(user_buf, kbuf, count))
		goto err_kfree;

	*ppos += count;

	ret = count;

err_kfree:
	kfree(kbuf);

err_iounmap:
	iounmap(log_base);

	return ret;
}
|
|
|
|
static const struct file_operations tboot_log_fops = {
|
|
.read = tboot_log_read,
|
|
.llseek = default_llseek,
|
|
};
|
|
|
|
#endif /* CONFIG_DEBUG_FS */
|
|
|
|
static __init int tboot_late_init(void)
|
|
{
|
|
if (!tboot_enabled())
|
|
return 0;
|
|
|
|
tboot_create_trampoline();
|
|
|
|
atomic_set(&ap_wfs_count, 0);
|
|
cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "x86/tboot:dying", NULL,
|
|
tboot_dying_cpu);
|
|
#ifdef CONFIG_DEBUG_FS
|
|
debugfs_create_file("tboot_log", S_IRUSR,
|
|
arch_debugfs_dir, NULL, &tboot_log_fops);
|
|
#endif
|
|
|
|
acpi_os_set_prepare_sleep(&tboot_sleep);
|
|
acpi_os_set_prepare_extended_sleep(&tboot_extended_sleep);
|
|
return 0;
|
|
}
|
|
|
|
late_initcall(tboot_late_init);
|
|
|
|
/*
 * TXT configuration registers; the TXTCR_* values below are offsets from
 * TXT_{PUB, PRIV}_CONFIG_REGS_BASE.
 */
#define TXT_PUB_CONFIG_REGS_BASE	0xfed30000
#define TXT_PRIV_CONFIG_REGS_BASE	0xfed20000

/* Number of pages in each config register space (used by fixmap). */
#define NR_TXT_CONFIG_PAGES	((TXT_PUB_CONFIG_REGS_BASE - \
				  TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT)

/* Register offsets within the pub/priv config space. */
#define TXTCR_HEAP_BASE		0x0300
#define TXTCR_HEAP_SIZE		0x0308
|
|
|
|
#define SHA1_SIZE 20
|
|
|
|
struct sha1_hash {
|
|
u8 hash[SHA1_SIZE];
|
|
};
|
|
|
|
struct sinit_mle_data {
|
|
u32 version; /* currently 6 */
|
|
struct sha1_hash bios_acm_id;
|
|
u32 edx_senter_flags;
|
|
u64 mseg_valid;
|
|
struct sha1_hash sinit_hash;
|
|
struct sha1_hash mle_hash;
|
|
struct sha1_hash stm_hash;
|
|
struct sha1_hash lcp_policy_hash;
|
|
u32 lcp_policy_control;
|
|
u32 rlp_wakeup_addr;
|
|
u32 reserved;
|
|
u32 num_mdrs;
|
|
u32 mdrs_off;
|
|
u32 num_vtd_dmars;
|
|
u32 vtd_dmars_off;
|
|
} __packed;
|
|
|
|
struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl)
|
|
{
|
|
void *heap_base, *heap_ptr, *config;
|
|
|
|
if (!tboot_enabled())
|
|
return dmar_tbl;
|
|
|
|
/*
|
|
* ACPI tables may not be DMA protected by tboot, so use DMAR copy
|
|
* SINIT saved in SinitMleData in TXT heap (which is DMA protected)
|
|
*/
|
|
|
|
/* map config space in order to get heap addr */
|
|
config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES *
|
|
PAGE_SIZE);
|
|
if (!config)
|
|
return NULL;
|
|
|
|
/* now map TXT heap */
|
|
heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE),
|
|
*(u64 *)(config + TXTCR_HEAP_SIZE));
|
|
iounmap(config);
|
|
if (!heap_base)
|
|
return NULL;
|
|
|
|
/* walk heap to SinitMleData */
|
|
/* skip BiosData */
|
|
heap_ptr = heap_base + *(u64 *)heap_base;
|
|
/* skip OsMleData */
|
|
heap_ptr += *(u64 *)heap_ptr;
|
|
/* skip OsSinitData */
|
|
heap_ptr += *(u64 *)heap_ptr;
|
|
/* now points to SinitMleDataSize; set to SinitMleData */
|
|
heap_ptr += sizeof(u64);
|
|
/* get addr of DMAR table */
|
|
dmar_tbl = (struct acpi_table_header *)(heap_ptr +
|
|
((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off -
|
|
sizeof(u64));
|
|
|
|
/* don't unmap heap because dmar.c needs access to this */
|
|
|
|
return dmar_tbl;
|
|
}
|
|
|
|
int tboot_force_iommu(void)
|
|
{
|
|
if (!tboot_enabled())
|
|
return 0;
|
|
|
|
if (!intel_iommu_tboot_noforce)
|
|
return 1;
|
|
|
|
if (no_iommu || swiotlb || dmar_disabled)
|
|
pr_warning("Forcing Intel-IOMMU to enabled\n");
|
|
|
|
dmar_disabled = 0;
|
|
#ifdef CONFIG_SWIOTLB
|
|
swiotlb = 0;
|
|
#endif
|
|
no_iommu = 0;
|
|
|
|
return 1;
|
|
}
|