linux/drivers/firmware/efi/efi.c
Linus Torvalds 0cbee99269 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace updates from Eric Biederman:
 "Long ago and far away when user namespaces where young it was realized
  that allowing fresh mounts of proc and sysfs with only user namespace
  permissions could violate the basic rule that only root gets to decide
  if proc or sysfs should be mounted at all.

  Some hacks were put in place to reduce the worst of the damage could
  be done, and the common sense rule was adopted that fresh mounts of
  proc and sysfs should allow no more than bind mounts of proc and
  sysfs.  Unfortunately that rule has not been fully enforced.

  There are two kinds of gaps in that enforcement.  Only filesystems
  mounted on empty directories of proc and sysfs should be ignored but
  the test for empty directories was insufficient.  So in my tree
  directories on proc, sysctl and sysfs that will always be empty are
  created specially.  Every other technique is imperfect as an ordinary
  directory can have entries added even after a readdir returns and
  shows that the directory is empty.  Special creation of directories
  for mount points makes the code in the kernel a smidge clearer about
  it's purpose.  I asked container developers from the various container
  projects to help test this and no holes were found in the set of mount
  points on proc and sysfs that are created specially.

  This set of changes also starts enforcing the mount flags of fresh
  mounts of proc and sysfs are consistent with the existing mount of
  proc and sysfs.  I expected this to be the boring part of the work but
  unfortunately unprivileged userspace winds up mounting fresh copies of
  proc and sysfs with noexec and nosuid clear when root set those flags
  on the previous mount of proc and sysfs.  So for now only the atime,
  read-only and nodev attributes which userspace happens to keep
  consistent are enforced.  Dealing with the noexec and nosuid
  attributes remains for another time.

  This set of changes also addresses an issue with how open file
  descriptors from /proc/<pid>/ns/* are displayed.  Recently readlink of
  /proc/<pid>/fd has been triggering a WARN_ON that has not been
  meaningful since it was added (as all of the code in the kernel was
  converted) and is not now actively wrong.

  There is also a short list of issues that have not been fixed yet that
  I will mention briefly.

  It is possible to rename a directory from below to above a bind mount.
  At which point any directory pointers below the renamed directory can
  be walked up to the root directory of the filesystem.  With user
  namespaces enabled a bind mount of the bind mount can be created
  allowing the user to pick a directory whose children they can rename
  to outside of the bind mount.  This is challenging to fix and doubly
  so because all obvious solutions must touch code that is in the
  performance part of pathname resolution.

  As mentioned above there is also a question of how to ensure that
  developers by accident or with purpose do not introduce exectuable
  files on sysfs and proc and in doing so introduce security regressions
  in the current userspace that will not be immediately obvious and as
  such are likely to require breaking userspace in painful ways once
  they are recognized"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  vfs: Remove incorrect debugging WARN in prepend_path
  mnt: Update fs_fully_visible to test for permanently empty directories
  sysfs: Create mountpoints with sysfs_create_mount_point
  sysfs: Add support for permanently empty directories to serve as mount points.
  kernfs: Add support for always empty directories.
  proc: Allow creating permanently empty directories that serve as mount points
  sysctl: Allow creating permanently empty directories that serve as mountpoints.
  fs: Add helper functions for permanently empty directories.
  vfs: Ignore unlocked mounts in fs_fully_visible
  mnt: Modify fs_fully_visible to deal with locked ro nodev and atime
  mnt: Refactor the logic for mounting sysfs and proc in a user namespace
2015-07-03 15:20:57 -07:00

603 lines
15 KiB
C

/*
* efi.c - EFI subsystem
*
* Copyright (C) 2001,2003,2004 Dell <Matt_Domsch@dell.com>
* Copyright (C) 2004 Intel Corporation <matthew.e.tolentino@intel.com>
* Copyright (C) 2013 Tom Gundersen <teg@jklm.no>
*
* This code registers /sys/firmware/efi{,/efivars} when EFI is supported,
* allowing the efivarfs to be mounted or the efivars module to be loaded.
* The existance of /sys/firmware/efi may also be used by userspace to
* determine that the system supports EFI.
*
* This file is released under the GPLv2.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/efi.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/io.h>
#include <linux/platform_device.h>
struct efi __read_mostly efi = {
.mps = EFI_INVALID_TABLE_ADDR,
.acpi = EFI_INVALID_TABLE_ADDR,
.acpi20 = EFI_INVALID_TABLE_ADDR,
.smbios = EFI_INVALID_TABLE_ADDR,
.smbios3 = EFI_INVALID_TABLE_ADDR,
.sal_systab = EFI_INVALID_TABLE_ADDR,
.boot_info = EFI_INVALID_TABLE_ADDR,
.hcdp = EFI_INVALID_TABLE_ADDR,
.uga = EFI_INVALID_TABLE_ADDR,
.uv_systab = EFI_INVALID_TABLE_ADDR,
.fw_vendor = EFI_INVALID_TABLE_ADDR,
.runtime = EFI_INVALID_TABLE_ADDR,
.config_table = EFI_INVALID_TABLE_ADDR,
.esrt = EFI_INVALID_TABLE_ADDR,
};
EXPORT_SYMBOL(efi);
static bool disable_runtime;
static int __init setup_noefi(char *arg)
{
disable_runtime = true;
return 0;
}
early_param("noefi", setup_noefi);
bool efi_runtime_disabled(void)
{
return disable_runtime;
}
static int __init parse_efi_cmdline(char *str)
{
if (parse_option_str(str, "noruntime"))
disable_runtime = true;
return 0;
}
early_param("efi", parse_efi_cmdline);
struct kobject *efi_kobj;
/*
* Let's not leave out systab information that snuck into
* the efivars driver
*/
static ssize_t systab_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
char *str = buf;
if (!kobj || !buf)
return -EINVAL;
if (efi.mps != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "MPS=0x%lx\n", efi.mps);
if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "ACPI20=0x%lx\n", efi.acpi20);
if (efi.acpi != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "ACPI=0x%lx\n", efi.acpi);
/*
* If both SMBIOS and SMBIOS3 entry points are implemented, the
* SMBIOS3 entry point shall be preferred, so we list it first to
* let applications stop parsing after the first match.
*/
if (efi.smbios3 != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "SMBIOS3=0x%lx\n", efi.smbios3);
if (efi.smbios != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "SMBIOS=0x%lx\n", efi.smbios);
if (efi.hcdp != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "HCDP=0x%lx\n", efi.hcdp);
if (efi.boot_info != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "BOOTINFO=0x%lx\n", efi.boot_info);
if (efi.uga != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "UGA=0x%lx\n", efi.uga);
return str - buf;
}
static struct kobj_attribute efi_attr_systab =
__ATTR(systab, 0400, systab_show, NULL);
#define EFI_FIELD(var) efi.var
#define EFI_ATTR_SHOW(name) \
static ssize_t name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, char *buf) \
{ \
return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \
}
EFI_ATTR_SHOW(fw_vendor);
EFI_ATTR_SHOW(runtime);
EFI_ATTR_SHOW(config_table);
static ssize_t fw_platform_size_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", efi_enabled(EFI_64BIT) ? 64 : 32);
}
static struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor);
static struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime);
static struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table);
static struct kobj_attribute efi_attr_fw_platform_size =
__ATTR_RO(fw_platform_size);
static struct attribute *efi_subsys_attrs[] = {
&efi_attr_systab.attr,
&efi_attr_fw_vendor.attr,
&efi_attr_runtime.attr,
&efi_attr_config_table.attr,
&efi_attr_fw_platform_size.attr,
NULL,
};
static umode_t efi_attr_is_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
if (attr == &efi_attr_fw_vendor.attr) {
if (efi_enabled(EFI_PARAVIRT) ||
efi.fw_vendor == EFI_INVALID_TABLE_ADDR)
return 0;
} else if (attr == &efi_attr_runtime.attr) {
if (efi.runtime == EFI_INVALID_TABLE_ADDR)
return 0;
} else if (attr == &efi_attr_config_table.attr) {
if (efi.config_table == EFI_INVALID_TABLE_ADDR)
return 0;
}
return attr->mode;
}
static struct attribute_group efi_subsys_attr_group = {
.attrs = efi_subsys_attrs,
.is_visible = efi_attr_is_visible,
};
static struct efivars generic_efivars;
static struct efivar_operations generic_ops;
static int generic_ops_register(void)
{
generic_ops.get_variable = efi.get_variable;
generic_ops.set_variable = efi.set_variable;
generic_ops.get_next_variable = efi.get_next_variable;
generic_ops.query_variable_store = efi_query_variable_store;
return efivars_register(&generic_efivars, &generic_ops, efi_kobj);
}
static void generic_ops_unregister(void)
{
efivars_unregister(&generic_efivars);
}
/*
* We register the efi subsystem with the firmware subsystem and the
* efivars subsystem with the efi subsystem, if the system was booted with
* EFI.
*/
static int __init efisubsys_init(void)
{
int error;
if (!efi_enabled(EFI_BOOT))
return 0;
/* We register the efi directory at /sys/firmware/efi */
efi_kobj = kobject_create_and_add("efi", firmware_kobj);
if (!efi_kobj) {
pr_err("efi: Firmware registration failed.\n");
return -ENOMEM;
}
error = generic_ops_register();
if (error)
goto err_put;
error = sysfs_create_group(efi_kobj, &efi_subsys_attr_group);
if (error) {
pr_err("efi: Sysfs attribute export failed with error %d.\n",
error);
goto err_unregister;
}
error = efi_runtime_map_init(efi_kobj);
if (error)
goto err_remove_group;
/* and the standard mountpoint for efivarfs */
error = sysfs_create_mount_point(efi_kobj, "efivars");
if (error) {
pr_err("efivars: Subsystem registration failed.\n");
goto err_remove_group;
}
return 0;
err_remove_group:
sysfs_remove_group(efi_kobj, &efi_subsys_attr_group);
err_unregister:
generic_ops_unregister();
err_put:
kobject_put(efi_kobj);
return error;
}
subsys_initcall(efisubsys_init);
/*
* Find the efi memory descriptor for a given physical address. Given a
* physicall address, determine if it exists within an EFI Memory Map entry,
* and if so, populate the supplied memory descriptor with the appropriate
* data.
*/
int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
{
struct efi_memory_map *map = efi.memmap;
void *p, *e;
if (!efi_enabled(EFI_MEMMAP)) {
pr_err_once("EFI_MEMMAP is not enabled.\n");
return -EINVAL;
}
if (!map) {
pr_err_once("efi.memmap is not set.\n");
return -EINVAL;
}
if (!out_md) {
pr_err_once("out_md is null.\n");
return -EINVAL;
}
if (WARN_ON_ONCE(!map->phys_map))
return -EINVAL;
if (WARN_ON_ONCE(map->nr_map == 0) || WARN_ON_ONCE(map->desc_size == 0))
return -EINVAL;
e = map->phys_map + map->nr_map * map->desc_size;
for (p = map->phys_map; p < e; p += map->desc_size) {
efi_memory_desc_t *md;
u64 size;
u64 end;
/*
* If a driver calls this after efi_free_boot_services,
* ->map will be NULL, and the target may also not be mapped.
* So just always get our own virtual map on the CPU.
*
*/
md = early_memremap((phys_addr_t)p, sizeof (*md));
if (!md) {
pr_err_once("early_memremap(%p, %zu) failed.\n",
p, sizeof (*md));
return -ENOMEM;
}
if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
md->type != EFI_BOOT_SERVICES_DATA &&
md->type != EFI_RUNTIME_SERVICES_DATA) {
early_memunmap(md, sizeof (*md));
continue;
}
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
if (phys_addr >= md->phys_addr && phys_addr < end) {
memcpy(out_md, md, sizeof(*out_md));
early_memunmap(md, sizeof (*md));
return 0;
}
early_memunmap(md, sizeof (*md));
}
pr_err_once("requested map not found.\n");
return -ENOENT;
}
/*
* Calculate the highest address of an efi memory descriptor.
*/
u64 __init efi_mem_desc_end(efi_memory_desc_t *md)
{
u64 size = md->num_pages << EFI_PAGE_SHIFT;
u64 end = md->phys_addr + size;
return end;
}
/*
* We can't ioremap data in EFI boot services RAM, because we've already mapped
* it as RAM. So, look it up in the existing EFI memory map instead. Only
* callable after efi_enter_virtual_mode and before efi_free_boot_services.
*/
void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
{
struct efi_memory_map *map;
void *p;
map = efi.memmap;
if (!map)
return NULL;
if (WARN_ON(!map->map))
return NULL;
for (p = map->map; p < map->map_end; p += map->desc_size) {
efi_memory_desc_t *md = p;
u64 size = md->num_pages << EFI_PAGE_SHIFT;
u64 end = md->phys_addr + size;
if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
md->type != EFI_BOOT_SERVICES_CODE &&
md->type != EFI_BOOT_SERVICES_DATA)
continue;
if (!md->virt_addr)
continue;
if (phys_addr >= md->phys_addr && phys_addr < end) {
phys_addr += md->virt_addr - md->phys_addr;
return (__force void __iomem *)(unsigned long)phys_addr;
}
}
return NULL;
}
static __initdata efi_config_table_type_t common_tables[] = {
{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
{HCDP_TABLE_GUID, "HCDP", &efi.hcdp},
{MPS_TABLE_GUID, "MPS", &efi.mps},
{SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab},
{SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios},
{SMBIOS3_TABLE_GUID, "SMBIOS 3.0", &efi.smbios3},
{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
{EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt},
{NULL_GUID, NULL, NULL},
};
static __init int match_config_table(efi_guid_t *guid,
unsigned long table,
efi_config_table_type_t *table_types)
{
int i;
if (table_types) {
for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
if (!efi_guidcmp(*guid, table_types[i].guid)) {
*(table_types[i].ptr) = table;
pr_cont(" %s=0x%lx ",
table_types[i].name, table);
return 1;
}
}
}
return 0;
}
int __init efi_config_parse_tables(void *config_tables, int count, int sz,
efi_config_table_type_t *arch_tables)
{
void *tablep;
int i;
tablep = config_tables;
pr_info("");
for (i = 0; i < count; i++) {
efi_guid_t guid;
unsigned long table;
if (efi_enabled(EFI_64BIT)) {
u64 table64;
guid = ((efi_config_table_64_t *)tablep)->guid;
table64 = ((efi_config_table_64_t *)tablep)->table;
table = table64;
#ifndef CONFIG_64BIT
if (table64 >> 32) {
pr_cont("\n");
pr_err("Table located above 4GB, disabling EFI.\n");
return -EINVAL;
}
#endif
} else {
guid = ((efi_config_table_32_t *)tablep)->guid;
table = ((efi_config_table_32_t *)tablep)->table;
}
if (!match_config_table(&guid, table, common_tables))
match_config_table(&guid, table, arch_tables);
tablep += sz;
}
pr_cont("\n");
set_bit(EFI_CONFIG_TABLES, &efi.flags);
return 0;
}
int __init efi_config_init(efi_config_table_type_t *arch_tables)
{
void *config_tables;
int sz, ret;
if (efi_enabled(EFI_64BIT))
sz = sizeof(efi_config_table_64_t);
else
sz = sizeof(efi_config_table_32_t);
/*
* Let's see what config tables the firmware passed to us.
*/
config_tables = early_memremap(efi.systab->tables,
efi.systab->nr_tables * sz);
if (config_tables == NULL) {
pr_err("Could not map Configuration table!\n");
return -ENOMEM;
}
ret = efi_config_parse_tables(config_tables, efi.systab->nr_tables, sz,
arch_tables);
early_memunmap(config_tables, efi.systab->nr_tables * sz);
return ret;
}
#ifdef CONFIG_EFI_VARS_MODULE
static int __init efi_load_efivars(void)
{
struct platform_device *pdev;
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
pdev = platform_device_register_simple("efivars", 0, NULL, 0);
return IS_ERR(pdev) ? PTR_ERR(pdev) : 0;
}
device_initcall(efi_load_efivars);
#endif
#ifdef CONFIG_EFI_PARAMS_FROM_FDT
#define UEFI_PARAM(name, prop, field) \
{ \
{ name }, \
{ prop }, \
offsetof(struct efi_fdt_params, field), \
FIELD_SIZEOF(struct efi_fdt_params, field) \
}
static __initdata struct {
const char name[32];
const char propname[32];
int offset;
int size;
} dt_params[] = {
UEFI_PARAM("System Table", "linux,uefi-system-table", system_table),
UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap),
UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size),
UEFI_PARAM("MemMap Desc. Size", "linux,uefi-mmap-desc-size", desc_size),
UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver)
};
struct param_info {
int verbose;
int found;
void *params;
};
static int __init fdt_find_uefi_params(unsigned long node, const char *uname,
int depth, void *data)
{
struct param_info *info = data;
const void *prop;
void *dest;
u64 val;
int i, len;
if (depth != 1 || strcmp(uname, "chosen") != 0)
return 0;
for (i = 0; i < ARRAY_SIZE(dt_params); i++) {
prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len);
if (!prop)
return 0;
dest = info->params + dt_params[i].offset;
info->found++;
val = of_read_number(prop, len / sizeof(u32));
if (dt_params[i].size == sizeof(u32))
*(u32 *)dest = val;
else
*(u64 *)dest = val;
if (info->verbose)
pr_info(" %s: 0x%0*llx\n", dt_params[i].name,
dt_params[i].size * 2, val);
}
return 1;
}
int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose)
{
struct param_info info;
int ret;
pr_info("Getting EFI parameters from FDT:\n");
info.verbose = verbose;
info.found = 0;
info.params = params;
ret = of_scan_flat_dt(fdt_find_uefi_params, &info);
if (!info.found)
pr_info("UEFI not found.\n");
else if (!ret)
pr_err("Can't find '%s' in device tree!\n",
dt_params[info.found].name);
return ret;
}
#endif /* CONFIG_EFI_PARAMS_FROM_FDT */
static __initdata char memory_type_name[][20] = {
"Reserved",
"Loader Code",
"Loader Data",
"Boot Code",
"Boot Data",
"Runtime Code",
"Runtime Data",
"Conventional Memory",
"Unusable Memory",
"ACPI Reclaim Memory",
"ACPI Memory NVS",
"Memory Mapped I/O",
"MMIO Port Space",
"PAL Code"
};
char * __init efi_md_typeattr_format(char *buf, size_t size,
const efi_memory_desc_t *md)
{
char *pos;
int type_len;
u64 attr;
pos = buf;
if (md->type >= ARRAY_SIZE(memory_type_name))
type_len = snprintf(pos, size, "[type=%u", md->type);
else
type_len = snprintf(pos, size, "[%-*s",
(int)(sizeof(memory_type_name[0]) - 1),
memory_type_name[md->type]);
if (type_len >= size)
return buf;
pos += type_len;
size -= type_len;
attr = md->attribute;
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_WP |
EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_RUNTIME))
snprintf(pos, size, "|attr=0x%016llx]",
(unsigned long long)attr);
else
snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
attr & EFI_MEMORY_XP ? "XP" : "",
attr & EFI_MEMORY_RP ? "RP" : "",
attr & EFI_MEMORY_WP ? "WP" : "",
attr & EFI_MEMORY_UCE ? "UCE" : "",
attr & EFI_MEMORY_WB ? "WB" : "",
attr & EFI_MEMORY_WT ? "WT" : "",
attr & EFI_MEMORY_WC ? "WC" : "",
attr & EFI_MEMORY_UC ? "UC" : "");
return buf;
}