e754aedc26
I've had this code for a while, but never submitted it upstream. Now that Skylake hardware is out in the wild, folks can actually run this for real. It tests the following: 1. The MPX hardware is enabled by the kernel and doing what it is supposed to 2. The MPX management code is present and enabled in the kernel 3. MPX Signal handling 4. The MPX bounds table population code (on-demand population) 5. The MPX bounds table unmapping code (kernel-initiated freeing when unused) This has also caught bugs in the XSAVE code because MPX state is saved/restored with XSAVE. I'm submitting it now because it would have caught the recent issues with the compat_siginfo code not being properly augmented when new siginfo state is added. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave@sr71.net> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Shuah Khan <shuahkh@osg.samsung.com> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20160608172535.5B40B0EE@viggo.jf.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
499 lines
12 KiB
C
499 lines
12 KiB
C
/*
|
|
* Written by Dave Hansen <dave.hansen@intel.com>
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <sys/types.h>
|
|
#include <unistd.h>
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#include <sys/mman.h>
|
|
#include <string.h>
|
|
#include <fcntl.h>
|
|
#include "mpx-debug.h"
|
|
#include "mpx-mm.h"
|
|
#include "mpx-hw.h"
|
|
|
|
/* Address of the bounds directory being inspected (set by inspect_me()/main()). */
unsigned long bounds_dir_global;

/* Abort the dig, recording the file, function and line of the call site. */
#define mpx_dig_abort()	__mpx_dig_abort(__FILE__, __func__, __LINE__)

/*
 * Report where the dig gave up and terminate the process with abort().
 *
 * @file: source file of the failing call site
 * @func: function containing the failing call site
 * @line: line number of the failing call site
 *
 * The message is written to both stderr and stdout.  Does not return.
 */
static inline void __mpx_dig_abort(const char *file, const char *func, int line)
{
	fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
	printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
	/*
	 * abort() is not required to flush buffered streams, so push the
	 * stdout copy out explicitly or it can be lost when stdout is a
	 * pipe or a file.
	 */
	fflush(stdout);
	abort();
}
|
|
|
|
/*
|
|
* run like this (BDIR finds the probable bounds directory):
|
|
*
|
|
* BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
|
|
* | head -1 | awk -F- '{print $1}')";
|
|
* ./mpx-dig $pid 0x$BDIR
|
|
*
|
|
* NOTE:
|
|
* assumes that the only 2097152-kb VMA is the bounds dir
|
|
*/
|
|
|
|
long nr_incore(void *ptr, unsigned long size_bytes)
|
|
{
|
|
int i;
|
|
long ret = 0;
|
|
long vec_len = size_bytes / PAGE_SIZE;
|
|
unsigned char *vec = malloc(vec_len);
|
|
int incore_ret;
|
|
|
|
if (!vec)
|
|
mpx_dig_abort();
|
|
|
|
incore_ret = mincore(ptr, size_bytes, vec);
|
|
if (incore_ret) {
|
|
printf("mincore ret: %d\n", incore_ret);
|
|
perror("mincore");
|
|
mpx_dig_abort();
|
|
}
|
|
for (i = 0; i < vec_len; i++)
|
|
ret += vec[i];
|
|
free(vec);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Open /proc/<pid>/<file> read-only.
 *
 * @pid:  process whose /proc directory is used
 * @file: name of the entry inside that directory (e.g. "maps", "mem")
 *
 * Returns the new file descriptor.  On failure the error is reported with
 * perror() and the negative value from open() is returned.
 */
int open_proc(int pid, char *file)
{
	static char path[100];
	int fd;

	snprintf(path, sizeof(path), "/proc/%d/%s", pid, file);
	fd = open(path, O_RDONLY);
	if (fd >= 0)
		return fd;

	perror(path);
	return fd;
}
|
|
|
|
/* One virtual address range; lookups treat it as covering [start, end). */
struct vaddr_range {
	unsigned long start, end;
};

/* Table of address ranges parsed from /proc/<pid>/maps. */
struct vaddr_range *ranges;
/* Capacity of ranges[] and the number of entries currently filled in. */
int nr_ranges_allocated, nr_ranges_populated;
/* Index of the range that satisfied the last lookup; -1 when there is none. */
int last_range = -1;
|
|
|
|
int __pid_load_vaddrs(int pid)
|
|
{
|
|
int ret = 0;
|
|
int proc_maps_fd = open_proc(pid, "maps");
|
|
char linebuf[10000];
|
|
unsigned long start;
|
|
unsigned long end;
|
|
char rest[1000];
|
|
FILE *f = fdopen(proc_maps_fd, "r");
|
|
|
|
if (!f)
|
|
mpx_dig_abort();
|
|
nr_ranges_populated = 0;
|
|
while (!feof(f)) {
|
|
char *readret = fgets(linebuf, sizeof(linebuf), f);
|
|
int parsed;
|
|
|
|
if (readret == NULL) {
|
|
if (feof(f))
|
|
break;
|
|
mpx_dig_abort();
|
|
}
|
|
|
|
parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
|
|
if (parsed != 3)
|
|
mpx_dig_abort();
|
|
|
|
dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
|
|
if (nr_ranges_populated >= nr_ranges_allocated) {
|
|
ret = -E2BIG;
|
|
break;
|
|
}
|
|
ranges[nr_ranges_populated].start = start;
|
|
ranges[nr_ranges_populated].end = end;
|
|
nr_ranges_populated++;
|
|
}
|
|
last_range = -1;
|
|
fclose(f);
|
|
close(proc_maps_fd);
|
|
return ret;
|
|
}
|
|
|
|
int pid_load_vaddrs(int pid)
|
|
{
|
|
int ret;
|
|
|
|
dprintf2("%s(%d)\n", __func__, pid);
|
|
if (!ranges) {
|
|
nr_ranges_allocated = 4;
|
|
ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
|
|
dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
|
|
nr_ranges_allocated, ranges);
|
|
assert(ranges != NULL);
|
|
}
|
|
do {
|
|
ret = __pid_load_vaddrs(pid);
|
|
if (!ret)
|
|
break;
|
|
if (ret == -E2BIG) {
|
|
dprintf2("%s(%d) need to realloc\n", __func__, pid);
|
|
nr_ranges_allocated *= 2;
|
|
ranges = realloc(ranges,
|
|
nr_ranges_allocated * sizeof(ranges[0]));
|
|
dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
|
|
pid, nr_ranges_allocated, ranges);
|
|
assert(ranges != NULL);
|
|
dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
|
|
}
|
|
} while (1);
|
|
|
|
dprintf2("%s(%d) done\n", __func__, pid);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
|
|
{
|
|
if (vaddr < r->start)
|
|
return 0;
|
|
if (vaddr >= r->end)
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
static inline int vaddr_mapped_by_range(unsigned long vaddr)
|
|
{
|
|
int i;
|
|
|
|
if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
|
|
return 1;
|
|
|
|
for (i = 0; i < nr_ranges_populated; i++) {
|
|
struct vaddr_range *r = &ranges[i];
|
|
|
|
if (vaddr_in_range(vaddr, r))
|
|
continue;
|
|
last_range = i;
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Size in bytes of one bounds table entry: four unsigned longs. */
const int bt_entry_size_bytes = sizeof(unsigned long) * 4;

/*
 * Return a pointer through which the bounds table at @table_vaddr can be read.
 *
 * Without MPX_DIG_STANDALONE the table is read in place, so the address is
 * simply returned as a pointer.  With MPX_DIG_STANDALONE the table is copied
 * into a static buffer, and the dig is aborted on a failed seek or short read.
 */
void *read_bounds_table_into_buf(unsigned long table_vaddr)
{
#ifndef MPX_DIG_STANDALONE
	return (void *)table_vaddr;
#else
	/*
	 * NOTE(review): "fd" is not declared anywhere in the visible source;
	 * confirm where it comes from before building with MPX_DIG_STANDALONE.
	 */
	static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
	off_t seek_ret;
	int read_ret;

	seek_ret = lseek(fd, table_vaddr, SEEK_SET);
	if (seek_ret != table_vaddr)
		mpx_dig_abort();

	read_ret = read(fd, &bt_buf, sizeof(bt_buf));
	if (read_ret != sizeof(bt_buf))
		mpx_dig_abort();
	return &bt_buf;
#endif
}
|
|
|
|
int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
|
|
unsigned long bde_vaddr)
|
|
{
|
|
unsigned long offset_inside_bt;
|
|
int nr_entries = 0;
|
|
int do_abort = 0;
|
|
char *bt_buf;
|
|
|
|
dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
|
|
__func__, base_controlled_vaddr, bde_vaddr);
|
|
|
|
bt_buf = read_bounds_table_into_buf(table_vaddr);
|
|
|
|
dprintf4("%s() read done\n", __func__);
|
|
|
|
for (offset_inside_bt = 0;
|
|
offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
|
|
offset_inside_bt += bt_entry_size_bytes) {
|
|
unsigned long bt_entry_index;
|
|
unsigned long bt_entry_controls;
|
|
unsigned long this_bt_entry_for_vaddr;
|
|
unsigned long *bt_entry_buf;
|
|
int i;
|
|
|
|
dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
|
|
offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
|
|
bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
|
|
if (!bt_buf) {
|
|
printf("null bt_buf\n");
|
|
mpx_dig_abort();
|
|
}
|
|
if (!bt_entry_buf) {
|
|
printf("null bt_entry_buf\n");
|
|
mpx_dig_abort();
|
|
}
|
|
dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
|
|
bt_entry_buf);
|
|
if (!bt_entry_buf[0] &&
|
|
!bt_entry_buf[1] &&
|
|
!bt_entry_buf[2] &&
|
|
!bt_entry_buf[3])
|
|
continue;
|
|
|
|
nr_entries++;
|
|
|
|
bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
|
|
bt_entry_controls = sizeof(void *);
|
|
this_bt_entry_for_vaddr =
|
|
base_controlled_vaddr + bt_entry_index*bt_entry_controls;
|
|
/*
|
|
* We sign extend vaddr bits 48->63 which effectively
|
|
* creates a hole in the virtual address space.
|
|
* This calculation corrects for the hole.
|
|
*/
|
|
if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
|
|
this_bt_entry_for_vaddr |= 0xffff800000000000;
|
|
|
|
if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
|
|
printf("bt_entry_buf: %p\n", bt_entry_buf);
|
|
printf("there is a bte for %lx but no mapping\n",
|
|
this_bt_entry_for_vaddr);
|
|
printf(" bde vaddr: %016lx\n", bde_vaddr);
|
|
printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
|
|
printf(" table_vaddr: %016lx\n", table_vaddr);
|
|
printf(" entry vaddr: %016lx @ offset %lx\n",
|
|
table_vaddr + offset_inside_bt, offset_inside_bt);
|
|
do_abort = 1;
|
|
mpx_dig_abort();
|
|
}
|
|
if (DEBUG_LEVEL < 4)
|
|
continue;
|
|
|
|
printf("table entry[%lx]: ", offset_inside_bt);
|
|
for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
|
|
printf("0x%016lx ", bt_entry_buf[i]);
|
|
printf("\n");
|
|
}
|
|
if (do_abort)
|
|
mpx_dig_abort();
|
|
dprintf4("%s() done\n", __func__);
|
|
return nr_entries;
|
|
}
|
|
|
|
int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
|
|
int *nr_populated_bdes)
|
|
{
|
|
unsigned long i;
|
|
int total_entries = 0;
|
|
|
|
dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
|
|
len_bytes, bd_offset_bytes, buf + len_bytes);
|
|
|
|
for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
|
|
unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
|
|
unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
|
|
unsigned long bounds_dir_entry;
|
|
unsigned long bd_for_vaddr;
|
|
unsigned long bt_start;
|
|
unsigned long bt_tail;
|
|
int nr_entries;
|
|
|
|
dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
|
|
bounds_dir_entry_ptr);
|
|
|
|
bounds_dir_entry = *bounds_dir_entry_ptr;
|
|
if (!bounds_dir_entry) {
|
|
dprintf4("no bounds dir at index 0x%lx / 0x%lx "
|
|
"start at offset:%lx %lx\n", bd_index, bd_index,
|
|
bd_offset_bytes, i);
|
|
continue;
|
|
}
|
|
dprintf3("found bounds_dir_entry: 0x%lx @ "
|
|
"index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
|
|
&buf[i]);
|
|
/* mask off the enable bit: */
|
|
bounds_dir_entry &= ~0x1;
|
|
(*nr_populated_bdes)++;
|
|
dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
|
|
dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
|
|
|
|
bt_start = bounds_dir_entry;
|
|
bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
|
|
if (!vaddr_mapped_by_range(bt_start)) {
|
|
printf("bounds directory 0x%lx points to nowhere\n",
|
|
bounds_dir_entry);
|
|
mpx_dig_abort();
|
|
}
|
|
if (!vaddr_mapped_by_range(bt_tail)) {
|
|
printf("bounds directory end 0x%lx points to nowhere\n",
|
|
bt_tail);
|
|
mpx_dig_abort();
|
|
}
|
|
/*
|
|
* Each bounds directory entry controls 1MB of virtual address
|
|
* space. This variable is the virtual address in the process
|
|
* of the beginning of the area controlled by this bounds_dir.
|
|
*/
|
|
bd_for_vaddr = bd_index * (1UL<<20);
|
|
|
|
nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
|
|
bounds_dir_global+bd_offset_bytes+i);
|
|
total_entries += nr_entries;
|
|
dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
|
|
"total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
|
|
bd_index, buf+i,
|
|
bounds_dir_entry, nr_entries, total_entries,
|
|
bd_for_vaddr, bd_for_vaddr + (1UL<<20));
|
|
}
|
|
dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
|
|
bd_offset_bytes);
|
|
return total_entries;
|
|
}
|
|
|
|
int proc_pid_mem_fd = -1;
|
|
|
|
void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
|
|
long buffer_size_bytes, void *buffer)
|
|
{
|
|
unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
|
|
int read_ret;
|
|
off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
|
|
|
|
if (seek_ret != seekto)
|
|
mpx_dig_abort();
|
|
|
|
read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
|
|
/* there shouldn't practically be short reads of /proc/$pid/mem */
|
|
if (read_ret != buffer_size_bytes)
|
|
mpx_dig_abort();
|
|
|
|
return buffer;
|
|
}
|
|
void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
|
|
long buffer_size_bytes, void *buffer)
|
|
|
|
{
|
|
unsigned char vec[buffer_size_bytes / PAGE_SIZE];
|
|
char *dig_bounds_dir_ptr =
|
|
(void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
|
|
/*
|
|
* use mincore() to quickly find the areas of the bounds directory
|
|
* that have memory and thus will be worth scanning.
|
|
*/
|
|
int incore_ret;
|
|
|
|
int incore = 0;
|
|
int i;
|
|
|
|
dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
|
|
|
|
incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
|
|
if (incore_ret) {
|
|
printf("mincore ret: %d\n", incore_ret);
|
|
perror("mincore");
|
|
mpx_dig_abort();
|
|
}
|
|
for (i = 0; i < sizeof(vec); i++)
|
|
incore += vec[i];
|
|
dprintf4("%s() total incore: %d\n", __func__, incore);
|
|
if (!incore)
|
|
return NULL;
|
|
dprintf3("%s() total incore: %d\n", __func__, incore);
|
|
return dig_bounds_dir_ptr;
|
|
}
|
|
|
|
int inspect_pid(int pid)
|
|
{
|
|
static int dig_nr;
|
|
long offset_inside_bounds_dir;
|
|
char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
|
|
char *dig_bounds_dir_ptr;
|
|
int total_entries = 0;
|
|
int nr_populated_bdes = 0;
|
|
int inspect_self;
|
|
|
|
if (getpid() == pid) {
|
|
dprintf4("inspecting self\n");
|
|
inspect_self = 1;
|
|
} else {
|
|
dprintf4("inspecting pid %d\n", pid);
|
|
mpx_dig_abort();
|
|
}
|
|
|
|
for (offset_inside_bounds_dir = 0;
|
|
offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
|
|
offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
|
|
static int bufs_skipped;
|
|
int this_entries;
|
|
|
|
if (inspect_self) {
|
|
dig_bounds_dir_ptr =
|
|
fill_bounds_dir_buf_self(offset_inside_bounds_dir,
|
|
sizeof(bounds_dir_buf),
|
|
&bounds_dir_buf[0]);
|
|
} else {
|
|
dig_bounds_dir_ptr =
|
|
fill_bounds_dir_buf_other(offset_inside_bounds_dir,
|
|
sizeof(bounds_dir_buf),
|
|
&bounds_dir_buf[0]);
|
|
}
|
|
if (!dig_bounds_dir_ptr) {
|
|
bufs_skipped++;
|
|
continue;
|
|
}
|
|
this_entries = search_bd_buf(dig_bounds_dir_ptr,
|
|
sizeof(bounds_dir_buf),
|
|
offset_inside_bounds_dir,
|
|
&nr_populated_bdes);
|
|
total_entries += this_entries;
|
|
}
|
|
printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
|
|
total_entries, nr_populated_bdes);
|
|
return total_entries + nr_populated_bdes;
|
|
}
|
|
|
|
#ifdef MPX_DIG_REMOTE
/*
 * Standalone entry point: mpx-dig <pid> <bounds directory address in hex>
 *
 * Parses both arguments, opens /proc/<pid>/mem and runs inspect_pid().
 * Returns 1 with a usage message when arguments are missing; aborts via
 * mpx_dig_abort() when an argument does not parse or the open fails.
 */
int main(int argc, char **argv)
{
	int err;
	int pid;

	printf("mpx-dig starting...\n");
	/* argv[1] and argv[2] are both read below */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <pid> <bounds dir address (hex)>\n",
			argc > 0 ? argv[0] : "mpx-dig");
		return 1;
	}

	err = sscanf(argv[1], "%d", &pid);
	printf("parsing: '%s', err: %d\n", argv[1], err);
	if (err != 1)
		mpx_dig_abort();

	err = sscanf(argv[2], "%lx", &bounds_dir_global);
	printf("parsing: '%s': %d\n", argv[2], err);
	if (err != 1)
		mpx_dig_abort();

	proc_pid_mem_fd = open_proc(pid, "mem");
	if (proc_pid_mem_fd < 0)
		mpx_dig_abort();

	inspect_pid(pid);
	return 0;
}
#endif
|
|
|
|
long inspect_me(struct mpx_bounds_dir *bounds_dir)
|
|
{
|
|
int pid = getpid();
|
|
|
|
pid_load_vaddrs(pid);
|
|
bounds_dir_global = (unsigned long)bounds_dir;
|
|
dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
|
|
return inspect_pid(pid);
|
|
}
|