mirror of
https://github.com/torvalds/linux.git
synced 2024-12-07 03:21:32 +00:00
5316a017d0
vsyscall detection code uses direct call to the beginning of
the vsyscall page:
asm ("call %P0" :: "i" (0xffffffffff600000))
It generates "call rel32" instruction but it is not relocated if binary
is PIE, so binary segfaults into random userspace address and vsyscall
page status is detected incorrectly.
Do more direct:
asm ("call *%rax")
which doesn't do need any relocaltions.
Mark g_vsyscall as volatile for a good measure, I didn't find instruction
setting it to 0. Now the code is obviously correct:
xor eax, eax
mov rdi, rbp
mov rsi, rbp
mov DWORD PTR [rip+0x2d15], eax # g_vsyscall = 0
mov rax, 0xffffffffff600000
call rax
mov DWORD PTR [rip+0x2d02], 1 # g_vsyscall = 1
mov eax, DWORD PTR ds:0xffffffffff600000
mov DWORD PTR [rip+0x2cf1], 2 # g_vsyscall = 2
mov edi, [rip+0x2ceb] # exit(g_vsyscall)
call exit
Note: fixed proc-empty-vm test oopses 5.19.0-28-generic kernel
but this is separate story.
Link: https://lkml.kernel.org/r/Y7h2xvzKLg36DSq8@p183
Fixes: 5bc73bb345
("proc: test how it holds up with mapping'less process")
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reported-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Tested-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
503 lines
11 KiB
C
503 lines
11 KiB
C
/*
|
|
* Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*/
|
|
/*
|
|
* Fork and exec tiny 1 page executable which precisely controls its VM.
|
|
* Test /proc/$PID/maps
|
|
* Test /proc/$PID/smaps
|
|
* Test /proc/$PID/smaps_rollup
|
|
* Test /proc/$PID/statm
|
|
*
|
|
* FIXME require CONFIG_TMPFS which can be disabled
|
|
* FIXME test other values from "smaps"
|
|
* FIXME support other archs
|
|
*/
|
|
#undef NDEBUG
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <sched.h>
|
|
#include <signal.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/wait.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/uio.h>
|
|
#include <linux/kdev_t.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
|
|
#include "../kselftest.h"
|
|
|
|
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
|
|
{
|
|
return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
|
|
}
|
|
|
|
static void make_private_tmp(void)
|
|
{
|
|
if (unshare(CLONE_NEWNS) == -1) {
|
|
if (errno == ENOSYS || errno == EPERM) {
|
|
exit(4);
|
|
}
|
|
exit(1);
|
|
}
|
|
if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
|
|
exit(1);
|
|
}
|
|
if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
static pid_t pid = -1;
|
|
static void ate(void)
|
|
{
|
|
if (pid > 0) {
|
|
kill(pid, SIGTERM);
|
|
}
|
|
}
|
|
|
|
struct elf64_hdr {
|
|
uint8_t e_ident[16];
|
|
uint16_t e_type;
|
|
uint16_t e_machine;
|
|
uint32_t e_version;
|
|
uint64_t e_entry;
|
|
uint64_t e_phoff;
|
|
uint64_t e_shoff;
|
|
uint32_t e_flags;
|
|
uint16_t e_ehsize;
|
|
uint16_t e_phentsize;
|
|
uint16_t e_phnum;
|
|
uint16_t e_shentsize;
|
|
uint16_t e_shnum;
|
|
uint16_t e_shstrndx;
|
|
};
|
|
|
|
struct elf64_phdr {
|
|
uint32_t p_type;
|
|
uint32_t p_flags;
|
|
uint64_t p_offset;
|
|
uint64_t p_vaddr;
|
|
uint64_t p_paddr;
|
|
uint64_t p_filesz;
|
|
uint64_t p_memsz;
|
|
uint64_t p_align;
|
|
};
|
|
|
|
#ifdef __x86_64__
|
|
#define PAGE_SIZE 4096
|
|
#define VADDR (1UL << 32)
|
|
#define MAPS_OFFSET 73
|
|
|
|
#define syscall 0x0f, 0x05
|
|
#define mov_rdi(x) \
|
|
0x48, 0xbf, \
|
|
(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \
|
|
((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff
|
|
|
|
#define mov_rsi(x) \
|
|
0x48, 0xbe, \
|
|
(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \
|
|
((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff
|
|
|
|
#define mov_eax(x) \
|
|
0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
|
|
|
|
static const uint8_t payload[] = {
|
|
/* Casually unmap stack, vDSO and everything else. */
|
|
/* munmap */
|
|
mov_rdi(VADDR + 4096),
|
|
mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
|
|
mov_eax(11),
|
|
syscall,
|
|
|
|
/* Ping parent. */
|
|
/* write(0, &c, 1); */
|
|
0x31, 0xff, /* xor edi, edi */
|
|
0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */
|
|
0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */
|
|
mov_eax(1),
|
|
syscall,
|
|
|
|
/* 1: pause(); */
|
|
mov_eax(34),
|
|
syscall,
|
|
|
|
0xeb, 0xf7, /* jmp 1b */
|
|
};
|
|
|
|
static int make_exe(const uint8_t *payload, size_t len)
|
|
{
|
|
struct elf64_hdr h;
|
|
struct elf64_phdr ph;
|
|
|
|
struct iovec iov[3] = {
|
|
{&h, sizeof(struct elf64_hdr)},
|
|
{&ph, sizeof(struct elf64_phdr)},
|
|
{(void *)payload, len},
|
|
};
|
|
int fd, fd1;
|
|
char buf[64];
|
|
|
|
memset(&h, 0, sizeof(h));
|
|
h.e_ident[0] = 0x7f;
|
|
h.e_ident[1] = 'E';
|
|
h.e_ident[2] = 'L';
|
|
h.e_ident[3] = 'F';
|
|
h.e_ident[4] = 2;
|
|
h.e_ident[5] = 1;
|
|
h.e_ident[6] = 1;
|
|
h.e_ident[7] = 0;
|
|
h.e_type = 2;
|
|
h.e_machine = 0x3e;
|
|
h.e_version = 1;
|
|
h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
|
|
h.e_phoff = sizeof(struct elf64_hdr);
|
|
h.e_shoff = 0;
|
|
h.e_flags = 0;
|
|
h.e_ehsize = sizeof(struct elf64_hdr);
|
|
h.e_phentsize = sizeof(struct elf64_phdr);
|
|
h.e_phnum = 1;
|
|
h.e_shentsize = 0;
|
|
h.e_shnum = 0;
|
|
h.e_shstrndx = 0;
|
|
|
|
memset(&ph, 0, sizeof(ph));
|
|
ph.p_type = 1;
|
|
ph.p_flags = (1<<2)|1;
|
|
ph.p_offset = 0;
|
|
ph.p_vaddr = VADDR;
|
|
ph.p_paddr = 0;
|
|
ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
|
|
ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
|
|
ph.p_align = 4096;
|
|
|
|
fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
|
|
if (fd == -1) {
|
|
exit(1);
|
|
}
|
|
|
|
if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
|
|
exit(1);
|
|
}
|
|
|
|
/* Avoid ETXTBSY on exec. */
|
|
snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
|
|
fd1 = open(buf, O_RDONLY|O_CLOEXEC);
|
|
close(fd);
|
|
|
|
return fd1;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* 0: vsyscall VMA doesn't exist vsyscall=none
|
|
* 1: vsyscall VMA is --xp vsyscall=xonly
|
|
* 2: vsyscall VMA is r-xp vsyscall=emulate
|
|
*/
|
|
static volatile int g_vsyscall;
|
|
static const char *str_vsyscall;
|
|
|
|
static const char str_vsyscall_0[] = "";
|
|
static const char str_vsyscall_1[] =
|
|
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
|
|
static const char str_vsyscall_2[] =
|
|
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";
|
|
|
|
#ifdef __x86_64__
|
|
static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
|
|
{
|
|
_exit(g_vsyscall);
|
|
}
|
|
|
|
/*
|
|
* vsyscall page can't be unmapped, probe it directly.
|
|
*/
|
|
static void vsyscall(void)
|
|
{
|
|
pid_t pid;
|
|
int wstatus;
|
|
|
|
pid = fork();
|
|
if (pid < 0) {
|
|
fprintf(stderr, "fork, errno %d\n", errno);
|
|
exit(1);
|
|
}
|
|
if (pid == 0) {
|
|
struct rlimit rlim = {0, 0};
|
|
(void)setrlimit(RLIMIT_CORE, &rlim);
|
|
|
|
/* Hide "segfault at ffffffffff600000" messages. */
|
|
struct sigaction act;
|
|
memset(&act, 0, sizeof(struct sigaction));
|
|
act.sa_flags = SA_SIGINFO;
|
|
act.sa_sigaction = sigaction_SIGSEGV;
|
|
(void)sigaction(SIGSEGV, &act, NULL);
|
|
|
|
g_vsyscall = 0;
|
|
/* gettimeofday(NULL, NULL); */
|
|
uint64_t rax = 0xffffffffff600000;
|
|
asm volatile (
|
|
"call *%[rax]"
|
|
: [rax] "+a" (rax)
|
|
: "D" (NULL), "S" (NULL)
|
|
: "rcx", "r11"
|
|
);
|
|
|
|
g_vsyscall = 1;
|
|
*(volatile int *)0xffffffffff600000UL;
|
|
|
|
g_vsyscall = 2;
|
|
exit(g_vsyscall);
|
|
}
|
|
waitpid(pid, &wstatus, 0);
|
|
if (WIFEXITED(wstatus)) {
|
|
g_vsyscall = WEXITSTATUS(wstatus);
|
|
} else {
|
|
fprintf(stderr, "error: wstatus %08x\n", wstatus);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
int pipefd[2];
|
|
int exec_fd;
|
|
|
|
vsyscall();
|
|
switch (g_vsyscall) {
|
|
case 0:
|
|
str_vsyscall = str_vsyscall_0;
|
|
break;
|
|
case 1:
|
|
str_vsyscall = str_vsyscall_1;
|
|
break;
|
|
case 2:
|
|
str_vsyscall = str_vsyscall_2;
|
|
break;
|
|
default:
|
|
abort();
|
|
}
|
|
|
|
atexit(ate);
|
|
|
|
make_private_tmp();
|
|
|
|
/* Reserve fd 0 for 1-byte pipe ping from child. */
|
|
close(0);
|
|
if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
|
|
return 1;
|
|
}
|
|
|
|
exec_fd = make_exe(payload, sizeof(payload));
|
|
|
|
if (pipe(pipefd) == -1) {
|
|
return 1;
|
|
}
|
|
if (dup2(pipefd[1], 0) != 0) {
|
|
return 1;
|
|
}
|
|
|
|
pid = fork();
|
|
if (pid == -1) {
|
|
return 1;
|
|
}
|
|
if (pid == 0) {
|
|
sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
|
|
return 1;
|
|
}
|
|
|
|
char _;
|
|
if (read(pipefd[0], &_, 1) != 1) {
|
|
return 1;
|
|
}
|
|
|
|
struct stat st;
|
|
if (fstat(exec_fd, &st) == -1) {
|
|
return 1;
|
|
}
|
|
|
|
/* Generate "head -n1 /proc/$PID/maps" */
|
|
char buf0[256];
|
|
memset(buf0, ' ', sizeof(buf0));
|
|
int len = snprintf(buf0, sizeof(buf0),
|
|
"%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
|
|
VADDR, VADDR + PAGE_SIZE,
|
|
MAJOR(st.st_dev), MINOR(st.st_dev),
|
|
(unsigned long long)st.st_ino);
|
|
buf0[len] = ' ';
|
|
snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
|
|
"/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
|
|
|
|
/* Test /proc/$PID/maps */
|
|
{
|
|
const size_t len = strlen(buf0) + strlen(str_vsyscall);
|
|
char buf[256];
|
|
ssize_t rv;
|
|
int fd;
|
|
|
|
snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
|
|
fd = open(buf, O_RDONLY);
|
|
if (fd == -1) {
|
|
return 1;
|
|
}
|
|
rv = read(fd, buf, sizeof(buf));
|
|
assert(rv == len);
|
|
assert(memcmp(buf, buf0, strlen(buf0)) == 0);
|
|
if (g_vsyscall > 0) {
|
|
assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
|
|
}
|
|
}
|
|
|
|
/* Test /proc/$PID/smaps */
|
|
{
|
|
char buf[4096];
|
|
ssize_t rv;
|
|
int fd;
|
|
|
|
snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
|
|
fd = open(buf, O_RDONLY);
|
|
if (fd == -1) {
|
|
return 1;
|
|
}
|
|
rv = read(fd, buf, sizeof(buf));
|
|
assert(0 <= rv && rv <= sizeof(buf));
|
|
|
|
assert(rv >= strlen(buf0));
|
|
assert(memcmp(buf, buf0, strlen(buf0)) == 0);
|
|
|
|
#define RSS1 "Rss: 4 kB\n"
|
|
#define RSS2 "Rss: 0 kB\n"
|
|
#define PSS1 "Pss: 4 kB\n"
|
|
#define PSS2 "Pss: 0 kB\n"
|
|
assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
|
|
memmem(buf, rv, RSS2, strlen(RSS2)));
|
|
assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
|
|
memmem(buf, rv, PSS2, strlen(PSS2)));
|
|
|
|
static const char *S[] = {
|
|
"Size: 4 kB\n",
|
|
"KernelPageSize: 4 kB\n",
|
|
"MMUPageSize: 4 kB\n",
|
|
"Anonymous: 0 kB\n",
|
|
"AnonHugePages: 0 kB\n",
|
|
"Shared_Hugetlb: 0 kB\n",
|
|
"Private_Hugetlb: 0 kB\n",
|
|
"Locked: 0 kB\n",
|
|
};
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(S); i++) {
|
|
assert(memmem(buf, rv, S[i], strlen(S[i])));
|
|
}
|
|
|
|
if (g_vsyscall > 0) {
|
|
assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
|
|
}
|
|
}
|
|
|
|
/* Test /proc/$PID/smaps_rollup */
|
|
{
|
|
char bufr[256];
|
|
memset(bufr, ' ', sizeof(bufr));
|
|
len = snprintf(bufr, sizeof(bufr),
|
|
"%08lx-%08lx ---p 00000000 00:00 0",
|
|
VADDR, VADDR + PAGE_SIZE);
|
|
bufr[len] = ' ';
|
|
snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
|
|
"[rollup]\n");
|
|
|
|
char buf[1024];
|
|
ssize_t rv;
|
|
int fd;
|
|
|
|
snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
|
|
fd = open(buf, O_RDONLY);
|
|
if (fd == -1) {
|
|
return 1;
|
|
}
|
|
rv = read(fd, buf, sizeof(buf));
|
|
assert(0 <= rv && rv <= sizeof(buf));
|
|
|
|
assert(rv >= strlen(bufr));
|
|
assert(memcmp(buf, bufr, strlen(bufr)) == 0);
|
|
|
|
assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
|
|
memmem(buf, rv, RSS2, strlen(RSS2)));
|
|
assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
|
|
memmem(buf, rv, PSS2, strlen(PSS2)));
|
|
|
|
static const char *S[] = {
|
|
"Anonymous: 0 kB\n",
|
|
"AnonHugePages: 0 kB\n",
|
|
"Shared_Hugetlb: 0 kB\n",
|
|
"Private_Hugetlb: 0 kB\n",
|
|
"Locked: 0 kB\n",
|
|
};
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(S); i++) {
|
|
assert(memmem(buf, rv, S[i], strlen(S[i])));
|
|
}
|
|
}
|
|
|
|
/* Test /proc/$PID/statm */
|
|
{
|
|
char buf[64];
|
|
ssize_t rv;
|
|
int fd;
|
|
|
|
snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
|
|
fd = open(buf, O_RDONLY);
|
|
if (fd == -1) {
|
|
return 1;
|
|
}
|
|
rv = read(fd, buf, sizeof(buf));
|
|
assert(rv == 7 * 2);
|
|
|
|
assert(buf[0] == '1'); /* ->total_vm */
|
|
assert(buf[1] == ' ');
|
|
assert(buf[2] == '0' || buf[2] == '1'); /* rss */
|
|
assert(buf[3] == ' ');
|
|
assert(buf[4] == '0' || buf[2] == '1'); /* file rss */
|
|
assert(buf[5] == ' ');
|
|
assert(buf[6] == '1'); /* ELF executable segments */
|
|
assert(buf[7] == ' ');
|
|
assert(buf[8] == '0');
|
|
assert(buf[9] == ' ');
|
|
assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */
|
|
assert(buf[11] == ' ');
|
|
assert(buf[12] == '0');
|
|
assert(buf[13] == '\n');
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#else
|
|
int main(void)
|
|
{
|
|
return 4;
|
|
}
|
|
#endif
|