c972cc60c2
Add a special module area on top of the vmalloc area, which may be only used for modules and bpf jit generated code. This makes sure that inter module branches will always happen without a trampoline and in addition having all the code within a 2GB frame is branch prediction unit friendly. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
227 lines
5.7 KiB
C
227 lines
5.7 KiB
C
#include <linux/seq_file.h>
|
|
#include <linux/debugfs.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/pgtable.h>
|
|
|
|
/*
 * Upper bound (exclusive) of the virtual address range that is walked.
 * Assigned once in pt_dump_init() and checked by every walker loop.
 */
static unsigned long max_addr;
|
|
|
|
/*
 * Labels a region of the kernel address space in the dump output.
 * Field order matters: the marker table uses positional initializers.
 */
struct addr_marker {
	unsigned long start_address;	/* first address belonging to the region */
	const char *name;		/* text printed as "---[ name ]---" */
};
|
|
|
|
/*
 * Index of each region in address_markers[]. The modules entry exists
 * only when CONFIG_64BIT is defined.
 */
enum address_markers_idx {
	IDENTITY_NR,		/* implicitly 0 */
	KERNEL_START_NR,
	KERNEL_END_NR,
	VMEMMAP_NR,
	VMALLOC_NR,
#ifdef CONFIG_64BIT
	MODULES_NR,
#endif
};
|
|
|
|
static struct addr_marker address_markers[] = {
|
|
[IDENTITY_NR] = {0, "Identity Mapping"},
|
|
[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
|
|
[KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"},
|
|
[VMEMMAP_NR] = {0, "vmemmap Area"},
|
|
[VMALLOC_NR] = {0, "vmalloc Area"},
|
|
#ifdef CONFIG_64BIT
|
|
[MODULES_NR] = {0, "Modules Area"},
|
|
#endif
|
|
{ -1, NULL }
|
|
};
|
|
|
|
/*
 * Walker state shared between the page table walkers and note_page().
 * It describes the series of equal entries that is currently being
 * accumulated.
 */
struct pg_state {
	int level;			/* level of the series; 0 = nothing seen yet */
	unsigned int current_prot;	/* protection bits of the series */
	unsigned long start_address;	/* where the series began */
	unsigned long current_address;	/* address of the entry being visited */
	const struct addr_marker *marker; /* region the series belongs to */
};
|
|
|
|
static void print_prot(struct seq_file *m, unsigned int pr, int level)
|
|
{
|
|
static const char * const level_name[] =
|
|
{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
|
|
|
|
seq_printf(m, "%s ", level_name[level]);
|
|
if (pr & _PAGE_INVALID)
|
|
seq_printf(m, "I\n");
|
|
else
|
|
seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW");
|
|
}
|
|
|
|
static void note_page(struct seq_file *m, struct pg_state *st,
|
|
unsigned int new_prot, int level)
|
|
{
|
|
static const char units[] = "KMGTPE";
|
|
int width = sizeof(unsigned long) * 2;
|
|
const char *unit = units;
|
|
unsigned int prot, cur;
|
|
unsigned long delta;
|
|
|
|
/*
|
|
* If we have a "break" in the series, we need to flush the state
|
|
* that we have now. "break" is either changing perms, levels or
|
|
* address space marker.
|
|
*/
|
|
prot = new_prot;
|
|
cur = st->current_prot;
|
|
|
|
if (!st->level) {
|
|
/* First entry */
|
|
st->current_prot = new_prot;
|
|
st->level = level;
|
|
st->marker = address_markers;
|
|
seq_printf(m, "---[ %s ]---\n", st->marker->name);
|
|
} else if (prot != cur || level != st->level ||
|
|
st->current_address >= st->marker[1].start_address) {
|
|
/* Print the actual finished series */
|
|
seq_printf(m, "0x%0*lx-0x%0*lx",
|
|
width, st->start_address,
|
|
width, st->current_address);
|
|
delta = (st->current_address - st->start_address) >> 10;
|
|
while (!(delta & 0x3ff) && unit[1]) {
|
|
delta >>= 10;
|
|
unit++;
|
|
}
|
|
seq_printf(m, "%9lu%c ", delta, *unit);
|
|
print_prot(m, st->current_prot, st->level);
|
|
if (st->current_address >= st->marker[1].start_address) {
|
|
st->marker++;
|
|
seq_printf(m, "---[ %s ]---\n", st->marker->name);
|
|
}
|
|
st->start_address = st->current_address;
|
|
st->current_prot = new_prot;
|
|
st->level = level;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The actual page table walker functions. In order to keep the implementation
|
|
* of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
|
|
* flags to note_page() if a region, segment or page table entry is invalid or
|
|
* read-only.
|
|
* After all it's just a hint that the current level being walked contains an
|
|
* invalid or read-only entry.
|
|
*/
|
|
static void walk_pte_level(struct seq_file *m, struct pg_state *st,
|
|
pmd_t *pmd, unsigned long addr)
|
|
{
|
|
unsigned int prot;
|
|
pte_t *pte;
|
|
int i;
|
|
|
|
for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
|
|
st->current_address = addr;
|
|
pte = pte_offset_kernel(pmd, addr);
|
|
prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
|
|
note_page(m, st, prot, 4);
|
|
addr += PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
|
|
pud_t *pud, unsigned long addr)
|
|
{
|
|
unsigned int prot;
|
|
pmd_t *pmd;
|
|
int i;
|
|
|
|
for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
|
|
st->current_address = addr;
|
|
pmd = pmd_offset(pud, addr);
|
|
if (!pmd_none(*pmd)) {
|
|
if (pmd_large(*pmd)) {
|
|
prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO;
|
|
note_page(m, st, prot, 3);
|
|
} else
|
|
walk_pte_level(m, st, pmd, addr);
|
|
} else
|
|
note_page(m, st, _PAGE_INVALID, 3);
|
|
addr += PMD_SIZE;
|
|
}
|
|
}
|
|
|
|
static void walk_pud_level(struct seq_file *m, struct pg_state *st,
|
|
pgd_t *pgd, unsigned long addr)
|
|
{
|
|
pud_t *pud;
|
|
int i;
|
|
|
|
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
|
|
st->current_address = addr;
|
|
pud = pud_offset(pgd, addr);
|
|
if (!pud_none(*pud))
|
|
walk_pmd_level(m, st, pud, addr);
|
|
else
|
|
note_page(m, st, _PAGE_INVALID, 2);
|
|
addr += PUD_SIZE;
|
|
}
|
|
}
|
|
|
|
static void walk_pgd_level(struct seq_file *m)
|
|
{
|
|
unsigned long addr = 0;
|
|
struct pg_state st;
|
|
pgd_t *pgd;
|
|
int i;
|
|
|
|
memset(&st, 0, sizeof(st));
|
|
for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
|
|
st.current_address = addr;
|
|
pgd = pgd_offset_k(addr);
|
|
if (!pgd_none(*pgd))
|
|
walk_pud_level(m, &st, pgd, addr);
|
|
else
|
|
note_page(m, &st, _PAGE_INVALID, 1);
|
|
addr += PGDIR_SIZE;
|
|
}
|
|
/* Flush out the last page */
|
|
st.current_address = max_addr;
|
|
note_page(m, &st, 0, 0);
|
|
}
|
|
|
|
/*
 * seq_file show callback: produce the complete page table dump.
 *
 * @m: seq_file receiving the output
 * @v: unused
 *
 * Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);

	return 0;
}
|
|
|
|
static int ptdump_open(struct inode *inode, struct file *filp)
|
|
{
|
|
return single_open(filp, ptdump_show, NULL);
|
|
}
|
|
|
|
static const struct file_operations ptdump_fops = {
|
|
.open = ptdump_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
.release = single_release,
|
|
};
|
|
|
|
static int pt_dump_init(void)
|
|
{
|
|
/*
|
|
* Figure out the maximum virtual address being accessible with the
|
|
* kernel ASCE. We need this to keep the page table walker functions
|
|
* from accessing non-existent entries.
|
|
*/
|
|
#ifdef CONFIG_32BIT
|
|
max_addr = 1UL << 31;
|
|
#else
|
|
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
|
|
max_addr = 1UL << (max_addr * 11 + 31);
|
|
address_markers[MODULES_NR].start_address = MODULES_VADDR;
|
|
#endif
|
|
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
|
|
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
|
|
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
|
|
return 0;
|
|
}
|
|
/* Have pt_dump_init() invoked through the device_initcall mechanism. */
device_initcall(pt_dump_init);
|