a80313ff91
With a relocatable kernel that could reside at any place in memory, code and data that has to stay below 2 GB needs special handling. This patch introduces .dma sections for such text, data and ex_table. The sections will be part of the decompressor kernel, so they will not be relocated and stay below 2 GB. Their location is passed over to the decompressed / relocated kernel via the .boot.preserved.data section. The duald and aste for control register setup also need to stay below 2 GB, so move the setup code from arch/s390/kernel/head64.S to arch/s390/boot/head.S. The duct and linkage_stack could reside above 2 GB, but their content has to be preserved for the decompresed kernel, so they are also moved into the .dma section. The start and end address of the .dma sections is added to vmcoreinfo, for crash support, to help debugging in case the kernel crashed there. Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> Reviewed-by: Philipp Rudo <prudo@linux.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
445 lines
10 KiB
C
445 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright IBM Corp. 2006
|
|
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
|
|
*/
|
|
|
|
#include <linux/memblock.h>
|
|
#include <linux/pfn.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/init.h>
|
|
#include <linux/list.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/slab.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/set_memory.h>
|
|
|
|
static DEFINE_MUTEX(vmem_mutex);
|
|
|
|
struct memory_segment {
|
|
struct list_head list;
|
|
unsigned long start;
|
|
unsigned long size;
|
|
};
|
|
|
|
static LIST_HEAD(mem_segs);
|
|
|
|
static void __ref *vmem_alloc_pages(unsigned int order)
|
|
{
|
|
unsigned long size = PAGE_SIZE << order;
|
|
|
|
if (slab_is_available())
|
|
return (void *)__get_free_pages(GFP_KERNEL, order);
|
|
return (void *) memblock_phys_alloc(size, size);
|
|
}
|
|
|
|
void *vmem_crst_alloc(unsigned long val)
|
|
{
|
|
unsigned long *table;
|
|
|
|
table = vmem_alloc_pages(CRST_ALLOC_ORDER);
|
|
if (table)
|
|
crst_table_init(table, val);
|
|
return table;
|
|
}
|
|
|
|
pte_t __ref *vmem_pte_alloc(void)
|
|
{
|
|
unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
|
|
pte_t *pte;
|
|
|
|
if (slab_is_available())
|
|
pte = (pte_t *) page_table_alloc(&init_mm);
|
|
else
|
|
pte = (pte_t *) memblock_phys_alloc(size, size);
|
|
if (!pte)
|
|
return NULL;
|
|
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
|
|
return pte;
|
|
}
|
|
|
|
/*
|
|
* Add a physical memory range to the 1:1 mapping.
|
|
*/
|
|
static int vmem_add_mem(unsigned long start, unsigned long size)
|
|
{
|
|
unsigned long pgt_prot, sgt_prot, r3_prot;
|
|
unsigned long pages4k, pages1m, pages2g;
|
|
unsigned long end = start + size;
|
|
unsigned long address = start;
|
|
pgd_t *pg_dir;
|
|
p4d_t *p4_dir;
|
|
pud_t *pu_dir;
|
|
pmd_t *pm_dir;
|
|
pte_t *pt_dir;
|
|
int ret = -ENOMEM;
|
|
|
|
pgt_prot = pgprot_val(PAGE_KERNEL);
|
|
sgt_prot = pgprot_val(SEGMENT_KERNEL);
|
|
r3_prot = pgprot_val(REGION3_KERNEL);
|
|
if (!MACHINE_HAS_NX) {
|
|
pgt_prot &= ~_PAGE_NOEXEC;
|
|
sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
|
|
r3_prot &= ~_REGION_ENTRY_NOEXEC;
|
|
}
|
|
pages4k = pages1m = pages2g = 0;
|
|
while (address < end) {
|
|
pg_dir = pgd_offset_k(address);
|
|
if (pgd_none(*pg_dir)) {
|
|
p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
|
|
if (!p4_dir)
|
|
goto out;
|
|
pgd_populate(&init_mm, pg_dir, p4_dir);
|
|
}
|
|
p4_dir = p4d_offset(pg_dir, address);
|
|
if (p4d_none(*p4_dir)) {
|
|
pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
|
|
if (!pu_dir)
|
|
goto out;
|
|
p4d_populate(&init_mm, p4_dir, pu_dir);
|
|
}
|
|
pu_dir = pud_offset(p4_dir, address);
|
|
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
|
|
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
|
|
!debug_pagealloc_enabled()) {
|
|
pud_val(*pu_dir) = address | r3_prot;
|
|
address += PUD_SIZE;
|
|
pages2g++;
|
|
continue;
|
|
}
|
|
if (pud_none(*pu_dir)) {
|
|
pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
|
|
if (!pm_dir)
|
|
goto out;
|
|
pud_populate(&init_mm, pu_dir, pm_dir);
|
|
}
|
|
pm_dir = pmd_offset(pu_dir, address);
|
|
if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
|
|
!(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
|
|
!debug_pagealloc_enabled()) {
|
|
pmd_val(*pm_dir) = address | sgt_prot;
|
|
address += PMD_SIZE;
|
|
pages1m++;
|
|
continue;
|
|
}
|
|
if (pmd_none(*pm_dir)) {
|
|
pt_dir = vmem_pte_alloc();
|
|
if (!pt_dir)
|
|
goto out;
|
|
pmd_populate(&init_mm, pm_dir, pt_dir);
|
|
}
|
|
|
|
pt_dir = pte_offset_kernel(pm_dir, address);
|
|
pte_val(*pt_dir) = address | pgt_prot;
|
|
address += PAGE_SIZE;
|
|
pages4k++;
|
|
}
|
|
ret = 0;
|
|
out:
|
|
update_page_count(PG_DIRECT_MAP_4K, pages4k);
|
|
update_page_count(PG_DIRECT_MAP_1M, pages1m);
|
|
update_page_count(PG_DIRECT_MAP_2G, pages2g);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Remove a physical memory range from the 1:1 mapping.
|
|
* Currently only invalidates page table entries.
|
|
*/
|
|
static void vmem_remove_range(unsigned long start, unsigned long size)
|
|
{
|
|
unsigned long pages4k, pages1m, pages2g;
|
|
unsigned long end = start + size;
|
|
unsigned long address = start;
|
|
pgd_t *pg_dir;
|
|
p4d_t *p4_dir;
|
|
pud_t *pu_dir;
|
|
pmd_t *pm_dir;
|
|
pte_t *pt_dir;
|
|
|
|
pages4k = pages1m = pages2g = 0;
|
|
while (address < end) {
|
|
pg_dir = pgd_offset_k(address);
|
|
if (pgd_none(*pg_dir)) {
|
|
address += PGDIR_SIZE;
|
|
continue;
|
|
}
|
|
p4_dir = p4d_offset(pg_dir, address);
|
|
if (p4d_none(*p4_dir)) {
|
|
address += P4D_SIZE;
|
|
continue;
|
|
}
|
|
pu_dir = pud_offset(p4_dir, address);
|
|
if (pud_none(*pu_dir)) {
|
|
address += PUD_SIZE;
|
|
continue;
|
|
}
|
|
if (pud_large(*pu_dir)) {
|
|
pud_clear(pu_dir);
|
|
address += PUD_SIZE;
|
|
pages2g++;
|
|
continue;
|
|
}
|
|
pm_dir = pmd_offset(pu_dir, address);
|
|
if (pmd_none(*pm_dir)) {
|
|
address += PMD_SIZE;
|
|
continue;
|
|
}
|
|
if (pmd_large(*pm_dir)) {
|
|
pmd_clear(pm_dir);
|
|
address += PMD_SIZE;
|
|
pages1m++;
|
|
continue;
|
|
}
|
|
pt_dir = pte_offset_kernel(pm_dir, address);
|
|
pte_clear(&init_mm, address, pt_dir);
|
|
address += PAGE_SIZE;
|
|
pages4k++;
|
|
}
|
|
flush_tlb_kernel_range(start, end);
|
|
update_page_count(PG_DIRECT_MAP_4K, -pages4k);
|
|
update_page_count(PG_DIRECT_MAP_1M, -pages1m);
|
|
update_page_count(PG_DIRECT_MAP_2G, -pages2g);
|
|
}
|
|
|
|
/*
|
|
* Add a backed mem_map array to the virtual mem_map array.
|
|
*/
|
|
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
|
|
struct vmem_altmap *altmap)
|
|
{
|
|
unsigned long pgt_prot, sgt_prot;
|
|
unsigned long address = start;
|
|
pgd_t *pg_dir;
|
|
p4d_t *p4_dir;
|
|
pud_t *pu_dir;
|
|
pmd_t *pm_dir;
|
|
pte_t *pt_dir;
|
|
int ret = -ENOMEM;
|
|
|
|
pgt_prot = pgprot_val(PAGE_KERNEL);
|
|
sgt_prot = pgprot_val(SEGMENT_KERNEL);
|
|
if (!MACHINE_HAS_NX) {
|
|
pgt_prot &= ~_PAGE_NOEXEC;
|
|
sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
|
|
}
|
|
for (address = start; address < end;) {
|
|
pg_dir = pgd_offset_k(address);
|
|
if (pgd_none(*pg_dir)) {
|
|
p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
|
|
if (!p4_dir)
|
|
goto out;
|
|
pgd_populate(&init_mm, pg_dir, p4_dir);
|
|
}
|
|
|
|
p4_dir = p4d_offset(pg_dir, address);
|
|
if (p4d_none(*p4_dir)) {
|
|
pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
|
|
if (!pu_dir)
|
|
goto out;
|
|
p4d_populate(&init_mm, p4_dir, pu_dir);
|
|
}
|
|
|
|
pu_dir = pud_offset(p4_dir, address);
|
|
if (pud_none(*pu_dir)) {
|
|
pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
|
|
if (!pm_dir)
|
|
goto out;
|
|
pud_populate(&init_mm, pu_dir, pm_dir);
|
|
}
|
|
|
|
pm_dir = pmd_offset(pu_dir, address);
|
|
if (pmd_none(*pm_dir)) {
|
|
/* Use 1MB frames for vmemmap if available. We always
|
|
* use large frames even if they are only partially
|
|
* used.
|
|
* Otherwise we would have also page tables since
|
|
* vmemmap_populate gets called for each section
|
|
* separately. */
|
|
if (MACHINE_HAS_EDAT1) {
|
|
void *new_page;
|
|
|
|
new_page = vmemmap_alloc_block(PMD_SIZE, node);
|
|
if (!new_page)
|
|
goto out;
|
|
pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
|
|
address = (address + PMD_SIZE) & PMD_MASK;
|
|
continue;
|
|
}
|
|
pt_dir = vmem_pte_alloc();
|
|
if (!pt_dir)
|
|
goto out;
|
|
pmd_populate(&init_mm, pm_dir, pt_dir);
|
|
} else if (pmd_large(*pm_dir)) {
|
|
address = (address + PMD_SIZE) & PMD_MASK;
|
|
continue;
|
|
}
|
|
|
|
pt_dir = pte_offset_kernel(pm_dir, address);
|
|
if (pte_none(*pt_dir)) {
|
|
void *new_page;
|
|
|
|
new_page = vmemmap_alloc_block(PAGE_SIZE, node);
|
|
if (!new_page)
|
|
goto out;
|
|
pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
|
|
}
|
|
address += PAGE_SIZE;
|
|
}
|
|
ret = 0;
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
void vmemmap_free(unsigned long start, unsigned long end,
|
|
struct vmem_altmap *altmap)
|
|
{
|
|
}
|
|
|
|
/*
|
|
* Add memory segment to the segment list if it doesn't overlap with
|
|
* an already present segment.
|
|
*/
|
|
static int insert_memory_segment(struct memory_segment *seg)
|
|
{
|
|
struct memory_segment *tmp;
|
|
|
|
if (seg->start + seg->size > VMEM_MAX_PHYS ||
|
|
seg->start + seg->size < seg->start)
|
|
return -ERANGE;
|
|
|
|
list_for_each_entry(tmp, &mem_segs, list) {
|
|
if (seg->start >= tmp->start + tmp->size)
|
|
continue;
|
|
if (seg->start + seg->size <= tmp->start)
|
|
continue;
|
|
return -ENOSPC;
|
|
}
|
|
list_add(&seg->list, &mem_segs);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Remove memory segment from the segment list.
|
|
*/
|
|
static void remove_memory_segment(struct memory_segment *seg)
|
|
{
|
|
list_del(&seg->list);
|
|
}
|
|
|
|
static void __remove_shared_memory(struct memory_segment *seg)
|
|
{
|
|
remove_memory_segment(seg);
|
|
vmem_remove_range(seg->start, seg->size);
|
|
}
|
|
|
|
int vmem_remove_mapping(unsigned long start, unsigned long size)
|
|
{
|
|
struct memory_segment *seg;
|
|
int ret;
|
|
|
|
mutex_lock(&vmem_mutex);
|
|
|
|
ret = -ENOENT;
|
|
list_for_each_entry(seg, &mem_segs, list) {
|
|
if (seg->start == start && seg->size == size)
|
|
break;
|
|
}
|
|
|
|
if (seg->start != start || seg->size != size)
|
|
goto out;
|
|
|
|
ret = 0;
|
|
__remove_shared_memory(seg);
|
|
kfree(seg);
|
|
out:
|
|
mutex_unlock(&vmem_mutex);
|
|
return ret;
|
|
}
|
|
|
|
int vmem_add_mapping(unsigned long start, unsigned long size)
|
|
{
|
|
struct memory_segment *seg;
|
|
int ret;
|
|
|
|
mutex_lock(&vmem_mutex);
|
|
ret = -ENOMEM;
|
|
seg = kzalloc(sizeof(*seg), GFP_KERNEL);
|
|
if (!seg)
|
|
goto out;
|
|
seg->start = start;
|
|
seg->size = size;
|
|
|
|
ret = insert_memory_segment(seg);
|
|
if (ret)
|
|
goto out_free;
|
|
|
|
ret = vmem_add_mem(start, size);
|
|
if (ret)
|
|
goto out_remove;
|
|
goto out;
|
|
|
|
out_remove:
|
|
__remove_shared_memory(seg);
|
|
out_free:
|
|
kfree(seg);
|
|
out:
|
|
mutex_unlock(&vmem_mutex);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* map whole physical memory to virtual memory (identity mapping)
|
|
* we reserve enough space in the vmalloc area for vmemmap to hotplug
|
|
* additional memory segments.
|
|
*/
|
|
void __init vmem_map_init(void)
|
|
{
|
|
struct memblock_region *reg;
|
|
|
|
for_each_memblock(memory, reg)
|
|
vmem_add_mem(reg->base, reg->size);
|
|
__set_memory((unsigned long)_stext,
|
|
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
|
|
SET_MEMORY_RO | SET_MEMORY_X);
|
|
__set_memory((unsigned long)_etext,
|
|
(unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
|
|
SET_MEMORY_RO);
|
|
__set_memory((unsigned long)_sinittext,
|
|
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
|
|
SET_MEMORY_RO | SET_MEMORY_X);
|
|
__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
|
|
SET_MEMORY_RO | SET_MEMORY_X);
|
|
pr_info("Write protected kernel read-only data: %luk\n",
|
|
(unsigned long)(__end_rodata - _stext) >> 10);
|
|
}
|
|
|
|
/*
|
|
* Convert memblock.memory to a memory segment list so there is a single
|
|
* list that contains all memory segments.
|
|
*/
|
|
static int __init vmem_convert_memory_chunk(void)
|
|
{
|
|
struct memblock_region *reg;
|
|
struct memory_segment *seg;
|
|
|
|
mutex_lock(&vmem_mutex);
|
|
for_each_memblock(memory, reg) {
|
|
seg = kzalloc(sizeof(*seg), GFP_KERNEL);
|
|
if (!seg)
|
|
panic("Out of memory...\n");
|
|
seg->start = reg->base;
|
|
seg->size = reg->size;
|
|
insert_memory_segment(seg);
|
|
}
|
|
mutex_unlock(&vmem_mutex);
|
|
return 0;
|
|
}
|
|
|
|
core_initcall(vmem_convert_memory_chunk);
|