riscv: mm: Add memory hotplugging support

For an architecture to support memory hotplugging, a few callbacks
need to be implemented (a rough usage sketch follows the list):

 arch_add_memory()
  This callback is responsible for adding the physical memory into the
  direct map, and for calling into the generic memory hotplugging code
  via __add_pages(), which adds the corresponding struct page entries
  and updates the vmemmap mapping.

 arch_remove_memory()
  This is the inverse of the callback above.

 vmemmap_free()
  This function tears down the vmemmap mappings (if
  CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the
  backing vmemmap pages. Note that for persistent memory, an
  alternative allocator for the backing pages can be used: the
  vmem_altmap. This means that when the backing pages are cleared,
  extra care is needed so that the correct deallocation method is
  used.

 arch_get_mappable_range()
  This function returns the PA range that the direct map is able to
  cover. Used by the MHP internals for sanity checks.
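
As a rough usage sketch (hypothetical, not part of this patch), a
driver reaches these hooks through the generic entry points; start and
size below are placeholders:

	u64 start = 0x100000000ULL, size = SZ_128M; /* illustrative only */
	int nid = memory_add_physaddr_to_nid(start);
	int rc;

	/* Checks arch_get_mappable_range(), then calls arch_add_memory(). */
	rc = add_memory(nid, start, size, MHP_NONE);

	/* Teardown ends up in arch_remove_memory() and vmemmap_free(). */
	if (!rc)
		rc = offline_and_remove_memory(start, size);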

The page table unmap/teardown functions are heavily based on code from
the x86 tree. The same remove_pgd_mapping() function is used in both
vmemmap_free() and arch_remove_memory(), but in the latter function
the backing pages are not removed.

Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240605114100.315918-7-bjorn@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

@@ -1533,3 +1533,270 @@ struct execmem_info __init *execmem_arch_setup(void)
}
#endif /* CONFIG_MMU */
#endif /* CONFIG_EXECMEM */

#ifdef CONFIG_MEMORY_HOTPLUG
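/*
 * Free a page-table page once every entry in it is clear: the
 * free_{pte,pmd,pud}_table() helpers each scan one table, keep it if
 * any slot is still in use, and otherwise release the page and clear
 * the parent entry.
 */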
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	struct page *page = pmd_page(*pmd);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pagetable_pte_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pmd_clear(pmd);
}

static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	struct page *page = pud_page(*pud);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pagetable_pmd_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pud_clear(pud);
}
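
/*
 * Unlike the PTE/PMD variants above, there is no pagetable destructor
 * to run at the PUD level; an empty table page is handed back to the
 * allocator directly.
 */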
static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	struct page *page = p4d_page(*p4d);
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	if (PageReserved(page))
		free_reserved_page(page);
	else
		free_pages((unsigned long)page_address(page), 0);
	p4d_clear(p4d);
}
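
/*
 * Free the pages backing a vmemmap range. If a vmem_altmap is in use
 * (e.g. for persistent memory), the backing storage came from the
 * altmap and must be returned via vmem_altmap_free() rather than to
 * the page allocator.
 */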
static void __meminit free_vmemmap_storage(struct page *page, size_t size,
					   struct vmem_altmap *altmap)
{
	int order = get_order(size);

	if (altmap) {
		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
		return;
	}

	if (PageReserved(page)) {
		unsigned int nr_pages = 1 << order;

		while (nr_pages--)
			free_reserved_page(page++);
		return;
	}

	free_pages((unsigned long)page_address(page), order);
}
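
/*
 * The remove_{pte,pmd,pud,p4d}_mapping() helpers below each walk one
 * page-table level: present leaf entries are cleared (and, for vmemmap
 * teardown, their backing storage freed), non-leaf entries are
 * descended into, after which any now-empty lower-level table is
 * freed.
 */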
static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *ptep, pte;

	for (; addr < end; addr = next) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		ptep = pte_base + pte_index(addr);
		pte = ptep_get(ptep);
		if (!pte_present(pte))
			continue;

		pte_clear(&init_mm, addr, ptep);
		if (is_vmemmap)
			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
	}
}

static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmdp, pmd;

	for (; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_base + pmd_index(addr);
		pmd = pmdp_get(pmdp);
		if (!pmd_present(pmd))
			continue;

		if (pmd_leaf(pmd)) {
			pmd_clear(pmdp);
			if (is_vmemmap)
				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
		free_pte_table(pte_base, pmdp);
	}
}

static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pud_t *pudp, pud;
	pmd_t *pmd_base;

	for (; addr < end; addr = next) {
		next = pud_addr_end(addr, end);
		pudp = pud_base + pud_index(addr);
		pud = pudp_get(pudp);
		if (!pud_present(pud))
			continue;

		if (pud_leaf(pud)) {
			if (pgtable_l4_enabled) {
				pud_clear(pudp);
				if (is_vmemmap)
					free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
			}
			continue;
		}

		pmd_base = pmd_offset(pudp, 0);
		remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l4_enabled)
			free_pmd_table(pmd_base, pudp);
	}
}

static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	p4d_t *p4dp, p4d;
	pud_t *pud_base;

	for (; addr < end; addr = next) {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_base + p4d_index(addr);
		p4d = p4dp_get(p4dp);
		if (!p4d_present(p4d))
			continue;

		if (p4d_leaf(p4d)) {
			if (pgtable_l5_enabled) {
				p4d_clear(p4dp);
				if (is_vmemmap)
					free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
			}
			continue;
		}

		pud_base = pud_offset(p4dp, 0);
		remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l5_enabled)
			free_pud_table(pud_base, p4dp);
	}
}
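
/*
 * Tear down a kernel virtual range, either a linear-map alias or a
 * vmemmap region. Entries at the top (pgd) level are walked but never
 * freed, and the TLB is flushed once the whole range is gone.
 */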
static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
					 struct vmem_altmap *altmap)
{
	unsigned long addr, next;
	p4d_t *p4d_base;
	pgd_t *pgd;

	for (addr = va; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!pgd_present(*pgd))
			continue;

		if (pgd_leaf(*pgd))
			continue;

		p4d_base = p4d_offset(pgd, 0);
		remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
	}

	flush_tlb_all();
}
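
/* Drop the direct-map alias of a physical range. */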
static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
{
	unsigned long va = (unsigned long)__va(start);
	unsigned long end = (unsigned long)__va(start + size);

	remove_pgd_mapping(va, end, false, NULL);
}
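
/*
 * Advertise the PA range that the direct map is able to cover; the
 * generic MHP code rejects hotplug requests outside of it.
 */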
struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = __pa(PAGE_OFFSET);
	mhp_range.end = __pa(PAGE_END - 1);
	return mhp_range;
}
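
/*
 * Hotplug entry point: map the new range in the direct map, then let
 * the generic code populate the struct pages. The linear mapping is
 * torn down again if __add_pages() fails.
 */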
int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
{
	int ret = 0;

	create_linear_mapping_range(start, start + size, 0, &params->pgprot);
	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
	if (ret) {
		remove_linear_mapping(start, size);
		goto out;
	}

	max_pfn = PFN_UP(start + size);
	max_low_pfn = max_pfn;

out:
	flush_tlb_all();
	return ret;
}
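
/*
 * Inverse of arch_add_memory(): remove the struct pages first, then
 * the direct-map alias of the range.
 */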
void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
	remove_linear_mapping(start, size);
	flush_tlb_all();
}
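
/*
 * Unmap a vmemmap range and free its backing pages; called by the
 * generic sparsemem code when sections are removed.
 */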
void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
{
	remove_pgd_mapping(start, end, true, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */