diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 8dc8a554f774..dd0cf4834eaa 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -608,11 +608,11 @@ void __init paging_init(void) zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } -pg_data_t *arch_alloc_nodedata(int nid) +pg_data_t * __init arch_alloc_nodedata(int nid) { unsigned long size = compute_pernodesize(nid); - return kzalloc(size, GFP_KERNEL); + return memblock_alloc(size, SMP_CACHE_BYTES); } void arch_free_nodedata(pg_data_t *pgdat) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4355983b364d..cdd66bfdf855 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -44,7 +44,7 @@ extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); */ #define generic_alloc_nodedata(nid) \ ({ \ - kzalloc(sizeof(pg_data_t), GFP_KERNEL); \ + memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ }) /* * This definition is just for error path in node hotadd. diff --git a/mm/internal.h b/mm/internal.h index 9c298afb9688..f1554a4e249e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -707,4 +707,6 @@ void vunmap_range_noflush(unsigned long start, unsigned long end); int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags); +DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); + #endif /* __MM_INTERNAL_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0139b77c51d5..11f39d0e76ec 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1162,19 +1162,21 @@ static void reset_node_present_pages(pg_data_t *pgdat) } /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -static pg_data_t __ref *hotadd_new_pgdat(int nid) +static pg_data_t __ref *hotadd_init_pgdat(int nid) { struct pglist_data *pgdat; pgdat = NODE_DATA(nid); - if (!pgdat) { - pgdat = arch_alloc_nodedata(nid); - if (!pgdat) - return NULL; + /* + * NODE_DATA is preallocated (free_area_init) but its internal + * state is not allocated completely. Add missing pieces. + * Completely offline nodes stay around and they just need + * reintialization. + */ + if (pgdat->per_cpu_nodestats == &boot_nodestats) { pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - arch_refresh_nodedata(nid, pgdat); } else { int cpu; /* @@ -1193,8 +1195,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid) } } - /* we can use NODE_DATA(nid) from here */ - pgdat->node_id = nid; pgdat->node_start_pfn = 0; /* init node's zones as empty zones, we don't have any present pages.*/ @@ -1246,7 +1246,7 @@ static int __try_online_node(int nid, bool set_node_online) if (node_online(nid)) return 0; - pgdat = hotadd_new_pgdat(nid); + pgdat = hotadd_init_pgdat(nid); if (!pgdat) { pr_err("Cannot online node %d due to NULL pgdat\n", nid); ret = -ENOMEM; @@ -1445,9 +1445,6 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) return ret; error: - /* rollback pgdat allocation and others */ - if (new_node) - rollback_node_hotadd(nid); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); error_mem_hotplug_end: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8b18a077c409..4f141a4e5b64 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6341,7 +6341,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta #define BOOT_PAGESET_BATCH 1 static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset); static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats); -static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); +DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); static void __build_all_zonelists(void *data) { @@ -6363,7 +6363,11 @@ static void __build_all_zonelists(void *data) if (self && !node_online(self->node_id)) { build_zonelists(self); } else { - for_each_online_node(nid) { + /* + * All possible nodes have pgdat preallocated + * in free_area_init + */ + for_each_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); build_zonelists(pgdat); @@ -8063,8 +8067,36 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); + for_each_node(nid) { + pg_data_t *pgdat; + + if (!node_online(nid)) { + pr_info("Initializing node %d as memoryless\n", nid); + + /* Allocator not initialized yet */ + pgdat = arch_alloc_nodedata(nid); + if (!pgdat) { + pr_err("Cannot allocate %zuB for node %d.\n", + sizeof(*pgdat), nid); + continue; + } + arch_refresh_nodedata(nid, pgdat); + free_area_init_memoryless_node(nid); + + /* + * We do not want to confuse userspace by sysfs + * files/directories for node without any memory + * attached to it, so this node is not marked as + * N_MEMORY and not marked online so that no sysfs + * hierarchy will be created via register_one_node for + * it. The pgdat will get fully initialized by + * hotadd_init_pgdat() when memory is hotplugged into + * this node. + */ + continue; + } + + pgdat = NODE_DATA(nid); free_area_init_node(nid); /* Any memory on that node */