mm/page_alloc: Introduce free_area_init_core_hotplug
Currently, whenever a new node is created/re-used from the memhotplug path, we call free_area_init_node()->free_area_init_core(). But there is some code that we do not really need to run when we are coming from such a path. free_area_init_core() performs the following actions: 1) Initializes pgdat internals, such as spinlock, waitqueues and more. 2) Account # nr_all_pages and # nr_kernel_pages. These values are used later on when creating hash tables. 3) Account number of managed_pages per zone, subtracting dma_reserved and memmap pages. 4) Initializes some fields of the zone structure data 5) Calls init_currently_empty_zone to initialize all the freelists 6) Calls memmap_init to initialize all pages belonging to a certain zone When called from the memhotplug path, free_area_init_core() only performs actions #1 and #4. Action #2 is pointless as the zones do not have any pages since either the node was freed, or we are re-using it; either way, all zones belonging to this node should have 0 pages. For the same reason, action #3 always results in managed_pages being 0. Action #5 and #6 are performed later on when onlining the pages: online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone() online_pages()->move_pfn_range_to_zone()->memmap_init_zone() This patch does two things: First, moves the node/zone initialization to their own function, so it allows us to create a small version of free_area_init_core, where we only perform: 1) Initialization of pgdat internals, such as spinlock, waitqueues and more 4) Initialization of some fields of the zone structure data These two functions are: pgdat_init_internals() and zone_init_internals(). The second thing this patch does, is to introduce free_area_init_core_hotplug(), the memhotplug version of free_area_init_core(): Currently, we call free_area_init_node() from the memhotplug path. In there, we set some pgdat's fields, and call calculate_node_totalpages(). 
calculate_node_totalpages() calculates the # of pages the node has. Since the node is either new, or we are re-using it, the zones belonging to this node should not have any pages, so there is no point to calculate this now. Actually, we re-set these values to 0 later on with the calls to: reset_node_managed_pages() reset_node_present_pages() The # of pages per node and the # of pages per zone will be calculated when onlining the pages: online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range() online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range() Also, since free_area_init_core/free_area_init_node will now only get called during early init, let us replace __paginginit with __init, so their code gets freed up. [osalvador@techadventures.net: fix section usage] Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net [osalvador@suse.de: v6] Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net Signed-off-by: Oscar Salvador <osalvador@suse.de> Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com> Cc: Aaron Lu <aaron.lu@intel.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
0188dc98ad
commit
03e85f9d5f
@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
|
|||||||
static inline void remove_memory(int nid, u64 start, u64 size) {}
|
static inline void remove_memory(int nid, u64 start, u64 size) {}
|
||||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
||||||
|
|
||||||
|
extern void __ref free_area_init_core_hotplug(int nid);
|
||||||
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
|
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
|
||||||
void *arg, int (*func)(struct memory_block *, void *));
|
void *arg, int (*func)(struct memory_block *, void *));
|
||||||
extern int add_memory(int nid, u64 start, u64 size);
|
extern int add_memory(int nid, u64 start, u64 size);
|
||||||
|
@ -2015,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
|
|||||||
|
|
||||||
extern void __init pagecache_init(void);
|
extern void __init pagecache_init(void);
|
||||||
extern void free_area_init(unsigned long * zones_size);
|
extern void free_area_init(unsigned long * zones_size);
|
||||||
extern void free_area_init_node(int nid, unsigned long * zones_size,
|
extern void __init free_area_init_node(int nid, unsigned long * zones_size,
|
||||||
unsigned long zone_start_pfn, unsigned long *zholes_size);
|
unsigned long zone_start_pfn, unsigned long *zholes_size);
|
||||||
extern void free_initmem(void);
|
extern void free_initmem(void);
|
||||||
|
|
||||||
|
@ -982,8 +982,6 @@ static void reset_node_present_pages(pg_data_t *pgdat)
|
|||||||
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
|
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
|
||||||
{
|
{
|
||||||
struct pglist_data *pgdat;
|
struct pglist_data *pgdat;
|
||||||
unsigned long zones_size[MAX_NR_ZONES] = {0};
|
|
||||||
unsigned long zholes_size[MAX_NR_ZONES] = {0};
|
|
||||||
unsigned long start_pfn = PFN_DOWN(start);
|
unsigned long start_pfn = PFN_DOWN(start);
|
||||||
|
|
||||||
pgdat = NODE_DATA(nid);
|
pgdat = NODE_DATA(nid);
|
||||||
@ -1006,8 +1004,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
|
|||||||
|
|
||||||
/* we can use NODE_DATA(nid) from here */
|
/* we can use NODE_DATA(nid) from here */
|
||||||
|
|
||||||
|
pgdat->node_id = nid;
|
||||||
|
pgdat->node_start_pfn = start_pfn;
|
||||||
|
|
||||||
/* init node's zones as empty zones, we don't have any present pages.*/
|
/* init node's zones as empty zones, we don't have any present pages.*/
|
||||||
free_area_init_node(nid, zones_size, start_pfn, zholes_size);
|
free_area_init_core_hotplug(nid);
|
||||||
pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
|
pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1016,19 +1017,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
|
|||||||
*/
|
*/
|
||||||
build_all_zonelists(pgdat);
|
build_all_zonelists(pgdat);
|
||||||
|
|
||||||
/*
|
|
||||||
* zone->managed_pages is set to an approximate value in
|
|
||||||
* free_area_init_core(), which will cause
|
|
||||||
* /sys/device/system/node/nodeX/meminfo has wrong data.
|
|
||||||
* So reset it to 0 before any memory is onlined.
|
|
||||||
*/
|
|
||||||
reset_node_managed_pages(pgdat);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When memory is hot-added, all the memory is in offline state. So
|
* When memory is hot-added, all the memory is in offline state. So
|
||||||
* clear all zones' present_pages because they will be updated in
|
* clear all zones' present_pages because they will be updated in
|
||||||
* online_pages() and offline_pages().
|
* online_pages() and offline_pages().
|
||||||
*/
|
*/
|
||||||
|
reset_node_managed_pages(pgdat);
|
||||||
reset_node_present_pages(pgdat);
|
reset_node_present_pages(pgdat);
|
||||||
|
|
||||||
return pgdat;
|
return pgdat;
|
||||||
|
@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
|
|||||||
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
|
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
|
||||||
|
|
||||||
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
|
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
|
||||||
void __meminit set_pageblock_order(void)
|
void __init set_pageblock_order(void)
|
||||||
{
|
{
|
||||||
unsigned int order;
|
unsigned int order;
|
||||||
|
|
||||||
@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
|
|||||||
* include/linux/pageblock-flags.h for the values of pageblock_order based on
|
* include/linux/pageblock-flags.h for the values of pageblock_order based on
|
||||||
* the kernel config
|
* the kernel config
|
||||||
*/
|
*/
|
||||||
void __meminit set_pageblock_order(void)
|
void __init set_pageblock_order(void)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
|
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
|
||||||
|
|
||||||
static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages,
|
static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
|
||||||
unsigned long present_pages)
|
unsigned long present_pages)
|
||||||
{
|
{
|
||||||
unsigned long pages = spanned_pages;
|
unsigned long pages = spanned_pages;
|
||||||
@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
|
|||||||
static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
|
static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
|
||||||
* Set up the zone data structures:
|
|
||||||
* - mark all pages reserved
|
|
||||||
* - mark all memory queues empty
|
|
||||||
* - clear the memory bitmaps
|
|
||||||
*
|
|
||||||
* NOTE: pgdat should get zeroed by caller.
|
|
||||||
*/
|
|
||||||
static void __meminit free_area_init_core(struct pglist_data *pgdat)
|
|
||||||
{
|
{
|
||||||
enum zone_type j;
|
|
||||||
int nid = pgdat->node_id;
|
|
||||||
|
|
||||||
pgdat_resize_init(pgdat);
|
pgdat_resize_init(pgdat);
|
||||||
|
|
||||||
pgdat_init_numabalancing(pgdat);
|
pgdat_init_numabalancing(pgdat);
|
||||||
@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
|
|||||||
pgdat_page_ext_init(pgdat);
|
pgdat_page_ext_init(pgdat);
|
||||||
spin_lock_init(&pgdat->lru_lock);
|
spin_lock_init(&pgdat->lru_lock);
|
||||||
lruvec_init(node_lruvec(pgdat));
|
lruvec_init(node_lruvec(pgdat));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
|
||||||
|
unsigned long remaining_pages)
|
||||||
|
{
|
||||||
|
zone->managed_pages = remaining_pages;
|
||||||
|
zone_set_nid(zone, nid);
|
||||||
|
zone->name = zone_names[idx];
|
||||||
|
zone->zone_pgdat = NODE_DATA(nid);
|
||||||
|
spin_lock_init(&zone->lock);
|
||||||
|
zone_seqlock_init(zone);
|
||||||
|
zone_pcp_init(zone);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set up the zone data structures
|
||||||
|
* - init pgdat internals
|
||||||
|
* - init all zones belonging to this node
|
||||||
|
*
|
||||||
|
* NOTE: this function is only called during memory hotplug
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||||
|
void __ref free_area_init_core_hotplug(int nid)
|
||||||
|
{
|
||||||
|
enum zone_type z;
|
||||||
|
pg_data_t *pgdat = NODE_DATA(nid);
|
||||||
|
|
||||||
|
pgdat_init_internals(pgdat);
|
||||||
|
for (z = 0; z < MAX_NR_ZONES; z++)
|
||||||
|
zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set up the zone data structures:
|
||||||
|
* - mark all pages reserved
|
||||||
|
* - mark all memory queues empty
|
||||||
|
* - clear the memory bitmaps
|
||||||
|
*
|
||||||
|
* NOTE: pgdat should get zeroed by caller.
|
||||||
|
* NOTE: this function is only called during early init.
|
||||||
|
*/
|
||||||
|
static void __init free_area_init_core(struct pglist_data *pgdat)
|
||||||
|
{
|
||||||
|
enum zone_type j;
|
||||||
|
int nid = pgdat->node_id;
|
||||||
|
|
||||||
|
pgdat_init_internals(pgdat);
|
||||||
pgdat->per_cpu_nodestats = &boot_nodestats;
|
pgdat->per_cpu_nodestats = &boot_nodestats;
|
||||||
|
|
||||||
for (j = 0; j < MAX_NR_ZONES; j++) {
|
for (j = 0; j < MAX_NR_ZONES; j++) {
|
||||||
@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
|
|||||||
* when the bootmem allocator frees pages into the buddy system.
|
* when the bootmem allocator frees pages into the buddy system.
|
||||||
* And all highmem pages will be managed by the buddy system.
|
* And all highmem pages will be managed by the buddy system.
|
||||||
*/
|
*/
|
||||||
zone->managed_pages = freesize;
|
zone_init_internals(zone, j, nid, freesize);
|
||||||
zone_set_nid(zone, nid);
|
|
||||||
zone->name = zone_names[j];
|
|
||||||
zone->zone_pgdat = pgdat;
|
|
||||||
spin_lock_init(&zone->lock);
|
|
||||||
zone_seqlock_init(zone);
|
|
||||||
zone_pcp_init(zone);
|
|
||||||
|
|
||||||
if (!size)
|
if (!size)
|
||||||
continue;
|
continue;
|
||||||
@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
|
|||||||
static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
|
static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void __meminit free_area_init_node(int nid, unsigned long *zones_size,
|
void __init free_area_init_node(int nid, unsigned long *zones_size,
|
||||||
unsigned long node_start_pfn,
|
unsigned long node_start_pfn,
|
||||||
unsigned long *zholes_size)
|
unsigned long *zholes_size)
|
||||||
{
|
{
|
||||||
@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
|
|||||||
* may be accessed (for example page_to_pfn() on some configuration accesses
|
* may be accessed (for example page_to_pfn() on some configuration accesses
|
||||||
* flags). We must explicitly zero those struct pages.
|
* flags). We must explicitly zero those struct pages.
|
||||||
*/
|
*/
|
||||||
void __meminit zero_resv_unavail(void)
|
void __init zero_resv_unavail(void)
|
||||||
{
|
{
|
||||||
phys_addr_t start, end;
|
phys_addr_t start, end;
|
||||||
unsigned long pfn;
|
unsigned long pfn;
|
||||||
|
Loading…
Reference in New Issue
Block a user