At memory hot-remove time we can retrieve an LMB's nid from its
corresponding memory_block. There is no need to store the nid
in multiple locations.
Note that lmb_to_memblock() uses find_memory_block() to get the
corresponding memory_block. As find_memory_block() runs in sub-linear
time this approach is negligibly slower than what we do at present.
In exchange for this lookup at hot-remove time we no longer need to
call memory_add_physaddr_to_nid() during drmem_init() for each LMB.
On powerpc, memory_add_physaddr_to_nid() is a linear search, so this
spares us an O(n^2) initialization during boot.
On systems with many LMBs that initialization overhead is palpable and
disruptive. For example, on a box with 249854 LMBs we're seeing
drmem_init() take upwards of 30 seconds to complete:
[ 53.721639] drmem: initializing drmem v2
[ 80.604346] watchdog: BUG: soft lockup - CPU#65 stuck for 23s! [swapper/0:1]
[ 80.604377] Modules linked in:
[ 80.604389] CPU: 65 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc2+ #4
[ 80.604397] NIP: c0000000000a4980 LR: c0000000000a4940 CTR: 0000000000000000
[ 80.604407] REGS: c0002dbff8493830 TRAP: 0901 Not tainted (5.6.0-rc2+)
[ 80.604412] MSR: 8000000002009033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR: 44000248 XER: 0000000d
[ 80.604431] CFAR: c0000000000a4a38 IRQMASK: 0
[ 80.604431] GPR00: c0000000000a4940 c0002dbff8493ac0 c000000001904400 c0003cfffffede30
[ 80.604431] GPR04: 0000000000000000 c000000000f4095a 000000000000002f 0000000010000000
[ 80.604431] GPR08: c0000bf7ecdb7fb8 c0000bf7ecc2d3c8 0000000000000008 c00c0002fdfb2001
[ 80.604431] GPR12: 0000000000000000 c00000001e8ec200
[ 80.604477] NIP [c0000000000a4980] hot_add_scn_to_nid+0xa0/0x3e0
[ 80.604486] LR [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0
[ 80.604492] Call Trace:
[ 80.604498] [c0002dbff8493ac0] [c0000000000a4940] hot_add_scn_to_nid+0x60/0x3e0 (unreliable)
[ 80.604509] [c0002dbff8493b20] [c000000000087c10] memory_add_physaddr_to_nid+0x20/0x60
[ 80.604521] [c0002dbff8493b40] [c0000000010d4880] drmem_init+0x25c/0x2f0
[ 80.604530] [c0002dbff8493c10] [c000000000010154] do_one_initcall+0x64/0x2c0
[ 80.604540] [c0002dbff8493ce0] [c0000000010c4aa0] kernel_init_freeable+0x2d8/0x3a0
[ 80.604550] [c0002dbff8493db0] [c000000000010824] kernel_init+0x2c/0x148
[ 80.604560] [c0002dbff8493e20] [c00000000000b648] ret_from_kernel_thread+0x5c/0x74
[ 80.604567] Instruction dump:
[ 80.604574] 392918e8 e9490000 e90a000a e92a0000 80ea000c 1d080018 3908ffe8 7d094214
[ 80.604586] 7fa94040 419d00dc e9490010 714a0088 <2faa0008> 409e00ac e9490000 7fbe5040
[ 89.047390] drmem: 249854 LMB(s)
With a patched kernel on the same machine we're no longer seeing the
soft lockup. drmem_init() now completes in negligible time, even when
the LMB count is large.
Fixes: b2d3b5ee66 ("powerpc/pseries: Track LMB nid instead of using device tree")
Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
Reviewed-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200811015115.63677-1-cheloha@linux.ibm.com
122 lines
2.9 KiB
C
122 lines
2.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* drmem.h: Power specific logical memory block representation
|
|
*
|
|
* Copyright 2017 IBM Corporation
|
|
*/
|
|
|
|
#ifndef _ASM_POWERPC_LMB_H
|
|
#define _ASM_POWERPC_LMB_H
|
|
|
|
#include <linux/sched.h>
|
|
|
|
struct drmem_lmb {
|
|
u64 base_addr;
|
|
u32 drc_index;
|
|
u32 aa_index;
|
|
u32 flags;
|
|
};
|
|
|
|
struct drmem_lmb_info {
|
|
struct drmem_lmb *lmbs;
|
|
int n_lmbs;
|
|
u32 lmb_size;
|
|
};
|
|
|
|
/*
 * Global LMB descriptor, defined outside this header. It is dereferenced
 * by for_each_drmem_lmb() and drmem_lmb_size() without a NULL check.
 */
extern struct drmem_lmb_info *drmem_info;
|
|
|
|
static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb,
|
|
const struct drmem_lmb *start)
|
|
{
|
|
/*
|
|
* DLPAR code paths can take several milliseconds per element
|
|
* when interacting with firmware. Ensure that we don't
|
|
* unfairly monopolize the CPU.
|
|
*/
|
|
if (((++lmb - start) % 16) == 0)
|
|
cond_resched();
|
|
|
|
return lmb;
|
|
}
|
|
|
|
/*
 * Iterate @lmb over the half-open range [@start, @end).
 *
 * The cursor is advanced with drmem_lmb_next(), so long walks
 * periodically give the scheduler a chance to run. @start and @end are
 * expanded on every iteration, so pass expressions without side effects.
 */
#define for_each_drmem_lmb_in_range(lmb, start, end)		\
	for ((lmb) = (start); (lmb) < (end);			\
	     (lmb) = drmem_lmb_next((lmb), (start)))
|
|
|
|
/*
 * Iterate @lmb over every element of drmem_info->lmbs, i.e. from
 * lmbs[0] up to (but not including) lmbs[n_lmbs].
 */
#define for_each_drmem_lmb(lmb)					\
	for_each_drmem_lmb_in_range((lmb),			\
		&drmem_info->lmbs[0],				\
		&drmem_info->lmbs[drmem_info->n_lmbs])
|
|
|
|
/*
|
|
* The of_drconf_cell_v1 struct defines the layout of the LMB data
|
|
* specified in the ibm,dynamic-memory device tree property.
|
|
* The property itself is a 32-bit value specifying the number of
|
|
* LMBs followed by an array of of_drconf_cell_v1 entries, one
|
|
* per LMB.
|
|
*/
|
|
struct of_drconf_cell_v1 {
|
|
__be64 base_addr;
|
|
__be32 drc_index;
|
|
__be32 reserved;
|
|
__be32 aa_index;
|
|
__be32 flags;
|
|
};
|
|
|
|
/*
|
|
* Version 2 of the ibm,dynamic-memory property is defined as a
|
|
* 32-bit value specifying the number of LMB sets followed by an
|
|
* array of of_drconf_cell_v2 entries, one per LMB set.
|
|
*/
|
|
struct of_drconf_cell_v2 {
|
|
u32 seq_lmbs;
|
|
u64 base_addr;
|
|
u32 drc_index;
|
|
u32 aa_index;
|
|
u32 flags;
|
|
} __packed;
|
|
|
|
/*
 * DRCONF_MEM_* flag bits. Each value has a single, distinct bit set.
 * NOTE(review): presumably these are the bits carried in the "flags"
 * field of the drconf cells above - confirm against the users.
 */
#define DRCONF_MEM_ASSIGNED	0x00000008
#define DRCONF_MEM_AI_INVALID	0x00000040
#define DRCONF_MEM_RESERVED	0x00000080
#define DRCONF_MEM_HOTREMOVABLE	0x00000100
|
|
|
|
static inline u32 drmem_lmb_size(void)
|
|
{
|
|
return drmem_info->lmb_size;
|
|
}
|
|
|
|
/*
 * Bit 31 of drmem_lmb.flags. It does not overlap any DRCONF_MEM_* value
 * defined in this header and is set, cleared and tested by
 * drmem_mark_lmb_reserved(), drmem_remove_lmb_reservation() and
 * drmem_lmb_reserved().
 */
#define DRMEM_LMB_RESERVED 0x80000000
|
|
|
|
static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
|
|
{
|
|
lmb->flags |= DRMEM_LMB_RESERVED;
|
|
}
|
|
|
|
static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
|
|
{
|
|
lmb->flags &= ~DRMEM_LMB_RESERVED;
|
|
}
|
|
|
|
static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
|
|
{
|
|
return lmb->flags & DRMEM_LMB_RESERVED;
|
|
}
|
|
|
|
u64 drmem_lmb_memory_max(void);
|
|
int walk_drmem_lmbs(struct device_node *dn, void *data,
|
|
int (*func)(struct drmem_lmb *, const __be32 **, void *));
|
|
int drmem_update_dt(void);
|
|
|
|
#ifdef CONFIG_PPC_PSERIES
/*
 * Variant of walk_drmem_lmbs() that identifies the node by an
 * unsigned long instead of a struct device_node pointer. It is declared
 * __init and is only available when CONFIG_PPC_PSERIES is set.
 */
int __init
walk_drmem_lmbs_early(unsigned long node, void *data,
		      int (*func)(struct drmem_lmb *, const __be32 **, void *));
#endif
|
|
|
|
static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
|
|
{
|
|
lmb->aa_index = 0xffffffff;
|
|
}
|
|
|
|
#endif /* _ASM_POWERPC_LMB_H */
|