42dbfc8649
While testing memory hot-remove, I found following dead lock: Process #1141 is drmgr, trying to remove some memory, i.e. memory499. It holds the memory_hotplug_mutex, and blocks when trying to remove file "online" under dir memory499, in kernfs_drain(), at wait_event(root->deactivate_waitq, atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); Process #1120 is trying to online memory499 by echo 1 > memory499/online In .kernfs_fop_write, it uses kernfs_get_active() to increase &kn->active, thus blocking process #1141. While itself is blocked later when trying to acquire memory_hotplug_mutex, which is held by process The backtrace of both processes are shown below: [<c000000001b18600>] 0xc000000001b18600 [<c000000000015044>] .__switch_to+0x144/0x200 [<c000000000263ca4>] .online_pages+0x74/0x7b0 [<c00000000055b40c>] .memory_subsys_online+0x9c/0x150 [<c00000000053cbe8>] .device_online+0xb8/0x120 [<c00000000053cd04>] .online_store+0xb4/0xc0 [<c000000000538ce4>] .dev_attr_store+0x64/0xa0 [<c00000000030f4ec>] .sysfs_kf_write+0x7c/0xb0 [<c00000000030e574>] .kernfs_fop_write+0x154/0x1e0 [<c000000000268450>] .vfs_write+0xe0/0x260 [<c000000000269144>] .SyS_write+0x64/0x110 [<c000000000009ffc>] syscall_exit+0x0/0x7c [<c000000001b18600>] 0xc000000001b18600 [<c000000000015044>] .__switch_to+0x144/0x200 [<c00000000030be14>] .__kernfs_remove+0x204/0x300 [<c00000000030d428>] .kernfs_remove_by_name_ns+0x68/0xf0 [<c00000000030fb38>] .sysfs_remove_file_ns+0x38/0x60 [<c000000000539354>] .device_remove_attrs+0x54/0xc0 [<c000000000539fd8>] .device_del+0x158/0x250 [<c00000000053a104>] .device_unregister+0x34/0xa0 [<c00000000055bc14>] .unregister_memory_section+0x164/0x170 [<c00000000024ee18>] .__remove_pages+0x108/0x4c0 [<c00000000004b590>] .arch_remove_memory+0x60/0xc0 [<c00000000026446c>] .remove_memory+0x8c/0xe0 [<c00000000007f9f4>] .pseries_remove_memblock+0xd4/0x160 [<c00000000007fcfc>] .pseries_memory_notifier+0x27c/0x290 [<c0000000008ae6cc>] .notifier_call_chain+0x8c/0x100 [<c0000000000d858c>] .__blocking_notifier_call_chain+0x6c/0xe0 [<c00000000071ddec>] .of_property_notify+0x7c/0xc0 [<c00000000071ed3c>] .of_update_property+0x3c/0x1b0 [<c0000000000756cc>] .ofdt_write+0x3dc/0x740 [<c0000000002f60fc>] .proc_reg_write+0xac/0x110 [<c000000000268450>] .vfs_write+0xe0/0x260 [<c000000000269144>] .SyS_write+0x64/0x110 [<c000000000009ffc>] syscall_exit+0x0/0x7c This patch uses lock_device_hotplug() to protect remove_memory() called in pseries_remove_memblock(), which is also stated before function remove_memory(): * NOTE: The caller must call lock_device_hotplug() to serialize hotplug * and online/offline operations before this call, as required by * try_offline_node(). */ void __ref remove_memory(int nid, u64 start, u64 size) With this lock held, the other process(#1120 above) trying to online the memory block will retry the system call when calling lock_device_hotplug_sysfs(), and finally find No such device error. Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
280 lines
6.6 KiB
C
280 lines
6.6 KiB
C
/*
|
|
* pseries Memory Hotplug infrastructure.
|
|
*
|
|
* Copyright (C) 2008 Badari Pulavarty, IBM Corporation
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/of.h>
|
|
#include <linux/of_address.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/memory.h>
|
|
#include <linux/memory_hotplug.h>
|
|
|
|
#include <asm/firmware.h>
|
|
#include <asm/machdep.h>
|
|
#include <asm/prom.h>
|
|
#include <asm/sparsemem.h>
|
|
|
|
static unsigned long get_memblock_size(void)
|
|
{
|
|
struct device_node *np;
|
|
unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
|
|
struct resource r;
|
|
|
|
np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
|
|
if (np) {
|
|
const __be64 *size;
|
|
|
|
size = of_get_property(np, "ibm,lmb-size", NULL);
|
|
if (size)
|
|
memblock_size = be64_to_cpup(size);
|
|
of_node_put(np);
|
|
} else if (machine_is(pseries)) {
|
|
/* This fallback really only applies to pseries */
|
|
unsigned int memzero_size = 0;
|
|
|
|
np = of_find_node_by_path("/memory@0");
|
|
if (np) {
|
|
if (!of_address_to_resource(np, 0, &r))
|
|
memzero_size = resource_size(&r);
|
|
of_node_put(np);
|
|
}
|
|
|
|
if (memzero_size) {
|
|
/* We now know the size of memory@0, use this to find
|
|
* the first memoryblock and get its size.
|
|
*/
|
|
char buf[64];
|
|
|
|
sprintf(buf, "/memory@%x", memzero_size);
|
|
np = of_find_node_by_path(buf);
|
|
if (np) {
|
|
if (!of_address_to_resource(np, 0, &r))
|
|
memblock_size = resource_size(&r);
|
|
of_node_put(np);
|
|
}
|
|
}
|
|
}
|
|
return memblock_size;
|
|
}
|
|
|
|
/* WARNING: This is going to override the generic definition whenever
|
|
* pseries is built-in regardless of what platform is active at boot
|
|
* time. This is fine for now as this is the only "option" and it
|
|
* should work everywhere. If not, we'll have to turn this into a
|
|
* ppc_md. callback
|
|
*/
|
|
unsigned long memory_block_size_bytes(void)
|
|
{
|
|
return get_memblock_size();
|
|
}
|
|
|
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
|
static int pseries_remove_memory(u64 start, u64 size)
|
|
{
|
|
int ret;
|
|
|
|
/* Remove htab bolted mappings for this section of memory */
|
|
start = (unsigned long)__va(start);
|
|
ret = remove_section_mapping(start, start + size);
|
|
|
|
/* Ensure all vmalloc mappings are flushed in case they also
|
|
* hit that section of memory
|
|
*/
|
|
vm_unmap_aliases();
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
|
|
{
|
|
unsigned long block_sz, start_pfn;
|
|
int sections_per_block;
|
|
int i, nid;
|
|
|
|
start_pfn = base >> PAGE_SHIFT;
|
|
|
|
lock_device_hotplug();
|
|
|
|
if (!pfn_valid(start_pfn))
|
|
goto out;
|
|
|
|
block_sz = memory_block_size_bytes();
|
|
sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
|
|
nid = memory_add_physaddr_to_nid(base);
|
|
|
|
for (i = 0; i < sections_per_block; i++) {
|
|
remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
|
|
base += MIN_MEMORY_BLOCK_SIZE;
|
|
}
|
|
|
|
out:
|
|
/* Update memory regions for memory remove */
|
|
memblock_remove(base, memblock_size);
|
|
unlock_device_hotplug();
|
|
return 0;
|
|
}
|
|
|
|
static int pseries_remove_mem_node(struct device_node *np)
|
|
{
|
|
const char *type;
|
|
const unsigned int *regs;
|
|
unsigned long base;
|
|
unsigned int lmb_size;
|
|
int ret = -EINVAL;
|
|
|
|
/*
|
|
* Check to see if we are actually removing memory
|
|
*/
|
|
type = of_get_property(np, "device_type", NULL);
|
|
if (type == NULL || strcmp(type, "memory") != 0)
|
|
return 0;
|
|
|
|
/*
|
|
* Find the bae address and size of the memblock
|
|
*/
|
|
regs = of_get_property(np, "reg", NULL);
|
|
if (!regs)
|
|
return ret;
|
|
|
|
base = *(unsigned long *)regs;
|
|
lmb_size = regs[3];
|
|
|
|
pseries_remove_memblock(base, lmb_size);
|
|
return 0;
|
|
}
|
|
#else
|
|
static inline int pseries_remove_memblock(unsigned long base,
|
|
unsigned int memblock_size)
|
|
{
|
|
return -EOPNOTSUPP;
|
|
}
|
|
static inline int pseries_remove_mem_node(struct device_node *np)
|
|
{
|
|
return -EOPNOTSUPP;
|
|
}
|
|
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
|
|
|
static int pseries_add_mem_node(struct device_node *np)
|
|
{
|
|
const char *type;
|
|
const unsigned int *regs;
|
|
unsigned long base;
|
|
unsigned int lmb_size;
|
|
int ret = -EINVAL;
|
|
|
|
/*
|
|
* Check to see if we are actually adding memory
|
|
*/
|
|
type = of_get_property(np, "device_type", NULL);
|
|
if (type == NULL || strcmp(type, "memory") != 0)
|
|
return 0;
|
|
|
|
/*
|
|
* Find the base and size of the memblock
|
|
*/
|
|
regs = of_get_property(np, "reg", NULL);
|
|
if (!regs)
|
|
return ret;
|
|
|
|
base = *(unsigned long *)regs;
|
|
lmb_size = regs[3];
|
|
|
|
/*
|
|
* Update memory region to represent the memory add
|
|
*/
|
|
ret = memblock_add(base, lmb_size);
|
|
return (ret < 0) ? -EINVAL : 0;
|
|
}
|
|
|
|
static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
|
|
{
|
|
struct of_drconf_cell *new_drmem, *old_drmem;
|
|
unsigned long memblock_size;
|
|
u32 entries;
|
|
u32 *p;
|
|
int i, rc = -EINVAL;
|
|
|
|
memblock_size = get_memblock_size();
|
|
if (!memblock_size)
|
|
return -EINVAL;
|
|
|
|
p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL);
|
|
if (!p)
|
|
return -EINVAL;
|
|
|
|
/* The first int of the property is the number of lmb's described
|
|
* by the property. This is followed by an array of of_drconf_cell
|
|
* entries. Get the niumber of entries and skip to the array of
|
|
* of_drconf_cell's.
|
|
*/
|
|
entries = *p++;
|
|
old_drmem = (struct of_drconf_cell *)p;
|
|
|
|
p = (u32 *)pr->prop->value;
|
|
p++;
|
|
new_drmem = (struct of_drconf_cell *)p;
|
|
|
|
for (i = 0; i < entries; i++) {
|
|
if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) &&
|
|
(!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) {
|
|
rc = pseries_remove_memblock(old_drmem[i].base_addr,
|
|
memblock_size);
|
|
break;
|
|
} else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) &&
|
|
(new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) {
|
|
rc = memblock_add(old_drmem[i].base_addr,
|
|
memblock_size);
|
|
rc = (rc < 0) ? -EINVAL : 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int pseries_memory_notifier(struct notifier_block *nb,
|
|
unsigned long action, void *node)
|
|
{
|
|
struct of_prop_reconfig *pr;
|
|
int err = 0;
|
|
|
|
switch (action) {
|
|
case OF_RECONFIG_ATTACH_NODE:
|
|
err = pseries_add_mem_node(node);
|
|
break;
|
|
case OF_RECONFIG_DETACH_NODE:
|
|
err = pseries_remove_mem_node(node);
|
|
break;
|
|
case OF_RECONFIG_UPDATE_PROPERTY:
|
|
pr = (struct of_prop_reconfig *)node;
|
|
if (!strcmp(pr->prop->name, "ibm,dynamic-memory"))
|
|
err = pseries_update_drconf_memory(pr);
|
|
break;
|
|
}
|
|
return notifier_from_errno(err);
|
|
}
|
|
|
|
static struct notifier_block pseries_mem_nb = {
|
|
.notifier_call = pseries_memory_notifier,
|
|
};
|
|
|
|
static int __init pseries_memory_hotplug_init(void)
|
|
{
|
|
if (firmware_has_feature(FW_FEATURE_LPAR))
|
|
of_reconfig_notifier_register(&pseries_mem_nb);
|
|
|
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
|
ppc_md.remove_memory = pseries_remove_memory;
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
machine_device_initcall(pseries, pseries_memory_hotplug_init);
|