2008-04-18 20:33:50 +00:00
|
|
|
/*
|
|
|
|
* pseries Memory Hotplug infrastructure.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2008 Badari Pulavarty, IBM Corporation
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
powerpc/pseries: Create new device hotplug entry point
The current hotplug (or dlpar) of devices (the process is generally the
same for memory, cpu, and pci) on PowerVM systems is initiated
from the HMC, which communicates the request to the partitions through
the RSCT framework. The RSCT framework then invokes the drmgr command.
The drmgr command performs the hotplug operation by doing some pieces,
such as most of the rtas calls and device tree parsing, in userspace
and make requests to the kernel to online/offline the device, update the
device tree and add/remove the device.
For PowerKVM the approach for device hotplug is to follow what is currently
being done for pci hotplug. A hotplug request is initiated from the host.
QEMU then generates an EPOW interrupt to the guest which causes the guest
to make the rtas,check-exception call. In QEMU, the rtas,check-exception call
returns a rtas hotplug event to the guest.
Please note that the current pci hotplug path for PowerKVM involves the
kernel receiving the rtas hotplug event, passing it to rtas_errd in
userspace, and having rtas_errd invoke drmgr. The drmgr command then
handles the request as described above for PowerVM systems.
There is no need for this circuitous route, we should just handle the entire
hotplug of devices in the kernel. What I am planning is to enable this
by moving the code to handle hotplug from drmgr into the kernel to
provide a single path for handling device hotplug for both PowerVM and
PowerKVM systems. This patch provides the common framework and entry point.
For PowerKVM a future update to the kernel rtas code will recognize rtas
hotplug events returned from rtas,check-exception calls and use the common
entry point to handle hotplug of the device.
For PowerVM systems, this patch creates /sys/kernel/dlpar that can be
used by the drmgr command to initiate hotplug requests. In order to do
this a string of the format "<resource> <action> <id_type> <id>" is
written to this file. The string consists of a resource (cpu, memory, pci,
phb), an action (add or remove), an id_type (count, drc index, drc name),
and the corresponding id. The kernel will parse the string and create a
rtas hotplug section that can be passed to the common entry point for
handling hotplug requests.
It should be noted that there is no chance of updating how we receive
hotplug (dlpar) requests from the HMC on PowerVM systems.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2015-02-10 19:47:02 +00:00
|
|
|
#define pr_fmt(fmt) "pseries-hotplug-mem: " fmt
|
|
|
|
|
2008-04-18 20:33:50 +00:00
|
|
|
#include <linux/of.h>
|
2013-09-26 12:40:04 +00:00
|
|
|
#include <linux/of_address.h>
|
2010-07-12 04:36:09 +00:00
|
|
|
#include <linux/memblock.h>
|
2011-06-14 00:57:51 +00:00
|
|
|
#include <linux/memory.h>
|
2014-01-27 16:54:06 +00:00
|
|
|
#include <linux/memory_hotplug.h>
|
2015-02-10 19:48:25 +00:00
|
|
|
#include <linux/slab.h>
|
2011-06-14 00:57:51 +00:00
|
|
|
|
2008-04-18 20:33:50 +00:00
|
|
|
#include <asm/firmware.h>
|
|
|
|
#include <asm/machdep.h>
|
2013-09-26 12:40:04 +00:00
|
|
|
#include <asm/prom.h>
|
2009-02-08 14:49:39 +00:00
|
|
|
#include <asm/sparsemem.h>
|
2014-08-19 22:55:19 +00:00
|
|
|
#include "pseries.h"
|
2008-04-18 20:33:50 +00:00
|
|
|
|
2015-02-10 19:48:25 +00:00
|
|
|
/*
 * Set to true by dlpar_update_drconf_property() for the duration of its
 * of_update_property() call and cleared again right afterwards.
 * NOTE(review): presumably consulted by a device-tree update notifier so it
 * can tell kernel-initiated updates apart from others -- confirm against the
 * reader of this flag.
 */
static bool rtas_hp_event;
|
|
|
|
|
2014-06-04 07:50:47 +00:00
|
|
|
unsigned long pseries_memory_block_size(void)
|
2011-01-20 16:45:20 +00:00
|
|
|
{
|
|
|
|
struct device_node *np;
|
2011-06-14 00:57:51 +00:00
|
|
|
unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
|
|
|
|
struct resource r;
|
2011-01-20 16:45:20 +00:00
|
|
|
|
|
|
|
np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
|
|
|
|
if (np) {
|
2011-06-14 00:57:51 +00:00
|
|
|
const __be64 *size;
|
2011-01-20 16:45:20 +00:00
|
|
|
|
|
|
|
size = of_get_property(np, "ibm,lmb-size", NULL);
|
2011-06-14 00:57:51 +00:00
|
|
|
if (size)
|
|
|
|
memblock_size = be64_to_cpup(size);
|
2011-01-20 16:45:20 +00:00
|
|
|
of_node_put(np);
|
2011-06-14 00:57:51 +00:00
|
|
|
} else if (machine_is(pseries)) {
|
|
|
|
/* This fallback really only applies to pseries */
|
2011-01-20 16:45:20 +00:00
|
|
|
unsigned int memzero_size = 0;
|
|
|
|
|
|
|
|
np = of_find_node_by_path("/memory@0");
|
|
|
|
if (np) {
|
2011-06-14 00:57:51 +00:00
|
|
|
if (!of_address_to_resource(np, 0, &r))
|
|
|
|
memzero_size = resource_size(&r);
|
2011-01-20 16:45:20 +00:00
|
|
|
of_node_put(np);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (memzero_size) {
|
|
|
|
/* We now know the size of memory@0, use this to find
|
|
|
|
* the first memoryblock and get its size.
|
|
|
|
*/
|
|
|
|
char buf[64];
|
|
|
|
|
|
|
|
sprintf(buf, "/memory@%x", memzero_size);
|
|
|
|
np = of_find_node_by_path(buf);
|
|
|
|
if (np) {
|
2011-06-14 00:57:51 +00:00
|
|
|
if (!of_address_to_resource(np, 0, &r))
|
|
|
|
memblock_size = resource_size(&r);
|
2011-01-20 16:45:20 +00:00
|
|
|
of_node_put(np);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return memblock_size;
|
|
|
|
}
|
|
|
|
|
2015-02-10 19:48:25 +00:00
|
|
|
static void dlpar_free_drconf_property(struct property *prop)
|
|
|
|
{
|
|
|
|
kfree(prop->name);
|
|
|
|
kfree(prop->value);
|
|
|
|
kfree(prop);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct property *dlpar_clone_drconf_property(struct device_node *dn)
|
|
|
|
{
|
|
|
|
struct property *prop, *new_prop;
|
|
|
|
struct of_drconf_cell *lmbs;
|
|
|
|
u32 num_lmbs, *p;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
|
|
|
|
if (!prop)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
|
|
|
|
if (!new_prop)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
new_prop->name = kstrdup(prop->name, GFP_KERNEL);
|
2015-08-07 07:59:12 +00:00
|
|
|
new_prop->value = kmemdup(prop->value, prop->length, GFP_KERNEL);
|
2015-02-10 19:48:25 +00:00
|
|
|
if (!new_prop->name || !new_prop->value) {
|
|
|
|
dlpar_free_drconf_property(new_prop);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_prop->length = prop->length;
|
|
|
|
|
|
|
|
/* Convert the property to cpu endian-ness */
|
|
|
|
p = new_prop->value;
|
|
|
|
*p = be32_to_cpu(*p);
|
|
|
|
|
|
|
|
num_lmbs = *p++;
|
|
|
|
lmbs = (struct of_drconf_cell *)p;
|
|
|
|
|
|
|
|
for (i = 0; i < num_lmbs; i++) {
|
|
|
|
lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
|
|
|
|
lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
|
|
|
|
lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
return new_prop;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
|
|
|
|
{
|
|
|
|
unsigned long section_nr;
|
|
|
|
struct mem_section *mem_sect;
|
|
|
|
struct memory_block *mem_block;
|
|
|
|
|
|
|
|
section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
|
|
|
|
mem_sect = __nr_to_section(section_nr);
|
|
|
|
|
|
|
|
mem_block = find_memory_block(mem_sect);
|
|
|
|
return mem_block;
|
|
|
|
}
|
|
|
|
|
2013-04-29 22:08:22 +00:00
|
|
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
2014-01-27 16:54:06 +00:00
|
|
|
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
|
|
|
|
{
|
|
|
|
unsigned long block_sz, start_pfn;
|
|
|
|
int sections_per_block;
|
|
|
|
int i, nid;
|
2008-07-03 03:20:58 +00:00
|
|
|
|
2008-10-01 09:44:02 +00:00
|
|
|
start_pfn = base >> PAGE_SHIFT;
|
2008-10-13 08:42:00 +00:00
|
|
|
|
powerpc/pseries: Protect remove_memory() with device hotplug lock
While testing memory hot-remove, I found following dead lock:
Process #1141 is drmgr, trying to remove some memory, i.e. memory499.
It holds the memory_hotplug_mutex, and blocks when trying to remove file
"online" under dir memory499, in kernfs_drain(), at
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
Process #1120 is trying to online memory499 by
echo 1 > memory499/online
In .kernfs_fop_write, it uses kernfs_get_active() to increase
&kn->active, thus blocking process #1141. While itself is blocked later
when trying to acquire memory_hotplug_mutex, which is held by process
The backtrace of both processes are shown below:
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c000000000263ca4>] .online_pages+0x74/0x7b0
[<c00000000055b40c>] .memory_subsys_online+0x9c/0x150
[<c00000000053cbe8>] .device_online+0xb8/0x120
[<c00000000053cd04>] .online_store+0xb4/0xc0
[<c000000000538ce4>] .dev_attr_store+0x64/0xa0
[<c00000000030f4ec>] .sysfs_kf_write+0x7c/0xb0
[<c00000000030e574>] .kernfs_fop_write+0x154/0x1e0
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c00000000030be14>] .__kernfs_remove+0x204/0x300
[<c00000000030d428>] .kernfs_remove_by_name_ns+0x68/0xf0
[<c00000000030fb38>] .sysfs_remove_file_ns+0x38/0x60
[<c000000000539354>] .device_remove_attrs+0x54/0xc0
[<c000000000539fd8>] .device_del+0x158/0x250
[<c00000000053a104>] .device_unregister+0x34/0xa0
[<c00000000055bc14>] .unregister_memory_section+0x164/0x170
[<c00000000024ee18>] .__remove_pages+0x108/0x4c0
[<c00000000004b590>] .arch_remove_memory+0x60/0xc0
[<c00000000026446c>] .remove_memory+0x8c/0xe0
[<c00000000007f9f4>] .pseries_remove_memblock+0xd4/0x160
[<c00000000007fcfc>] .pseries_memory_notifier+0x27c/0x290
[<c0000000008ae6cc>] .notifier_call_chain+0x8c/0x100
[<c0000000000d858c>] .__blocking_notifier_call_chain+0x6c/0xe0
[<c00000000071ddec>] .of_property_notify+0x7c/0xc0
[<c00000000071ed3c>] .of_update_property+0x3c/0x1b0
[<c0000000000756cc>] .ofdt_write+0x3dc/0x740
[<c0000000002f60fc>] .proc_reg_write+0xac/0x110
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
This patch uses lock_device_hotplug() to protect remove_memory() called
in pseries_remove_memblock(), which is also stated before function
remove_memory():
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations before this call, as required by
* try_offline_node().
*/
void __ref remove_memory(int nid, u64 start, u64 size)
With this lock held, the other process(#1120 above) trying to online the
memory block will retry the system call when calling
lock_device_hotplug_sysfs(), and finally find No such device error.
Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2014-04-10 08:25:31 +00:00
|
|
|
lock_device_hotplug();
|
|
|
|
|
|
|
|
if (!pfn_valid(start_pfn))
|
|
|
|
goto out;
|
2008-10-13 08:42:00 +00:00
|
|
|
|
2014-06-04 07:50:47 +00:00
|
|
|
block_sz = pseries_memory_block_size();
|
2014-01-27 16:54:06 +00:00
|
|
|
sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
|
|
|
|
nid = memory_add_physaddr_to_nid(base);
|
2008-04-18 20:33:50 +00:00
|
|
|
|
2014-01-27 16:54:06 +00:00
|
|
|
for (i = 0; i < sections_per_block; i++) {
|
|
|
|
remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
|
|
|
|
base += MIN_MEMORY_BLOCK_SIZE;
|
2012-10-08 23:34:14 +00:00
|
|
|
}
|
2008-04-18 20:33:50 +00:00
|
|
|
|
powerpc/pseries: Protect remove_memory() with device hotplug lock
While testing memory hot-remove, I found following dead lock:
Process #1141 is drmgr, trying to remove some memory, i.e. memory499.
It holds the memory_hotplug_mutex, and blocks when trying to remove file
"online" under dir memory499, in kernfs_drain(), at
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
Process #1120 is trying to online memory499 by
echo 1 > memory499/online
In .kernfs_fop_write, it uses kernfs_get_active() to increase
&kn->active, thus blocking process #1141. While itself is blocked later
when trying to acquire memory_hotplug_mutex, which is held by process
The backtrace of both processes are shown below:
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c000000000263ca4>] .online_pages+0x74/0x7b0
[<c00000000055b40c>] .memory_subsys_online+0x9c/0x150
[<c00000000053cbe8>] .device_online+0xb8/0x120
[<c00000000053cd04>] .online_store+0xb4/0xc0
[<c000000000538ce4>] .dev_attr_store+0x64/0xa0
[<c00000000030f4ec>] .sysfs_kf_write+0x7c/0xb0
[<c00000000030e574>] .kernfs_fop_write+0x154/0x1e0
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c00000000030be14>] .__kernfs_remove+0x204/0x300
[<c00000000030d428>] .kernfs_remove_by_name_ns+0x68/0xf0
[<c00000000030fb38>] .sysfs_remove_file_ns+0x38/0x60
[<c000000000539354>] .device_remove_attrs+0x54/0xc0
[<c000000000539fd8>] .device_del+0x158/0x250
[<c00000000053a104>] .device_unregister+0x34/0xa0
[<c00000000055bc14>] .unregister_memory_section+0x164/0x170
[<c00000000024ee18>] .__remove_pages+0x108/0x4c0
[<c00000000004b590>] .arch_remove_memory+0x60/0xc0
[<c00000000026446c>] .remove_memory+0x8c/0xe0
[<c00000000007f9f4>] .pseries_remove_memblock+0xd4/0x160
[<c00000000007fcfc>] .pseries_memory_notifier+0x27c/0x290
[<c0000000008ae6cc>] .notifier_call_chain+0x8c/0x100
[<c0000000000d858c>] .__blocking_notifier_call_chain+0x6c/0xe0
[<c00000000071ddec>] .of_property_notify+0x7c/0xc0
[<c00000000071ed3c>] .of_update_property+0x3c/0x1b0
[<c0000000000756cc>] .ofdt_write+0x3dc/0x740
[<c0000000002f60fc>] .proc_reg_write+0xac/0x110
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
This patch uses lock_device_hotplug() to protect remove_memory() called
in pseries_remove_memblock(), which is also stated before function
remove_memory():
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations before this call, as required by
* try_offline_node().
*/
void __ref remove_memory(int nid, u64 start, u64 size)
With this lock held, the other process(#1120 above) trying to online the
memory block will retry the system call when calling
lock_device_hotplug_sysfs(), and finally find No such device error.
Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2014-04-10 08:25:31 +00:00
|
|
|
out:
|
2014-01-27 16:54:06 +00:00
|
|
|
/* Update memory regions for memory remove */
|
2010-07-12 04:36:09 +00:00
|
|
|
memblock_remove(base, memblock_size);
|
powerpc/pseries: Protect remove_memory() with device hotplug lock
While testing memory hot-remove, I found following dead lock:
Process #1141 is drmgr, trying to remove some memory, i.e. memory499.
It holds the memory_hotplug_mutex, and blocks when trying to remove file
"online" under dir memory499, in kernfs_drain(), at
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
Process #1120 is trying to online memory499 by
echo 1 > memory499/online
In .kernfs_fop_write, it uses kernfs_get_active() to increase
&kn->active, thus blocking process #1141. While itself is blocked later
when trying to acquire memory_hotplug_mutex, which is held by process
The backtrace of both processes are shown below:
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c000000000263ca4>] .online_pages+0x74/0x7b0
[<c00000000055b40c>] .memory_subsys_online+0x9c/0x150
[<c00000000053cbe8>] .device_online+0xb8/0x120
[<c00000000053cd04>] .online_store+0xb4/0xc0
[<c000000000538ce4>] .dev_attr_store+0x64/0xa0
[<c00000000030f4ec>] .sysfs_kf_write+0x7c/0xb0
[<c00000000030e574>] .kernfs_fop_write+0x154/0x1e0
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c00000000030be14>] .__kernfs_remove+0x204/0x300
[<c00000000030d428>] .kernfs_remove_by_name_ns+0x68/0xf0
[<c00000000030fb38>] .sysfs_remove_file_ns+0x38/0x60
[<c000000000539354>] .device_remove_attrs+0x54/0xc0
[<c000000000539fd8>] .device_del+0x158/0x250
[<c00000000053a104>] .device_unregister+0x34/0xa0
[<c00000000055bc14>] .unregister_memory_section+0x164/0x170
[<c00000000024ee18>] .__remove_pages+0x108/0x4c0
[<c00000000004b590>] .arch_remove_memory+0x60/0xc0
[<c00000000026446c>] .remove_memory+0x8c/0xe0
[<c00000000007f9f4>] .pseries_remove_memblock+0xd4/0x160
[<c00000000007fcfc>] .pseries_memory_notifier+0x27c/0x290
[<c0000000008ae6cc>] .notifier_call_chain+0x8c/0x100
[<c0000000000d858c>] .__blocking_notifier_call_chain+0x6c/0xe0
[<c00000000071ddec>] .of_property_notify+0x7c/0xc0
[<c00000000071ed3c>] .of_update_property+0x3c/0x1b0
[<c0000000000756cc>] .ofdt_write+0x3dc/0x740
[<c0000000002f60fc>] .proc_reg_write+0xac/0x110
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
This patch uses lock_device_hotplug() to protect remove_memory() called
in pseries_remove_memblock(), which is also stated before function
remove_memory():
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations before this call, as required by
* try_offline_node().
*/
void __ref remove_memory(int nid, u64 start, u64 size)
With this lock held, the other process(#1120 above) trying to online the
memory block will retry the system call when calling
lock_device_hotplug_sysfs(), and finally find No such device error.
Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2014-04-10 08:25:31 +00:00
|
|
|
unlock_device_hotplug();
|
2014-01-27 16:54:06 +00:00
|
|
|
return 0;
|
2008-04-18 20:33:50 +00:00
|
|
|
}
|
|
|
|
|
2014-01-27 16:54:06 +00:00
|
|
|
static int pseries_remove_mem_node(struct device_node *np)
|
2008-07-03 03:22:39 +00:00
|
|
|
{
|
|
|
|
const char *type;
|
2014-08-19 20:44:57 +00:00
|
|
|
const __be32 *regs;
|
2008-07-03 03:22:39 +00:00
|
|
|
unsigned long base;
|
2010-07-23 00:35:52 +00:00
|
|
|
unsigned int lmb_size;
|
2008-07-03 03:22:39 +00:00
|
|
|
int ret = -EINVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check to see if we are actually removing memory
|
|
|
|
*/
|
|
|
|
type = of_get_property(np, "device_type", NULL);
|
|
|
|
if (type == NULL || strcmp(type, "memory") != 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
2014-08-07 05:11:58 +00:00
|
|
|
* Find the base address and size of the memblock
|
2008-07-03 03:22:39 +00:00
|
|
|
*/
|
|
|
|
regs = of_get_property(np, "reg", NULL);
|
|
|
|
if (!regs)
|
|
|
|
return ret;
|
|
|
|
|
2014-08-19 20:44:57 +00:00
|
|
|
base = be64_to_cpu(*(unsigned long *)regs);
|
|
|
|
lmb_size = be32_to_cpu(regs[3]);
|
2008-07-03 03:22:39 +00:00
|
|
|
|
2014-01-27 16:54:06 +00:00
|
|
|
pseries_remove_memblock(base, lmb_size);
|
|
|
|
return 0;
|
2008-07-03 03:22:39 +00:00
|
|
|
}
|
2015-02-10 19:49:22 +00:00
|
|
|
|
|
|
|
static bool lmb_is_removable(struct of_drconf_cell *lmb)
|
|
|
|
{
|
|
|
|
int i, scns_per_block;
|
|
|
|
int rc = 1;
|
|
|
|
unsigned long pfn, block_sz;
|
|
|
|
u64 phys_addr;
|
|
|
|
|
|
|
|
if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
block_sz = memory_block_size_bytes();
|
|
|
|
scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
|
|
|
|
phys_addr = lmb->base_addr;
|
|
|
|
|
|
|
|
for (i = 0; i < scns_per_block; i++) {
|
|
|
|
pfn = PFN_DOWN(phys_addr);
|
|
|
|
if (!pfn_present(pfn))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
|
|
|
|
phys_addr += MIN_MEMORY_BLOCK_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc ? true : false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int dlpar_add_lmb(struct of_drconf_cell *);
|
|
|
|
|
|
|
|
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
|
|
|
|
{
|
|
|
|
struct memory_block *mem_block;
|
|
|
|
unsigned long block_sz;
|
|
|
|
int nid, rc;
|
|
|
|
|
|
|
|
if (!lmb_is_removable(lmb))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
mem_block = lmb_to_memblock(lmb);
|
|
|
|
if (!mem_block)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
rc = device_offline(&mem_block->dev);
|
|
|
|
put_device(&mem_block->dev);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
block_sz = pseries_memory_block_size();
|
|
|
|
nid = memory_add_physaddr_to_nid(lmb->base_addr);
|
|
|
|
|
|
|
|
remove_memory(nid, lmb->base_addr, block_sz);
|
|
|
|
|
|
|
|
/* Update memory regions for memory remove */
|
|
|
|
memblock_remove(lmb->base_addr, block_sz);
|
|
|
|
|
|
|
|
dlpar_release_drc(lmb->drc_index);
|
|
|
|
|
|
|
|
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Hot-remove @lmbs_to_remove LMBs, chosen in array order from the CPU-endian
 * "ibm,dynamic-memory" copy in @prop.
 *
 * The operation is all-or-nothing: each removed LMB is tagged through its
 * 'reserved' field, and if the requested count cannot be reached the tagged
 * LMBs are added back. The tag is cleared again before returning.
 *
 * Returns 0 on success, -EINVAL if the count is zero, if fewer assigned LMBs
 * exist than requested, or if not all requested LMBs could be removed.
 */
static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
					struct property *prop)
{
	struct of_drconf_cell *lmbs, *lmb;
	int removed = 0;
	int available = 0;
	u32 num_lmbs, *p;
	int i;

	pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove);

	if (!lmbs_to_remove)
		return -EINVAL;

	p = prop->value;
	num_lmbs = p[0];
	lmbs = (struct of_drconf_cell *)&p[1];

	/* Validate that there are enough LMBs to satisfy the request */
	for (i = 0; i < num_lmbs; i++)
		if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
			available++;

	if (available < lmbs_to_remove)
		return -EINVAL;

	for (i = 0; i < num_lmbs && removed < lmbs_to_remove; i++) {
		lmb = &lmbs[i];

		if (dlpar_remove_lmb(lmb))
			continue;

		removed++;

		/* Mark this lmb so we can add it later if all of the
		 * requested LMBs cannot be removed.
		 */
		lmb->reserved = 1;
	}

	if (removed == lmbs_to_remove) {
		for (i = 0; i < num_lmbs; i++) {
			lmb = &lmbs[i];
			if (!lmb->reserved)
				continue;

			pr_info("Memory at %llx was hot-removed\n",
				lmb->base_addr);

			lmb->reserved = 0;
		}

		return 0;
	}

	pr_err("Memory hot-remove failed, adding LMB's back\n");

	for (i = 0; i < num_lmbs; i++) {
		lmb = &lmbs[i];
		if (!lmb->reserved)
			continue;

		if (dlpar_add_lmb(lmb))
			pr_err("Failed to add LMB back, drc index %x\n",
			       lmb->drc_index);

		lmb->reserved = 0;
	}

	return -EINVAL;
}
|
|
|
|
|
|
|
|
/*
 * Hot-remove the LMB whose DRC index equals @drc_index, looked up in the
 * CPU-endian "ibm,dynamic-memory" copy in @prop.
 *
 * Returns 0 on success, -EINVAL if no LMB has that DRC index, or the error
 * from dlpar_remove_lmb().
 */
static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
{
	struct of_drconf_cell *lmbs, *lmb = NULL;
	u32 num_lmbs, *p;
	int i, rc;

	pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index);

	p = prop->value;
	num_lmbs = *p++;
	lmbs = (struct of_drconf_cell *)p;

	for (i = 0; i < num_lmbs; i++) {
		if (lmbs[i].drc_index == drc_index) {
			lmb = &lmbs[i];
			break;
		}
	}

	/*
	 * No match: there is no LMB whose address could be reported, so log
	 * the requested index rather than reading past the end of the array.
	 */
	if (!lmb) {
		pr_info("Failed to hot-remove memory, drc index %x\n",
			drc_index);
		return -EINVAL;
	}

	rc = dlpar_remove_lmb(lmb);
	if (rc)
		pr_info("Failed to hot-remove memory at %llx\n",
			lmb->base_addr);
	else
		pr_info("Memory at %llx was hot-removed\n", lmb->base_addr);

	return rc;
}
|
|
|
|
|
2013-04-29 22:08:22 +00:00
|
|
|
#else
|
|
|
|
static inline int pseries_remove_memblock(unsigned long base,
|
|
|
|
unsigned int memblock_size)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
2014-01-27 16:54:06 +00:00
|
|
|
static inline int pseries_remove_mem_node(struct device_node *np)
|
2013-04-29 22:08:22 +00:00
|
|
|
{
|
2014-08-11 09:16:19 +00:00
|
|
|
return 0;
|
2013-04-29 22:08:22 +00:00
|
|
|
}
|
2015-02-10 19:49:22 +00:00
|
|
|
static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
2015-04-14 07:01:56 +00:00
|
|
|
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
|
|
|
|
struct property *prop)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
|
|
|
|
{
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
2015-02-10 19:49:22 +00:00
|
|
|
|
2013-04-29 22:08:22 +00:00
|
|
|
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
2008-07-03 03:22:39 +00:00
|
|
|
|
2015-02-10 19:48:25 +00:00
|
|
|
static int dlpar_add_lmb(struct of_drconf_cell *lmb)
|
|
|
|
{
|
|
|
|
struct memory_block *mem_block;
|
|
|
|
unsigned long block_sz;
|
|
|
|
int nid, rc;
|
|
|
|
|
|
|
|
if (lmb->flags & DRCONF_MEM_ASSIGNED)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
block_sz = memory_block_size_bytes();
|
|
|
|
|
|
|
|
rc = dlpar_acquire_drc(lmb->drc_index);
|
|
|
|
if (rc)
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
/* Find the node id for this address */
|
|
|
|
nid = memory_add_physaddr_to_nid(lmb->base_addr);
|
|
|
|
|
|
|
|
/* Add the memory */
|
|
|
|
rc = add_memory(nid, lmb->base_addr, block_sz);
|
|
|
|
if (rc) {
|
|
|
|
dlpar_release_drc(lmb->drc_index);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Register this block of memory */
|
|
|
|
rc = memblock_add(lmb->base_addr, block_sz);
|
|
|
|
if (rc) {
|
|
|
|
remove_memory(nid, lmb->base_addr, block_sz);
|
|
|
|
dlpar_release_drc(lmb->drc_index);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
mem_block = lmb_to_memblock(lmb);
|
|
|
|
if (!mem_block) {
|
|
|
|
remove_memory(nid, lmb->base_addr, block_sz);
|
|
|
|
dlpar_release_drc(lmb->drc_index);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = device_online(&mem_block->dev);
|
|
|
|
put_device(&mem_block->dev);
|
|
|
|
if (rc) {
|
|
|
|
remove_memory(nid, lmb->base_addr, block_sz);
|
|
|
|
dlpar_release_drc(lmb->drc_index);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
lmb->flags |= DRCONF_MEM_ASSIGNED;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Hot-add @lmbs_to_add LMBs, chosen in array order from the CPU-endian
 * "ibm,dynamic-memory" copy in @prop.
 *
 * The operation is all-or-nothing: each added LMB is tagged through its
 * 'reserved' field, and if the requested count cannot be reached the tagged
 * LMBs are removed again. The tag is cleared again before returning.
 *
 * Returns 0 on success, -EINVAL if the count is zero, if fewer unassigned
 * LMBs exist than requested, or if not all requested LMBs could be added.
 */
static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
{
	struct of_drconf_cell *lmbs;
	u32 num_lmbs, *p;
	int lmbs_available = 0;
	int lmbs_added = 0;
	int i, rc;

	pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);

	if (lmbs_to_add == 0)
		return -EINVAL;

	p = prop->value;
	num_lmbs = *p++;
	lmbs = (struct of_drconf_cell *)p;

	/* Validate that there are enough LMBs to satisfy the request */
	for (i = 0; i < num_lmbs; i++) {
		if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED))
			lmbs_available++;
	}

	if (lmbs_available < lmbs_to_add)
		return -EINVAL;

	for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
		rc = dlpar_add_lmb(&lmbs[i]);
		if (rc)
			continue;

		lmbs_added++;

		/* Mark this lmb so we can remove it later if all of the
		 * requested LMBs cannot be added.
		 */
		lmbs[i].reserved = 1;
	}

	if (lmbs_added != lmbs_to_add) {
		pr_err("Memory hot-add failed, removing any added LMBs\n");

		for (i = 0; i < num_lmbs; i++) {
			if (!lmbs[i].reserved)
				continue;

			/*
			 * The property copy is already CPU-endian, so the
			 * DRC index is logged without further conversion.
			 */
			rc = dlpar_remove_lmb(&lmbs[i]);
			if (rc)
				pr_err("Failed to remove LMB, drc index %x\n",
				       lmbs[i].drc_index);

			/* Drop the rollback tag, as the remove path does */
			lmbs[i].reserved = 0;
		}
		rc = -EINVAL;
	} else {
		for (i = 0; i < num_lmbs; i++) {
			if (!lmbs[i].reserved)
				continue;

			pr_info("Memory at %llx (drc index %x) was hot-added\n",
				lmbs[i].base_addr, lmbs[i].drc_index);
			lmbs[i].reserved = 0;
		}
		rc = 0;
	}

	return rc;
}
|
|
|
|
|
|
|
|
/*
 * Hot-add the LMB whose DRC index equals @drc_index, looked up in the
 * CPU-endian "ibm,dynamic-memory" copy in @prop.
 *
 * Returns 0 on success, -EINVAL if no LMB has that DRC index, or the error
 * from dlpar_add_lmb().
 */
static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
{
	struct of_drconf_cell *lmbs, *match = NULL;
	u32 num_lmbs, *p;
	int i;
	int rc;

	pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);

	p = prop->value;
	num_lmbs = p[0];
	lmbs = (struct of_drconf_cell *)&p[1];

	for (i = 0; i < num_lmbs; i++) {
		if (lmbs[i].drc_index != drc_index)
			continue;

		match = &lmbs[i];
		break;
	}

	/* an unknown DRC index is reported the same way as a failed add */
	rc = match ? dlpar_add_lmb(match) : -EINVAL;

	if (rc)
		pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
	else
		pr_info("Memory at %llx (drc index %x) was hot-added\n",
			match->base_addr, drc_index);

	return rc;
}
|
|
|
|
|
|
|
|
static void dlpar_update_drconf_property(struct device_node *dn,
|
|
|
|
struct property *prop)
|
|
|
|
{
|
|
|
|
struct of_drconf_cell *lmbs;
|
|
|
|
u32 num_lmbs, *p;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Convert the property back to BE */
|
|
|
|
p = prop->value;
|
|
|
|
num_lmbs = *p;
|
|
|
|
*p = cpu_to_be32(*p);
|
|
|
|
p++;
|
|
|
|
|
|
|
|
lmbs = (struct of_drconf_cell *)p;
|
|
|
|
for (i = 0; i < num_lmbs; i++) {
|
|
|
|
lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
|
|
|
|
lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
|
|
|
|
lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
rtas_hp_event = true;
|
|
|
|
of_update_property(dn, prop);
|
|
|
|
rtas_hp_event = false;
|
|
|
|
}
|
|
|
|
|
powerpc/pseries: Create new device hotplug entry point
The current hotplug (or dlpar) of devices (the process is generally the
same for memory, cpu, and pci) on PowerVM systems is initiated
from the HMC, which communicates the request to the partitions through
the RSCT framework. The RSCT framework then invokes the drmgr command.
The drmgr command performs the hotplug operation by doing some pieces,
such as most of the rtas calls and device tree parsing, in userspace
and make requests to the kernel to online/offline the device, update the
device tree and add/remove the device.
For PowerKVM the approach for device hotplug is to follow what is currently
being done for pci hotplug. A hotplug request is initiated from the host.
QEMU then generates an EPOW interrupt to the guest which causes the guest
to make the rtas,check-exception call. In QEMU, the rtas,check-exception call
returns a rtas hotplug event to the guest.
Please note that the current pci hotplug path for PowerKVM involves the
kernel receiving the rtas hotplug event, passing it to rtas_errd in
userspace, and having rtas_errd invoke drmgr. The drmgr command then
handles the request as described above for PowerVM systems.
There is no need for this circuitous route, we should just handle the entire
hotplug of devices in the kernel. What I am planning is to enable this
by moving the code to handle hotplug from drmgr into the kernel to
provide a single path for handling device hotplug for both PowerVM and
PowerKVM systems. This patch provides the common framework and entry point.
For PowerKVM a future update to the kernel rtas code will recognize rtas
hotplug events returned from rtas,check-exception calls and use the common
entry point to handle hotplug of the device.
For PowerVM systems, this patch creates /sys/kernel/dlpar that can be
used by the drmgr command to initiate hotplug requests. In order to do
this a string of the format "<resource> <action> <id_type> <id>" is
written to this file. The string consists of a resource (cpu, memory, pci,
phb), an action (add or remove), an id_type (count, drc index, drc name),
and the corresponding id. The kernel will parse the string and create a
rtas hotplug section that can be passed to the common entry point for
handling hotplug requests.
It should be noted that there is no chance of updating how we receive
hotplug (dlpar) requests from the HMC on PowerVM systems.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2015-02-10 19:47:02 +00:00
|
|
|
int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
|
|
|
|
{
|
2015-02-10 19:48:25 +00:00
|
|
|
struct device_node *dn;
|
|
|
|
struct property *prop;
|
|
|
|
u32 count, drc_index;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
count = hp_elog->_drc_u.drc_count;
|
|
|
|
drc_index = hp_elog->_drc_u.drc_index;
|
powerpc/pseries: Create new device hotplug entry point
The current hotplug (or dlpar) of devices (the process is generally the
same for memory, cpu, and pci) on PowerVM systems is initiated
from the HMC, which communicates the request to the partitions through
the RSCT framework. The RSCT framework then invokes the drmgr command.
The drmgr command performs the hotplug operation by doing some pieces,
such as most of the rtas calls and device tree parsing, in userspace
and make requests to the kernel to online/offline the device, update the
device tree and add/remove the device.
For PowerKVM the approach for device hotplug is to follow what is currently
being done for pci hotplug. A hotplug request is initiated from the host.
QEMU then generates an EPOW interrupt to the guest which causes the guest
to make the rtas,check-exception call. In QEMU, the rtas,check-exception call
returns a rtas hotplug event to the guest.
Please note that the current pci hotplug path for PowerKVM involves the
kernel receiving the rtas hotplug event, passing it to rtas_errd in
userspace, and having rtas_errd invoke drmgr. The drmgr command then
handles the request as described above for PowerVM systems.
There is no need for this circuitous route, we should just handle the entire
hotplug of devices in the kernel. What I am planning is to enable this
by moving the code to handle hotplug from drmgr into the kernel to
provide a single path for handling device hotplug for both PowerVM and
PowerKVM systems. This patch provides the common iframework and entry point.
For PowerKVM a future update to the kernel rtas code will recognize rtas
hotplug events returned from rtas,check-exception calls and use the common
entry point to handle hotplug of the device.
For PowerVM systems, This patch creates /sys/kernel/dlpar that can be
used by the drmgr command to initiate hotplug requests. In order to do
this a string of the format "<resource> <action> <id_type> <id>" is
written to this file. The string consists of a resource (cpu, memory, pci,
phb), an action (add or remove), an id_type (count, drc index, drc name),
and the corresponding id. The kernel will parse the string and create a
rtas hotplug section that can be passed to the common entry point for
handling hotplug requests.
It should be noted that there is no chance of updating how we receive
hotplug (dlpar) requests from the HMC on PowerVM systems.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2015-02-10 19:47:02 +00:00
|
|
|
|
|
|
|
lock_device_hotplug();
|
|
|
|
|
2015-02-10 19:48:25 +00:00
|
|
|
dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
|
2015-04-07 14:53:46 +00:00
|
|
|
if (!dn) {
|
|
|
|
rc = -EINVAL;
|
|
|
|
goto dlpar_memory_out;
|
|
|
|
}
|
2015-02-10 19:48:25 +00:00
|
|
|
|
|
|
|
prop = dlpar_clone_drconf_property(dn);
|
|
|
|
if (!prop) {
|
2015-04-07 14:53:46 +00:00
|
|
|
rc = -EINVAL;
|
|
|
|
goto dlpar_memory_out;
|
2015-02-10 19:48:25 +00:00
|
|
|
}
|
|
|
|
|
powerpc/pseries: Create new device hotplug entry point
The current hotplug (or dlpar) of devices (the process is generally the
same for memory, cpu, and pci) on PowerVM systems is initiated
from the HMC, which communicates the request to the partitions through
the RSCT framework. The RSCT framework then invokes the drmgr command.
The drmgr command performs the hotplug operation by doing some pieces,
such as most of the rtas calls and device tree parsing, in userspace
and make requests to the kernel to online/offline the device, update the
device tree and add/remove the device.
For PowerKVM the approach for device hotplug is to follow what is currently
being done for pci hotplug. A hotplug request is initiated from the host.
QEMU then generates an EPOW interrupt to the guest which causes the guest
to make the rtas,check-exception call. In QEMU, the rtas,check-exception call
returns a rtas hotplug event to the guest.
Please note that the current pci hotplug path for PowerKVM involves the
kernel receiving the rtas hotplug event, passing it to rtas_errd in
userspace, and having rtas_errd invoke drmgr. The drmgr command then
handles the request as described above for PowerVM systems.
There is no need for this circuitous route, we should just handle the entire
hotplug of devices in the kernel. What I am planning is to enable this
by moving the code to handle hotplug from drmgr into the kernel to
provide a single path for handling device hotplug for both PowerVM and
PowerKVM systems. This patch provides the common iframework and entry point.
For PowerKVM a future update to the kernel rtas code will recognize rtas
hotplug events returned from rtas,check-exception calls and use the common
entry point to handle hotplug of the device.
For PowerVM systems, This patch creates /sys/kernel/dlpar that can be
used by the drmgr command to initiate hotplug requests. In order to do
this a string of the format "<resource> <action> <id_type> <id>" is
written to this file. The string consists of a resource (cpu, memory, pci,
phb), an action (add or remove), an id_type (count, drc index, drc name),
and the corresponding id. The kernel will parse the string and create a
rtas hotplug section that can be passed to the common entry point for
handling hotplug requests.
It should be noted that there is no chance of updating how we receive
hotplug (dlpar) requests from the HMC on PowerVM systems.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2015-02-10 19:47:02 +00:00
|
|
|
switch (hp_elog->action) {
|
2015-02-10 19:48:25 +00:00
|
|
|
case PSERIES_HP_ELOG_ACTION_ADD:
|
|
|
|
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
|
|
|
|
rc = dlpar_memory_add_by_count(count, prop);
|
|
|
|
else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
|
|
|
|
rc = dlpar_memory_add_by_index(drc_index, prop);
|
|
|
|
else
|
|
|
|
rc = -EINVAL;
|
|
|
|
break;
|
2015-02-10 19:49:22 +00:00
|
|
|
case PSERIES_HP_ELOG_ACTION_REMOVE:
|
|
|
|
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
|
|
|
|
rc = dlpar_memory_remove_by_count(count, prop);
|
|
|
|
else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
|
|
|
|
rc = dlpar_memory_remove_by_index(drc_index, prop);
|
|
|
|
else
|
|
|
|
rc = -EINVAL;
|
|
|
|
break;
|
powerpc/pseries: Create new device hotplug entry point
The current hotplug (or dlpar) of devices (the process is generally the
same for memory, cpu, and pci) on PowerVM systems is initiated
from the HMC, which communicates the request to the partitions through
the RSCT framework. The RSCT framework then invokes the drmgr command.
The drmgr command performs the hotplug operation by doing some pieces,
such as most of the rtas calls and device tree parsing, in userspace
and make requests to the kernel to online/offline the device, update the
device tree and add/remove the device.
For PowerKVM the approach for device hotplug is to follow what is currently
being done for pci hotplug. A hotplug request is initiated from the host.
QEMU then generates an EPOW interrupt to the guest which causes the guest
to make the rtas,check-exception call. In QEMU, the rtas,check-exception call
returns a rtas hotplug event to the guest.
Please note that the current pci hotplug path for PowerKVM involves the
kernel receiving the rtas hotplug event, passing it to rtas_errd in
userspace, and having rtas_errd invoke drmgr. The drmgr command then
handles the request as described above for PowerVM systems.
There is no need for this circuitous route, we should just handle the entire
hotplug of devices in the kernel. What I am planning is to enable this
by moving the code to handle hotplug from drmgr into the kernel to
provide a single path for handling device hotplug for both PowerVM and
PowerKVM systems. This patch provides the common iframework and entry point.
For PowerKVM a future update to the kernel rtas code will recognize rtas
hotplug events returned from rtas,check-exception calls and use the common
entry point to handle hotplug of the device.
For PowerVM systems, This patch creates /sys/kernel/dlpar that can be
used by the drmgr command to initiate hotplug requests. In order to do
this a string of the format "<resource> <action> <id_type> <id>" is
written to this file. The string consists of a resource (cpu, memory, pci,
phb), an action (add or remove), an id_type (count, drc index, drc name),
and the corresponding id. The kernel will parse the string and create a
rtas hotplug section that can be passed to the common entry point for
handling hotplug requests.
It should be noted that there is no chance of updating how we receive
hotplug (dlpar) requests from the HMC on PowerVM systems.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2015-02-10 19:47:02 +00:00
|
|
|
default:
|
|
|
|
pr_err("Invalid action (%d) specified\n", hp_elog->action);
|
|
|
|
rc = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-02-10 19:48:25 +00:00
|
|
|
if (rc)
|
|
|
|
dlpar_free_drconf_property(prop);
|
|
|
|
else
|
|
|
|
dlpar_update_drconf_property(dn, prop);
|
|
|
|
|
2015-04-07 14:53:46 +00:00
|
|
|
dlpar_memory_out:
|
2015-02-10 19:48:25 +00:00
|
|
|
of_node_put(dn);
|
powerpc/pseries: Create new device hotplug entry point
The current hotplug (or dlpar) of devices (the process is generally the
same for memory, cpu, and pci) on PowerVM systems is initiated
from the HMC, which communicates the request to the partitions through
the RSCT framework. The RSCT framework then invokes the drmgr command.
The drmgr command performs the hotplug operation by doing some pieces,
such as most of the rtas calls and device tree parsing, in userspace
and make requests to the kernel to online/offline the device, update the
device tree and add/remove the device.
For PowerKVM the approach for device hotplug is to follow what is currently
being done for pci hotplug. A hotplug request is initiated from the host.
QEMU then generates an EPOW interrupt to the guest which causes the guest
to make the rtas,check-exception call. In QEMU, the rtas,check-exception call
returns a rtas hotplug event to the guest.
Please note that the current pci hotplug path for PowerKVM involves the
kernel receiving the rtas hotplug event, passing it to rtas_errd in
userspace, and having rtas_errd invoke drmgr. The drmgr command then
handles the request as described above for PowerVM systems.
There is no need for this circuitous route, we should just handle the entire
hotplug of devices in the kernel. What I am planning is to enable this
by moving the code to handle hotplug from drmgr into the kernel to
provide a single path for handling device hotplug for both PowerVM and
PowerKVM systems. This patch provides the common iframework and entry point.
For PowerKVM a future update to the kernel rtas code will recognize rtas
hotplug events returned from rtas,check-exception calls and use the common
entry point to handle hotplug of the device.
For PowerVM systems, This patch creates /sys/kernel/dlpar that can be
used by the drmgr command to initiate hotplug requests. In order to do
this a string of the format "<resource> <action> <id_type> <id>" is
written to this file. The string consists of a resource (cpu, memory, pci,
phb), an action (add or remove), an id_type (count, drc index, drc name),
and the corresponding id. The kernel will parse the string and create a
rtas hotplug section that can be passed to the common entry point for
handling hotplug requests.
It should be noted that there is no chance of updating how we receive
hotplug (dlpar) requests from the HMC on PowerVM systems.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2015-02-10 19:47:02 +00:00
|
|
|
unlock_device_hotplug();
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2014-01-27 16:54:06 +00:00
|
|
|
static int pseries_add_mem_node(struct device_node *np)
|
2008-04-18 20:33:52 +00:00
|
|
|
{
|
|
|
|
const char *type;
|
2014-08-19 20:44:57 +00:00
|
|
|
const __be32 *regs;
|
2008-07-03 03:20:58 +00:00
|
|
|
unsigned long base;
|
2010-07-23 00:35:52 +00:00
|
|
|
unsigned int lmb_size;
|
2008-04-18 20:33:52 +00:00
|
|
|
int ret = -EINVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check to see if we are actually adding memory
|
|
|
|
*/
|
|
|
|
type = of_get_property(np, "device_type", NULL);
|
|
|
|
if (type == NULL || strcmp(type, "memory") != 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
2010-07-12 04:36:09 +00:00
|
|
|
* Find the base and size of the memblock
|
2008-04-18 20:33:52 +00:00
|
|
|
*/
|
|
|
|
regs = of_get_property(np, "reg", NULL);
|
|
|
|
if (!regs)
|
|
|
|
return ret;
|
|
|
|
|
2014-08-19 20:44:57 +00:00
|
|
|
base = be64_to_cpu(*(unsigned long *)regs);
|
|
|
|
lmb_size = be32_to_cpu(regs[3]);
|
2008-04-18 20:33:52 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Update memory region to represent the memory add
|
|
|
|
*/
|
2010-07-23 00:35:52 +00:00
|
|
|
ret = memblock_add(base, lmb_size);
|
2008-07-03 03:22:39 +00:00
|
|
|
return (ret < 0) ? -EINVAL : 0;
|
|
|
|
}
|
|
|
|
|
2014-11-24 17:58:01 +00:00
|
|
|
static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
|
2008-07-03 03:22:39 +00:00
|
|
|
{
|
2012-10-02 16:57:57 +00:00
|
|
|
struct of_drconf_cell *new_drmem, *old_drmem;
|
2011-01-20 16:45:20 +00:00
|
|
|
unsigned long memblock_size;
|
2012-10-02 16:57:57 +00:00
|
|
|
u32 entries;
|
2014-08-19 20:44:57 +00:00
|
|
|
__be32 *p;
|
2012-10-02 16:57:57 +00:00
|
|
|
int i, rc = -EINVAL;
|
2008-07-03 03:22:39 +00:00
|
|
|
|
2015-02-10 19:48:25 +00:00
|
|
|
if (rtas_hp_event)
|
|
|
|
return 0;
|
|
|
|
|
2014-06-04 07:50:47 +00:00
|
|
|
memblock_size = pseries_memory_block_size();
|
2011-01-20 16:45:20 +00:00
|
|
|
if (!memblock_size)
|
2008-07-03 03:22:39 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2014-08-19 20:44:57 +00:00
|
|
|
p = (__be32 *) pr->old_prop->value;
|
2012-10-02 16:57:57 +00:00
|
|
|
if (!p)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* The first int of the property is the number of lmb's described
|
|
|
|
* by the property. This is followed by an array of of_drconf_cell
|
2014-08-07 05:11:58 +00:00
|
|
|
* entries. Get the number of entries and skip to the array of
|
2012-10-02 16:57:57 +00:00
|
|
|
* of_drconf_cell's.
|
|
|
|
*/
|
2014-08-19 20:44:57 +00:00
|
|
|
entries = be32_to_cpu(*p++);
|
2012-10-02 16:57:57 +00:00
|
|
|
old_drmem = (struct of_drconf_cell *)p;
|
|
|
|
|
2014-08-19 20:44:57 +00:00
|
|
|
p = (__be32 *)pr->prop->value;
|
2012-10-02 16:57:57 +00:00
|
|
|
p++;
|
|
|
|
new_drmem = (struct of_drconf_cell *)p;
|
|
|
|
|
|
|
|
for (i = 0; i < entries; i++) {
|
2014-08-19 20:44:57 +00:00
|
|
|
if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) &&
|
|
|
|
(!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) {
|
|
|
|
rc = pseries_remove_memblock(
|
|
|
|
be64_to_cpu(old_drmem[i].base_addr),
|
2012-10-02 16:57:57 +00:00
|
|
|
memblock_size);
|
|
|
|
break;
|
2014-08-19 20:44:57 +00:00
|
|
|
} else if ((!(be32_to_cpu(old_drmem[i].flags) &
|
|
|
|
DRCONF_MEM_ASSIGNED)) &&
|
|
|
|
(be32_to_cpu(new_drmem[i].flags) &
|
|
|
|
DRCONF_MEM_ASSIGNED)) {
|
|
|
|
rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr),
|
2012-10-02 16:57:57 +00:00
|
|
|
memblock_size);
|
|
|
|
rc = (rc < 0) ? -EINVAL : 0;
|
|
|
|
break;
|
|
|
|
}
|
2008-07-03 03:22:39 +00:00
|
|
|
}
|
|
|
|
return rc;
|
2008-04-18 20:33:52 +00:00
|
|
|
}
|
|
|
|
|
2008-04-18 20:33:50 +00:00
|
|
|
static int pseries_memory_notifier(struct notifier_block *nb,
|
2014-11-24 17:58:01 +00:00
|
|
|
unsigned long action, void *data)
|
2008-04-18 20:33:50 +00:00
|
|
|
{
|
2014-11-24 17:58:01 +00:00
|
|
|
struct of_reconfig_data *rd = data;
|
2011-06-21 03:35:56 +00:00
|
|
|
int err = 0;
|
2008-04-18 20:33:50 +00:00
|
|
|
|
|
|
|
switch (action) {
|
2012-10-02 16:57:57 +00:00
|
|
|
case OF_RECONFIG_ATTACH_NODE:
|
2014-11-24 17:58:01 +00:00
|
|
|
err = pseries_add_mem_node(rd->dn);
|
2008-04-18 20:33:50 +00:00
|
|
|
break;
|
2012-10-02 16:57:57 +00:00
|
|
|
case OF_RECONFIG_DETACH_NODE:
|
2014-11-24 17:58:01 +00:00
|
|
|
err = pseries_remove_mem_node(rd->dn);
|
2008-04-18 20:33:50 +00:00
|
|
|
break;
|
2012-10-02 16:57:57 +00:00
|
|
|
case OF_RECONFIG_UPDATE_PROPERTY:
|
2014-11-24 17:58:01 +00:00
|
|
|
if (!strcmp(rd->prop->name, "ibm,dynamic-memory"))
|
|
|
|
err = pseries_update_drconf_memory(rd);
|
2008-04-18 20:33:50 +00:00
|
|
|
break;
|
|
|
|
}
|
2011-06-21 03:35:56 +00:00
|
|
|
return notifier_from_errno(err);
|
2008-04-18 20:33:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct notifier_block pseries_mem_nb = {
|
|
|
|
.notifier_call = pseries_memory_notifier,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * pseries_memory_hotplug_init - hook memory hotplug into device tree
 * reconfiguration events.
 *
 * The notifier is registered only when the firmware reports the
 * FW_FEATURE_LPAR feature.  The result of the registration is not
 * examined; the function always returns 0.
 */
static int __init pseries_memory_hotplug_init(void)
{
	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;

	of_reconfig_notifier_register(&pseries_mem_nb);
	return 0;
}
machine_device_initcall(pseries, pseries_memory_hotplug_init);
|