mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 20:22:09 +00:00
mm, hotplug: fix concurrent memory hot-add deadlock
There's a deadlock when concurrently hot-adding memory through the probe interface and switching a memory block from offline to online. When hot-adding memory via the probe interface, add_memory() first takes mem_hotplug_begin() and then device_lock() is later taken when registering the newly initialized memory block. This creates a lock dependency of (1) mem_hotplug.lock (2) dev->mutex. When switching a memory block from offline to online, dev->mutex is first grabbed in device_online() when the write(2) transitions an existing memory block from offline to online, and then online_pages() will take mem_hotplug_begin(). This creates a lock inversion between mem_hotplug.lock and dev->mutex. Vitaly reports that this deadlock can happen when kworker handling a probe event races with systemd-udevd switching a memory block's state. This patch requires the state transition to take mem_hotplug_begin() before dev->mutex. Hot-adding memory via the probe interface creates a memory block while holding mem_hotplug_begin(), there is no way to take dev->mutex first in this case. online_pages() and offline_pages() are only called when transitioning memory block state. We now require that mem_hotplug_begin() is taken before calling them -- this requires exporting the mem_hotplug_begin() and mem_hotplug_done() to generic code. In all hot-add and hot-remove cases, mem_hotplug_begin() is done prior to device_online(). This is all that is needed to avoid the deadlock. Signed-off-by: David Rientjes <rientjes@google.com> Reported-by: Vitaly Kuznetsov <vkuznets@redhat.com> Tested-by: Vitaly Kuznetsov <vkuznets@redhat.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: "K. Y. Srinivasan" <kys@microsoft.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Tang Chen <tangchen@cn.fujitsu.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Zhang Zhen <zhenzhang.zhang@huawei.com> Cc: Vladimir Davydov <vdavydov@parallels.com> Cc: Wang Nan <wangnan0@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
17e0db822b
commit
30467e0b3b
@ -219,6 +219,7 @@ static bool pages_correctly_reserved(unsigned long start_pfn)
|
|||||||
/*
|
/*
|
||||||
* MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
|
* MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
|
||||||
* OK to have direct references to sparsemem variables in here.
|
* OK to have direct references to sparsemem variables in here.
|
||||||
|
* Must already be protected by mem_hotplug_begin().
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
memory_block_action(unsigned long phys_index, unsigned long action, int online_type)
|
memory_block_action(unsigned long phys_index, unsigned long action, int online_type)
|
||||||
@ -286,6 +287,7 @@ static int memory_subsys_online(struct device *dev)
|
|||||||
if (mem->online_type < 0)
|
if (mem->online_type < 0)
|
||||||
mem->online_type = MMOP_ONLINE_KEEP;
|
mem->online_type = MMOP_ONLINE_KEEP;
|
||||||
|
|
||||||
|
/* Already under protection of mem_hotplug_begin() */
|
||||||
ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
|
ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
|
||||||
|
|
||||||
/* clear online_type */
|
/* clear online_type */
|
||||||
@ -328,17 +330,19 @@ store_mem_state(struct device *dev,
|
|||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Memory hotplug needs to hold mem_hotplug_begin() for probe to find
|
||||||
|
* the correct memory block to online before doing device_online(dev),
|
||||||
|
* which will take dev->mutex. Take the lock early to prevent an
|
||||||
|
* inversion, memory_subsys_online() callbacks will be implemented by
|
||||||
|
* assuming it's already protected.
|
||||||
|
*/
|
||||||
|
mem_hotplug_begin();
|
||||||
|
|
||||||
switch (online_type) {
|
switch (online_type) {
|
||||||
case MMOP_ONLINE_KERNEL:
|
case MMOP_ONLINE_KERNEL:
|
||||||
case MMOP_ONLINE_MOVABLE:
|
case MMOP_ONLINE_MOVABLE:
|
||||||
case MMOP_ONLINE_KEEP:
|
case MMOP_ONLINE_KEEP:
|
||||||
/*
|
|
||||||
* mem->online_type is not protected so there can be a
|
|
||||||
* race here. However, when racing online, the first
|
|
||||||
* will succeed and the second will just return as the
|
|
||||||
* block will already be online. The online type
|
|
||||||
* could be either one, but that is expected.
|
|
||||||
*/
|
|
||||||
mem->online_type = online_type;
|
mem->online_type = online_type;
|
||||||
ret = device_online(&mem->dev);
|
ret = device_online(&mem->dev);
|
||||||
break;
|
break;
|
||||||
@ -349,6 +353,7 @@ store_mem_state(struct device *dev,
|
|||||||
ret = -EINVAL; /* should never happen */
|
ret = -EINVAL; /* should never happen */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mem_hotplug_done();
|
||||||
err:
|
err:
|
||||||
unlock_device_hotplug();
|
unlock_device_hotplug();
|
||||||
|
|
||||||
|
@ -192,6 +192,9 @@ extern void get_page_bootmem(unsigned long ingo, struct page *page,
|
|||||||
void get_online_mems(void);
|
void get_online_mems(void);
|
||||||
void put_online_mems(void);
|
void put_online_mems(void);
|
||||||
|
|
||||||
|
void mem_hotplug_begin(void);
|
||||||
|
void mem_hotplug_done(void);
|
||||||
|
|
||||||
#else /* ! CONFIG_MEMORY_HOTPLUG */
|
#else /* ! CONFIG_MEMORY_HOTPLUG */
|
||||||
/*
|
/*
|
||||||
* Stub functions for when hotplug is off
|
* Stub functions for when hotplug is off
|
||||||
@ -231,6 +234,9 @@ static inline int try_online_node(int nid)
|
|||||||
static inline void get_online_mems(void) {}
|
static inline void get_online_mems(void) {}
|
||||||
static inline void put_online_mems(void) {}
|
static inline void put_online_mems(void) {}
|
||||||
|
|
||||||
|
static inline void mem_hotplug_begin(void) {}
|
||||||
|
static inline void mem_hotplug_done(void) {}
|
||||||
|
|
||||||
#endif /* ! CONFIG_MEMORY_HOTPLUG */
|
#endif /* ! CONFIG_MEMORY_HOTPLUG */
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||||
|
@ -104,7 +104,7 @@ void put_online_mems(void)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mem_hotplug_begin(void)
|
void mem_hotplug_begin(void)
|
||||||
{
|
{
|
||||||
mem_hotplug.active_writer = current;
|
mem_hotplug.active_writer = current;
|
||||||
|
|
||||||
@ -119,7 +119,7 @@ static void mem_hotplug_begin(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mem_hotplug_done(void)
|
void mem_hotplug_done(void)
|
||||||
{
|
{
|
||||||
mem_hotplug.active_writer = NULL;
|
mem_hotplug.active_writer = NULL;
|
||||||
mutex_unlock(&mem_hotplug.lock);
|
mutex_unlock(&mem_hotplug.lock);
|
||||||
@ -959,6 +959,7 @@ static void node_states_set_node(int node, struct memory_notify *arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Must be protected by mem_hotplug_begin() */
|
||||||
int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
|
int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
@ -969,7 +970,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
|||||||
int ret;
|
int ret;
|
||||||
struct memory_notify arg;
|
struct memory_notify arg;
|
||||||
|
|
||||||
mem_hotplug_begin();
|
|
||||||
/*
|
/*
|
||||||
* This doesn't need a lock to do pfn_to_page().
|
* This doesn't need a lock to do pfn_to_page().
|
||||||
* The section can't be removed here because of the
|
* The section can't be removed here because of the
|
||||||
@ -977,21 +977,20 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
|||||||
*/
|
*/
|
||||||
zone = page_zone(pfn_to_page(pfn));
|
zone = page_zone(pfn_to_page(pfn));
|
||||||
|
|
||||||
ret = -EINVAL;
|
|
||||||
if ((zone_idx(zone) > ZONE_NORMAL ||
|
if ((zone_idx(zone) > ZONE_NORMAL ||
|
||||||
online_type == MMOP_ONLINE_MOVABLE) &&
|
online_type == MMOP_ONLINE_MOVABLE) &&
|
||||||
!can_online_high_movable(zone))
|
!can_online_high_movable(zone))
|
||||||
goto out;
|
return -EINVAL;
|
||||||
|
|
||||||
if (online_type == MMOP_ONLINE_KERNEL &&
|
if (online_type == MMOP_ONLINE_KERNEL &&
|
||||||
zone_idx(zone) == ZONE_MOVABLE) {
|
zone_idx(zone) == ZONE_MOVABLE) {
|
||||||
if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
|
if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
|
||||||
goto out;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
if (online_type == MMOP_ONLINE_MOVABLE &&
|
if (online_type == MMOP_ONLINE_MOVABLE &&
|
||||||
zone_idx(zone) == ZONE_MOVABLE - 1) {
|
zone_idx(zone) == ZONE_MOVABLE - 1) {
|
||||||
if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
|
if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
|
||||||
goto out;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Previous code may changed the zone of the pfn range */
|
/* Previous code may changed the zone of the pfn range */
|
||||||
@ -1007,7 +1006,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
|||||||
ret = notifier_to_errno(ret);
|
ret = notifier_to_errno(ret);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
memory_notify(MEM_CANCEL_ONLINE, &arg);
|
memory_notify(MEM_CANCEL_ONLINE, &arg);
|
||||||
goto out;
|
return ret;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* If this zone is not populated, then it is not in zonelist.
|
* If this zone is not populated, then it is not in zonelist.
|
||||||
@ -1031,7 +1030,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
|||||||
(((unsigned long long) pfn + nr_pages)
|
(((unsigned long long) pfn + nr_pages)
|
||||||
<< PAGE_SHIFT) - 1);
|
<< PAGE_SHIFT) - 1);
|
||||||
memory_notify(MEM_CANCEL_ONLINE, &arg);
|
memory_notify(MEM_CANCEL_ONLINE, &arg);
|
||||||
goto out;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
zone->present_pages += onlined_pages;
|
zone->present_pages += onlined_pages;
|
||||||
@ -1061,9 +1060,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
|
|||||||
|
|
||||||
if (onlined_pages)
|
if (onlined_pages)
|
||||||
memory_notify(MEM_ONLINE, &arg);
|
memory_notify(MEM_ONLINE, &arg);
|
||||||
out:
|
return 0;
|
||||||
mem_hotplug_done();
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
|
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
|
||||||
|
|
||||||
@ -1688,21 +1685,18 @@ static int __ref __offline_pages(unsigned long start_pfn,
|
|||||||
if (!test_pages_in_a_zone(start_pfn, end_pfn))
|
if (!test_pages_in_a_zone(start_pfn, end_pfn))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
mem_hotplug_begin();
|
|
||||||
|
|
||||||
zone = page_zone(pfn_to_page(start_pfn));
|
zone = page_zone(pfn_to_page(start_pfn));
|
||||||
node = zone_to_nid(zone);
|
node = zone_to_nid(zone);
|
||||||
nr_pages = end_pfn - start_pfn;
|
nr_pages = end_pfn - start_pfn;
|
||||||
|
|
||||||
ret = -EINVAL;
|
|
||||||
if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages))
|
if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages))
|
||||||
goto out;
|
return -EINVAL;
|
||||||
|
|
||||||
/* set above range as isolated */
|
/* set above range as isolated */
|
||||||
ret = start_isolate_page_range(start_pfn, end_pfn,
|
ret = start_isolate_page_range(start_pfn, end_pfn,
|
||||||
MIGRATE_MOVABLE, true);
|
MIGRATE_MOVABLE, true);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
return ret;
|
||||||
|
|
||||||
arg.start_pfn = start_pfn;
|
arg.start_pfn = start_pfn;
|
||||||
arg.nr_pages = nr_pages;
|
arg.nr_pages = nr_pages;
|
||||||
@ -1795,7 +1789,6 @@ repeat:
|
|||||||
writeback_set_ratelimit();
|
writeback_set_ratelimit();
|
||||||
|
|
||||||
memory_notify(MEM_OFFLINE, &arg);
|
memory_notify(MEM_OFFLINE, &arg);
|
||||||
mem_hotplug_done();
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
failed_removal:
|
failed_removal:
|
||||||
@ -1805,12 +1798,10 @@ failed_removal:
|
|||||||
memory_notify(MEM_CANCEL_OFFLINE, &arg);
|
memory_notify(MEM_CANCEL_OFFLINE, &arg);
|
||||||
/* pushback to free area */
|
/* pushback to free area */
|
||||||
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
|
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
|
||||||
|
|
||||||
out:
|
|
||||||
mem_hotplug_done();
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Must be protected by mem_hotplug_begin() */
|
||||||
int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
|
int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
|
||||||
{
|
{
|
||||||
return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);
|
return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);
|
||||||
|
Loading…
Reference in New Issue
Block a user