From 5e33bc4165f3edd558d9633002465a95230effc1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 28 Aug 2013 21:41:01 +0200
Subject: [PATCH 1/2] driver core / ACPI: Avoid device hot remove locking
 issues

device_hotplug_lock is held around the acpi_bus_trim() call in
acpi_scan_hot_remove() which generally removes devices (it removes
ACPI device objects at least, but it may also remove "physical"
device objects through .detach() callbacks of ACPI scan handlers).
Thus, potentially, device sysfs attributes are removed under that
lock and to remove those attributes it is necessary to hold the
s_active references of their directory entries for writing.

On the other hand, the execution of a .show() or .store() callback
from a sysfs attribute is carried out with that attribute's s_active
reference held for reading.  Consequently, if any device sysfs
attribute that may be removed from within acpi_scan_hot_remove()
through acpi_bus_trim() has a .store() or .show() callback which
acquires device_hotplug_lock, the execution of that callback may
deadlock with the removal of the attribute.  [Unfortunately, the
"online" device attribute of CPUs and memory blocks is one of them.]

To avoid such deadlocks, make all of the sysfs attribute callbacks
that need to lock device hotplug, for example store_online(), use
a special function, lock_device_hotplug_sysfs(), to lock device
hotplug and return the result of that function immediately if it is
not zero.  This will cause the s_active reference of the directory
entry in question to be released and the syscall to be restarted
if device_hotplug_lock cannot be acquired.

[show_online() actually doesn't need to lock device hotplug, but
it is useful to serialize it with respect to device_offline() and
device_online() for the same device (in case user space attempts to
run them concurrently) which can be done with the help of
device_lock().]

Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Reported-and-tested-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Toshi Kani <toshi.kani@hp.com>
---
 drivers/base/core.c    | 43 +++++++++++++++++++++++++++---------------
 drivers/base/memory.c  |  4 +++-
 include/linux/device.h |  1 +
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 8856d74545d9..ac419a15fcd4 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -49,6 +49,28 @@ static struct kobject *dev_kobj;
 struct kobject *sysfs_dev_char_kobj;
 struct kobject *sysfs_dev_block_kobj;
 
+static DEFINE_MUTEX(device_hotplug_lock);
+
+void lock_device_hotplug(void)
+{
+	mutex_lock(&device_hotplug_lock);
+}
+
+void unlock_device_hotplug(void)
+{
+	mutex_unlock(&device_hotplug_lock);
+}
+
+int lock_device_hotplug_sysfs(void)
+{
+	if (mutex_trylock(&device_hotplug_lock))
+		return 0;
+
+	/* Avoid busy looping (5 ms of sleep should do). */
+	msleep(5);
+	return restart_syscall();
+}
+
 #ifdef CONFIG_BLOCK
 static inline int device_is_not_partition(struct device *dev)
 {
@@ -408,9 +430,9 @@ static ssize_t show_online(struct device *dev, struct device_attribute *attr,
 {
 	bool val;
 
-	lock_device_hotplug();
+	device_lock(dev);
 	val = !dev->offline;
-	unlock_device_hotplug();
+	device_unlock(dev);
 	return sprintf(buf, "%u\n", val);
 }
 
@@ -424,7 +446,10 @@ static ssize_t store_online(struct device *dev, struct device_attribute *attr,
 	if (ret < 0)
 		return ret;
 
-	lock_device_hotplug();
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
+
 	ret = val ? device_online(dev) : device_offline(dev);
 	unlock_device_hotplug();
 	return ret < 0 ? ret : count;
@@ -1479,18 +1504,6 @@ EXPORT_SYMBOL_GPL(put_device);
 EXPORT_SYMBOL_GPL(device_create_file);
 EXPORT_SYMBOL_GPL(device_remove_file);
 
-static DEFINE_MUTEX(device_hotplug_lock);
-
-void lock_device_hotplug(void)
-{
-	mutex_lock(&device_hotplug_lock);
-}
-
-void unlock_device_hotplug(void)
-{
-	mutex_unlock(&device_hotplug_lock);
-}
-
 static int device_check_offline(struct device *dev, void *not_used)
 {
 	int ret;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 2b7813ec6d02..6f4c99ff1ce7 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -351,7 +351,9 @@ store_mem_state(struct device *dev,
 
 	mem = container_of(dev, struct memory_block, dev);
 
-	lock_device_hotplug();
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
 
 	if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) {
 		offline = false;
diff --git a/include/linux/device.h b/include/linux/device.h
index 22b546a58591..545a04285120 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -895,6 +895,7 @@ static inline bool device_supports_offline(struct device *dev)
 
 extern void lock_device_hotplug(void);
 extern void unlock_device_hotplug(void);
+extern int lock_device_hotplug_sysfs(void);
 extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 /*

From f943db40c29f3c82a56956e9ca36f21d6d855db9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 28 Aug 2013 21:41:07 +0200
Subject: [PATCH 2/2] ACPI / hotplug: Remove containers synchronously

The current protocol for handling hot remove of containers is very
fragile and causes acpi_eject_store() to acquire acpi_scan_lock
which may deadlock with the removal of the device that it is called
for (the reason is that device sysfs attributes cannot be removed
while their callbacks are being executed and ACPI device objects
are removed under acpi_scan_lock).

The problem is related to the fact that containers are handled by
acpi_bus_device_eject() in a special way, which is to emit an
offline uevent instead of just removing the container.  Then, user
space is expected to handle that uevent and use the container's
"eject" attribute to actually remove it.  That is fragile, because
user space may fail to complete the ejection (for example, by not
using the container's "eject" attribute at all) leaving the BIOS
kind of in a limbo.  Moreover, if the eject event is not signaled
for a container itself, but for its parent device object (or
generally, for an ancestor above it in the ACPI namespace), the
container will be removed straight away without doing that whole
dance.

For this reason, modify acpi_bus_device_eject() to remove containers
synchronously like any other objects (user space will get its uevent
anyway in case it does some other things in response to it) and
remove the eject_pending ACPI device flag that is not used any more.
This way acpi_eject_store() doesn't have a reason to acquire
acpi_scan_lock any more and one possible deadlock scenario goes
away (plus the code is simplified a bit).

Reported-and-tested-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Toshi Kani <toshi.kani@hp.com>
---
 drivers/acpi/scan.c     | 49 +++++++++++++----------------------------
 include/acpi/acpi_bus.h |  3 +--
 2 files changed, 16 insertions(+), 36 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 8a46c924effd..e2f6d9dbdf0d 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -307,6 +307,7 @@ static void acpi_bus_device_eject(void *context)
 	struct acpi_device *device = NULL;
 	struct acpi_scan_handler *handler;
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
+	int error;
 
 	mutex_lock(&acpi_scan_lock);
 
@@ -321,17 +322,13 @@ static void acpi_bus_device_eject(void *context)
 	}
 	acpi_evaluate_hotplug_ost(handle, ACPI_NOTIFY_EJECT_REQUEST,
 				  ACPI_OST_SC_EJECT_IN_PROGRESS, NULL);
-	if (handler->hotplug.mode == AHM_CONTAINER) {
-		device->flags.eject_pending = true;
+	if (handler->hotplug.mode == AHM_CONTAINER)
 		kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
-	} else {
-		int error;
 
-		get_device(&device->dev);
-		error = acpi_scan_hot_remove(device);
-		if (error)
-			goto err_out;
-	}
+	get_device(&device->dev);
+	error = acpi_scan_hot_remove(device);
+	if (error)
+		goto err_out;
 
  out:
 	mutex_unlock(&acpi_scan_lock);
@@ -516,7 +513,6 @@ acpi_eject_store(struct device *d, struct device_attribute *attr,
 	struct acpi_eject_event *ej_event;
 	acpi_object_type not_used;
 	acpi_status status;
-	u32 ost_source;
 	int ret;
 
 	if (!count || buf[0] != '1')
@@ -530,43 +526,28 @@ acpi_eject_store(struct device *d, struct device_attribute *attr,
 	if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable)
 		return -ENODEV;
 
-	mutex_lock(&acpi_scan_lock);
-
-	if (acpi_device->flags.eject_pending) {
-		/* ACPI eject notification event. */
-		ost_source = ACPI_NOTIFY_EJECT_REQUEST;
-		acpi_device->flags.eject_pending = 0;
-	} else {
-		/* Eject initiated by user space. */
-		ost_source = ACPI_OST_EC_OSPM_EJECT;
-	}
 	ej_event = kmalloc(sizeof(*ej_event), GFP_KERNEL);
 	if (!ej_event) {
 		ret = -ENOMEM;
 		goto err_out;
 	}
-	acpi_evaluate_hotplug_ost(acpi_device->handle, ost_source,
+	acpi_evaluate_hotplug_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT,
 				  ACPI_OST_SC_EJECT_IN_PROGRESS, NULL);
 	ej_event->device = acpi_device;
-	ej_event->event = ost_source;
+	ej_event->event = ACPI_OST_EC_OSPM_EJECT;
 	get_device(&acpi_device->dev);
 	status = acpi_os_hotplug_execute(acpi_bus_hot_remove_device, ej_event);
-	if (ACPI_FAILURE(status)) {
-		put_device(&acpi_device->dev);
-		kfree(ej_event);
-		ret = status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN;
-		goto err_out;
-	}
-	ret = count;
+	if (ACPI_SUCCESS(status))
+		return count;
 
- out:
-	mutex_unlock(&acpi_scan_lock);
-	return ret;
+	put_device(&acpi_device->dev);
+	kfree(ej_event);
+	ret = status == AE_NO_MEMORY ? -ENOMEM : -EAGAIN;
 
  err_out:
-	acpi_evaluate_hotplug_ost(acpi_device->handle, ost_source,
+	acpi_evaluate_hotplug_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT,
 				  ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL);
-	goto out;
+	return ret;
 }
 
 static DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 94383a70c1a3..6ff9510718d7 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -157,9 +157,8 @@ struct acpi_device_flags {
 	u32 removable:1;
 	u32 ejectable:1;
 	u32 power_manageable:1;
-	u32 eject_pending:1;
 	u32 match_driver:1;
-	u32 reserved:26;
+	u32 reserved:27;
 };
 
 /* File System */