From e441fd68663e298e99a99e215e0144a0eda6250d Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 18 Apr 2017 09:59:58 +0530 Subject: [PATCH 1/2] thermal: core: Allow orderly_poweroff to be called only once thermal_zone_device_check --> thermal_zone_device_update --> handle_thermal_trip --> handle_critical_trips --> orderly_poweroff The above sequence happens every 250/500 ms based on the configuration. The orderly_poweroff function is getting called every 250/500 ms. With a full-fledged file system it takes at least 5-10 seconds to power off gracefully. In that period, because thermal_zone_device_check keeps triggering periodically, the thermal work queue is bombarded with repeated orderly_poweroff calls, eventually leading to failures in gracefully powering off the system. Make sure that orderly_poweroff is called only once. Signed-off-by: Keerthy Acked-by: Eduardo Valentin Signed-off-by: Zhang Rui --- drivers/thermal/thermal_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 11f0675cb7e5..8337c272d3ec 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -45,8 +45,10 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static DEFINE_MUTEX(poweroff_lock); static atomic_t in_suspend; +static bool power_off_triggered; static struct thermal_governor *def_governor; @@ -342,7 +344,12 @@ static void handle_critical_trips(struct thermal_zone_device *tz, dev_emerg(&tz->device, "critical temperature reached(%d C),shutting down\n", tz->temperature / 1000); - orderly_poweroff(true); + mutex_lock(&poweroff_lock); + if (!power_off_triggered) { + orderly_poweroff(true); + power_off_triggered = true; + } + mutex_unlock(&poweroff_lock); } } @@ -1463,6 +1470,7 @@ static int __init thermal_init(void) { int result; + mutex_init(&poweroff_lock); result = 
thermal_register_governors(); if (result) goto error; @@ -1497,6 +1505,7 @@ error: ida_destroy(&thermal_cdev_ida); mutex_destroy(&thermal_list_lock); mutex_destroy(&thermal_governor_lock); + mutex_destroy(&poweroff_lock); return result; } From ef1d87e06ab4d3f9a95f02517ecc50902dc233a7 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 18 Apr 2017 09:59:59 +0530 Subject: [PATCH 2/2] thermal: core: Add a back up thermal shutdown mechanism orderly_poweroff is triggered when a graceful shutdown of the system is desired. This may be used in many critical states of the kernel such as when subsystems detect conditions such as critical temperatures. However, in certain conditions in system boot up sequences like those in the middle of driver probes being initiated, userspace will be unable to power off the system in a clean manner and leave the system in a critical state. In cases like these, the /sbin/poweroff will return success (having forked off to attempt powering off the system). However, the system overall will fail to completely power off (since other modules will be probed) and the system is still functional with no userspace (since that would have shut itself off). However, there is no clean way of detecting such failure of userspace powering off the system. In such scenarios, it is necessary for a backup workqueue to be able to force a shutdown of the system when orderly shutdown is not successful after a configurable time period. 
Reported-by: Nishanth Menon Signed-off-by: Keerthy Acked-by: Eduardo Valentin Signed-off-by: Zhang Rui --- Documentation/thermal/sysfs-api.txt | 21 ++++++++++++ drivers/thermal/Kconfig | 17 +++++++++ drivers/thermal/thermal_core.c | 53 +++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index ef473dc7f55e..bb9a0a53e76b 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -582,3 +582,24 @@ platform data is provided, this uses the step_wise throttling policy. This function serves as an arbitrator to set the state of a cooling device. It sets the cooling device to the deepest cooling state if possible. + +6. thermal_emergency_poweroff: + +On an event of critical trip temperature crossing, the thermal framework +allows the system to shut down gracefully by calling orderly_poweroff(). +In the event of a failure of orderly_poweroff() to shut down the system +we are in danger of keeping the system alive at undesirably high +temperatures. To mitigate this high-risk scenario we program a work +queue to fire after a pre-determined number of milliseconds to start +an emergency shutdown of the device using the kernel_power_off() +function. In case kernel_power_off() fails then finally +emergency_restart() is called in the worst case. + +The delay should be carefully profiled so as to give adequate time for +orderly_poweroff(). In case of failure of an orderly_poweroff() the +emergency poweroff kicks in after the delay has elapsed and shuts down +the system. + +If set to 0 emergency poweroff will not be supported. So a carefully +profiled non-zero positive value is a must for emergency poweroff to be +triggered. 
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 776b34396144..74ef51dfb816 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -15,6 +15,23 @@ menuconfig THERMAL if THERMAL +config THERMAL_EMERGENCY_POWEROFF_DELAY_MS + int "Emergency poweroff delay in milli-seconds" + depends on THERMAL + default 0 + help + Thermal subsystem will issue a graceful shutdown when + critical temperatures are reached using orderly_poweroff(). In + case of failure of an orderly_poweroff(), the thermal emergency + poweroff kicks in after a delay has elapsed and shuts down the system. + This config is number of milliseconds to delay before emergency + poweroff kicks in. Similarly to the critical trip point, + the delay should be carefully profiled so as to give adequate + time for orderly_poweroff() to finish on regular execution. + If set to 0 emergency poweroff will not be supported. + + In doubt, leave as 0. + config THERMAL_HWMON bool prompt "Expose thermal sensors as hwmon device" diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 8337c272d3ec..b21b9cc2c8d6 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -324,6 +324,54 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz, def_governor->throttle(tz, trip); } +/** + * thermal_emergency_poweroff_func - emergency poweroff work after a known delay + * @work: work_struct associated with the emergency poweroff function + * + * This function is called in very critical situations to force + * a kernel poweroff after a configurable timeout value. + */ +static void thermal_emergency_poweroff_func(struct work_struct *work) +{ + /* + * We have reached here after the emergency thermal shutdown + * Waiting period has expired. This means orderly_poweroff has + * not been able to shut off the system for some reason. 
+ * Try to shut down the system immediately using kernel_power_off + * if populated + */ + WARN(1, "Attempting kernel_power_off: Temperature too high\n"); + kernel_power_off(); + + /* + * Worst of the worst case trigger emergency restart + */ + WARN(1, "Attempting emergency_restart: Temperature too high\n"); + emergency_restart(); +} + +static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work, + thermal_emergency_poweroff_func); + +/** + * thermal_emergency_poweroff - Trigger an emergency system poweroff + * + * This may be called from any critical situation to trigger a system shutdown + * after a known period of time. By default this is not scheduled. + */ +void thermal_emergency_poweroff(void) +{ + int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS; + /* + * poweroff_delay_ms must be a carefully profiled positive value. + * Its a must for thermal_emergency_poweroff_work to be scheduled + */ + if (poweroff_delay_ms <= 0) + return; + schedule_delayed_work(&thermal_emergency_poweroff_work, + msecs_to_jiffies(poweroff_delay_ms)); +} + static void handle_critical_trips(struct thermal_zone_device *tz, int trip, enum thermal_trip_type trip_type) { @@ -346,6 +394,11 @@ static void handle_critical_trips(struct thermal_zone_device *tz, tz->temperature / 1000); mutex_lock(&poweroff_lock); if (!power_off_triggered) { + /* + * Queue a backup emergency shutdown in the event of + * orderly_poweroff failure + */ + thermal_emergency_poweroff(); orderly_poweroff(true); power_off_triggered = true; }