mirror of
https://github.com/torvalds/linux.git
synced 2024-11-27 06:31:52 +00:00
softlockup: make detector be aware of task switch of processes hogging cpu
For now, the soft lockup detector warns only once for each case of process softlockup. However, the 'watchdog/n' thread may not always get the cpu in the time slot between the task switch of two processes hogging that cpu, so it may not get the chance to reset soft_watchdog_warn.

An example would be two processes hogging the cpu. Process A causes the softlockup warning and is killed manually by a user. Process B immediately becomes the new process hogging the cpu, preventing the softlockup code from resetting the soft_watchdog_warn variable.

This case is a false negative of "warn only once for a process", as there may be a different process that is going to hog the cpu. Resolve this by saving and checking the task pointer of the hogging process and using that to reset soft_watchdog_warn too.

[dzickus@redhat.com: update comment]
Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
f775da2fc2
commit
b1a8de1f53
@ -47,6 +47,7 @@ static DEFINE_PER_CPU(bool, softlockup_touch_sync);
|
|||||||
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
|
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
|
||||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
|
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
|
||||||
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
|
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
|
||||||
|
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
|
||||||
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
||||||
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
|
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
|
||||||
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
||||||
@ -333,8 +334,22 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||||||
return HRTIMER_RESTART;
|
return HRTIMER_RESTART;
|
||||||
|
|
||||||
/* only warn once */
|
/* only warn once */
|
||||||
if (__this_cpu_read(soft_watchdog_warn) == true)
|
if (__this_cpu_read(soft_watchdog_warn) == true) {
|
||||||
|
/*
|
||||||
|
* When multiple processes are causing softlockups the
|
||||||
|
* softlockup detector only warns on the first one
|
||||||
|
* because the code relies on a full quiet cycle to
|
||||||
|
* re-arm. The second process prevents the quiet cycle
|
||||||
|
* and never gets reported. Use task pointers to detect
|
||||||
|
* this.
|
||||||
|
*/
|
||||||
|
if (__this_cpu_read(softlockup_task_ptr_saved) !=
|
||||||
|
current) {
|
||||||
|
__this_cpu_write(soft_watchdog_warn, false);
|
||||||
|
__touch_watchdog();
|
||||||
|
}
|
||||||
return HRTIMER_RESTART;
|
return HRTIMER_RESTART;
|
||||||
|
}
|
||||||
|
|
||||||
if (softlockup_all_cpu_backtrace) {
|
if (softlockup_all_cpu_backtrace) {
|
||||||
/* Prevent multiple soft-lockup reports if one cpu is already
|
/* Prevent multiple soft-lockup reports if one cpu is already
|
||||||
@ -350,6 +365,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||||||
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
||||||
smp_processor_id(), duration,
|
smp_processor_id(), duration,
|
||||||
current->comm, task_pid_nr(current));
|
current->comm, task_pid_nr(current));
|
||||||
|
__this_cpu_write(softlockup_task_ptr_saved, current);
|
||||||
print_modules();
|
print_modules();
|
||||||
print_irqtrace_events(current);
|
print_irqtrace_events(current);
|
||||||
if (regs)
|
if (regs)
|
||||||
|
Loading…
Reference in New Issue
Block a user