mirror of
https://github.com/torvalds/linux.git
synced 2024-12-18 00:53:40 +00:00
powerpc/tau: Disable TAU between measurements
Enabling CONFIG_TAU_INT causes random crashes:
Unrecoverable exception 1700 at c0009414 (msr=1000)
Oops: Unrecoverable exception, sig: 6 [#1]
BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.7.0-pmac-00043-gd5f545e1a8593 #5
NIP: c0009414 LR: c0009414 CTR: c00116fc
REGS: c0799eb8 TRAP: 1700 Not tainted (5.7.0-pmac-00043-gd5f545e1a8593)
MSR: 00001000 <ME> CR: 22000228 XER: 00000100
GPR00: 00000000 c0799f70 c076e300 00800000 0291c0ac 00e00000 c076e300 00049032
GPR08: 00000001 c00116fc 00000000 dfbd3200 ffffffff 007f80a8 00000000 00000000
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c075ce04
GPR24: c075ce04 dfff8880 c07b0000 c075ce04 00080000 00000001 c079ef98 c079ef5c
NIP [c0009414] arch_cpu_idle+0x24/0x6c
LR [c0009414] arch_cpu_idle+0x24/0x6c
Call Trace:
[c0799f70] [00000001] 0x1 (unreliable)
[c0799f80] [c0060990] do_idle+0xd8/0x17c
[c0799fa0] [c0060ba4] cpu_startup_entry+0x20/0x28
[c0799fb0] [c072d220] start_kernel+0x434/0x44c
[c0799ff0] [00003860] 0x3860
Instruction dump:
XXXXXXXX XXXXXXXX XXXXXXXX 3d20c07b XXXXXXXX XXXXXXXX XXXXXXXX 7c0802a6
XXXXXXXX XXXXXXXX XXXXXXXX 4e800421 XXXXXXXX XXXXXXXX XXXXXXXX 7d2000a6
---[ end trace 3a0c9b5cb216db6b ]---
Resolve this problem by disabling each THRMn comparator when handling
the associated THRMn interrupt and by disabling the TAU entirely when
updating THRMn thresholds.
Fixes: 1da177e4c3
("Linux-2.6.12-rc2")
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/5a0ba3dc5612c7aac596727331284a3676c08472.1599260540.git.fthain@telegraphics.com.au
This commit is contained in:
parent
5e3119e15f
commit
e63d6fb563
@ -42,8 +42,6 @@ static struct tau_temp
|
|||||||
|
|
||||||
static bool tau_int_enable;
|
static bool tau_int_enable;
|
||||||
|
|
||||||
#undef DEBUG
|
|
||||||
|
|
||||||
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
|
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
|
||||||
* dynamic adjustment to minimize # of interrupts */
|
* dynamic adjustment to minimize # of interrupts */
|
||||||
/* configurable values for step size and how much to expand the window when
|
/* configurable values for step size and how much to expand the window when
|
||||||
@ -67,42 +65,33 @@ static void set_thresholds(unsigned long cpu)
|
|||||||
|
|
||||||
static void TAUupdate(int cpu)
|
static void TAUupdate(int cpu)
|
||||||
{
|
{
|
||||||
unsigned thrm;
|
u32 thrm;
|
||||||
|
u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
|
||||||
#ifdef DEBUG
|
|
||||||
printk("TAUupdate ");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* if both thresholds are crossed, the step_sizes cancel out
|
/* if both thresholds are crossed, the step_sizes cancel out
|
||||||
* and the window winds up getting expanded twice. */
|
* and the window winds up getting expanded twice. */
|
||||||
if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
|
thrm = mfspr(SPRN_THRM1);
|
||||||
if(thrm & THRM1_TIN){ /* crossed low threshold */
|
if ((thrm & bits) == bits) {
|
||||||
if (tau[cpu].low >= step_size){
|
mtspr(SPRN_THRM1, 0);
|
||||||
tau[cpu].low -= step_size;
|
|
||||||
tau[cpu].high -= (step_size - window_expand);
|
|
||||||
}
|
|
||||||
tau[cpu].grew = 1;
|
|
||||||
#ifdef DEBUG
|
|
||||||
printk("low threshold crossed ");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
|
|
||||||
if(thrm & THRM1_TIN){ /* crossed high threshold */
|
|
||||||
if (tau[cpu].high <= 127-step_size){
|
|
||||||
tau[cpu].low += (step_size - window_expand);
|
|
||||||
tau[cpu].high += step_size;
|
|
||||||
}
|
|
||||||
tau[cpu].grew = 1;
|
|
||||||
#ifdef DEBUG
|
|
||||||
printk("high threshold crossed ");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
if (tau[cpu].low >= step_size) {
|
||||||
printk("grew = %d\n", tau[cpu].grew);
|
tau[cpu].low -= step_size;
|
||||||
#endif
|
tau[cpu].high -= (step_size - window_expand);
|
||||||
|
}
|
||||||
|
tau[cpu].grew = 1;
|
||||||
|
pr_debug("%s: low threshold crossed\n", __func__);
|
||||||
|
}
|
||||||
|
thrm = mfspr(SPRN_THRM2);
|
||||||
|
if ((thrm & bits) == bits) {
|
||||||
|
mtspr(SPRN_THRM2, 0);
|
||||||
|
|
||||||
|
if (tau[cpu].high <= 127 - step_size) {
|
||||||
|
tau[cpu].low += (step_size - window_expand);
|
||||||
|
tau[cpu].high += step_size;
|
||||||
|
}
|
||||||
|
tau[cpu].grew = 1;
|
||||||
|
pr_debug("%s: high threshold crossed\n", __func__);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_TAU_INT
|
#ifdef CONFIG_TAU_INT
|
||||||
@ -127,17 +116,17 @@ void TAUException(struct pt_regs * regs)
|
|||||||
static void tau_timeout(void * info)
|
static void tau_timeout(void * info)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu;
|
||||||
unsigned long flags;
|
|
||||||
int size;
|
int size;
|
||||||
int shrink;
|
int shrink;
|
||||||
|
|
||||||
/* disabling interrupts *should* be okay */
|
|
||||||
local_irq_save(flags);
|
|
||||||
cpu = smp_processor_id();
|
cpu = smp_processor_id();
|
||||||
|
|
||||||
if (!tau_int_enable)
|
if (!tau_int_enable)
|
||||||
TAUupdate(cpu);
|
TAUupdate(cpu);
|
||||||
|
|
||||||
|
/* Stop thermal sensor comparisons and interrupts */
|
||||||
|
mtspr(SPRN_THRM3, 0);
|
||||||
|
|
||||||
size = tau[cpu].high - tau[cpu].low;
|
size = tau[cpu].high - tau[cpu].low;
|
||||||
if (size > min_window && ! tau[cpu].grew) {
|
if (size > min_window && ! tau[cpu].grew) {
|
||||||
/* do an exponential shrink of half the amount currently over size */
|
/* do an exponential shrink of half the amount currently over size */
|
||||||
@ -159,18 +148,12 @@ static void tau_timeout(void * info)
|
|||||||
|
|
||||||
set_thresholds(cpu);
|
set_thresholds(cpu);
|
||||||
|
|
||||||
/*
|
/* Restart thermal sensor comparisons and interrupts.
|
||||||
* Do the enable every time, since otherwise a bunch of (relatively)
|
|
||||||
* complex sleep code needs to be added. One mtspr every time
|
|
||||||
* tau_timeout is called is probably not a big deal.
|
|
||||||
*
|
|
||||||
* The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
|
* The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
|
||||||
* recommends that "the maximum value be set in THRM3 under all
|
* recommends that "the maximum value be set in THRM3 under all
|
||||||
* conditions."
|
* conditions."
|
||||||
*/
|
*/
|
||||||
mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
|
mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
|
||||||
|
|
||||||
local_irq_restore(flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct workqueue_struct *tau_workq;
|
static struct workqueue_struct *tau_workq;
|
||||||
|
@ -227,7 +227,7 @@ config TAU
|
|||||||
don't assume the cpu temp is actually what /proc/cpuinfo says it is.
|
don't assume the cpu temp is actually what /proc/cpuinfo says it is.
|
||||||
|
|
||||||
config TAU_INT
|
config TAU_INT
|
||||||
bool "Interrupt driven TAU driver (DANGEROUS)"
|
bool "Interrupt driven TAU driver (EXPERIMENTAL)"
|
||||||
depends on TAU
|
depends on TAU
|
||||||
help
|
help
|
||||||
The TAU supports an interrupt driven mode which causes an interrupt
|
The TAU supports an interrupt driven mode which causes an interrupt
|
||||||
@ -235,12 +235,7 @@ config TAU_INT
|
|||||||
to get notified the temp has exceeded a range. With this option off,
|
to get notified the temp has exceeded a range. With this option off,
|
||||||
a timer is used to re-check the temperature periodically.
|
a timer is used to re-check the temperature periodically.
|
||||||
|
|
||||||
However, on some cpus it appears that the TAU interrupt hardware
|
If in doubt, say N here.
|
||||||
is buggy and can cause a situation which would lead unexplained hard
|
|
||||||
lockups.
|
|
||||||
|
|
||||||
Unless you are extending the TAU driver, or enjoy kernel/hardware
|
|
||||||
debugging, leave this option off.
|
|
||||||
|
|
||||||
config TAU_AVERAGE
|
config TAU_AVERAGE
|
||||||
bool "Average high and low temp"
|
bool "Average high and low temp"
|
||||||
|
Loading…
Reference in New Issue
Block a user