5a0e3ad6af
percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h, making everything defined by the two files universally available and complicating inclusion dependencies. The percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities to include those headers directly instead of assuming availability. As this conversion needs to touch a large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the following. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there, i.e. if only gfp is used, gfp.h; if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and tries to put the new include such that its order conforms to its surroundings. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have a fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. 
The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually widely available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build tests were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
369 lines
10 KiB
C
369 lines
10 KiB
C
/*
|
|
* Copyright (C) 2001 Dave Engebretsen IBM Corporation
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
/* Change Activity:
|
|
* 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
|
|
* End Change Activity
|
|
*/
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/signal.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/timex.h>
|
|
#include <linux/init.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/random.h>
|
|
#include <linux/sysrq.h>
|
|
#include <linux/bitops.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/system.h>
|
|
#include <asm/io.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/irq.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/prom.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/machdep.h>
|
|
#include <asm/rtas.h>
|
|
#include <asm/udbg.h>
|
|
#include <asm/firmware.h>
|
|
|
|
#include "pseries.h"
|
|
|
|
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
|
|
static DEFINE_SPINLOCK(ras_log_buf_lock);
|
|
|
|
static char mce_data_buf[RTAS_ERROR_LOG_MAX];
|
|
|
|
static int ras_get_sensor_state_token;
|
|
static int ras_check_exception_token;
|
|
|
|
#define EPOW_SENSOR_TOKEN 9
|
|
#define EPOW_SENSOR_INDEX 0
|
|
#define RAS_VECTOR_OFFSET 0x500
|
|
|
|
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
|
|
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
|
|
|
|
|
|
static void request_ras_irqs(struct device_node *np,
|
|
irq_handler_t handler,
|
|
const char *name)
|
|
{
|
|
int i, index, count = 0;
|
|
struct of_irq oirq;
|
|
const u32 *opicprop;
|
|
unsigned int opicplen;
|
|
unsigned int virqs[16];
|
|
|
|
/* Check for obsolete "open-pic-interrupt" property. If present, then
|
|
* map those interrupts using the default interrupt host and default
|
|
* trigger
|
|
*/
|
|
opicprop = of_get_property(np, "open-pic-interrupt", &opicplen);
|
|
if (opicprop) {
|
|
opicplen /= sizeof(u32);
|
|
for (i = 0; i < opicplen; i++) {
|
|
if (count > 15)
|
|
break;
|
|
virqs[count] = irq_create_mapping(NULL, *(opicprop++));
|
|
if (virqs[count] == NO_IRQ)
|
|
printk(KERN_ERR "Unable to allocate interrupt "
|
|
"number for %s\n", np->full_name);
|
|
else
|
|
count++;
|
|
|
|
}
|
|
}
|
|
/* Else use normal interrupt tree parsing */
|
|
else {
|
|
/* First try to do a proper OF tree parsing */
|
|
for (index = 0; of_irq_map_one(np, index, &oirq) == 0;
|
|
index++) {
|
|
if (count > 15)
|
|
break;
|
|
virqs[count] = irq_create_of_mapping(oirq.controller,
|
|
oirq.specifier,
|
|
oirq.size);
|
|
if (virqs[count] == NO_IRQ)
|
|
printk(KERN_ERR "Unable to allocate interrupt "
|
|
"number for %s\n", np->full_name);
|
|
else
|
|
count++;
|
|
}
|
|
}
|
|
|
|
/* Now request them */
|
|
for (i = 0; i < count; i++) {
|
|
if (request_irq(virqs[i], handler, 0, name, NULL)) {
|
|
printk(KERN_ERR "Unable to request interrupt %d for "
|
|
"%s\n", virqs[i], np->full_name);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialize handlers for the set of interrupts caused by hardware errors
|
|
* and power system events.
|
|
*/
|
|
static int __init init_ras_IRQ(void)
|
|
{
|
|
struct device_node *np;
|
|
|
|
ras_get_sensor_state_token = rtas_token("get-sensor-state");
|
|
ras_check_exception_token = rtas_token("check-exception");
|
|
|
|
/* Internal Errors */
|
|
np = of_find_node_by_path("/event-sources/internal-errors");
|
|
if (np != NULL) {
|
|
request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR");
|
|
of_node_put(np);
|
|
}
|
|
|
|
/* EPOW Events */
|
|
np = of_find_node_by_path("/event-sources/epow-events");
|
|
if (np != NULL) {
|
|
request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW");
|
|
of_node_put(np);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
__initcall(init_ras_IRQ);
|
|
|
|
/*
|
|
* Handle power subsystem events (EPOW).
|
|
*
|
|
* Presently we just log the event has occurred. This should be fixed
|
|
* to examine the type of power failure and take appropriate action where
|
|
* the time horizon permits something useful to be done.
|
|
*/
|
|
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
|
|
{
|
|
int status = 0xdeadbeef;
|
|
int state = 0;
|
|
int critical;
|
|
|
|
status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
|
|
EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
|
|
|
|
if (state > 3)
|
|
critical = 1; /* Time Critical */
|
|
else
|
|
critical = 0;
|
|
|
|
spin_lock(&ras_log_buf_lock);
|
|
|
|
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
|
|
RAS_VECTOR_OFFSET,
|
|
irq_map[irq].hwirq,
|
|
RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
|
|
critical, __pa(&ras_log_buf),
|
|
rtas_get_error_log_max());
|
|
|
|
udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
|
|
*((unsigned long *)&ras_log_buf), status, state);
|
|
printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
|
|
*((unsigned long *)&ras_log_buf), status, state);
|
|
|
|
/* format and print the extended information */
|
|
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
|
|
|
|
spin_unlock(&ras_log_buf_lock);
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
/*
|
|
* Handle hardware error interrupts.
|
|
*
|
|
* RTAS check-exception is called to collect data on the exception. If
|
|
* the error is deemed recoverable, we log a warning and return.
|
|
* For nonrecoverable errors, an error is logged and we stop all processing
|
|
* as quickly as possible in order to prevent propagation of the failure.
|
|
*/
|
|
static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
|
|
{
|
|
struct rtas_error_log *rtas_elog;
|
|
int status = 0xdeadbeef;
|
|
int fatal;
|
|
|
|
spin_lock(&ras_log_buf_lock);
|
|
|
|
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
|
|
RAS_VECTOR_OFFSET,
|
|
irq_map[irq].hwirq,
|
|
RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
|
|
__pa(&ras_log_buf),
|
|
rtas_get_error_log_max());
|
|
|
|
rtas_elog = (struct rtas_error_log *)ras_log_buf;
|
|
|
|
if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC))
|
|
fatal = 1;
|
|
else
|
|
fatal = 0;
|
|
|
|
/* format and print the extended information */
|
|
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
|
|
|
|
if (fatal) {
|
|
udbg_printf("Fatal HW Error <0x%lx 0x%x>\n",
|
|
*((unsigned long *)&ras_log_buf), status);
|
|
printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
|
|
*((unsigned long *)&ras_log_buf), status);
|
|
|
|
#ifndef DEBUG_RTAS_POWER_OFF
|
|
/* Don't actually power off when debugging so we can test
|
|
* without actually failing while injecting errors.
|
|
* Error data will not be logged to syslog.
|
|
*/
|
|
ppc_md.power_off();
|
|
#endif
|
|
} else {
|
|
udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n",
|
|
*((unsigned long *)&ras_log_buf), status);
|
|
printk(KERN_WARNING
|
|
"Warning: Recoverable hardware error <0x%lx 0x%x>\n",
|
|
*((unsigned long *)&ras_log_buf), status);
|
|
}
|
|
|
|
spin_unlock(&ras_log_buf_lock);
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
/* Get the error information for errors coming through the
|
|
* FWNMI vectors. The pt_regs' r3 will be updated to reflect
|
|
* the actual r3 if possible, and a ptr to the error log entry
|
|
* will be returned if found.
|
|
*
|
|
* The mce_data_buf does not have any locks or protection around it,
|
|
* if a second machine check comes in, or a system reset is done
|
|
* before we have logged the error, then we will get corruption in the
|
|
* error log. This is preferable over holding off on calling
|
|
* ibm,nmi-interlock which would result in us checkstopping if a
|
|
* second machine check did come in.
|
|
*/
|
|
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
|
|
{
|
|
unsigned long errdata = regs->gpr[3];
|
|
struct rtas_error_log *errhdr = NULL;
|
|
unsigned long *savep;
|
|
|
|
if ((errdata >= 0x7000 && errdata < 0x7fff0) ||
|
|
(errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) {
|
|
savep = __va(errdata);
|
|
regs->gpr[3] = savep[0]; /* restore original r3 */
|
|
memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
|
|
memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX);
|
|
errhdr = (struct rtas_error_log *)mce_data_buf;
|
|
} else {
|
|
printk("FWNMI: corrupt r3\n");
|
|
}
|
|
return errhdr;
|
|
}
|
|
|
|
/* Call this when done with the data returned by FWNMI_get_errinfo.
|
|
* It will release the saved data area for other CPUs in the
|
|
* partition to receive FWNMI errors.
|
|
*/
|
|
static void fwnmi_release_errinfo(void)
|
|
{
|
|
int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
|
|
if (ret != 0)
|
|
printk("FWNMI: nmi-interlock failed: %d\n", ret);
|
|
}
|
|
|
|
int pSeries_system_reset_exception(struct pt_regs *regs)
|
|
{
|
|
if (fwnmi_active) {
|
|
struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
|
|
if (errhdr) {
|
|
/* XXX Should look at FWNMI information */
|
|
}
|
|
fwnmi_release_errinfo();
|
|
}
|
|
return 0; /* need to perform reset */
|
|
}
|
|
|
|
/*
|
|
* See if we can recover from a machine check exception.
|
|
* This is only called on power4 (or above) and only via
|
|
* the Firmware Non-Maskable Interrupts (fwnmi) handler
|
|
* which provides the error analysis for us.
|
|
*
|
|
* Return 1 if corrected (or delivered a signal).
|
|
* Return 0 if there is nothing we can do.
|
|
*/
|
|
static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
|
|
{
|
|
int nonfatal = 0;
|
|
|
|
if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
|
|
/* Platform corrected itself */
|
|
nonfatal = 1;
|
|
} else if ((regs->msr & MSR_RI) &&
|
|
user_mode(regs) &&
|
|
err->severity == RTAS_SEVERITY_ERROR_SYNC &&
|
|
err->disposition == RTAS_DISP_NOT_RECOVERED &&
|
|
err->target == RTAS_TARGET_MEMORY &&
|
|
err->type == RTAS_TYPE_ECC_UNCORR &&
|
|
!(current->pid == 0 || is_global_init(current))) {
|
|
/* Kill off a user process with an ECC error */
|
|
printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
|
|
current->pid);
|
|
/* XXX something better for ECC error? */
|
|
_exception(SIGBUS, regs, BUS_ADRERR, regs->nip);
|
|
nonfatal = 1;
|
|
}
|
|
|
|
log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal);
|
|
|
|
return nonfatal;
|
|
}
|
|
|
|
/*
|
|
* Handle a machine check.
|
|
*
|
|
* Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
|
|
* should be present. If so the handler which called us tells us if the
|
|
* error was recovered (never true if RI=0).
|
|
*
|
|
* On hardware prior to Power 4 these exceptions were asynchronous which
|
|
* means we can't tell exactly where it occurred and so we can't recover.
|
|
*/
|
|
int pSeries_machine_check_exception(struct pt_regs *regs)
|
|
{
|
|
struct rtas_error_log *errp;
|
|
|
|
if (fwnmi_active) {
|
|
errp = fwnmi_get_errinfo(regs);
|
|
fwnmi_release_errinfo();
|
|
if (errp && recover_mce(regs, errp))
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|