Merge branch 'eeh' into next
This commit is contained in:
commit
aba0eb84c8
@ -31,6 +31,9 @@ struct dev_archdata {
|
||||
#ifdef CONFIG_SWIOTLB
|
||||
dma_addr_t max_direct_dma_addr;
|
||||
#endif
|
||||
#ifdef CONFIG_EEH
|
||||
struct eeh_dev *edev;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct pdev_archdata {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* eeh.h
|
||||
* Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation.
|
||||
* Copyright 2001-2012 IBM Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@ -31,44 +31,105 @@ struct device_node;
|
||||
|
||||
#ifdef CONFIG_EEH
|
||||
|
||||
/*
|
||||
* The struct is used to trace EEH state for the associated
|
||||
* PCI device node or PCI device. In future, it might
|
||||
* represent PE as well so that the EEH devices form
|
||||
* another tree besides the currently existing tree of PCI
|
||||
* buses and PCI devices
|
||||
*/
|
||||
#define EEH_MODE_SUPPORTED (1<<0) /* EEH supported on the device */
|
||||
#define EEH_MODE_NOCHECK (1<<1) /* EEH check should be skipped */
|
||||
#define EEH_MODE_ISOLATED (1<<2) /* The device has been isolated */
|
||||
#define EEH_MODE_RECOVERING (1<<3) /* Recovering the device */
|
||||
#define EEH_MODE_IRQ_DISABLED (1<<4) /* Interrupt disabled */
|
||||
|
||||
struct eeh_dev {
|
||||
int mode; /* EEH mode */
|
||||
int class_code; /* Class code of the device */
|
||||
int config_addr; /* Config address */
|
||||
int pe_config_addr; /* PE config address */
|
||||
int check_count; /* Times of ignored error */
|
||||
int freeze_count; /* Times of froze up */
|
||||
int false_positives; /* Times of reported #ff's */
|
||||
u32 config_space[16]; /* Saved PCI config space */
|
||||
struct pci_controller *phb; /* Associated PHB */
|
||||
struct device_node *dn; /* Associated device node */
|
||||
struct pci_dev *pdev; /* Associated PCI device */
|
||||
};
|
||||
|
||||
static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
|
||||
{
|
||||
return edev->dn;
|
||||
}
|
||||
|
||||
static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
|
||||
{
|
||||
return edev->pdev;
|
||||
}
|
||||
|
||||
/*
|
||||
* The struct is used to trace the registered EEH operation
|
||||
* callback functions. Actually, those operation callback
|
||||
* functions are heavily platform dependent. That means the
|
||||
* platform should register its own EEH operation callback
|
||||
* functions before any further EEH operations.
|
||||
*/
|
||||
#define EEH_OPT_DISABLE 0 /* EEH disable */
|
||||
#define EEH_OPT_ENABLE 1 /* EEH enable */
|
||||
#define EEH_OPT_THAW_MMIO 2 /* MMIO enable */
|
||||
#define EEH_OPT_THAW_DMA 3 /* DMA enable */
|
||||
#define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */
|
||||
#define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */
|
||||
#define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */
|
||||
#define EEH_STATE_MMIO_ACTIVE (1 << 3) /* Active MMIO */
|
||||
#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */
|
||||
#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */
|
||||
#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */
|
||||
#define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */
|
||||
#define EEH_RESET_HOT 1 /* Hot reset */
|
||||
#define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */
|
||||
#define EEH_LOG_TEMP 1 /* EEH temporary error log */
|
||||
#define EEH_LOG_PERM 2 /* EEH permanent error log */
|
||||
|
||||
struct eeh_ops {
|
||||
char *name;
|
||||
int (*init)(void);
|
||||
int (*set_option)(struct device_node *dn, int option);
|
||||
int (*get_pe_addr)(struct device_node *dn);
|
||||
int (*get_state)(struct device_node *dn, int *state);
|
||||
int (*reset)(struct device_node *dn, int option);
|
||||
int (*wait_state)(struct device_node *dn, int max_wait);
|
||||
int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len);
|
||||
int (*configure_bridge)(struct device_node *dn);
|
||||
int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
|
||||
int (*write_config)(struct device_node *dn, int where, int size, u32 val);
|
||||
};
|
||||
|
||||
extern struct eeh_ops *eeh_ops;
|
||||
extern int eeh_subsystem_enabled;
|
||||
|
||||
/* Values for eeh_mode bits in device_node */
|
||||
#define EEH_MODE_SUPPORTED (1<<0)
|
||||
#define EEH_MODE_NOCHECK (1<<1)
|
||||
#define EEH_MODE_ISOLATED (1<<2)
|
||||
#define EEH_MODE_RECOVERING (1<<3)
|
||||
#define EEH_MODE_IRQ_DISABLED (1<<4)
|
||||
|
||||
/* Max number of EEH freezes allowed before we consider the device
|
||||
* to be permanently disabled. */
|
||||
/*
|
||||
* Max number of EEH freezes allowed before we consider the device
|
||||
* to be permanently disabled.
|
||||
*/
|
||||
#define EEH_MAX_ALLOWED_FREEZES 5
|
||||
|
||||
void * __devinit eeh_dev_init(struct device_node *dn, void *data);
|
||||
void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb);
|
||||
void __init eeh_dev_phb_init(void);
|
||||
void __init eeh_init(void);
|
||||
#ifdef CONFIG_PPC_PSERIES
|
||||
int __init eeh_pseries_init(void);
|
||||
#endif
|
||||
int __init eeh_ops_register(struct eeh_ops *ops);
|
||||
int __exit eeh_ops_unregister(const char *name);
|
||||
unsigned long eeh_check_failure(const volatile void __iomem *token,
|
||||
unsigned long val);
|
||||
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
|
||||
void __init pci_addr_cache_build(void);
|
||||
|
||||
/**
|
||||
* eeh_add_device_early
|
||||
* eeh_add_device_late
|
||||
*
|
||||
* Perform eeh initialization for devices added after boot.
|
||||
* Call eeh_add_device_early before doing any i/o to the
|
||||
* device (including config space i/o). Call eeh_add_device_late
|
||||
* to finish the eeh setup for this device.
|
||||
*/
|
||||
void eeh_add_device_tree_early(struct device_node *);
|
||||
void eeh_add_device_tree_late(struct pci_bus *);
|
||||
|
||||
/**
|
||||
* eeh_remove_bus_device - undo EEH for device & children.
|
||||
* @dev: pci device to be removed
|
||||
*
|
||||
* As above, this removes the device; it also removes child
|
||||
* pci devices as well.
|
||||
*/
|
||||
void eeh_remove_bus_device(struct pci_dev *);
|
||||
|
||||
/**
|
||||
@ -87,8 +148,25 @@ void eeh_remove_bus_device(struct pci_dev *);
|
||||
#define EEH_IO_ERROR_VALUE(size) (~0U >> ((4 - (size)) * 8))
|
||||
|
||||
#else /* !CONFIG_EEH */
|
||||
|
||||
static inline void *eeh_dev_init(struct device_node *dn, void *data)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
|
||||
|
||||
static inline void eeh_dev_phb_init(void) { }
|
||||
|
||||
static inline void eeh_init(void) { }
|
||||
|
||||
#ifdef CONFIG_PPC_PSERIES
|
||||
static inline int eeh_pseries_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_PPC_PSERIES */
|
||||
|
||||
static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
|
||||
{
|
||||
return val;
|
||||
|
@ -1,6 +1,4 @@
|
||||
/*
|
||||
* eeh_event.h
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -22,32 +20,19 @@
|
||||
#define ASM_POWERPC_EEH_EVENT_H
|
||||
#ifdef __KERNEL__
|
||||
|
||||
/** EEH event -- structure holding pci controller data that describes
|
||||
* a change in the isolation status of a PCI slot. A pointer
|
||||
* to this struct is passed as the data pointer in a notify callback.
|
||||
/*
|
||||
* structure holding pci controller data that describes a
|
||||
* change in the isolation status of a PCI slot. A pointer
|
||||
* to this struct is passed as the data pointer in a notify
|
||||
* callback.
|
||||
*/
|
||||
struct eeh_event {
|
||||
struct list_head list;
|
||||
struct device_node *dn; /* struct device node */
|
||||
struct pci_dev *dev; /* affected device */
|
||||
struct list_head list; /* to form event queue */
|
||||
struct eeh_dev *edev; /* EEH device */
|
||||
};
|
||||
|
||||
/**
|
||||
* eeh_send_failure_event - generate a PCI error event
|
||||
* @dev pci device
|
||||
*
|
||||
* This routine builds a PCI error event which will be delivered
|
||||
* to all listeners on the eeh_notifier_chain.
|
||||
*
|
||||
* This routine can be called within an interrupt context;
|
||||
* the actual event will be delivered in a normal context
|
||||
* (from a workqueue).
|
||||
*/
|
||||
int eeh_send_failure_event (struct device_node *dn,
|
||||
struct pci_dev *dev);
|
||||
|
||||
/* Main recovery function */
|
||||
struct pci_dn * handle_eeh_events (struct eeh_event *);
|
||||
int eeh_send_failure_event(struct eeh_dev *edev);
|
||||
struct eeh_dev *handle_eeh_events(struct eeh_event *);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* ASM_POWERPC_EEH_EVENT_H */
|
||||
|
@ -47,92 +47,21 @@ extern int rtas_setup_phb(struct pci_controller *phb);
|
||||
|
||||
extern unsigned long pci_probe_only;
|
||||
|
||||
/* ---- EEH internal-use-only related routines ---- */
|
||||
#ifdef CONFIG_EEH
|
||||
|
||||
void pci_addr_cache_build(void);
|
||||
void pci_addr_cache_insert_device(struct pci_dev *dev);
|
||||
void pci_addr_cache_remove_device(struct pci_dev *dev);
|
||||
void pci_addr_cache_build(void);
|
||||
struct pci_dev *pci_get_device_by_addr(unsigned long addr);
|
||||
|
||||
/**
|
||||
* eeh_slot_error_detail -- record and EEH error condition to the log
|
||||
* @pdn: pci device node
|
||||
* @severity: EEH_LOG_TEMP_FAILURE or EEH_LOG_PERM_FAILURE
|
||||
*
|
||||
* Obtains the EEH error details from the RTAS subsystem,
|
||||
* and then logs these details with the RTAS error log system.
|
||||
*/
|
||||
#define EEH_LOG_TEMP_FAILURE 1
|
||||
#define EEH_LOG_PERM_FAILURE 2
|
||||
void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
|
||||
|
||||
/**
|
||||
* rtas_pci_enable - enable IO transfers for this slot
|
||||
* @pdn: pci device node
|
||||
* @function: either EEH_THAW_MMIO or EEH_THAW_DMA
|
||||
*
|
||||
* Enable I/O transfers to this slot
|
||||
*/
|
||||
#define EEH_THAW_MMIO 2
|
||||
#define EEH_THAW_DMA 3
|
||||
int rtas_pci_enable(struct pci_dn *pdn, int function);
|
||||
|
||||
/**
|
||||
* rtas_set_slot_reset -- unfreeze a frozen slot
|
||||
* @pdn: pci device node
|
||||
*
|
||||
* Clear the EEH-frozen condition on a slot. This routine
|
||||
* does this by asserting the PCI #RST line for 1/8th of
|
||||
* a second; this routine will sleep while the adapter is
|
||||
* being reset.
|
||||
*
|
||||
* Returns a non-zero value if the reset failed.
|
||||
*/
|
||||
int rtas_set_slot_reset (struct pci_dn *);
|
||||
int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs);
|
||||
|
||||
/**
|
||||
* eeh_restore_bars - Restore device configuration info.
|
||||
* @pdn: pci device node
|
||||
*
|
||||
* A reset of a PCI device will clear out its config space.
|
||||
* This routines will restore the config space for this
|
||||
* device, and is children, to values previously obtained
|
||||
* from the firmware.
|
||||
*/
|
||||
void eeh_restore_bars(struct pci_dn *);
|
||||
|
||||
/**
|
||||
* rtas_configure_bridge -- firmware initialization of pci bridge
|
||||
* @pdn: pci device node
|
||||
*
|
||||
* Ask the firmware to configure all PCI bridges devices
|
||||
* located behind the indicated node. Required after a
|
||||
* pci device reset. Does essentially the same hing as
|
||||
* eeh_restore_bars, but for brdges, and lets firmware
|
||||
* do the work.
|
||||
*/
|
||||
void rtas_configure_bridge(struct pci_dn *);
|
||||
|
||||
struct pci_dev *pci_addr_cache_get_device(unsigned long addr);
|
||||
void eeh_slot_error_detail(struct eeh_dev *edev, int severity);
|
||||
int eeh_pci_enable(struct eeh_dev *edev, int function);
|
||||
int eeh_reset_pe(struct eeh_dev *);
|
||||
void eeh_restore_bars(struct eeh_dev *);
|
||||
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
|
||||
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
|
||||
|
||||
/**
|
||||
* eeh_mark_slot -- set mode flags for pertition endpoint
|
||||
* @pdn: pci device node
|
||||
*
|
||||
* mark and clear slots: find "partition endpoint" PE and set or
|
||||
* clear the flags for each subnode of the PE.
|
||||
*/
|
||||
void eeh_mark_slot (struct device_node *dn, int mode_flag);
|
||||
void eeh_clear_slot (struct device_node *dn, int mode_flag);
|
||||
|
||||
/**
|
||||
* find_device_pe -- Find the associated "Partiationable Endpoint" PE
|
||||
* @pdn: pci device node
|
||||
*/
|
||||
struct device_node * find_device_pe(struct device_node *dn);
|
||||
void eeh_mark_slot(struct device_node *dn, int mode_flag);
|
||||
void eeh_clear_slot(struct device_node *dn, int mode_flag);
|
||||
struct device_node *eeh_find_device_pe(struct device_node *dn);
|
||||
|
||||
void eeh_sysfs_add_device(struct pci_dev *pdev);
|
||||
void eeh_sysfs_remove_device(struct pci_dev *pdev);
|
||||
|
@ -21,12 +21,13 @@
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_device.h>
|
||||
#include <linux/of_platform.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#include <asm/errno.h>
|
||||
#include <asm/topology.h>
|
||||
#include <asm/pci-bridge.h>
|
||||
#include <asm/ppc-pci.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/eeh.h>
|
||||
|
||||
#ifdef CONFIG_PPC_OF_PLATFORM_PCI
|
||||
|
||||
@ -66,6 +67,9 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev)
|
||||
/* Init pci_dn data structures */
|
||||
pci_devs_phb_init_dynamic(phb);
|
||||
|
||||
/* Create EEH devices for the PHB */
|
||||
eeh_dev_phb_init_dynamic(phb);
|
||||
|
||||
/* Register devices with EEH */
|
||||
#ifdef CONFIG_EEH
|
||||
if (dev->dev.of_node->child)
|
||||
|
@ -275,6 +275,9 @@ void __init find_and_init_phbs(void)
|
||||
of_node_put(root);
|
||||
pci_devs_phb_init();
|
||||
|
||||
/* Create EEH devices for all PHBs */
|
||||
eeh_dev_phb_init();
|
||||
|
||||
/*
|
||||
* pci_probe_only and pci_assign_all_buses can be set via properties
|
||||
* in chosen.
|
||||
|
@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
|
||||
firmware.o power.o dlpar.o mobility.o
|
||||
obj-$(CONFIG_SMP) += smp.o
|
||||
obj-$(CONFIG_SCANLOG) += scanlog.o
|
||||
obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
|
||||
obj-$(CONFIG_EEH) += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \
|
||||
eeh_event.o eeh_sysfs.o eeh_pseries.o
|
||||
obj-$(CONFIG_KEXEC) += kexec.o
|
||||
obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
|
||||
obj-$(CONFIG_PSERIES_MSI) += msi.o
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,4 @@
|
||||
/*
|
||||
* eeh_cache.c
|
||||
* PCI address cache; allows the lookup of PCI devices based on I/O address
|
||||
*
|
||||
* Copyright IBM Corporation 2004
|
||||
@ -47,8 +46,7 @@
|
||||
* than any hash algo I could think of for this problem, even
|
||||
* with the penalty of slow pointer chases for d-cache misses).
|
||||
*/
|
||||
struct pci_io_addr_range
|
||||
{
|
||||
struct pci_io_addr_range {
|
||||
struct rb_node rb_node;
|
||||
unsigned long addr_lo;
|
||||
unsigned long addr_hi;
|
||||
@ -56,13 +54,12 @@ struct pci_io_addr_range
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
static struct pci_io_addr_cache
|
||||
{
|
||||
static struct pci_io_addr_cache {
|
||||
struct rb_root rb_root;
|
||||
spinlock_t piar_lock;
|
||||
} pci_io_addr_cache_root;
|
||||
|
||||
static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
|
||||
static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
|
||||
{
|
||||
struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
|
||||
|
||||
@ -86,7 +83,7 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
|
||||
}
|
||||
|
||||
/**
|
||||
* pci_get_device_by_addr - Get device, given only address
|
||||
* pci_addr_cache_get_device - Get device, given only address
|
||||
* @addr: mmio (PIO) phys address or i/o port number
|
||||
*
|
||||
* Given an mmio phys address, or a port number, find a pci device
|
||||
@ -95,13 +92,13 @@ static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
|
||||
* from zero (that is, they do *not* have pci_io_addr added in).
|
||||
* It is safe to call this function within an interrupt.
|
||||
*/
|
||||
struct pci_dev *pci_get_device_by_addr(unsigned long addr)
|
||||
struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
|
||||
{
|
||||
struct pci_dev *dev;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
|
||||
dev = __pci_get_device_by_addr(addr);
|
||||
dev = __pci_addr_cache_get_device(addr);
|
||||
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
|
||||
return dev;
|
||||
}
|
||||
@ -166,7 +163,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
|
||||
|
||||
#ifdef DEBUG
|
||||
printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",
|
||||
alo, ahi, pci_name (dev));
|
||||
alo, ahi, pci_name(dev));
|
||||
#endif
|
||||
|
||||
rb_link_node(&piar->rb_node, parent, p);
|
||||
@ -178,7 +175,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
|
||||
static void __pci_addr_cache_insert_device(struct pci_dev *dev)
|
||||
{
|
||||
struct device_node *dn;
|
||||
struct pci_dn *pdn;
|
||||
struct eeh_dev *edev;
|
||||
int i;
|
||||
|
||||
dn = pci_device_to_OF_node(dev);
|
||||
@ -187,13 +184,19 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev)
|
||||
return;
|
||||
}
|
||||
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
if (!edev) {
|
||||
pr_warning("PCI: no EEH dev found for dn=%s\n",
|
||||
dn->full_name);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Skip any devices for which EEH is not enabled. */
|
||||
pdn = PCI_DN(dn);
|
||||
if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
|
||||
pdn->eeh_mode & EEH_MODE_NOCHECK) {
|
||||
if (!(edev->mode & EEH_MODE_SUPPORTED) ||
|
||||
edev->mode & EEH_MODE_NOCHECK) {
|
||||
#ifdef DEBUG
|
||||
printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",
|
||||
pci_name(dev), pdn->node->full_name);
|
||||
pr_info("PCI: skip building address cache for=%s - %s\n",
|
||||
pci_name(dev), dn->full_name);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
@ -284,6 +287,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev)
|
||||
void __init pci_addr_cache_build(void)
|
||||
{
|
||||
struct device_node *dn;
|
||||
struct eeh_dev *edev;
|
||||
struct pci_dev *dev = NULL;
|
||||
|
||||
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
|
||||
@ -294,8 +298,14 @@ void __init pci_addr_cache_build(void)
|
||||
dn = pci_device_to_OF_node(dev);
|
||||
if (!dn)
|
||||
continue;
|
||||
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
if (!edev)
|
||||
continue;
|
||||
|
||||
pci_dev_get(dev); /* matching put is in eeh_remove_device() */
|
||||
PCI_DN(dn)->pcidev = dev;
|
||||
dev->dev.archdata.edev = edev;
|
||||
edev->pdev = dev;
|
||||
|
||||
eeh_sysfs_add_device(dev);
|
||||
}
|
||||
|
102
arch/powerpc/platforms/pseries/eeh_dev.c
Normal file
102
arch/powerpc/platforms/pseries/eeh_dev.c
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* The file intends to implement dynamic creation of EEH device, which will
|
||||
* be bound with OF node and PCI device simultaneously. The EEH devices would
|
||||
* be fundamental information for EEH core components to work properly. Besides,
|
||||
* we have to support multiple situations where dynamic creation of EEH device
|
||||
* is required:
|
||||
*
|
||||
* 1) Before PCI enumeration starts, we need to create EEH devices according to the
|
||||
* PCI sensitive OF nodes.
|
||||
* 2) When PCI enumeration is done, we need to do the binding between PCI device and
|
||||
* the associated EEH device.
|
||||
* 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
|
||||
* will be created while PCI sensitive OF node is detected from DR.
|
||||
* 4) PCI hotplug needs to redo the binding between PCI device and EEH device. If
|
||||
* PHB is newly inserted, we also need to create EEH devices accordingly.
|
||||
*
|
||||
* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <asm/pci-bridge.h>
|
||||
#include <asm/ppc-pci.h>
|
||||
|
||||
/**
|
||||
* eeh_dev_init - Create EEH device according to OF node
|
||||
* @dn: device node
|
||||
* @data: PHB
|
||||
*
|
||||
* It will create EEH device according to the given OF node. The function
|
||||
* might be called by PCI enumeration, DR, or PHB hotplug.
|
||||
*/
|
||||
void * __devinit eeh_dev_init(struct device_node *dn, void *data)
|
||||
{
|
||||
struct pci_controller *phb = data;
|
||||
struct eeh_dev *edev;
|
||||
|
||||
/* Allocate EEH device */
|
||||
edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL);
|
||||
if (!edev) {
|
||||
pr_warning("%s: out of memory\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Associate EEH device with OF node */
|
||||
dn->edev = edev;
|
||||
edev->dn = dn;
|
||||
edev->phb = phb;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
|
||||
* @phb: PHB
|
||||
*
|
||||
* Scan the PHB OF node and its child association, then create the
|
||||
* EEH devices accordingly
|
||||
*/
|
||||
void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
|
||||
{
|
||||
struct device_node *dn = phb->dn;
|
||||
|
||||
/* EEH device for PHB */
|
||||
eeh_dev_init(dn, phb);
|
||||
|
||||
/* EEH devices for children OF nodes */
|
||||
traverse_pci_devices(dn, eeh_dev_init, phb);
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
|
||||
*
|
||||
* Scan all the existing PHBs and create EEH devices for their OF
|
||||
* nodes and their children OF nodes
|
||||
*/
|
||||
void __init eeh_dev_phb_init(void)
|
||||
{
|
||||
struct pci_controller *phb, *tmp;
|
||||
|
||||
list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
|
||||
eeh_dev_phb_init_dynamic(phb);
|
||||
}
|
@ -33,8 +33,14 @@
|
||||
#include <asm/prom.h>
|
||||
#include <asm/rtas.h>
|
||||
|
||||
|
||||
static inline const char * pcid_name (struct pci_dev *pdev)
|
||||
/**
|
||||
* eeh_pcid_name - Retrieve name of PCI device driver
|
||||
* @pdev: PCI device
|
||||
*
|
||||
* This routine is used to retrieve the name of PCI device driver
|
||||
* if that's valid.
|
||||
*/
|
||||
static inline const char *eeh_pcid_name(struct pci_dev *pdev)
|
||||
{
|
||||
if (pdev && pdev->dev.driver)
|
||||
return pdev->dev.driver->name;
|
||||
@ -64,48 +70,59 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* eeh_disable_irq - disable interrupt for the recovering device
|
||||
* eeh_disable_irq - Disable interrupt for the recovering device
|
||||
* @dev: PCI device
|
||||
*
|
||||
* This routine must be called when reporting temporary or permanent
|
||||
* error to the particular PCI device to disable interrupt of that
|
||||
* device. If the device has enabled MSI or MSI-X interrupt, we needn't
|
||||
* do real work because EEH should freeze DMA transfers for those PCI
|
||||
* devices encountering EEH errors, which includes MSI or MSI-X.
|
||||
*/
|
||||
static void eeh_disable_irq(struct pci_dev *dev)
|
||||
{
|
||||
struct device_node *dn = pci_device_to_OF_node(dev);
|
||||
struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
|
||||
|
||||
/* Don't disable MSI and MSI-X interrupts. They are
|
||||
* effectively disabled by the DMA Stopped state
|
||||
* when an EEH error occurs.
|
||||
*/
|
||||
*/
|
||||
if (dev->msi_enabled || dev->msix_enabled)
|
||||
return;
|
||||
|
||||
if (!irq_has_action(dev->irq))
|
||||
return;
|
||||
|
||||
PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
|
||||
edev->mode |= EEH_MODE_IRQ_DISABLED;
|
||||
disable_irq_nosync(dev->irq);
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_enable_irq - enable interrupt for the recovering device
|
||||
* eeh_enable_irq - Enable interrupt for the recovering device
|
||||
* @dev: PCI device
|
||||
*
|
||||
* This routine must be called to enable interrupt while failed
|
||||
* device could be resumed.
|
||||
*/
|
||||
static void eeh_enable_irq(struct pci_dev *dev)
|
||||
{
|
||||
struct device_node *dn = pci_device_to_OF_node(dev);
|
||||
struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
|
||||
|
||||
if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
|
||||
PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
|
||||
if ((edev->mode) & EEH_MODE_IRQ_DISABLED) {
|
||||
edev->mode &= ~EEH_MODE_IRQ_DISABLED;
|
||||
enable_irq(dev->irq);
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------- */
|
||||
/**
|
||||
* eeh_report_error - report pci error to each device driver
|
||||
* eeh_report_error - Report pci error to each device driver
|
||||
* @dev: PCI device
|
||||
* @userdata: return value
|
||||
*
|
||||
* Report an EEH error to each device driver, collect up and
|
||||
* merge the device driver responses. Cumulative response
|
||||
* passed back in "userdata".
|
||||
*/
|
||||
|
||||
static int eeh_report_error(struct pci_dev *dev, void *userdata)
|
||||
{
|
||||
enum pci_ers_result rc, *res = userdata;
|
||||
@ -122,7 +139,7 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
|
||||
!driver->err_handler->error_detected)
|
||||
return 0;
|
||||
|
||||
rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
|
||||
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
|
||||
|
||||
/* A driver that needs a reset trumps all others */
|
||||
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
|
||||
@ -132,13 +149,14 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_report_mmio_enabled - tell drivers that MMIO has been enabled
|
||||
* eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
|
||||
* @dev: PCI device
|
||||
* @userdata: return value
|
||||
*
|
||||
* Tells each device driver that IO ports, MMIO and config space I/O
|
||||
* are now enabled. Collects up and merges the device driver responses.
|
||||
* Cumulative response passed back in "userdata".
|
||||
*/
|
||||
|
||||
static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
|
||||
{
|
||||
enum pci_ers_result rc, *res = userdata;
|
||||
@ -149,7 +167,7 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
|
||||
!driver->err_handler->mmio_enabled)
|
||||
return 0;
|
||||
|
||||
rc = driver->err_handler->mmio_enabled (dev);
|
||||
rc = driver->err_handler->mmio_enabled(dev);
|
||||
|
||||
/* A driver that needs a reset trumps all others */
|
||||
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
|
||||
@ -159,9 +177,15 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_report_reset - tell device that slot has been reset
|
||||
* eeh_report_reset - Tell device that slot has been reset
|
||||
* @dev: PCI device
|
||||
* @userdata: return value
|
||||
*
|
||||
* This routine must be called while EEH tries to reset particular
|
||||
* PCI device so that the associated PCI device driver could take
|
||||
* some actions, usually to save data the driver needs so that the
|
||||
* driver can work again while the device is recovered.
|
||||
*/
|
||||
|
||||
static int eeh_report_reset(struct pci_dev *dev, void *userdata)
|
||||
{
|
||||
enum pci_ers_result rc, *res = userdata;
|
||||
@ -188,9 +212,14 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_report_resume - tell device to resume normal operations
|
||||
* eeh_report_resume - Tell device to resume normal operations
|
||||
* @dev: PCI device
|
||||
* @userdata: return value
|
||||
*
|
||||
* This routine must be called to notify the device driver that it
|
||||
* could resume so that the device driver can do some initialization
|
||||
* to make the recovered device work again.
|
||||
*/
|
||||
|
||||
static int eeh_report_resume(struct pci_dev *dev, void *userdata)
|
||||
{
|
||||
struct pci_driver *driver = dev->driver;
|
||||
@ -212,12 +241,13 @@ static int eeh_report_resume(struct pci_dev *dev, void *userdata)
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_report_failure - tell device driver that device is dead.
|
||||
* eeh_report_failure - Tell device driver that device is dead.
|
||||
* @dev: PCI device
|
||||
* @userdata: return value
|
||||
*
|
||||
* This informs the device driver that the device is permanently
|
||||
* dead, and that no further recovery attempts will be made on it.
|
||||
*/
|
||||
|
||||
static int eeh_report_failure(struct pci_dev *dev, void *userdata)
|
||||
{
|
||||
struct pci_driver *driver = dev->driver;
|
||||
@ -238,65 +268,46 @@ static int eeh_report_failure(struct pci_dev *dev, void *userdata)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------- */
|
||||
/**
|
||||
* handle_eeh_events -- reset a PCI device after hard lockup.
|
||||
* eeh_reset_device - Perform actual reset of a pci slot
|
||||
* @edev: PE associated EEH device
|
||||
* @bus: PCI bus corresponding to the isolated slot
|
||||
*
|
||||
* pSeries systems will isolate a PCI slot if the PCI-Host
|
||||
* bridge detects address or data parity errors, DMA's
|
||||
* occurring to wild addresses (which usually happen due to
|
||||
* bugs in device drivers or in PCI adapter firmware).
|
||||
* Slot isolations also occur if #SERR, #PERR or other misc
|
||||
* PCI-related errors are detected.
|
||||
*
|
||||
* Recovery process consists of unplugging the device driver
|
||||
* (which generated hotplug events to userspace), then issuing
|
||||
* a PCI #RST to the device, then reconfiguring the PCI config
|
||||
* space for all bridges & devices under this slot, and then
|
||||
* finally restarting the device drivers (which cause a second
|
||||
* set of hotplug events to go out to userspace).
|
||||
* This routine must be called to do reset on the indicated PE.
|
||||
* During the reset, udev might be invoked because those affected
|
||||
* PCI devices will be removed and then added.
|
||||
*/
|
||||
|
||||
/**
|
||||
* eeh_reset_device() -- perform actual reset of a pci slot
|
||||
* @bus: pointer to the pci bus structure corresponding
|
||||
* to the isolated slot. A non-null value will
|
||||
* cause all devices under the bus to be removed
|
||||
* and then re-added.
|
||||
* @pe_dn: pointer to a "Partionable Endpoint" device node.
|
||||
* This is the top-level structure on which pci
|
||||
* bus resets can be performed.
|
||||
*/
|
||||
|
||||
static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
|
||||
static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
|
||||
{
|
||||
struct device_node *dn;
|
||||
int cnt, rc;
|
||||
|
||||
/* pcibios will clear the counter; save the value */
|
||||
cnt = pe_dn->eeh_freeze_count;
|
||||
cnt = edev->freeze_count;
|
||||
|
||||
if (bus)
|
||||
pcibios_remove_pci_devices(bus);
|
||||
|
||||
/* Reset the pci controller. (Asserts RST#; resets config space).
|
||||
* Reconfigure bridges and devices. Don't try to bring the system
|
||||
* up if the reset failed for some reason. */
|
||||
rc = rtas_set_slot_reset(pe_dn);
|
||||
* up if the reset failed for some reason.
|
||||
*/
|
||||
rc = eeh_reset_pe(edev);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* Walk over all functions on this device. */
|
||||
dn = pe_dn->node;
|
||||
if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
|
||||
/* Walk over all functions on this device. */
|
||||
dn = eeh_dev_to_of_node(edev);
|
||||
if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
|
||||
dn = dn->parent->child;
|
||||
|
||||
while (dn) {
|
||||
struct pci_dn *ppe = PCI_DN(dn);
|
||||
struct eeh_dev *pedev = of_node_to_eeh_dev(dn);
|
||||
|
||||
/* On Power4, always true because eeh_pe_config_addr=0 */
|
||||
if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
|
||||
rtas_configure_bridge(ppe);
|
||||
eeh_restore_bars(ppe);
|
||||
if (edev->pe_config_addr == pedev->pe_config_addr) {
|
||||
eeh_ops->configure_bridge(dn);
|
||||
eeh_restore_bars(pedev);
|
||||
}
|
||||
dn = dn->sibling;
|
||||
}
|
||||
@ -308,10 +319,10 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
|
||||
* potentially weird things happen.
|
||||
*/
|
||||
if (bus) {
|
||||
ssleep (5);
|
||||
ssleep(5);
|
||||
pcibios_add_pci_devices(bus);
|
||||
}
|
||||
pe_dn->eeh_freeze_count = cnt;
|
||||
edev->freeze_count = cnt;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -321,23 +332,39 @@ static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
|
||||
*/
|
||||
#define MAX_WAIT_FOR_RECOVERY 150
|
||||
|
||||
struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
/**
|
||||
* eeh_handle_event - Reset a PCI device after hard lockup.
|
||||
* @event: EEH event
|
||||
*
|
||||
* While PHB detects address or data parity errors on particular PCI
|
||||
* slot, the associated PE will be frozen. Besides, DMA's occurring
|
||||
* to wild addresses (which usually happen due to bugs in device
|
||||
* drivers or in PCI adapter firmware) can cause EEH error. #SERR,
|
||||
* #PERR or other misc PCI-related errors also can trigger EEH errors.
|
||||
*
|
||||
* Recovery process consists of unplugging the device driver (which
|
||||
* generated hotplug events to userspace), then issuing a PCI #RST to
|
||||
* the device, then reconfiguring the PCI config space for all bridges
|
||||
* & devices under this slot, and then finally restarting the device
|
||||
* drivers (which cause a second set of hotplug events to go out to
|
||||
* userspace).
|
||||
*/
|
||||
struct eeh_dev *handle_eeh_events(struct eeh_event *event)
|
||||
{
|
||||
struct device_node *frozen_dn;
|
||||
struct pci_dn *frozen_pdn;
|
||||
struct eeh_dev *frozen_edev;
|
||||
struct pci_bus *frozen_bus;
|
||||
int rc = 0;
|
||||
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
|
||||
const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
|
||||
|
||||
frozen_dn = find_device_pe(event->dn);
|
||||
frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
|
||||
if (!frozen_dn) {
|
||||
|
||||
location = of_get_property(event->dn, "ibm,loc-code", NULL);
|
||||
location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
|
||||
location = location ? location : "unknown";
|
||||
printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
|
||||
"for location=%s pci addr=%s\n",
|
||||
location, eeh_pci_name(event->dev));
|
||||
location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -350,9 +377,10 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
* which was always an EADS pci bridge. In the new style,
|
||||
* there might not be any EADS bridges, and even when there are,
|
||||
* the firmware marks them as "EEH incapable". So another
|
||||
* two-step is needed to find the pci bus.. */
|
||||
* two-step is needed to find the pci bus..
|
||||
*/
|
||||
if (!frozen_bus)
|
||||
frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);
|
||||
frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);
|
||||
|
||||
if (!frozen_bus) {
|
||||
printk(KERN_ERR "EEH: Cannot find PCI bus "
|
||||
@ -361,22 +389,21 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frozen_pdn = PCI_DN(frozen_dn);
|
||||
frozen_pdn->eeh_freeze_count++;
|
||||
frozen_edev = of_node_to_eeh_dev(frozen_dn);
|
||||
frozen_edev->freeze_count++;
|
||||
pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
|
||||
drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));
|
||||
|
||||
pci_str = eeh_pci_name(event->dev);
|
||||
drv_str = pcid_name(event->dev);
|
||||
|
||||
if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
|
||||
if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
|
||||
goto excess_failures;
|
||||
|
||||
printk(KERN_WARNING
|
||||
"EEH: This PCI device has failed %d times in the last hour:\n",
|
||||
frozen_pdn->eeh_freeze_count);
|
||||
frozen_edev->freeze_count);
|
||||
|
||||
if (frozen_pdn->pcidev) {
|
||||
bus_pci_str = pci_name(frozen_pdn->pcidev);
|
||||
bus_drv_str = pcid_name(frozen_pdn->pcidev);
|
||||
if (frozen_edev->pdev) {
|
||||
bus_pci_str = pci_name(frozen_edev->pdev);
|
||||
bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
|
||||
printk(KERN_WARNING
|
||||
"EEH: Bus location=%s driver=%s pci addr=%s\n",
|
||||
location, bus_drv_str, bus_pci_str);
|
||||
@ -395,9 +422,10 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
pci_walk_bus(frozen_bus, eeh_report_error, &result);
|
||||
|
||||
/* Get the current PCI slot state. This can take a long time,
|
||||
* sometimes over 3 seconds for certain systems. */
|
||||
rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
|
||||
if (rc < 0) {
|
||||
* sometimes over 3 seconds for certain systems.
|
||||
*/
|
||||
rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
|
||||
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
|
||||
printk(KERN_WARNING "EEH: Permanent failure\n");
|
||||
goto hard_fail;
|
||||
}
|
||||
@ -406,14 +434,14 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
* don't post the error log until after all dev drivers
|
||||
* have been informed.
|
||||
*/
|
||||
eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE);
|
||||
eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);
|
||||
|
||||
/* If all device drivers were EEH-unaware, then shut
|
||||
* down all of the device drivers, and hope they
|
||||
* go down willingly, without panicking the system.
|
||||
*/
|
||||
if (result == PCI_ERS_RESULT_NONE) {
|
||||
rc = eeh_reset_device(frozen_pdn, frozen_bus);
|
||||
rc = eeh_reset_device(frozen_edev, frozen_bus);
|
||||
if (rc) {
|
||||
printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
|
||||
goto hard_fail;
|
||||
@ -422,7 +450,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
|
||||
/* If all devices reported they can proceed, then re-enable MMIO */
|
||||
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
|
||||
rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);
|
||||
rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);
|
||||
|
||||
if (rc < 0)
|
||||
goto hard_fail;
|
||||
@ -436,7 +464,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
|
||||
/* If all devices reported they can proceed, then re-enable DMA */
|
||||
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
|
||||
rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);
|
||||
rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);
|
||||
|
||||
if (rc < 0)
|
||||
goto hard_fail;
|
||||
@ -454,7 +482,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
|
||||
/* If any device called out for a reset, then reset the slot */
|
||||
if (result == PCI_ERS_RESULT_NEED_RESET) {
|
||||
rc = eeh_reset_device(frozen_pdn, NULL);
|
||||
rc = eeh_reset_device(frozen_edev, NULL);
|
||||
if (rc) {
|
||||
printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
|
||||
goto hard_fail;
|
||||
@ -473,7 +501,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
|
||||
/* Tell all device drivers that they can resume operations */
|
||||
pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
|
||||
|
||||
return frozen_pdn;
|
||||
return frozen_edev;
|
||||
|
||||
excess_failures:
|
||||
/*
|
||||
@ -486,7 +514,7 @@ excess_failures:
|
||||
"has failed %d times in the last hour "
|
||||
"and has been permanently disabled.\n"
|
||||
"Please try reseating this device or replacing it.\n",
|
||||
location, drv_str, pci_str, frozen_pdn->eeh_freeze_count);
|
||||
location, drv_str, pci_str, frozen_edev->freeze_count);
|
||||
goto perm_error;
|
||||
|
||||
hard_fail:
|
||||
@ -497,7 +525,7 @@ hard_fail:
|
||||
location, drv_str, pci_str);
|
||||
|
||||
perm_error:
|
||||
eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE);
|
||||
eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);
|
||||
|
||||
/* Notify all devices that they're about to go down. */
|
||||
pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
|
||||
@ -508,4 +536,3 @@ perm_error:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ---------- end of file ---------- */
|
||||
|
@ -1,6 +1,4 @@
|
||||
/*
|
||||
* eeh_event.c
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -46,7 +44,7 @@ DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
|
||||
DEFINE_MUTEX(eeh_event_mutex);
|
||||
|
||||
/**
|
||||
* eeh_event_handler - dispatch EEH events.
|
||||
* eeh_event_handler - Dispatch EEH events.
|
||||
* @dummy - unused
|
||||
*
|
||||
* The detection of a frozen slot can occur inside an interrupt,
|
||||
@ -58,10 +56,10 @@ DEFINE_MUTEX(eeh_event_mutex);
|
||||
static int eeh_event_handler(void * dummy)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct eeh_event *event;
|
||||
struct pci_dn *pdn;
|
||||
struct eeh_event *event;
|
||||
struct eeh_dev *edev;
|
||||
|
||||
daemonize ("eehd");
|
||||
daemonize("eehd");
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
spin_lock_irqsave(&eeh_eventlist_lock, flags);
|
||||
@ -79,31 +77,37 @@ static int eeh_event_handler(void * dummy)
|
||||
|
||||
/* Serialize processing of EEH events */
|
||||
mutex_lock(&eeh_event_mutex);
|
||||
eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
|
||||
edev = event->edev;
|
||||
eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
|
||||
|
||||
printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
|
||||
eeh_pci_name(event->dev));
|
||||
eeh_pci_name(edev->pdev));
|
||||
|
||||
pdn = handle_eeh_events(event);
|
||||
edev = handle_eeh_events(event);
|
||||
|
||||
eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
|
||||
pci_dev_put(edev->pdev);
|
||||
|
||||
eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
|
||||
pci_dev_put(event->dev);
|
||||
kfree(event);
|
||||
mutex_unlock(&eeh_event_mutex);
|
||||
|
||||
/* If there are no new errors after an hour, clear the counter. */
|
||||
if (pdn && pdn->eeh_freeze_count>0) {
|
||||
msleep_interruptible (3600*1000);
|
||||
if (pdn->eeh_freeze_count>0)
|
||||
pdn->eeh_freeze_count--;
|
||||
if (edev && edev->freeze_count>0) {
|
||||
msleep_interruptible(3600*1000);
|
||||
if (edev->freeze_count>0)
|
||||
edev->freeze_count--;
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_thread_launcher
|
||||
* eeh_thread_launcher - Start kernel thread to handle EEH events
|
||||
* @dummy - unused
|
||||
*
|
||||
* This routine is called to start the kernel thread for processing
|
||||
* EEH event.
|
||||
*/
|
||||
static void eeh_thread_launcher(struct work_struct *dummy)
|
||||
{
|
||||
@ -112,18 +116,18 @@ static void eeh_thread_launcher(struct work_struct *dummy)
|
||||
}
|
||||
|
||||
/**
|
||||
* eeh_send_failure_event - generate a PCI error event
|
||||
* @dev pci device
|
||||
* eeh_send_failure_event - Generate a PCI error event
|
||||
* @edev: EEH device
|
||||
*
|
||||
* This routine can be called within an interrupt context;
|
||||
* the actual event will be delivered in a normal context
|
||||
* (from a workqueue).
|
||||
*/
|
||||
int eeh_send_failure_event (struct device_node *dn,
|
||||
struct pci_dev *dev)
|
||||
int eeh_send_failure_event(struct eeh_dev *edev)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct eeh_event *event;
|
||||
struct device_node *dn = eeh_dev_to_of_node(edev);
|
||||
const char *location;
|
||||
|
||||
if (!mem_init_done) {
|
||||
@ -135,15 +139,14 @@ int eeh_send_failure_event (struct device_node *dn,
|
||||
}
|
||||
event = kmalloc(sizeof(*event), GFP_ATOMIC);
|
||||
if (event == NULL) {
|
||||
printk (KERN_ERR "EEH: out of memory, event not handled\n");
|
||||
printk(KERN_ERR "EEH: out of memory, event not handled\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (dev)
|
||||
pci_dev_get(dev);
|
||||
if (edev->pdev)
|
||||
pci_dev_get(edev->pdev);
|
||||
|
||||
event->dn = dn;
|
||||
event->dev = dev;
|
||||
event->edev = edev;
|
||||
|
||||
/* We may or may not be called in an interrupt context */
|
||||
spin_lock_irqsave(&eeh_eventlist_lock, flags);
|
||||
@ -154,5 +157,3 @@ int eeh_send_failure_event (struct device_node *dn,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/********************** END OF FILE ******************************/
|
||||
|
565
arch/powerpc/platforms/pseries/eeh_pseries.c
Normal file
565
arch/powerpc/platforms/pseries/eeh_pseries.c
Normal file
@ -0,0 +1,565 @@
|
||||
/*
|
||||
* The file intends to implement the platform dependent EEH operations on pseries.
|
||||
* Actually, the pseries platform is built based on RTAS heavily. That means the
|
||||
* pseries platform dependent EEH operations will be built on RTAS calls. The functions
|
||||
* are derived from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has
|
||||
* been done.
|
||||
*
|
||||
* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011.
|
||||
* Copyright IBM Corporation 2001, 2005, 2006
|
||||
* Copyright Dave Engebretsen & Todd Inglett 2001
|
||||
* Copyright Linas Vepstas 2005, 2006
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
#include <asm/eeh.h>
|
||||
#include <asm/eeh_event.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/machdep.h>
|
||||
#include <asm/ppc-pci.h>
|
||||
#include <asm/rtas.h>
|
||||
|
||||
/* RTAS tokens */
|
||||
static int ibm_set_eeh_option;
|
||||
static int ibm_set_slot_reset;
|
||||
static int ibm_read_slot_reset_state;
|
||||
static int ibm_read_slot_reset_state2;
|
||||
static int ibm_slot_error_detail;
|
||||
static int ibm_get_config_addr_info;
|
||||
static int ibm_get_config_addr_info2;
|
||||
static int ibm_configure_bridge;
|
||||
static int ibm_configure_pe;
|
||||
|
||||
/*
|
||||
* Buffer for reporting slot-error-detail rtas calls. Its here
|
||||
* in BSS, and not dynamically alloced, so that it ends up in
|
||||
* RMO where RTAS can access it.
|
||||
*/
|
||||
static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
|
||||
static DEFINE_SPINLOCK(slot_errbuf_lock);
|
||||
static int eeh_error_buf_size;
|
||||
|
||||
/**
|
||||
* pseries_eeh_init - EEH platform dependent initialization
|
||||
*
|
||||
* EEH platform dependent initialization on pseries.
|
||||
*/
|
||||
static int pseries_eeh_init(void)
|
||||
{
|
||||
/* figure out EEH RTAS function call tokens */
|
||||
ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
|
||||
ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
|
||||
ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
|
||||
ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
|
||||
ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
|
||||
ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
|
||||
ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
|
||||
ibm_configure_pe = rtas_token("ibm,configure-pe");
|
||||
ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
|
||||
|
||||
/* necessary sanity check */
|
||||
if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) {
|
||||
pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
} else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) {
|
||||
pr_warning("%s: RTAS service <ibm, set-slot-reset> invalid\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
} else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
|
||||
ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) {
|
||||
pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and "
|
||||
"<ibm,read-slot-reset-state> invalid\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
} else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) {
|
||||
pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
} else if (ibm_get_config_addr_info2 == RTAS_UNKNOWN_SERVICE &&
|
||||
ibm_get_config_addr_info == RTAS_UNKNOWN_SERVICE) {
|
||||
pr_warning("%s: RTAS service <ibm,get-config-addr-info2> and "
|
||||
"<ibm,get-config-addr-info> invalid\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
} else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE &&
|
||||
ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) {
|
||||
pr_warning("%s: RTAS service <ibm,configure-pe> and "
|
||||
"<ibm,configure-bridge> invalid\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Initialize error log lock and size */
|
||||
spin_lock_init(&slot_errbuf_lock);
|
||||
eeh_error_buf_size = rtas_token("rtas-error-log-max");
|
||||
if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
|
||||
pr_warning("%s: unknown EEH error log size\n",
|
||||
__func__);
|
||||
eeh_error_buf_size = 1024;
|
||||
} else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
|
||||
pr_warning("%s: EEH error log size %d exceeds the maximal %d\n",
|
||||
__func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
|
||||
eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
|
||||
* @dn: device node
|
||||
* @option: operation to be issued
|
||||
*
|
||||
* The function is used to control the EEH functionality globally.
|
||||
* Currently, following options are support according to PAPR:
|
||||
* Enable EEH, Disable EEH, Enable MMIO and Enable DMA
|
||||
*/
|
||||
static int pseries_eeh_set_option(struct device_node *dn, int option)
|
||||
{
|
||||
int ret = 0;
|
||||
struct eeh_dev *edev;
|
||||
const u32 *reg;
|
||||
int config_addr;
|
||||
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
|
||||
/*
|
||||
* When we're enabling or disabling EEH functioality on
|
||||
* the particular PE, the PE config address is possibly
|
||||
* unavailable. Therefore, we have to figure it out from
|
||||
* the FDT node.
|
||||
*/
|
||||
switch (option) {
|
||||
case EEH_OPT_DISABLE:
|
||||
case EEH_OPT_ENABLE:
|
||||
reg = of_get_property(dn, "reg", NULL);
|
||||
config_addr = reg[0];
|
||||
break;
|
||||
|
||||
case EEH_OPT_THAW_MMIO:
|
||||
case EEH_OPT_THAW_DMA:
|
||||
config_addr = edev->config_addr;
|
||||
if (edev->pe_config_addr)
|
||||
config_addr = edev->pe_config_addr;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("%s: Invalid option %d\n",
|
||||
__func__, option);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
|
||||
config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid), option);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_get_pe_addr - Retrieve PE address
|
||||
* @dn: device node
|
||||
*
|
||||
* Retrieve the assocated PE address. Actually, there're 2 RTAS
|
||||
* function calls dedicated for the purpose. We need implement
|
||||
* it through the new function and then the old one. Besides,
|
||||
* you should make sure the config address is figured out from
|
||||
* FDT node before calling the function.
|
||||
*
|
||||
* It's notable that zero'ed return value means invalid PE config
|
||||
* address.
|
||||
*/
|
||||
static int pseries_eeh_get_pe_addr(struct device_node *dn)
|
||||
{
|
||||
struct eeh_dev *edev;
|
||||
int ret = 0;
|
||||
int rets[3];
|
||||
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
|
||||
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
|
||||
/*
|
||||
* First of all, we need to make sure there has one PE
|
||||
* associated with the device. Otherwise, PE address is
|
||||
* meaningless.
|
||||
*/
|
||||
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
|
||||
edev->config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid), 1);
|
||||
if (ret || (rets[0] == 0))
|
||||
return 0;
|
||||
|
||||
/* Retrieve the associated PE config address */
|
||||
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
|
||||
edev->config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid), 0);
|
||||
if (ret) {
|
||||
pr_warning("%s: Failed to get PE address for %s\n",
|
||||
__func__, dn->full_name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return rets[0];
|
||||
}
|
||||
|
||||
if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
|
||||
ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
|
||||
edev->config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid), 0);
|
||||
if (ret) {
|
||||
pr_warning("%s: Failed to get PE address for %s\n",
|
||||
__func__, dn->full_name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return rets[0];
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_get_state - Retrieve PE state
|
||||
* @dn: PE associated device node
|
||||
* @state: return value
|
||||
*
|
||||
* Retrieve the state of the specified PE. On RTAS compliant
|
||||
* pseries platform, there already has one dedicated RTAS function
|
||||
* for the purpose. It's notable that the associated PE config address
|
||||
* might be ready when calling the function. Therefore, endeavour to
|
||||
* use the PE config address if possible. Further more, there're 2
|
||||
* RTAS calls for the purpose, we need to try the new one and back
|
||||
* to the old one if the new one couldn't work properly.
|
||||
*/
|
||||
static int pseries_eeh_get_state(struct device_node *dn, int *state)
|
||||
{
|
||||
struct eeh_dev *edev;
|
||||
int config_addr;
|
||||
int ret;
|
||||
int rets[4];
|
||||
int result;
|
||||
|
||||
/* Figure out PE config address if possible */
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
config_addr = edev->config_addr;
|
||||
if (edev->pe_config_addr)
|
||||
config_addr = edev->pe_config_addr;
|
||||
|
||||
if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
|
||||
ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
|
||||
config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid));
|
||||
} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
|
||||
/* Fake PE unavailable info */
|
||||
rets[2] = 0;
|
||||
ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
|
||||
config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid));
|
||||
} else {
|
||||
return EEH_STATE_NOT_SUPPORT;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Parse the result out */
|
||||
result = 0;
|
||||
if (rets[1]) {
|
||||
switch(rets[0]) {
|
||||
case 0:
|
||||
result &= ~EEH_STATE_RESET_ACTIVE;
|
||||
result |= EEH_STATE_MMIO_ACTIVE;
|
||||
result |= EEH_STATE_DMA_ACTIVE;
|
||||
break;
|
||||
case 1:
|
||||
result |= EEH_STATE_RESET_ACTIVE;
|
||||
result |= EEH_STATE_MMIO_ACTIVE;
|
||||
result |= EEH_STATE_DMA_ACTIVE;
|
||||
break;
|
||||
case 2:
|
||||
result &= ~EEH_STATE_RESET_ACTIVE;
|
||||
result &= ~EEH_STATE_MMIO_ACTIVE;
|
||||
result &= ~EEH_STATE_DMA_ACTIVE;
|
||||
break;
|
||||
case 4:
|
||||
result &= ~EEH_STATE_RESET_ACTIVE;
|
||||
result &= ~EEH_STATE_MMIO_ACTIVE;
|
||||
result &= ~EEH_STATE_DMA_ACTIVE;
|
||||
result |= EEH_STATE_MMIO_ENABLED;
|
||||
break;
|
||||
case 5:
|
||||
if (rets[2]) {
|
||||
if (state) *state = rets[2];
|
||||
result = EEH_STATE_UNAVAILABLE;
|
||||
} else {
|
||||
result = EEH_STATE_NOT_SUPPORT;
|
||||
}
|
||||
default:
|
||||
result = EEH_STATE_NOT_SUPPORT;
|
||||
}
|
||||
} else {
|
||||
result = EEH_STATE_NOT_SUPPORT;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_reset - Reset the specified PE
|
||||
* @dn: PE associated device node
|
||||
* @option: reset option
|
||||
*
|
||||
* Reset the specified PE
|
||||
*/
|
||||
static int pseries_eeh_reset(struct device_node *dn, int option)
|
||||
{
|
||||
struct eeh_dev *edev;
|
||||
int config_addr;
|
||||
int ret;
|
||||
|
||||
/* Figure out PE address */
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
config_addr = edev->config_addr;
|
||||
if (edev->pe_config_addr)
|
||||
config_addr = edev->pe_config_addr;
|
||||
|
||||
/* Reset PE through RTAS call */
|
||||
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
|
||||
config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid), option);
|
||||
|
||||
/* If fundamental-reset not supported, try hot-reset */
|
||||
if (option == EEH_RESET_FUNDAMENTAL &&
|
||||
ret == -8) {
|
||||
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
|
||||
config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid), EEH_RESET_HOT);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_wait_state - Wait for PE state
|
||||
* @dn: PE associated device node
|
||||
* @max_wait: maximal period in microsecond
|
||||
*
|
||||
* Wait for the state of associated PE. It might take some time
|
||||
* to retrieve the PE's state.
|
||||
*/
|
||||
static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
|
||||
{
|
||||
int ret;
|
||||
int mwait;
|
||||
|
||||
/*
|
||||
* According to PAPR, the state of PE might be temporarily
|
||||
* unavailable. Under the circumstance, we have to wait
|
||||
* for indicated time determined by firmware. The maximal
|
||||
* wait time is 5 minutes, which is acquired from the original
|
||||
* EEH implementation. Also, the original implementation
|
||||
* also defined the minimal wait time as 1 second.
|
||||
*/
|
||||
#define EEH_STATE_MIN_WAIT_TIME (1000)
|
||||
#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
|
||||
|
||||
while (1) {
|
||||
ret = pseries_eeh_get_state(dn, &mwait);
|
||||
|
||||
/*
|
||||
* If the PE's state is temporarily unavailable,
|
||||
* we have to wait for the specified time. Otherwise,
|
||||
* the PE's state will be returned immediately.
|
||||
*/
|
||||
if (ret != EEH_STATE_UNAVAILABLE)
|
||||
return ret;
|
||||
|
||||
if (max_wait <= 0) {
|
||||
pr_warning("%s: Timeout when getting PE's state (%d)\n",
|
||||
__func__, max_wait);
|
||||
return EEH_STATE_NOT_SUPPORT;
|
||||
}
|
||||
|
||||
if (mwait <= 0) {
|
||||
pr_warning("%s: Firmware returned bad wait value %d\n",
|
||||
__func__, mwait);
|
||||
mwait = EEH_STATE_MIN_WAIT_TIME;
|
||||
} else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
|
||||
pr_warning("%s: Firmware returned too long wait value %d\n",
|
||||
__func__, mwait);
|
||||
mwait = EEH_STATE_MAX_WAIT_TIME;
|
||||
}
|
||||
|
||||
max_wait -= mwait;
|
||||
msleep(mwait);
|
||||
}
|
||||
|
||||
return EEH_STATE_NOT_SUPPORT;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_get_log - Retrieve error log
|
||||
* @dn: device node
|
||||
* @severity: temporary or permanent error log
|
||||
* @drv_log: driver log to be combined with retrieved error log
|
||||
* @len: length of driver log
|
||||
*
|
||||
* Retrieve the temporary or permanent error from the PE.
|
||||
* Actually, the error will be retrieved through the dedicated
|
||||
* RTAS call.
|
||||
*/
|
||||
static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
|
||||
{
|
||||
struct eeh_dev *edev;
|
||||
int config_addr;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
spin_lock_irqsave(&slot_errbuf_lock, flags);
|
||||
memset(slot_errbuf, 0, eeh_error_buf_size);
|
||||
|
||||
/* Figure out the PE address */
|
||||
config_addr = edev->config_addr;
|
||||
if (edev->pe_config_addr)
|
||||
config_addr = edev->pe_config_addr;
|
||||
|
||||
ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
|
||||
BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid),
|
||||
virt_to_phys(drv_log), len,
|
||||
virt_to_phys(slot_errbuf), eeh_error_buf_size,
|
||||
severity);
|
||||
if (!ret)
|
||||
log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
|
||||
spin_unlock_irqrestore(&slot_errbuf_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
|
||||
* @dn: PE associated device node
|
||||
*
|
||||
* The function will be called to reconfigure the bridges included
|
||||
* in the specified PE so that the mulfunctional PE would be recovered
|
||||
* again.
|
||||
*/
|
||||
static int pseries_eeh_configure_bridge(struct device_node *dn)
|
||||
{
|
||||
struct eeh_dev *edev;
|
||||
int config_addr;
|
||||
int ret;
|
||||
|
||||
/* Figure out the PE address */
|
||||
edev = of_node_to_eeh_dev(dn);
|
||||
config_addr = edev->config_addr;
|
||||
if (edev->pe_config_addr)
|
||||
config_addr = edev->pe_config_addr;
|
||||
|
||||
/* Use new configure-pe function, if supported */
|
||||
if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
|
||||
ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
|
||||
config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid));
|
||||
} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
|
||||
ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
|
||||
config_addr, BUID_HI(edev->phb->buid),
|
||||
BUID_LO(edev->phb->buid));
|
||||
} else {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
pr_warning("%s: Unable to configure bridge %d for %s\n",
|
||||
__func__, ret, dn->full_name);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_read_config - Read PCI config space
|
||||
* @dn: device node
|
||||
* @where: PCI address
|
||||
* @size: size to read
|
||||
* @val: return value
|
||||
*
|
||||
* Read config space from the speicifed device
|
||||
*/
|
||||
static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val)
{
	/* Look up the node's PCI data and delegate to the RTAS reader */
	return rtas_read_config(PCI_DN(dn), where, size, val);
}
|
||||
|
||||
/**
|
||||
* pseries_eeh_write_config - Write PCI config space
|
||||
* @dn: device node
|
||||
* @where: PCI address
|
||||
* @size: size to write
|
||||
* @val: value to be written
|
||||
*
|
||||
* Write config space to the specified device
|
||||
*/
|
||||
static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val)
{
	/* Look up the node's PCI data and delegate to the RTAS writer */
	return rtas_write_config(PCI_DN(dn), where, size, val);
}
|
||||
|
||||
static struct eeh_ops pseries_eeh_ops = {
|
||||
.name = "pseries",
|
||||
.init = pseries_eeh_init,
|
||||
.set_option = pseries_eeh_set_option,
|
||||
.get_pe_addr = pseries_eeh_get_pe_addr,
|
||||
.get_state = pseries_eeh_get_state,
|
||||
.reset = pseries_eeh_reset,
|
||||
.wait_state = pseries_eeh_wait_state,
|
||||
.get_log = pseries_eeh_get_log,
|
||||
.configure_bridge = pseries_eeh_configure_bridge,
|
||||
.read_config = pseries_eeh_read_config,
|
||||
.write_config = pseries_eeh_write_config
|
||||
};
|
||||
|
||||
/**
|
||||
* eeh_pseries_init - Register platform dependent EEH operations
|
||||
*
|
||||
* EEH initialization on pseries platform. This function should be
|
||||
* called before any EEH related functions.
|
||||
*/
|
||||
int __init eeh_pseries_init(void)
{
	int rc;

	/* Register the pseries callback table with the EEH core */
	rc = eeh_ops_register(&pseries_eeh_ops);

	return rc;
}
|
@ -28,7 +28,7 @@
|
||||
#include <asm/pci-bridge.h>
|
||||
|
||||
/**
|
||||
* EEH_SHOW_ATTR -- create sysfs entry for eeh statistic
|
||||
* EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
|
||||
* @_name: name of file in sysfs directory
|
||||
* @_memb: name of member in struct eeh_dev to access
|
||||
* @_format: printf format for display
|
||||
@ -41,24 +41,21 @@ static ssize_t eeh_show_##_name(struct device *dev, \
|
||||
struct device_attribute *attr, char *buf) \
|
||||
{ \
|
||||
struct pci_dev *pdev = to_pci_dev(dev); \
|
||||
struct device_node *dn = pci_device_to_OF_node(pdev); \
|
||||
struct pci_dn *pdn; \
|
||||
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \
|
||||
\
|
||||
if (!dn || PCI_DN(dn) == NULL) \
|
||||
return 0; \
|
||||
if (!edev) \
|
||||
return 0; \
|
||||
\
|
||||
pdn = PCI_DN(dn); \
|
||||
return sprintf(buf, _format "\n", pdn->_memb); \
|
||||
return sprintf(buf, _format "\n", edev->_memb); \
|
||||
} \
|
||||
static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
|
||||
|
||||
|
||||
EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x");
|
||||
EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x");
|
||||
EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x");
|
||||
EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d");
|
||||
EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d");
|
||||
EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d");
|
||||
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
|
||||
EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
|
||||
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
|
||||
EEH_SHOW_ATTR(eeh_check_count, check_count, "%d" );
|
||||
EEH_SHOW_ATTR(eeh_freeze_count, freeze_count, "%d" );
|
||||
EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d" );
|
||||
|
||||
void eeh_sysfs_add_device(struct pci_dev *pdev)
|
||||
{
|
||||
|
@ -217,7 +217,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
|
||||
if (!dn)
|
||||
return NULL;
|
||||
|
||||
dn = find_device_pe(dn);
|
||||
dn = eeh_find_device_pe(dn);
|
||||
if (!dn)
|
||||
return NULL;
|
||||
|
||||
|
@ -147,6 +147,9 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
|
||||
|
||||
pci_devs_phb_init_dynamic(phb);
|
||||
|
||||
/* Create EEH devices for the PHB */
|
||||
eeh_dev_phb_init_dynamic(phb);
|
||||
|
||||
if (dn->child)
|
||||
eeh_add_device_tree_early(dn);
|
||||
|
||||
|
@ -260,8 +260,12 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
|
||||
switch (action) {
|
||||
case PSERIES_RECONFIG_ADD:
|
||||
pci = np->parent->data;
|
||||
if (pci)
|
||||
if (pci) {
|
||||
update_dn_pci_info(np, pci->phb);
|
||||
|
||||
/* Create EEH device for the OF node */
|
||||
eeh_dev_init(np, pci->phb);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
err = NOTIFY_DONE;
|
||||
@ -381,6 +385,7 @@ static void __init pSeries_setup_arch(void)
|
||||
|
||||
/* Find and initialize PCI host bridges */
|
||||
init_pci_config_tokens();
|
||||
eeh_pseries_init();
|
||||
find_and_init_phbs();
|
||||
pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
|
||||
eeh_init();
|
||||
|
@ -58,6 +58,9 @@ struct device_node {
|
||||
struct kref kref;
|
||||
unsigned long _flags;
|
||||
void *data;
|
||||
#if defined(CONFIG_EEH)
|
||||
struct eeh_dev *edev;
|
||||
#endif
|
||||
#if defined(CONFIG_SPARC)
|
||||
char *path_component_name;
|
||||
unsigned int unique_id;
|
||||
@ -72,6 +75,13 @@ struct of_phandle_args {
|
||||
uint32_t args[MAX_PHANDLE_ARGS];
|
||||
};
|
||||
|
||||
#if defined(CONFIG_EEH)
|
||||
static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
|
||||
{
|
||||
return dn->edev;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SPARC) || !defined(CONFIG_OF)
|
||||
/* Dummy ref counting routines - to be implemented later */
|
||||
static inline struct device_node *of_node_get(struct device_node *node)
|
||||
|
@ -1647,6 +1647,13 @@ static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
|
||||
static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
|
||||
#endif /* CONFIG_OF */
|
||||
|
||||
#ifdef CONFIG_EEH
|
||||
static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
|
||||
{
|
||||
return pdev->dev.archdata.edev;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device
|
||||
* @pdev: the PCI device
|
||||
|
Loading…
Reference in New Issue
Block a user