From 3134689f98f9e09004a4727370adc46e7635b4be Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 15 Oct 2021 13:58:40 -0500 Subject: [PATCH 1/5] PCI/portdrv: Rename pm_iter() to pcie_port_device_iter() Rename pm_iter() to pcie_port_device_iter() and make it visible outside CONFIG_PM and portdrv_core.c so it can be used for pciehp slot reset recovery. [bhelgaas: split into its own patch] Link: https://lore.kernel.org/linux-pci/08c046b0-c9f2-3489-eeef-7e7aca435bb9@gmail.com/ Link: https://lore.kernel.org/r/251f4edcc04c14f873ff1c967bc686169cd07d2d.1627638184.git.lukas@wunner.de Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv.h | 1 + drivers/pci/pcie/portdrv_core.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2ff5724b8f13..6126ee4676a7 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -110,6 +110,7 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *new); extern struct bus_type pcie_port_bus_type; int pcie_port_device_register(struct pci_dev *dev); +int pcie_port_device_iter(struct device *dev, void *data); #ifdef CONFIG_PM int pcie_port_device_suspend(struct device *dev); int pcie_port_device_resume_noirq(struct device *dev); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 3ee63968deaa..604feeb84ee4 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -367,24 +367,24 @@ error_disable: return status; } -#ifdef CONFIG_PM -typedef int (*pcie_pm_callback_t)(struct pcie_device *); +typedef int (*pcie_callback_t)(struct pcie_device *); -static int pm_iter(struct device *dev, void *data) +int pcie_port_device_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; size_t offset = *(size_t *)data; - pcie_pm_callback_t cb; + pcie_callback_t cb; if ((dev->bus == &pcie_port_bus_type) && dev->driver) { service_driver = to_service_driver(dev->driver); - cb = *(pcie_pm_callback_t *)((void *)service_driver + offset); + cb = *(pcie_callback_t *)((void *)service_driver + offset); if (cb) return cb(to_pcie_device(dev)); } return 0; } +#ifdef CONFIG_PM /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle @@ -392,13 +392,13 @@ static int pm_iter(struct device *dev, void *data) int pcie_port_device_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, suspend); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -408,7 +408,7 @@ int pcie_port_device_resume_noirq(struct device *dev) int pcie_port_device_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -418,7 +418,7 @@ int pcie_port_device_resume(struct device *dev) int pcie_port_device_runtime_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } /** @@ -428,7 +428,7 @@ int pcie_port_device_runtime_suspend(struct device *dev) int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); - return device_for_each_child(dev, &off, pm_iter); + return device_for_each_child(dev, &off, pcie_port_device_iter); } #endif /* PM */ From ea401499e943c307e6d44af6c2b4e068643e7884 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 31 Jul 2021 14:39:01 +0200 Subject: [PATCH 2/5] PCI: pciehp: Ignore Link Down/Up caused by error-induced Hot Reset Stuart Hayes reports that an error handled by DPC at a Root Port results in pciehp gratuitously bringing down a subordinate hotplug port: RP -- UP -- DP -- UP -- DP (hotplug) -- EP pciehp brings the slot down because the Link to the Endpoint goes down. That is caused by a Hot Reset being propagated as a result of DPC. Per PCIe Base Spec 5.0, section 6.6.1 "Conventional Reset": For a Switch, the following must cause a hot reset to be sent on all Downstream Ports: [...] * The Data Link Layer of the Upstream Port reporting DL_Down status. In Switches that support Link speeds greater than 5.0 GT/s, the Upstream Port must direct the LTSSM of each Downstream Port to the Hot Reset state, but not hold the LTSSMs in that state. This permits each Downstream Port to begin Link training immediately after its hot reset completes. This behavior is recommended for all Switches. * Receiving a hot reset on the Upstream Port. Once DPC recovers, pcie_do_recovery() walks down the hierarchy and invokes pcie_portdrv_slot_reset() to restore each port's config space. At that point, a hotplug interrupt is signaled per PCIe Base Spec r5.0, section 6.7.3.4 "Software Notification of Hot-Plug Events": If the Port is enabled for edge-triggered interrupt signaling using MSI or MSI-X, an interrupt message must be sent every time the logical AND of the following conditions transitions from FALSE to TRUE: [...] * The Hot-Plug Interrupt Enable bit in the Slot Control register is set to 1b. * At least one hot-plug event status bit in the Slot Status register and its associated enable bit in the Slot Control register are both set to 1b. Prevent pciehp from gratuitously bringing down the slot by clearing the error-induced Data Link Layer State Changed event before restoring config space. Afterwards, check whether the link has unexpectedly failed to retrain and synthesize a DLLSC event if so. Allow each pcie_port_service_driver (one of them being pciehp) to define a slot_reset callback and re-use the existing pm_iter() function to iterate over the callbacks. Thereby, the Endpoint driver remains bound throughout error recovery and may restore the device to working state. Surprise removal during error recovery is detected through a Presence Detect Changed event. The hotplug port is expected to not signal that event as a result of a Hot Reset. The issue isn't DPC-specific, it also occurs when an error is handled by AER through aer_root_reset(). So while the issue was noticed only now, it's been around since 2006 when AER support was first introduced. [bhelgaas: drop PCI_ERROR_RECOVERY Kconfig, split pm_iter() rename to preparatory patch] Link: https://lore.kernel.org/linux-pci/08c046b0-c9f2-3489-eeef-7e7aca435bb9@gmail.com/ Fixes: 6c2b374d7485 ("PCI-Express AER implemetation: AER core and aerdriver") Link: https://lore.kernel.org/r/251f4edcc04c14f873ff1c967bc686169cd07d2d.1627638184.git.lukas@wunner.de Reported-by: Stuart Hayes Tested-by: Stuart Hayes Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v2.6.19+: ba952824e6c1: PCI/portdrv: Report reset for frozen channel Cc: Keith Busch --- drivers/pci/hotplug/pciehp.h | 2 ++ drivers/pci/hotplug/pciehp_core.c | 2 ++ drivers/pci/hotplug/pciehp_hpc.c | 26 ++++++++++++++++++++++++++ drivers/pci/pcie/portdrv.h | 2 ++ drivers/pci/pcie/portdrv_pci.c | 3 +++ 5 files changed, 35 insertions(+) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 69fd401691be..918dccbc74b6 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -189,6 +189,8 @@ int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status); int pciehp_set_raw_indicator_status(struct hotplug_slot *h_slot, u8 status); int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status); +int pciehp_slot_reset(struct pcie_device *dev); + static inline const char *slot_name(struct controller *ctrl) { return hotplug_slot_name(&ctrl->hotplug_slot); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index ad3393930ecb..f34114d45259 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -351,6 +351,8 @@ static struct pcie_port_service_driver hpdriver_portdrv = { .runtime_suspend = pciehp_runtime_suspend, .runtime_resume = pciehp_runtime_resume, #endif /* PM */ + + .slot_reset = pciehp_slot_reset, }; int __init pcie_hp_init(void) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 3024d7e85e6a..83a0fa119cae 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -862,6 +862,32 @@ void pcie_disable_interrupt(struct controller *ctrl) pcie_write_cmd(ctrl, 0, mask); } +/** + * pciehp_slot_reset() - ignore link event caused by error-induced hot reset + * @dev: PCI Express port service device + * + * Called from pcie_portdrv_slot_reset() after AER or DPC initiated a reset + * further up in the hierarchy to recover from an error. The reset was + * propagated down to this hotplug port. Ignore the resulting link flap. + * If the link failed to retrain successfully, synthesize the ignored event. + * Surprise removal during reset is detected through Presence Detect Changed. + */ +int pciehp_slot_reset(struct pcie_device *dev) +{ + struct controller *ctrl = get_service_data(dev); + + if (ctrl->state != ON_STATE) + return 0; + + pcie_capability_write_word(dev->port, PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_DLLSC); + + if (!pciehp_check_link_active(ctrl)) + pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + + return 0; +} + /* * pciehp has a 1:1 bus:slot relationship so we ultimately want a secondary * bus reset of the bridge, but at the same time we want to ensure that it is diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 6126ee4676a7..41fe1ffd5907 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -85,6 +85,8 @@ struct pcie_port_service_driver { int (*runtime_suspend)(struct pcie_device *dev); int (*runtime_resume)(struct pcie_device *dev); + int (*slot_reset)(struct pcie_device *dev); + /* Device driver may resume normal operations */ void (*error_resume)(struct pci_dev *dev); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index c7ff1eea225a..1af74c3d9d5d 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -160,6 +160,9 @@ static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) { + size_t off = offsetof(struct pcie_port_service_driver, slot_reset); + device_for_each_child(&dev->dev, &off, pcie_port_device_iter); + pci_restore_state(dev); pci_save_state(dev); return PCI_ERS_RESULT_RECOVERED; From 80dcd36c388a34d5c2d0b9c8c171887903dbefbb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 31 Jul 2021 14:39:02 +0200 Subject: [PATCH 3/5] PCI/portdrv: Remove unused resume err_handler Commit 3e41a317ae45 ("PCI/AER: Remove unused aer_error_resume()") removed the resume err_handler from AER. Since no other port service implements the callback, support for it can be removed from portdrv. It can be revived later if need be, preferably by re-using the pcie_port_device_iter() iterator. Link: https://lore.kernel.org/r/25334149b604e005058aeb0fdf51e01f991d5d74.1627638184.git.lukas@wunner.de Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: Keith Busch --- drivers/pci/pcie/portdrv.h | 3 --- drivers/pci/pcie/portdrv_pci.c | 24 ------------------------ 2 files changed, 27 deletions(-) diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 41fe1ffd5907..e9fe0fef6cde 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -87,9 +87,6 @@ struct pcie_port_service_driver { int (*slot_reset)(struct pcie_device *dev); - /* Device driver may resume normal operations */ - void (*error_resume)(struct pci_dev *dev); - int port_type; /* Type of the port this driver can handle */ u32 service; /* Port service this device represents */ diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 1af74c3d9d5d..35eca6277a96 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -173,29 +173,6 @@ static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) return PCI_ERS_RESULT_RECOVERED; } -static int resume_iter(struct device *device, void *data) -{ - struct pcie_device *pcie_device; - struct pcie_port_service_driver *driver; - - if (device->bus == &pcie_port_bus_type && device->driver) { - driver = to_service_driver(device->driver); - if (driver && driver->error_resume) { - pcie_device = to_pcie_device(device); - - /* Forward error message to service drivers */ - driver->error_resume(pcie_device->port); - } - } - - return 0; -} - -static void pcie_portdrv_err_resume(struct pci_dev *dev) -{ - device_for_each_child(&dev->dev, NULL, resume_iter); -} - /* * LINUX Device Driver Model */ @@ -213,7 +190,6 @@ static const struct pci_error_handlers pcie_portdrv_err_handler = { .error_detected = pcie_portdrv_error_detected, .slot_reset = pcie_portdrv_slot_reset, .mmio_enabled = pcie_portdrv_mmio_enabled, - .resume = pcie_portdrv_err_resume, }; static struct pci_driver pcie_portdriver = { From bb6951b84fb46f4048ead76c524a084e7b132463 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 31 Jul 2021 14:39:03 +0200 Subject: [PATCH 4/5] PCI/portdrv: Remove unused pcie_port_bus_{,un}register() declarations Commit c6c889d932bb ("PCI/portdrv: Remove pcie_port_bus_type link order dependency") removed pcie_port_bus_{,un}register() but erroneously retained their declarations in portdrv.h. Remove them as well. Link: https://lore.kernel.org/r/7fd76b0591c37287ab94d911d8fd9ab9a2afcd16.1627638184.git.lukas@wunner.de Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index e9fe0fef6cde..0ef4bf5f811d 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -118,8 +118,6 @@ int pcie_port_device_runtime_suspend(struct device *dev); int pcie_port_device_runtime_resume(struct device *dev); #endif void pcie_port_device_remove(struct pci_dev *dev); -int __must_check pcie_port_bus_register(void); -void pcie_port_bus_unregister(void); struct pci_dev; From f9a6c8ad4922bf386c366e5ece453d459e628784 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 31 Jul 2021 14:39:04 +0200 Subject: [PATCH 5/5] PCI/ERR: Reduce compile time for CONFIG_PCIEAER=n The sole non-static function in err.c, pcie_do_recovery(), is only called from: * aer.c (if CONFIG_PCIEAER=y) * dpc.c (if CONFIG_PCIE_DPC=y, which depends on CONFIG_PCIEAER) * edr.c (if CONFIG_PCIE_EDR=y, which depends on CONFIG_PCIE_DPC) Thus, err.c need not be compiled if CONFIG_PCIEAER=n. Also, pci_uevent_ers() and pcie_clear_device_status(), which are called from err.c, can be #ifdef'ed away unless CONFIG_PCIEAER=y. Since x86_64_defconfig doesn't enable CONFIG_PCIEAER, this change may slightly reduce compile time for anyone doing a test build with that config. Link: https://lore.kernel.org/r/98f9041151268c1c035ab64cca320ad86803f64a.1627638184.git.lukas@wunner.de Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-driver.c | 2 +- drivers/pci/pci.c | 2 ++ drivers/pci/pcie/Makefile | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 2761ab86490d..b0923bdcdb2e 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1542,7 +1542,7 @@ static int pci_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } -#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) /** * pci_uevent_ers - emit a uevent during recovery path of PCI device * @pdev: PCI device undergoing error recovery diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ce2ab62b64cf..e9d95189c79b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2218,6 +2218,7 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) } EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state); +#ifdef CONFIG_PCIEAER void pcie_clear_device_status(struct pci_dev *dev) { u16 sta; @@ -2225,6 +2226,7 @@ void pcie_clear_device_status(struct pci_dev *dev) pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); } +#endif /** * pcie_clear_root_pme_status - Clear root port PME interrupt status. diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index b2980db88cc0..5783a2f79e6a 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,12 +2,12 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o rcec.o +pcieportdrv-y := portdrv_core.o portdrv_pci.o rcec.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o obj-$(CONFIG_PCIEASPM) += aspm.o -obj-$(CONFIG_PCIEAER) += aer.o +obj-$(CONFIG_PCIEAER) += aer.o err.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o