87d0f2a553
This addresses deadlocks in these common cases in hierarchies containing two switches: - All involved ports are runtime suspended and they are unplugged. This can happen easily if the drivers involved automatically enable runtime PM (xHCI for example does that). - System is suspended (e.g., closing the lid on a laptop) with a dock + something else connected, and the dock is unplugged while suspended. These cases lead to the following deadlock: INFO: task irq/126-pciehp:198 blocked for more than 120 seconds. irq/126-pciehp D 0 198 2 0x80000000 Call Trace: schedule+0x2c/0x80 schedule_timeout+0x246/0x350 wait_for_completion+0xb7/0x140 kthread_stop+0x49/0x110 free_irq+0x32/0x70 pcie_shutdown_notification+0x2f/0x50 pciehp_remove+0x27/0x50 pcie_port_remove_service+0x36/0x50 device_release_driver+0x12/0x20 bus_remove_device+0xec/0x160 device_del+0x13b/0x350 device_unregister+0x1a/0x60 remove_iter+0x1e/0x30 device_for_each_child+0x56/0x90 pcie_port_device_remove+0x22/0x40 pcie_portdrv_remove+0x20/0x60 pci_device_remove+0x3e/0xc0 device_release_driver_internal+0x18c/0x250 device_release_driver+0x12/0x20 pci_stop_bus_device+0x6f/0x90 pci_stop_bus_device+0x31/0x90 pci_stop_and_remove_bus_device+0x12/0x20 pciehp_unconfigure_device+0x88/0x140 pciehp_disable_slot+0x6a/0x110 pciehp_handle_presence_or_link_change+0x263/0x400 pciehp_ist+0x1c9/0x1d0 irq_thread_fn+0x24/0x60 irq_thread+0xeb/0x190 kthread+0x120/0x140 INFO: task irq/190-pciehp:2288 blocked for more than 120 seconds. irq/190-pciehp D 0 2288 2 0x80000000 Call Trace: __schedule+0x2a2/0x880 schedule+0x2c/0x80 schedule_preempt_disabled+0xe/0x10 mutex_lock+0x2c/0x30 pci_lock_rescan_remove+0x15/0x20 pciehp_unconfigure_device+0x4d/0x140 pciehp_disable_slot+0x6a/0x110 pciehp_handle_presence_or_link_change+0x263/0x400 pciehp_ist+0x1c9/0x1d0 irq_thread_fn+0x24/0x60 irq_thread+0xeb/0x190 kthread+0x120/0x140 What happens here is that the whole hierarchy is runtime resumed and the parent PCIe downstream port, which got the hot-remove event, starts removing devices below it, taking pci_lock_rescan_remove() lock. When the child PCIe port is runtime resumed it calls pciehp_check_presence() which ends up calling pciehp_card_present() and pciehp_check_link_active(). Both of these use pcie_capability_read_word(), which notices that the underlying device is already gone and returns PCIBIOS_DEVICE_NOT_FOUND with the capability value set to 0. When pciehp gets this value it thinks that its child device is also hot-removed and schedules its IRQ thread to handle the event. The deadlock happens when the child's IRQ thread runs and tries to acquire pci_lock_rescan_remove() which is already taken by the parent and the parent waits for the child's IRQ thread to finish. Prevent this from happening by checking the return value of pcie_capability_read_word() and if it is PCIBIOS_DEVICE_NOT_FOUND stop performing any hot-removal activities. [bhelgaas: add common scenarios to commit log] Link: https://lore.kernel.org/r/20191029170022.57528-2-mika.westerberg@linux.intel.com Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com> Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
366 lines
9.3 KiB
C
366 lines
9.3 KiB
C
// SPDX-License-Identifier: GPL-2.0+
|
|
/*
|
|
* PCI Express Hot Plug Controller Driver
|
|
*
|
|
* Copyright (C) 1995,2001 Compaq Computer Corporation
|
|
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
|
|
* Copyright (C) 2001 IBM Corp.
|
|
* Copyright (C) 2003-2004 Intel Corporation
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
|
|
*
|
|
* Authors:
|
|
* Dan Zink <dan.zink@compaq.com>
|
|
* Greg Kroah-Hartman <greg@kroah.com>
|
|
* Dely Sy <dely.l.sy@intel.com>"
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "pciehp: " fmt
|
|
#define dev_fmt pr_fmt
|
|
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/pci.h>
|
|
#include "pciehp.h"
|
|
|
|
#include "../pci.h"
|
|
|
|
/* Global variables */
|
|
bool pciehp_poll_mode;
|
|
int pciehp_poll_time;
|
|
|
|
/*
|
|
* not really modular, but the easiest way to keep compat with existing
|
|
* bootargs behaviour is to continue using module_param here.
|
|
*/
|
|
module_param(pciehp_poll_mode, bool, 0644);
|
|
module_param(pciehp_poll_time, int, 0644);
|
|
MODULE_PARM_DESC(pciehp_poll_mode, "Using polling mechanism for hot-plug events or not");
|
|
MODULE_PARM_DESC(pciehp_poll_time, "Polling mechanism frequency, in seconds");
|
|
|
|
static int set_attention_status(struct hotplug_slot *slot, u8 value);
|
|
static int get_power_status(struct hotplug_slot *slot, u8 *value);
|
|
static int get_latch_status(struct hotplug_slot *slot, u8 *value);
|
|
static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
|
|
|
|
static int init_slot(struct controller *ctrl)
|
|
{
|
|
struct hotplug_slot_ops *ops;
|
|
char name[SLOT_NAME_SIZE];
|
|
int retval;
|
|
|
|
/* Setup hotplug slot ops */
|
|
ops = kzalloc(sizeof(*ops), GFP_KERNEL);
|
|
if (!ops)
|
|
return -ENOMEM;
|
|
|
|
ops->enable_slot = pciehp_sysfs_enable_slot;
|
|
ops->disable_slot = pciehp_sysfs_disable_slot;
|
|
ops->get_power_status = get_power_status;
|
|
ops->get_adapter_status = get_adapter_status;
|
|
ops->reset_slot = pciehp_reset_slot;
|
|
if (MRL_SENS(ctrl))
|
|
ops->get_latch_status = get_latch_status;
|
|
if (ATTN_LED(ctrl)) {
|
|
ops->get_attention_status = pciehp_get_attention_status;
|
|
ops->set_attention_status = set_attention_status;
|
|
} else if (ctrl->pcie->port->hotplug_user_indicators) {
|
|
ops->get_attention_status = pciehp_get_raw_indicator_status;
|
|
ops->set_attention_status = pciehp_set_raw_indicator_status;
|
|
}
|
|
|
|
/* register this slot with the hotplug pci core */
|
|
ctrl->hotplug_slot.ops = ops;
|
|
snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
|
|
|
|
retval = pci_hp_initialize(&ctrl->hotplug_slot,
|
|
ctrl->pcie->port->subordinate, 0, name);
|
|
if (retval) {
|
|
ctrl_err(ctrl, "pci_hp_initialize failed: error %d\n", retval);
|
|
kfree(ops);
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
static void cleanup_slot(struct controller *ctrl)
|
|
{
|
|
struct hotplug_slot *hotplug_slot = &ctrl->hotplug_slot;
|
|
|
|
pci_hp_destroy(hotplug_slot);
|
|
kfree(hotplug_slot->ops);
|
|
}
|
|
|
|
/*
|
|
* set_attention_status - Turns the Attention Indicator on, off or blinking
|
|
*/
|
|
static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
|
|
{
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
|
struct pci_dev *pdev = ctrl->pcie->port;
|
|
|
|
if (status)
|
|
status <<= PCI_EXP_SLTCTL_ATTN_IND_SHIFT;
|
|
else
|
|
status = PCI_EXP_SLTCTL_ATTN_IND_OFF;
|
|
|
|
pci_config_pm_runtime_get(pdev);
|
|
pciehp_set_indicators(ctrl, INDICATOR_NOOP, status);
|
|
pci_config_pm_runtime_put(pdev);
|
|
return 0;
|
|
}
|
|
|
|
static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
|
|
{
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
|
struct pci_dev *pdev = ctrl->pcie->port;
|
|
|
|
pci_config_pm_runtime_get(pdev);
|
|
pciehp_get_power_status(ctrl, value);
|
|
pci_config_pm_runtime_put(pdev);
|
|
return 0;
|
|
}
|
|
|
|
static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
|
|
{
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
|
struct pci_dev *pdev = ctrl->pcie->port;
|
|
|
|
pci_config_pm_runtime_get(pdev);
|
|
pciehp_get_latch_status(ctrl, value);
|
|
pci_config_pm_runtime_put(pdev);
|
|
return 0;
|
|
}
|
|
|
|
static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
|
|
{
|
|
struct controller *ctrl = to_ctrl(hotplug_slot);
|
|
struct pci_dev *pdev = ctrl->pcie->port;
|
|
int ret;
|
|
|
|
pci_config_pm_runtime_get(pdev);
|
|
ret = pciehp_card_present_or_link_active(ctrl);
|
|
pci_config_pm_runtime_put(pdev);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
*value = ret;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* pciehp_check_presence() - synthesize event if presence has changed
|
|
*
|
|
* On probe and resume, an explicit presence check is necessary to bring up an
|
|
* occupied slot or bring down an unoccupied slot. This can't be triggered by
|
|
* events in the Slot Status register, they may be stale and are therefore
|
|
* cleared. Secondly, sending an interrupt for "events that occur while
|
|
* interrupt generation is disabled [when] interrupt generation is subsequently
|
|
* enabled" is optional per PCIe r4.0, sec 6.7.3.4.
|
|
*/
|
|
static void pciehp_check_presence(struct controller *ctrl)
|
|
{
|
|
int occupied;
|
|
|
|
down_read(&ctrl->reset_lock);
|
|
mutex_lock(&ctrl->state_lock);
|
|
|
|
occupied = pciehp_card_present_or_link_active(ctrl);
|
|
if ((occupied > 0 && (ctrl->state == OFF_STATE ||
|
|
ctrl->state == BLINKINGON_STATE)) ||
|
|
(!occupied && (ctrl->state == ON_STATE ||
|
|
ctrl->state == BLINKINGOFF_STATE)))
|
|
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
|
|
|
|
mutex_unlock(&ctrl->state_lock);
|
|
up_read(&ctrl->reset_lock);
|
|
}
|
|
|
|
static int pciehp_probe(struct pcie_device *dev)
|
|
{
|
|
int rc;
|
|
struct controller *ctrl;
|
|
|
|
/* If this is not a "hotplug" service, we have no business here. */
|
|
if (dev->service != PCIE_PORT_SERVICE_HP)
|
|
return -ENODEV;
|
|
|
|
if (!dev->port->subordinate) {
|
|
/* Can happen if we run out of bus numbers during probe */
|
|
pci_err(dev->port,
|
|
"Hotplug bridge without secondary bus, ignoring\n");
|
|
return -ENODEV;
|
|
}
|
|
|
|
ctrl = pcie_init(dev);
|
|
if (!ctrl) {
|
|
pci_err(dev->port, "Controller initialization failed\n");
|
|
return -ENODEV;
|
|
}
|
|
set_service_data(dev, ctrl);
|
|
|
|
/* Setup the slot information structures */
|
|
rc = init_slot(ctrl);
|
|
if (rc) {
|
|
if (rc == -EBUSY)
|
|
ctrl_warn(ctrl, "Slot already registered by another hotplug driver\n");
|
|
else
|
|
ctrl_err(ctrl, "Slot initialization failed (%d)\n", rc);
|
|
goto err_out_release_ctlr;
|
|
}
|
|
|
|
/* Enable events after we have setup the data structures */
|
|
rc = pcie_init_notification(ctrl);
|
|
if (rc) {
|
|
ctrl_err(ctrl, "Notification initialization failed (%d)\n", rc);
|
|
goto err_out_free_ctrl_slot;
|
|
}
|
|
|
|
/* Publish to user space */
|
|
rc = pci_hp_add(&ctrl->hotplug_slot);
|
|
if (rc) {
|
|
ctrl_err(ctrl, "Publication to user space failed (%d)\n", rc);
|
|
goto err_out_shutdown_notification;
|
|
}
|
|
|
|
pciehp_check_presence(ctrl);
|
|
|
|
return 0;
|
|
|
|
err_out_shutdown_notification:
|
|
pcie_shutdown_notification(ctrl);
|
|
err_out_free_ctrl_slot:
|
|
cleanup_slot(ctrl);
|
|
err_out_release_ctlr:
|
|
pciehp_release_ctrl(ctrl);
|
|
return -ENODEV;
|
|
}
|
|
|
|
static void pciehp_remove(struct pcie_device *dev)
|
|
{
|
|
struct controller *ctrl = get_service_data(dev);
|
|
|
|
pci_hp_del(&ctrl->hotplug_slot);
|
|
pcie_shutdown_notification(ctrl);
|
|
cleanup_slot(ctrl);
|
|
pciehp_release_ctrl(ctrl);
|
|
}
|
|
|
|
#ifdef CONFIG_PM
|
|
static bool pme_is_native(struct pcie_device *dev)
|
|
{
|
|
const struct pci_host_bridge *host;
|
|
|
|
host = pci_find_host_bridge(dev->port->bus);
|
|
return pcie_ports_native || host->native_pme;
|
|
}
|
|
|
|
static void pciehp_disable_interrupt(struct pcie_device *dev)
|
|
{
|
|
/*
|
|
* Disable hotplug interrupt so that it does not trigger
|
|
* immediately when the downstream link goes down.
|
|
*/
|
|
if (pme_is_native(dev))
|
|
pcie_disable_interrupt(get_service_data(dev));
|
|
}
|
|
|
|
#ifdef CONFIG_PM_SLEEP
|
|
static int pciehp_suspend(struct pcie_device *dev)
|
|
{
|
|
/*
|
|
* If the port is already runtime suspended we can keep it that
|
|
* way.
|
|
*/
|
|
if (dev_pm_smart_suspend_and_suspended(&dev->port->dev))
|
|
return 0;
|
|
|
|
pciehp_disable_interrupt(dev);
|
|
return 0;
|
|
}
|
|
|
|
static int pciehp_resume_noirq(struct pcie_device *dev)
|
|
{
|
|
struct controller *ctrl = get_service_data(dev);
|
|
|
|
/* pci_restore_state() just wrote to the Slot Control register */
|
|
ctrl->cmd_started = jiffies;
|
|
ctrl->cmd_busy = true;
|
|
|
|
/* clear spurious events from rediscovery of inserted card */
|
|
if (ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE)
|
|
pcie_clear_hotplug_events(ctrl);
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
static int pciehp_resume(struct pcie_device *dev)
|
|
{
|
|
struct controller *ctrl = get_service_data(dev);
|
|
|
|
if (pme_is_native(dev))
|
|
pcie_enable_interrupt(ctrl);
|
|
|
|
pciehp_check_presence(ctrl);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int pciehp_runtime_suspend(struct pcie_device *dev)
|
|
{
|
|
pciehp_disable_interrupt(dev);
|
|
return 0;
|
|
}
|
|
|
|
static int pciehp_runtime_resume(struct pcie_device *dev)
|
|
{
|
|
struct controller *ctrl = get_service_data(dev);
|
|
|
|
/* pci_restore_state() just wrote to the Slot Control register */
|
|
ctrl->cmd_started = jiffies;
|
|
ctrl->cmd_busy = true;
|
|
|
|
/* clear spurious events from rediscovery of inserted card */
|
|
if ((ctrl->state == ON_STATE || ctrl->state == BLINKINGOFF_STATE) &&
|
|
pme_is_native(dev))
|
|
pcie_clear_hotplug_events(ctrl);
|
|
|
|
return pciehp_resume(dev);
|
|
}
|
|
#endif /* PM */
|
|
|
|
static struct pcie_port_service_driver hpdriver_portdrv = {
|
|
.name = "pciehp",
|
|
.port_type = PCIE_ANY_PORT,
|
|
.service = PCIE_PORT_SERVICE_HP,
|
|
|
|
.probe = pciehp_probe,
|
|
.remove = pciehp_remove,
|
|
|
|
#ifdef CONFIG_PM
|
|
#ifdef CONFIG_PM_SLEEP
|
|
.suspend = pciehp_suspend,
|
|
.resume_noirq = pciehp_resume_noirq,
|
|
.resume = pciehp_resume,
|
|
#endif
|
|
.runtime_suspend = pciehp_runtime_suspend,
|
|
.runtime_resume = pciehp_runtime_resume,
|
|
#endif /* PM */
|
|
};
|
|
|
|
int __init pcie_hp_init(void)
|
|
{
|
|
int retval = 0;
|
|
|
|
retval = pcie_port_service_register(&hpdriver_portdrv);
|
|
pr_debug("pcie_port_service_register = %d\n", retval);
|
|
if (retval)
|
|
pr_debug("Failure to register service\n");
|
|
|
|
return retval;
|
|
}
|