iwlwifi: pcie: remove non-responsive device

If we fail to to grab NIC access because the device is not responding
(i.e. CSR_GP_CNTRL returns 0xFFFFFFFF), remove the device from the PCI
bus, to avoid any further damage, and to let the user space rescan.

In order to inform the userspace that a rescan is needed, we send a
kobject uevent with "INACCESSIBLE".

This functionality is disabled by default, but can be enabled via a
new module parameter called "remove_when_gone".  In the future we may
change this module parameter to include 3 modes instead: do nothing;
auto-rescan or; send uevent.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Rajat Jain <rajatja@google.com>
This commit is contained in:
Luca Coelho 2017-12-12 08:58:41 +02:00
parent de460ddd8b
commit 49564a806f
4 changed files with 84 additions and 3 deletions

View File

@ -1850,3 +1850,9 @@ MODULE_PARM_DESC(d0i3_timeout, "Timeout to D0i3 entry when idle (ms)");
module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");
module_param_named(remove_when_gone,
iwlwifi_mod_params.remove_when_gone, bool,
0444);
MODULE_PARM_DESC(remove_when_gone,
"Remove dev from PCIe bus if it is deemed inaccessible (default: false)");

View File

@ -122,6 +122,7 @@ enum iwl_uapsd_disable {
* @lar_disable: disable LAR (regulatory), default = 0
* @fw_monitor: allow to use firmware monitor
* @disable_11ac: disable VHT capabilities, default = false.
* @remove_when_gone: remove an inaccessible device from the PCIe bus.
*/
struct iwl_mod_params {
int swcrypto;
@ -143,6 +144,7 @@ struct iwl_mod_params {
bool lar_disable;
bool fw_monitor;
bool disable_11ac;
bool remove_when_gone;
};
#endif /* #__iwl_modparams_h__ */

View File

@ -383,6 +383,8 @@ struct iwl_self_init_dram {
* @hw_init_mask: initial unmasked hw causes
* @fh_mask: current unmasked fh causes
* @hw_mask: current unmasked hw causes
* @in_rescan: true if we have triggered a device rescan
* @scheduled_for_removal: true if we have scheduled a device removal
*/
struct iwl_trans_pcie {
struct iwl_rxq *rxq;
@ -464,6 +466,9 @@ struct iwl_trans_pcie {
u32 fh_mask;
u32 hw_mask;
cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
u16 tx_cmd_queue_size;
bool in_rescan;
bool scheduled_for_removal;
};
static inline struct iwl_trans_pcie *

View File

@ -75,6 +75,7 @@
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/pm_runtime.h>
#include <linux/module.h>
#include "iwl-drv.h"
#include "iwl-trans.h"
@ -1935,6 +1936,29 @@ static void iwl_trans_pcie_set_pmi(struct iwl_trans *trans, bool state)
clear_bit(STATUS_TPOWER_PMI, &trans->status);
}
struct iwl_trans_pcie_removal {
struct pci_dev *pdev;
struct work_struct work;
};
static void iwl_trans_pcie_removal_wk(struct work_struct *wk)
{
struct iwl_trans_pcie_removal *removal =
container_of(wk, struct iwl_trans_pcie_removal, work);
struct pci_dev *pdev = removal->pdev;
char *prop[] = {"EVENT=INACCESSIBLE", NULL};
dev_err(&pdev->dev, "Device gone - attempting removal\n");
kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, prop);
pci_lock_rescan_remove();
pci_dev_put(pdev);
pci_stop_and_remove_bus_device(pdev);
pci_unlock_rescan_remove();
kfree(removal);
module_put(THIS_MODULE);
}
static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
unsigned long *flags)
{
@ -1977,11 +2001,55 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
(BIT(trans->cfg->csr->flag_mac_clock_ready) |
CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
if (unlikely(ret < 0)) {
iwl_trans_pcie_dump_regs(trans);
iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL);
WARN_ONCE(1,
"Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
iwl_read32(trans, CSR_GP_CNTRL));
cntrl);
iwl_trans_pcie_dump_regs(trans);
if (iwlwifi_mod_params.remove_when_gone && cntrl == ~0U) {
struct iwl_trans_pcie_removal *removal;
if (trans_pcie->scheduled_for_removal)
goto err;
IWL_ERR(trans, "Device gone - scheduling removal!\n");
/*
* get a module reference to avoid doing this
* while unloading anyway and to avoid
* scheduling a work with code that's being
* removed.
*/
if (!try_module_get(THIS_MODULE)) {
IWL_ERR(trans,
"Module is being unloaded - abort\n");
goto err;
}
removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
if (!removal) {
module_put(THIS_MODULE);
goto err;
}
/*
* we don't need to clear this flag, because
* the trans will be freed and reallocated.
*/
trans_pcie->scheduled_for_removal = true;
removal->pdev = to_pci_dev(trans->dev);
INIT_WORK(&removal->work, iwl_trans_pcie_removal_wk);
pci_dev_get(removal->pdev);
schedule_work(&removal->work);
} else {
iwl_write32(trans, CSR_RESET,
CSR_RESET_REG_FLAG_FORCE_NMI);
}
err:
spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags);
return false;
}