iwlwifi: pcie: remove non-responsive device
If we fail to grab NIC access because the device is not responding (i.e. CSR_GP_CNTRL returns 0xFFFFFFFF), remove the device from the PCI bus, to avoid any further damage, and to let the user space rescan. In order to inform the userspace that a rescan is needed, we send a kobject uevent with "INACCESSIBLE". This functionality is disabled by default, but can be enabled via a new module parameter called "remove_when_gone". In the future we may change this module parameter to include 3 modes instead: do nothing, auto-rescan, or send uevent. Signed-off-by: Luca Coelho <luciano.coelho@intel.com> Signed-off-by: Rajat Jain <rajatja@google.com>
This commit is contained in:
parent
de460ddd8b
commit
49564a806f
@ -1850,3 +1850,9 @@ MODULE_PARM_DESC(d0i3_timeout, "Timeout to D0i3 entry when idle (ms)");
|
||||
|
||||
module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
|
||||
MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");
|
||||
|
||||
module_param_named(remove_when_gone,
|
||||
iwlwifi_mod_params.remove_when_gone, bool,
|
||||
0444);
|
||||
MODULE_PARM_DESC(remove_when_gone,
|
||||
"Remove dev from PCIe bus if it is deemed inaccessible (default: false)");
|
||||
|
@ -122,6 +122,7 @@ enum iwl_uapsd_disable {
|
||||
* @lar_disable: disable LAR (regulatory), default = 0
|
||||
* @fw_monitor: allow to use firmware monitor
|
||||
* @disable_11ac: disable VHT capabilities, default = false.
|
||||
* @remove_when_gone: remove an inaccessible device from the PCIe bus.
|
||||
*/
|
||||
struct iwl_mod_params {
|
||||
int swcrypto;
|
||||
@ -143,6 +144,7 @@ struct iwl_mod_params {
|
||||
bool lar_disable;
|
||||
bool fw_monitor;
|
||||
bool disable_11ac;
|
||||
bool remove_when_gone;
|
||||
};
|
||||
|
||||
#endif /* #__iwl_modparams_h__ */
|
||||
|
@ -383,6 +383,8 @@ struct iwl_self_init_dram {
|
||||
* @hw_init_mask: initial unmasked hw causes
|
||||
* @fh_mask: current unmasked fh causes
|
||||
* @hw_mask: current unmasked hw causes
|
||||
* @in_rescan: true if we have triggered a device rescan
|
||||
* @scheduled_for_removal: true if we have scheduled a device removal
|
||||
*/
|
||||
struct iwl_trans_pcie {
|
||||
struct iwl_rxq *rxq;
|
||||
@ -464,6 +466,9 @@ struct iwl_trans_pcie {
|
||||
u32 fh_mask;
|
||||
u32 hw_mask;
|
||||
cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
|
||||
u16 tx_cmd_queue_size;
|
||||
bool in_rescan;
|
||||
bool scheduled_for_removal;
|
||||
};
|
||||
|
||||
static inline struct iwl_trans_pcie *
|
||||
|
@ -75,6 +75,7 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include "iwl-drv.h"
|
||||
#include "iwl-trans.h"
|
||||
@ -1935,6 +1936,29 @@ static void iwl_trans_pcie_set_pmi(struct iwl_trans *trans, bool state)
|
||||
clear_bit(STATUS_TPOWER_PMI, &trans->status);
|
||||
}
|
||||
|
||||
/**
 * struct iwl_trans_pcie_removal - context for a deferred PCI device removal
 * @pdev: the PCI device that the removal work stops and removes from the bus
 * @work: work item whose handler performs the removal
 */
struct iwl_trans_pcie_removal {
|
||||
struct pci_dev *pdev;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
static void iwl_trans_pcie_removal_wk(struct work_struct *wk)
|
||||
{
|
||||
struct iwl_trans_pcie_removal *removal =
|
||||
container_of(wk, struct iwl_trans_pcie_removal, work);
|
||||
struct pci_dev *pdev = removal->pdev;
|
||||
char *prop[] = {"EVENT=INACCESSIBLE", NULL};
|
||||
|
||||
dev_err(&pdev->dev, "Device gone - attempting removal\n");
|
||||
kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, prop);
|
||||
pci_lock_rescan_remove();
|
||||
pci_dev_put(pdev);
|
||||
pci_stop_and_remove_bus_device(pdev);
|
||||
pci_unlock_rescan_remove();
|
||||
|
||||
kfree(removal);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
|
||||
unsigned long *flags)
|
||||
{
|
||||
@ -1977,11 +2001,55 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
|
||||
(BIT(trans->cfg->csr->flag_mac_clock_ready) |
|
||||
CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
|
||||
if (unlikely(ret < 0)) {
|
||||
iwl_trans_pcie_dump_regs(trans);
|
||||
iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
|
||||
u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL);
|
||||
|
||||
WARN_ONCE(1,
|
||||
"Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
|
||||
iwl_read32(trans, CSR_GP_CNTRL));
|
||||
cntrl);
|
||||
|
||||
iwl_trans_pcie_dump_regs(trans);
|
||||
|
||||
if (iwlwifi_mod_params.remove_when_gone && cntrl == ~0U) {
|
||||
struct iwl_trans_pcie_removal *removal;
|
||||
|
||||
if (trans_pcie->scheduled_for_removal)
|
||||
goto err;
|
||||
|
||||
IWL_ERR(trans, "Device gone - scheduling removal!\n");
|
||||
|
||||
/*
|
||||
* get a module reference to avoid doing this
|
||||
* while unloading anyway and to avoid
|
||||
* scheduling a work with code that's being
|
||||
* removed.
|
||||
*/
|
||||
if (!try_module_get(THIS_MODULE)) {
|
||||
IWL_ERR(trans,
|
||||
"Module is being unloaded - abort\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
|
||||
if (!removal) {
|
||||
module_put(THIS_MODULE);
|
||||
goto err;
|
||||
}
|
||||
/*
|
||||
* we don't need to clear this flag, because
|
||||
* the trans will be freed and reallocated.
|
||||
*/
|
||||
trans_pcie->scheduled_for_removal = true;
|
||||
|
||||
removal->pdev = to_pci_dev(trans->dev);
|
||||
INIT_WORK(&removal->work, iwl_trans_pcie_removal_wk);
|
||||
pci_dev_get(removal->pdev);
|
||||
schedule_work(&removal->work);
|
||||
} else {
|
||||
iwl_write32(trans, CSR_RESET,
|
||||
CSR_RESET_REG_FLAG_FORCE_NMI);
|
||||
}
|
||||
|
||||
err:
|
||||
spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags);
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user