Merge branch 'pds_core-pci-reset'

Shannon Nelson says:

====================
pds_core: add PCI reset handling

Make sure pds_core can handle and recover from PCI function resets and
similar PCI bus issues: add detection and handlers for PCI problems.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2023-09-18 09:28:22 +01:00
commit 760554a9ad
5 changed files with 101 additions and 11 deletions

View File

@ -445,12 +445,13 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
goto err_out_teardown;
/* Set up the VIFs */
if (init) {
err = pdsc_viftypes_init(pdsc);
if (err)
goto err_out_teardown;
if (init)
pdsc_debugfs_add_viftype(pdsc);
}
clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
return 0;
@ -469,8 +470,10 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
pdsc_qcq_free(pdsc, &pdsc->adminqcq);
if (removing) {
kfree(pdsc->viftype_status);
pdsc->viftype_status = NULL;
}
if (pdsc->intr_info) {
for (i = 0; i < pdsc->nintrs; i++)
@ -512,7 +515,7 @@ void pdsc_stop(struct pdsc *pdsc)
PDS_CORE_INTR_MASK_SET);
}
static void pdsc_fw_down(struct pdsc *pdsc)
void pdsc_fw_down(struct pdsc *pdsc)
{
union pds_core_notifyq_comp reset_event = {
.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
@ -520,10 +523,13 @@ static void pdsc_fw_down(struct pdsc *pdsc)
};
if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
dev_err(pdsc->dev, "%s: already happening\n", __func__);
dev_warn(pdsc->dev, "%s: already happening\n", __func__);
return;
}
if (pdsc->pdev->is_virtfn)
return;
/* Notify clients of fw_down */
if (pdsc->fw_reporter)
devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
@ -533,7 +539,7 @@ static void pdsc_fw_down(struct pdsc *pdsc)
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
static void pdsc_fw_up(struct pdsc *pdsc)
void pdsc_fw_up(struct pdsc *pdsc)
{
union pds_core_notifyq_comp reset_event = {
.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
@ -546,6 +552,11 @@ static void pdsc_fw_up(struct pdsc *pdsc)
return;
}
if (pdsc->pdev->is_virtfn) {
clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
return;
}
err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
if (err)
goto err_out;
@ -567,6 +578,18 @@ err_out:
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
/*
 * Probe the device's fw_status register for the "broken PCI" marker and,
 * when it is seen, run the same prepare/done sequence that is used for a
 * PCI function reset in an attempt to recover the device.
 */
static void pdsc_check_pci_health(struct pdsc *pdsc)
{
	u8 status;

	status = ioread8(&pdsc->info_regs->fw_status);

	/* PDS_RC_BAD_PCI is what a status read yields when PCI is broken */
	if (status == PDS_RC_BAD_PCI) {
		pdsc_reset_prepare(pdsc->pdev);
		pdsc_reset_done(pdsc->pdev);
	}
}
void pdsc_health_thread(struct work_struct *work)
{
struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
@ -593,6 +616,8 @@ void pdsc_health_thread(struct work_struct *work)
pdsc_fw_down(pdsc);
}
pdsc_check_pci_health(pdsc);
pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
out_unlock:

View File

@ -283,6 +283,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc);
int pdsc_dev_reinit(struct pdsc *pdsc);
int pdsc_dev_init(struct pdsc *pdsc);
void pdsc_reset_prepare(struct pci_dev *pdev);
void pdsc_reset_done(struct pci_dev *pdev);
int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
irq_handler_t handler, void *data);
void pdsc_intr_free(struct pdsc *pdsc, int index);
@ -309,4 +312,8 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data);
int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
struct netlink_ext_ack *extack);
void pdsc_fw_down(struct pdsc *pdsc);
void pdsc_fw_up(struct pdsc *pdsc);
#endif /* _PDSC_H_ */

View File

@ -42,6 +42,8 @@ int pdsc_err_to_errno(enum pds_core_status_code code)
return -ERANGE;
case PDS_RC_BAD_ADDR:
return -EFAULT;
case PDS_RC_BAD_PCI:
return -ENXIO;
case PDS_RC_EOPCODE:
case PDS_RC_EINTR:
case PDS_RC_DEV_CMD:
@ -62,7 +64,7 @@ bool pdsc_is_fw_running(struct pdsc *pdsc)
/* Firmware is useful only if the running bit is set and
* fw_status != 0xff (bad PCI read)
*/
return (pdsc->fw_status != 0xff) &&
return (pdsc->fw_status != PDS_RC_BAD_PCI) &&
(pdsc->fw_status & PDS_CORE_FW_STS_F_RUNNING);
}
@ -128,6 +130,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
unsigned long max_wait;
unsigned long duration;
int timeout = 0;
bool running;
int done = 0;
int err = 0;
int status;
@ -136,6 +139,10 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
max_wait = start_time + (max_seconds * HZ);
while (!done && !timeout) {
running = pdsc_is_fw_running(pdsc);
if (!running)
break;
done = pdsc_devcmd_done(pdsc);
if (done)
break;
@ -152,7 +159,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
dev_dbg(dev, "DEVCMD %d %s after %ld secs\n",
opcode, pdsc_devcmd_str(opcode), duration / HZ);
if (!done || timeout) {
if ((!done || timeout) && running) {
dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n",
opcode, pdsc_devcmd_str(opcode), done, timeout,
max_seconds);

View File

@ -445,12 +445,62 @@ static void pdsc_remove(struct pci_dev *pdev)
devlink_free(dl);
}
/*
 * pdsc_reset_prepare - quiesce the device ahead of a PCI function reset
 * @pdev: PCI device whose driver data is the struct pdsc to shut down
 *
 * Installed as the .reset_prepare callback in pdsc_err_handler and also
 * called directly by pdsc_check_pci_health() when the status register
 * reads back as PDS_RC_BAD_PCI.  Releases, in order, the firmware-side
 * state, the IRQ vectors, the BAR mappings, the PCI regions and finally
 * the device enable; pdsc_reset_done() performs the re-enable side.
 */
void pdsc_reset_prepare(struct pci_dev *pdev)
{
struct pdsc *pdsc = pci_get_drvdata(pdev);
/* take the firmware-facing side of the driver down first */
pdsc_fw_down(pdsc);
/* then hand back the PCI resources held for this device */
pci_free_irq_vectors(pdev);
pdsc_unmap_bars(pdsc);
pci_release_regions(pdev);
pci_disable_device(pdev);
}
/*
 * pdsc_reset_done - bring the device back after a PCI function reset
 * @pdev: PCI device whose driver data is the struct pdsc to restore
 *
 * Counterpart of pdsc_reset_prepare(): re-enables the device and bus
 * mastering, and for non-VF devices re-requests the PCI regions and
 * remaps the BARs, before handing over to pdsc_fw_up().  Any failure
 * along the way ends the recovery attempt without calling pdsc_fw_up().
 */
void pdsc_reset_done(struct pci_dev *pdev)
{
	struct pdsc *pdsc = pci_get_drvdata(pdev);
	struct device *dev = pdsc->dev;
	int rc;

	rc = pci_enable_device(pdev);
	if (rc) {
		dev_err(dev, "Cannot enable PCI device: %pe\n", ERR_PTR(rc));
		return;
	}
	pci_set_master(pdev);

	/* virtual functions skip the region and BAR setup below */
	if (pdev->is_virtfn)
		goto fw_up;

	pcie_print_link_status(pdsc->pdev);

	rc = pci_request_regions(pdsc->pdev, PDS_CORE_DRV_NAME);
	if (rc) {
		dev_err(dev, "Cannot request PCI regions: %pe\n",
			ERR_PTR(rc));
		return;
	}

	rc = pdsc_map_bars(pdsc);
	if (rc)
		return;

fw_up:
	pdsc_fw_up(pdsc);
}
/*
 * PCI error-handler callbacks for this driver.  Only the reset hooks are
 * populated: pdsc_reset_prepare() tears the device down and
 * pdsc_reset_done() brings it back.
 */
static const struct pci_error_handlers pdsc_err_handler = {
/* FLR handling */
.reset_prepare = pdsc_reset_prepare,
.reset_done = pdsc_reset_done,
};
/*
 * PCI driver description for pds_core: device ID table, probe/remove
 * entry points, SR-IOV configuration hook, and the reset handlers
 * collected in pdsc_err_handler.
 */
static struct pci_driver pdsc_driver = {
.name = PDS_CORE_DRV_NAME,
.id_table = pdsc_id_table,
.probe = pdsc_probe,
.remove = pdsc_remove,
.sriov_configure = pdsc_sriov_configure,
/* hooks pdsc_reset_prepare()/pdsc_reset_done() into the driver */
.err_handler = &pdsc_err_handler,
};
void *pdsc_get_pf_struct(struct pci_dev *vf_pdev)

View File

@ -79,6 +79,7 @@ enum pds_core_status_code {
PDS_RC_EVFID = 31, /* VF ID does not exist */
PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */
PDS_RC_ECLIENT = 33, /* No such client id */
PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */
};
/**