From 3567f36a09d1095bb0fb97aa686f7eabc64b45d9 Mon Sep 17 00:00:00 2001 From: Jayamohan Kallickal Date: Sat, 28 Sep 2013 15:35:58 -0700 Subject: [PATCH] [SCSI] be2iscsi: Fix AER handling in driver Signed-off-by: Minh Tran Signed-off-by: John Soni Jose Signed-off-by: Jayamohan Kallickal Signed-off-by: James Bottomley --- drivers/scsi/be2iscsi/be_cmds.c | 2 +- drivers/scsi/be2iscsi/be_iscsi.c | 68 ++++++++-- drivers/scsi/be2iscsi/be_main.c | 220 ++++++++++++++++++++++++++++--- drivers/scsi/be2iscsi/be_main.h | 9 +- 4 files changed, 270 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/be2iscsi/be_cmds.c b/drivers/scsi/be2iscsi/be_cmds.c index fce298ba4b41..3338391b64de 100644 --- a/drivers/scsi/be2iscsi/be_cmds.c +++ b/drivers/scsi/be2iscsi/be_cmds.c @@ -377,7 +377,7 @@ void beiscsi_async_link_state_process(struct beiscsi_hba *phba, } else if ((evt->port_link_status & ASYNC_EVENT_LINK_UP) || ((evt->port_link_status & ASYNC_EVENT_LOGICAL) && (evt->port_fault == BEISCSI_PHY_LINK_FAULT_NONE))) { - phba->state = BE_ADAPTER_UP; + phba->state = BE_ADAPTER_LINK_UP; beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG | BEISCSI_LOG_INIT, diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index e82ab8124958..ffadbee0b4d9 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -58,10 +58,15 @@ struct iscsi_cls_session *beiscsi_session_create(struct iscsi_endpoint *ep, } beiscsi_ep = ep->dd_data; phba = beiscsi_ep->phba; - shost = phba->shost; - beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, - "BS_%d : In beiscsi_session_create\n"); + if (phba->state & BE_ADAPTER_PCI_ERR) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, + "BS_%d : PCI_ERROR Recovery\n"); + return NULL; + } else { + beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, + "BS_%d : In beiscsi_session_create\n"); + } if (cmds_max > beiscsi_ep->phba->params.wrbs_per_cxn) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, @@ -74,6 +79,7 @@ struct 
iscsi_cls_session *beiscsi_session_create(struct iscsi_endpoint *ep, cmds_max = beiscsi_ep->phba->params.wrbs_per_cxn; } + shost = phba->shost; cls_session = iscsi_session_setup(&beiscsi_iscsi_transport, shost, cmds_max, sizeof(*beiscsi_sess), @@ -477,6 +483,12 @@ int be2iscsi_iface_set_param(struct Scsi_Host *shost, uint32_t rm_len = dt_len; int ret = 0 ; + if (phba->state & BE_ADAPTER_PCI_ERR) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, + "BS_%d : In PCI_ERROR Recovery\n"); + return -EBUSY; + } + nla_for_each_attr(attrib, data, dt_len, rm_len) { iface_param = nla_data(attrib); @@ -588,6 +600,12 @@ int be2iscsi_iface_get_param(struct iscsi_iface *iface, struct be_cmd_get_def_gateway_resp gateway; int len = -ENOSYS; + if (phba->state & BE_ADAPTER_PCI_ERR) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, + "BS_%d : In PCI_ERROR Recovery\n"); + return -EBUSY; + } + switch (param) { case ISCSI_NET_PARAM_IPV4_ADDR: case ISCSI_NET_PARAM_IPV4_SUBNET: @@ -737,7 +755,7 @@ static void beiscsi_get_port_state(struct Scsi_Host *shost) struct beiscsi_hba *phba = iscsi_host_priv(shost); struct iscsi_cls_host *ihost = shost->shost_data; - ihost->port_state = (phba->state == BE_ADAPTER_UP) ? + ihost->port_state = (phba->state == BE_ADAPTER_LINK_UP) ? 
ISCSI_PORT_STATE_UP : ISCSI_PORT_STATE_DOWN; } @@ -805,9 +823,16 @@ int beiscsi_get_host_param(struct Scsi_Host *shost, struct beiscsi_hba *phba = iscsi_host_priv(shost); int status = 0; - beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, - "BS_%d : In beiscsi_get_host_param," - " param= %d\n", param); + + if (phba->state & BE_ADAPTER_PCI_ERR) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, + "BS_%d : In PCI_ERROR Recovery\n"); + return -EBUSY; + } else { + beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, + "BS_%d : In beiscsi_get_host_param," + " param = %d\n", param); + } switch (param) { case ISCSI_HOST_PARAM_HWADDRESS: @@ -950,10 +975,19 @@ int beiscsi_conn_start(struct iscsi_cls_conn *cls_conn) struct beiscsi_conn *beiscsi_conn = conn->dd_data; struct beiscsi_endpoint *beiscsi_ep; struct beiscsi_offload_params params; + struct beiscsi_hba *phba; - beiscsi_log(beiscsi_conn->phba, KERN_INFO, - BEISCSI_LOG_CONFIG, - "BS_%d : In beiscsi_conn_start\n"); + phba = ((struct beiscsi_conn *)conn->dd_data)->phba; + + if (phba->state & BE_ADAPTER_PCI_ERR) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, + "BS_%d : In PCI_ERROR Recovery\n"); + return -EBUSY; + } else { + beiscsi_log(beiscsi_conn->phba, KERN_INFO, + BEISCSI_LOG_CONFIG, + "BS_%d : In beiscsi_conn_start\n"); + } memset(&params, 0, sizeof(struct beiscsi_offload_params)); beiscsi_ep = beiscsi_conn->ep; @@ -1178,7 +1212,12 @@ beiscsi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, return ERR_PTR(ret); } - if (phba->state != BE_ADAPTER_UP) { + if (phba->state & BE_ADAPTER_PCI_ERR) { + ret = -EBUSY; + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, + "BS_%d : In PCI_ERROR Recovery\n"); + return ERR_PTR(ret); + } else if (phba->state & BE_ADAPTER_LINK_DOWN) { ret = -EBUSY; beiscsi_log(phba, KERN_WARNING, BEISCSI_LOG_CONFIG, "BS_%d : The Adapter Port state is Down!!!\n"); @@ -1303,6 +1342,12 @@ void beiscsi_ep_disconnect(struct iscsi_endpoint *ep) tcp_upload_flag = CONNECTION_UPLOAD_ABORT; } + if
(phba->state & BE_ADAPTER_PCI_ERR) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, + "BS_%d : PCI_ERROR Recovery\n"); + goto free_ep; + } + tag = mgmt_invalidate_connection(phba, beiscsi_ep, beiscsi_ep->ep_cid, mgmt_invalidate_flag, @@ -1315,6 +1360,7 @@ void beiscsi_ep_disconnect(struct iscsi_endpoint *ep) beiscsi_mccq_compl(phba, tag, NULL, NULL); beiscsi_close_conn(beiscsi_ep, tcp_upload_flag); +free_ep: beiscsi_free_ep(beiscsi_ep); beiscsi_unbind_conn_to_cid(phba, beiscsi_ep->ep_cid); iscsi_destroy_endpoint(beiscsi_ep->openiscsi_ep); diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 8f300534fc32..d84ecc5317ff 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5140,10 +5140,12 @@ void beiscsi_hba_attrs_init(struct beiscsi_hba *phba) /* * beiscsi_quiesce()- Cleanup Driver resources * @phba: Instance Priv structure + * @unload_state: Clean or EEH unload state * * Free the OS and HW resources held by the driver **/ -static void beiscsi_quiesce(struct beiscsi_hba *phba) +static void beiscsi_quiesce(struct beiscsi_hba *phba, + uint32_t unload_state) { struct hwi_controller *phwi_ctrlr; struct hwi_context_memory *phwi_context; @@ -5156,28 +5158,37 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba) if (phba->msix_enabled) { for (i = 0; i <= phba->num_cpus; i++) { msix_vec = phba->msix_entries[i].vector; + synchronize_irq(msix_vec); free_irq(msix_vec, &phwi_context->be_eq[i]); kfree(phba->msi_name[i]); } } else - if (phba->pcidev->irq) + if (phba->pcidev->irq) { + synchronize_irq(phba->pcidev->irq); free_irq(phba->pcidev->irq, phba); + } pci_disable_msix(phba->pcidev); - destroy_workqueue(phba->wq); + if (blk_iopoll_enabled) for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; blk_iopoll_disable(&pbe_eq->iopoll); } - beiscsi_clean_port(phba); - beiscsi_free_mem(phba); + if (unload_state == BEISCSI_CLEAN_UNLOAD) { + destroy_workqueue(phba->wq); +
beiscsi_clean_port(phba); + beiscsi_free_mem(phba); - beiscsi_unmap_pci_function(phba); - pci_free_consistent(phba->pcidev, - phba->ctrl.mbox_mem_alloced.size, - phba->ctrl.mbox_mem_alloced.va, - phba->ctrl.mbox_mem_alloced.dma); + beiscsi_unmap_pci_function(phba); + pci_free_consistent(phba->pcidev, + phba->ctrl.mbox_mem_alloced.size, + phba->ctrl.mbox_mem_alloced.va, + phba->ctrl.mbox_mem_alloced.dma); + } else { + hwi_purge_eq(phba); + hwi_cleanup(phba); + } cancel_delayed_work_sync(&phba->beiscsi_hw_check_task); } @@ -5194,11 +5205,13 @@ static void beiscsi_remove(struct pci_dev *pcidev) } beiscsi_destroy_def_ifaces(phba); - beiscsi_quiesce(phba); + beiscsi_quiesce(phba, BEISCSI_CLEAN_UNLOAD); iscsi_boot_destroy_kset(phba->boot_kset); iscsi_host_remove(phba->shost); pci_dev_put(phba->pcidev); iscsi_host_free(phba->shost); + pci_disable_pcie_error_reporting(pcidev); + pci_set_drvdata(pcidev, NULL); pci_disable_device(pcidev); } @@ -5213,7 +5226,7 @@ static void beiscsi_shutdown(struct pci_dev *pcidev) return; } - beiscsi_quiesce(phba); + beiscsi_quiesce(phba, BEISCSI_CLEAN_UNLOAD); pci_disable_device(pcidev); } @@ -5251,6 +5264,167 @@ beiscsi_hw_health_check(struct work_struct *work) msecs_to_jiffies(1000)); } + +static pci_ers_result_t beiscsi_eeh_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct beiscsi_hba *phba = NULL; + + phba = (struct beiscsi_hba *)pci_get_drvdata(pdev); + phba->state |= BE_ADAPTER_PCI_ERR; + + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, + "BM_%d : EEH error detected\n"); + + beiscsi_quiesce(phba, BEISCSI_EEH_UNLOAD); + + if (state == pci_channel_io_perm_failure) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, + "BM_%d : EEH : State PERM Failure"); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_disable_device(pdev); + + /* The error could cause the FW to trigger a flash debug dump. + * Resetting the card while flash dump is in progress + * can cause it not to recover; wait for it to finish. 
+ * Wait only for first function as it is needed only once per + * adapter. + **/ + if (pdev->devfn == 0) + ssleep(30); + + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t beiscsi_eeh_reset(struct pci_dev *pdev) +{ + struct beiscsi_hba *phba = NULL; + int status = 0; + + phba = (struct beiscsi_hba *)pci_get_drvdata(pdev); + + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, + "BM_%d : EEH Reset\n"); + + status = pci_enable_device(pdev); + if (status) + return PCI_ERS_RESULT_DISCONNECT; + + pci_set_master(pdev); + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + + /* Wait for the CHIP Reset to complete */ + status = be_chk_reset_complete(phba); + if (!status) { + beiscsi_log(phba, KERN_WARNING, BEISCSI_LOG_INIT, + "BM_%d : EEH Reset Completed\n"); + } else { + beiscsi_log(phba, KERN_WARNING, BEISCSI_LOG_INIT, + "BM_%d : EEH Reset Completion Failure\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_cleanup_aer_uncorrect_error_status(pdev); + return PCI_ERS_RESULT_RECOVERED; +} + +static void beiscsi_eeh_resume(struct pci_dev *pdev) +{ + int ret = 0, i; + struct be_eq_obj *pbe_eq; + struct beiscsi_hba *phba = NULL; + struct hwi_controller *phwi_ctrlr; + struct hwi_context_memory *phwi_context; + + phba = (struct beiscsi_hba *)pci_get_drvdata(pdev); + pci_save_state(pdev); + + if (enable_msix) + find_num_cpus(phba); + else + phba->num_cpus = 1; + + if (enable_msix) { + beiscsi_msix_enable(phba); + if (!phba->msix_enabled) + phba->num_cpus = 1; + } + + ret = beiscsi_cmd_reset_function(phba); + if (ret) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, + "BM_%d : Reset Failed\n"); + goto ret_err; + } + + ret = be_chk_reset_complete(phba); + if (ret) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, + "BM_%d : Failed to get out of reset.\n"); + goto ret_err; + } + + beiscsi_get_params(phba); + phba->shost->max_id = phba->params.cxns_per_ctrl; + phba->shost->can_queue = phba->params.ios_per_ctrl; + ret = hwi_init_controller(phba); + + for (i 
= 0; i < MAX_MCC_CMD; i++) { + init_waitqueue_head(&phba->ctrl.mcc_wait[i + 1]); + phba->ctrl.mcc_tag[i] = i + 1; + phba->ctrl.mcc_numtag[i + 1] = 0; + phba->ctrl.mcc_tag_available++; + } + + phwi_ctrlr = phba->phwi_ctrlr; + phwi_context = phwi_ctrlr->phwi_ctxt; + + if (blk_iopoll_enabled) { + for (i = 0; i < phba->num_cpus; i++) { + pbe_eq = &phwi_context->be_eq[i]; + blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + be_iopoll); + blk_iopoll_enable(&pbe_eq->iopoll); + } + + i = (phba->msix_enabled) ? i : 0; + /* Work item for MCC handling */ + pbe_eq = &phwi_context->be_eq[i]; + INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs); + } else { + if (phba->msix_enabled) { + for (i = 0; i <= phba->num_cpus; i++) { + pbe_eq = &phwi_context->be_eq[i]; + INIT_WORK(&pbe_eq->work_cqs, + beiscsi_process_all_cqs); + } + } else { + pbe_eq = &phwi_context->be_eq[0]; + INIT_WORK(&pbe_eq->work_cqs, + beiscsi_process_all_cqs); + } + } + + ret = beiscsi_init_irqs(phba); + if (ret < 0) { + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, + "BM_%d : beiscsi_eeh_resume - " + "Failed to beiscsi_init_irqs\n"); + goto ret_err; + } + + hwi_enable_intr(phba); + phba->state &= ~BE_ADAPTER_PCI_ERR; + + return; +ret_err: + beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, + "BM_%d : AER EEH Resume Failed\n"); +} + static int beiscsi_dev_probe(struct pci_dev *pcidev, const struct pci_device_id *id) { @@ -5258,7 +5432,7 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, struct hwi_controller *phwi_ctrlr; struct hwi_context_memory *phwi_context; struct be_eq_obj *pbe_eq; - int ret, i; + int ret = 0, i; ret = beiscsi_enable_pci(pcidev); if (ret < 0) { @@ -5274,6 +5448,15 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, goto disable_pci; } + /* Enable EEH reporting */ + ret = pci_enable_pcie_error_reporting(pcidev); + if (ret) + beiscsi_log(phba, KERN_WARNING, BEISCSI_LOG_INIT, + "BM_%d : PCIe Error Reporting " + "Enabling Failed\n"); + + pci_save_state(pcidev); + /* Initialize Driver 
configuration Paramters */ beiscsi_hba_attrs_init(phba); @@ -5359,7 +5542,7 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, goto free_port; } - for (i = 0; i < MAX_MCC_CMD ; i++) { + for (i = 0; i < MAX_MCC_CMD; i++) { init_waitqueue_head(&phba->ctrl.mcc_wait[i + 1]); phba->ctrl.mcc_tag[i] = i + 1; phba->ctrl.mcc_numtag[i + 1] = 0; @@ -5463,6 +5646,12 @@ disable_pci: return ret; } +static struct pci_error_handlers beiscsi_eeh_handlers = { + .error_detected = beiscsi_eeh_err_detected, + .slot_reset = beiscsi_eeh_reset, + .resume = beiscsi_eeh_resume, +}; + struct iscsi_transport beiscsi_iscsi_transport = { .owner = THIS_MODULE, .name = DRV_NAME, @@ -5501,7 +5690,8 @@ static struct pci_driver beiscsi_pci_driver = { .probe = beiscsi_dev_probe, .remove = beiscsi_remove, .shutdown = beiscsi_shutdown, - .id_table = beiscsi_pci_id_table + .id_table = beiscsi_pci_id_table, + .err_handler = &beiscsi_eeh_handlers }; diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h index a8ae6b82c3e1..aace072e033e 100644 --- a/drivers/scsi/be2iscsi/be_main.h +++ b/drivers/scsi/be2iscsi/be_main.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -96,8 +97,12 @@ #define INVALID_SESS_HANDLE 0xFFFFFFFF -#define BE_ADAPTER_UP 0x00000000 -#define BE_ADAPTER_LINK_DOWN 0x00000001 +#define BE_ADAPTER_LINK_UP 0x001 +#define BE_ADAPTER_LINK_DOWN 0x002 +#define BE_ADAPTER_PCI_ERR 0x004 + +#define BEISCSI_CLEAN_UNLOAD 0x01 +#define BEISCSI_EEH_UNLOAD 0x02 /** * hardware needs the async PDU buffers to be posted in multiples of 8 * So have atleast 8 of them by default