diff options
author | Manish Rangankar <mrangankar@marvell.com> | 2020-09-08 02:56:56 -0700 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2020-09-08 22:40:25 -0400 |
commit | f4ba4e55db6db7e85e847f1a5891efa792580809 (patch) | |
tree | 40faf3bb8256457f00909e5fcf514aababb1b248 /drivers/scsi/qedi | |
parent | 4118879be3755b38171063dfd4a57611d4b20a83 (diff) |
scsi: qedi: Add firmware error recovery invocation support
Add support to initiate MFW process recovery for all the devices if storage
function receives the event first.
Also added fix for kernel test robot warning,
>> drivers/scsi/qedi/qedi_main.c:1119:6: warning: no previous prototype
>> for 'qedi_schedule_hw_err_handler' [-Wmissing-prototypes]
Link: https://lore.kernel.org/r/20200908095657.26821-8-mrangankar@marvell.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/qedi')
-rw-r--r-- | drivers/scsi/qedi/qedi.h | 4 | ||||
-rw-r--r-- | drivers/scsi/qedi/qedi_fw.c | 7 | ||||
-rw-r--r-- | drivers/scsi/qedi/qedi_iscsi.c | 3 | ||||
-rw-r--r-- | drivers/scsi/qedi/qedi_main.c | 63 |
4 files changed, 73 insertions, 4 deletions
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index 9c19ec9dc682..7e59d50f2fab 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -274,6 +274,10 @@ struct qedi_ctx { spinlock_t ll2_lock; /* Light L2 lock */ spinlock_t hba_lock; /* per port lock */ struct task_struct *ll2_recv_thread; + unsigned long qedi_err_flags; +#define QEDI_ERR_ATTN_CLR_EN 0 +#define QEDI_ERR_IS_RECOVERABLE 2 +#define QEDI_ERR_OVERRIDE_EN 31 unsigned long flags; #define UIO_DEV_OPENED 1 #define QEDI_IOTHREAD_WAKE 2 diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index f158fde0a43c..440ddd2309f1 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -1267,7 +1267,8 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, rval = wait_event_interruptible_timeout(qedi_conn->wait_queue, ((qedi_conn->cmd_cleanup_req == qedi_conn->cmd_cleanup_cmpl) || - qedi_conn->ep), + test_bit(QEDI_IN_RECOVERY, + &qedi->flags)), 5 * HZ); if (rval) { QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, @@ -1292,7 +1293,9 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, /* Enable IOs for all other sessions except current.*/ if (!wait_event_interruptible_timeout(qedi_conn->wait_queue, (qedi_conn->cmd_cleanup_req == - qedi_conn->cmd_cleanup_cmpl), + qedi_conn->cmd_cleanup_cmpl) || + test_bit(QEDI_IN_RECOVERY, + &qedi->flags), 5 * HZ)) { iscsi_host_for_each_session(qedi->shost, qedi_mark_device_available); diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index ae86a40ca040..08c05403cd72 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -1072,7 +1072,8 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) qedi_ep->state = EP_STATE_DISCONN_START; - if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags)) + if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags) || + test_bit(QEDI_IN_RECOVERY, &qedi->flags)) goto ep_release_conn; ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn); diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 7186e814866e..8fde35f843f7 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -50,6 +50,10 @@ module_param(qedi_ll2_buf_size, uint, 0644); MODULE_PARM_DESC(qedi_ll2_buf_size, "parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400."); +static uint qedi_flags_override; +module_param(qedi_flags_override, uint, 0644); +MODULE_PARM_DESC(qedi_flags_override, "Disable/Enable MFW error flags bits action."); + const struct qed_iscsi_ops *qedi_ops; static struct scsi_transport_template *qedi_scsi_transport; static struct pci_driver qedi_pci_driver; @@ -63,6 +67,8 @@ static void qedi_reset_uio_rings(struct qedi_uio_dev *udev); static void qedi_ll2_free_skbs(struct qedi_ctx *qedi); static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi); static void qedi_recovery_handler(struct work_struct *work); +static void qedi_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type); static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle) { @@ -1112,6 +1118,39 @@ exit_get_data: return; } +void qedi_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type) +{ + struct qedi_ctx *qedi = (struct qedi_ctx *)dev; + unsigned long override_flags = qedi_flags_override; + + if (override_flags && test_bit(QEDI_ERR_OVERRIDE_EN, &override_flags)) + qedi->qedi_err_flags = qedi_flags_override; + + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, + "HW error handler scheduled, err=%d err_flags=0x%x\n", + err_type, qedi->qedi_err_flags); + + switch (err_type) { + case QED_HW_ERR_MFW_RESP_FAIL: + case QED_HW_ERR_HW_ATTN: + case QED_HW_ERR_DMAE_FAIL: + case QED_HW_ERR_RAMROD_FAIL: + case QED_HW_ERR_FW_ASSERT: + /* Prevent HW attentions from being reasserted */ + if (test_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags)) + qedi_ops->common->attn_clr_enable(qedi->cdev, true); + + if (err_type == QED_HW_ERR_RAMROD_FAIL && + test_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags)) + qedi_ops->common->recovery_process(qedi->cdev); + + break; + default: + break; + } +} + static void qedi_schedule_recovery_handler(void *dev) { struct qedi_ctx *qedi = dev; @@ -1154,6 +1193,7 @@ static struct qed_iscsi_cb_ops qedi_cb_ops = { { .link_update = qedi_link_update, .schedule_recovery_handler = qedi_schedule_recovery_handler, + .schedule_hw_err_handler = qedi_schedule_hw_err_handler, .get_protocol_tlv_data = qedi_get_protocol_tlv_data, .get_generic_tlv_data = qedi_get_generic_tlv_data, } @@ -2354,6 +2394,7 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) { struct qedi_ctx *qedi = pci_get_drvdata(pdev); int rval; + u16 retry = 10; if (mode == QEDI_MODE_SHUTDOWN) iscsi_host_for_each_session(qedi->shost, @@ -2382,7 +2423,13 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) qedi_sync_free_irqs(qedi); if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) { - qedi_ops->stop(qedi->cdev); + while (retry--) { + rval = qedi_ops->stop(qedi->cdev); + if (rval < 0) + msleep(1000); + else + break; + } qedi_ops->ll2->stop(qedi->cdev); } @@ -2441,6 +2488,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) struct qed_probe_params qed_params; void *task_start, *task_end; int rc; + u16 retry = 10; if (mode != QEDI_MODE_RECOVERY) { qedi = qedi_host_alloc(pdev); @@ -2452,6 +2500,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) qedi = pci_get_drvdata(pdev); } +retry_probe: + if (mode == QEDI_MODE_RECOVERY) + msleep(2000); + memset(&qed_params, 0, sizeof(qed_params)); qed_params.protocol = QED_PROTOCOL_ISCSI; qed_params.dp_module = qedi_qed_debug; @@ -2459,11 +2511,20 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) qed_params.is_vf = is_vf; qedi->cdev = qedi_ops->common->probe(pdev, &qed_params); if (!qedi->cdev) { + if (mode == QEDI_MODE_RECOVERY && retry) { + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, + "Retry %d initialize hardware\n", retry); + retry--; + goto retry_probe; + } + rc = -ENODEV; QEDI_ERR(&qedi->dbg_ctx, "Cannot initialize hardware\n"); goto free_host; } + set_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags); + set_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags); atomic_set(&qedi->link_state, QEDI_LINK_DOWN); rc = qedi_ops->fill_dev_info(qedi->cdev, &qedi->dev_info); |