From 05a05872c8d4b4357c9d913e6d73ae64882bddf5 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Tue, 7 Jun 2016 20:13:08 -0300 Subject: lpfc: fix oops in lpfc_sli4_scmd_to_wqidx_distr() from lpfc_send_taskmgmt() The lpfc_sli4_scmd_to_wqidx_distr() function expects the scsi_cmnd 'lpfc_cmd->pCmd' not to be null, and point to the midlayer command. That's not true in the .eh_(device|target|bus)_reset_handler path, because lpfc_send_taskmgmt() sends commands not from the midlayer, so does not set 'lpfc_cmd->pCmd'. That is true in the .queuecommand path because lpfc_queuecommand() stores the scsi_cmnd from midlayer in lpfc_cmd->pCmd; and lpfc_cmd is stored by lpfc_scsi_prep_cmnd() in piocbq->context1 -- which is passed to lpfc_sli4_scmd_to_wqidx_distr() as lpfc_cmd parameter. This problem can be hit on SCSI EH, and immediately with sg_reset. These 2 test-cases demonstrate the problem/fix with next-20160601. Test-case 1) sg_reset # strace sg_reset --device /dev/sdm <...> open("/dev/sdm", O_RDWR|O_NONBLOCK) = 3 ioctl(3, SG_SCSI_RESET, 0x3fffde6d0994 +++ killed by SIGSEGV +++ Segmentation fault # dmesg Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xd00000001c88442c Oops: Kernel access of bad area, sig: 11 [#1] <...> CPU: 104 PID: 16333 Comm: sg_reset Tainted: G W 4.7.0-rc1-next-20160601-00004-g95b89dc #6 <...> NIP [d00000001c88442c] lpfc_sli4_scmd_to_wqidx_distr+0xc/0xd0 [lpfc] LR [d00000001c826fe8] lpfc_sli_calc_ring.part.27+0x98/0xd0 [lpfc] Call Trace: [c000003c9ec876f0] [c000003c9ec87770] 0xc000003c9ec87770 (unreliable) [c000003c9ec87720] [d00000001c82e004] lpfc_sli_issue_iocb+0xd4/0x260 [lpfc] [c000003c9ec87780] [d00000001c831a3c] lpfc_sli_issue_iocb_wait+0x15c/0x5b0 [lpfc] [c000003c9ec87880] [d00000001c87f27c] lpfc_send_taskmgmt+0x24c/0x650 [lpfc] [c000003c9ec87950] [d00000001c87fd7c] lpfc_device_reset_handler+0x10c/0x200 [lpfc] [c000003c9ec87a10] [c000000000610694] scsi_try_bus_device_reset+0x44/0xc0 [c000003c9ec87a40] [c0000000006113e8] scsi_ioctl_reset+0x198/0x2c0 [c000003c9ec87bf0] [c00000000060fe5c] scsi_ioctl+0x13c/0x4b0 [c000003c9ec87c80] [c0000000006629b0] sd_ioctl+0xf0/0x120 [c000003c9ec87cd0] [c00000000046e4f8] blkdev_ioctl+0x248/0xb70 [c000003c9ec87d30] [c0000000002a1f60] block_ioctl+0x70/0x90 [c000003c9ec87d50] [c00000000026d334] do_vfs_ioctl+0xc4/0x890 [c000003c9ec87de0] [c00000000026db60] SyS_ioctl+0x60/0xc0 [c000003c9ec87e30] [c000000000009120] system_call+0x38/0x108 Instruction dump: <...> With fix: # strace sg_reset --device /dev/sdm <...> open("/dev/sdm", O_RDWR|O_NONBLOCK) = 3 ioctl(3, SG_SCSI_RESET, 0x3fffe103c554) = 0 close(3) = 0 exit_group(0) = ? +++ exited with 0 +++ # dmesg [ 424.658649] lpfc 0006:01:00.4: 4:(0):0713 SCSI layer issued Device Reset (1, 0) return x2002 Test-case 2) SCSI EH Using this debug patch to wire an SCSI EH trigger, for lpfc_scsi_cmd_iocb_cmpl(): - cmd->scsi_done(cmd); + if ((phba->pport ? phba->pport->cfg_log_verbose : phba->cfg_log_verbose) == 0x32100000) + printk(KERN_ALERT "lpfc: skip scsi_done()\n"); + else + cmd->scsi_done(cmd); # echo 0x32100000 > /sys/class/scsi_host/host11/lpfc_log_verbose # dd if=/dev/sdm of=/dev/null iflag=direct & <...> After a while: # dmesg lpfc 0006:01:00.4: 4:(0):3053 lpfc_log_verbose changed from 0 (x0) to 839909376 (x32100000) lpfc: skip scsi_done() <...> Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xd0000000199e448c Oops: Kernel access of bad area, sig: 11 [#1] <...> CPU: 96 PID: 28556 Comm: scsi_eh_11 Tainted: G W 4.7.0-rc1-next-20160601-00004-g95b89dc #6 <...> NIP [d0000000199e448c] lpfc_sli4_scmd_to_wqidx_distr+0xc/0xd0 [lpfc] LR [d000000019986fe8] lpfc_sli_calc_ring.part.27+0x98/0xd0 [lpfc] Call Trace: [c000000ff0d0b890] [c000000ff0d0b900] 0xc000000ff0d0b900 (unreliable) [c000000ff0d0b8c0] [d00000001998e004] lpfc_sli_issue_iocb+0xd4/0x260 [lpfc] [c000000ff0d0b920] [d000000019991a3c] lpfc_sli_issue_iocb_wait+0x15c/0x5b0 [lpfc] [c000000ff0d0ba20] [d0000000199df27c] lpfc_send_taskmgmt+0x24c/0x650 [lpfc] [c000000ff0d0baf0] [d0000000199dfd7c] lpfc_device_reset_handler+0x10c/0x200 [lpfc] [c000000ff0d0bbb0] [c000000000610694] scsi_try_bus_device_reset+0x44/0xc0 [c000000ff0d0bbe0] [c0000000006126cc] scsi_eh_ready_devs+0x49c/0x9c0 [c000000ff0d0bcb0] [c000000000614160] scsi_error_handler+0x580/0x680 [c000000ff0d0bd80] [c0000000000ae848] kthread+0x108/0x130 [c000000ff0d0be30] [c0000000000094a8] ret_from_kernel_thread+0x5c/0xb4 Instruction dump: <...> With fix: # dmesg lpfc 0006:01:00.4: 4:(0):3053 lpfc_log_verbose changed from 0 (x0) to 839909376 (x32100000) lpfc: skip scsi_done() <...> lpfc 0006:01:00.4: 4:(0):0713 SCSI layer issued Device Reset (0, 0) return x2002 <...> lpfc 0006:01:00.4: 4:(0):0723 SCSI layer issued Target Reset (1, 0) return x2002 <...> lpfc 0006:01:00.4: 4:(0):0714 SCSI layer issued Bus Reset Data: x2002 <...> lpfc 0006:01:00.4: 4:(0):3172 SCSI layer issued Host Reset Data: <...> Fixes: 8b0dff14164d ("lpfc: Add support for using block multi-queue") Cc: # v4.2+ Signed-off-by: Mauricio Faria de Oliveira Reviewed-by: Johannes Thumshirn Acked-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi/lpfc') diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index a5655d571906..d197aa176dee 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3877,7 +3877,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, uint32_t tag; uint16_t hwq; - if (shost_use_blk_mq(cmnd->device->host)) { + if (cmnd && shost_use_blk_mq(cmnd->device->host)) { tag = blk_mq_unique_tag(cmnd->request); hwq = blk_mq_unique_tag_to_hwq(tag); -- cgit v1.2.3 From 22466da5b4b7a82d3e5a9c21c752cae91a21dc91 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 29 Jul 2016 15:30:56 +0200 Subject: lpfc: Fix possible NULL pointer dereference Check for the existence of piocb->vport before accessing it. Signed-off-by: Johannes Thumshirn Acked-by: James Smart Reviewed-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/scsi/lpfc') diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 351d08ace24a..7080ce2920fd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1323,21 +1323,18 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, { lockdep_assert_held(&phba->hbalock); + BUG_ON(!piocb || !piocb->vport); + list_add_tail(&piocb->list, &pring->txcmplq); piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ; if ((unlikely(pring->ringno == LPFC_ELS_RING)) && (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN) && - (!(piocb->vport->load_flag & FC_UNLOADING))) { - if (!piocb->vport) - BUG(); - else - mod_timer(&piocb->vport->els_tmofunc, - jiffies + - msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); - } - + (!(piocb->vport->load_flag & FC_UNLOADING))) + mod_timer(&piocb->vport->els_tmofunc, + jiffies + + msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); return 0; } -- cgit v1.2.3