From 1fdd14279eab2e9f79745631379f0c53cb8f9a5a Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Fri, 28 Oct 2016 15:54:07 +0800 Subject: [PATCH 1/9] scsi: scsi_dh_alua: fix missing kref_put() in alua_rtpg_work() Reference count of pg leaks in alua_rtpg_work() since kref_put() is not called to decrease the reference count of pg when the condition pg->rtpg_sdev==NULL satisfied (actually it is easy to satisfy), it would cause memory of pg leakage. Signed-off-by: tang.junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 241829e59668..f375167f16ea 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -793,6 +793,7 @@ static void alua_rtpg_work(struct work_struct *work) WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); WARN_ON(pg->flags & ALUA_PG_RUN_STPG); spin_unlock_irqrestore(&pg->lock, flags); + kref_put(&pg->kref, release_port_group); return; } if (pg->flags & ALUA_SYNC_STPG) From 6d3a56ed098566bc83d6c2afa74b4199c12ea074 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 28 Oct 2016 10:09:12 +0530 Subject: [PATCH 2/9] scsi: mpt3sas: Fix for block device of raid exists even after deleting raid disk While merging mpt3sas & mpt2sas code, we added the is_warpdrive check condition on the wrong line --------------------------------------------------------------------------- scsih_target_alloc(struct scsi_target *starget) sas_target_priv_data->handle = raid_device->handle; sas_target_priv_data->sas_address = raid_device->wwid; sas_target_priv_data->flags |= MPT_TARGET_FLAGS_VOLUME; - raid_device->starget = starget; + sas_target_priv_data->raid_device = raid_device; + if (ioc->is_warpdrive) + raid_device->starget = starget; } spin_unlock_irqrestore(&ioc->raid_device_lock, flags); return 0; ------------------------------------------------------------------------------ That check should be for the line sas_target_priv_data->raid_device = raid_device; Due to above hunk, we are not initializing raid_device's starget for raid volumes, and so during raid disk deletion driver is not calling scsi_remove_target() API as driver observes starget field of raid_device's structure as NULL. Signed-off-by: Sreekanth Reddy Cc: # v4.4+ Fixes: 7786ab6aff9 ("mpt3sas: Ported WarpDrive product SSS6200 support") Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 209a969a979d..8aa769a2d919 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1273,9 +1273,9 @@ scsih_target_alloc(struct scsi_target *starget) sas_target_priv_data->handle = raid_device->handle; sas_target_priv_data->sas_address = raid_device->wwid; sas_target_priv_data->flags |= MPT_TARGET_FLAGS_VOLUME; - sas_target_priv_data->raid_device = raid_device; if (ioc->is_warpdrive) - raid_device->starget = starget; + sas_target_priv_data->raid_device = raid_device; + raid_device->starget = starget; } spin_unlock_irqrestore(&ioc->raid_device_lock, flags); return 0; From aac173e9618faadf8f92af6cc05e64f7acc64d79 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Fri, 28 Oct 2016 12:27:26 -0400 Subject: [PATCH 3/9] scsi: vmw_pvscsi: return SUCCESS for successful command aborts The vmw_pvscsi driver reports most successful aborts as FAILED to the scsi error handler. This is do to a misunderstanding of how completion_done() works and its interaction with a successful wait using wait_for_completion_timeout(). The vmw_pvscsi driver is expecting completion_done() to always return true if complete() has been called on the completion structure. But completion_done() returns true after complete() has been called only if no function like wait_for_completion_timeout() has seen the completion and cleared it as part of successfully waiting for the completion. Instead of using completion_done(), vmw_pvscsi should just use the return value from wait_for_completion_timeout() to know if the wait timed out or not. [mkp: bumped driver version per request] Signed-off-by: David Jeffery Reviewed-by: Laurence Oberman Reviewed-by: Ewan D. Milne Acked-by: Jim Gill Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 5 +++-- drivers/scsi/vmw_pvscsi.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 4a0d3cdc607c..15ca09cd16f3 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -793,6 +793,7 @@ static int pvscsi_abort(struct scsi_cmnd *cmd) unsigned long flags; int result = SUCCESS; DECLARE_COMPLETION_ONSTACK(abort_cmp); + int done; scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n", adapter->host->host_no, cmd); @@ -824,10 +825,10 @@ static int pvscsi_abort(struct scsi_cmnd *cmd) pvscsi_abort_cmd(adapter, ctx); spin_unlock_irqrestore(&adapter->hw_lock, flags); /* Wait for 2 secs for the completion. */ - wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000)); + done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000)); spin_lock_irqsave(&adapter->hw_lock, flags); - if (!completion_done(&abort_cmp)) { + if (!done) { /* * Failed to abort the command, unmark the fact that it * was requested to be aborted. diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h index c097d2ccbde3..d41292ef85f2 100644 --- a/drivers/scsi/vmw_pvscsi.h +++ b/drivers/scsi/vmw_pvscsi.h @@ -26,7 +26,7 @@ #include -#define PVSCSI_DRIVER_VERSION_STRING "1.0.6.0-k" +#define PVSCSI_DRIVER_VERSION_STRING "1.0.7.0-k" #define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128 From df3d422cbac685da882e4c239dfda07de33d431b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 1 Nov 2016 08:19:57 -0600 Subject: [PATCH 4/9] scsi: scsi_dh_alua: Fix a reference counting bug The code at the end of alua_rtpg_work() is as follows: scsi_device_put(sdev); kref_put(&pg->kref, release_port_group); In other words, alua_rtpg_queue() must hold an sdev reference and a pg reference before queueing rtpg work. If no rtpg work is queued no additional references should be held when alua_rtpg_queue() returns. If no rtpg work is queued, ensure that alua_rtpg_queue() only gives up the sdev reference if that reference was obtained by the same alua_rtpg_queue() call. Signed-off-by: Bart Van Assche Reported-by: Tang Junhui Cc: Hannes Reinecke Cc: Tang Junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index f375167f16ea..7bb20684e9fa 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -891,6 +891,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg, /* Do not queue if the worker is already running */ if (!(pg->flags & ALUA_PG_RUNNING)) { kref_get(&pg->kref); + sdev = NULL; start_queue = 1; } } @@ -902,7 +903,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg, if (start_queue && !queue_delayed_work(alua_wq, &pg->rtpg_work, msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { - scsi_device_put(sdev); + if (sdev) + scsi_device_put(sdev); kref_put(&pg->kref, release_port_group); } } From a5dd506e1584e91f3e7500ab9a165aa1b49eabd4 Mon Sep 17 00:00:00 2001 From: Bill Kuzeja Date: Fri, 21 Oct 2016 16:45:27 -0400 Subject: [PATCH 5/9] scsi: qla2xxx: Fix scsi scan hang triggered if adapter fails during init A system can get hung task timeouts if a qlogic board fails during initialization (if the board breaks again or fails the init). The hang involves the scsi scan. In a nutshell, since commit beb9e315e6e0 ("qla2xxx: Prevent removal and board_disable race"): ...it is possible to have freed ha (base_vha->hw) early by a call to qla2x00_remove_one when pdev->enable_cnt equals zero: if (!atomic_read(&pdev->enable_cnt)) { scsi_host_put(base_vha->host); kfree(ha); pci_set_drvdata(pdev, NULL); return; Almost always, the scsi_host_put above frees the vha structure (attached to the end of the Scsi_Host we're putting) since it's the last put, and life is good. However, if we are entering this routine because the adapter has broken sometime during initialization AND a scsi scan is already in progress (and has done its own scsi_host_get), vha will not be freed. What's worse, the scsi scan will access the freed ha structure through qla2xxx_scan_finished: if (time > vha->hw->loop_reset_delay * HZ) return 1; The scsi scan keeps checking to see if a scan is complete by calling qla2xxx_scan_finished. There is a timeout value that limits the length of time a scan can take (hw->loop_reset_delay, usually set to 5 seconds), but this definition is in the data structure (hw) that can get freed early. This can yield unpredictable results, the worst of which is that the scsi scan can hang indefinitely. This happens when the freed structure gets reused and loop_reset_delay gets overwritten with garbage, which the scan obliviously uses as its timeout value. The fix for this is simple: at the top of qla2xxx_scan_finished, check for the UNLOADING bit in the vha structure (_vha is not freed at this point). If UNLOADING is set, we exit the scan for this adapter immediately. After this last reference to the ha structure, we'll exit the scan for this adapter, and continue on. This problem is hard to hit, but I have run into it doing negative testing many times now (with a test specifically designed to bring it out), so I can verify that this fix works. My testing has been against a RHEL7 driver variant, but the bug and patch are equally relevant to to the upstream driver. Fixes: beb9e315e6e0 ("qla2xxx: Prevent removal and board_disable race") Cc: # v3.18+ Signed-off-by: Bill Kuzeja Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index ace65db1d2a2..4d99c3b37687 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2341,6 +2341,8 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) { scsi_qla_host_t *vha = shost_priv(shost); + if (test_bit(UNLOADING, &vha->dpc_flags)) + return 1; if (!vha->host) return 1; if (time > vha->hw->loop_reset_delay * HZ) From 69e2d1e6c0af0dd7f18cfd434b008844568641a9 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 5 Nov 2016 21:49:28 +0530 Subject: [PATCH 6/9] scsi: libcxgbi: fix incorrect DDP resource cleanup Before calling task_release_itt() task data is memset to zero because of which DDP context information is lost resulting in incorrect DDP resource cleanup, to fix this call task_release_itt() before memset. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/libcxgbi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index d1421139e6ea..2ffe029ff2b6 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -2081,9 +2081,10 @@ void cxgbi_cleanup_task(struct iscsi_task *task) /* never reached the xmit task callout */ if (tdata->skb) __kfree_skb(tdata->skb); - memset(tdata, 0, sizeof(*tdata)); task_release_itt(task, task->hdr_itt); + memset(tdata, 0, sizeof(*tdata)); + iscsi_tcp_cleanup_task(task); } EXPORT_SYMBOL_GPL(cxgbi_cleanup_task); From 04dfaa53a0b6e66b328a5bc549e3af8f8b6eac02 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 7 Nov 2016 17:53:30 -0200 Subject: [PATCH 7/9] scsi: qla2xxx: do not queue commands when unloading When the driver is unloading, in qla2x00_remove_one(), there is a single call/point in time to abort ongoing commands, qla2x00_abort_all_cmds(), which is still several steps away from the call to scsi_remove_host(). If more commands continue to arrive and be processed during that interval, when the driver is tearing down and releasing its structures, it might potentially hit an oops due to invalid memory access: Unable to handle kernel paging request for data at address 0x00000138 <...> NIP [d000000004700a40] qla2xxx_queuecommand+0x80/0x3f0 [qla2xxx] LR [d000000004700a10] qla2xxx_queuecommand+0x50/0x3f0 [qla2xxx] So, fail commands in qla2xxx_queuecommand() if the UNLOADING bit is set. Signed-off-by: Mauricio Faria de Oliveira Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4d99c3b37687..e5db47443668 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -707,6 +707,11 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) srb_t *sp; int rval; + if (unlikely(test_bit(UNLOADING, &base_vha->dpc_flags))) { + cmd->result = DID_NO_CONNECT << 16; + goto qc24_fail_command; + } + if (ha->flags.eeh_busy) { if (ha->flags.pci_channel_io_perm_failure) { ql_dbg(ql_dbg_aer, vha, 0x9010, From 1535aa75a3d8320374f82d198e3900d5b0969d7e Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 7 Nov 2016 17:53:31 -0200 Subject: [PATCH 8/9] scsi: qla2xxx: fix invalid DMA access after command aborts in PCI device remove If a command is aborted in the kernel but not in the adapter, it might be considered complete and its DMA memory released, but it is still alive in the adapter, which will trigger an invalid DMA access upon its completion (in the DMA operations to deliver the command response to the driver). On powerpc platforms with IOMMU/EEH capabilities, the problem is observed during PCI device removal with ongoing IO requests -- which might trigger an EEH event very often, pointing to a 'TCE Request Page Access Error'. In that path, which is qla2x00_remove_one(), the commands are aborted in qla2x00_abort_all_cmds(), which does not perform an abort in the adapter as is done in qla2xxx_eh_abort() for example. So, this patch changes qla2x00_abort_all_cmds() to abort commands in the adapter too, with a call to qla2xxx_eh_abort(), which already implements all the logic to submit abort requests and handle responses. Reported-by: Naresh Bannoth Signed-off-by: Mauricio Faria de Oliveira Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index e5db47443668..567fa080e261 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1456,6 +1456,15 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { + /* Get a reference to the sp and drop the lock. + * The reference ensures this sp->done() call + * - and not the call in qla2xxx_eh_abort() - + * ends the SCSI command (with result 'res'). + */ + sp_get(sp); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + qla2xxx_eh_abort(GET_CMD_SP(sp)); + spin_lock_irqsave(&ha->hardware_lock, flags); req->outstanding_cmds[cnt] = NULL; sp->done(vha, sp, res); } From 5e5ec1759dd663a1d5a2f10930224dd009e500e8 Mon Sep 17 00:00:00 2001 From: Sumit Saxena Date: Wed, 9 Nov 2016 02:59:42 -0800 Subject: [PATCH 9/9] scsi: megaraid_sas: fix macro MEGASAS_IS_LOGICAL to avoid regression This patch will fix regression caused by commit 1e793f6fc0db ("scsi: megaraid_sas: Fix data integrity failure for JBOD (passthrough) devices"). The problem was that the MEGASAS_IS_LOGICAL macro did not have braces and as a result the driver ended up exposing a lot of non-existing SCSI devices (all SCSI commands to channels 1,2,3 were returned as SUCCESS-DID_OK by driver). [mkp: clarified patch description] Fixes: 1e793f6fc0db920400574211c48f9157a37e3945 Reported-by: Jens Axboe CC: stable@vger.kernel.org Signed-off-by: Kashyap Desai Signed-off-by: Sumit Saxena Tested-by: Sumit Saxena Reviewed-by: Tomas Henzl Tested-by: Jens Axboe Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index ca86c885dfaa..3aaea713bf37 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2233,7 +2233,7 @@ struct megasas_instance_template { }; #define MEGASAS_IS_LOGICAL(scp) \ - (scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1 + ((scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) #define MEGASAS_DEV_INDEX(scp) \ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \