From bf1b4659dc278b68f22b11b2c5fe7e3eb96e75a7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 28 Jan 2021 14:56:58 +0900 Subject: [PATCH 01/30] scsi: sd: Warn if unsupported ZBC device is probed In sd_probe(), print a warning if CONFIG_BLK_DEV_ZONED is disabled and a TYPE_ZBC device is found. While at it, use IS_ENABLED() to test if CONFIG_BLK_DEV_ZONED is enabled instead using of a #ifdef. Link: https://lore.kernel.org/r/20210128055658.530133-1-damien.lemoal@wdc.com Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a3d2d4bc4a3d..6e41ecbf4399 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3379,10 +3379,12 @@ static int sd_probe(struct device *dev) sdp->type != TYPE_RBC) goto out; -#ifndef CONFIG_BLK_DEV_ZONED - if (sdp->type == TYPE_ZBC) + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && sdp->type == TYPE_ZBC) { + sdev_printk(KERN_WARNING, sdp, + "Unsupported ZBC host-managed device.\n"); goto out; -#endif + } + SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp, "sd_probe\n")); From e92b0b5edfc7c83bd2d791929aa4e0c89ac029aa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 17:30:14 +0100 Subject: [PATCH 02/30] scsi: pmcraid: Fix 'ioarcb' alignment warning Building with 'make W=1' enables -Wpacked-not-aligned, and this warns about pmcraid because of incompatible alignment constraints for pmcraid_passthrough_ioctl_buffer: drivers/scsi/pmcraid.h:1044:1: warning: alignment 1 of 'struct pmcraid_passthrough_ioctl_buffer' is less than 32 [-Wpacked-not-aligned] 1044 | } __attribute__ ((packed)); | ^ drivers/scsi/pmcraid.h:1041:24: warning: 'ioarcb' offset 16 in 'struct pmcraid_passthrough_ioctl_buffer' isn't aligned to 32 [-Wpacked-not-aligned] 1041 | struct pmcraid_ioarcb ioarcb; The inner structure is documented as having 32 byte alignment here, but is starts at a 16 byte offset in the outer structure, so it's never actually aligned, as the outer structure is also marked 'packed'. Lee Jones point this out as one of the last files that need to be changed before the warning can be enabled by default. Change the annotations in a way that avoids the warning but leaves the layout unchanged, by removing the packing on the inner structure and adding it to the outer one. The one-byte request_buffer[] array should have been a flexible array member here, which is how I change it to avoid extra padding from the alignment attribute. Link: https://lore.kernel.org/r/20210204163020.3286210-1-arnd@kernel.org Cc: Lee Jones Reviewed-by: Lee Jones Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen --- drivers/scsi/pmcraid.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 15c962108075..6d36debde18e 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -244,7 +244,7 @@ struct pmcraid_ioarcb { __u8 hrrq_id; __u8 cdb[PMCRAID_MAX_CDB_LEN]; struct pmcraid_ioarcb_add_data add_data; -} __attribute__((packed, aligned(PMCRAID_IOARCB_ALIGNMENT))); +}; /* well known resource handle values */ #define PMCRAID_IOA_RES_HANDLE 0xffffffff @@ -1040,8 +1040,8 @@ struct pmcraid_passthrough_ioctl_buffer { struct pmcraid_ioctl_header ioctl_header; struct pmcraid_ioarcb ioarcb; struct pmcraid_ioasa ioasa; - u8 request_buffer[1]; -} __attribute__ ((packed)); + u8 request_buffer[]; +} __attribute__ ((packed, aligned(PMCRAID_IOARCB_ALIGNMENT))); /* * keys to differentiate between driver handled IOCTLs and passthrough From d309ae07327d19ce613629a0535e9a11a8ff5127 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 1 Feb 2021 19:45:22 +0530 Subject: [PATCH 03/30] scsi: mpt3sas: Fix ReplyPostFree pool allocation Currently the driver allocates memory for ReplyPostFree queues in chunks of 16. In resource constrained environments--such as VM with 1 GB RAM and 2 CPUs--memory allocation for ReplyPostFree pools may fail because the driver tries to allocate a memory for 16 ReplyPostFree queues even though the actual number needed is 2. Change the driver to allocate memory for only the actual number of queues needed if the ReplyPostFree queue count is less than 16. Link: https://lore.kernel.org/r/20210201141522.25363-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index f5582c8e77c9..e2455b9c575e 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -5641,7 +5641,8 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) reply_post_free_sz = ioc->reply_post_queue_depth * sizeof(Mpi2DefaultReplyDescriptor_t); rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK; - if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) + if ((_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable) + || (ioc->reply_queue_count < RDPQ_MAX_INDEX_IN_ONE_CHUNK)) rdpq_sz = reply_post_free_sz * ioc->reply_queue_count; ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz); if (ret == -EAGAIN) { From 664f0dce20580837c7fa136a03b3a9fc43034104 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 2 Feb 2021 15:28:32 +0530 Subject: [PATCH 04/30] scsi: mpt3sas: Add support for shared host tagset for CPU hotplug MPT Fusion adapters can steer completions to individual queues and we now have support for shared host-wide tags in the I/O stack. The addition of the host-wide tags allows us to enable multiqueue support for MPT Fusion adapters. Once host-wise tags are enabled, the CPU hotplug feature is also supported. Allow use of host-wide tags to be disabled through the "host_tagset_enable" module parameter. Once we do not have any major performance regressions using host-wide tags, we will drop the hand-crafted interrupt affinity settings. Performance is meeting expectations. About 3.1M IOPS using 24 Drive SSD on Aero controllers. Link: https://lore.kernel.org/r/20210202095832.23072-1-sreekanth.reddy@broadcom.com Reported-by: kernel test robot Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 52 ++++++++++++++++++---------- drivers/scsi/mpt3sas/mpt3sas_base.h | 1 + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 42 +++++++++++++++++++++- 3 files changed, 76 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index e2455b9c575e..35078fedef54 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3648,25 +3648,16 @@ _base_get_msix_index(struct MPT3SAS_ADAPTER *ioc, base_mod64(atomic64_add_return(1, &ioc->total_io_cnt), ioc->reply_queue_count) : 0; + if (scmd && ioc->shost->nr_hw_queues > 1) { + u32 tag = blk_mq_unique_tag(scmd->request); + + return blk_mq_unique_tag_to_hwq(tag) + + ioc->high_iops_queues; + } + return ioc->cpu_msix_table[raw_smp_processor_id()]; } -/** - * _base_sdev_nr_inflight_request -get number of inflight requests - * of a request queue. - * @q: request_queue object - * - * returns number of inflight request of a request queue. - */ -inline unsigned long -_base_sdev_nr_inflight_request(struct request_queue *q) -{ - struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[0]; - - return atomic_read(&hctx->nr_active); -} - - /** * _base_get_high_iops_msix_index - get the msix index of * high iops queues @@ -3686,7 +3677,8 @@ _base_get_high_iops_msix_index(struct MPT3SAS_ADAPTER *ioc, * reply queues in terms of batch count 16 when outstanding * IOs on the target device is >=8. */ - if (_base_sdev_nr_inflight_request(scmd->device->request_queue) > + + if (atomic_read(&scmd->device->device_busy) > MPT3SAS_DEVICE_HIGH_IOPS_DEPTH) return base_mod64(( atomic64_add_return(1, &ioc->high_iops_outstanding) / @@ -3739,8 +3731,23 @@ mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx, struct scsi_cmnd *scmd) { struct scsiio_tracker *request = scsi_cmd_priv(scmd); - unsigned int tag = scmd->request->tag; u16 smid; + u32 tag, unique_tag; + + unique_tag = blk_mq_unique_tag(scmd->request); + tag = blk_mq_unique_tag_to_tag(unique_tag); + + /* + * Store hw queue number corresponding to the tag. + * This hw queue number is used later to determine + * the unique_tag using the logic below. This unique_tag + * is used to retrieve the scmd pointer corresponding + * to tag using scsi_host_find_tag() API. + * + * tag = smid - 1; + * unique_tag = ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag; + */ + ioc->io_queue_num[tag] = blk_mq_unique_tag_to_hwq(unique_tag); smid = tag + 1; request->cb_idx = cb_idx; @@ -3831,6 +3838,7 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid) mpt3sas_base_clear_st(ioc, st); _base_recovery_check(ioc); + ioc->io_queue_num[smid - 1] = 0; return; } @@ -5362,6 +5370,9 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) kfree(ioc->chain_lookup); ioc->chain_lookup = NULL; } + + kfree(ioc->io_queue_num); + ioc->io_queue_num = NULL; } /** @@ -5773,6 +5784,11 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) ioc_info(ioc, "internal(0x%p): depth(%d), start smid(%d)\n", ioc->internal, ioc->internal_depth, ioc->internal_smid)); + + ioc->io_queue_num = kcalloc(ioc->scsiio_depth, + sizeof(u16), GFP_KERNEL); + if (!ioc->io_queue_num) + goto out; /* * The number of NVMe page sized blocks needed is: * (((sg_tablesize * 8) - 1) / (page_size - 8)) + 1 diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 2def7a340616..2eb94e477b3d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1439,6 +1439,7 @@ struct MPT3SAS_ADAPTER { spinlock_t scsi_lookup_lock; int pending_io_count; wait_queue_head_t reset_wq; + u16 *io_queue_num; /* PCIe SGL */ struct dma_pool *pcie_sgl_dma_pool; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index c8b09a81834d..a665d1c93061 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include "mpt3sas_base.h" @@ -168,6 +169,11 @@ MODULE_PARM_DESC(multipath_on_hba, "\t SAS 2.0 & SAS 3.0 HBA - This will be disabled,\n\t\t" "\t SAS 3.5 HBA - This will be enabled)"); +static int host_tagset_enable = 1; +module_param(host_tagset_enable, int, 0444); +MODULE_PARM_DESC(host_tagset_enable, + "Shared host tagset enable/disable Default: enable(1)"); + /* raid transport support */ static struct raid_template *mpt3sas_raid_template; static struct raid_template *mpt2sas_raid_template; @@ -1743,10 +1749,12 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid) struct scsi_cmnd *scmd = NULL; struct scsiio_tracker *st; Mpi25SCSIIORequest_t *mpi_request; + u16 tag = smid - 1; if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) { - u32 unique_tag = smid - 1; + u32 unique_tag = + ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag; mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); @@ -11599,6 +11607,22 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time) return 1; } +/** + * scsih_map_queues - map reply queues with request queues + * @shost: SCSI host pointer + */ +static int scsih_map_queues(struct Scsi_Host *shost) +{ + struct MPT3SAS_ADAPTER *ioc = + (struct MPT3SAS_ADAPTER *)shost->hostdata; + + if (ioc->shost->nr_hw_queues == 1) + return 0; + + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + ioc->pdev, ioc->high_iops_queues); +} + /* shost template for SAS 2.0 HBA devices */ static struct scsi_host_template mpt2sas_driver_template = { .module = THIS_MODULE, @@ -11666,6 +11690,7 @@ static struct scsi_host_template mpt3sas_driver_template = { .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1, .cmd_size = sizeof(struct scsiio_tracker), + .map_queues = scsih_map_queues, }; /* raid transport support for SAS 3.0 HBA devices */ @@ -12028,6 +12053,21 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) } else ioc->hide_drives = 0; + shost->host_tagset = 0; + shost->nr_hw_queues = 1; + + if (ioc->is_gen35_ioc && ioc->reply_queue_count > 1 && + host_tagset_enable && ioc->smp_affinity_enable) { + + shost->host_tagset = 1; + shost->nr_hw_queues = + ioc->reply_queue_count - ioc->high_iops_queues; + + dev_info(&ioc->pdev->dev, + "Max SCSIIO MPT commands: %d shared with nr_hw_queues = %d\n", + shost->can_queue, shost->nr_hw_queues); + } + rv = scsi_add_host(shost, &pdev->dev); if (rv) { ioc_err(ioc, "failure at %s:%d/%s()!\n", From 688c1a0a130ba33ebfbb45bfe2bbe151e48d385f Mon Sep 17 00:00:00 2001 From: Suganath Prabu S Date: Thu, 4 Feb 2021 09:07:23 +0530 Subject: [PATCH 05/30] scsi: mpt3sas: Additional diagnostic buffer query interface When a host trace buffer is released, applications never know for what reason the buffer is released. Add a new IOCTL MPT3ADDNLDIAGQUERY to provide the trigger information due to which the diag buffer is released. Link: https://lore.kernel.org/r/20210204033724.1345-2-suganath-prabu.subramani@broadcom.com Signed-off-by: Suganath Prabu S Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 5 +- drivers/scsi/mpt3sas/mpt3sas_base.h | 47 +++++++++++++++ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 67 ++++++++++++++++++++- drivers/scsi/mpt3sas/mpt3sas_ctl.h | 22 +++++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 + drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c | 38 +++++++++++- 6 files changed, 177 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 35078fedef54..ac066f86bb14 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -8191,8 +8191,11 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc, ioc_state = mpt3sas_base_get_iocstate(ioc, 0); if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT || (ioc_state & MPI2_IOC_STATE_MASK) == - MPI2_IOC_STATE_COREDUMP) + MPI2_IOC_STATE_COREDUMP) { is_fault = 1; + ioc->htb_rel.trigger_info_dwords[1] = + (ioc_state & MPI2_DOORBELL_DATA_MASK); + } } _base_pre_reset_handler(ioc); mpt3sas_wait_for_commands_to_complete(ioc); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 2eb94e477b3d..b0f1d6738a6c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1073,6 +1073,50 @@ struct hba_port { #define MULTIPATH_DISABLED_PORT_ID 0xFF +/** + * struct htb_rel_query - diagnostic buffer release reason + * @unique_id - unique id associated with this buffer. + * @buffer_rel_condition - Release condition ioctl/sysfs/reset + * @reserved + * @trigger_type - Master/Event/scsi/MPI + * @trigger_info_dwords - Data Correspondig to trigger type + */ +struct htb_rel_query { + u16 buffer_rel_condition; + u16 reserved; + u32 trigger_type; + u32 trigger_info_dwords[2]; +}; + +/* Buffer_rel_condition bit fields */ + +/* Bit 0 - Diag Buffer not Released */ +#define MPT3_DIAG_BUFFER_NOT_RELEASED (0x00) +/* Bit 0 - Diag Buffer Released */ +#define MPT3_DIAG_BUFFER_RELEASED (0x01) + +/* + * Bit 1 - Diag Buffer Released by IOCTL, + * This bit is valid only if Bit 0 is one + */ +#define MPT3_DIAG_BUFFER_REL_IOCTL (0x02 | MPT3_DIAG_BUFFER_RELEASED) + +/* + * Bit 2 - Diag Buffer Released by Trigger, + * This bit is valid only if Bit 0 is one + */ +#define MPT3_DIAG_BUFFER_REL_TRIGGER (0x04 | MPT3_DIAG_BUFFER_RELEASED) + +/* + * Bit 3 - Diag Buffer Released by SysFs, + * This bit is valid only if Bit 0 is one + */ +#define MPT3_DIAG_BUFFER_REL_SYSFS (0x08 | MPT3_DIAG_BUFFER_RELEASED) + +/* DIAG RESET Master trigger flags */ +#define MPT_DIAG_RESET_ISSUED_BY_DRIVER 0x00000000 +#define MPT_DIAG_RESET_ISSUED_BY_USER 0x00000001 + typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); /** * struct MPT3SAS_ADAPTER - per adapter struct @@ -1530,6 +1574,8 @@ struct MPT3SAS_ADAPTER { u32 diagnostic_flags[MPI2_DIAG_BUF_TYPE_COUNT]; u32 ring_buffer_offset; u32 ring_buffer_sz; + struct htb_rel_query htb_rel; + u8 reset_from_user; u8 is_warpdrive; u8 is_mcpu_endpoint; u8 hide_ir_msg; @@ -1566,6 +1612,7 @@ struct mpt3sas_debugfs_buffer { }; #define MPT_DRV_SUPPORT_BITMAP_MEMMOVE 0x00000001 +#define MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY 0x00000002 typedef u8 (*MPT_CALLBACK)(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply); diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index c8a0ce18f2c5..44f9a05db94e 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -479,6 +479,8 @@ void mpt3sas_ctl_pre_reset_handler(struct MPT3SAS_ADAPTER *ioc) ioc_info(ioc, "%s: Releasing the trace buffer due to adapter reset.", __func__); + ioc->htb_rel.buffer_rel_condition = + MPT3_DIAG_BUFFER_REL_TRIGGER; mpt3sas_send_diag_release(ioc, i, &issue_reset); } } @@ -1334,6 +1336,7 @@ _ctl_do_reset(struct MPT3SAS_ADAPTER *ioc, void __user *arg) dctlprintk(ioc, ioc_info(ioc, "%s: enter\n", __func__)); + ioc->reset_from_user = 1; retval = mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER); ioc_info(ioc, "Ioctl: host reset: %s\n", ((!retval) ? "SUCCESS" : "FAILED")); @@ -1687,6 +1690,9 @@ _ctl_diag_register_2(struct MPT3SAS_ADAPTER *ioc, request_data = ioc->diag_buffer[buffer_type]; request_data_sz = diag_register->requested_buffer_size; ioc->unique_id[buffer_type] = diag_register->unique_id; + /* Reset ioc variables used for additional query commands */ + ioc->reset_from_user = 0; + memset(&ioc->htb_rel, 0, sizeof(struct htb_rel_query)); ioc->diag_buffer_status[buffer_type] &= MPT3_DIAG_BUFFER_IS_DRIVER_ALLOCATED; memcpy(ioc->product_specific[buffer_type], @@ -2469,7 +2475,61 @@ _ctl_diag_read_buffer(struct MPT3SAS_ADAPTER *ioc, void __user *arg) return rc; } +/** + * _ctl_addnl_diag_query - query relevant info associated with diag buffers + * @ioc: per adapter object + * @arg: user space buffer containing ioctl content + * + * The application will send only unique_id. Driver will + * inspect unique_id first, if valid, fill the details related to cause + * for diag buffer release. + */ +static long +_ctl_addnl_diag_query(struct MPT3SAS_ADAPTER *ioc, void __user *arg) +{ + struct mpt3_addnl_diag_query karg; + u32 buffer_type = 0; + if (copy_from_user(&karg, arg, sizeof(karg))) { + pr_err("%s: failure at %s:%d/%s()!\n", + ioc->name, __FILE__, __LINE__, __func__); + return -EFAULT; + } + dctlprintk(ioc, ioc_info(ioc, "%s\n", __func__)); + if (karg.unique_id == 0) { + ioc_err(ioc, "%s: unique_id is(0x%08x)\n", + __func__, karg.unique_id); + return -EPERM; + } + buffer_type = _ctl_diag_get_bufftype(ioc, karg.unique_id); + if (buffer_type == MPT3_DIAG_UID_NOT_FOUND) { + ioc_err(ioc, "%s: buffer with unique_id(0x%08x) not found\n", + __func__, karg.unique_id); + return -EPERM; + } + memset(&karg.buffer_rel_condition, 0, sizeof(struct htb_rel_query)); + if ((ioc->diag_buffer_status[buffer_type] & + MPT3_DIAG_BUFFER_IS_REGISTERED) == 0) { + ioc_info(ioc, "%s: buffer_type(0x%02x) is not registered\n", + __func__, buffer_type); + goto out; + } + if ((ioc->diag_buffer_status[buffer_type] & + MPT3_DIAG_BUFFER_IS_RELEASED) == 0) { + ioc_err(ioc, "%s: buffer_type(0x%02x) is not released\n", + __func__, buffer_type); + return -EPERM; + } + memcpy(&karg.buffer_rel_condition, &ioc->htb_rel, + sizeof(struct htb_rel_query)); +out: + if (copy_to_user(arg, &karg, sizeof(struct mpt3_addnl_diag_query))) { + ioc_err(ioc, "%s: unable to write mpt3_addnl_diag_query data @ %p\n", + __func__, arg); + return -EFAULT; + } + return 0; +} #ifdef CONFIG_COMPAT /** @@ -2533,7 +2593,7 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg, struct MPT3SAS_ADAPTER *ioc; struct mpt3_ioctl_header ioctl_header; enum block_state state; - long ret = -EINVAL; + long ret = -ENOIOCTLCMD; /* get IOCTL header */ if (copy_from_user(&ioctl_header, (char __user *)arg, @@ -2643,6 +2703,10 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg, if (_IOC_SIZE(cmd) == sizeof(struct mpt3_diag_read_buffer)) ret = _ctl_diag_read_buffer(ioc, arg); break; + case MPT3ADDNLDIAGQUERY: + if (_IOC_SIZE(cmd) == sizeof(struct mpt3_addnl_diag_query)) + ret = _ctl_addnl_diag_query(ioc, arg); + break; default: dctlprintk(ioc, ioc_info(ioc, "unsupported ioctl opcode(0x%08x)\n", @@ -3425,6 +3489,7 @@ host_trace_buffer_enable_store(struct device *cdev, MPT3_DIAG_BUFFER_IS_RELEASED)) goto out; ioc_info(ioc, "releasing host trace buffer\n"); + ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_SYSFS; mpt3sas_send_diag_release(ioc, MPI2_DIAG_BUF_TYPE_TRACE, &issue_reset); } diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.h b/drivers/scsi/mpt3sas/mpt3sas_ctl.h index 0f7aa4ddade0..d2ccdafb8df2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.h +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.h @@ -94,6 +94,8 @@ struct mpt3_diag_query) #define MPT3DIAGREADBUFFER _IOWR(MPT3_MAGIC_NUMBER, 30, \ struct mpt3_diag_read_buffer) +#define MPT3ADDNLDIAGQUERY _IOWR(MPT3_MAGIC_NUMBER, 32, \ + struct mpt3_addnl_diag_query) /* Trace Buffer default UniqueId */ #define MPT2DIAGBUFFUNIQUEID (0x07075900) @@ -430,4 +432,24 @@ struct mpt3_diag_read_buffer { uint32_t diagnostic_data[1]; }; +/** + * struct mpt3_addnl_diag_query - diagnostic buffer release reason + * @hdr - generic header + * @unique_id - unique id associated with this buffer. + * @buffer_rel_condition - Release condition ioctl/sysfs/reset + * @reserved1 + * @trigger_type - Master/Event/scsi/MPI + * @trigger_info_dwords - Data Correspondig to trigger type + * @reserved2 + */ +struct mpt3_addnl_diag_query { + struct mpt3_ioctl_header hdr; + uint32_t unique_id; + uint16_t buffer_rel_condition; + uint16_t reserved1; + uint32_t trigger_type; + uint32_t trigger_info_dwords[2]; + uint32_t reserved2[2]; +}; + #endif /* MPT3SAS_CTL_H_INCLUDED */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index a665d1c93061..ffca03064797 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11947,6 +11947,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) * Enable MEMORY MOVE support flag. */ ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_MEMMOVE; + /* Enable ADDITIONAL QUERY support flag. */ + ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY; ioc->enable_sdev_max_qd = enable_sdev_max_qd; diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c index 8ec9bab20ec4..d9b7d0ee25b0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c +++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c @@ -132,6 +132,35 @@ mpt3sas_process_trigger_data(struct MPT3SAS_ADAPTER *ioc, &issue_reset); } + ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_TRIGGER; + if (event_data) { + ioc->htb_rel.trigger_type = event_data->trigger_type; + switch (event_data->trigger_type) { + case MPT3SAS_TRIGGER_SCSI: + memcpy(&ioc->htb_rel.trigger_info_dwords, + &event_data->u.scsi, + sizeof(struct SL_WH_SCSI_TRIGGER_T)); + break; + case MPT3SAS_TRIGGER_MPI: + memcpy(&ioc->htb_rel.trigger_info_dwords, + &event_data->u.mpi, + sizeof(struct SL_WH_MPI_TRIGGER_T)); + break; + case MPT3SAS_TRIGGER_MASTER: + ioc->htb_rel.trigger_info_dwords[0] = + event_data->u.master.MasterData; + break; + case MPT3SAS_TRIGGER_EVENT: + memcpy(&ioc->htb_rel.trigger_info_dwords, + &event_data->u.event, + sizeof(struct SL_WH_EVENT_TRIGGER_T)); + break; + default: + ioc_err(ioc, "%d - Is not a valid Trigger type\n", + event_data->trigger_type); + break; + } + } _mpt3sas_raise_sigio(ioc, event_data); dTriggerDiagPrintk(ioc, ioc_info(ioc, "%s: exit\n", @@ -201,9 +230,14 @@ mpt3sas_trigger_master(struct MPT3SAS_ADAPTER *ioc, u32 trigger_bitmask) event_data.u.master.MasterData = trigger_bitmask; if (trigger_bitmask & MASTER_TRIGGER_FW_FAULT || - trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET) + trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET) { + ioc->htb_rel.trigger_type = MPT3SAS_TRIGGER_MASTER; + ioc->htb_rel.trigger_info_dwords[0] = trigger_bitmask; + if (ioc->reset_from_user) + ioc->htb_rel.trigger_info_dwords[1] = + MPT_DIAG_RESET_ISSUED_BY_USER; _mpt3sas_raise_sigio(ioc, &event_data); - else + } else mpt3sas_send_trigger_data_event(ioc, &event_data); out: From 446b5f3d3fd5545029525a7a3881ecd9dd246976 Mon Sep 17 00:00:00 2001 From: Suganath Prabu S Date: Thu, 4 Feb 2021 09:07:24 +0530 Subject: [PATCH 06/30] scsi: mpt3sas: Update driver version to 37.100.00.00 Update driver version to 37.100.00.00. Link: https://lore.kernel.org/r/20210204033724.1345-3-suganath-prabu.subramani@broadcom.com Signed-off-by: Suganath Prabu S Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index b0f1d6738a6c..315aee6ef86f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -77,8 +77,8 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "36.100.00.00" -#define MPT3SAS_MAJOR_VERSION 36 +#define MPT3SAS_DRIVER_VERSION "37.100.00.00" +#define MPT3SAS_MAJOR_VERSION 37 #define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 From 762a8ea515f57166697f45880b8a41162f63ccd9 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 3 Feb 2021 09:49:52 +0800 Subject: [PATCH 07/30] scsi: target: sbp: Remove unneeded semicolon Eliminate the following coccicheck warning: ./drivers/target/sbp/sbp_target.c:1009:2-3: Unneeded semicolon Link: https://lore.kernel.org/r/1612316992-71443-1-git-send-email-yang.lee@linux.alibaba.com Reported-by: Abaci Robot Reviewed-by: Chaitanya Kulkarni Signed-off-by: Yang Li Signed-off-by: Martin K. Petersen --- drivers/target/sbp/sbp_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index e4a9b9fe3dfb..2a6165febd3b 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1006,7 +1006,7 @@ static void tgt_agent_fetch_work(struct work_struct *work) agent->state = AGENT_STATE_SUSPENDED; spin_unlock_bh(&agent->lock); - }; + } } static struct sbp_target_agent *sbp_target_agent_register( From 960204ecca5e71a7ef4319a2e94eed2279da1624 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 3 Feb 2021 10:26:30 +0800 Subject: [PATCH 08/30] scsi: qla2xxx: Simplify if statement Fix the following coccicheck warnings: ./drivers/scsi/qla2xxx/qla_target.c:984:12-14: WARNING !A || A && B is equivalent to !A || B. Link: https://lore.kernel.org/r/1612319190-111421-1-git-send-email-jiapeng.chong@linux.alibaba.com Reported-by: Abaci Robot Reviewed-by: Himanshu Madhani Signed-off-by: Jiapeng Chong Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 0d09480b66cd..c48daf52725d 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -981,8 +981,7 @@ void qlt_free_session_done(struct work_struct *work) int rc; if (!own || - (own && - (own->iocb.u.isp24.status_subcode == ELS_PLOGI))) { + (own->iocb.u.isp24.status_subcode == ELS_PLOGI)) { rc = qla2x00_post_async_logout_work(vha, sess, NULL); if (rc != QLA_SUCCESS) From bafd09f8d8ec0ab33c57bb919f95436175814ee5 Mon Sep 17 00:00:00 2001 From: DooHyun Hwang Date: Wed, 3 Feb 2021 19:14:43 +0900 Subject: [PATCH 09/30] scsi: ufs: Print the counter of each event history Print event counter after dumping the event history. Link: https://lore.kernel.org/r/20210203101443.28934-1-dh0421.hwang@samsung.com Reviewed-by: Avri Altman Signed-off-by: DooHyun Hwang Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 45624c757cd1..80620c866192 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -451,6 +451,8 @@ static void ufshcd_print_evt(struct ufs_hba *hba, u32 id, if (!found) dev_err(hba->dev, "No record of %s\n", err_name); + else + dev_err(hba->dev, "%s: total cnt=%llu\n", err_name, e->cnt); } static void ufshcd_print_evt_hist(struct ufs_hba *hba) @@ -1866,7 +1868,7 @@ static ssize_t ufshcd_clkgate_delay_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%lu\n", hba->clk_gating.delay_ms); + return sysfs_emit(buf, "%lu\n", hba->clk_gating.delay_ms); } static ssize_t ufshcd_clkgate_delay_store(struct device *dev, @@ -1889,7 +1891,7 @@ static ssize_t ufshcd_clkgate_enable_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_gating.is_enabled); + return sysfs_emit(buf, "%d\n", hba->clk_gating.is_enabled); } static ssize_t ufshcd_clkgate_enable_store(struct device *dev, From 70ae13abd5d054d32a68a35a81fd4f37f88fc7e1 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 4 Feb 2021 15:48:35 +0800 Subject: [PATCH 10/30] scsi: isci: Remove redundant initialization of variable 'status' This patch removes unneeded return variables. It fixes the following warning detected by coccinelle: ./drivers/scsi/isci/request.c:1483:17-23: Unneeded variable: "status". Return "SCI_SUCCESS" on line 1503 ./drivers/scsi/isci/request.c:2157:17-23: Unneeded variable: "status". Return "SCI_SUCCESS" on line 2177 Link: https://lore.kernel.org/r/1612424915-106608-1-git-send-email-yang.lee@linux.alibaba.com Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/request.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index bee16850b236..58e62162882f 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -1480,8 +1480,6 @@ static enum sci_status stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq, u32 completion_code) { - enum sci_status status = SCI_SUCCESS; - switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) { case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD): ireq->scu_status = SCU_TASK_DONE_GOOD; @@ -1500,7 +1498,7 @@ stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq, break; } - return status; + return SCI_SUCCESS; } static enum sci_status @@ -2152,8 +2150,6 @@ static enum sci_status stp_request_udma_await_tc_event(struct isci_request *ireq static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 completion_code, enum sci_base_request_states next) { - enum sci_status status = SCI_SUCCESS; - switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) { case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD): ireq->scu_status = SCU_TASK_DONE_GOOD; @@ -2172,7 +2168,7 @@ static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 compl break; } - return status; + return SCI_SUCCESS; } static enum sci_status atapi_data_tc_completion_handler(struct isci_request *ireq, From d28d48c699779973ab9a3bd0e5acfa112bd4fdef Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:00 -0600 Subject: [PATCH 11/30] scsi: libiscsi: Fix iscsi_prep_scsi_cmd_pdu() error handling If iscsi_prep_scsi_cmd_pdu() fails we try to add it back to the cmdqueue, but we leave it partially setup. We don't have functions that can undo the pdu and init task setup. We only have cleanup_task which can clean up both parts. So this has us just fail the cmd and go through the standard cleanup routine and then have the SCSI midlayer retry it like is done when it fails in the queuecommand path. Link: https://lore.kernel.org/r/20210207044608.27585-2-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 4e668aafbcca..cee1dbaa1b93 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1532,14 +1532,9 @@ check_mgmt: } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { - if (rc == -ENOMEM || rc == -EACCES) { - spin_lock_bh(&conn->taskqueuelock); - list_add_tail(&conn->task->running, - &conn->cmdqueue); - conn->task = NULL; - spin_unlock_bh(&conn->taskqueuelock); - goto done; - } else + if (rc == -ENOMEM || rc == -EACCES) + fail_scsi_task(conn->task, DID_IMM_RETRY); + else fail_scsi_task(conn->task, DID_ABORT); spin_lock_bh(&conn->taskqueuelock); continue; From 5923d64b7ab63dcc6f0df946098f50902f9540d1 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:01 -0600 Subject: [PATCH 12/30] scsi: libiscsi: Drop taskqueuelock The purpose of the taskqueuelock was to handle the issue where a bad target decides to send a R2T and before its data has been sent decides to send a cmd response to complete the cmd. The following patches fix up the frwd/back locks so they are taken from the queue/xmit (frwd) and completion (back) paths again. To get there this patch removes the taskqueuelock which for iSCSI xmit wq based drivers was taken in the queue, xmit and completion paths. Instead of the lock, we just make sure we have a ref to the task when we queue a R2T, and then we always remove the task from the requeue list in the xmit path or the forced cleanup paths. Link: https://lore.kernel.org/r/20210207044608.27585-3-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 178 +++++++++++++++++++++++------------- drivers/scsi/libiscsi_tcp.c | 86 +++++++++++------ include/scsi/libiscsi.h | 4 +- 3 files changed, 172 insertions(+), 96 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index cee1dbaa1b93..3d74fdd9f31a 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -523,16 +523,6 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); task->state = state; - spin_lock_bh(&conn->taskqueuelock); - if (!list_empty(&task->running)) { - pr_debug_once("%s while task on list", __func__); - list_del_init(&task->running); - } - spin_unlock_bh(&conn->taskqueuelock); - - if (conn->task == task) - conn->task = NULL; - if (READ_ONCE(conn->ping_task) == task) WRITE_ONCE(conn->ping_task, NULL); @@ -564,9 +554,40 @@ void iscsi_complete_scsi_task(struct iscsi_task *task, } EXPORT_SYMBOL_GPL(iscsi_complete_scsi_task); +/* + * Must be called with back and frwd lock + */ +static bool cleanup_queued_task(struct iscsi_task *task) +{ + struct iscsi_conn *conn = task->conn; + bool early_complete = false; + + /* Bad target might have completed task while it was still running */ + if (task->state == ISCSI_TASK_COMPLETED) + early_complete = true; + + if (!list_empty(&task->running)) { + list_del_init(&task->running); + /* + * If it's on a list but still running, this could be from + * a bad target sending a rsp early, cleanup from a TMF, or + * session recovery. + */ + if (task->state == ISCSI_TASK_RUNNING || + task->state == ISCSI_TASK_COMPLETED) + __iscsi_put_task(task); + } + + if (conn->task == task) { + conn->task = NULL; + __iscsi_put_task(task); + } + + return early_complete; +} /* - * session back_lock must be held and if not called for a task that is + * session frwd_lock must be held and if not called for a task that is * still pending or from the xmit thread, then xmit thread must * be suspended. */ @@ -585,6 +606,13 @@ static void fail_scsi_task(struct iscsi_task *task, int err) if (!sc) return; + /* regular RX path uses back_lock */ + spin_lock_bh(&conn->session->back_lock); + if (cleanup_queued_task(task)) { + spin_unlock_bh(&conn->session->back_lock); + return; + } + if (task->state == ISCSI_TASK_PENDING) { /* * cmd never made it to the xmit thread, so we should not count @@ -600,9 +628,6 @@ static void fail_scsi_task(struct iscsi_task *task, int err) sc->result = err << 16; scsi_set_resid(sc, scsi_bufflen(sc)); - - /* regular RX path uses back_lock */ - spin_lock_bh(&conn->session->back_lock); iscsi_complete_task(task, state); spin_unlock_bh(&conn->session->back_lock); } @@ -748,9 +773,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (session->tt->xmit_task(task)) goto free_task; } else { - spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->mgmtqueue); - spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -1411,31 +1434,61 @@ static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn) return 0; } -static int iscsi_xmit_task(struct iscsi_conn *conn) +static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task, + bool was_requeue) { - struct iscsi_task *task = conn->task; int rc; - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) - return -ENODATA; - spin_lock_bh(&conn->session->back_lock); - if (conn->task == NULL) { + + if (!conn->task) { + /* Take a ref so we can access it after xmit_task() */ + __iscsi_get_task(task); + } else { + /* Already have a ref from when we failed to send it last call */ + conn->task = NULL; + } + + /* + * If this was a requeue for a R2T we have an extra ref on the task in + * case a bad target sends a cmd rsp before we have handled the task. + */ + if (was_requeue) + __iscsi_put_task(task); + + /* + * Do this after dropping the extra ref because if this was a requeue + * it's removed from that list and cleanup_queued_task would miss it. + */ + if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + /* + * Save the task and ref in case we weren't cleaning up this + * task and get woken up again. + */ + conn->task = task; spin_unlock_bh(&conn->session->back_lock); return -ENODATA; } - __iscsi_get_task(task); spin_unlock_bh(&conn->session->back_lock); + spin_unlock_bh(&conn->session->frwd_lock); rc = conn->session->tt->xmit_task(task); spin_lock_bh(&conn->session->frwd_lock); if (!rc) { /* done with this task */ task->last_xfer = jiffies; - conn->task = NULL; } /* regular RX path uses back_lock */ spin_lock(&conn->session->back_lock); + if (rc && task->state == ISCSI_TASK_RUNNING) { + /* + * get an extra ref that is released next time we access it + * as conn->task above. + */ + __iscsi_get_task(task); + conn->task = task; + } + __iscsi_put_task(task); spin_unlock(&conn->session->back_lock); return rc; @@ -1445,9 +1498,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn) * iscsi_requeue_task - requeue task to run from session workqueue * @task: task to requeue * - * LLDs that need to run a task from the session workqueue should call - * this. The session frwd_lock must be held. This should only be called - * by software drivers. + * Callers must have taken a ref to the task that is going to be requeued. */ void iscsi_requeue_task(struct iscsi_task *task) { @@ -1457,11 +1508,18 @@ void iscsi_requeue_task(struct iscsi_task *task) * this may be on the requeue list already if the xmit_task callout * is handling the r2ts while we are adding new ones */ - spin_lock_bh(&conn->taskqueuelock); - if (list_empty(&task->running)) + spin_lock_bh(&conn->session->frwd_lock); + if (list_empty(&task->running)) { list_add_tail(&task->running, &conn->requeue); - spin_unlock_bh(&conn->taskqueuelock); + } else { + /* + * Don't need the extra ref since it's already requeued and + * has a ref. + */ + iscsi_put_task(task); + } iscsi_conn_queue_work(conn); + spin_unlock_bh(&conn->session->frwd_lock); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1487,7 +1545,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) } if (conn->task) { - rc = iscsi_xmit_task(conn); + rc = iscsi_xmit_task(conn, conn->task, false); if (rc) goto done; } @@ -1497,49 +1555,41 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * only have one nop-out as a ping from us and targets should not * overflow us with nop-ins */ - spin_lock_bh(&conn->taskqueuelock); check_mgmt: while (!list_empty(&conn->mgmtqueue)) { - conn->task = list_entry(conn->mgmtqueue.next, - struct iscsi_task, running); - list_del_init(&conn->task->running); - spin_unlock_bh(&conn->taskqueuelock); - if (iscsi_prep_mgmt_task(conn, conn->task)) { + task = list_entry(conn->mgmtqueue.next, struct iscsi_task, + running); + list_del_init(&task->running); + if (iscsi_prep_mgmt_task(conn, task)) { /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); - __iscsi_put_task(conn->task); + __iscsi_put_task(task); spin_unlock_bh(&conn->session->back_lock); - conn->task = NULL; - spin_lock_bh(&conn->taskqueuelock); continue; } - rc = iscsi_xmit_task(conn); + rc = iscsi_xmit_task(conn, task, false); if (rc) goto done; - spin_lock_bh(&conn->taskqueuelock); } /* process pending command queue */ while (!list_empty(&conn->cmdqueue)) { - conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, - running); - list_del_init(&conn->task->running); - spin_unlock_bh(&conn->taskqueuelock); + task = list_entry(conn->cmdqueue.next, struct iscsi_task, + running); + list_del_init(&task->running); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { - fail_scsi_task(conn->task, DID_IMM_RETRY); - spin_lock_bh(&conn->taskqueuelock); + fail_scsi_task(task, DID_IMM_RETRY); continue; } - rc = iscsi_prep_scsi_cmd_pdu(conn->task); + rc = iscsi_prep_scsi_cmd_pdu(task); if (rc) { if (rc == -ENOMEM || rc == -EACCES) - fail_scsi_task(conn->task, DID_IMM_RETRY); + fail_scsi_task(task, DID_IMM_RETRY); else - fail_scsi_task(conn->task, DID_ABORT); - spin_lock_bh(&conn->taskqueuelock); + fail_scsi_task(task, DID_ABORT); continue; } - rc = iscsi_xmit_task(conn); + rc = iscsi_xmit_task(conn, task, false); if (rc) goto done; /* @@ -1547,7 +1597,6 @@ check_mgmt: * we need to check the mgmt queue for nops that need to * be sent to aviod starvation */ - spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } @@ -1561,21 +1610,17 @@ check_mgmt: task = list_entry(conn->requeue.next, struct iscsi_task, running); + if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT)) break; - conn->task = task; - list_del_init(&conn->task->running); - conn->task->state = ISCSI_TASK_RUNNING; - spin_unlock_bh(&conn->taskqueuelock); - rc = iscsi_xmit_task(conn); + list_del_init(&task->running); + rc = iscsi_xmit_task(conn, task, true); if (rc) goto done; - spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } - spin_unlock_bh(&conn->taskqueuelock); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1741,9 +1786,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_reject; } } else { - spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->cmdqueue); - spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -2914,7 +2957,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, INIT_LIST_HEAD(&conn->mgmtqueue); INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); - spin_lock_init(&conn->taskqueuelock); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); /* allocate login_task used for the login/text sequences */ @@ -3080,10 +3122,16 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) ISCSI_DBG_SESSION(conn->session, "failing mgmt itt 0x%x state %d\n", task->itt, task->state); + + spin_lock_bh(&session->back_lock); + if (cleanup_queued_task(task)) { + spin_unlock_bh(&session->back_lock); + continue; + } + state = ISCSI_TASK_ABRT_SESS_RECOV; if (task->state == ISCSI_TASK_PENDING) state = ISCSI_TASK_COMPLETED; - spin_lock_bh(&session->back_lock); iscsi_complete_task(task, state); spin_unlock_bh(&session->back_lock); } diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 83f14b2c8804..2e9ffe3d1a55 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -524,48 +524,79 @@ static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task) /** * iscsi_tcp_r2t_rsp - iSCSI R2T Response processing * @conn: iscsi connection - * @task: scsi command task + * @hdr: PDU header */ -static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) +static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) { struct iscsi_session *session = conn->session; - struct iscsi_tcp_task *tcp_task = task->dd_data; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr; + struct iscsi_tcp_task *tcp_task; + struct iscsi_tcp_conn *tcp_conn; + struct iscsi_r2t_rsp *rhdr; struct iscsi_r2t_info *r2t; - int r2tsn = be32_to_cpu(rhdr->r2tsn); + struct iscsi_task *task; u32 data_length; u32 data_offset; + int r2tsn; int rc; + spin_lock(&session->back_lock); + task = iscsi_itt_to_ctask(conn, hdr->itt); + if (!task) { + spin_unlock(&session->back_lock); + return ISCSI_ERR_BAD_ITT; + } else if (task->sc->sc_data_direction != DMA_TO_DEVICE) { + spin_unlock(&session->back_lock); + return ISCSI_ERR_PROTO; + } + /* + * A bad target might complete the cmd before we have handled R2Ts + * so get a ref to the task that will be dropped in the xmit path. + */ + if (task->state != ISCSI_TASK_RUNNING) { + spin_unlock(&session->back_lock); + /* Let the path that got the early rsp complete it */ + return 0; + } + task->last_xfer = jiffies; + __iscsi_get_task(task); + + tcp_conn = conn->dd_data; + rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr; + /* fill-in new R2T associated with the task */ + iscsi_update_cmdsn(session, (struct iscsi_nopin *)rhdr); + spin_unlock(&session->back_lock); + if (tcp_conn->in.datalen) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2t with datalen %d\n", tcp_conn->in.datalen); - return ISCSI_ERR_DATALEN; + rc = ISCSI_ERR_DATALEN; + goto put_task; } + tcp_task = task->dd_data; + r2tsn = be32_to_cpu(rhdr->r2tsn); if (tcp_task->exp_datasn != r2tsn){ ISCSI_DBG_TCP(conn, "task->exp_datasn(%d) != rhdr->r2tsn(%d)\n", tcp_task->exp_datasn, r2tsn); - return ISCSI_ERR_R2TSN; + rc = ISCSI_ERR_R2TSN; + goto put_task; } - /* fill-in new R2T associated with the task */ - iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); - - if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) { + if (session->state != ISCSI_STATE_LOGGED_IN) { iscsi_conn_printk(KERN_INFO, conn, "dropping R2T itt %d in recovery.\n", task->itt); - return 0; + rc = 0; + goto put_task; } data_length = be32_to_cpu(rhdr->data_length); if (data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2T with zero data len\n"); - return ISCSI_ERR_DATALEN; + rc = ISCSI_ERR_DATALEN; + goto put_task; } if (data_length > session->max_burst) @@ -579,7 +610,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "invalid R2T with data len %u at offset %u " "and total length %d\n", data_length, data_offset, task->sc->sdb.length); - return ISCSI_ERR_DATALEN; + rc = ISCSI_ERR_DATALEN; + goto put_task; } spin_lock(&tcp_task->pool2queue); @@ -589,7 +621,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "Target has sent more R2Ts than it " "negotiated for or driver has leaked.\n"); spin_unlock(&tcp_task->pool2queue); - return ISCSI_ERR_PROTO; + rc = ISCSI_ERR_PROTO; + goto put_task; } r2t->exp_statsn = rhdr->statsn; @@ -607,6 +640,10 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) iscsi_requeue_task(task); return 0; + +put_task: + iscsi_put_task(task); + return rc; } /* @@ -730,20 +767,11 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr) rc = iscsi_complete_pdu(conn, hdr, NULL, 0); break; case ISCSI_OP_R2T: - spin_lock(&conn->session->back_lock); - task = iscsi_itt_to_ctask(conn, hdr->itt); - spin_unlock(&conn->session->back_lock); - if (!task) - rc = ISCSI_ERR_BAD_ITT; - else if (ahslen) + if (ahslen) { rc = ISCSI_ERR_AHSLEN; - else if (task->sc->sc_data_direction == DMA_TO_DEVICE) { - task->last_xfer = jiffies; - spin_lock(&conn->session->frwd_lock); - rc = iscsi_tcp_r2t_rsp(conn, task); - spin_unlock(&conn->session->frwd_lock); - } else - rc = ISCSI_ERR_PROTO; + break; + } + rc = iscsi_tcp_r2t_rsp(conn, hdr); break; case ISCSI_OP_LOGIN_RSP: case ISCSI_OP_TEXT_RSP: diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b3bbd10eb3f0..44a9554aea62 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -187,7 +187,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ - spinlock_t taskqueuelock; /* protects the next three lists */ + /* items must be added/deleted under frwd lock */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ @@ -332,7 +332,7 @@ struct iscsi_session { * cmdsn, queued_cmdsn * * session resources: * * - cmdpool kfifo_out , * - * - mgmtpool, */ + * - mgmtpool, queues */ spinlock_t back_lock; /* protects cmdsn_exp * * cmdsn_max, * * cmdpool kfifo_in */ From 14936b1ed249916c28642d0db47a51b085ce13b4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:02 -0600 Subject: [PATCH 13/30] scsi: libiscsi: Fix iscsi_task use after free() The following bug was reported and debugged by wubo40@huawei.com: When testing kernel 4.18 version, NULL pointer dereference problem occurs in iscsi_eh_cmd_timed_out() function. I think this bug in the upstream is still exists. The analysis reasons are as follows: 1) For some reason, I/O command did not complete within the timeout period. The block layer timer works, call scsi_times_out() to handle I/O timeout logic. At the same time the command just completes. 2) scsi_times_out() call iscsi_eh_cmd_timed_out() to process timeout logic. Although there is an NULL judgment for the task, the task has not been released yet now. 3) iscsi_complete_task() calls __iscsi_put_task(). The task reference count reaches zero, the conditions for free task is met, then iscsi_free_task() frees the task, and sets sc->SCp.ptr = NULL. After iscsi_eh_cmd_timed_out() passes the task judgment check, there can still be NULL dereference scenarios. CPU0 CPU3 |- scsi_times_out() |- iscsi_complete_task() | | |- iscsi_eh_cmd_timed_out() |- __iscsi_put_task() | | |- task=sc->SCp.ptr, task is not NUL, check passed |- iscsi_free_task(task) | | | |-> sc->SCp.ptr = NULL | | |- task is NULL now, NULL pointer dereference | | | \|/ \|/ Calltrace: [380751.840862] BUG: unable to handle kernel NULL pointer dereference at 0000000000000138 [380751.843709] PGD 0 P4D 0 [380751.844770] Oops: 0000 [#1] SMP PTI [380751.846283] CPU: 0 PID: 403 Comm: kworker/0:1H Kdump: loaded Tainted: G [380751.851467] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) [380751.856521] Workqueue: kblockd blk_mq_timeout_work [380751.858527] RIP: 0010:iscsi_eh_cmd_timed_out+0x15e/0x2e0 [libiscsi] [380751.861129] Code: 83 ea 01 48 8d 74 d0 08 48 8b 10 48 8b 4a 50 48 85 c9 74 2c 48 39 d5 74 [380751.868811] RSP: 0018:ffffc1e280a5fd58 EFLAGS: 00010246 [380751.870978] RAX: ffff9fd1e84e15e0 RBX: ffff9fd1e84e6dd0 RCX: 0000000116acc580 [380751.873791] RDX: ffff9fd1f97a9400 RSI: ffff9fd1e84e1800 RDI: ffff9fd1e4d6d420 [380751.876059] RBP: ffff9fd1e4d49000 R08: 0000000116acc580 R09: 0000000116acc580 [380751.878284] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9fd1e6e931e8 [380751.880500] R13: ffff9fd1e84e6ee0 R14: 0000000000000010 R15: 0000000000000003 [380751.882687] FS: 0000000000000000(0000) GS:ffff9fd1fac00000(0000) knlGS:0000000000000000 [380751.885236] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [380751.887059] CR2: 0000000000000138 CR3: 000000011860a001 CR4: 00000000003606f0 [380751.889308] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [380751.891523] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [380751.893738] Call Trace: [380751.894639] scsi_times_out+0x60/0x1c0 [380751.895861] blk_mq_check_expired+0x144/0x200 [380751.897302] ? __switch_to_asm+0x35/0x70 [380751.898551] blk_mq_queue_tag_busy_iter+0x195/0x2e0 [380751.900091] ? __blk_mq_requeue_request+0x100/0x100 [380751.901611] ? __switch_to_asm+0x41/0x70 [380751.902853] ? __blk_mq_requeue_request+0x100/0x100 [380751.904398] blk_mq_timeout_work+0x54/0x130 [380751.905740] process_one_work+0x195/0x390 [380751.907228] worker_thread+0x30/0x390 [380751.908713] ? process_one_work+0x390/0x390 [380751.910350] kthread+0x10d/0x130 [380751.911470] ? kthread_flush_work_fn+0x10/0x10 [380751.913007] ret_from_fork+0x35/0x40 crash> dis -l iscsi_eh_cmd_timed_out+0x15e xxxxx/drivers/scsi/libiscsi.c: 2062 1970 enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) { ... 1984 spin_lock_bh(&session->frwd_lock); 1985 task = (struct iscsi_task *)sc->SCp.ptr; 1986 if (!task) { 1987 /* 1988 * Raced with completion. Blk layer has taken ownership 1989 * so let timeout code complete it now. 1990 */ 1991 rc = BLK_EH_DONE; 1992 goto done; 1993 } ... 2052 for (i = 0; i < conn->session->cmds_max; i++) { 2053 running_task = conn->session->cmds[i]; 2054 if (!running_task->sc || running_task == task || 2055 running_task->state != ISCSI_TASK_RUNNING) 2056 continue; 2057 2058 /* 2059 * Only check if cmds started before this one have made 2060 * progress, or this could never fail 2061 */ 2062 if (time_after(running_task->sc->jiffies_at_alloc, 2063 task->sc->jiffies_at_alloc)) <--- 2064 continue; 2065 ... } carsh> struct scsi_cmnd ffff9fd1e6e931e8 struct scsi_cmnd { ... SCp = { ptr = 0x0, <--- iscsi_task this_residual = 0, ... }, } To prevent this, we take a ref to the cmd under the back (completion) lock so if the completion side were to call iscsi_complete_task() on the task while the timer/eh paths are not holding the back_lock it will not be freed from under us. Note that this requires the previous patch, "scsi: libiscsi: Drop taskqueuelock" because bnx2i sleeps in its cleanup_task callout if the cmd is aborted. If the EH/timer and completion path are racing we don't know which path will do the last put. The previous patch moved the operations we needed to do under the forward lock to cleanup_queued_task. Once that has run we can drop the forward lock for the cmd and bnx2i no longer has to worry about if the EH, timer or completion path did the ast put and if the forward lock is held or not since it won't be. Link: https://lore.kernel.org/r/20210207044608.27585-4-michael.christie@oracle.com Reported-by: Wu Bo Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_iscsi.c | 2 - drivers/scsi/libiscsi.c | 71 ++++++++++++++++++++------------ 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index fdd446765311..1e6d8f62ea3c 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1171,10 +1171,8 @@ static void bnx2i_cleanup_task(struct iscsi_task *task) bnx2i_send_cmd_cleanup_req(hba, task->dd_data); spin_unlock_bh(&conn->session->back_lock); - spin_unlock_bh(&conn->session->frwd_lock); wait_for_completion_timeout(&bnx2i_conn->cmd_cleanup_cmpl, msecs_to_jiffies(ISCSI_CMD_CLEANUP_TIMEOUT)); - spin_lock_bh(&conn->session->frwd_lock); spin_lock_bh(&conn->session->back_lock); } bnx2i_iscsi_unmap_sg_list(task->dd_data); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 3d74fdd9f31a..ec159bcb7460 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -587,9 +587,8 @@ static bool cleanup_queued_task(struct iscsi_task *task) } /* - * session frwd_lock must be held and if not called for a task that is - * still pending or from the xmit thread, then xmit thread must - * be suspended. + * session frwd lock must be held and if not called for a task that is still + * pending or from the xmit thread, then xmit thread must be suspended */ static void fail_scsi_task(struct iscsi_task *task, int err) { @@ -597,16 +596,6 @@ static void fail_scsi_task(struct iscsi_task *task, int err) struct scsi_cmnd *sc; int state; - /* - * if a command completes and we get a successful tmf response - * we will hit this because the scsi eh abort code does not take - * a ref to the task. - */ - sc = task->sc; - if (!sc) - return; - - /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); if (cleanup_queued_task(task)) { spin_unlock_bh(&conn->session->back_lock); @@ -626,6 +615,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err) else state = ISCSI_TASK_ABRT_TMF; + sc = task->sc; sc->result = err << 16; scsi_set_resid(sc, scsi_bufflen(sc)); iscsi_complete_task(task, state); @@ -1893,27 +1883,39 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn, } /* - * Fail commands. session lock held and recv side suspended and xmit - * thread flushed + * Fail commands. session frwd lock held and xmit thread flushed. */ static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error) { + struct iscsi_session *session = conn->session; struct iscsi_task *task; int i; - for (i = 0; i < conn->session->cmds_max; i++) { - task = conn->session->cmds[i]; + spin_lock_bh(&session->back_lock); + for (i = 0; i < session->cmds_max; i++) { + task = session->cmds[i]; if (!task->sc || task->state == ISCSI_TASK_FREE) continue; if (lun != -1 && lun != task->sc->device->lun) continue; - ISCSI_DBG_SESSION(conn->session, + __iscsi_get_task(task); + spin_unlock_bh(&session->back_lock); + + ISCSI_DBG_SESSION(session, "failing sc %p itt 0x%x state %d\n", task->sc, task->itt, task->state); fail_scsi_task(task, error); + + spin_unlock_bh(&session->frwd_lock); + iscsi_put_task(task); + spin_lock_bh(&session->frwd_lock); + + spin_lock_bh(&session->back_lock); } + + spin_unlock_bh(&session->back_lock); } /** @@ -1991,6 +1993,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc); spin_lock_bh(&session->frwd_lock); + spin_lock(&session->back_lock); task = (struct iscsi_task *)sc->SCp.ptr; if (!task) { /* @@ -1998,8 +2001,11 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * so let timeout code complete it now. */ rc = BLK_EH_DONE; + spin_unlock(&session->back_lock); goto done; } + __iscsi_get_task(task); + spin_unlock(&session->back_lock); if (session->state != ISCSI_STATE_LOGGED_IN) { /* @@ -2058,6 +2064,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) goto done; } + spin_lock(&session->back_lock); for (i = 0; i < conn->session->cmds_max; i++) { running_task = conn->session->cmds[i]; if (!running_task->sc || running_task == task || @@ -2090,10 +2097,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) "last xfer %lu/%lu. Last check %lu.\n", task->last_xfer, running_task->last_xfer, task->last_timeout); + spin_unlock(&session->back_lock); rc = BLK_EH_RESET_TIMER; goto done; } } + spin_unlock(&session->back_lock); /* Assumes nop timeout is shorter than scsi cmd timeout */ if (task->have_checked_conn) @@ -2115,9 +2124,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) rc = BLK_EH_RESET_TIMER; done: - if (task) - task->last_timeout = jiffies; spin_unlock_bh(&session->frwd_lock); + + if (task) { + task->last_timeout = jiffies; + iscsi_put_task(task); + } ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? "timer reset" : "shutdown or nh"); return rc; @@ -2225,15 +2237,20 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) conn->eh_abort_cnt++; age = session->age; + spin_lock(&session->back_lock); task = (struct iscsi_task *)sc->SCp.ptr; - ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", - sc, task->itt); - - /* task completed before time out */ - if (!task->sc) { + if (!task || !task->sc) { + /* task completed before time out */ ISCSI_DBG_EH(session, "sc completed while abort in progress\n"); - goto success; + + spin_unlock(&session->back_lock); + spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&session->eh_mutex); + return SUCCESS; } + ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", sc, task->itt); + __iscsi_get_task(task); + spin_unlock(&session->back_lock); if (task->state == ISCSI_TASK_PENDING) { fail_scsi_task(task, DID_ABORT); @@ -2295,6 +2312,7 @@ success: success_unlocked: ISCSI_DBG_EH(session, "abort success [sc %p itt 0x%x]\n", sc, task->itt); + iscsi_put_task(task); mutex_unlock(&session->eh_mutex); return SUCCESS; @@ -2303,6 +2321,7 @@ failed: failed_unlocked: ISCSI_DBG_EH(session, "abort failed [sc %p itt 0x%x]\n", sc, task ? task->itt : 0); + iscsi_put_task(task); mutex_unlock(&session->eh_mutex); return FAILED; } From c435f0a9ecb7435e70f447b7231ca52de589b252 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:03 -0600 Subject: [PATCH 14/30] scsi: libiscsi: Fix iSCSI host workq destruction We allocate the iSCSI host workq in iscsi_host_alloc() so iscsi_host_free() should do the destruction. Drivers can then do their error/goto handling and call iscsi_host_free() to clean up what has been allocated in iscsi_host_alloc(). Link: https://lore.kernel.org/r/20210207044608.27585-5-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index ec159bcb7460..b271d3accd2a 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2738,8 +2738,6 @@ void iscsi_host_remove(struct Scsi_Host *shost) flush_signals(current); scsi_remove_host(shost); - if (ihost->workq) - destroy_workqueue(ihost->workq); } EXPORT_SYMBOL_GPL(iscsi_host_remove); @@ -2747,6 +2745,9 @@ void iscsi_host_free(struct Scsi_Host *shost) { struct iscsi_host *ihost = shost_priv(shost); + if (ihost->workq) + destroy_workqueue(ihost->workq); + kfree(ihost->netdev); kfree(ihost->hwaddress); kfree(ihost->initiatorname); From b4046922b3c0740ad50a6e9c59e12f4dc43946d4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:04 -0600 Subject: [PATCH 15/30] scsi: libiscsi: Add helper to calculate max SCSI cmds per session This patch just breaks out the code that calculates the number of SCSI cmds that will be used for a SCSI session. It also adds a check that we don't go over the host's can_queue value. Link: https://lore.kernel.org/r/20210207044608.27585-6-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 86 ++++++++++++++++++++++++++--------------- include/scsi/libiscsi.h | 2 + 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index b271d3accd2a..f64e2077754c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2648,6 +2648,56 @@ void iscsi_pool_free(struct iscsi_pool *q) } EXPORT_SYMBOL_GPL(iscsi_pool_free); +int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost, + uint16_t requested_cmds_max) +{ + int scsi_cmds, total_cmds = requested_cmds_max; + +check: + if (!total_cmds) + total_cmds = ISCSI_DEF_XMIT_CMDS_MAX; + /* + * The iscsi layer needs some tasks for nop handling and tmfs, + * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX + * + 1 command for scsi IO. + */ + if (total_cmds < ISCSI_TOTAL_CMDS_MIN) { + printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of two that is at least %d.\n", + total_cmds, ISCSI_TOTAL_CMDS_MIN); + return -EINVAL; + } + + if (total_cmds > ISCSI_TOTAL_CMDS_MAX) { + printk(KERN_INFO "iscsi: invalid max cmds of %d. Must be a power of 2 less than or equal to %d. Using %d.\n", + requested_cmds_max, ISCSI_TOTAL_CMDS_MAX, + ISCSI_TOTAL_CMDS_MAX); + total_cmds = ISCSI_TOTAL_CMDS_MAX; + } + + if (!is_power_of_2(total_cmds)) { + total_cmds = rounddown_pow_of_two(total_cmds); + if (total_cmds < ISCSI_TOTAL_CMDS_MIN) { + printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of 2 greater than %d.\n", requested_cmds_max, ISCSI_TOTAL_CMDS_MIN); + return -EINVAL; + } + + printk(KERN_INFO "iscsi: invalid max cmds %d. Must be a power of 2. Rounding max cmds down to %d.\n", + requested_cmds_max, total_cmds); + } + + scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX; + if (shost->can_queue && scsi_cmds > shost->can_queue) { + total_cmds = shost->can_queue; + + printk(KERN_INFO "iscsi: requested max cmds %u is higher than driver limit. Using driver limit %u\n", + requested_cmds_max, shost->can_queue); + goto check; + } + + return scsi_cmds; +} +EXPORT_SYMBOL_GPL(iscsi_host_get_max_scsi_cmds); + /** * iscsi_host_add - add host to system * @shost: scsi host @@ -2801,7 +2851,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost, struct iscsi_host *ihost = shost_priv(shost); struct iscsi_session *session; struct iscsi_cls_session *cls_session; - int cmd_i, scsi_cmds, total_cmds = cmds_max; + int cmd_i, scsi_cmds; unsigned long flags; spin_lock_irqsave(&ihost->lock, flags); @@ -2812,37 +2862,9 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost, ihost->num_sessions++; spin_unlock_irqrestore(&ihost->lock, flags); - if (!total_cmds) - total_cmds = ISCSI_DEF_XMIT_CMDS_MAX; - /* - * The iscsi layer needs some tasks for nop handling and tmfs, - * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX - * + 1 command for scsi IO. - */ - if (total_cmds < ISCSI_TOTAL_CMDS_MIN) { - printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue " - "must be a power of two that is at least %d.\n", - total_cmds, ISCSI_TOTAL_CMDS_MIN); + scsi_cmds = iscsi_host_get_max_scsi_cmds(shost, cmds_max); + if (scsi_cmds < 0) goto dec_session_count; - } - - if (total_cmds > ISCSI_TOTAL_CMDS_MAX) { - printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue " - "must be a power of 2 less than or equal to %d.\n", - cmds_max, ISCSI_TOTAL_CMDS_MAX); - total_cmds = ISCSI_TOTAL_CMDS_MAX; - } - - if (!is_power_of_2(total_cmds)) { - printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue " - "must be a power of 2.\n", total_cmds); - total_cmds = rounddown_pow_of_two(total_cmds); - if (total_cmds < ISCSI_TOTAL_CMDS_MIN) - goto dec_session_count; - printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n", - total_cmds); - } - scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX; cls_session = iscsi_alloc_session(shost, iscsit, sizeof(struct iscsi_session) + @@ -2858,7 +2880,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost, session->lu_reset_timeout = 15; session->abort_timeout = 10; session->scsi_cmds_max = scsi_cmds; - session->cmds_max = total_cmds; + session->cmds_max = scsi_cmds + ISCSI_MGMT_CMDS_MAX; session->queued_cmdsn = session->cmdsn = initial_cmdsn; session->exp_cmdsn = initial_cmdsn + 1; session->max_cmdsn = initial_cmdsn + 1; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 44a9554aea62..02f966e9358f 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -395,6 +395,8 @@ extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, extern void iscsi_host_remove(struct Scsi_Host *shost); extern void iscsi_host_free(struct Scsi_Host *shost); extern int iscsi_target_alloc(struct scsi_target *starget); +extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost, + uint16_t requested_cmds_max); /* * session management From 25c400db2083732a5fbdd72f0d3a0337119b2fa5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:05 -0600 Subject: [PATCH 16/30] scsi: iscsi_tcp: Fix shost can_queue initialization We are setting the shost's can_queue after we add the host which is too late, because the SCSI midlayer will have allocated the tag set based on the can_queue value at that time. This patch has us use the iscsi_host_get_max_scsi_cmds() helper to figure out the number of SCSI cmds. It also fixes up the template can_queue so it reflects the max SCSI cmds we can support like how other drivers work. Link: https://lore.kernel.org/r/20210207044608.27585-7-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/iscsi_tcp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index a9ce6298b935..dd33ce0e3737 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -847,6 +847,7 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, struct iscsi_session *session; struct iscsi_sw_tcp_host *tcp_sw_host; struct Scsi_Host *shost; + int rc; if (ep) { printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep); @@ -864,6 +865,11 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, shost->max_channel = 0; shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE; + rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max); + if (rc < 0) + goto free_host; + shost->can_queue = rc; + if (iscsi_host_add(shost, NULL)) goto free_host; @@ -878,7 +884,6 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, tcp_sw_host = iscsi_host_priv(shost); tcp_sw_host->session = session; - shost->can_queue = session->scsi_cmds_max; if (iscsi_tcp_r2tpool_alloc(session)) goto remove_session; return cls_session; @@ -981,7 +986,7 @@ static struct scsi_host_template iscsi_sw_tcp_sht = { .name = "iSCSI Initiator over TCP/IP", .queuecommand = iscsi_queuecommand, .change_queue_depth = scsi_change_queue_depth, - .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, + .can_queue = ISCSI_TOTAL_CMDS_MAX, .sg_tablesize = 4096, .max_sectors = 0xFFFF, .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN, From c8447e4c2eb77dbb96012ae96e7c83179cecf880 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:06 -0600 Subject: [PATCH 17/30] scsi: libiscsi: Reset max/exp cmdsn during recovery If we lose the session then relogin, but the new cmdsn window has shrunk (due to something like an admin changing a setting) we will have the old exp/max_cmdsn values and will never be able to update them. For example, max_cmdsn would be 64, but if on the target the user set the window to be smaller then the target could try to return the max_cmdsn as 32. We will see that new max_cmdsn in the rsp but because it's lower than the old max_cmdsn when the window was larger we will not update it. So this patch has us reset the window values during session cleanup so they can be updated after a new login. Link: https://lore.kernel.org/r/20210207044608.27585-8-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index f64e2077754c..7ad11e42306d 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3273,6 +3273,13 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, session->leadconn = conn; spin_unlock_bh(&session->frwd_lock); + /* + * The target could have reduced it's window size between logins, so + * we have to reset max/exp cmdsn so we can see the new values. + */ + spin_lock_bh(&session->back_lock); + session->max_cmdsn = session->exp_cmdsn = session->cmdsn + 1; + spin_unlock_bh(&session->back_lock); /* * Unblock xmitworker(), Login Phase will pass through. */ From 5b0ec4cf049446e676276cd3037b9c6bf53b8f94 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:07 -0600 Subject: [PATCH 18/30] scsi: qla4xxx: Use iscsi_is_session_online() __qla4xxx_is_chap_active() just wants to know if a session is online and does not care about why it's not, so this has it use iscsi_is_session_online(). This is not a bug now, but the next patch changes the behavior of iscsi_session_chkready() so this patch just prepares the driver for that change. Link: https://lore.kernel.org/r/20210207044608.27585-9-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index a4b014e1cd8c..7bd9a4a04ad5 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -841,7 +841,7 @@ static int __qla4xxx_is_chap_active(struct device *dev, void *data) sess = cls_session->dd_data; ddb_entry = sess->dd_data; - if (iscsi_session_chkready(cls_session)) + if (iscsi_is_session_online(cls_session)) goto exit_is_chap_active; if (ddb_entry->chap_tbl_idx == *chap_tbl_idx) From d39bfd0686fd2b21f857c61bb2753db3a932cb24 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sat, 6 Feb 2021 22:46:08 -0600 Subject: [PATCH 19/30] scsi: iscsi: Drop session lock in iscsi_session_chkready() The session lock in iscsi_session_chkready() is not needed because when we transition from logged into to another state we will block and/or remove the devices under the session, so no new I/O will be sent to the drivers after the block/remove. I/O that races with the block/removal is cleaned up by the drivers when it handles all outstanding I/O, so this just added an extra lock in the main I/O path. This patch removes the lock like other transport classes. Link: https://lore.kernel.org/r/20210207044608.27585-10-michael.christie@oracle.com Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 2e68c0a87698..969d24d580e2 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1701,10 +1701,8 @@ static const char *iscsi_session_state_name(int state) int iscsi_session_chkready(struct iscsi_cls_session *session) { - unsigned long flags; int err; - spin_lock_irqsave(&session->lock, flags); switch (session->state) { case ISCSI_SESSION_LOGGED_IN: err = 0; @@ -1719,7 +1717,6 @@ int iscsi_session_chkready(struct iscsi_cls_session *session) err = DID_NO_CONNECT << 16; break; } - spin_unlock_irqrestore(&session->lock, flags); return err; } EXPORT_SYMBOL_GPL(iscsi_session_chkready); From 1c73e0c5e54d5f7d77f422a10b03ebe61eaed5ad Mon Sep 17 00:00:00 2001 From: Aleksandr Miloserdov Date: Tue, 9 Feb 2021 10:22:01 +0300 Subject: [PATCH 20/30] scsi: target: core: Add cmd length set before cmd complete TCM doesn't properly handle underflow case for service actions. One way to prevent it is to always complete command with target_complete_cmd_with_length(), however it requires access to data_sg, which is not always available. This change introduces target_set_cmd_data_length() function which allows to set command data length before completing it. Link: https://lore.kernel.org/r/20210209072202.41154-2-a.miloserdov@yadro.com Reviewed-by: Roman Bolshakov Reviewed-by: Bodo Stroesser Signed-off-by: Aleksandr Miloserdov Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 15 +++++++++++---- include/target/target_core_backend.h | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 93ea17cbad79..5ecb9f18a53d 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -879,11 +879,9 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) } EXPORT_SYMBOL(target_complete_cmd); -void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) +void target_set_cmd_data_length(struct se_cmd *cmd, int length) { - if ((scsi_status == SAM_STAT_GOOD || - cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) && - length < cmd->data_length) { + if (length < cmd->data_length) { if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { cmd->residual_count += cmd->data_length - length; } else { @@ -893,6 +891,15 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len cmd->data_length = length; } +} +EXPORT_SYMBOL(target_set_cmd_data_length); + +void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) +{ + if (scsi_status == SAM_STAT_GOOD || + cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) { + target_set_cmd_data_length(cmd, length); + } target_complete_cmd(cmd, scsi_status); } diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 6336780d83a7..ce2fba49c95d 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -72,6 +72,7 @@ int transport_backend_register(const struct target_backend_ops *); void target_backend_unregister(const struct target_backend_ops *); void target_complete_cmd(struct se_cmd *, u8); +void target_set_cmd_data_length(struct se_cmd *, int); void target_complete_cmd_with_length(struct se_cmd *, u8, int); void transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *); From 14d24e2cc77411301e906a8cf41884739de192de Mon Sep 17 00:00:00 2001 From: Aleksandr Miloserdov Date: Tue, 9 Feb 2021 10:22:02 +0300 Subject: [PATCH 21/30] scsi: target: core: Prevent underflow for service actions TCM buffer length doesn't necessarily equal 8 + ADDITIONAL LENGTH which might be considered an underflow in case of Data-In size being greater than 8 + ADDITIONAL LENGTH. So truncate buffer length to prevent underflow. Link: https://lore.kernel.org/r/20210209072202.41154-3-a.miloserdov@yadro.com Reviewed-by: Roman Bolshakov Reviewed-by: Bodo Stroesser Signed-off-by: Aleksandr Miloserdov Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pr.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 14db5e568f22..d4cc43afe05b 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -3739,6 +3739,7 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd) spin_unlock(&dev->t10_pr.registration_lock); put_unaligned_be32(add_len, &buf[4]); + target_set_cmd_data_length(cmd, 8 + add_len); transport_kunmap_data_sg(cmd); @@ -3757,7 +3758,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd) struct t10_pr_registration *pr_reg; unsigned char *buf; u64 pr_res_key; - u32 add_len = 16; /* Hardcoded to 16 when a reservation is held. */ + u32 add_len = 0; if (cmd->data_length < 8) { pr_err("PRIN SA READ_RESERVATIONS SCSI Data Length: %u" @@ -3775,8 +3776,9 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd) pr_reg = dev->dev_pr_res_holder; if (pr_reg) { /* - * Set the hardcoded Additional Length + * Set the Additional Length to 16 when a reservation is held */ + add_len = 16; put_unaligned_be32(add_len, &buf[4]); if (cmd->data_length < 22) @@ -3812,6 +3814,8 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd) (pr_reg->pr_res_type & 0x0f); } + target_set_cmd_data_length(cmd, 8 + add_len); + err: spin_unlock(&dev->dev_reservation_lock); transport_kunmap_data_sg(cmd); @@ -3830,7 +3834,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd) struct se_device *dev = cmd->se_dev; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - u16 add_len = 8; /* Hardcoded to 8. */ + u16 len = 8; /* Hardcoded to 8. */ if (cmd->data_length < 6) { pr_err("PRIN SA REPORT_CAPABILITIES SCSI Data Length:" @@ -3842,7 +3846,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd) if (!buf) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - put_unaligned_be16(add_len, &buf[0]); + put_unaligned_be16(len, &buf[0]); buf[2] |= 0x10; /* CRH: Compatible Reservation Hanlding bit. */ buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */ buf[2] |= 0x04; /* ATP_C: All Target Ports Capable bit */ @@ -3871,6 +3875,8 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd) buf[4] |= 0x02; /* PR_TYPE_WRITE_EXCLUSIVE */ buf[5] |= 0x01; /* PR_TYPE_EXCLUSIVE_ACCESS_ALLREG */ + target_set_cmd_data_length(cmd, len); + transport_kunmap_data_sg(cmd); return 0; @@ -4031,6 +4037,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * Set ADDITIONAL_LENGTH */ put_unaligned_be32(add_len, &buf[4]); + target_set_cmd_data_length(cmd, 8 + add_len); transport_kunmap_data_sg(cmd); From 1f9f22acbb5dc4a787852f4ef04eb99edf42bce0 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 9 Feb 2021 20:01:46 +0530 Subject: [PATCH 22/30] scsi: aic79xx: Fix spelling of version s/verson/version/ Link: https://lore.kernel.org/r/20210209143146.3987352-1-unixbhaskar@gmail.com Acked-by: Randy Dunlap Signed-off-by: Bhaskar Chowdhury Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic79xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aic7xxx/aic79xx.h b/drivers/scsi/aic7xxx/aic79xx.h index 77e1d6bb59a3..59e9321815c8 100644 --- a/drivers/scsi/aic7xxx/aic79xx.h +++ b/drivers/scsi/aic7xxx/aic79xx.h @@ -1175,7 +1175,7 @@ struct ahd_softc { uint8_t tqinfifonext; /* - * Cached verson of the hs_mailbox so we can avoid + * Cached version of the hs_mailbox so we can avoid * pausing the sequencer during mailbox updates. */ uint8_t hs_mailbox; From 9599a1cf23330008d90b7c232efe95de7510ff29 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Thu, 11 Feb 2021 12:46:38 +0200 Subject: [PATCH 23/30] scsi: ufs: Fix a duplicate dev quirk number Fixes: 2b2bfc8aa519 ("scsi: ufs: Introduce a quirk to allow only page-aligned sg entries") Link: https://lore.kernel.org/r/20210211104638.292499-1-avri.altman@wdc.com Reviewed-by: Bean Huo Signed-off-by: Avri Altman Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index ee61f821f75d..18e56c1c1b30 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -570,7 +570,7 @@ enum ufshcd_quirks { /* * This quirk allows only sg entries aligned with page size. */ - UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE = 1 << 13, + UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE = 1 << 14, }; enum ufshcd_caps { From eefb816acb0162e94a85a857f3a55148f671d5a5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Feb 2021 11:24:28 -0800 Subject: [PATCH 24/30] scsi: bnx2fc: Fix Kconfig warning & CNIC build errors CNIC depends on MMU, but since 'select' does not follow any dependency chains, SCSI_BNX2X_FCOE also needs to depend on MMU, so that erroneous configs are not generated, which cause build errors in cnic. WARNING: unmet direct dependencies detected for CNIC Depends on [n]: NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_BROADCOM [=y] && PCI [=y] && (IPV6 [=n] || IPV6 [=n]=n) && MMU [=n] Selected by [y]: - SCSI_BNX2X_FCOE [=y] && SCSI_LOWLEVEL [=y] && SCSI [=y] && PCI [=y] && (IPV6 [=n] || IPV6 [=n]=n) && LIBFC [=y] && LIBFCOE [=y] riscv64-linux-ld: drivers/net/ethernet/broadcom/cnic.o: in function `.L154': cnic.c:(.text+0x1094): undefined reference to `uio_event_notify' riscv64-linux-ld: cnic.c:(.text+0x10bc): undefined reference to `uio_event_notify' riscv64-linux-ld: drivers/net/ethernet/broadcom/cnic.o: in function `.L1442': cnic.c:(.text+0x96a8): undefined reference to `__uio_register_device' riscv64-linux-ld: drivers/net/ethernet/broadcom/cnic.o: in function `.L0 ': cnic.c:(.text.unlikely+0x68): undefined reference to `uio_unregister_device' Link: https://lore.kernel.org/r/20210213192428.22537-1-rdunlap@infradead.org Fixes: 853e2bd2103a ("[SCSI] bnx2fc: Broadcom FCoE offload driver") Cc: Saurav Kashyap Cc: Javed Hasan Cc: GR-QLogic-Storage-Upstream@marvell.com Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: linux-scsi@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/bnx2fc/Kconfig b/drivers/scsi/bnx2fc/Kconfig index 3cf7e08df809..ecdc0f0f4f4e 100644 --- a/drivers/scsi/bnx2fc/Kconfig +++ b/drivers/scsi/bnx2fc/Kconfig @@ -5,6 +5,7 @@ config SCSI_BNX2X_FCOE depends on (IPV6 || IPV6=n) depends on LIBFC depends on LIBFCOE + depends on MMU select NETDEVICES select ETHERNET select NET_VENDOR_BROADCOM From c2f23a96c6e25a3b8aa2e873519b513745bba27c Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Mon, 15 Feb 2021 19:40:49 +0800 Subject: [PATCH 25/30] scsi: aic7xxx: Remove unused function pointer typedef ahc_bus_suspend/resume_t Remove the 'ahc_bus_suspend/resume_t' typedef as it is not used. Link: https://lore.kernel.org/r/1613389249-3409-1-git-send-email-chen45464546@163.com Signed-off-by: Chen Lin Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aic7xxx.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h index 11a09798e6b5..9bc755a0a2d3 100644 --- a/drivers/scsi/aic7xxx/aic7xxx.h +++ b/drivers/scsi/aic7xxx/aic7xxx.h @@ -896,8 +896,6 @@ union ahc_bus_softc { typedef void (*ahc_bus_intr_t)(struct ahc_softc *); typedef int (*ahc_bus_chip_init_t)(struct ahc_softc *); -typedef int (*ahc_bus_suspend_t)(struct ahc_softc *); -typedef int (*ahc_bus_resume_t)(struct ahc_softc *); typedef void ahc_callback_t (void *); struct ahc_softc { From 9acced3f58ad24407c1f9ebf53a8892c1e24cdb5 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Feb 2021 22:52:45 +0900 Subject: [PATCH 26/30] scsi: sd: sd_zbc: Don't pass GFP_NOIO to kvcalloc Dan reported we're passing in GFP_NOIO to kvmalloc() which will then fallback to doing kmalloc() instead of an optional vmalloc() if the size exceeds kmalloc()s limits. This will break with drives that have zone numbers exceeding PAGE_SIZE/sizeof(u32). Instead of passing in GFP_NOIO, enter an implicit GFP_NOIO allocation scope. Link: https://lore.kernel.org/r/YCuvSfKw4qEQBr/t@mwanda Link: https://lore.kernel.org/r/5a6345e2989fd06c049ac4e4627f6acb492c15b8.1613569821.git.johannes.thumshirn@wdc.com Fixes: 5795eb443060: ("scsi: sd_zbc: emulate ZONE_APPEND commands") Cc: Damien Le Moal Reported-by: Dan Carpenter Reviewed-by: Damien Le Moal Signed-off-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index cf07b7f93579..87a7274e4632 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -688,6 +688,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) unsigned int nr_zones = sdkp->rev_nr_zones; u32 max_append; int ret = 0; + unsigned int flags; /* * For all zoned disks, initialize zone append emulation data if not @@ -720,16 +721,19 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) disk->queue->nr_zones == nr_zones) goto unlock; + flags = memalloc_noio_save(); sdkp->zone_blocks = zone_blocks; sdkp->nr_zones = nr_zones; - sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO); + sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL); if (!sdkp->rev_wp_offset) { ret = -ENOMEM; + memalloc_noio_restore(flags); goto unlock; } ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb); + memalloc_noio_restore(flags); kvfree(sdkp->rev_wp_offset); sdkp->rev_wp_offset = NULL; From 43bf922cdd62d430e4ca3a20e6940c4a6fc2bc99 Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Thu, 18 Feb 2021 18:50:38 +0100 Subject: [PATCH 27/30] scsi: target: tcmu: Move some functions without code change This patch just moves one block of code containing some functions inside target_core_user.c to avoid adding prototypes in next patch. Link: https://lore.kernel.org/r/20210218175039.7829-2-bostroesser@gmail.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 160 +++++++++++++++--------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index a5991df23581..6d010cf22879 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1566,6 +1566,86 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) return &udev->se_dev; } +static void tcmu_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct tcmu_dev *udev = TCMU_DEV(dev); + + kfree(udev->uio_info.name); + kfree(udev->name); + kfree(udev); +} + +static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) +{ + if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { + kmem_cache_free(tcmu_cmd_cache, cmd); + return 0; + } + return -EINVAL; +} + +static void tcmu_blocks_release(struct radix_tree_root *blocks, + int start, int end) +{ + int i; + struct page *page; + + for (i = start; i < end; i++) { + page = radix_tree_delete(blocks, i); + if (page) { + __free_page(page); + atomic_dec(&global_db_count); + } + } +} + +static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev) +{ + struct tcmu_tmr *tmr, *tmp; + + list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) { + list_del_init(&tmr->queue_entry); + kfree(tmr); + } +} + +static void tcmu_dev_kref_release(struct kref *kref) +{ + struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref); + struct se_device *dev = &udev->se_dev; + struct tcmu_cmd *cmd; + bool all_expired = true; + int i; + + vfree(udev->mb_addr); + udev->mb_addr = NULL; + + spin_lock_bh(&timed_out_udevs_lock); + if (!list_empty(&udev->timedout_entry)) + list_del(&udev->timedout_entry); + spin_unlock_bh(&timed_out_udevs_lock); + + /* Upper layer should drain all requests before calling this */ + mutex_lock(&udev->cmdr_lock); + idr_for_each_entry(&udev->commands, cmd, i) { + if (tcmu_check_and_free_pending_cmd(cmd) != 0) + all_expired = false; + } + /* There can be left over TMR cmds. Remove them. */ + tcmu_remove_all_queued_tmr(udev); + if (!list_empty(&udev->qfull_queue)) + all_expired = false; + idr_destroy(&udev->commands); + WARN_ON(!all_expired); + + tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1); + bitmap_free(udev->data_bitmap); + mutex_unlock(&udev->cmdr_lock); + + call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); +} + static void run_qfull_queue(struct tcmu_dev *udev, bool fail) { struct tcmu_cmd *tcmu_cmd, *tmp_cmd; @@ -1751,86 +1831,6 @@ static int tcmu_open(struct uio_info *info, struct inode *inode) return 0; } -static void tcmu_dev_call_rcu(struct rcu_head *p) -{ - struct se_device *dev = container_of(p, struct se_device, rcu_head); - struct tcmu_dev *udev = TCMU_DEV(dev); - - kfree(udev->uio_info.name); - kfree(udev->name); - kfree(udev); -} - -static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) -{ - if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { - kmem_cache_free(tcmu_cmd_cache, cmd); - return 0; - } - return -EINVAL; -} - -static void tcmu_blocks_release(struct radix_tree_root *blocks, - int start, int end) -{ - int i; - struct page *page; - - for (i = start; i < end; i++) { - page = radix_tree_delete(blocks, i); - if (page) { - __free_page(page); - atomic_dec(&global_db_count); - } - } -} - -static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev) -{ - struct tcmu_tmr *tmr, *tmp; - - list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) { - list_del_init(&tmr->queue_entry); - kfree(tmr); - } -} - -static void tcmu_dev_kref_release(struct kref *kref) -{ - struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref); - struct se_device *dev = &udev->se_dev; - struct tcmu_cmd *cmd; - bool all_expired = true; - int i; - - vfree(udev->mb_addr); - udev->mb_addr = NULL; - - spin_lock_bh(&timed_out_udevs_lock); - if (!list_empty(&udev->timedout_entry)) - list_del(&udev->timedout_entry); - spin_unlock_bh(&timed_out_udevs_lock); - - /* Upper layer should drain all requests before calling this */ - mutex_lock(&udev->cmdr_lock); - idr_for_each_entry(&udev->commands, cmd, i) { - if (tcmu_check_and_free_pending_cmd(cmd) != 0) - all_expired = false; - } - /* There can be left over TMR cmds. Remove them. */ - tcmu_remove_all_queued_tmr(udev); - if (!list_empty(&udev->qfull_queue)) - all_expired = false; - idr_destroy(&udev->commands); - WARN_ON(!all_expired); - - tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1); - bitmap_free(udev->data_bitmap); - mutex_unlock(&udev->cmdr_lock); - - call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); -} - static int tcmu_release(struct uio_info *info, struct inode *inode) { struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); From 8f33bb2400f48a6a319176866af6e7aac9e5211e Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Thu, 18 Feb 2021 18:50:39 +0100 Subject: [PATCH 28/30] scsi: target: tcmu: Fix memory leak caused by wrong uio usage When user deletes a tcmu device via configFS, tcmu calls uio_unregister_device(). During that call uio resets its pointer to struct uio_info provided by tcmu. That means, after uio_unregister_device() uio will no longer execute any of the callbacks tcmu had set in uio_info. Especially, if userspace daemon still holds the corresponding uio device open or mmap'ed while tcmu calls uio_unregister_device(), uio will not call tcmu_release() when userspace finally closes and munmaps the uio device. Since tcmu does refcounting for the tcmu device in tcmu_open() and tcmu_release(), in the decribed case refcount does not drop to 0 and tcmu does not free tcmu device's resources. In extreme cases this can cause memory leaking of up to 1 GB for a single tcmu device. After uio_unregister_device(), uio will reject every open, read, write, mmap from userspace with -EOI. But userspace daemon can still access the mmap'ed command ring and data area. Therefore tcmu should wait until userspace munmaps the uio device before it frees the resources, as we don't want to cause SIGSEGV or SIGBUS to user space. That said, current refcounting during tcmu_open and tcmu_release does not work correctly, and refcounting better should be done in the open and close callouts of the vm_operations_struct, which tcmu assigns to each mmap of the uio device (because it wants its own page fault handler). This patch fixes the memory leak by removing refcounting from tcmu_open and tcmu_close, and instead adding new tcmu_vma_open() and tcmu_vma_close() handlers that only do refcounting. Link: https://lore.kernel.org/r/20210218175039.7829-3-bostroesser@gmail.com Reviewed-by: Mike Christie Signed-off-by: Bodo Stroesser Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 6d010cf22879..bf73cd5f4b04 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1643,6 +1643,8 @@ static void tcmu_dev_kref_release(struct kref *kref) bitmap_free(udev->data_bitmap); mutex_unlock(&udev->cmdr_lock); + pr_debug("dev_kref_release\n"); + call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); } @@ -1758,6 +1760,25 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi) return page; } +static void tcmu_vma_open(struct vm_area_struct *vma) +{ + struct tcmu_dev *udev = vma->vm_private_data; + + pr_debug("vma_open\n"); + + kref_get(&udev->kref); +} + +static void tcmu_vma_close(struct vm_area_struct *vma) +{ + struct tcmu_dev *udev = vma->vm_private_data; + + pr_debug("vma_close\n"); + + /* release ref from tcmu_vma_open */ + kref_put(&udev->kref, tcmu_dev_kref_release); +} + static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) { struct tcmu_dev *udev = vmf->vma->vm_private_data; @@ -1796,6 +1817,8 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf) } static const struct vm_operations_struct tcmu_vm_ops = { + .open = tcmu_vma_open, + .close = tcmu_vma_close, .fault = tcmu_vma_fault, }; @@ -1812,6 +1835,8 @@ static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma) if (vma_pages(vma) != (udev->ring_size >> PAGE_SHIFT)) return -EINVAL; + tcmu_vma_open(vma); + return 0; } @@ -1824,7 +1849,6 @@ static int tcmu_open(struct uio_info *info, struct inode *inode) return -EBUSY; udev->inode = inode; - kref_get(&udev->kref); pr_debug("open\n"); @@ -1838,8 +1862,7 @@ static int tcmu_release(struct uio_info *info, struct inode *inode) clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags); pr_debug("close\n"); - /* release ref from open */ - kref_put(&udev->kref, tcmu_dev_kref_release); + return 0; } From aaf15f8c6de932861f1fce6aeec6a89ac0e354b6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 21 Feb 2021 18:10:42 -0800 Subject: [PATCH 29/30] scsi: sd: Fix Opal support The SCSI core has been modified recently such that it only processes PM requests if rpm_status != RPM_ACTIVE. Since some Opal requests are submitted while rpm_status != RPM_ACTIVE, set flag RQF_PM for Opal requests. See also https://bugzilla.kernel.org/show_bug.cgi?id=211227. [mkp: updated sha for PM patch] Link: https://lore.kernel.org/r/20210222021042.3534-1-bvanassche@acm.org Fixes: d80210f25ff0 ("sd: add support for TCG OPAL self encrypting disks") Fixes: e6044f714b25 ("scsi: core: Only process PM requests if rpm_status != RPM_ACTIVE") Cc: chriscjsus@yahoo.com Cc: Jens Axboe Cc: Alan Stern Cc: stable@vger.kernel.org Reported-by: chriscjsus@yahoo.com Tested-by: chriscjsus@yahoo.com Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6e41ecbf4399..ed0b1bb99f08 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -707,9 +707,9 @@ static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, put_unaligned_be16(spsp, &cdb[2]); put_unaligned_be32(len, &cdb[6]); - ret = scsi_execute_req(sdev, cdb, - send ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - buffer, len, NULL, SD_TIMEOUT, sdkp->max_retries, NULL); + ret = scsi_execute(sdev, cdb, send ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + buffer, len, NULL, NULL, SD_TIMEOUT, sdkp->max_retries, 0, + RQF_PM, NULL); return ret <= 0 ? ret : -EIO; } #endif /* CONFIG_BLK_SED_OPAL */ From f749d8b7a9896bc6e5ffe104cc64345037e0b152 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Mon, 15 Feb 2021 16:26:57 -0600 Subject: [PATCH 30/30] scsi: hpsa: Correct dev cmds outstanding for retried cmds Prevent incrementing device->commands_outstanding for ioaccel command retries that are driver initiated. If the command goes through the retry path, the device->commands_outstanding counter has already accounted for the number of commands outstanding to the device. Only commands going through function hpsa_cmd_resolve_events decrement this counter. - ioaccel commands go to either HBA disks or to logical volumes comprised of SSDs. The extra increment is causing device resets to hang. - Resets wait for all device outstanding commands to complete before returning. Replace unused field abort_pending with retry_pending. This is a maintenance driver so these changes have the least impact/risk. Link: https://lore.kernel.org/r/161342801747.29388.13045495968308188518.stgit@brunhilda Tested-by: Joe Szczypek Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 51 +++++++++++++++++++++++++++++++++++------ drivers/scsi/hpsa_cmd.h | 2 +- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 337d3aa91945..38369766511c 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -1151,7 +1151,10 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h, { dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); - if (c->device) + /* + * Check to see if the command is being retried. + */ + if (c->device && !c->retry_pending) atomic_inc(&c->device->commands_outstanding); reply_queue = h->reply_map[raw_smp_processor_id()]; @@ -5567,7 +5570,8 @@ static inline void hpsa_cmd_partial_init(struct ctlr_info *h, int index, } static int hpsa_ioaccel_submit(struct ctlr_info *h, - struct CommandList *c, struct scsi_cmnd *cmd) + struct CommandList *c, struct scsi_cmnd *cmd, + bool retry) { struct hpsa_scsi_dev_t *dev = cmd->device->hostdata; int rc = IO_ACCEL_INELIGIBLE; @@ -5584,18 +5588,22 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h, cmd->host_scribble = (unsigned char *) c; if (dev->offload_enabled) { - hpsa_cmd_init(h, c->cmdindex, c); + hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */ c->cmd_type = CMD_SCSI; c->scsi_cmd = cmd; c->device = dev; + if (retry) /* Resubmit but do not increment device->commands_outstanding. */ + c->retry_pending = true; rc = hpsa_scsi_ioaccel_raid_map(h, c); if (rc < 0) /* scsi_dma_map failed. */ rc = SCSI_MLQUEUE_HOST_BUSY; } else if (dev->hba_ioaccel_enabled) { - hpsa_cmd_init(h, c->cmdindex, c); + hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */ c->cmd_type = CMD_SCSI; c->scsi_cmd = cmd; c->device = dev; + if (retry) /* Resubmit but do not increment device->commands_outstanding. */ + c->retry_pending = true; rc = hpsa_scsi_ioaccel_direct_map(h, c); if (rc < 0) /* scsi_dma_map failed. */ rc = SCSI_MLQUEUE_HOST_BUSY; @@ -5628,7 +5636,8 @@ static void hpsa_command_resubmit_worker(struct work_struct *work) if (c2->error_data.serv_response == IOACCEL2_STATUS_SR_TASK_COMP_SET_FULL) { - rc = hpsa_ioaccel_submit(h, c, cmd); + /* Resubmit with the retry_pending flag set. */ + rc = hpsa_ioaccel_submit(h, c, cmd, true); if (rc == 0) return; if (rc == SCSI_MLQUEUE_HOST_BUSY) { @@ -5644,6 +5653,15 @@ static void hpsa_command_resubmit_worker(struct work_struct *work) } } hpsa_cmd_partial_init(c->h, c->cmdindex, c); + /* + * Here we have not come in though queue_command, so we + * can set the retry_pending flag to true for a driver initiated + * retry attempt (I.E. not a SML retry). + * I.E. We are submitting a driver initiated retry. + * Note: hpsa_ciss_submit does not zero out the command fields like + * ioaccel submit does. + */ + c->retry_pending = true; if (hpsa_ciss_submit(c->h, c, cmd, dev)) { /* * If we get here, it means dma mapping failed. Try @@ -5706,11 +5724,16 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) /* * Call alternate submit routine for I/O accelerated commands. * Retries always go down the normal I/O path. + * Note: If cmd->retries is non-zero, then this is a SML + * initiated retry and not a driver initiated retry. + * This command has been obtained from cmd_tagged_alloc + * and is therefore a brand-new command. */ if (likely(cmd->retries == 0 && !blk_rq_is_passthrough(cmd->request) && h->acciopath_status)) { - rc = hpsa_ioaccel_submit(h, c, cmd); + /* Submit with the retry_pending flag unset. */ + rc = hpsa_ioaccel_submit(h, c, cmd, false); if (rc == 0) return 0; if (rc == SCSI_MLQUEUE_HOST_BUSY) { @@ -6105,6 +6128,7 @@ return_reset_status: * at init, and managed by cmd_tagged_alloc() and cmd_tagged_free() using the * block request tag as an index into a table of entries. cmd_tagged_free() is * the complement, although cmd_free() may be called instead. + * This function is only called for new requests from queue_command. */ static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h, struct scsi_cmnd *scmd) @@ -6139,8 +6163,14 @@ static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h, } atomic_inc(&c->refcount); - hpsa_cmd_partial_init(h, idx, c); + + /* + * This is a new command obtained from queue_command so + * there have not been any driver initiated retry attempts. + */ + c->retry_pending = false; + return c; } @@ -6208,6 +6238,13 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h) } hpsa_cmd_partial_init(h, i, c); c->device = NULL; + + /* + * cmd_alloc is for "internal" commands and they are never + * retried. + */ + c->retry_pending = false; + return c; } diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index 46df2e3ff89b..d126bb877250 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -448,7 +448,7 @@ struct CommandList { */ struct hpsa_scsi_dev_t *phys_disk; - int abort_pending; + bool retry_pending; struct hpsa_scsi_dev_t *device; atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */ } __aligned(COMMANDLIST_ALIGNMENT);