From eca2040972b411ec27483bf75dc8b84e730e88ff Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:34 +0200 Subject: [PATCH 01/19] scsi: block: ioprio: Clean up interface definition The I/O priority user interface defines the 16-bits ioprio values as the combination of the upper 3-bits for an I/O priority class and the lower 13-bits as priority data. However, the kernel only uses the lower 3-bits of the priority data to define priority levels for the RT and BE priority classes. The data part of an ioprio value is completely ignored for the IDLE and NONE classes. This is enforced by checks done in ioprio_check_cap(), which is called for all paths that allow defining an I/O priority for I/Os: the per-context ioprio_set() system call, aio interface and io_uring interface. Clarify this fact in the uapi ioprio.h header file and introduce the IOPRIO_PRIO_LEVEL_MASK and IOPRIO_PRIO_LEVEL() macros for users to define and get priority levels in an ioprio value. The coarser macro IOPRIO_PRIO_DATA() is retained for backward compatibility with old applications already using it. There is no functional change introduced with this. In-kernel users of the IOPRIO_PRIO_DATA() macro which are explicitly handling I/O priority data as a priority level are modified to use the new IOPRIO_PRIO_LEVEL() macro without any functional change. Since f2fs is the only user of this macro not explicitly using that value as a priority level, it is left unchanged. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-2-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- block/bfq-iosched.c | 8 ++++---- block/ioprio.c | 6 +++--- include/uapi/linux/ioprio.h | 19 ++++++++++++++----- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 3164e3177965..3067b75f3fd0 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5524,16 +5524,16 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) bfqq->new_ioprio_class = task_nice_ioclass(tsk); break; case IOPRIO_CLASS_RT: - bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); + bfqq->new_ioprio = IOPRIO_PRIO_LEVEL(bic->ioprio); bfqq->new_ioprio_class = IOPRIO_CLASS_RT; break; case IOPRIO_CLASS_BE: - bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); + bfqq->new_ioprio = IOPRIO_PRIO_LEVEL(bic->ioprio); bfqq->new_ioprio_class = IOPRIO_CLASS_BE; break; case IOPRIO_CLASS_IDLE: bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE; - bfqq->new_ioprio = 7; + bfqq->new_ioprio = IOPRIO_NR_LEVELS - 1; break; } @@ -5830,7 +5830,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bfq_io_cq *bic, bool respawn) { - const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); + const int ioprio = IOPRIO_PRIO_LEVEL(bic->ioprio); const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); struct bfq_queue **async_bfqq = NULL; struct bfq_queue *bfqq; diff --git a/block/ioprio.c b/block/ioprio.c index 32a456b45804..f0d9e818abc5 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -33,7 +33,7 @@ int ioprio_check_cap(int ioprio) { int class = IOPRIO_PRIO_CLASS(ioprio); - int data = IOPRIO_PRIO_DATA(ioprio); + int level = IOPRIO_PRIO_LEVEL(ioprio); switch (class) { case IOPRIO_CLASS_RT: @@ -49,13 +49,13 @@ int ioprio_check_cap(int ioprio) fallthrough; /* rt has prio field too */ case IOPRIO_CLASS_BE: - if (data >= IOPRIO_NR_LEVELS || data < 0) + if (level >= IOPRIO_NR_LEVELS) return -EINVAL; break; case IOPRIO_CLASS_IDLE: break; case IOPRIO_CLASS_NONE: - if (data) + if (level) return -EINVAL; break; default: diff --git 
a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index f70f2596a6bf..4444b4e4fdad 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -17,7 +17,7 @@ ((data) & IOPRIO_PRIO_MASK)) /* - * These are the io priority groups as implemented by the BFQ and mq-deadline + * These are the io priority classes as implemented by the BFQ and mq-deadline * schedulers. RT is the realtime class, it always gets premium service. For * ATA disks supporting NCQ IO priority, RT class IOs will be processed using * high priority NCQ commands. BE is the best-effort scheduling class, the @@ -32,11 +32,20 @@ enum { }; /* - * The RT and BE priority classes both support up to 8 priority levels. + * The RT and BE priority classes both support up to 8 priority levels that + * can be specified using the lower 3-bits of the priority data. */ -#define IOPRIO_NR_LEVELS 8 -#define IOPRIO_BE_NR IOPRIO_NR_LEVELS +#define IOPRIO_LEVEL_NR_BITS 3 +#define IOPRIO_NR_LEVELS (1 << IOPRIO_LEVEL_NR_BITS) +#define IOPRIO_LEVEL_MASK (IOPRIO_NR_LEVELS - 1) +#define IOPRIO_PRIO_LEVEL(ioprio) ((ioprio) & IOPRIO_LEVEL_MASK) +#define IOPRIO_BE_NR IOPRIO_NR_LEVELS + +/* + * Possible values for the "which" argument of the ioprio_get() and + * ioprio_set() system calls (see "man ioprio_set"). + */ enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, @@ -44,7 +53,7 @@ enum { }; /* - * Fallback BE priority level. + * Fallback BE class priority level. */ #define IOPRIO_NORM 4 #define IOPRIO_BE_NORM IOPRIO_NORM From 6c913257226a25879bfd6226e0ee265e98904ce6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:35 +0200 Subject: [PATCH 02/19] scsi: block: Introduce ioprio hints I/O priorities currently only use 6-bits of the 16-bits ioprio value: the 3-upper bits are used to define up to 8 priority classes (4 of which are valid) and the 3 lower bits of the value are used to define a priority level for the real-time and best-effort class. 
The remaining 10-bits between the I/O priority class and level are unused, and in fact, cannot be used by the user as doing so would either result in the value being completely ignored, or in an error returned by ioprio_check_cap(). Use these 10-bits of an ioprio value to allow a user to specify I/O hints. An I/O hint is defined as a 10-bits value, allowing up to 1023 different hints to be specified, with the value 0 being reserved as the "no hint" case. An I/O hint can apply to any I/O that specifies a valid priority class other than NONE, regardless of the I/O priority level specified. To do so, the macros IOPRIO_PRIO_HINT() and IOPRIO_PRIO_VALUE_HINT() are introduced in include/uapi/linux/ioprio.h to respectively allow a user to get and set a hint in an ioprio value. To support the ATA and SCSI command duration limits feature, 7 hints are defined: IOPRIO_HINT_DEV_DURATION_LIMIT_1 to IOPRIO_HINT_DEV_DURATION_LIMIT_7, allowing a user to specify which command duration limit descriptor should be applied to the commands serving an I/O. Specifying these hints has for now no effect whatsoever if the target block devices do not support the command duration limits feature. However, in the future, block I/O schedulers can be modified to optimize I/O issuing order based on these hints, even for devices that do not support the command duration limits feature. Given that the 7 duration limits hints defined have no effect on any block layer component, the actual definition of the duration limits implied by these hints remains at the device level. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-3-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- include/uapi/linux/ioprio.h | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 4444b4e4fdad..4c4806e8230b 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -58,4 +58,53 @@ enum { #define IOPRIO_NORM 4 #define IOPRIO_BE_NORM IOPRIO_NORM +/* + * The 10 bits between the priority class and the priority level are used to + * optionally define I/O hints for any combination of I/O priority class and + * level. Depending on the kernel configuration, I/O scheduler being used and + * the target I/O device being used, hints can influence how I/Os are processed + * without affecting the I/O scheduling ordering defined by the I/O priority + * class and level. + */ +#define IOPRIO_HINT_SHIFT IOPRIO_LEVEL_NR_BITS +#define IOPRIO_HINT_NR_BITS 10 +#define IOPRIO_NR_HINTS (1 << IOPRIO_HINT_NR_BITS) +#define IOPRIO_HINT_MASK (IOPRIO_NR_HINTS - 1) +#define IOPRIO_PRIO_HINT(ioprio) \ + (((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK) + +/* + * Alternate macro for IOPRIO_PRIO_VALUE() to define an I/O priority with + * a class, level and hint. + */ +#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \ + ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ + (((hint) & IOPRIO_HINT_MASK) << IOPRIO_HINT_SHIFT) | \ + ((level) & IOPRIO_LEVEL_MASK)) + +/* + * I/O hints. + */ +enum { + /* No hint */ + IOPRIO_HINT_NONE = 0, + + /* + * Device command duration limits: indicate to the device a desired + * duration limit for the commands that will be used to process an I/O. + * These will currently only be effective for SCSI and ATA devices that + * support the command duration limits feature. If this feature is + * enabled, then the commands issued to the device to process an I/O with + * one of these hints set will have the duration limit index (dld field) + * set to the value of the hint. 
+ */ + IOPRIO_HINT_DEV_DURATION_LIMIT_1 = 1, + IOPRIO_HINT_DEV_DURATION_LIMIT_2 = 2, + IOPRIO_HINT_DEV_DURATION_LIMIT_3 = 3, + IOPRIO_HINT_DEV_DURATION_LIMIT_4 = 4, + IOPRIO_HINT_DEV_DURATION_LIMIT_5 = 5, + IOPRIO_HINT_DEV_DURATION_LIMIT_6 = 6, + IOPRIO_HINT_DEV_DURATION_LIMIT_7 = 7, +}; + #endif /* _UAPI_LINUX_IOPRIO_H */ From dffc480d2df1772d6092f46f2b4c5e0de941bd47 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:36 +0200 Subject: [PATCH 03/19] scsi: block: Introduce BLK_STS_DURATION_LIMIT Introduce the new block I/O status BLK_STS_DURATION_LIMIT for LLDDs to report command that failed due to a command duration limit being exceeded. This new status is mapped to the ETIME error code to allow users to differentiate "soft" duration limit failures from other more serious hardware related errors. If we compare BLK_STS_DURATION_LIMIT with BLK_STS_TIMEOUT: -BLK_STS_DURATION_LIMIT means that the drive gave a reply indicating that the command duration limit was exceeded before the command could be completed. This I/O status is mapped to ETIME for user space. -BLK_STS_TIMEOUT means that the drive never gave a reply at all. This I/O status is mapped to ETIMEDOUT for user space. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-4-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- block/blk-core.c | 3 +++ include/linux/blk_types.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 00c74330fa92..04ad13ec6ead 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -170,6 +170,9 @@ static const struct { [BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" }, [BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" }, + /* Command duration limit device-side timeout */ + [BLK_STS_DURATION_LIMIT] = { -ETIME, "duration limit exceeded" }, + /* everything else not covered above: */ [BLK_STS_IOERR] = { -EIO, "I/O" }, }; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 740afe80f297..dfdcd218aaac 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -171,6 +171,12 @@ typedef u16 blk_short_t; */ #define BLK_STS_OFFLINE ((__force blk_status_t)17) +/* + * BLK_STS_DURATION_LIMIT is returned from the driver when the target device + * aborted the command because it exceeded one of its Command Duration Limits. + */ +#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)18) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with From 3d848ca1ebc8d8864f25bd461914c93eff82a2d2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:37 +0200 Subject: [PATCH 04/19] scsi: core: Allow libata to complete successful commands via EH In SCSI, we get the sense data as part of the completion, for ATA however, we need to fetch the sense data as an extra step. For an aborted ATA command the sense data is fetched via libata's ->eh_strategy_handler(). For Command Duration Limits policy 0xD: The device shall complete the command without error with the additional sense code set to DATA CURRENTLY UNAVAILABLE. 
In order to handle this policy in libata, we intend to send a successful command via SCSI EH, and let libata's ->eh_strategy_handler() fetch the sense data for the good command. This is similar to how we handle an aborted ATA command, just that we need to read the Successful NCQ Commands log instead of the NCQ Command Error log. When we get a SATA completion with successful commands, ATA_SENSE will be set, indicating that some commands in the completion have sense data. The sense_valid bitmask in the Sense Data for Successful NCQ Commands log will indicate exactly which commands had sense data, which might be a subset of all the commands that were completed in the same completion. (Yet all will have ATA_SENSE set, since the status is per completion.) The successful commands that have e.g. a "DATA CURRENTLY UNAVAILABLE" sense data will have a SCSI ML byte set, so scsi_eh_flush_done_q() will not set the scmd->result to DID_TIME_OUT for these commands. However, the successful commands that did not have sense data, must not get their result marked as DID_TIME_OUT by SCSI EH. Add a new flag SCMD_FORCE_EH_SUCCESS, which tells SCSI EH to not mark a command as DID_TIME_OUT, even if it has scmd->result == SAM_STAT_GOOD. This will be used by libata in a subsequent commit. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-5-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 3 ++- include/scsi/scsi_cmnd.h | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 3ec8bfd4090f..8b7d227bfe1c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2165,7 +2165,8 @@ void scsi_eh_flush_done_q(struct list_head *done_q) * scsi_eh_get_sense), scmd->result is already * set, do not set DID_TIME_OUT. 
*/ - if (!scmd->result) + if (!scmd->result && + !(scmd->flags & SCMD_FORCE_EH_SUCCESS)) scmd->result |= (DID_TIME_OUT << 16); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index c2cb5f69635c..526def14e7fb 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -52,6 +52,11 @@ struct scsi_pointer { #define SCMD_TAGGED (1 << 0) #define SCMD_INITIALIZED (1 << 1) #define SCMD_LAST (1 << 2) +/* + * libata uses SCSI EH to fetch sense data for successful commands. + * SCSI EH should not overwrite scmd->result when SCMD_FORCE_EH_SUCCESS is set. + */ +#define SCMD_FORCE_EH_SUCCESS (1 << 3) #define SCMD_FAIL_IF_RECOVERING (1 << 4) /* flags preserved across unprep / reprep */ #define SCMD_PRESERVED_FLAGS (SCMD_INITIALIZED | SCMD_FAIL_IF_RECOVERING) From 734326937b65cec7ffd00bfbbce0f791ac4aac84 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:38 +0200 Subject: [PATCH 05/19] scsi: core: Rename and move get_scsi_ml_byte() SCSI has two different getters: - get_XXX_byte() (in scsi_cmnd.h) which takes a struct scsi_cmnd *, and - XXX_byte() (in scsi.h) which takes a scmd->result. The proper name for get_scsi_ml_byte() should thus be without the get_ prefix, as it takes a scmd->result. Rename the function to rectify this. (This change was suggested by Mike Christie.) Additionally, move get_scsi_ml_byte() to scsi_priv.h since both scsi_lib.c and scsi_error.c will need to use this helper in a follow-up patch. Cc: Mike Christie Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-6-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_lib.c | 7 +------ drivers/scsi/scsi_priv.h | 5 +++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b7c569a42aa4..fac9c31161d2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -578,11 +578,6 @@ static bool scsi_end_request(struct request *req, blk_status_t error, return false; } -static inline u8 get_scsi_ml_byte(int result) -{ - return (result >> 8) & 0xff; -} - /** * scsi_result_to_blk_status - translate a SCSI result code into blk_status_t * @result: scsi error code @@ -595,7 +590,7 @@ static blk_status_t scsi_result_to_blk_status(int result) * Check the scsi-ml byte first in case we converted a host or status * byte. */ - switch (get_scsi_ml_byte(result)) { + switch (scsi_ml_byte(result)) { case SCSIML_STAT_OK: break; case SCSIML_STAT_RESV_CONFLICT: diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 96284a0e13fe..74324fba4281 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -29,6 +29,11 @@ enum scsi_ml_status { SCSIML_STAT_TGT_FAILURE = 0x04, /* Permanent target failure */ }; +static inline u8 scsi_ml_byte(int result) +{ + return (result >> 8) & 0xff; +} + /* * Scsi Error Handler Flags */ From a6cdc35fab0d813d54744abe2af07d6c49c07d6e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:39 +0200 Subject: [PATCH 06/19] scsi: core: Support retrieving sub-pages of mode pages Allow scsi_mode_sense() to retrieve sub-pages of mode pages by adding the subpage argument. Change all the current caller sites to specify the subpage 0. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-7-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_lib.c | 4 +++- drivers/scsi/scsi_transport_sas.c | 2 +- drivers/scsi/sd.c | 9 ++++----- drivers/scsi/sr.c | 2 +- include/scsi/scsi_device.h | 8 ++++---- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fac9c31161d2..633c4e8af830 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2144,6 +2144,7 @@ EXPORT_SYMBOL_GPL(scsi_mode_select); * @sdev: SCSI device to be queried * @dbd: set to prevent mode sense from returning block descriptors * @modepage: mode page being requested + * @subpage: sub-page of the mode page being requested * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. * @timeout: command timeout @@ -2155,7 +2156,7 @@ EXPORT_SYMBOL_GPL(scsi_mode_select); * Returns zero if successful, or a negative error number on failure */ int -scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, +scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { @@ -2175,6 +2176,7 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, dbd = sdev->set_dbd_for_ms ? 
8 : dbd; cmd[1] = dbd & 0x18; /* allows DBD and LLBA bits */ cmd[2] = modepage; + cmd[3] = subpage; sshdr = exec_args.sshdr; diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 74b99f2b0b74..d704c484a251 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -1245,7 +1245,7 @@ int sas_read_port_mode_page(struct scsi_device *sdev) if (!buffer) return -ENOMEM; - error = scsi_mode_sense(sdev, 1, 0x19, buffer, BUF_SIZE, 30*HZ, 3, + error = scsi_mode_sense(sdev, 1, 0x19, 0, buffer, BUF_SIZE, 30*HZ, 3, &mode_data, NULL); if (error) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 1624d528aa1f..cdcef1b651c1 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -183,7 +183,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr, return count; } - if (scsi_mode_sense(sdp, 0x08, 8, buffer, sizeof(buffer), SD_TIMEOUT, + if (scsi_mode_sense(sdp, 0x08, 8, 0, buffer, sizeof(buffer), SD_TIMEOUT, sdkp->max_retries, &data, NULL)) return -EINVAL; len = min_t(size_t, sizeof(buffer), data.length - data.header_length - @@ -2609,9 +2609,8 @@ sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage, if (sdkp->device->use_10_for_ms && len < 8) len = 8; - return scsi_mode_sense(sdkp->device, dbd, modepage, buffer, len, - SD_TIMEOUT, sdkp->max_retries, data, - sshdr); + return scsi_mode_sense(sdkp->device, dbd, modepage, 0, buffer, len, + SD_TIMEOUT, sdkp->max_retries, data, sshdr); } /* @@ -2868,7 +2867,7 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) if (sdkp->protection_type == 0) return; - res = scsi_mode_sense(sdp, 1, 0x0a, buffer, 36, SD_TIMEOUT, + res = scsi_mode_sense(sdp, 1, 0x0a, 0, buffer, 36, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (res < 0 || !data.header_length || diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 12869e6d4ebd..cd5b08689c1a 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -825,7 +825,7 @@ static 
int get_capabilities(struct scsi_cd *cd) scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr); /* ask for mode page 0x2a */ - rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, ms_len, + rc = scsi_mode_sense(cd->device, 0, 0x2a, 0, buffer, ms_len, SR_TIMEOUT, 3, &data, NULL); if (rc < 0 || data.length > ms_len || diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index f10a008e5bfa..c146cc807d44 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -421,10 +421,10 @@ extern int scsi_track_queue_full(struct scsi_device *, int); extern int scsi_set_medium_removal(struct scsi_device *, char); -extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, - unsigned char *buffer, int len, int timeout, - int retries, struct scsi_mode_data *data, - struct scsi_sense_hdr *); +int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, + int subpage, unsigned char *buffer, int len, int timeout, + int retries, struct scsi_mode_data *data, + struct scsi_sense_hdr *); extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, From 152e52fb6ff180e97d64585e87fea44c49b8bda8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:40 +0200 Subject: [PATCH 07/19] scsi: core: Support Service Action in scsi_report_opcode() The REPORT_SUPPORTED_OPERATION_CODES command allows checking for support of commands that have the same opcode but different service actions, such as READ 32 and WRITE 32. However, the current implementation of scsi_report_opcode() only allows checking an operation code without a service action differentiation. Add the "sa" argument to scsi_report_opcode() to allow passing a service action. If a non-zero service action is specified, the reporting options field value is set to 3 to have the service action field taken into account by the device. 
If no service action field is specified (zero), the reporting options field is set to 1 as before. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-8-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 26 ++++++++++++++++++-------- drivers/scsi/sd.c | 10 +++++----- include/scsi/scsi_device.h | 5 +++-- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 09ef0b31dfc0..62d9472e08e9 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -504,18 +504,22 @@ void scsi_attach_vpd(struct scsi_device *sdev) } /** - * scsi_report_opcode - Find out if a given command opcode is supported + * scsi_report_opcode - Find out if a given command is supported * @sdev: scsi device to query * @buffer: scratch buffer (must be at least 20 bytes long) * @len: length of buffer - * @opcode: opcode for command to look up + * @opcode: opcode for the command to look up + * @sa: service action for the command to look up * - * Uses the REPORT SUPPORTED OPERATION CODES to look up the given - * opcode. Returns -EINVAL if RSOC fails, 0 if the command opcode is - * unsupported and 1 if the device claims to support the command. + * Uses the REPORT SUPPORTED OPERATION CODES to check support for the + * command identified with @opcode and @sa. If the command does not + * have a service action, @sa must be 0. Returns -EINVAL if RSOC fails, + * 0 if the command is not supported and 1 if the device claims to + * support the command. 
*/ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, - unsigned int len, unsigned char opcode) + unsigned int len, unsigned char opcode, + unsigned short sa) { unsigned char cmd[16]; struct scsi_sense_hdr sshdr; @@ -539,8 +543,14 @@ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, memset(cmd, 0, 16); cmd[0] = MAINTENANCE_IN; cmd[1] = MI_REPORT_SUPPORTED_OPERATION_CODES; - cmd[2] = 1; /* One command format */ - cmd[3] = opcode; + if (!sa) { + cmd[2] = 1; /* One command format */ + cmd[3] = opcode; + } else { + cmd[2] = 3; /* One command format with service action */ + cmd[3] = opcode; + put_unaligned_be16(sa, &cmd[4]); + } put_unaligned_be32(request_len, &cmd[6]); memset(buffer, 0, len); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cdcef1b651c1..a76092663246 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3056,7 +3056,7 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) return; } - if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY) < 0) { + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY, 0) < 0) { struct scsi_vpd *vpd; sdev->no_report_opcodes = 1; @@ -3072,10 +3072,10 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) rcu_read_unlock(); } - if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME_16) == 1) + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME_16, 0) == 1) sdkp->ws16 = 1; - if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME) == 1) + if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME, 0) == 1) sdkp->ws10 = 1; } @@ -3087,9 +3087,9 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer) return; if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, - SECURITY_PROTOCOL_IN) == 1 && + SECURITY_PROTOCOL_IN, 0) == 1 && scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, - SECURITY_PROTOCOL_OUT) == 1) + SECURITY_PROTOCOL_OUT, 0) == 1) sdkp->security = 1; } diff --git 
a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c146cc807d44..c93c5aaf637e 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -433,8 +433,9 @@ extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr); extern int scsi_get_vpd_page(struct scsi_device *, u8 page, unsigned char *buf, int buf_len); -extern int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, - unsigned int len, unsigned char opcode); +int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, + unsigned int len, unsigned char opcode, + unsigned short sa); extern int scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state); extern struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type, From 624885209f31eb9985bf51abe204ecbffe2fdeea Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:41 +0200 Subject: [PATCH 08/19] scsi: core: Detect support for command duration limits Introduce the function scsi_cdl_check() to detect if a device supports command duration limits (CDL). Support for the READ 16, WRITE 16, READ 32 and WRITE 32 commands is checked using the function scsi_report_opcode() to probe the rwcdlp and cdlp bits as they indicate the mode page defining the command duration limits descriptors that apply to the command being tested. If any of these commands support CDL, the field cdl_supported of struct scsi_device is set to 1 to indicate that the device supports CDL. Support for CDL for a device is advertised through sysfs using the new cdl_supported device attribute. This attribute value is 1 for a device supporting CDL and 0 otherwise. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-9-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- Documentation/ABI/testing/sysfs-block-device | 9 +++ drivers/scsi/scsi.c | 81 ++++++++++++++++++++ drivers/scsi/scsi_scan.c | 3 + drivers/scsi/scsi_sysfs.c | 2 + include/scsi/scsi_device.h | 3 + 5 files changed, 98 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device index 7ac7b19b2f72..ffc3358cba57 100644 --- a/Documentation/ABI/testing/sysfs-block-device +++ b/Documentation/ABI/testing/sysfs-block-device @@ -95,3 +95,12 @@ Description: This file does not exist if the HBA driver does not implement support for the SATA NCQ priority feature, regardless of the device support for this feature. + + +What: /sys/block/*/device/cdl_supported +Date: May, 2023 +KernelVersion: v6.5 +Contact: linux-scsi@vger.kernel.org +Description: + (RO) Indicates if the device supports the command duration + limits feature found in some ATA and SCSI devices. diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 62d9472e08e9..c03814ce23ca 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -570,6 +570,87 @@ int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, } EXPORT_SYMBOL(scsi_report_opcode); +#define SCSI_CDL_CHECK_BUF_LEN 64 + +static bool scsi_cdl_check_cmd(struct scsi_device *sdev, u8 opcode, u16 sa, + unsigned char *buf) +{ + int ret; + u8 cdlp; + + /* Check operation code */ + ret = scsi_report_opcode(sdev, buf, SCSI_CDL_CHECK_BUF_LEN, opcode, sa); + if (ret <= 0) + return false; + + if ((buf[1] & 0x03) != 0x03) + return false; + + /* See SPC-6, one command format of REPORT SUPPORTED OPERATION CODES */ + cdlp = (buf[1] & 0x18) >> 3; + if (buf[0] & 0x01) { + /* rwcdlp == 1 */ + switch (cdlp) { + case 0x01: + /* T2A page */ + return true; + case 0x02: + /* T2B page */ + return true; + } + } else { + /* rwcdlp == 0 */ + switch (cdlp) { + case 0x01: + /* A page */ + return true; + case 0x02: + /* B page */ + return true; + } + } + + return false; +} + +/** + * 
scsi_cdl_check - Check if a SCSI device supports Command Duration Limits + * @sdev: The device to check + */ +void scsi_cdl_check(struct scsi_device *sdev) +{ + bool cdl_supported; + unsigned char *buf; + + buf = kmalloc(SCSI_CDL_CHECK_BUF_LEN, GFP_KERNEL); + if (!buf) { + sdev->cdl_supported = 0; + return; + } + + /* Check support for READ_16, WRITE_16, READ_32 and WRITE_32 commands */ + cdl_supported = + scsi_cdl_check_cmd(sdev, READ_16, 0, buf) || + scsi_cdl_check_cmd(sdev, WRITE_16, 0, buf) || + scsi_cdl_check_cmd(sdev, VARIABLE_LENGTH_CMD, READ_32, buf) || + scsi_cdl_check_cmd(sdev, VARIABLE_LENGTH_CMD, WRITE_32, buf); + if (cdl_supported) { + /* + * We have CDL support: force the use of READ16/WRITE16. + * READ32 and WRITE32 will be used for devices that support + * the T10_PI_TYPE2_PROTECTION protection type. + */ + sdev->use_16_for_rw = 1; + sdev->use_10_for_rw = 0; + + sdev->cdl_supported = 1; + } else { + sdev->cdl_supported = 0; + } + + kfree(buf); +} + /** * scsi_device_get - get an additional reference to a scsi_device * @sdev: device to get a reference to diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index d217be323cc6..aa13feb17c62 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1087,6 +1087,8 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (sdev->scsi_level >= SCSI_3) scsi_attach_vpd(sdev); + scsi_cdl_check(sdev); + sdev->max_queue_depth = sdev->queue_depth; WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map.depth); sdev->sdev_bflags = *bflags; @@ -1624,6 +1626,7 @@ void scsi_rescan_device(struct device *dev) device_lock(dev); scsi_attach_vpd(sdev); + scsi_cdl_check(sdev); if (sdev->handler && sdev->handler->rescan) sdev->handler->rescan(sdev); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 603e8fcfcb8a..98fcbbf1c1e3 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -670,6 +670,7 @@ sdev_rd_attr (scsi_level, "%d\n"); 
sdev_rd_attr (vendor, "%.8s\n"); sdev_rd_attr (model, "%.16s\n"); sdev_rd_attr (rev, "%.4s\n"); +sdev_rd_attr (cdl_supported, "%d\n"); static ssize_t sdev_show_device_busy(struct device *dev, struct device_attribute *attr, @@ -1300,6 +1301,7 @@ static struct attribute *scsi_sdev_attrs[] = { &dev_attr_preferred_path.attr, #endif &dev_attr_queue_ramp_up_period.attr, + &dev_attr_cdl_supported.attr, REF_EVT(media_change), REF_EVT(inquiry_change_reported), REF_EVT(capacity_change_reported), diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c93c5aaf637e..6b8df9e253a0 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -218,6 +218,8 @@ struct scsi_device { unsigned silence_suspend:1; /* Do not print runtime PM related messages */ unsigned no_vpd_size:1; /* No VPD size reported in header */ + unsigned cdl_supported:1; /* Command duration limits supported */ + unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ @@ -364,6 +366,7 @@ extern int scsi_register_device_handler(struct scsi_device_handler *scsi_dh); extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); +void scsi_cdl_check(struct scsi_device *sdev); extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int __must_check scsi_device_get(struct scsi_device *); From 1b22cfb14142aba7742d307c4f8d7006f919308c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:42 +0200 Subject: [PATCH 09/19] scsi: core: Allow enabling and disabling command duration limits Add the sysfs scsi_device attribute cdl_enable to allow a user to enable or disable a device command duration limits feature. CDL is disabled by default. This feature must be explicitly enabled by a user by setting the cdl_enable attribute to 1. 
The new function scsi_cdl_enable() does not do anything besides setting the cdl_enable field of struct scsi_device in the case of a (real) SCSI device (e.g. a SAS HDD). For ATA devices, the command duration limits feature needs to be enabled/disabled using the ATA feature sub-page of the control mode page. To do so, the scsi_cdl_enable() function checks if this mode page is supported using scsi_mode_sense(). If it is, scsi_mode_select() is used to enable and disable CDL. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-10-nks@flawful.org Signed-off-by: Martin K. Petersen --- Documentation/ABI/testing/sysfs-block-device | 13 ++++ drivers/scsi/scsi.c | 62 ++++++++++++++++++++ drivers/scsi/scsi_sysfs.c | 28 +++++++++ include/scsi/scsi_device.h | 2 + 4 files changed, 105 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-block-device b/Documentation/ABI/testing/sysfs-block-device index ffc3358cba57..2d543cfa4079 100644 --- a/Documentation/ABI/testing/sysfs-block-device +++ b/Documentation/ABI/testing/sysfs-block-device @@ -104,3 +104,16 @@ Contact: linux-scsi@vger.kernel.org Description: (RO) Indicates if the device supports the command duration limits feature found in some ATA and SCSI devices. + + +What: /sys/block/*/device/cdl_enable +Date: May, 2023 +KernelVersion: v6.5 +Contact: linux-scsi@vger.kernel.org +Description: + (RW) For a device supporting the command duration limits + feature, write to the file to turn on or off the feature. + By default this feature is turned off. + Writing "1" to this file enables the use of command duration + limits for read and write commands in the kernel and turns on + the feature on the device. Writing "0" disables the feature.
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index c03814ce23ca..c4bf99a842f3 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -651,6 +651,68 @@ void scsi_cdl_check(struct scsi_device *sdev) kfree(buf); } +/** + * scsi_cdl_enable - Enable or disable a SCSI device supports for Command + * Duration Limits + * @sdev: The target device + * @enable: the target state + */ +int scsi_cdl_enable(struct scsi_device *sdev, bool enable) +{ + struct scsi_mode_data data; + struct scsi_sense_hdr sshdr; + struct scsi_vpd *vpd; + bool is_ata = false; + char buf[64]; + int ret; + + if (!sdev->cdl_supported) + return -EOPNOTSUPP; + + rcu_read_lock(); + vpd = rcu_dereference(sdev->vpd_pg89); + if (vpd) + is_ata = true; + rcu_read_unlock(); + + /* + * For ATA devices, CDL needs to be enabled with a SET FEATURES command. + */ + if (is_ata) { + char *buf_data; + int len; + + ret = scsi_mode_sense(sdev, 0x08, 0x0a, 0xf2, buf, sizeof(buf), + 5 * HZ, 3, &data, NULL); + if (ret) + return -EINVAL; + + /* Enable CDL using the ATA feature page */ + len = min_t(size_t, sizeof(buf), + data.length - data.header_length - + data.block_descriptor_length); + buf_data = buf + data.header_length + + data.block_descriptor_length; + if (enable) + buf_data[4] = 0x02; + else + buf_data[4] = 0; + + ret = scsi_mode_select(sdev, 1, 0, buf_data, len, 5 * HZ, 3, + &data, &sshdr); + if (ret) { + if (scsi_sense_valid(&sshdr)) + scsi_print_sense_hdr(sdev, + dev_name(&sdev->sdev_gendev), &sshdr); + return ret; + } + } + + sdev->cdl_enable = enable; + + return 0; +} + /** * scsi_device_get - get an additional reference to a scsi_device * @sdev: device to get a reference to diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 98fcbbf1c1e3..60317676e45f 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1222,6 +1222,33 @@ static DEVICE_ATTR(queue_ramp_up_period, S_IRUGO | S_IWUSR, sdev_show_queue_ramp_up_period, sdev_store_queue_ramp_up_period); +static 
ssize_t sdev_show_cdl_enable(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + + return sysfs_emit(buf, "%d\n", (int)sdev->cdl_enable); +} + +static ssize_t sdev_store_cdl_enable(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + bool v; + + if (kstrtobool(buf, &v)) + return -EINVAL; + + ret = scsi_cdl_enable(to_scsi_device(dev), v); + if (ret) + return ret; + + return count; +} +static DEVICE_ATTR(cdl_enable, S_IRUGO | S_IWUSR, + sdev_show_cdl_enable, sdev_store_cdl_enable); + static umode_t scsi_sdev_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { @@ -1302,6 +1329,7 @@ static struct attribute *scsi_sdev_attrs[] = { #endif &dev_attr_queue_ramp_up_period.attr, &dev_attr_cdl_supported.attr, + &dev_attr_cdl_enable.attr, REF_EVT(media_change), REF_EVT(inquiry_change_reported), REF_EVT(capacity_change_reported), diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6b8df9e253a0..b2cdb078b7bd 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -219,6 +219,7 @@ struct scsi_device { unsigned no_vpd_size:1; /* No VPD size reported in header */ unsigned cdl_supported:1; /* Command duration limits supported */ + unsigned cdl_enable:1; /* Enable/disable Command duration limits */ unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ @@ -367,6 +368,7 @@ extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); void scsi_cdl_check(struct scsi_device *sdev); +int scsi_cdl_enable(struct scsi_device *sdev, bool enable); extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int __must_check scsi_device_get(struct scsi_device *); From e59e80cfef60366ce4dda96e9322a0b5947158a6 Mon Sep 17 
00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:43 +0200 Subject: [PATCH 10/19] scsi: sd: Set read/write command CDL index Introduce the command duration limits helper function sd_cdl_dld() to set the DLD bits of READ/WRITE 16 and READ/WRITE 32 commands to indicate to the device the command duration limit descriptor to apply to the commands. When command duration limits are enabled, sd_cdl_dld() obtains the index of the descriptor to apply to the command using the hints field of the request IO priority value (hints IOPRIO_HINT_DEV_DURATION_LIMIT_1 to IOPRIO_HINT_DEV_DURATION_LIMIT_7). If command duration limits is disabled (which is the default), the limit index "0" is always used to indicate "no limit" for a command. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-11-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a76092663246..3825e4d159fc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1041,13 +1041,14 @@ static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd) static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, - unsigned char flags) + unsigned char flags, unsigned int dld) { cmd->cmd_len = SD_EXT_CDB_SIZE; cmd->cmnd[0] = VARIABLE_LENGTH_CMD; cmd->cmnd[7] = 0x18; /* Additional CDB len */ cmd->cmnd[9] = write ? 
WRITE_32 : READ_32; cmd->cmnd[10] = flags; + cmd->cmnd[11] = dld & 0x07; put_unaligned_be64(lba, &cmd->cmnd[12]); put_unaligned_be32(lba, &cmd->cmnd[20]); /* Expected Indirect LBA */ put_unaligned_be32(nr_blocks, &cmd->cmnd[28]); @@ -1057,12 +1058,12 @@ static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write, static blk_status_t sd_setup_rw16_cmnd(struct scsi_cmnd *cmd, bool write, sector_t lba, unsigned int nr_blocks, - unsigned char flags) + unsigned char flags, unsigned int dld) { cmd->cmd_len = 16; cmd->cmnd[0] = write ? WRITE_16 : READ_16; - cmd->cmnd[1] = flags; - cmd->cmnd[14] = 0; + cmd->cmnd[1] = flags | ((dld >> 2) & 0x01); + cmd->cmnd[14] = (dld & 0x03) << 6; cmd->cmnd[15] = 0; put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be32(nr_blocks, &cmd->cmnd[10]); @@ -1114,6 +1115,31 @@ static blk_status_t sd_setup_rw6_cmnd(struct scsi_cmnd *cmd, bool write, return BLK_STS_OK; } +/* + * Check if a command has a duration limit set. If it does, and the target + * device supports CDL and the feature is enabled, return the limit + * descriptor index to use. Return 0 (no limit) otherwise. + */ +static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd) +{ + struct scsi_device *sdp = sdkp->device; + int hint; + + if (!sdp->cdl_supported || !sdp->cdl_enable) + return 0; + + /* + * Use "no limit" if the request ioprio does not specify a duration + * limit hint. 
+ */ + hint = IOPRIO_PRIO_HINT(req_get_ioprio(scsi_cmd_to_rq(scmd))); + if (hint < IOPRIO_HINT_DEV_DURATION_LIMIT_1 || + hint > IOPRIO_HINT_DEV_DURATION_LIMIT_7) + return 0; + + return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1; +} + static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); @@ -1125,6 +1151,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) unsigned int mask = logical_to_sectors(sdp, 1) - 1; bool write = rq_data_dir(rq) == WRITE; unsigned char protect, fua; + unsigned int dld; blk_status_t ret; unsigned int dif; bool dix; @@ -1174,6 +1201,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0; dix = scsi_prot_sg_count(cmd); dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type); + dld = sd_cdl_dld(sdkp, cmd); if (dif || dix) protect = sd_setup_protect_cmnd(cmd, dix, dif); @@ -1182,10 +1210,10 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks, - protect | fua); + protect | fua, dld); } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks, - protect | fua); + protect | fua, dld); } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw || protect) { ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks, From 390e2d1a587405a522dc6b433d45648f895a352c Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:44 +0200 Subject: [PATCH 11/19] scsi: sd: Handle read/write CDL timeout failures Commands using a duration limit descriptor that has limit policies set to a value other than 0x0 may be failed by the device if one of the limits are exceeded. 
For such commands, since the failure is the result of the user duration limit configuration and workload, the commands should not be retried and should be terminated immediately. Furthermore, to allow the user to differentiate these "soft" failures from hard errors due to hardware problems, a different error code than EIO should be returned. There are 2 cases to consider: (1) The failure is due to a limit policy failing the command with a check condition sense key, that is, any limit policy other than 0xD. For this case, scsi_check_sense() is modified to detect failures with the ABORTED COMMAND sense key and the COMMAND TIMEOUT BEFORE PROCESSING or COMMAND TIMEOUT DURING PROCESSING or COMMAND TIMEOUT DURING PROCESSING DUE TO ERROR RECOVERY additional sense code. For these failures, a SUCCESS disposition is returned so that scsi_finish_command() is called to terminate the command. (2) The failure is due to a limit policy set to 0xD, which results in the command being terminated with a GOOD status, COMPLETED sense key, and DATA CURRENTLY UNAVAILABLE additional sense code. To handle this case, scsi_check_sense() is modified to return a SUCCESS disposition so that scsi_finish_command() is called to terminate the command. In addition, scsi_decide_disposition() has to be modified to see if a command being terminated with GOOD status has sense data. This is as defined in SCSI Primary Commands - 6 (SPC-6), so all according to spec, even if GOOD status commands were not checked before. If scsi_check_sense() detects sense data representing a duration limit, scsi_check_sense() will set the newly introduced SCSI ML byte SCSIML_STAT_DL_TIMEOUT. This SCSI ML byte is checked in scsi_noretry_cmd(), so that a command that failed because of a CDL timeout cannot be retried. The SCSI ML byte is also checked in scsi_result_to_blk_status() to complete the command request with the BLK_STS_DURATION_LIMIT status, which results in the user seeing ETIME errors for the failed commands.
Co-developed-by: Damien Le Moal Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-12-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 45 +++++++++++++++++++++++++++++++++++++++ drivers/scsi/scsi_lib.c | 4 ++++ drivers/scsi/scsi_priv.h | 1 + 3 files changed, 50 insertions(+) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 8b7d227bfe1c..c67cdcdc3ba8 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -536,6 +536,7 @@ static inline void set_scsi_ml_byte(struct scsi_cmnd *cmd, u8 status) */ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) { + struct request *req = scsi_cmd_to_rq(scmd); struct scsi_device *sdev = scmd->device; struct scsi_sense_hdr sshdr; @@ -595,6 +596,22 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) if (sshdr.asc == 0x10) /* DIF */ return SUCCESS; + /* + * Check aborts due to command duration limit policy: + * ABORTED COMMAND additional sense code with the + * COMMAND TIMEOUT BEFORE PROCESSING or + * COMMAND TIMEOUT DURING PROCESSING or + * COMMAND TIMEOUT DURING PROCESSING DUE TO ERROR RECOVERY + * additional sense code qualifiers. 
+ */ + if (sshdr.asc == 0x2e && + sshdr.ascq >= 0x01 && sshdr.ascq <= 0x03) { + set_scsi_ml_byte(scmd, SCSIML_STAT_DL_TIMEOUT); + req->cmd_flags |= REQ_FAILFAST_DEV; + req->rq_flags |= RQF_QUIET; + return SUCCESS; + } + if (sshdr.asc == 0x44 && sdev->sdev_bflags & BLIST_RETRY_ITF) return ADD_TO_MLQUEUE; if (sshdr.asc == 0xc1 && sshdr.ascq == 0x01 && @@ -691,6 +708,14 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) } return SUCCESS; + case COMPLETED: + if (sshdr.asc == 0x55 && sshdr.ascq == 0x0a) { + set_scsi_ml_byte(scmd, SCSIML_STAT_DL_TIMEOUT); + req->cmd_flags |= REQ_FAILFAST_DEV; + req->rq_flags |= RQF_QUIET; + } + return SUCCESS; + default: return SUCCESS; } @@ -785,6 +810,14 @@ static enum scsi_disposition scsi_eh_completed_normally(struct scsi_cmnd *scmd) switch (get_status_byte(scmd)) { case SAM_STAT_GOOD: scsi_handle_queue_ramp_up(scmd->device); + if (scmd->sense_buffer && SCSI_SENSE_VALID(scmd)) + /* + * If we have sense data, call scsi_check_sense() in + * order to set the correct SCSI ML byte (if any). + * No point in checking the return value, since the + * command has already completed successfully. + */ + scsi_check_sense(scmd); fallthrough; case SAM_STAT_COMMAND_TERMINATED: return SUCCESS; @@ -1807,6 +1840,10 @@ bool scsi_noretry_cmd(struct scsi_cmnd *scmd) return !!(req->cmd_flags & REQ_FAILFAST_DRIVER); } + /* Never retry commands aborted due to a duration limit timeout */ + if (scsi_ml_byte(scmd->result) == SCSIML_STAT_DL_TIMEOUT) + return true; + if (!scsi_status_is_check_condition(scmd->result)) return false; @@ -1966,6 +2003,14 @@ enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd) if (scmd->cmnd[0] == REPORT_LUNS) scmd->device->sdev_target->expecting_lun_change = 0; scsi_handle_queue_ramp_up(scmd->device); + if (scmd->sense_buffer && SCSI_SENSE_VALID(scmd)) + /* + * If we have sense data, call scsi_check_sense() in + * order to set the correct SCSI ML byte (if any). 
+ * No point in checking the return value, since the + * command has already completed successfully. + */ + scsi_check_sense(scmd); fallthrough; case SAM_STAT_COMMAND_TERMINATED: return SUCCESS; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 633c4e8af830..b894432ca0b9 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -601,6 +601,8 @@ static blk_status_t scsi_result_to_blk_status(int result) return BLK_STS_MEDIUM; case SCSIML_STAT_TGT_FAILURE: return BLK_STS_TARGET; + case SCSIML_STAT_DL_TIMEOUT: + return BLK_STS_DURATION_LIMIT; } switch (host_byte(result)) { @@ -798,6 +800,8 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) blk_stat = BLK_STS_ZONE_OPEN_RESOURCE; } break; + case COMPLETED: + fallthrough; default: action = ACTION_FAIL; break; diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 74324fba4281..f42388ecb024 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -27,6 +27,7 @@ enum scsi_ml_status { SCSIML_STAT_NOSPC = 0x02, /* Space allocation on the dev failed */ SCSIML_STAT_MED_ERROR = 0x03, /* Medium error */ SCSIML_STAT_TGT_FAILURE = 0x04, /* Permanent target failure */ + SCSIML_STAT_DL_TIMEOUT = 0x05, /* Command Duration Limit timeout */ }; static inline u8 scsi_ml_byte(int result) From 91a8967ca7f4b8eabe021b1ba974a992cfca2a07 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:45 +0200 Subject: [PATCH 12/19] scsi: ata: libata-scsi: Remove unnecessary !cmd checks There is no need to check if !cmd as this can only happen for ATA internal commands which uses the ATA internal tag (32). Most users of ata_scsi_set_sense() are from _xlat functions that translate a scsicmd to an ATA command. These obviously have a qc->scsicmd. ata_scsi_qc_complete() can also call ata_scsi_set_sense() via ata_gen_passthru_sense() / ata_gen_ata_sense(), called via ata_scsi_qc_complete(). 
This callback is only called for translated commands, so it also has a qc->scsicmd. ata_eh_analyze_ncq_error(): the NCQ error log can only contain a 0-31 value, so it will never be able to get the ATA internal tag (32). ata_eh_request_sense(): only called by ata_eh_analyze_tf(), which is only called when iterating the QCs using ata_qc_for_each_raw(), which does not include the internal tag. Since there is no existing call site where cmd can be NULL, remove the !cmd check from ata_scsi_set_sense() and ata_scsi_set_sense_information(). Suggested-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-13-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7bb12deab70c..072785808751 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -209,9 +209,6 @@ void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd, { bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE); - if (!cmd) - return; - scsi_build_sense(cmd, d_sense, sk, asc, ascq); } @@ -221,9 +218,6 @@ void ata_scsi_set_sense_information(struct ata_device *dev, { u64 information; - if (!cmd) - return; - information = ata_tf_read_block(tf, dev); if (information == U64_MAX) return; From 24aeebbf8ea94b5c0cde06350b06e79f5beb28ae Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:46 +0200 Subject: [PATCH 13/19] scsi: ata: libata: Change ata_eh_request_sense() to not set CHECK_CONDITION Currently, ata_eh_request_sense() unconditionally sets the scsicmd->result to SAM_STAT_CHECK_CONDITION. For Command Duration Limits policy 0xD: The device shall complete the command without error (SAM_STAT_GOOD) with the additional sense code set to DATA CURRENTLY UNAVAILABLE.
It is perfectly fine to have sense data for a command that returned completion without error. In order to support CDL policy 0xD, we have to remove this assumption that having sense data means that the command failed (SAM_STAT_CHECK_CONDITION). Change ata_eh_request_sense() to not set SAM_STAT_CHECK_CONDITION, and instead move the setting of SAM_STAT_CHECK_CONDITION to the single caller that wants SAM_STAT_CHECK_CONDITION set; that way ata_eh_request_sense() can be reused in a follow-up patch that adds support for CDL policy 0xD. The only caller of ata_eh_request_sense() is protected by: if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)), so we can remove this duplicated check from ata_eh_request_sense() itself. Additionally, ata_eh_request_sense() is only called from ata_eh_analyze_tf(), which is only called when iterating the QCs using ata_qc_for_each_raw(), which does not include the internal tag, so cmd can never be NULL (all non-internal commands have qc->scsicmd set), so remove the !cmd check as well. Reviewed-by: Hannes Reinecke Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-14-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-eh.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index a6c901811802..598ae07195b6 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1401,8 +1401,11 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) * * LOCKING: * Kernel thread context (may sleep). + * + * RETURNS: + * true if sense data could be fetched, false otherwise.
*/ -static void ata_eh_request_sense(struct ata_queued_cmd *qc) +static bool ata_eh_request_sense(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; struct ata_device *dev = qc->dev; @@ -1411,15 +1414,12 @@ static void ata_eh_request_sense(struct ata_queued_cmd *qc) if (ata_port_is_frozen(qc->ap)) { ata_dev_warn(dev, "sense data available but port frozen\n"); - return; + return false; } - if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID) - return; - if (!ata_id_sense_reporting_enabled(dev->id)) { ata_dev_warn(qc->dev, "sense data reporting disabled\n"); - return; + return false; } ata_tf_init(dev, &tf); @@ -1432,13 +1432,19 @@ static void ata_eh_request_sense(struct ata_queued_cmd *qc) /* Ignore err_mask; ATA_ERR might be set */ if (tf.status & ATA_SENSE) { if (ata_scsi_sense_is_valid(tf.lbah, tf.lbam, tf.lbal)) { - ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal); + /* Set sense without also setting scsicmd->result */ + scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE, + cmd->sense_buffer, tf.lbah, + tf.lbam, tf.lbal); qc->flags |= ATA_QCFLAG_SENSE_VALID; + return true; } } else { ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", tf.status, err_mask); } + + return false; } /** @@ -1588,8 +1594,9 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc) * was not included in the NCQ command error log * (i.e. NCQ autosense is not supported by the device). 
*/ - if (!(qc->flags & ATA_QCFLAG_SENSE_VALID) && (stat & ATA_SENSE)) - ata_eh_request_sense(qc); + if (!(qc->flags & ATA_QCFLAG_SENSE_VALID) && + (stat & ATA_SENSE) && ata_eh_request_sense(qc)) + set_status_byte(qc->scsicmd, SAM_STAT_CHECK_CONDITION); if (err & ATA_ICRC) qc->err_mask |= AC_ERR_ATA_BUS; if (err & (ATA_UNC | ATA_AMNF)) From 62e4a60e0cdb540b314061469e025fd834ff300c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:47 +0200 Subject: [PATCH 14/19] scsi: ata: libata: Detect support for command duration limits Use the supported capabilities identify device data log page to detect if a device supports the command duration limits feature. For devices supporting this feature, set the device flag ATA_DFLAG_CDL. To support SCSI-ATA translation, retrieve the command duration limits log page 18h and cache this page content using the cdl array added to the ata_device data structure. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-15-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- drivers/ata/libata-core.c | 52 ++++++++++++++++++++++++++++++++++++++- drivers/ata/libata-scsi.c | 17 ++++++------- include/linux/ata.h | 5 +++- include/linux/libata.h | 25 +++++++++++-------- 4 files changed, 78 insertions(+), 21 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8bf612bdd61a..83fe037f63b9 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2367,6 +2367,54 @@ static void ata_dev_config_trusted(struct ata_device *dev) dev->flags |= ATA_DFLAG_TRUSTED; } +static void ata_dev_config_cdl(struct ata_device *dev) +{ + struct ata_port *ap = dev->link->ap; + unsigned int err_mask; + u64 val; + + if (ata_id_major_version(dev->id) < 12) + goto not_supported; + + if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) || + !ata_identify_page_supported(dev, ATA_LOG_SUPPORTED_CAPABILITIES)) + goto not_supported; + + err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, + ATA_LOG_SUPPORTED_CAPABILITIES, + ap->sector_buf, 1); + if (err_mask) + goto not_supported; + + /* Check Command Duration Limit Supported bits */ + val = get_unaligned_le64(&ap->sector_buf[168]); + if (!(val & BIT_ULL(63)) || !(val & BIT_ULL(0))) + goto not_supported; + + /* Warn the user if command duration guideline is not supported */ + if (!(val & BIT_ULL(1))) + ata_dev_warn(dev, + "Command duration guideline is not supported\n"); + + /* + * Command duration limits is supported: cache the CDL log page 18h + * (command duration descriptors). 
+ */ + err_mask = ata_read_log_page(dev, ATA_LOG_CDL, 0, ap->sector_buf, 1); + if (err_mask) { + ata_dev_warn(dev, "Read Command Duration Limits log failed\n"); + goto not_supported; + } + + memcpy(dev->cdl, ap->sector_buf, ATA_LOG_CDL_SIZE); + dev->flags |= ATA_DFLAG_CDL; + + return; + +not_supported: + dev->flags &= ~ATA_DFLAG_CDL; +} + static int ata_dev_config_lba(struct ata_device *dev) { const u16 *id = dev->id; @@ -2534,13 +2582,14 @@ static void ata_dev_print_features(struct ata_device *dev) return; ata_dev_info(dev, - "Features:%s%s%s%s%s%s%s\n", + "Features:%s%s%s%s%s%s%s%s\n", dev->flags & ATA_DFLAG_FUA ? " FUA" : "", dev->flags & ATA_DFLAG_TRUSTED ? " Trust" : "", dev->flags & ATA_DFLAG_DA ? " Dev-Attention" : "", dev->flags & ATA_DFLAG_DEVSLP ? " Dev-Sleep" : "", dev->flags & ATA_DFLAG_NCQ_SEND_RECV ? " NCQ-sndrcv" : "", dev->flags & ATA_DFLAG_NCQ_PRIO ? " NCQ-prio" : "", + dev->flags & ATA_DFLAG_CDL ? " CDL" : "", dev->cpr_log ? " CPR" : ""); } @@ -2702,6 +2751,7 @@ int ata_dev_configure(struct ata_device *dev) ata_dev_config_zac(dev); ata_dev_config_trusted(dev); ata_dev_config_cpr(dev); + ata_dev_config_cdl(dev); dev->cdb_len = 32; if (print_info) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 072785808751..3434fec8ca5c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -47,15 +47,14 @@ typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc); static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev); -#define RW_RECOVERY_MPAGE 0x1 -#define RW_RECOVERY_MPAGE_LEN 12 -#define CACHE_MPAGE 0x8 -#define CACHE_MPAGE_LEN 20 -#define CONTROL_MPAGE 0xa -#define CONTROL_MPAGE_LEN 12 -#define ALL_MPAGES 0x3f -#define ALL_SUB_MPAGES 0xff - +#define RW_RECOVERY_MPAGE 0x1 +#define RW_RECOVERY_MPAGE_LEN 12 +#define CACHE_MPAGE 0x8 +#define CACHE_MPAGE_LEN 20 +#define CONTROL_MPAGE 0xa +#define CONTROL_MPAGE_LEN 12 +#define ALL_MPAGES 0x3f +#define ALL_SUB_MPAGES 
0xff static const u8 def_rw_recovery_mpage[RW_RECOVERY_MPAGE_LEN] = { RW_RECOVERY_MPAGE, diff --git a/include/linux/ata.h b/include/linux/ata.h index c224dbddb9b2..1eda46b63dcc 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -322,15 +322,18 @@ enum { ATA_LOG_SATA_NCQ = 0x10, ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, + ATA_LOG_CDL = 0x18, + ATA_LOG_CDL_SIZE = ATA_SECT_SIZE, ATA_LOG_IDENTIFY_DEVICE = 0x30, ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ + ATA_LOG_SUPPORTED_CAPABILITIES = 0x03, ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, - /* Identify device SATA settings log:*/ + /* Identify device SATA settings log: */ ATA_LOG_DEVSLP_OFFSET = 0x30, ATA_LOG_DEVSLP_SIZE = 0x08, ATA_LOG_DEVSLP_MDAT = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 311cd93377c7..e8a45f7f3f5c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -94,17 +94,18 @@ enum { ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 11), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 12), /* device supports NCQ priority */ - ATA_DFLAG_CFG_MASK = (1 << 13) - 1, + ATA_DFLAG_CDL = (1 << 13), /* supports cmd duration limits */ + ATA_DFLAG_CFG_MASK = (1 << 14) - 1, - ATA_DFLAG_PIO = (1 << 13), /* device limited to PIO mode */ - ATA_DFLAG_NCQ_OFF = (1 << 14), /* device limited to non-NCQ mode */ - ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ - ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ - ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ - ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ - ATA_DFLAG_INIT_MASK = (1 << 19) - 1, + ATA_DFLAG_PIO = (1 << 14), /* device limited to PIO mode */ + ATA_DFLAG_NCQ_OFF = (1 << 15), /* device limited to non-NCQ mode */ + ATA_DFLAG_SLEEPING = (1 << 16), /* device is sleeping */ + ATA_DFLAG_DUBIOUS_XFER = (1 << 
17), /* data transfer not verified */ + ATA_DFLAG_NO_UNLOAD = (1 << 18), /* device doesn't support unload */ + ATA_DFLAG_UNLOCK_HPA = (1 << 19), /* unlock HPA */ + ATA_DFLAG_INIT_MASK = (1 << 20) - 1, - ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 19), /* Priority cmds sent to dev */ + ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ @@ -115,7 +116,8 @@ enum { ATA_DFLAG_FEATURES_MASK = (ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \ ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \ - ATA_DFLAG_NCQ_PRIO | ATA_DFLAG_FUA), + ATA_DFLAG_NCQ_PRIO | ATA_DFLAG_FUA | \ + ATA_DFLAG_CDL), ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ @@ -709,6 +711,9 @@ struct ata_device { /* Concurrent positioning ranges */ struct ata_cpr_log *cpr_log; + /* Command Duration Limits log support */ + u8 cdl[ATA_LOG_CDL_SIZE]; + /* error history */ int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ From 0de558015286374443cb1920d32bbf54bd045eb7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:48 +0200 Subject: [PATCH 15/19] scsi: ata: libata-scsi: Handle CDL bits in ata_scsiop_maint_in() For a scsi MAINTENANCE_IN/MI_REPORT_SUPPORTED_OPERATION_CODES operation, add the translation of the rwcdlp and cdlp bits for the READ 16 and WRITE 16 commands. If the ATA device does not support command duration limits, these bits are always 0. If the ATA device supports command duration limits, the rwcdlp bit is set to 1 for READ 16 and WRITE 16 and the cdlp bits are set to 0x1 for READ 16 and 0x2 for WRITE 16. These correspond to the T2A mode page containing the read descriptors and to the T2B mode page containing the write descriptors, as defined in SAT-5. 
Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-16-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3434fec8ca5c..4245242664d9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3235,7 +3235,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) { struct ata_device *dev = args->dev; u8 *cdb = args->cmd->cmnd; - u8 supported = 0; + u8 supported = 0, cdlp = 0, rwcdlp = 0; unsigned int err = 0; if (cdb[2] != 1 && cdb[2] != 3) { @@ -3262,10 +3262,8 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) case MAINTENANCE_IN: case READ_6: case READ_10: - case READ_16: case WRITE_6: case WRITE_10: - case WRITE_16: case ATA_12: case ATA_16: case VERIFY: @@ -3275,6 +3273,28 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) case START_STOP: supported = 3; break; + case READ_16: + supported = 3; + if (dev->flags & ATA_DFLAG_CDL) { + /* + * CDL read descriptors map to the T2A page, that is, + * rwcdlp = 0x01 and cdlp = 0x01 + */ + rwcdlp = 0x01; + cdlp = 0x01 << 3; + } + break; + case WRITE_16: + supported = 3; + if (dev->flags & ATA_DFLAG_CDL) { + /* + * CDL write descriptors map to the T2B page, that is, + * rwcdlp = 0x01 and cdlp = 0x02 + */ + rwcdlp = 0x01; + cdlp = 0x02 << 3; + } + break; case ZBC_IN: case ZBC_OUT: if (ata_id_zoned_cap(dev->id) || @@ -3290,7 +3310,9 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) break; } out: - rbuf[1] = supported; /* supported */ + /* One command format */ + rbuf[0] = rwcdlp; + rbuf[1] = cdlp | supported; return err; } From 673b2fe6ff1da29d9e70bd484903964772dcae3d Mon Sep 17 00:00:00 2001 
From: Damien Le Moal Date: Thu, 11 May 2023 03:13:49 +0200 Subject: [PATCH 16/19] scsi: ata: libata-scsi: Add support for CDL pages mode sense Modify ata_scsiop_mode_sense() and ata_msense_control() to support mode sense access to the T2A and T2B sub-pages of the control mode page. ata_msense_control() is modified to support sub-pages. The T2A sub-page is generated using the read descriptors of the command duration limits log page 18h. The T2B sub-page is generated using the write descriptors of the same log page. With the addition of these sub-pages, getting all sub-pages of the control mode page is also supported by increasing the value of ATA_SCSI_RBUF_SIZE from 576B up to 2048B to ensure that all sub-pages fit in the fill buffer. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-17-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 148 ++++++++++++++++++++++++++++++++------ 1 file changed, 127 insertions(+), 21 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 4245242664d9..4a4c6405d52e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -37,7 +37,7 @@ #include "libata.h" #include "libata-transport.h" -#define ATA_SCSI_RBUF_SIZE 576 +#define ATA_SCSI_RBUF_SIZE 2048 static DEFINE_SPINLOCK(ata_scsi_rbuf_lock); static u8 ata_scsi_rbuf[ATA_SCSI_RBUF_SIZE]; @@ -55,6 +55,9 @@ static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap, #define CONTROL_MPAGE_LEN 12 #define ALL_MPAGES 0x3f #define ALL_SUB_MPAGES 0xff +#define CDL_T2A_SUB_MPAGE 0x07 +#define CDL_T2B_SUB_MPAGE 0x08 +#define CDL_T2_SUB_MPAGE_LEN 232 static const u8 def_rw_recovery_mpage[RW_RECOVERY_MPAGE_LEN] = { RW_RECOVERY_MPAGE, @@ -2196,10 +2199,98 @@ static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable) return sizeof(def_cache_mpage); } 
+/* + * Simulate MODE SENSE control mode page, sub-page 0. + */ +static unsigned int ata_msense_control_spg0(struct ata_device *dev, u8 *buf, + bool changeable) +{ + modecpy(buf, def_control_mpage, + sizeof(def_control_mpage), changeable); + if (changeable) { + /* ata_mselect_control() */ + buf[2] |= (1 << 2); + } else { + bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE); + + /* descriptor format sense data */ + buf[2] |= (d_sense << 2); + } + + return sizeof(def_control_mpage); +} + +/* + * Translate an ATA duration limit in microseconds to a SCSI duration limit + * using the t2cdlunits 0xa (10ms). Since the SCSI duration limits are 2-bytes + * only, take care of overflows. + */ +static inline u16 ata_xlat_cdl_limit(u8 *buf) +{ + u32 limit = get_unaligned_le32(buf); + + return min_t(u32, limit / 10000, 65535); +} + +/* + * Simulate MODE SENSE control mode page, sub-pages 07h and 08h + * (command duration limits T2A and T2B mode pages). + */ +static unsigned int ata_msense_control_spgt2(struct ata_device *dev, u8 *buf, + u8 spg) +{ + u8 *b, *cdl = dev->cdl, *desc; + u32 policy; + int i; + + /* + * Fill the subpage. The first four bytes of the T2A/T2B mode pages + * are a header. The PAGE LENGTH field is the size of the page + * excluding the header. + */ + buf[0] = CONTROL_MPAGE; + buf[1] = spg; + put_unaligned_be16(CDL_T2_SUB_MPAGE_LEN - 4, &buf[2]); + if (spg == CDL_T2A_SUB_MPAGE) { + /* + * Read descriptors map to the T2A page: + * set perf_vs_duration_guideline.
+ */ + buf[7] = (cdl[0] & 0x03) << 4; + desc = cdl + 64; + } else { + /* Write descriptors map to the T2B page */ + desc = cdl + 288; + } + + /* Fill the T2 page descriptors */ + b = &buf[8]; + policy = get_unaligned_le32(&cdl[0]); + for (i = 0; i < 7; i++, b += 32, desc += 32) { + /* t2cdlunits: fixed to 10ms */ + b[0] = 0x0a; + + /* Max inactive time and its policy */ + put_unaligned_be16(ata_xlat_cdl_limit(&desc[8]), &b[2]); + b[6] = ((policy >> 8) & 0x0f) << 4; + + /* Max active time and its policy */ + put_unaligned_be16(ata_xlat_cdl_limit(&desc[4]), &b[4]); + b[6] |= (policy >> 4) & 0x0f; + + /* Command duration guideline and its policy */ + put_unaligned_be16(ata_xlat_cdl_limit(&desc[16]), &b[10]); + b[14] = policy & 0x0f; + } + + return CDL_T2_SUB_MPAGE_LEN; +} + /** * ata_msense_control - Simulate MODE SENSE control mode page * @dev: ATA device of interest * @buf: output buffer + * @spg: sub-page code * @changeable: whether changeable parameters are requested * * Generate a generic MODE SENSE control mode page. @@ -2208,17 +2299,24 @@ static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable) * None. 
*/ static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf, - bool changeable) + u8 spg, bool changeable) { - modecpy(buf, def_control_mpage, sizeof(def_control_mpage), changeable); - if (changeable) { - buf[2] |= (1 << 2); /* ata_mselect_control() */ - } else { - bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE); + unsigned int n; - buf[2] |= (d_sense << 2); /* descriptor format sense data */ + switch (spg) { + case 0: + return ata_msense_control_spg0(dev, buf, changeable); + case CDL_T2A_SUB_MPAGE: + case CDL_T2B_SUB_MPAGE: + return ata_msense_control_spgt2(dev, buf, spg); + case ALL_SUB_MPAGES: + n = ata_msense_control_spg0(dev, buf, changeable); + n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); + n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); + return n; + default: + return 0; } - return sizeof(def_control_mpage); } /** @@ -2291,13 +2389,24 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) pg = scsicmd[2] & 0x3f; spg = scsicmd[3]; + /* - * No mode subpages supported (yet) but asking for _all_ - * subpages may be valid + * Supported subpages: all subpages and sub-pages 07h and 08h of + * the control page. 
*/ - if (spg && (spg != ALL_SUB_MPAGES)) { - fp = 3; - goto invalid_fld; + if (spg) { + switch (spg) { + case ALL_SUB_MPAGES: + break; + case CDL_T2A_SUB_MPAGE: + case CDL_T2B_SUB_MPAGE: + if (dev->flags & ATA_DFLAG_CDL && pg == CONTROL_MPAGE) + break; + fallthrough; + default: + fp = 3; + goto invalid_fld; + } } switch(pg) { @@ -2310,13 +2419,13 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) break; case CONTROL_MPAGE: - p += ata_msense_control(args->dev, p, page_control == 1); + p += ata_msense_control(args->dev, p, spg, page_control == 1); break; case ALL_MPAGES: p += ata_msense_rw_recovery(p, page_control == 1); p += ata_msense_caching(args->id, p, page_control == 1); - p += ata_msense_control(args->dev, p, page_control == 1); + p += ata_msense_control(args->dev, p, spg, page_control == 1); break; default: /* invalid page code */ @@ -2335,10 +2444,7 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) memcpy(rbuf + 4, sat_blk_desc, sizeof(sat_blk_desc)); } } else { - unsigned int output_len = p - rbuf - 2; - - rbuf[0] = output_len >> 8; - rbuf[1] = output_len; + put_unaligned_be16(p - rbuf - 2, &rbuf[0]); rbuf[3] |= dpofua; if (ebd) { rbuf[7] = sizeof(sat_blk_desc); @@ -3637,7 +3743,7 @@ static int ata_mselect_control(struct ata_queued_cmd *qc, /* * Check that read-only bits are not modified. */ - ata_msense_control(dev, mpage, false); + ata_msense_control_spg0(dev, mpage, false); for (i = 0; i < CONTROL_MPAGE_LEN - 2; i++) { if (i == 0) continue; From df60f9c64576d6d05b59ec5c34addcd61ef1efb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:50 +0200 Subject: [PATCH 17/19] scsi: ata: libata: Add ATA feature control sub-page translation Add support for the ATA feature control sub-page of the control mode page to enable/disable the command duration limits feature using the cdl_ctrl field of the ATA feature control sub-page. 
Both mode sense and mode select translation are supported. For mode sense, the ata device flag ATA_DFLAG_CDL_ENABLED is used to cache the status of the command duration limits feature. Enabling this feature is done using a SET FEATURES command with a cdl action set to 1 when the page cdl_ctrl field value is 0x2 (T2A and T2B pages supported). If this field is 0, CDL is disabled using the SET FEATURES command with a cdl action set to 0. Since a device CDL and NCQ priority features should not be used simultaneously, ata_mselect_control_ata_feature() returns an error when attempting to enable CDL with the device priority feature enabled. Conversely, the function ata_ncq_prio_enable_store() used to enable the use of the device NCQ priority feature through sysfs is modified to return an error if the device CDL feature is enabled. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-18-nks@flawful.org Signed-off-by: Martin K. 
Petersen --- drivers/ata/libata-core.c | 40 ++++++++- drivers/ata/libata-sata.c | 11 ++- drivers/ata/libata-scsi.c | 167 ++++++++++++++++++++++++++++++++------ include/linux/ata.h | 3 + include/linux/libata.h | 1 + 5 files changed, 193 insertions(+), 29 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 83fe037f63b9..cd7aaf202397 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2371,13 +2371,15 @@ static void ata_dev_config_cdl(struct ata_device *dev) { struct ata_port *ap = dev->link->ap; unsigned int err_mask; + bool cdl_enabled; u64 val; if (ata_id_major_version(dev->id) < 12) goto not_supported; if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) || - !ata_identify_page_supported(dev, ATA_LOG_SUPPORTED_CAPABILITIES)) + !ata_identify_page_supported(dev, ATA_LOG_SUPPORTED_CAPABILITIES) || + !ata_identify_page_supported(dev, ATA_LOG_CURRENT_SETTINGS)) goto not_supported; err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, @@ -2396,6 +2398,40 @@ static void ata_dev_config_cdl(struct ata_device *dev) ata_dev_warn(dev, "Command duration guideline is not supported\n"); + /* + * If CDL is marked as enabled, make sure the feature is enabled too. + * Conversely, if CDL is disabled, make sure the feature is turned off. 
+ */ + err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, + ATA_LOG_CURRENT_SETTINGS, + ap->sector_buf, 1); + if (err_mask) + goto not_supported; + + val = get_unaligned_le64(&ap->sector_buf[8]); + cdl_enabled = val & BIT_ULL(63) && val & BIT_ULL(21); + if (dev->flags & ATA_DFLAG_CDL_ENABLED) { + if (!cdl_enabled) { + /* Enable CDL on the device */ + err_mask = ata_dev_set_feature(dev, SETFEATURES_CDL, 1); + if (err_mask) { + ata_dev_err(dev, + "Enable CDL feature failed\n"); + goto not_supported; + } + } + } else { + if (cdl_enabled) { + /* Disable CDL on the device */ + err_mask = ata_dev_set_feature(dev, SETFEATURES_CDL, 0); + if (err_mask) { + ata_dev_err(dev, + "Disable CDL feature failed\n"); + goto not_supported; + } + } + } + /* * Command duration limits is supported: cache the CDL log page 18h * (command duration descriptors). @@ -2412,7 +2448,7 @@ static void ata_dev_config_cdl(struct ata_device *dev) return; not_supported: - dev->flags &= ~ATA_DFLAG_CDL; + dev->flags &= ~(ATA_DFLAG_CDL | ATA_DFLAG_CDL_ENABLED); } static int ata_dev_config_lba(struct ata_device *dev) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index f3e7396e3191..57cb33060c9d 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -907,10 +907,17 @@ static ssize_t ata_ncq_prio_enable_store(struct device *device, goto unlock; } - if (input) + if (input) { + if (dev->flags & ATA_DFLAG_CDL_ENABLED) { + ata_dev_err(dev, + "CDL must be disabled to enable NCQ priority\n"); + rc = -EINVAL; + goto unlock; + } dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLED; - else + } else { dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLED; + } unlock: spin_unlock_irq(ap->lock); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 4a4c6405d52e..91db4e7f4906 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -58,6 +58,8 @@ static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap, #define CDL_T2A_SUB_MPAGE 0x07 #define 
CDL_T2B_SUB_MPAGE 0x08 #define CDL_T2_SUB_MPAGE_LEN 232 +#define ATA_FEATURE_SUB_MPAGE 0xf2 +#define ATA_FEATURE_SUB_MPAGE_LEN 16 static const u8 def_rw_recovery_mpage[RW_RECOVERY_MPAGE_LEN] = { RW_RECOVERY_MPAGE, @@ -2286,6 +2288,31 @@ static unsigned int ata_msense_control_spgt2(struct ata_device *dev, u8 *buf, return CDL_T2_SUB_MPAGE_LEN; } +/* + * Simulate MODE SENSE control mode page, sub-page f2h + * (ATA feature control mode page). + */ +static unsigned int ata_msense_control_ata_feature(struct ata_device *dev, + u8 *buf) +{ + /* PS=0, SPF=1 */ + buf[0] = CONTROL_MPAGE | (1 << 6); + buf[1] = ATA_FEATURE_SUB_MPAGE; + + /* + * The first four bytes of ATA Feature Control mode page are a header. + * The PAGE LENGTH field is the size of the page excluding the header. + */ + put_unaligned_be16(ATA_FEATURE_SUB_MPAGE_LEN - 4, &buf[2]); + + if (dev->flags & ATA_DFLAG_CDL) + buf[4] = 0x02; /* Support T2A and T2B pages */ + else + buf[4] = 0; + + return ATA_FEATURE_SUB_MPAGE_LEN; +} + /** * ata_msense_control - Simulate MODE SENSE control mode page * @dev: ATA device of interest @@ -2309,10 +2336,13 @@ static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf, case CDL_T2A_SUB_MPAGE: case CDL_T2B_SUB_MPAGE: return ata_msense_control_spgt2(dev, buf, spg); + case ATA_FEATURE_SUB_MPAGE: + return ata_msense_control_ata_feature(dev, buf); case ALL_SUB_MPAGES: n = ata_msense_control_spg0(dev, buf, changeable); n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); n += ata_msense_control_spgt2(dev, buf + n, CDL_T2A_SUB_MPAGE); + n += ata_msense_control_ata_feature(dev, buf + n); return n; default: return 0; @@ -2391,7 +2421,7 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) spg = scsicmd[3]; /* - * Supported subpages: all subpages and sub-pages 07h and 08h of + * Supported subpages: all subpages and sub-pages 07h, 08h and f2h of * the control page. 
*/ if (spg) { @@ -2400,6 +2430,7 @@ static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) break; case CDL_T2A_SUB_MPAGE: case CDL_T2B_SUB_MPAGE: + case ATA_FEATURE_SUB_MPAGE: if (dev->flags & ATA_DFLAG_CDL && pg == CONTROL_MPAGE) break; fallthrough; @@ -3708,20 +3739,11 @@ static int ata_mselect_caching(struct ata_queued_cmd *qc, return 0; } -/** - * ata_mselect_control - Simulate MODE SELECT for control page - * @qc: Storage for translated ATA taskfile - * @buf: input buffer - * @len: number of valid bytes in the input buffer - * @fp: out parameter for the failed field on error - * - * Prepare a taskfile to modify caching information for the device. - * - * LOCKING: - * None. +/* + * Simulate MODE SELECT control mode page, sub-page 0. */ -static int ata_mselect_control(struct ata_queued_cmd *qc, - const u8 *buf, int len, u16 *fp) +static int ata_mselect_control_spg0(struct ata_queued_cmd *qc, + const u8 *buf, int len, u16 *fp) { struct ata_device *dev = qc->dev; u8 mpage[CONTROL_MPAGE_LEN]; @@ -3759,6 +3781,83 @@ static int ata_mselect_control(struct ata_queued_cmd *qc, return 0; } +/* + * Translate MODE SELECT control mode page, sub-pages f2h (ATA feature mode + * page) into a SET FEATURES command. + */ +static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, + const u8 *buf, int len, + u16 *fp) +{ + struct ata_device *dev = qc->dev; + struct ata_taskfile *tf = &qc->tf; + u8 cdl_action; + + /* + * The first four bytes of ATA Feature Control mode page are a header, + * so offsets in mpage are off by 4 compared to buf. Same for len. 
+ */ + if (len != ATA_FEATURE_SUB_MPAGE_LEN - 4) { + *fp = min(len, ATA_FEATURE_SUB_MPAGE_LEN - 4); + return -EINVAL; + } + + /* Check cdl_ctrl */ + switch (buf[0] & 0x03) { + case 0: + /* Disable CDL */ + cdl_action = 0; + dev->flags &= ~ATA_DFLAG_CDL_ENABLED; + break; + case 0x02: + /* Enable CDL T2A/T2B: NCQ priority must be disabled */ + if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED) { + ata_dev_err(dev, + "NCQ priority must be disabled to enable CDL\n"); + return -EINVAL; + } + cdl_action = 1; + dev->flags |= ATA_DFLAG_CDL_ENABLED; + break; + default: + *fp = 0; + return -EINVAL; + } + + tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf->protocol = ATA_PROT_NODATA; + tf->command = ATA_CMD_SET_FEATURES; + tf->feature = SETFEATURES_CDL; + tf->nsect = cdl_action; + + return 1; +} + +/** + * ata_mselect_control - Simulate MODE SELECT for control page + * @qc: Storage for translated ATA taskfile + * @buf: input buffer + * @len: number of valid bytes in the input buffer + * @fp: out parameter for the failed field on error + * + * Prepare a taskfile to modify caching information for the device. + * + * LOCKING: + * None. 
+ */ +static int ata_mselect_control(struct ata_queued_cmd *qc, u8 spg, + const u8 *buf, int len, u16 *fp) +{ + switch (spg) { + case 0: + return ata_mselect_control_spg0(qc, buf, len, fp); + case ATA_FEATURE_SUB_MPAGE: + return ata_mselect_control_ata_feature(qc, buf, len, fp); + default: + return -EINVAL; + } +} + /** * ata_scsi_mode_select_xlat - Simulate MODE SELECT 6, 10 commands * @qc: Storage for translated ATA taskfile @@ -3776,7 +3875,7 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) const u8 *cdb = scmd->cmnd; u8 pg, spg; unsigned six_byte, pg_len, hdr_len, bd_len; - int len; + int len, ret; u16 fp = (u16)-1; u8 bp = 0xff; u8 buffer[64]; @@ -3861,13 +3960,29 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) } /* - * No mode subpages supported (yet) but asking for _all_ - * subpages may be valid + * Supported subpages: all subpages and ATA feature sub-page f2h of + * the control page. */ - if (spg && (spg != ALL_SUB_MPAGES)) { - fp = (p[0] & 0x40) ? 1 : 0; - fp += hdr_len + bd_len; - goto invalid_param; + if (spg) { + switch (spg) { + case ALL_SUB_MPAGES: + /* All subpages is not supported for the control page */ + if (pg == CONTROL_MPAGE) { + fp = (p[0] & 0x40) ? 1 : 0; + fp += hdr_len + bd_len; + goto invalid_param; + } + break; + case ATA_FEATURE_SUB_MPAGE: + if (qc->dev->flags & ATA_DFLAG_CDL && + pg == CONTROL_MPAGE) + break; + fallthrough; + default: + fp = (p[0] & 0x40) ? 
1 : 0; + fp += hdr_len + bd_len; + goto invalid_param; + } } if (pg_len > len) goto invalid_param_len; @@ -3880,14 +3995,16 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc) } break; case CONTROL_MPAGE: - if (ata_mselect_control(qc, p, pg_len, &fp) < 0) { + ret = ata_mselect_control(qc, spg, p, pg_len, &fp); + if (ret < 0) { fp += hdr_len + bd_len; goto invalid_param; - } else { - goto skip; /* No ATA command to send */ } + if (!ret) + goto skip; /* No ATA command to send */ break; - default: /* invalid page code */ + default: + /* Invalid page code */ fp = bd_len + hdr_len; goto invalid_param; } diff --git a/include/linux/ata.h b/include/linux/ata.h index 1eda46b63dcc..21108471c6af 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -329,6 +329,7 @@ enum { /* Identify device log pages: */ ATA_LOG_SUPPORTED_CAPABILITIES = 0x03, + ATA_LOG_CURRENT_SETTINGS = 0x04, ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, @@ -418,6 +419,8 @@ enum { SETFEATURES_SATA_ENABLE = 0x10, /* Enable use of SATA feature */ SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ + SETFEATURES_CDL = 0x0d, /* Enable/disable cmd duration limits */ + /* SETFEATURE Sector counts for SATA features */ SATA_FPDMA_OFFSET = 0x01, /* FPDMA non-zero buffer offsets */ SATA_FPDMA_AA = 0x02, /* FPDMA Setup FIS Auto-Activate */ diff --git a/include/linux/libata.h b/include/linux/libata.h index e8a45f7f3f5c..385ca23d5ad0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -106,6 +106,7 @@ enum { ATA_DFLAG_INIT_MASK = (1 << 20) - 1, ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ + ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ From eafe804bda7ba01da562c43351068b8a76a579af Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 
May 2023 03:13:51 +0200 Subject: [PATCH 18/19] scsi: ata: libata: Set read/write commands CDL index For devices supporting the command duration limits feature, translate the dld field of read and write operation to set the command duration limit index field of the command task file when the duration limit feature is enabled. The function ata_set_tf_cdl() is introduced to do this. For unqueued (non NCQ) read and write operations, this function sets the command duration limit index set as the lower 3 bits of the feature field. For queued NCQ read/write commands, the index is set as the lower 3 bits of the auxiliary field. The flag ATA_QCFLAG_HAS_CDL is introduced to indicate that a command taskfile has a non zero cdl field. Signed-off-by: Damien Le Moal Reviewed-by: Igor Pylypiv Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Co-developed-by: Niklas Cassel Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-19-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-core.c | 32 +++++++++++++++++++++++++++++--- drivers/ata/libata-scsi.c | 16 +++++++++++++++- drivers/ata/libata.h | 2 +- include/linux/libata.h | 1 + 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cd7aaf202397..e63773740fc2 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -665,12 +665,29 @@ u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev) return block; } +/* + * Set a taskfile command duration limit index. 
+ */ +static inline void ata_set_tf_cdl(struct ata_queued_cmd *qc, int cdl) +{ + struct ata_taskfile *tf = &qc->tf; + + if (tf->protocol == ATA_PROT_NCQ) + tf->auxiliary |= cdl; + else + tf->feature |= cdl; + + /* Mark this command as having a CDL */ + qc->flags |= ATA_QCFLAG_HAS_CDL; +} + /** * ata_build_rw_tf - Build ATA taskfile for given read/write request * @qc: Metadata associated with the taskfile to build * @block: Block address * @n_block: Number of blocks * @tf_flags: RW/FUA etc... + * @cdl: Command duration limit index * @class: IO priority class * * LOCKING: @@ -685,7 +702,7 @@ u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev) * -EINVAL if the request is invalid. */ int ata_build_rw_tf(struct ata_queued_cmd *qc, u64 block, u32 n_block, - unsigned int tf_flags, int class) + unsigned int tf_flags, int cdl, int class) { struct ata_taskfile *tf = &qc->tf; struct ata_device *dev = qc->dev; @@ -724,11 +741,20 @@ int ata_build_rw_tf(struct ata_queued_cmd *qc, u64 block, u32 n_block, if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED && class == IOPRIO_CLASS_RT) tf->hob_nsect |= ATA_PRIO_HIGH << ATA_SHIFT_PRIO; + + if ((dev->flags & ATA_DFLAG_CDL_ENABLED) && cdl) + ata_set_tf_cdl(qc, cdl); + } else if (dev->flags & ATA_DFLAG_LBA) { tf->flags |= ATA_TFLAG_LBA; - /* We need LBA48 for FUA writes */ - if (!(tf->flags & ATA_TFLAG_FUA) && lba_28_ok(block, n_block)) { + if ((dev->flags & ATA_DFLAG_CDL_ENABLED) && cdl) + ata_set_tf_cdl(qc, cdl); + + /* Both FUA writes and a CDL index require 48-bit commands */ + if (!(tf->flags & ATA_TFLAG_FUA) && + !(qc->flags & ATA_QCFLAG_HAS_CDL) && + lba_28_ok(block, n_block)) { /* use LBA28 */ tf->device |= (block >> 24) & 0xf; } else if (lba_48_ok(block, n_block)) { diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 91db4e7f4906..69fc0d2c2123 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1380,6 +1380,18 @@ static inline void scsi_16_lba_len(const u8 *cdb, u64 
*plba, u32 *plen) *plen = get_unaligned_be32(&cdb[10]); } +/** + * scsi_dld - Get duration limit descriptor index + * @cdb: SCSI command to translate + * + * Returns the dld bits indicating the index of a command duration limit + * descriptor. + */ +static inline int scsi_dld(const u8 *cdb) +{ + return ((cdb[1] & 0x01) << 2) | ((cdb[14] >> 6) & 0x03); +} + /** * ata_scsi_verify_xlat - Translate SCSI VERIFY command into an ATA one * @qc: Storage for translated ATA taskfile @@ -1548,6 +1560,7 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) struct request *rq = scsi_cmd_to_rq(scmd); int class = IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); unsigned int tf_flags = 0; + int dld = 0; u64 block; u32 n_block; int rc; @@ -1598,6 +1611,7 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) goto invalid_fld; } scsi_16_lba_len(cdb, &block, &n_block); + dld = scsi_dld(cdb); if (cdb[1] & (1 << 3)) tf_flags |= ATA_TFLAG_FUA; if (!ata_check_nblocks(scmd, n_block)) @@ -1622,7 +1636,7 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) qc->flags |= ATA_QCFLAG_IO; qc->nbytes = n_block * scmd->device->sector_size; - rc = ata_build_rw_tf(qc, block, n_block, tf_flags, class); + rc = ata_build_rw_tf(qc, block, n_block, tf_flags, dld, class); if (likely(rc == 0)) return 0; diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 926d0d33cd29..cf993885d2b2 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -45,7 +45,7 @@ static inline void ata_force_cbl(struct ata_port *ap) { } extern u64 ata_tf_to_lba(const struct ata_taskfile *tf); extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); extern int ata_build_rw_tf(struct ata_queued_cmd *qc, u64 block, u32 n_block, - unsigned int tf_flags, int class); + unsigned int tf_flags, int dld, int class); extern u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev); extern unsigned ata_exec_internal(struct ata_device *dev, diff --git a/include/linux/libata.h 
b/include/linux/libata.h index 385ca23d5ad0..f679abd2e61f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -209,6 +209,7 @@ enum { ATA_QCFLAG_CLEAR_EXCL = (1 << 5), /* clear excl_link on completion */ ATA_QCFLAG_QUIET = (1 << 6), /* don't report device error */ ATA_QCFLAG_RETRY = (1 << 7), /* retry after failure */ + ATA_QCFLAG_HAS_CDL = (1 << 8), /* qc has a CDL descriptor set */ ATA_QCFLAG_EH = (1 << 16), /* cmd aborted and owned by EH */ ATA_QCFLAG_SENSE_VALID = (1 << 17), /* sense data valid */ From 18bd7718b5c489b3161b6c2ab4685d57c1e2da3b Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 May 2023 03:13:52 +0200 Subject: [PATCH 19/19] scsi: ata: libata: Handle completion of CDL commands using policy 0xD A CDL timeout for policy 0xF is defined as an NCQ error, just with a CDL specific sk/asc/ascq in the sense data. Therefore, the existing code in libata does not need to be modified to handle a policy 0xF CDL timeout. For Command Duration Limits policy 0xD: The device shall complete the command without error with the additional sense code set to DATA CURRENTLY UNAVAILABLE. Since a CDL timeout for policy 0xD is not an error, we cannot use the NCQ Command Error log (10h). Instead, we need to read the Sense Data for Successful NCQ Commands log (0Fh). In the success case, just like in the error case, we cannot simply read a log page from the interrupt handler itself, since reading a log page involves sending a READ LOG DMA EXT or READ LOG EXT command. Therefore, we add a new EH action ATA_EH_GET_SUCCESS_SENSE. When a command completes without error, and when the ATA_SENSE bit is set, this new action is set as pending, and EH is scheduled. This way, similar to the NCQ error case, the log page will be read from EH context. An alternative would have been to add a new kthread or workqueue to handle this. However, extending EH can be done with minimal changes and avoids the need to synchronize a new kthread/workqueue with EH.
Co-developed-by: Damien Le Moal Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-20-nks@flawful.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-core.c | 88 +++++++++++++++++++++++++++++++- drivers/ata/libata-eh.c | 105 +++++++++++++++++++++++++++++++++++++- drivers/ata/libata-sata.c | 92 +++++++++++++++++++++++++++++++++ include/linux/ata.h | 3 ++ include/linux/libata.h | 11 +++- 5 files changed, 295 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index e63773740fc2..ddc3d9f8fa02 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -677,8 +677,12 @@ static inline void ata_set_tf_cdl(struct ata_queued_cmd *qc, int cdl) else tf->feature |= cdl; - /* Mark this command as having a CDL */ - qc->flags |= ATA_QCFLAG_HAS_CDL; + /* + * Mark this command as having a CDL and request the result + * task file so that we can inspect the sense data available + * bit on completion. + */ + qc->flags |= ATA_QCFLAG_HAS_CDL | ATA_QCFLAG_RESULT_TF; } /** @@ -2424,6 +2428,24 @@ static void ata_dev_config_cdl(struct ata_device *dev) ata_dev_warn(dev, "Command duration guideline is not supported\n"); + /* + * We must have support for the sense data for successful NCQ commands + * log indicated by the successful NCQ command sense data supported bit. + */ + val = get_unaligned_le64(&ap->sector_buf[8]); + if (!(val & BIT_ULL(63)) || !(val & BIT_ULL(47))) { + ata_dev_warn(dev, + "CDL supported but Successful NCQ Command Sense Data is not supported\n"); + goto not_supported; + } + + /* Without NCQ autosense, the successful NCQ commands log is useless. */ + if (!ata_id_has_ncq_autosense(dev->id)) { + ata_dev_warn(dev, + "CDL supported but NCQ autosense is not supported\n"); + goto not_supported; + } + /* * If CDL is marked as enabled, make sure the feature is enabled too. 
* Conversely, if CDL is disabled, make sure the feature is turned off. @@ -2458,6 +2480,35 @@ static void ata_dev_config_cdl(struct ata_device *dev) } } + /* + * While CDL itself has to be enabled using sysfs, CDL requires that + * sense data for successful NCQ commands is enabled to work properly. + * Just like ata_dev_config_sense_reporting(), enable it unconditionally + * if supported. + */ + if (!(val & BIT_ULL(63)) || !(val & BIT_ULL(18))) { + err_mask = ata_dev_set_feature(dev, + SETFEATURE_SENSE_DATA_SUCC_NCQ, 0x1); + if (err_mask) { + ata_dev_warn(dev, + "failed to enable Sense Data for successful NCQ commands, Emask 0x%x\n", + err_mask); + goto not_supported; + } + } + + /* + * Allocate a buffer to handle reading the sense data for successful + * NCQ Commands log page for commands using a CDL with one of the limit + * policy set to 0xD (successful completion with sense data available + * bit set). + */ + if (!ap->ncq_sense_buf) { + ap->ncq_sense_buf = kmalloc(ATA_LOG_SENSE_NCQ_SIZE, GFP_KERNEL); + if (!ap->ncq_sense_buf) + goto not_supported; + } + /* * Command duration limits is supported: cache the CDL log page 18h * (command duration descriptors). @@ -2475,6 +2526,8 @@ static void ata_dev_config_cdl(struct ata_device *dev) not_supported: dev->flags &= ~(ATA_DFLAG_CDL | ATA_DFLAG_CDL_ENABLED); + kfree(ap->ncq_sense_buf); + ap->ncq_sense_buf = NULL; } static int ata_dev_config_lba(struct ata_device *dev) @@ -4878,6 +4931,36 @@ void ata_qc_complete(struct ata_queued_cmd *qc) fill_result_tf(qc); trace_ata_qc_complete_done(qc); + + /* + * For CDL commands that completed without an error, check if + * we have sense data (ATA_SENSE is set). If we do, then the + * command may have been aborted by the device due to a limit + * timeout using the policy 0xD. For these commands, invoke EH + * to get the command sense data. 
+ */ + if (qc->result_tf.status & ATA_SENSE && + ((ata_is_ncq(qc->tf.protocol) && + dev->flags & ATA_DFLAG_CDL_ENABLED) || + (!ata_is_ncq(qc->tf.protocol) && + ata_id_sense_reporting_enabled(dev->id)))) { + /* + * Tell SCSI EH to not overwrite scmd->result even if + * this command is finished with result SAM_STAT_GOOD. + */ + qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS; + qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD; + ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE; + + /* + * Set EH pending so that ata_qc_schedule_eh() does not + * trigger fast drain and freeze the port. + */ + ap->pflags |= ATA_PFLAG_EH_PENDING; + ata_qc_schedule_eh(qc); + return; + } + /* Some commands need post-processing after successful * completion. */ @@ -5510,6 +5593,7 @@ static void ata_host_release(struct kref *kref) kfree(ap->pmp_link); kfree(ap->slave_link); + kfree(ap->ncq_sense_buf); kfree(ap); host->ports[i] = NULL; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 598ae07195b6..05af292eb8ce 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1917,6 +1917,99 @@ static inline bool ata_eh_quiet(struct ata_queued_cmd *qc) return qc->flags & ATA_QCFLAG_QUIET; } +static int ata_eh_read_sense_success_non_ncq(struct ata_link *link) +{ + struct ata_port *ap = link->ap; + struct ata_queued_cmd *qc; + + qc = __ata_qc_from_tag(ap, link->active_tag); + if (!qc) + return -EIO; + + if (!(qc->flags & ATA_QCFLAG_EH) || + !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || + qc->err_mask) + return -EIO; + + if (!ata_eh_request_sense(qc)) + return -EIO; + + /* + * If we have sense data, call scsi_check_sense() in order to set the + * correct SCSI ML byte (if any). No point in checking the return value, + * since the command has already completed successfully.
+ */ + scsi_check_sense(qc->scsicmd); + + return 0; +} + +static void ata_eh_get_success_sense(struct ata_link *link) +{ + struct ata_eh_context *ehc = &link->eh_context; + struct ata_device *dev = link->device; + struct ata_port *ap = link->ap; + struct ata_queued_cmd *qc; + int tag, ret = 0; + + if (!(ehc->i.dev_action[dev->devno] & ATA_EH_GET_SUCCESS_SENSE)) + return; + + /* if frozen, we can't do much */ + if (ata_port_is_frozen(ap)) { + ata_dev_warn(dev, + "successful sense data available but port frozen\n"); + goto out; + } + + /* + * If the link has sactive set, then we have outstanding NCQ commands + * and have to read the Successful NCQ Commands log to get the sense + * data. Otherwise, we are dealing with a non-NCQ command and use + * request sense ext command to retrieve the sense data. + */ + if (link->sactive) + ret = ata_eh_read_sense_success_ncq_log(link); + else + ret = ata_eh_read_sense_success_non_ncq(link); + if (ret) + goto out; + + ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); + return; + +out: + /* + * If we failed to get sense data for a successful command that ought to + * have sense data, we cannot simply return BLK_STS_OK to user space. + * This is because we can't know if the sense data that we couldn't get + * was actually "DATA CURRENTLY UNAVAILABLE". Reporting such a command + * as success to user space would result in a silent data corruption. + * Thus, add a bogus ABORTED_COMMAND sense data to such commands, such + * that SCSI will report these commands as BLK_STS_IOERR to user space. + */ + ata_qc_for_each_raw(ap, qc, tag) { + if (!(qc->flags & ATA_QCFLAG_EH) || + !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || + qc->err_mask || + ata_dev_phys_link(qc->dev) != link) + continue; + + /* We managed to get sense for this success command, skip. */ + if (qc->flags & ATA_QCFLAG_SENSE_VALID) + continue; + + /* This success command did not have any sense data, skip. 
*/ + if (!(qc->result_tf.status & ATA_SENSE)) + continue; + + /* This success command had sense data, but we failed to get it. */ + ata_scsi_set_sense(dev, qc->scsicmd, ABORTED_COMMAND, 0, 0); + qc->flags |= ATA_QCFLAG_SENSE_VALID; + } + ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); +} + /** * ata_eh_link_autopsy - analyze error and determine recovery action * @link: host link to perform autopsy on @@ -1957,6 +2050,14 @@ static void ata_eh_link_autopsy(struct ata_link *link) /* analyze NCQ failure */ ata_eh_analyze_ncq_error(link); + /* + * Check if this was a successful command that simply needs sense data. + * Since the sense data is not part of the completion, we need to fetch + * it using an additional command. Since this can't be done from irq + * context, the sense data for successful commands are fetched by EH. + */ + ata_eh_get_success_sense(link); + /* any real error trumps AC_ERR_OTHER */ if (ehc->i.err_mask & ~AC_ERR_OTHER) ehc->i.err_mask &= ~AC_ERR_OTHER; @@ -1966,6 +2067,7 @@ static void ata_eh_link_autopsy(struct ata_link *link) ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH) || qc->flags & ATA_QCFLAG_RETRY || + qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD || ata_dev_phys_link(qc->dev) != link) continue; @@ -3825,7 +3927,8 @@ void ata_eh_finish(struct ata_port *ap) else ata_eh_qc_complete(qc); } else { - if (qc->flags & ATA_QCFLAG_SENSE_VALID) { + if (qc->flags & ATA_QCFLAG_SENSE_VALID || + qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) { ata_eh_qc_complete(qc); } else { /* feed zero TF to sense generation */ diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 57cb33060c9d..7de4d8901fac 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include "libata.h" #include "libata-transport.h" @@ -1408,6 +1410,95 @@ static int ata_eh_read_log_10h(struct ata_device *dev, return 0; } +/** + * ata_eh_read_sense_success_ncq_log - Read the
sense data for successful + * NCQ commands log + * @link: ATA link to get sense data for + * + * Read the sense data for successful NCQ commands log page to obtain + * sense data for all NCQ commands that completed successfully with + * the sense data available bit set. + * + * LOCKING: + * Kernel thread context (may sleep). + * + * RETURNS: + * 0 on success, -errno otherwise. + */ +int ata_eh_read_sense_success_ncq_log(struct ata_link *link) +{ + struct ata_device *dev = link->device; + struct ata_port *ap = dev->link->ap; + u8 *buf = ap->ncq_sense_buf; + struct ata_queued_cmd *qc; + unsigned int err_mask, tag; + u8 *sense, sk = 0, asc = 0, ascq = 0; + u64 sense_valid, val; + int ret = 0; + + err_mask = ata_read_log_page(dev, ATA_LOG_SENSE_NCQ, 0, buf, 2); + if (err_mask) { + ata_dev_err(dev, + "Failed to read Sense Data for Successful NCQ Commands log\n"); + return -EIO; + } + + /* Check the log header */ + val = get_unaligned_le64(&buf[0]); + if ((val & 0xffff) != 1 || ((val >> 16) & 0xff) != 0x0f) { + ata_dev_err(dev, + "Invalid Sense Data for Successful NCQ Commands log\n"); + return -EIO; + } + + sense_valid = (u64)buf[8] | ((u64)buf[9] << 8) | + ((u64)buf[10] << 16) | ((u64)buf[11] << 24); + + ata_qc_for_each_raw(ap, qc, tag) { + if (!(qc->flags & ATA_QCFLAG_EH) || + !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || + qc->err_mask || + ata_dev_phys_link(qc->dev) != link) + continue; + + /* + * If the command does not have any sense data, clear ATA_SENSE. + * Keep ATA_QCFLAG_EH_SUCCESS_CMD so that command is finished. 
+ */ + if (!(sense_valid & (1ULL << tag))) { + qc->result_tf.status &= ~ATA_SENSE; + continue; + } + + sense = &buf[32 + 24 * tag]; + sk = sense[0]; + asc = sense[1]; + ascq = sense[2]; + + if (!ata_scsi_sense_is_valid(sk, asc, ascq)) { + ret = -EIO; + continue; + } + + /* Set sense without also setting scsicmd->result */ + scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE, + qc->scsicmd->sense_buffer, sk, + asc, ascq); + qc->flags |= ATA_QCFLAG_SENSE_VALID; + + /* + * If we have sense data, call scsi_check_sense() in order to + * set the correct SCSI ML byte (if any). No point in checking + * the return value, since the command has already completed + * successfully. + */ + scsi_check_sense(qc->scsicmd); + } + + return ret; +} +EXPORT_SYMBOL_GPL(ata_eh_read_sense_success_ncq_log); + /** * ata_eh_analyze_ncq_error - analyze NCQ error * @link: ATA link to analyze NCQ error for @@ -1488,6 +1579,7 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH) || + qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD || ata_dev_phys_link(qc->dev) != link) continue; diff --git a/include/linux/ata.h b/include/linux/ata.h index 21108471c6af..792e10a09787 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -325,6 +325,8 @@ enum { ATA_LOG_CDL = 0x18, ATA_LOG_CDL_SIZE = ATA_SECT_SIZE, ATA_LOG_IDENTIFY_DEVICE = 0x30, + ATA_LOG_SENSE_NCQ = 0x0F, + ATA_LOG_SENSE_NCQ_SIZE = ATA_SECT_SIZE * 2, ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ @@ -431,6 +433,7 @@ enum { SATA_DEVSLP = 0x09, /* Device Sleep */ SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ + SETFEATURE_SENSE_DATA_SUCC_NCQ = 0xC4, /* Sense Data for successful NCQ commands */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index f679abd2e61f..5c8ef33b0af2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -214,6 +214,7 @@ 
enum { ATA_QCFLAG_EH = (1 << 16), /* cmd aborted and owned by EH */ ATA_QCFLAG_SENSE_VALID = (1 << 17), /* sense data valid */ ATA_QCFLAG_EH_SCHEDULED = (1 << 18), /* EH scheduled (obsolete) */ + ATA_QCFLAG_EH_SUCCESS_CMD = (1 << 19), /* EH should fetch sense for this successful cmd */ /* host set flags */ ATA_HOST_SIMPLEX = (1 << 0), /* Host is simplex, one DMA channel per host only */ @@ -312,8 +313,10 @@ enum { ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ + ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */ - ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, + ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK | + ATA_EH_GET_SUCCESS_SENSE, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, @@ -867,6 +870,7 @@ struct ata_port { struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif /* owned by EH */ + u8 *ncq_sense_buf; u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; @@ -1185,6 +1189,7 @@ extern int sata_link_hardreset(struct ata_link *link, bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); +extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else static inline const unsigned long * @@ -1222,6 +1227,10 @@ static inline int sata_link_resume(struct ata_link *link, { return -EOPNOTSUPP; } +static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) +{ + return -EOPNOTSUPP; +} static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link,