scsi: sd: Atomic write support

Support is divided into two main areas:
- reading VPD pages and setting sdev request_queue limits
- support WRITE ATOMIC (16) command and tracing

The relevant block limits VPD page needs to be read to allow the block layer
request_queue atomic write limits to be set. These VPD page limits are
described in sbc4r22 section 6.6.4 - Block limits VPD page.

There are five limits of interest:
- MAXIMUM ATOMIC TRANSFER LENGTH
- ATOMIC ALIGNMENT
- ATOMIC TRANSFER LENGTH GRANULARITY
- MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY
- MAXIMUM ATOMIC BOUNDARY SIZE

MAXIMUM ATOMIC TRANSFER LENGTH is the maximum length for a WRITE ATOMIC
(16) command. It will not be greater than the device MAXIMUM TRANSFER
LENGTH.

ATOMIC ALIGNMENT and ATOMIC TRANSFER LENGTH GRANULARITY are the minimum
alignment and length values for an atomic write in terms of logical blocks.

Unlike NVMe, SCSI does not specify an LBA space boundary, but does specify
a per-IO boundary granularity. The maximum boundary size is specified in
MAXIMUM ATOMIC BOUNDARY SIZE. When used, this boundary value is set in the
WRITE ATOMIC (16) ATOMIC BOUNDARY field - layout for the WRITE_ATOMIC_16
command can be found in sbc4r22 section 5.48. This boundary value is the
granularity size at which the device may atomically write the data. A value
of zero in WRITE ATOMIC (16) ATOMIC BOUNDARY field means that all data must
be atomically written together.

MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY is the maximum atomic write
length if a non-zero boundary value is set.

For atomic write support, the WRITE ATOMIC (16) boundary is not of much
interest, as the block layer expects each request submitted to be executed
atomically. However, the SCSI spec does leave itself open to a quirky
scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero, yet MAXIMUM ATOMIC
TRANSFER LENGTH WITH BOUNDARY and MAXIMUM ATOMIC BOUNDARY SIZE are both
non-zero. This case will be supported.

To set the block layer request_queue atomic write capabilities, sanitize
the VPD page limits and set limits as follows:
- atomic_write_unit_min is derived from granularity and alignment values.
  If no granularity value is set, use the physical block size
- atomic_write_unit_max is derived from MAXIMUM ATOMIC TRANSFER LENGTH. In
  the scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero and boundary
  limits are non-zero, use MAXIMUM ATOMIC BOUNDARY SIZE for
  atomic_write_unit_max. New flag scsi_disk.use_atomic_write_boundary is
  set for this scenario.
- atomic_write_boundary_bytes is set to zero always

SCSI also supports a WRITE ATOMIC (32) command, which is for type 2
protection enabled. This is not going to be supported now, so check for
T10_PI_TYPE2_PROTECTION when setting any request_queue limits.

To handle an atomic write request, add support for WRITE ATOMIC (16)
command in handler sd_setup_atomic_cmnd(). Flag use_atomic_write_boundary
is checked here for encoding ATOMIC BOUNDARY field.

Trace info is also added for WRITE_ATOMIC_16 command.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-9-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
John Garry 2024-06-20 12:53:57 +00:00 committed by Jens Axboe
parent caf336f81b
commit bf4ae8f2e6
5 changed files with 124 additions and 1 deletions

View File

@ -325,6 +325,26 @@ out:
return ret; return ret;
} }
/*
 * Format the fields of a WRITE ATOMIC (16) CDB into the trace sequence.
 *
 * @p:   trace sequence to append to
 * @cdb: command descriptor block; the big-endian 64-bit value at byte 2 is
 *       printed as "lba", the big-endian 16-bit value at byte 12 as "txlen"
 *       and the big-endian 16-bit value at byte 10 as "boundary_size"
 * @len: not used by this decoder
 *
 * Returns the trace_seq_buffer_ptr() value sampled before anything was
 * appended to @p.
 */
static const char *
scsi_trace_atomic_write16_out(struct trace_seq *p, unsigned char *cdb, int len)
{
	const char *start = trace_seq_buffer_ptr(p);
	sector_t lba = get_unaligned_be64(&cdb[2]);
	unsigned int boundary = get_unaligned_be16(&cdb[10]);
	unsigned int txlen = get_unaligned_be16(&cdb[12]);

	trace_seq_printf(p, "lba=%llu txlen=%u boundary_size=%u",
			 lba, txlen, boundary);

	/* Append a 0 byte after the formatted text. */
	trace_seq_putc(p, 0);

	return start;
}
static const char * static const char *
scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len) scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
{ {
@ -385,6 +405,8 @@ scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
return scsi_trace_zbc_in(p, cdb, len); return scsi_trace_zbc_in(p, cdb, len);
case ZBC_OUT: case ZBC_OUT:
return scsi_trace_zbc_out(p, cdb, len); return scsi_trace_zbc_out(p, cdb, len);
case WRITE_ATOMIC_16:
return scsi_trace_atomic_write16_out(p, cdb, len);
default: default:
return scsi_trace_misc(p, cdb, len); return scsi_trace_misc(p, cdb, len);
} }

View File

@ -939,6 +939,64 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
return scsi_alloc_sgtables(cmd); return scsi_alloc_sgtables(cmd);
} }
/*
 * Derive the queue atomic write limits from the atomic write values cached
 * in @sdkp and store them in @lim.
 *
 * @sdkp: disk whose max_atomic, atomic_alignment, atomic_granularity,
 *        max_atomic_with_boundary and max_atomic_boundary fields are read
 * @lim:  queue limits that receive the atomic_write_hw_* values
 *
 * @lim is left untouched when:
 *  - neither max_atomic nor max_atomic_with_boundary is set,
 *  - the disk uses T10_PI_TYPE2_PROTECTION,
 *  - only the boundary-based limit is set but max_atomic_boundary is zero, or
 *  - the computed unit_min/unit_max are not multiples of the reported
 *    granularity or alignment.
 *
 * sdkp->use_atomic_write_boundary is updated to record whether the
 * boundary-based limits were selected.
 */
static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim)
{
	unsigned int logical_block_size = sdkp->device->sector_size,
		physical_block_size_sectors, max_atomic, unit_min, unit_max;

	if ((!sdkp->max_atomic && !sdkp->max_atomic_with_boundary) ||
	    sdkp->protection_type == T10_PI_TYPE2_PROTECTION)
		return;

	physical_block_size_sectors = sdkp->physical_block_size /
					logical_block_size;

	/* Fall back to the physical block size when no granularity is set. */
	unit_min = rounddown_pow_of_two(sdkp->atomic_granularity ?
					sdkp->atomic_granularity :
					physical_block_size_sectors);

	/*
	 * Only use atomic boundary when we have the odd scenario of
	 * sdkp->max_atomic == 0, which the spec does permit.
	 */
	if (sdkp->max_atomic) {
		max_atomic = sdkp->max_atomic;
		unit_max = rounddown_pow_of_two(sdkp->max_atomic);
		sdkp->use_atomic_write_boundary = 0;
	} else {
		/*
		 * The boundary scenario needs both boundary limits to be
		 * non-zero. rounddown_pow_of_two() is undefined for zero, so
		 * do not configure atomic writes for a device which reports
		 * a zero maximum boundary size.
		 */
		if (!sdkp->max_atomic_boundary)
			return;

		max_atomic = sdkp->max_atomic_with_boundary;
		unit_max = rounddown_pow_of_two(sdkp->max_atomic_boundary);
		sdkp->use_atomic_write_boundary = 1;
	}

	/*
	 * Ensure compliance with granularity and alignment. For now, keep it
	 * simple and just don't support atomic writes for values mismatched
	 * with max_{boundary}atomic, physical block size, and
	 * atomic_granularity itself.
	 *
	 * We're really being distrustful by checking unit_max also...
	 */
	if (sdkp->atomic_granularity > 1) {
		if (unit_min > 1 && unit_min % sdkp->atomic_granularity)
			return;
		if (unit_max > 1 && unit_max % sdkp->atomic_granularity)
			return;
	}

	if (sdkp->atomic_alignment > 1) {
		if (unit_min > 1 && unit_min % sdkp->atomic_alignment)
			return;
		if (unit_max > 1 && unit_max % sdkp->atomic_alignment)
			return;
	}

	/* The cached values are in logical blocks; the limits are in bytes. */
	lim->atomic_write_hw_max = max_atomic * logical_block_size;
	lim->atomic_write_hw_boundary = 0;
	lim->atomic_write_hw_unit_min = unit_min * logical_block_size;
	lim->atomic_write_hw_unit_max = unit_max * logical_block_size;
}
static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
bool unmap) bool unmap)
{ {
@ -1237,6 +1295,26 @@ static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd)
return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1; return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1;
} }
/*
 * Fill in @cmd with a 16-byte WRITE ATOMIC (16) CDB.
 *
 * @cmd:       command whose cmd_len and cmnd[] are written
 * @lba:       stored big-endian in CDB bytes 2-9
 * @nr_blocks: stored big-endian in CDB bytes 12-13, and also in bytes 10-11
 *             when @boundary is true
 * @boundary:  when false, CDB bytes 10-11 are set to zero
 * @flags:     stored verbatim in CDB byte 1
 *
 * NOTE(review): @nr_blocks is written into 16-bit fields, so only its low
 * 16 bits reach the CDB; presumably callers guarantee that it fits - confirm.
 *
 * Always returns BLK_STS_OK.
 */
static blk_status_t sd_setup_atomic_cmnd(struct scsi_cmnd *cmd,
					sector_t lba, unsigned int nr_blocks,
					bool boundary, unsigned char flags)
{
	cmd->cmd_len  = 16;
	cmd->cmnd[0]  = WRITE_ATOMIC_16;
	cmd->cmnd[1]  = flags;
	put_unaligned_be64(lba, &cmd->cmnd[2]);
	/* Bytes 10-11: the request length when a boundary is used, else 0. */
	put_unaligned_be16(boundary ? nr_blocks : 0, &cmd->cmnd[10]);
	put_unaligned_be16(nr_blocks, &cmd->cmnd[12]);
	cmd->cmnd[14] = 0;
	cmd->cmnd[15] = 0;

	return BLK_STS_OK;
}
static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
{ {
struct request *rq = scsi_cmd_to_rq(cmd); struct request *rq = scsi_cmd_to_rq(cmd);
@ -1302,6 +1380,10 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks, ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld); protect | fua, dld);
} else if (rq->cmd_flags & REQ_ATOMIC && write) {
ret = sd_setup_atomic_cmnd(cmd, lba, nr_blocks,
sdkp->use_atomic_write_boundary,
protect | fua);
} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks, ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld); protect | fua, dld);
@ -3264,7 +3346,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp,
sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]); sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]);
if (!sdkp->lbpme) if (!sdkp->lbpme)
goto out; goto config_atomic;
lba_count = get_unaligned_be32(&vpd->data[20]); lba_count = get_unaligned_be32(&vpd->data[20]);
desc_count = get_unaligned_be32(&vpd->data[24]); desc_count = get_unaligned_be32(&vpd->data[24]);
@ -3279,6 +3361,15 @@ static void sd_read_block_limits(struct scsi_disk *sdkp,
get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); get_unaligned_be32(&vpd->data[32]) & ~(1 << 31);
sd_config_discard(sdkp, lim, sd_discard_mode(sdkp)); sd_config_discard(sdkp, lim, sd_discard_mode(sdkp));
config_atomic:
sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]);
sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]);
sdkp->atomic_granularity = get_unaligned_be32(&vpd->data[52]);
sdkp->max_atomic_with_boundary = get_unaligned_be32(&vpd->data[56]);
sdkp->max_atomic_boundary = get_unaligned_be32(&vpd->data[60]);
sd_config_atomic(sdkp, lim);
} }
out: out:

View File

@ -115,6 +115,13 @@ struct scsi_disk {
u32 max_unmap_blocks; u32 max_unmap_blocks;
u32 unmap_granularity; u32 unmap_granularity;
u32 unmap_alignment; u32 unmap_alignment;
u32 max_atomic;
u32 atomic_alignment;
u32 atomic_granularity;
u32 max_atomic_with_boundary;
u32 max_atomic_boundary;
u32 index; u32 index;
unsigned int physical_block_size; unsigned int physical_block_size;
unsigned int max_medium_access_timeouts; unsigned int max_medium_access_timeouts;
@ -148,6 +155,7 @@ struct scsi_disk {
unsigned security : 1; unsigned security : 1;
unsigned ignore_medium_access_errors : 1; unsigned ignore_medium_access_errors : 1;
unsigned rscs : 1; /* reduced stream control support */ unsigned rscs : 1; /* reduced stream control support */
unsigned use_atomic_write_boundary : 1;
}; };
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev) #define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)

View File

@ -120,6 +120,7 @@
#define WRITE_SAME_16 0x93 #define WRITE_SAME_16 0x93
#define ZBC_OUT 0x94 #define ZBC_OUT 0x94
#define ZBC_IN 0x95 #define ZBC_IN 0x95
#define WRITE_ATOMIC_16 0x9c
#define SERVICE_ACTION_BIDIRECTIONAL 0x9d #define SERVICE_ACTION_BIDIRECTIONAL 0x9d
#define SERVICE_ACTION_IN_16 0x9e #define SERVICE_ACTION_IN_16 0x9e
#define SERVICE_ACTION_OUT_16 0x9f #define SERVICE_ACTION_OUT_16 0x9f

View File

@ -102,6 +102,7 @@
scsi_opcode_name(WRITE_32), \ scsi_opcode_name(WRITE_32), \
scsi_opcode_name(WRITE_SAME_32), \ scsi_opcode_name(WRITE_SAME_32), \
scsi_opcode_name(ATA_16), \ scsi_opcode_name(ATA_16), \
scsi_opcode_name(WRITE_ATOMIC_16), \
scsi_opcode_name(ATA_12)) scsi_opcode_name(ATA_12))
#define scsi_hostbyte_name(result) { result, #result } #define scsi_hostbyte_name(result) { result, #result }