linux/block/t10-pi.c
Christoph Hellwig c6e56cf6b2 block: move integrity information into queue_limits
Move the integrity information into the queue limits so that it can be
set atomically with other queue limits, and that the sysfs changes to
the read_verify and write_generate flags are properly synchronized.
This also allows providing a more useful helper to stack the integrity
fields, although it still is separate from the main stacking function
as not all stackable devices want to inherit the integrity settings.
Even with that it greatly simplifies the code in md and dm.

Note that the integrity field is moved as-is into the queue limits.
While there are good arguments for removing the separate blk_integrity
structure, this would cause a lot of churn and might better be done at a
later time if desired.  However the integrity field in the queue_limits
structure is now unconditional so that various ifdefs can be avoided or
replaced with IS_ENABLED().  Given the tiny size of it, that seems like
a worthwhile trade-off.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240613084839.1044015-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-06-14 10:20:07 -06:00

425 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* t10_pi.c - Functions for generating and verifying T10 Protection
* Information.
*/
#include <linux/t10-pi.h>
#include <linux/blk-integrity.h>
#include <linux/crc-t10dif.h>
#include <linux/crc64.h>
#include <linux/module.h>
#include <net/checksum.h>
#include <asm/unaligned.h>
#include "blk.h"
/*
 * Compute a 16-bit guard value over @len bytes starting at @data.
 *
 * When @csum_type is BLK_INTEGRITY_CSUM_IP the result is ip_compute_csum()
 * of the buffer alone; @csum is not consulted on that path.  For any other
 * @csum_type, @csum is converted to CPU order, fed to crc_t10dif_update()
 * as the running value, and the updated CRC is returned in big-endian form.
 */
static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len,
		unsigned char csum_type)
{
	if (csum_type != BLK_INTEGRITY_CSUM_IP) {
		csum = cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum),
						     data, len));
		return csum;
	}

	return (__force __be16)ip_compute_csum(data, len);
}
/*
 * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
 * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
 * tag.
 *
 * Fill in one t10_pi_tuple for each protection interval covered by @iter.
 * The tuple lives bi->pi_offset bytes into each bi->tuple_size sized
 * metadata slot.  @iter's data_buf, prot_buf and seed are advanced as the
 * intervals are consumed.
 */
static void t10_pi_generate(struct blk_integrity_iter *iter,
		struct blk_integrity *bi)
{
	u8 offset = bi->pi_offset;
	unsigned int done = 0;

	while (done < iter->data_size) {
		struct t10_pi_tuple *tuple = iter->prot_buf + offset;

		/* Start with the checksum of the data interval itself. */
		tuple->guard_tag = t10_pi_csum(0, iter->data_buf,
					       iter->interval, bi->csum_type);

		/*
		 * If the tuple is not at the start of the metadata slot,
		 * pass the guard through t10_pi_csum() once more together
		 * with the @offset bytes that precede the tuple.
		 */
		if (offset != 0)
			tuple->guard_tag = t10_pi_csum(tuple->guard_tag,
						       iter->prot_buf, offset,
						       bi->csum_type);

		tuple->app_tag = 0;

		/* The ref tag is the low 32 bits of the seed, or zero. */
		tuple->ref_tag = (bi->flags & BLK_INTEGRITY_REF_TAG) ?
			cpu_to_be32(lower_32_bits(iter->seed)) : 0;

		/* Step to the next interval. */
		iter->seed++;
		iter->prot_buf += bi->tuple_size;
		iter->data_buf += iter->interval;
		done += iter->interval;
	}
}
static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
struct blk_integrity *bi)
{
u8 offset = bi->pi_offset;
unsigned int i;
for (i = 0 ; i < iter->data_size ; i += iter->interval) {
struct t10_pi_tuple *pi = iter->prot_buf + offset;
__be16 csum;
if (bi->flags & BLK_INTEGRITY_REF_TAG) {
if (pi->app_tag == T10_PI_APP_ESCAPE)
goto next;
if (be32_to_cpu(pi->ref_tag) !=
lower_32_bits(iter->seed)) {
pr_err("%s: ref tag error at location %llu " \
"(rcvd %u)\n", iter->disk_name,
(unsigned long long)
iter->seed, be32_to_cpu(pi->ref_tag));
return BLK_STS_PROTECTION;
}
} else {
if (pi->app_tag == T10_PI_APP_ESCAPE &&
pi->ref_tag == T10_PI_REF_ESCAPE)
goto next;
}
csum = t10_pi_csum(0, iter->data_buf, iter->interval,
bi->csum_type);
if (offset)
csum = t10_pi_csum(csum, iter->prot_buf, offset,
bi->csum_type);
if (pi->guard_tag != csum) {
pr_err("%s: guard tag error at sector %llu " \
"(rcvd %04x, want %04x)\n", iter->disk_name,
(unsigned long long)iter->seed,
be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
return BLK_STS_PROTECTION;
}
next:
iter->data_buf += iter->interval;
iter->prot_buf += bi->tuple_size;
iter->seed++;
}
return BLK_STS_OK;
}
/**
 * t10_pi_type1_prepare - prepare PI prior submitting request to device
 * @rq:              request with PI that should be prepared
 *
 * For Type 1/Type 2, the virtual start sector is the one that was
 * originally submitted by the block layer for the ref_tag usage. Due to
 * partitioning, MD/DM cloning, etc. the actual physical start sector is
 * likely to be different. Remap protection information to match the
 * physical LBA.
 *
 * Each bio's tuples are walked in order; a tuple whose ref tag equals the
 * expected virtual value is rewritten with the value derived from
 * t10_pi_ref_tag(@rq).  A bio is flagged BIP_MAPPED_INTEGRITY once it has
 * been processed, and the walk stops at the first bio already carrying
 * that flag.
 */
static void t10_pi_type1_prepare(struct request *rq)
{
	struct blk_integrity *bi = &rq->q->limits.integrity;
	const int tuple_sz = bi->tuple_size;
	u32 ref_tag = t10_pi_ref_tag(rq);
	u8 offset = bi->pi_offset;
	struct bio *bio;

	__rq_for_each_bio(bio, rq) {
		struct bio_integrity_payload *bip = bio_integrity(bio);
		u32 virt = bip_get_seed(bip) & 0xffffffff;
		struct bio_vec iv;
		struct bvec_iter iter;

		/* Already remapped? */
		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
			break;

		bip_for_each_vec(iv, bip, iter) {
			unsigned int j;
			void *kaddr, *p;

			kaddr = bvec_kmap_local(&iv);
			p = kaddr;
			for (j = 0; j < iv.bv_len; j += tuple_sz) {
				struct t10_pi_tuple *pi = p + offset;

				if (be32_to_cpu(pi->ref_tag) == virt)
					pi->ref_tag = cpu_to_be32(ref_tag);
				virt++;
				ref_tag++;
				p += tuple_sz;
			}
			/*
			 * Unmap via the address that was mapped: after the
			 * loop @p may point one past the segment, which is in
			 * the following page when the segment ends on a page
			 * boundary.
			 */
			kunmap_local(kaddr);
		}

		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
	}
}
/**
 * t10_pi_type1_complete - prepare PI prior returning request to the blk layer
 * @rq:              request with PI that should be prepared
 * @nr_bytes:        total bytes to prepare
 *
 * For Type 1/Type 2, the virtual start sector is the one that was
 * originally submitted by the block layer for the ref_tag usage. Due to
 * partitioning, MD/DM cloning, etc. the actual physical start sector is
 * likely to be different. Since the physical start sector was submitted
 * to the device, we should remap it back to virtual values expected by the
 * block layer.
 *
 * At most @nr_bytes >> bi->interval_exp tuples are touched; a tuple is
 * rewritten only when its ref tag equals the expected physical value.
 */
static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
	struct blk_integrity *bi = &rq->q->limits.integrity;
	unsigned intervals = nr_bytes >> bi->interval_exp;
	const int tuple_sz = bi->tuple_size;
	u32 ref_tag = t10_pi_ref_tag(rq);
	u8 offset = bi->pi_offset;
	struct bio *bio;

	__rq_for_each_bio(bio, rq) {
		struct bio_integrity_payload *bip = bio_integrity(bio);
		u32 virt = bip_get_seed(bip) & 0xffffffff;
		struct bio_vec iv;
		struct bvec_iter iter;

		bip_for_each_vec(iv, bip, iter) {
			unsigned int j;
			void *kaddr, *p;

			kaddr = bvec_kmap_local(&iv);
			p = kaddr;
			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
				struct t10_pi_tuple *pi = p + offset;

				if (be32_to_cpu(pi->ref_tag) == ref_tag)
					pi->ref_tag = cpu_to_be32(virt);
				virt++;
				ref_tag++;
				intervals--;
				p += tuple_sz;
			}
			/*
			 * Unmap via the address that was mapped: after the
			 * loop @p may point one past the segment, which is in
			 * the following page when the segment ends on a page
			 * boundary.
			 */
			kunmap_local(kaddr);
		}
	}
}
/*
 * Continue the running CRC @crc over @len bytes at @data using
 * crc64_rocksoft_update() and return the result in big-endian form.
 */
static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
{
	u64 updated = crc64_rocksoft_update(crc, data, len);

	return cpu_to_be64(updated);
}
/*
 * Fill in one crc64_pi_tuple for each protection interval covered by
 * @iter.  The tuple lives bi->pi_offset bytes into each bi->tuple_size
 * sized metadata slot.  @iter's data_buf, prot_buf and seed are advanced as
 * the intervals are consumed.
 */
static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
		struct blk_integrity *bi)
{
	u8 offset = bi->pi_offset;
	unsigned int done = 0;

	while (done < iter->data_size) {
		struct crc64_pi_tuple *tuple = iter->prot_buf + offset;

		/* CRC of the data interval first ... */
		tuple->guard_tag = ext_pi_crc64(0, iter->data_buf,
						iter->interval);

		/* ... continued over the metadata bytes ahead of the tuple. */
		if (offset != 0)
			tuple->guard_tag =
				ext_pi_crc64(be64_to_cpu(tuple->guard_tag),
					     iter->prot_buf, offset);

		tuple->app_tag = 0;

		/* The 48-bit ref tag is taken from the seed, or is zero. */
		put_unaligned_be48((bi->flags & BLK_INTEGRITY_REF_TAG) ?
					iter->seed : 0ULL,
				   tuple->ref_tag);

		/* Step to the next interval. */
		iter->seed++;
		iter->prot_buf += bi->tuple_size;
		iter->data_buf += iter->interval;
		done += iter->interval;
	}
}
/*
 * Return true when the six bytes at @ref_tag are all 0xff, false otherwise.
 * The buffer is only read.
 */
static bool ext_pi_ref_escape(const u8 *ref_tag)
{
	/* Never written, so keep the pattern in read-only data. */
	static const u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

	return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
}
/*
 * Check the crc64_pi_tuple of every protection interval described by
 * @iter.
 *
 * With BLK_INTEGRITY_REF_TAG set, an interval whose app tag equals
 * T10_PI_APP_ESCAPE is skipped; otherwise its 48-bit ref tag must equal
 * the low 48 bits of the running seed.  Without that flag, an interval is
 * skipped only when the app tag is the escape value and
 * ext_pi_ref_escape() accepts the ref tag.  Every interval that is not
 * skipped must also carry a matching guard tag.
 *
 * Returns BLK_STS_PROTECTION on the first mismatch (after logging it) and
 * BLK_STS_OK once all intervals have been walked.
 */
static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
		struct blk_integrity *bi)
{
	u8 offset = bi->pi_offset;
	unsigned int done = 0;

	while (done < iter->data_size) {
		struct crc64_pi_tuple *tuple = iter->prot_buf + offset;
		__be64 want;

		if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) {
			if (tuple->app_tag == T10_PI_APP_ESCAPE &&
			    ext_pi_ref_escape(tuple->ref_tag))
				goto advance;
		} else {
			u64 ref, seed;

			if (tuple->app_tag == T10_PI_APP_ESCAPE)
				goto advance;

			ref = get_unaligned_be48(tuple->ref_tag);
			seed = lower_48_bits(iter->seed);
			if (seed != ref) {
				pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
				       iter->disk_name, seed, ref);
				return BLK_STS_PROTECTION;
			}
		}

		/* Recompute the guard the same way the generate side does. */
		want = ext_pi_crc64(0, iter->data_buf, iter->interval);
		if (offset != 0)
			want = ext_pi_crc64(be64_to_cpu(want), iter->prot_buf,
					    offset);

		if (want != tuple->guard_tag) {
			pr_err("%s: guard tag error at sector %llu "
			       "(rcvd %016llx, want %016llx)\n",
			       iter->disk_name, (unsigned long long)iter->seed,
			       be64_to_cpu(tuple->guard_tag),
			       be64_to_cpu(want));
			return BLK_STS_PROTECTION;
		}

advance:
		iter->seed++;
		iter->prot_buf += bi->tuple_size;
		iter->data_buf += iter->interval;
		done += iter->interval;
	}

	return BLK_STS_OK;
}
/**
 * ext_pi_type1_prepare - remap 48-bit ref tags before submitting a request
 * @rq:              request whose integrity payloads should be remapped
 *
 * Walks the crc64_pi_tuple entries of every bio in @rq.  A tuple whose ref
 * tag equals the expected virtual value (low 48 bits of the bio's integrity
 * seed, incremented per tuple) is rewritten with the value derived from
 * ext_pi_ref_tag(@rq), likewise incremented per tuple.  A bio is flagged
 * BIP_MAPPED_INTEGRITY once it has been processed, and the walk stops at the
 * first bio already carrying that flag.
 */
static void ext_pi_type1_prepare(struct request *rq)
{
	struct blk_integrity *bi = &rq->q->limits.integrity;
	const int tuple_sz = bi->tuple_size;
	u64 ref_tag = ext_pi_ref_tag(rq);
	u8 offset = bi->pi_offset;
	struct bio *bio;

	__rq_for_each_bio(bio, rq) {
		struct bio_integrity_payload *bip = bio_integrity(bio);
		u64 virt = lower_48_bits(bip_get_seed(bip));
		struct bio_vec iv;
		struct bvec_iter iter;

		/* Already remapped? */
		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
			break;

		bip_for_each_vec(iv, bip, iter) {
			unsigned int j;
			void *kaddr, *p;

			kaddr = bvec_kmap_local(&iv);
			p = kaddr;
			for (j = 0; j < iv.bv_len; j += tuple_sz) {
				struct crc64_pi_tuple *pi = p + offset;
				u64 ref = get_unaligned_be48(pi->ref_tag);

				if (ref == virt)
					put_unaligned_be48(ref_tag, pi->ref_tag);
				virt++;
				ref_tag++;
				p += tuple_sz;
			}
			/*
			 * Unmap via the address that was mapped: after the
			 * loop @p may point one past the segment, which is in
			 * the following page when the segment ends on a page
			 * boundary.
			 */
			kunmap_local(kaddr);
		}

		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
	}
}
/**
 * ext_pi_type1_complete - remap 48-bit ref tags back after completion
 * @rq:              request whose integrity payloads should be remapped
 * @nr_bytes:        number of bytes to process
 *
 * Walks the crc64_pi_tuple entries of every bio in @rq.  A tuple whose ref
 * tag equals the expected value derived from ext_pi_ref_tag(@rq) is
 * rewritten with the virtual value (low 48 bits of the bio's integrity
 * seed); both counters advance per tuple.  At most
 * @nr_bytes >> bi->interval_exp tuples are touched.
 */
static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
	struct blk_integrity *bi = &rq->q->limits.integrity;
	unsigned intervals = nr_bytes >> bi->interval_exp;
	const int tuple_sz = bi->tuple_size;
	u64 ref_tag = ext_pi_ref_tag(rq);
	u8 offset = bi->pi_offset;
	struct bio *bio;

	__rq_for_each_bio(bio, rq) {
		struct bio_integrity_payload *bip = bio_integrity(bio);
		u64 virt = lower_48_bits(bip_get_seed(bip));
		struct bio_vec iv;
		struct bvec_iter iter;

		bip_for_each_vec(iv, bip, iter) {
			unsigned int j;
			void *kaddr, *p;

			kaddr = bvec_kmap_local(&iv);
			p = kaddr;
			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
				struct crc64_pi_tuple *pi = p + offset;
				u64 ref = get_unaligned_be48(pi->ref_tag);

				if (ref == ref_tag)
					put_unaligned_be48(virt, pi->ref_tag);
				virt++;
				ref_tag++;
				intervals--;
				p += tuple_sz;
			}
			/*
			 * Unmap via the address that was mapped: after the
			 * loop @p may point one past the segment, which is in
			 * the following page when the segment ends on a page
			 * boundary.
			 */
			kunmap_local(kaddr);
		}
	}
}
/*
 * Generate protection information for @iter, picking the tuple format from
 * bi->csum_type: the 16-bit guard format for CRC and IP checksums, the
 * 64-bit guard format for CRC64.  Any other checksum type is a no-op.
 */
void blk_integrity_generate(struct blk_integrity_iter *iter,
		struct blk_integrity *bi)
{
	switch (bi->csum_type) {
	case BLK_INTEGRITY_CSUM_IP:
	case BLK_INTEGRITY_CSUM_CRC:
		t10_pi_generate(iter, bi);
		return;
	case BLK_INTEGRITY_CSUM_CRC64:
		ext_pi_crc64_generate(iter, bi);
		return;
	default:
		return;
	}
}
/*
 * Verify the protection information described by @iter, picking the tuple
 * format from bi->csum_type.  Returns the status of the format-specific
 * verifier, or BLK_STS_OK when the checksum type is none of CRC, IP or
 * CRC64.
 */
blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter,
		struct blk_integrity *bi)
{
	blk_status_t status = BLK_STS_OK;

	switch (bi->csum_type) {
	case BLK_INTEGRITY_CSUM_IP:
	case BLK_INTEGRITY_CSUM_CRC:
		status = t10_pi_verify(iter, bi);
		break;
	case BLK_INTEGRITY_CSUM_CRC64:
		status = ext_pi_crc64_verify(iter, bi);
		break;
	default:
		break;
	}

	return status;
}
/*
 * Remap the reference tags of @rq before it is issued.  Nothing is done
 * unless the queue's integrity profile has BLK_INTEGRITY_REF_TAG set; the
 * CRC64 checksum type uses the 48-bit ref tag variant, every other type the
 * 32-bit one.
 */
void blk_integrity_prepare(struct request *rq)
{
	struct blk_integrity *bi = &rq->q->limits.integrity;

	if (bi->flags & BLK_INTEGRITY_REF_TAG) {
		if (bi->csum_type != BLK_INTEGRITY_CSUM_CRC64)
			t10_pi_type1_prepare(rq);
		else
			ext_pi_type1_prepare(rq);
	}
}
/*
 * Remap the reference tags of @rq back after @nr_bytes have completed.
 * Nothing is done unless the queue's integrity profile has
 * BLK_INTEGRITY_REF_TAG set; the CRC64 checksum type uses the 48-bit ref
 * tag variant, every other type the 32-bit one.
 */
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
{
	struct blk_integrity *bi = &rq->q->limits.integrity;

	if (bi->flags & BLK_INTEGRITY_REF_TAG) {
		if (bi->csum_type != BLK_INTEGRITY_CSUM_CRC64)
			t10_pi_type1_complete(rq, nr_bytes);
		else
			ext_pi_type1_complete(rq, nr_bytes);
	}
}
/* Module metadata: description string and license tag. */
MODULE_DESCRIPTION("T10 Protection Information module");
MODULE_LICENSE("GPL");