From 0808ebf2f80b962e75741a41ced372a7116f1e26 Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Fri, 26 Jul 2024 15:08:53 -0400 Subject: [PATCH 01/26] dm vdo: don't refer to dedupe_context after releasing it Clear the dedupe_context pointer in a data_vio whenever ownership of the context is lost, so that vdo can't examine it accidentally. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/dedupe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index 39ac68614419..80628ae93fba 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -729,6 +729,7 @@ static void process_update_result(struct data_vio *agent) !change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) return; + agent->dedupe_context = NULL; release_context(context); } @@ -1648,6 +1649,7 @@ static void process_query_result(struct data_vio *agent) if (change_context_state(context, DEDUPE_CONTEXT_COMPLETE, DEDUPE_CONTEXT_IDLE)) { agent->is_duplicate = decode_uds_advice(context); + agent->dedupe_context = NULL; release_context(context); } } @@ -2321,6 +2323,7 @@ static void timeout_index_operations_callback(struct vdo_completion *completion) * send its requestor on its way. */ list_del_init(&context->list_entry); + context->requestor->dedupe_context = NULL; continue_data_vio(context->requestor); timed_out++; } From 3a59b2ec2400017f7fcc5d794802ebc7f4187d22 Mon Sep 17 00:00:00 2001 From: Ken Raeburn Date: Tue, 6 Aug 2024 05:13:19 -0400 Subject: [PATCH 02/26] dm vdo: remove bad check of bi_next field Remove this check to prevent spurious warning messages due to the behavior of other storage layers. This check was intended to make sure dm-vdo does not chain metadata bios together. However, vdo has no control over underlying storage layers, so the assertion is not always true. Signed-off-by: Ken Raeburn Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/io-submitter.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c index 9a3716bb3c05..ab62abe18827 100644 --- a/drivers/md/dm-vdo/io-submitter.c +++ b/drivers/md/dm-vdo/io-submitter.c @@ -346,7 +346,6 @@ void __submit_metadata_vio(struct vio *vio, physical_block_number_t physical, VDO_ASSERT_LOG_ONLY(!code->quiescent, "I/O not allowed in state %s", code->name); - VDO_ASSERT_LOG_ONLY(vio->bio->bi_next == NULL, "metadata bio has no next bio"); vdo_reset_completion(completion); completion->error_handler = error_handler; From 47874c98dc0873aad65259d09f9b9029e97429e3 Mon Sep 17 00:00:00 2001 From: Bruce Johnston Date: Thu, 18 Jul 2024 14:02:38 -0400 Subject: [PATCH 03/26] dm vdo: add dmsetup message for returning configuration info Add a new dmsetup message called config, which will return useful configuration information for the vdo volume and the uds index associated with it. The output is a YAML string, and contains a version number to allow future additions to the content. Signed-off-by: Bruce Johnston Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- .../admin-guide/device-mapper/vdo.rst | 7 ++- drivers/md/dm-vdo/dm-vdo-target.c | 5 +- drivers/md/dm-vdo/message-stats.c | 48 +++++++++++++++++++ drivers/md/dm-vdo/message-stats.h | 1 + 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/vdo.rst b/Documentation/admin-guide/device-mapper/vdo.rst index c69ac186863a..a14e6d3e787c 100644 --- a/Documentation/admin-guide/device-mapper/vdo.rst +++ b/Documentation/admin-guide/device-mapper/vdo.rst @@ -251,7 +251,12 @@ The messages are: by the vdostats userspace program to interpret the output buffer. - dump: + config: + Outputs useful vdo configuration information. Mostly used + by users who want to recreate a similar VDO volume and + want to know the creation configuration used. + + dump: Dumps many internal structures to the system log. This is not always safe to run, so it should only be used to debug a hung vdo. Optional parameters to specify structures to diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index dd05691e4097..4b94f5718ed7 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -1105,6 +1105,9 @@ static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv, if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) { vdo_write_stats(vdo, result_buffer, maxlen); result = 1; + } else if ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) { + vdo_write_config(vdo, &result_buffer, &maxlen); + result = 1; } else { result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv)); } @@ -2832,7 +2835,7 @@ static void vdo_resume(struct dm_target *ti) static struct target_type vdo_target_bio = { .features = DM_TARGET_SINGLETON, .name = "vdo", - .version = { 9, 0, 0 }, + .version = { 9, 1, 0 }, .module = THIS_MODULE, .ctr = vdo_ctr, .dtr = vdo_dtr, diff --git a/drivers/md/dm-vdo/message-stats.c b/drivers/md/dm-vdo/message-stats.c index 2802cf92922b..75dfcd7c5f63 100644 --- a/drivers/md/dm-vdo/message-stats.c +++ b/drivers/md/dm-vdo/message-stats.c @@ -4,6 +4,7 @@ */ #include "dedupe.h" +#include "indexer.h" #include "logger.h" #include "memory-alloc.h" #include "message-stats.h" @@ -430,3 +431,50 @@ int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen) vdo_free(stats); return VDO_SUCCESS; } + +static void write_index_memory(u32 mem, char **buf, unsigned int *maxlen) +{ + char *prefix = "memorySize : "; + + /* Convert index memory to fractional value */ + if (mem == (u32)UDS_MEMORY_CONFIG_256MB) + write_string(prefix, "0.25, ", NULL, buf, maxlen); + else if (mem == (u32)UDS_MEMORY_CONFIG_512MB) + write_string(prefix, "0.50, ", NULL, buf, maxlen); + else if (mem == (u32)UDS_MEMORY_CONFIG_768MB) + write_string(prefix, "0.75, ", NULL, buf, maxlen); + else + write_u32(prefix, mem, ", ", buf, maxlen); +} + +static void write_index_config(struct index_config *config, char **buf, + unsigned int *maxlen) +{ + write_string("index : ", "{ ", NULL, buf, maxlen); + /* index mem size */ + write_index_memory(config->mem, buf, maxlen); + /* whether the index is sparse or not */ + write_bool("isSparse : ", config->sparse, ", ", buf, maxlen); + write_string(NULL, "}", ", ", buf, maxlen); +} + +int vdo_write_config(struct vdo *vdo, char **buf, unsigned int *maxlen) +{ + struct vdo_config *config = &vdo->states.vdo.config; + + write_string(NULL, "{ ", NULL, buf, maxlen); + /* version */ + write_u32("version : ", 1, ", ", buf, maxlen); + /* physical size */ + write_block_count_t("physicalSize : ", config->physical_blocks * VDO_BLOCK_SIZE, ", ", + buf, maxlen); + /* logical size */ + write_block_count_t("logicalSize : ", config->logical_blocks * VDO_BLOCK_SIZE, ", ", + buf, maxlen); + /* slab size */ + write_block_count_t("slabSize : ", config->slab_size, ", ", buf, maxlen); + /* index config */ + write_index_config(&vdo->geometry.index_config, buf, maxlen); + write_string(NULL, "}", NULL, buf, maxlen); + return VDO_SUCCESS; +} diff --git a/drivers/md/dm-vdo/message-stats.h b/drivers/md/dm-vdo/message-stats.h index f7fceca9acab..f9c95eff569d 100644 --- a/drivers/md/dm-vdo/message-stats.h +++ b/drivers/md/dm-vdo/message-stats.h @@ -8,6 +8,7 @@ #include "types.h" +int vdo_write_config(struct vdo *vdo, char **buf, unsigned int *maxlen); int vdo_write_stats(struct vdo *vdo, char *buf, unsigned int maxlen); #endif /* VDO_MESSAGE_STATS_H */ From f3ff668352c59db2c0ab47f2e86488a70694ed57 Mon Sep 17 00:00:00 2001 From: Susan LeGendre-McGhee Date: Fri, 19 Jul 2024 17:52:32 -0400 Subject: [PATCH 04/26] dm vdo: abort loading dirty VDO with the old recovery journal format Abort the load process with status code VDO_UNSUPPORTED_VERSION without forcing read-only mode when a journal block with the old format version is detected. Forcing the VDO volume into read-only mode and thus requiring a read-only rebuild should only be done when absolutely necessary. Signed-off-by: Susan LeGendre-McGhee Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/dm-vdo-target.c | 24 +++++++++++++++++++++++- drivers/md/dm-vdo/repair.c | 4 +--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index 4b94f5718ed7..0e04c2021682 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -2296,6 +2296,14 @@ static void handle_load_error(struct vdo_completion *completion) return; } + if ((completion->result == VDO_UNSUPPORTED_VERSION) && + (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) { + vdo_log_error("Aborting load due to unsupported version"); + vdo->admin.phase = LOAD_PHASE_FINISHED; + load_callback(completion); + return; + } + vdo_log_error_strerror(completion->result, "Entering read-only mode due to load error"); vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY; @@ -2740,6 +2748,19 @@ static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo) vdo_log_info("starting device '%s'", device_name); result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback, handle_load_error, "load"); + if (result == VDO_UNSUPPORTED_VERSION) { + /* + * A component version is not supported. This can happen when the + * recovery journal metadata is in an old version format. Abort the + * load without saving the state. + */ + vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING; + perform_admin_operation(vdo, SUSPEND_PHASE_START, + suspend_callback, suspend_callback, + "suspend"); + return result; + } + if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) { /* * Something has gone very wrong. Make sure everything has drained and @@ -2811,7 +2832,8 @@ static int vdo_preresume(struct dm_target *ti) vdo_register_thread_device_id(&instance_thread, &vdo->instance); result = vdo_preresume_registered(ti, vdo); - if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE)) + if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) || + (result == VDO_UNSUPPORTED_VERSION)) result = -EINVAL; vdo_unregister_thread_device_id(); return vdo_status_to_errno(result); diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index 7e0009d2f67d..8aae78bb8a05 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -1575,9 +1575,7 @@ static int parse_journal_for_recovery(struct repair_completion *repair) if (header.metadata_type == VDO_METADATA_RECOVERY_JOURNAL) { /* This is an old format block, so we need to upgrade */ vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION, - "Recovery journal is in the old format, a read-only rebuild is required."); - vdo_enter_read_only_mode(repair->completion.vdo, - VDO_UNSUPPORTED_VERSION); + "Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume"); return VDO_UNSUPPORTED_VERSION; } From 448c4e4eb1901543259c038ed08266c250f5b079 Mon Sep 17 00:00:00 2001 From: Susan LeGendre-McGhee Date: Tue, 30 Jul 2024 17:18:07 -0400 Subject: [PATCH 05/26] dm vdo: force read-only mode for a corrupt recovery journal Ensure the recovery journal does not attempt recovery when blocks with mismatched metadata versions are detected. This check is performed after determining that the blocks are otherwise valid so that it does not interfere with normal recovery. Signed-off-by: Susan LeGendre-McGhee Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/repair.c | 39 ++++++++++++++++++-------------- drivers/md/dm-vdo/status-codes.c | 2 +- drivers/md/dm-vdo/status-codes.h | 2 +- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index 8aae78bb8a05..ffff2c999518 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -1202,17 +1202,14 @@ static bool __must_check is_valid_recovery_journal_block(const struct recovery_j * @journal: The journal to use. * @header: The unpacked block header to check. * @sequence: The expected sequence number. - * @type: The expected metadata type. * * Return: True if the block matches. */ static bool __must_check is_exact_recovery_journal_block(const struct recovery_journal *journal, const struct recovery_block_header *header, - sequence_number_t sequence, - enum vdo_metadata_type type) + sequence_number_t sequence) { - return ((header->metadata_type == type) && - (header->sequence_number == sequence) && + return ((header->sequence_number == sequence) && (is_valid_recovery_journal_block(journal, header, true))); } @@ -1371,7 +1368,8 @@ static void extract_entries_from_block(struct repair_completion *repair, get_recovery_journal_block_header(journal, repair->journal_data, sequence); - if (!is_exact_recovery_journal_block(journal, &header, sequence, format)) { + if (!is_exact_recovery_journal_block(journal, &header, sequence) || + (header.metadata_type != format)) { /* This block is invalid, so skip it. */ return; } @@ -1557,10 +1555,13 @@ static int parse_journal_for_recovery(struct repair_completion *repair) sequence_number_t i, head; bool found_entries = false; struct recovery_journal *journal = repair->completion.vdo->recovery_journal; + struct recovery_block_header header; + enum vdo_metadata_type expected_format; head = min(repair->block_map_head, repair->slab_journal_head); + header = get_recovery_journal_block_header(journal, repair->journal_data, head); + expected_format = header.metadata_type; for (i = head; i <= repair->highest_tail; i++) { - struct recovery_block_header header; journal_entry_count_t block_entries; u8 j; @@ -1572,17 +1573,15 @@ static int parse_journal_for_recovery(struct repair_completion *repair) }; header = get_recovery_journal_block_header(journal, repair->journal_data, i); - if (header.metadata_type == VDO_METADATA_RECOVERY_JOURNAL) { - /* This is an old format block, so we need to upgrade */ - vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION, - "Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume"); - return VDO_UNSUPPORTED_VERSION; - } - - if (!is_exact_recovery_journal_block(journal, &header, i, - VDO_METADATA_RECOVERY_JOURNAL_2)) { + if (!is_exact_recovery_journal_block(journal, &header, i)) { /* A bad block header was found so this must be the end of the journal. */ break; + } else if (header.metadata_type != expected_format) { + /* There is a mix of old and new format blocks, so we need to rebuild. */ + vdo_log_error_strerror(VDO_CORRUPT_JOURNAL, + "Recovery journal is in an invalid format, a read-only rebuild is required."); + vdo_enter_read_only_mode(repair->completion.vdo, VDO_CORRUPT_JOURNAL); + return VDO_CORRUPT_JOURNAL; } block_entries = header.entry_count; @@ -1618,8 +1617,14 @@ static int parse_journal_for_recovery(struct repair_completion *repair) break; } - if (!found_entries) + if (!found_entries) { return validate_heads(repair); + } else if (expected_format == VDO_METADATA_RECOVERY_JOURNAL) { + /* All journal blocks have the old format, so we need to upgrade. */ + vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION, + "Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume"); + return VDO_UNSUPPORTED_VERSION; + } /* Set the tail to the last valid tail block, if there is one. */ if (repair->tail_recovery_point.sector_count == 0) diff --git a/drivers/md/dm-vdo/status-codes.c b/drivers/md/dm-vdo/status-codes.c index d3493450b169..dd252d660b6d 100644 --- a/drivers/md/dm-vdo/status-codes.c +++ b/drivers/md/dm-vdo/status-codes.c @@ -28,7 +28,7 @@ const struct error_info vdo_status_list[] = { { "VDO_LOCK_ERROR", "A lock is held incorrectly" }, { "VDO_READ_ONLY", "The device is in read-only mode" }, { "VDO_SHUTTING_DOWN", "The device is shutting down" }, - { "VDO_CORRUPT_JOURNAL", "Recovery journal entries corrupted" }, + { "VDO_CORRUPT_JOURNAL", "Recovery journal corrupted" }, { "VDO_TOO_MANY_SLABS", "Exceeds maximum number of slabs supported" }, { "VDO_INVALID_FRAGMENT", "Compressed block fragment is invalid" }, { "VDO_RETRY_AFTER_REBUILD", "Retry operation after rebuilding finishes" }, diff --git a/drivers/md/dm-vdo/status-codes.h b/drivers/md/dm-vdo/status-codes.h index 72da04159f88..426dc8e2ca5d 100644 --- a/drivers/md/dm-vdo/status-codes.h +++ b/drivers/md/dm-vdo/status-codes.h @@ -52,7 +52,7 @@ enum vdo_status_codes { VDO_READ_ONLY, /* the VDO is shutting down */ VDO_SHUTTING_DOWN, - /* the recovery journal has corrupt entries */ + /* the recovery journal has corrupt entries or corrupt metadata */ VDO_CORRUPT_JOURNAL, /* exceeds maximum number of slabs supported */ VDO_TOO_MANY_SLABS, From 013f510d1fce563f37f420bf231b065885a16f21 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 1 Aug 2024 21:31:21 +0800 Subject: [PATCH 06/26] dm: Remove unused declaration dm_get_rq_mapinfo() Commit ae6ad75e5c3c ("dm: remove unused dm_get_rq_mapinfo()") removed the implementation but leave declaration. Signed-off-by: Yue Haibing Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 53ca3a913d06..8321f65897f3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -524,7 +524,6 @@ int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors); void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); -union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED struct dm_report_zones_args { From 5d3691a8266e77fd2d695064532d0926af974331 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 8 Aug 2024 13:50:36 +0200 Subject: [PATCH 07/26] dm delay: enhance kernel documentation This commit improves documentation of the dm-delay target. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mikulas Patocka --- .../admin-guide/device-mapper/delay.rst | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/delay.rst b/Documentation/admin-guide/device-mapper/delay.rst index 917ba8c33359..4d667228e744 100644 --- a/Documentation/admin-guide/device-mapper/delay.rst +++ b/Documentation/admin-guide/device-mapper/delay.rst @@ -3,29 +3,52 @@ dm-delay ======== Device-Mapper's "delay" target delays reads and/or writes -and maps them to different devices. +and/or flushs and optionally maps them to different devices. -Parameters:: +Arguments:: [ [ ]] -With separate write parameters, the first set is only used for reads. +Table line has to either have 3, 6 or 9 arguments: + +3: apply offset and delay to read, write and flush operations on device + +6: apply offset and delay to device, also apply write_offset and write_delay + to write and flush operations on optionally different write_device with + optionally different sector offset + +9: same as 6 arguments plus define flush_offset and flush_delay explicitely + on/with optionally different flush_device/flush_offset. + Offsets are specified in sectors. + Delays are specified in milliseconds. + Example scripts =============== :: - #!/bin/sh - # Create device delaying rw operation for 500ms - echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed + # + # Create mapped device named "delayed" delaying read, write and flush operations for 500ms. + # + dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500" :: - #!/bin/sh - # Create device delaying only write operation for 500ms and - # splitting reads and writes to different devices $1 $2 - echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed + # + # Create mapped device delaying write and flush operations for 400ms and + # splitting reads to device $1 but writes and flushs to different device $2 + # to different offsets of 2048 and 4096 sectors respectively. + # + dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400" + +:: + #!/bin/sh + # + # Create mapped device delaying reads for 50ms, writes for 100ms and flushs for 333ms + # onto the same backing device at offset 0 sectors. + # + dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333" From f3631ae11d4694e2befff9dd10dab8cd56033f6c Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Mon, 12 Aug 2024 19:53:09 +0800 Subject: [PATCH 08/26] dm: Remove unused declaration and empty definition "dm_zone_map_bio" dm_zone_map_bio() has beed removed since commit f211268ed1f9 ("dm: Use the block layer zone append emulation"), remain the declaration unused in header files. So, let's remove this unused declaration and empty definition. Signed-off-by: Zhang Zekun Reviewed-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- drivers/md/dm.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index cc466ad5cb1d..8ad782249af8 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -109,7 +109,6 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone); int dm_blk_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); -int dm_zone_map_bio(struct dm_target_io *io); int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, sector_t sector, unsigned int nr_zones, unsigned long *need_reset); @@ -119,10 +118,6 @@ static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) { return false; } -static inline int dm_zone_map_bio(struct dm_target_io *tio) -{ - return DM_MAPIO_KILL; -} #endif /* From 4441686b24a1d7acf9834ca95864d67e3f97666a Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Fri, 16 Aug 2024 13:21:33 +0200 Subject: [PATCH 09/26] dm-crypt: Allow to specify the integrity key size as option For the MAC based integrity operation, the integrity key size (i.e. key_mac_size) is currently set to the digest size of the used digest. For wrapped key HMAC algorithms, the key size is independent of the cryptographic key size. So there is no known size of the mac key in such cases. The desired key size can optionally be specified as argument when the dm-crypt device is configured via 'integrity_key_size:%u'. If no integrity_key_size argument is specified, the mac key size is still set to the digest size, as before. Increase version number to 1.28.0 so that support for the new argument can be detected by user space (i.e. cryptsetup). Signed-off-by: Ingo Franzki Reviewed-by: Milan Broz Signed-off-by: Mikulas Patocka --- .../admin-guide/device-mapper/dm-crypt.rst | 4 ++++ drivers/md/dm-crypt.c | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index 552c9155165d..cde1d5594b0d 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -160,6 +160,10 @@ iv_large_sectors The must be multiple of (in 512 bytes units) if this flag is specified. +integrity_key_size: + Use an integrity key of size instead of using an integrity key size + of the digest size of the used HMAC algorithm. + Module parameters:: diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 348b4b26c272..d5533b43054e 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -147,6 +147,7 @@ enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */ CRYPT_IV_LARGE_SECTORS, /* Calculate IV from sector_size, not 512B sectors */ CRYPT_ENCRYPT_PREPROCESS, /* Must preprocess data for encryption (elephant) */ + CRYPT_KEY_MAC_SIZE_SET, /* The integrity_key_size option was used */ }; /* @@ -2937,7 +2938,8 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api) if (IS_ERR(mac)) return PTR_ERR(mac); - cc->key_mac_size = crypto_ahash_digestsize(mac); + if (!test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags)) + cc->key_mac_size = crypto_ahash_digestsize(mac); crypto_free_ahash(mac); cc->authenc_key = kmalloc(crypt_authenckey_size(cc), GFP_KERNEL); @@ -3219,6 +3221,13 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar cc->cipher_auth = kstrdup(sval, GFP_KERNEL); if (!cc->cipher_auth) return -ENOMEM; + } else if (sscanf(opt_string, "integrity_key_size:%u%c", &val, &dummy) == 1) { + if (!val) { + ti->error = "Invalid integrity_key_size argument"; + return -EINVAL; + } + cc->key_mac_size = val; + set_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags); } else if (sscanf(opt_string, "sector_size:%hu%c", &cc->sector_size, &dummy) == 1) { if (cc->sector_size < (1 << SECTOR_SHIFT) || cc->sector_size > 4096 || @@ -3607,10 +3616,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); + num_feature_args += !!cc->used_tag_size; num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT); num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); - if (cc->used_tag_size) - num_feature_args++; + num_feature_args += test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags); if (num_feature_args) { DMEMIT(" %d", num_feature_args); if (ti->num_discard_bios) @@ -3631,6 +3640,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" sector_size:%d", cc->sector_size); if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) DMEMIT(" iv_large_sectors"); + if (test_bit(CRYPT_KEY_MAC_SIZE_SET, &cc->cipher_flags)) + DMEMIT(" integrity_key_size:%u", cc->key_mac_size); } break; @@ -3758,7 +3769,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 27, 0}, + .version = {1, 28, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, From 02c0207ecdcc9abdf3442676154f8fd2be0f649a Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Thu, 22 Aug 2024 10:14:00 +0800 Subject: [PATCH 10/26] dm bufio: Remove NULL check of list_entry() list_entry() will never return a NULL pointer, thus remove the check. Signed-off-by: Yuesong Li Signed-off-by: Mikulas Patocka --- drivers/md/dm-bufio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 098bf526136c..d478aafa02c9 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -529,9 +529,6 @@ static struct dm_buffer *list_to_buffer(struct list_head *l) { struct lru_entry *le = list_entry(l, struct lru_entry, list); - if (!le) - return NULL; - return le_to_buffer(le); } From 00565cff01262c888512bffe9d41e4831a6f2cc7 Mon Sep 17 00:00:00 2001 From: Yuesong Li Date: Fri, 30 Aug 2024 11:16:42 +0800 Subject: [PATCH 11/26] dm: Convert to use ERR_CAST() Use ERR_CAST() as it is designed for casting an error pointer to another type. This macro utilizes the __force and __must_check modifiers, which instruct the compiler to verify for errors at the locations where it is employed. Signed-off-by: Yuesong Li Signed-off-by: Mikulas Patocka --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a0c1620e90c8..89632ce97760 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2948,7 +2948,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; - return (struct pool *)pmd; + return ERR_CAST(pmd); } pool = kzalloc(sizeof(*pool), GFP_KERNEL); From 35c9f09b5691d6dff1f3e62fe1825fa10b644b45 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Sat, 31 Aug 2024 17:48:01 +0800 Subject: [PATCH 12/26] dm integrity: Remove extra unlikely helper In IS_ERR, the unlikely is used for the input parameter, so these is no need to use it again outside. Signed-off-by: Hongbo Li Signed-off-by: Kunwu Chan Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 51e6964c1305..8306f8511078 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2447,7 +2447,7 @@ retry: bio->bi_iter.bi_sector += ic->start + SB_SECTORS; bip = bio_integrity_alloc(bio, GFP_NOIO, 1); - if (unlikely(IS_ERR(bip))) { + if (IS_ERR(bip)) { bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); bio_endio(bio); return DM_MAPIO_SUBMITTED; @@ -2520,7 +2520,7 @@ static void dm_integrity_inline_recheck(struct work_struct *w) } bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1); - if (unlikely(IS_ERR(bip))) { + if (IS_ERR(bip)) { bio_put(outgoing_bio); bio->bi_status = errno_to_blk_status(PTR_ERR(bip)); bio_endio(bio); From 26207c6332e83583a74228da9e5278d4fe5d26cf Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Mon, 2 Sep 2024 21:11:23 +0800 Subject: [PATCH 13/26] dm: Make use of __assign_bit() API We have for some time the __assign_bit() API to replace open coded if (foo) __set_bit(n, bar); else __clear_bit(n, bar); Use this API to simplify the code. No functional change intended. Signed-off-by: Hongbo Li Signed-off-by: Mikulas Patocka --- drivers/md/dm-clone-metadata.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c index 2db84cd2202b..14c5c28d938b 100644 --- a/drivers/md/dm-clone-metadata.c +++ b/drivers/md/dm-clone-metadata.c @@ -530,10 +530,7 @@ static int __load_bitset_in_core(struct dm_clone_metadata *cmd) return r; for (i = 0; ; i++) { - if (dm_bitset_cursor_get_value(&c)) - __set_bit(i, cmd->region_map); - else - __clear_bit(i, cmd->region_map); + __assign_bit(i, cmd->region_map, dm_bitset_cursor_get_value(&c)); if (i >= (cmd->nr_regions - 1)) break; From a8fa6483b40943a6c8feea803a2dc8e9982cc766 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Sep 2024 11:50:11 +0200 Subject: [PATCH 14/26] dm integrity: fix gcc 5 warning This commit fixes gcc 5 warning "logical not is only applied to the left hand side of comparison" Reported-by: Geert Uytterhoeven Fixes: fb0987682c62 ("dm-integrity: introduce the Inline mode") Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 8306f8511078..b3489d3fe7db 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4715,13 +4715,18 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv ti->error = "Block size doesn't match the information in superblock"; goto bad; } - if (!le32_to_cpu(ic->sb->journal_sections) != (ic->mode == 'I')) { - r = -EINVAL; - if (ic->mode != 'I') + if (ic->mode != 'I') { + if (!le32_to_cpu(ic->sb->journal_sections)) { + r = -EINVAL; ti->error = "Corrupted superblock, journal_sections is 0"; - else + goto bad; + } + } else { + if (le32_to_cpu(ic->sb->journal_sections)) { + r = -EINVAL; ti->error = "Corrupted superblock, journal_sections is not 0"; - goto bad; + goto bad; + } } /* make sure that ti->max_io_len doesn't overflow */ if (!ic->meta_dev) { From a7722b82c2ca9ca771d6c698643883be76f61a7e Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Thu, 5 Sep 2024 10:28:32 +0800 Subject: [PATCH 15/26] dm integrity: Convert comma to semicolon Replace comma between expressions with semicolons. Using a ',' in place of a ';' can have unintended side effects. Although that is not the case here, it is seems best to use ';' unless ',' is intended. Found by inspection. No functional change intended. Compile tested only. Signed-off-by: Chen Ni Reviewed-by: Mike Snitzer Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b3489d3fe7db..34fa98efa9fa 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1500,15 +1500,15 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat if (!ic->meta_dev) flush_data = false; if (flush_data) { - fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, - fr.io_req.mem.type = DM_IO_KMEM, - fr.io_req.mem.ptr.addr = NULL, - fr.io_req.notify.fn = flush_notify, + fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + fr.io_req.mem.type = DM_IO_KMEM; + fr.io_req.mem.ptr.addr = NULL; + fr.io_req.notify.fn = flush_notify; fr.io_req.notify.context = &fr; - fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), - fr.io_reg.bdev = ic->dev->bdev, - fr.io_reg.sector = 0, - fr.io_reg.count = 0, + fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio); + fr.io_reg.bdev = ic->dev->bdev; + fr.io_reg.sector = 0; + fr.io_reg.count = 0; fr.ic = ic; init_completion(&fr.comp); r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT); From 90da77987dd59c8f6ec6d508d23d5a77c7af64f1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 5 Sep 2024 21:01:52 +0200 Subject: [PATCH 16/26] dm-integrity: support recalculation in the 'I' mode In the kernel 6.11, dm-integrity was enhanced with an inline ('I') mode. This mode uses devices with non-power-of-2 sector size. The extra metadata after each sector are used to hold the integrity hash. This commit enhances the inline mode, so that there is automatic recalculation of the integrity hashes when the 'reclaculate' parameter is used. It allows us to activate the device instantly, and the recalculation is done on background. If the device is deactivated while recalculation is in progress, it will remember the point where it stopped and it will continue from this point when activated again. Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 288 ++++++++++++++++++++++++++++++++------ 1 file changed, 246 insertions(+), 42 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 34fa98efa9fa..c40df05e0521 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -284,6 +284,7 @@ struct dm_integrity_c { mempool_t recheck_pool; struct bio_set recheck_bios; + struct bio_set recalc_bios; struct notifier_block reboot_notifier; }; @@ -321,7 +322,9 @@ struct dm_integrity_io { struct dm_bio_details bio_details; char *integrity_payload; + unsigned payload_len; bool integrity_payload_from_mempool; + bool integrity_range_locked; }; struct journal_completion { @@ -359,7 +362,7 @@ static struct kmem_cache *journal_io_cache; #endif static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map); -static int dm_integrity_map_inline(struct dm_integrity_io *dio); +static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map); static void integrity_bio_wait(struct work_struct *w); static void dm_integrity_dtr(struct dm_target *ti); @@ -1946,8 +1949,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) dio->bi_status = 0; dio->op = bio_op(bio); - if (ic->mode == 'I') - return dm_integrity_map_inline(dio); + if (ic->mode == 'I') { + bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector); + dio->integrity_payload = NULL; + dio->integrity_payload_from_mempool = false; + dio->integrity_range_locked = false; + return dm_integrity_map_inline(dio, true); + } if (unlikely(dio->op == REQ_OP_DISCARD)) { if (ti->max_io_len) { @@ -2395,15 +2403,13 @@ journal_read_write: do_endio_flush(ic, dio); } -static int dm_integrity_map_inline(struct dm_integrity_io *dio) +static int dm_integrity_map_inline(struct dm_integrity_io *dio, bool from_map) { struct dm_integrity_c *ic = dio->ic; struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); struct bio_integrity_payload *bip; - unsigned payload_len, digest_size, extra_size, ret; - - dio->integrity_payload = NULL; - dio->integrity_payload_from_mempool = false; + unsigned ret; + sector_t recalc_sector; if (unlikely(bio_integrity(bio))) { bio->bi_status = BLK_STS_NOTSUPP; @@ -2416,28 +2422,67 @@ static int dm_integrity_map_inline(struct dm_integrity_io *dio) return DM_MAPIO_REMAPPED; retry: - payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block); - digest_size = crypto_shash_digestsize(ic->internal_hash); - extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; - payload_len += extra_size; - dio->integrity_payload = kmalloc(payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (unlikely(!dio->integrity_payload)) { - const unsigned x_size = PAGE_SIZE << 1; - if (payload_len > x_size) { - unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block; - if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) { - bio->bi_status = BLK_STS_NOTSUPP; - bio_endio(bio); - return DM_MAPIO_SUBMITTED; + if (!dio->integrity_payload) { + unsigned digest_size, extra_size; + dio->payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block); + digest_size = crypto_shash_digestsize(ic->internal_hash); + extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; + dio->payload_len += extra_size; + dio->integrity_payload = kmalloc(dio->payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (unlikely(!dio->integrity_payload)) { + const unsigned x_size = PAGE_SIZE << 1; + if (dio->payload_len > x_size) { + unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block; + if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) { + bio->bi_status = BLK_STS_NOTSUPP; + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + dm_accept_partial_bio(bio, sectors); + goto retry; } - dm_accept_partial_bio(bio, sectors); - goto retry; } + } + + dio->range.logical_sector = bio->bi_iter.bi_sector; + dio->range.n_sectors = bio_sectors(bio); + + if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) + goto skip_spinlock; +#ifdef CONFIG_64BIT + /* + * On 64-bit CPUs we can optimize the lock away (so that it won't cause + * cache line bouncing) and use acquire/release barriers instead. + * + * Paired with smp_store_release in integrity_recalc_inline. + */ + recalc_sector = le64_to_cpu(smp_load_acquire(&ic->sb->recalc_sector)); + if (likely(dio->range.logical_sector + dio->range.n_sectors <= recalc_sector)) + goto skip_spinlock; +#endif + spin_lock_irq(&ic->endio_wait.lock); + recalc_sector = le64_to_cpu(ic->sb->recalc_sector); + if (dio->range.logical_sector + dio->range.n_sectors <= recalc_sector) + goto skip_unlock; + if (unlikely(!add_new_range(ic, &dio->range, true))) { + if (from_map) { + spin_unlock_irq(&ic->endio_wait.lock); + INIT_WORK(&dio->work, integrity_bio_wait); + queue_work(ic->wait_wq, &dio->work); + return DM_MAPIO_SUBMITTED; + } + wait_and_add_new_range(ic, &dio->range); + } + dio->integrity_range_locked = true; +skip_unlock: + spin_unlock_irq(&ic->endio_wait.lock); +skip_spinlock: + + if (unlikely(!dio->integrity_payload)) { dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO)); dio->integrity_payload_from_mempool = true; } - bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector); dio->bio_details.bi_iter = bio->bi_iter; if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) { @@ -2468,8 +2513,8 @@ retry: } ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload), - payload_len, offset_in_page(dio->integrity_payload)); - if (unlikely(ret != payload_len)) { + dio->payload_len, offset_in_page(dio->integrity_payload)); + if (unlikely(ret != dio->payload_len)) { bio->bi_status = BLK_STS_RESOURCE; bio_endio(bio); return DM_MAPIO_SUBMITTED; @@ -2577,6 +2622,9 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK)) { unsigned pos = 0; + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + unlikely(dio->integrity_range_locked)) + goto skip_check; while (dio->bio_details.bi_iter.bi_size) { char digest[HASH_MAX_DIGESTSIZE]; struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter); @@ -2596,9 +2644,10 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT); } } - if (likely(dio->op == REQ_OP_READ) || likely(dio->op == REQ_OP_WRITE)) { - dm_integrity_free_payload(dio); - } +skip_check: + dm_integrity_free_payload(dio); + if (unlikely(dio->integrity_range_locked)) + remove_range(ic, &dio->range); } return DM_ENDIO_DONE; } @@ -2606,8 +2655,26 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status static void integrity_bio_wait(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); + struct dm_integrity_c *ic = dio->ic; - dm_integrity_map_continue(dio, false); + if (ic->mode == 'I') { + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + int r = dm_integrity_map_inline(dio, false); + switch (r) { + case DM_MAPIO_KILL: + bio->bi_status = BLK_STS_IOERR; + fallthrough; + case DM_MAPIO_REMAPPED: + submit_bio_noacct(bio); + fallthrough; + case DM_MAPIO_SUBMITTED: + return; + default: + BUG(); + } + } else { + dm_integrity_map_continue(dio, false); + } } static void pad_uncommitted(struct dm_integrity_c *ic) @@ -3079,6 +3146,133 @@ free_ret: kvfree(recalc_tags); } +static void integrity_recalc_inline(struct work_struct *w) +{ + struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); + size_t recalc_tags_size; + u8 *recalc_buffer = NULL; + u8 *recalc_tags = NULL; + struct dm_integrity_range range; + struct bio *bio; + struct bio_integrity_payload *bip; + __u8 *t; + unsigned int i; + int r; + unsigned ret; + unsigned int super_counter = 0; + unsigned recalc_sectors = RECALC_SECTORS; + +retry: + recalc_buffer = kmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO | __GFP_NOWARN); + if (!recalc_buffer) { +oom: + recalc_sectors >>= 1; + if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block) + goto retry; + DMCRIT("out of memory for recalculate buffer - recalculation disabled"); + goto free_ret; + } + + recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tuple_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tuple_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tuple_size; + recalc_tags = kmalloc(recalc_tags_size, GFP_NOIO | __GFP_NOWARN); + if (!recalc_tags) { + kfree(recalc_buffer); + recalc_buffer = NULL; + goto oom; + } + + spin_lock_irq(&ic->endio_wait.lock); + +next_chunk: + if (unlikely(dm_post_suspending(ic->ti))) + goto unlock_ret; + + range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); + if (unlikely(range.logical_sector >= ic->provided_data_sectors)) + goto unlock_ret; + range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector); + + add_new_range_and_wait(ic, &range); + spin_unlock_irq(&ic->endio_wait.lock); + + if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { + recalc_write_super(ic); + super_counter = 0; + } + + if (unlikely(dm_integrity_failed(ic))) + goto err; + + DEBUG_print("recalculating: %llx - %llx\n", range.logical_sector, range.n_sectors); + + bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recalc_bios); + bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; + __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer)); + r = submit_bio_wait(bio); + bio_put(bio); + if (unlikely(r)) { + dm_integrity_io_error(ic, "reading data", r); + goto err; + } + + t = recalc_tags; + for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) { + memset(t, 0, ic->tuple_size); + integrity_sector_checksum(ic, range.logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t); + t += ic->tuple_size; + } + + bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_WRITE, GFP_NOIO, &ic->recalc_bios); + bio->bi_iter.bi_sector = ic->start + SB_SECTORS + range.logical_sector; + __bio_add_page(bio, virt_to_page(recalc_buffer), range.n_sectors << SECTOR_SHIFT, offset_in_page(recalc_buffer)); + + bip = bio_integrity_alloc(bio, GFP_NOIO, 1); + if (unlikely(IS_ERR(bip))) { + bio_put(bio); + DMCRIT("out of memory for bio integrity payload - recalculation disabled"); + goto err; + } + ret = bio_integrity_add_page(bio, virt_to_page(recalc_tags), t - recalc_tags, offset_in_page(recalc_tags)); + if (unlikely(ret != t - recalc_tags)) { + bio_put(bio); + dm_integrity_io_error(ic, "attaching integrity tags", -ENOMEM); + goto err; + } + + r = submit_bio_wait(bio); + bio_put(bio); + if (unlikely(r)) { + dm_integrity_io_error(ic, "writing data", r); + goto err; + } + + cond_resched(); + spin_lock_irq(&ic->endio_wait.lock); + remove_range_unlocked(ic, &range); +#ifdef CONFIG_64BIT + /* Paired with smp_load_acquire in dm_integrity_map_inline. */ + smp_store_release(&ic->sb->recalc_sector, cpu_to_le64(range.logical_sector + range.n_sectors)); +#else + ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); +#endif + goto next_chunk; + +err: + remove_range(ic, &range); + goto free_ret; + +unlock_ret: + spin_unlock_irq(&ic->endio_wait.lock); + + recalc_write_super(ic); + +free_ret: + kfree(recalc_buffer); + kfree(recalc_tags); +} + static void bitmap_block_work(struct work_struct *w) { struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work); @@ -4617,6 +4811,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv r = -ENOMEM; goto bad; } + r = bioset_init(&ic->recalc_bios, 1, 0, BIOSET_NEED_BVECS); + if (r) { + ti->error = "Cannot allocate bio set"; + goto bad; + } + r = bioset_integrity_create(&ic->recalc_bios, 1); + if (r) { + ti->error = "Cannot allocate bio integrity set"; + r = -ENOMEM; + goto bad; + } } ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", @@ -4833,7 +5038,7 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } - INIT_WORK(&ic->recalc_work, integrity_recalc); + INIT_WORK(&ic->recalc_work, ic->mode == 'I' ? integrity_recalc_inline : integrity_recalc); } else { if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { ti->error = "Recalculate can only be specified with internal_hash"; @@ -4850,17 +5055,15 @@ try_smaller_buffer: goto bad; } - if (ic->mode != 'I') { - ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, - 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); - if (IS_ERR(ic->bufio)) { - r = PTR_ERR(ic->bufio); - ti->error = "Cannot initialize dm-bufio"; - ic->bufio = NULL; - goto bad; - } - dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); + ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, + 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0); + if (IS_ERR(ic->bufio)) { + r = PTR_ERR(ic->bufio); + ti->error = "Cannot initialize dm-bufio"; + ic->bufio = NULL; + goto bad; } + dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); if (ic->mode != 'R' && ic->mode != 'I') { r = create_journal(ic, &ti->error); @@ -4982,6 +5185,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + bioset_exit(&ic->recalc_bios); bioset_exit(&ic->recheck_bios); mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); @@ -5036,7 +5240,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 12, 0}, + .version = {1, 13, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, From 9c2010bccc0ce012f52de18ebd0c3add241f75b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 10 Sep 2024 10:52:59 -0700 Subject: [PATCH 17/26] dm-integrity: check mac_size against HASH_MAX_DIGESTSIZE in sb_mac() sb_mac() verifies that the superblock + MAC don't exceed 512 bytes. Because the superblock is currently 64 bytes, this really verifies mac_size <= 448. This confuses smatch into thinking that mac_size may be as large as 448, which is inconsistent with the later code that assumes the MAC fits in a buffer of size HASH_MAX_DIGESTSIZE (64). In fact mac_size <= HASH_MAX_DIGESTSIZE is guaranteed by the crypto API, as that is the whole point of HASH_MAX_DIGESTSIZE. But, let's be defensive and explicitly check for this. This suppresses the false positive smatch warning. It does not fix an actual bug. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202409061401.44rtN1bh-lkp@intel.com/ Signed-off-by: Eric Biggers Signed-off-by: Mikulas Patocka --- drivers/md/dm-integrity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c40df05e0521..42c9dc2ee0c0 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -494,7 +494,8 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) __u8 *sb = (__u8 *)ic->sb; __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size; - if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) { + if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT || + mac_size > HASH_MAX_DIGESTSIZE) { dm_integrity_io_error(ic, "digest is too long", -EINVAL); return -EINVAL; } From c8691cd0fc11197515ed148de0780d927bfca38b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 13 Sep 2024 15:05:18 +0200 Subject: [PATCH 18/26] Revert "dm: requeue IO if mapping table not yet available" This reverts commit fa247089de9936a46e290d4724cb5f0b845600f5. The following sequence of commands causes a livelock - there will be workqueue process looping and consuming 100% CPU: dmsetup create --notable test truncate -s 1MiB testdata losetup /dev/loop0 testdata dmsetup load test --table '0 2048 linear /dev/loop0 0' dd if=/dev/zero of=/dev/dm-0 bs=16k count=1 conv=fdatasync The livelock is caused by the commit fa247089de99. The commit claims that it fixes a race condition, however, it is unknown what the actual race condition is and what program is involved in the race condition. When the inactive table is loaded, the nodes /dev/dm-0 and /sys/block/dm-0 are created. /dev/dm-0 has zero size at this point. When the device is suspended and resumed, the nodes /dev/mapper/test and /dev/disk/* are created. If some program opens a block device before it is created by dmsetup or lvm, the program is buggy, so dm could just report an error as it used to do before. Reported-by: Zdenek Kabelac Signed-off-by: Mikulas Patocka Fixes: fa247089de99 ("dm: requeue IO if mapping table not yet available") --- drivers/md/dm-rq.c | 4 +++- drivers/md/dm.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index f7e9a3632eb3..499f8cc8a39f 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -496,8 +496,10 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, map = dm_get_live_table(md, &srcu_idx); if (unlikely(!map)) { + DMERR_LIMIT("%s: mapping table unavailable, erroring io", + dm_device_name(md)); dm_put_live_table(md, srcu_idx); - return BLK_STS_RESOURCE; + return BLK_STS_IOERR; } ti = dm_table_find_target(map, 0); dm_put_live_table(md, srcu_idx); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 87bb90303435..ff4a6b570b76 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2030,10 +2030,15 @@ static void dm_submit_bio(struct bio *bio) struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); + if (unlikely(!map)) { + DMERR_LIMIT("%s: mapping table unavailable, erroring io", + dm_device_name(md)); + bio_io_error(bio); + goto out; + } - /* If suspended, or map not yet available, queue this IO for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || - unlikely(!map)) { + /* If suspended, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else if (bio->bi_opf & REQ_RAHEAD) From c5391c0e04f1b6ede3623962192b08a4eb224491 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 18 Sep 2024 15:05:29 +0200 Subject: [PATCH 19/26] dm-crypt: Use up_read() together with key_put() only once in crypt_set_keyring_key() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The combination of the calls “up_read(&key->sem)” and “key_put(key)” was immediately used after a return code check for a set_key() call in this function implementation. Thus use such a function call pair only once instead directly before the check. This issue was transformed by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Mikulas Patocka --- drivers/md/dm-crypt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d5533b43054e..dae2fe3cb182 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2621,16 +2621,13 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string down_read(&key->sem); ret = set_key(cc, key); + up_read(&key->sem); + key_put(key); if (ret < 0) { - up_read(&key->sem); - key_put(key); kfree_sensitive(new_key_string); return ret; } - up_read(&key->sem); - key_put(key); - /* clear the flag since following operations may invalidate previously valid key */ clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); From 5d49054ef616095d160c1072ba458e16e2f825de Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 18 Sep 2024 15:34:45 +0200 Subject: [PATCH 20/26] dm-crypt: Use common error handling code in crypt_set_keyring_key() Add a jump target so that a bit of exception handling can be better reused at the end of this function implementation. Signed-off-by: Markus Elfring Signed-off-by: Mikulas Patocka --- drivers/md/dm-crypt.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index dae2fe3cb182..5228b03b6fe0 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2614,32 +2614,31 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string key = request_key(type, key_desc + 1, NULL); if (IS_ERR(key)) { - kfree_sensitive(new_key_string); - return PTR_ERR(key); + ret = PTR_ERR(key); + goto free_new_key_string; } down_read(&key->sem); - ret = set_key(cc, key); up_read(&key->sem); key_put(key); - if (ret < 0) { - kfree_sensitive(new_key_string); - return ret; - } + if (ret < 0) + goto free_new_key_string; /* clear the flag since following operations may invalidate previously valid key */ clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); ret = crypt_setkey(cc); + if (ret) + goto free_new_key_string; - if (!ret) { - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - kfree_sensitive(cc->key_string); - cc->key_string = new_key_string; - } else - kfree_sensitive(new_key_string); + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); + kfree_sensitive(cc->key_string); + cc->key_string = new_key_string; + return 0; +free_new_key_string: + kfree_sensitive(new_key_string); return ret; } From 9bcd923952078e08cd4570e15f751a6c9ec7633f Mon Sep 17 00:00:00 2001 From: Shen Lichuan Date: Sat, 14 Sep 2024 14:38:08 +0800 Subject: [PATCH 21/26] dm vdo indexer: Convert comma to semicolon To ensure code clarity and prevent potential errors, it's advisable to employ the ';' as a statement separator, except when ',' are intentionally used for specific purposes. Signed-off-by: Shen Lichuan Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/indexer/chapter-index.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-vdo/indexer/chapter-index.c b/drivers/md/dm-vdo/indexer/chapter-index.c index 7e32a25d3f2f..fb1db41c794b 100644 --- a/drivers/md/dm-vdo/indexer/chapter-index.c +++ b/drivers/md/dm-vdo/indexer/chapter-index.c @@ -177,7 +177,7 @@ int uds_pack_open_chapter_index_page(struct open_chapter_index *chapter_index, if (list_number < 0) return UDS_OVERFLOW; - next_list = first_list + list_number--, + next_list = first_list + list_number--; result = uds_start_delta_index_search(delta_index, next_list, 0, &entry); if (result != UDS_SUCCESS) From 66cac80698cd1e31ae9bc5c271e83209903d4861 Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Tue, 17 Sep 2024 23:24:37 -0400 Subject: [PATCH 22/26] dm vdo: handle unaligned discards correctly Reset the data_vio properly for each discard block, and delay acknowledgement and cleanup until all discard blocks are complete. Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/data-vio.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c index ab3ea8337809..0d502f6a86ad 100644 --- a/drivers/md/dm-vdo/data-vio.c +++ b/drivers/md/dm-vdo/data-vio.c @@ -501,6 +501,7 @@ static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lb memset(&data_vio->record_name, 0, sizeof(data_vio->record_name)); memset(&data_vio->duplicate, 0, sizeof(data_vio->duplicate)); + vdo_reset_completion(&data_vio->decrement_completion); vdo_reset_completion(completion); completion->error_handler = handle_data_vio_error; set_data_vio_logical_callback(data_vio, attempt_logical_block_lock); @@ -1273,12 +1274,14 @@ static void clean_hash_lock(struct vdo_completion *completion) static void finish_cleanup(struct data_vio *data_vio) { struct vdo_completion *completion = &data_vio->vio.completion; + u32 discard_size = min_t(u32, data_vio->remaining_discard, + VDO_BLOCK_SIZE - data_vio->offset); VDO_ASSERT_LOG_ONLY(data_vio->allocation.lock == NULL, "complete data_vio has no allocation lock"); VDO_ASSERT_LOG_ONLY(data_vio->hash_lock == NULL, "complete data_vio has no hash lock"); - if ((data_vio->remaining_discard <= VDO_BLOCK_SIZE) || + if ((data_vio->remaining_discard <= discard_size) || (completion->result != VDO_SUCCESS)) { struct data_vio_pool *pool = completion->vdo->data_vio_pool; @@ -1287,12 +1290,12 @@ static void finish_cleanup(struct data_vio *data_vio) return; } - data_vio->remaining_discard -= min_t(u32, data_vio->remaining_discard, - VDO_BLOCK_SIZE - data_vio->offset); + data_vio->remaining_discard -= discard_size; data_vio->is_partial = (data_vio->remaining_discard < VDO_BLOCK_SIZE); data_vio->read = data_vio->is_partial; data_vio->offset = 0; completion->requeue = true; + data_vio->first_reference_operation_complete = false; launch_data_vio(data_vio, data_vio->logical.lbn + 1); } @@ -1965,7 +1968,8 @@ static void allocate_block(struct vdo_completion *completion) .state = VDO_MAPPING_STATE_UNCOMPRESSED, }; - if (data_vio->fua) { + if (data_vio->fua || + data_vio->remaining_discard > (u32) (VDO_BLOCK_SIZE - data_vio->offset)) { prepare_for_dedupe(data_vio); return; } @@ -2042,7 +2046,6 @@ void continue_data_vio_with_block_map_slot(struct vdo_completion *completion) return; } - /* * We don't need to write any data, so skip allocation and just update the block map and * reference counts (via the journal). @@ -2051,7 +2054,7 @@ void continue_data_vio_with_block_map_slot(struct vdo_completion *completion) if (data_vio->is_zero) data_vio->new_mapped.state = VDO_MAPPING_STATE_UNCOMPRESSED; - if (data_vio->remaining_discard > VDO_BLOCK_SIZE) { + if (data_vio->remaining_discard > (u32) (VDO_BLOCK_SIZE - data_vio->offset)) { /* This is not the final block of a discard so we can't acknowledge it yet. */ update_metadata_for_data_vio_write(data_vio, NULL); return; From 4feb014bc79a42485b15bc3912dd3b0bca592520 Mon Sep 17 00:00:00 2001 From: Dipendra Khadka Date: Sun, 22 Sep 2024 16:47:01 +0000 Subject: [PATCH 23/26] dm-cache: remove pointless error check Smatch reported following: ''' drivers/md/dm-cache-target.c:3204 parse_cblock_range() warn: sscanf doesn't return error codes drivers/md/dm-cache-target.c:3217 parse_cblock_range() warn: sscanf doesn't return error codes ''' Sscanf doesn't return negative values at all. Signed-off-by: Dipendra Khadka Signed-off-by: Mikulas Patocka --- drivers/md/dm-cache-target.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 17f0fab1e254..621be35c2b00 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3200,8 +3200,6 @@ static int parse_cblock_range(struct cache *cache, const char *str, * Try and parse form (ii) first. */ r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); - if (r < 0) - return r; if (r == 2) { result->begin = to_cblock(b); @@ -3213,8 +3211,6 @@ static int parse_cblock_range(struct cache *cache, const char *str, * That didn't work, try form (i). */ r = sscanf(str, "%llu%c", &b, &dummy); - if (r < 0) - return r; if (r == 1) { result->begin = to_cblock(b); From 0a92e5cdeef9fa4cba8bef6cd1d91cff6b5d300b Mon Sep 17 00:00:00 2001 From: Shen Lichuan Date: Tue, 24 Sep 2024 15:21:11 +0200 Subject: [PATCH 24/26] dm: fix spelling errors Fixed some confusing spelling errors that were currently identified, the details are as follows: -in the code comments: dm-cache-target.c: 1371: exclussive ==> exclusive dm-raid.c: 2522: repective ==> respective Signed-off-by: Shen Lichuan Signed-off-by: Mikulas Patocka --- drivers/md/dm-cache-target.c | 2 +- drivers/md/dm-raid.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 621be35c2b00..aaeeabfab09b 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1368,7 +1368,7 @@ static void mg_copy(struct work_struct *ws) */ bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio); - BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */ + BUG_ON(rb); /* An exclusive lock must _not_ be held for this block */ mg->overwrite_bio = NULL; inc_io_migrations(mg->cache); mg_full_copy(ws); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 0c3323e0adb2..32c8240c1873 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2519,7 +2519,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) rdev->saved_raid_disk = rdev->raid_disk; } - /* Reshape support -> restore repective data offsets */ + /* Reshape support -> restore respective data offsets */ rdev->data_offset = le64_to_cpu(sb->data_offset); rdev->new_data_offset = le64_to_cpu(sb->new_data_offset); From e6a3531dd542cb127c8de32ab1e54a48ae19962b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 24 Sep 2024 15:18:29 +0200 Subject: [PATCH 25/26] dm-verity: restart or panic on an I/O error Maxim Suhanov reported that dm-verity doesn't crash if an I/O error happens. In theory, this could be used to subvert security, because an attacker can create sectors that return error with the Write Uncorrectable command. Some programs may misbehave if they have to deal with EIO. This commit fixes dm-verity, so that if "panic_on_corruption" or "restart_on_corruption" was specified and an I/O error happens, the machine will panic or restart. This commit also changes kernel_restart to emergency_restart - kernel_restart calls reboot notifiers and these reboot notifiers may wait for the bio that failed. emergency_restart doesn't call the notifiers. Reported-by: Maxim Suhanov Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-verity-target.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index cf659c8feb29..a95c1b9cc5b5 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -272,8 +272,10 @@ out: if (v->mode == DM_VERITY_MODE_LOGGING) return 0; - if (v->mode == DM_VERITY_MODE_RESTART) - kernel_restart("dm-verity device corrupted"); + if (v->mode == DM_VERITY_MODE_RESTART) { + pr_emerg("dm-verity device corrupted\n"); + emergency_restart(); + } if (v->mode == DM_VERITY_MODE_PANIC) panic("dm-verity device corrupted"); @@ -596,6 +598,23 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (!static_branch_unlikely(&use_bh_wq_enabled) || !io->in_bh) verity_fec_finish_io(io); + if (unlikely(status != BLK_STS_OK) && + unlikely(!(bio->bi_opf & REQ_RAHEAD)) && + !verity_is_system_shutting_down()) { + if (v->mode == DM_VERITY_MODE_RESTART || + v->mode == DM_VERITY_MODE_PANIC) + DMERR_LIMIT("%s has error: %s", v->data_dev->name, + blk_status_to_str(status)); + + if (v->mode == DM_VERITY_MODE_RESTART) { + pr_emerg("dm-verity device corrupted\n"); + emergency_restart(); + } + + if (v->mode == DM_VERITY_MODE_PANIC) + panic("dm-verity device corrupted"); + } + bio_endio(bio); } From 579b2ba40ece57f3f9150f59dfe327e60a5445b5 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 22 Sep 2024 18:17:53 +0200 Subject: [PATCH 26/26] dm verity: fallback to platform keyring also if key in trusted keyring is rejected If enabled, we fallback to the platform keyring if the trusted keyring doesn't have the key used to sign the roothash. But if pkcs7_verify() rejects the key for other reasons, such as usage restrictions, we do not fallback. Do so. Follow-up for 6fce1f40e95182ebbfe1ee3096b8fc0b37903269 Suggested-by: Serge Hallyn Signed-off-by: Luca Boccassi Acked-by: Jarkko Sakkinen Signed-off-by: Mikulas Patocka --- drivers/md/dm-verity-verify-sig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c index d351d7d39c60..a9e2c6c0a33c 100644 --- a/drivers/md/dm-verity-verify-sig.c +++ b/drivers/md/dm-verity-verify-sig.c @@ -127,7 +127,7 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len, #endif VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); #ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING - if (ret == -ENOKEY) + if (ret == -ENOKEY || ret == -EKEYREJECTED) ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data, sig_len, VERIFY_USE_PLATFORM_KEYRING,