dm cache metadata: add "metadata2" feature

If "metadata2" is provided as a table argument when creating/loading a
cache target a more compact metadata format, with separate dirty bits,
is used.  "metadata2" improves speed of shutting down a cache target.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
Joe Thornber 2016-09-22 06:15:21 -04:00 committed by Mike Snitzer
parent ae4a46a1f6
commit 629d0a8a1a
4 changed files with 278 additions and 53 deletions

View File

@ -207,6 +207,10 @@ Optional feature arguments are:
block, then the cache block is invalidated.
To enable passthrough mode the cache must be clean.
metadata2 : use version 2 of the metadata. This stores the dirty bits
in a separate btree, which improves speed of shutting
down the cache.
A policy called 'default' is always registered. This is an alias for
the policy we currently think is giving best all round performance.

View File

@ -25,7 +25,7 @@
* defines a range of metadata versions that this module can handle.
*/
#define MIN_CACHE_VERSION 1
#define MAX_CACHE_VERSION 1
#define MAX_CACHE_VERSION 2
#define CACHE_METADATA_CACHE_SIZE 64
@ -55,6 +55,7 @@ enum mapping_bits {
/*
* The data on the cache is different from that on the origin.
* This flag is only used by metadata format 1.
*/
M_DIRTY = 2
};
@ -93,12 +94,18 @@ struct cache_disk_superblock {
__le32 write_misses;
__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
/*
* Metadata format 2 fields.
*/
__le64 dirty_root;
} __packed;
struct dm_cache_metadata {
atomic_t ref_count;
struct list_head list;
unsigned version;
struct block_device *bdev;
struct dm_block_manager *bm;
struct dm_space_map *metadata_sm;
@ -141,12 +148,19 @@ struct dm_cache_metadata {
*/
bool fail_io:1;
/*
* Metadata format 2 fields.
*/
dm_block_t dirty_root;
struct dm_disk_bitset dirty_info;
/*
* These structures are used when loading metadata. They're too
* big to put on the stack.
*/
struct dm_array_cursor mapping_cursor;
struct dm_array_cursor hint_cursor;
struct dm_bitset_cursor dirty_cursor;
};
/*-------------------------------------------------------------------
@ -170,6 +184,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v,
static int check_metadata_version(struct cache_disk_superblock *disk_super)
{
uint32_t metadata_version = le32_to_cpu(disk_super->version);
if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
@ -310,6 +325,11 @@ static void __copy_sm_root(struct dm_cache_metadata *cmd,
sizeof(cmd->metadata_space_map_root));
}
static bool separate_dirty_bits(struct dm_cache_metadata *cmd)
{
return cmd->version >= 2;
}
static int __write_initial_superblock(struct dm_cache_metadata *cmd)
{
int r;
@ -341,7 +361,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->flags = 0;
memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
disk_super->version = cpu_to_le32(cmd->version);
memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
disk_super->policy_hint_size = 0;
@ -362,6 +382,9 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->write_hits = cpu_to_le32(0);
disk_super->write_misses = cpu_to_le32(0);
if (separate_dirty_bits(cmd))
disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
return dm_tm_commit(cmd->tm, sblock);
}
@ -382,6 +405,13 @@ static int __format_metadata(struct dm_cache_metadata *cmd)
if (r < 0)
goto bad;
if (separate_dirty_bits(cmd)) {
dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root);
if (r < 0)
goto bad;
}
dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
if (r < 0)
@ -407,9 +437,10 @@ bad:
static int __check_incompat_features(struct cache_disk_superblock *disk_super,
struct dm_cache_metadata *cmd)
{
uint32_t features;
uint32_t incompat_flags, features;
features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
incompat_flags = le32_to_cpu(disk_super->incompat_flags);
features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
if (features) {
DMERR("could not access metadata due to unsupported optional features (%lx).",
(unsigned long)features);
@ -470,6 +501,7 @@ static int __open_metadata(struct dm_cache_metadata *cmd)
}
__setup_mapping_info(cmd);
dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
sb_flags = le32_to_cpu(disk_super->flags);
cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
@ -548,6 +580,7 @@ static unsigned long clear_clean_shutdown(unsigned long flags)
static void read_superblock_fields(struct dm_cache_metadata *cmd,
struct cache_disk_superblock *disk_super)
{
cmd->version = le32_to_cpu(disk_super->version);
cmd->flags = le32_to_cpu(disk_super->flags);
cmd->root = le64_to_cpu(disk_super->mapping_root);
cmd->hint_root = le64_to_cpu(disk_super->hint_root);
@ -567,6 +600,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
if (separate_dirty_bits(cmd))
cmd->dirty_root = le64_to_cpu(disk_super->dirty_root);
cmd->changed = false;
}
@ -625,6 +661,13 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
*/
BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
if (separate_dirty_bits(cmd)) {
r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root,
&cmd->dirty_root);
if (r)
return r;
}
r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
&cmd->discard_root);
if (r)
@ -649,6 +692,8 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
update_flags(disk_super, mutator);
disk_super->mapping_root = cpu_to_le64(cmd->root);
if (separate_dirty_bits(cmd))
disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
@ -698,7 +743,8 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
sector_t data_block_size,
bool may_format_device,
size_t policy_hint_size)
size_t policy_hint_size,
unsigned metadata_version)
{
int r;
struct dm_cache_metadata *cmd;
@ -709,6 +755,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
return ERR_PTR(-ENOMEM);
}
cmd->version = metadata_version;
atomic_set(&cmd->ref_count, 1);
init_rwsem(&cmd->root_lock);
cmd->bdev = bdev;
@ -757,7 +804,8 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev)
static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
sector_t data_block_size,
bool may_format_device,
size_t policy_hint_size)
size_t policy_hint_size,
unsigned metadata_version)
{
struct dm_cache_metadata *cmd, *cmd2;
@ -768,7 +816,8 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
if (cmd)
return cmd;
cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
cmd = metadata_open(bdev, data_block_size, may_format_device,
policy_hint_size, metadata_version);
if (!IS_ERR(cmd)) {
mutex_lock(&table_lock);
cmd2 = lookup(bdev);
@ -800,10 +849,11 @@ static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
sector_t data_block_size,
bool may_format_device,
size_t policy_hint_size)
size_t policy_hint_size,
unsigned metadata_version)
{
struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
may_format_device, policy_hint_size);
struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device,
policy_hint_size, metadata_version);
if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
dm_cache_metadata_close(cmd);
@ -829,8 +879,8 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
/*
* Checks that the given cache block is either unmapped or clean.
*/
static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
bool *result)
static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b,
bool *result)
{
int r;
__le64 value;
@ -838,10 +888,8 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
unsigned flags;
r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
if (r) {
DMERR("block_unmapped_or_clean failed");
if (r)
return r;
}
unpack_value(value, &ob, &flags);
*result = !((flags & M_VALID) && (flags & M_DIRTY));
@ -849,17 +897,19 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
return 0;
}
static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
dm_cblock_t begin, dm_cblock_t end,
bool *result)
static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd,
dm_cblock_t begin, dm_cblock_t end,
bool *result)
{
int r;
*result = true;
while (begin != end) {
r = block_unmapped_or_clean(cmd, begin, result);
if (r)
r = block_clean_combined_dirty(cmd, begin, result);
if (r) {
DMERR("block_clean_combined_dirty failed");
return r;
}
if (!*result) {
DMERR("cache block %llu is dirty",
@ -873,6 +923,48 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
return 0;
}
static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
dm_cblock_t begin, dm_cblock_t end,
bool *result)
{
int r;
bool dirty_flag;
*result = true;
// FIXME: use a cursor so we can benefit from preloading metadata.
while (begin != end) {
/*
* We assume that unmapped blocks have their dirty bit
* cleared.
*/
r = dm_bitset_test_bit(&cmd->dirty_info, cmd->dirty_root,
from_cblock(begin), &cmd->dirty_root, &dirty_flag);
if (r)
return r;
if (dirty_flag) {
DMERR("cache block %llu is dirty",
(unsigned long long) from_cblock(begin));
*result = false;
return 0;
}
begin = to_cblock(from_cblock(begin) + 1);
}
return 0;
}
static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
dm_cblock_t begin, dm_cblock_t end,
bool *result)
{
if (separate_dirty_bits(cmd))
return blocks_are_clean_separate_dirty(cmd, begin, end, result);
else
return blocks_are_clean_combined_dirty(cmd, begin, end, result);
}
static bool cmd_write_lock(struct dm_cache_metadata *cmd)
{
down_write(&cmd->root_lock);
@ -950,8 +1042,18 @@ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
from_cblock(new_cache_size),
&null_mapping, &cmd->root);
if (!r)
cmd->cache_blocks = new_cache_size;
if (r)
goto out;
if (separate_dirty_bits(cmd)) {
r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root,
from_cblock(cmd->cache_blocks), from_cblock(new_cache_size),
false, &cmd->dirty_root);
if (r)
goto out;
}
cmd->cache_blocks = new_cache_size;
cmd->changed = true;
out:
@ -1185,11 +1287,11 @@ static bool hints_array_available(struct dm_cache_metadata *cmd,
hints_array_initialized(cmd);
}
static int __load_mapping(struct dm_cache_metadata *cmd,
uint64_t cb, bool hints_valid,
struct dm_array_cursor *mapping_cursor,
struct dm_array_cursor *hint_cursor,
load_mapping_fn fn, void *context)
static int __load_mapping_v1(struct dm_cache_metadata *cmd,
uint64_t cb, bool hints_valid,
struct dm_array_cursor *mapping_cursor,
struct dm_array_cursor *hint_cursor,
load_mapping_fn fn, void *context)
{
int r = 0;
@ -1221,6 +1323,45 @@ static int __load_mapping(struct dm_cache_metadata *cmd,
return r;
}
static int __load_mapping_v2(struct dm_cache_metadata *cmd,
uint64_t cb, bool hints_valid,
struct dm_array_cursor *mapping_cursor,
struct dm_array_cursor *hint_cursor,
struct dm_bitset_cursor *dirty_cursor,
load_mapping_fn fn, void *context)
{
int r = 0;
__le64 mapping;
__le32 hint = 0;
__le64 *mapping_value_le;
__le32 *hint_value_le;
dm_oblock_t oblock;
unsigned flags;
bool dirty;
dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
memcpy(&mapping, mapping_value_le, sizeof(mapping));
unpack_value(mapping, &oblock, &flags);
if (flags & M_VALID) {
if (hints_valid) {
dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
memcpy(&hint, hint_value_le, sizeof(hint));
}
dirty = dm_bitset_cursor_get_value(dirty_cursor);
r = fn(context, oblock, to_cblock(cb), dirty,
le32_to_cpu(hint), hints_valid);
if (r)
DMERR("policy couldn't load cblock");
}
return r;
}
static int __load_mappings(struct dm_cache_metadata *cmd,
struct dm_cache_policy *policy,
load_mapping_fn fn, void *context)
@ -1246,10 +1387,28 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
}
}
if (separate_dirty_bits(cmd)) {
r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
from_cblock(cmd->cache_blocks),
&cmd->dirty_cursor);
if (r) {
dm_array_cursor_end(&cmd->hint_cursor);
dm_array_cursor_end(&cmd->mapping_cursor);
return r;
}
}
for (cb = 0; ; cb++) {
r = __load_mapping(cmd, cb, hints_valid,
&cmd->mapping_cursor, &cmd->hint_cursor,
fn, context);
if (separate_dirty_bits(cmd))
r = __load_mapping_v2(cmd, cb, hints_valid,
&cmd->mapping_cursor,
&cmd->hint_cursor,
&cmd->dirty_cursor,
fn, context);
else
r = __load_mapping_v1(cmd, cb, hints_valid,
&cmd->mapping_cursor, &cmd->hint_cursor,
fn, context);
if (r)
goto out;
@ -1272,12 +1431,23 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
goto out;
}
}
if (separate_dirty_bits(cmd)) {
r = dm_bitset_cursor_next(&cmd->dirty_cursor);
if (r) {
DMERR("dm_bitset_cursor_next for dirty failed");
goto out;
}
}
}
out:
dm_array_cursor_end(&cmd->mapping_cursor);
if (hints_valid)
dm_array_cursor_end(&cmd->hint_cursor);
if (separate_dirty_bits(cmd))
dm_bitset_cursor_end(&cmd->dirty_cursor);
return r;
}
@ -1360,13 +1530,55 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty
}
int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
dm_cblock_t cblock, bool dirty)
static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
{
int r;
unsigned i;
for (i = 0; i < nr_bits; i++) {
r = __dirty(cmd, to_cblock(i), test_bit(i, bits));
if (r)
return r;
}
return 0;
}
static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
{
int r = 0;
unsigned i;
/* nr_bits is really just a sanity check */
if (nr_bits != from_cblock(cmd->cache_blocks)) {
DMERR("dirty bitset is wrong size");
return -EINVAL;
}
for (i = 0; i < nr_bits; i++) {
if (test_bit(i, bits))
r = dm_bitset_set_bit(&cmd->dirty_info, cmd->dirty_root, i, &cmd->dirty_root);
else
r = dm_bitset_clear_bit(&cmd->dirty_info, cmd->dirty_root, i, &cmd->dirty_root);
if (r)
return r;
}
cmd->changed = true;
return dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root, &cmd->dirty_root);
}
int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
unsigned nr_bits,
unsigned long *bits)
{
int r;
WRITE_LOCK(cmd);
r = __dirty(cmd, cblock, dirty);
if (separate_dirty_bits(cmd))
r = __set_dirty_bits_v2(cmd, nr_bits, bits);
else
r = __set_dirty_bits_v1(cmd, nr_bits, bits);
WRITE_UNLOCK(cmd);
return r;

View File

@ -45,18 +45,20 @@
* As these various flags are defined they should be added to the
* following masks.
*/
#define DM_CACHE_FEATURE_COMPAT_SUPP 0UL
#define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL
#define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL
/*
* Reopens or creates a new, empty metadata volume.
* Returns an ERR_PTR on failure.
* Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on
* failure. If reopening then features must match.
*/
struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
sector_t data_block_size,
bool may_format_device,
size_t policy_hint_size);
size_t policy_hint_size,
unsigned metadata_version);
void dm_cache_metadata_close(struct dm_cache_metadata *cmd);
@ -91,7 +93,8 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
load_mapping_fn fn,
void *context);
int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty);
int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
unsigned nr_bits, unsigned long *bits);
struct dm_cache_statistics {
uint32_t read_hits;

View File

@ -179,6 +179,7 @@ enum cache_io_mode {
struct cache_features {
enum cache_metadata_mode mode;
enum cache_io_mode io_mode;
unsigned metadata_version;
};
struct cache_stats {
@ -2541,13 +2542,14 @@ static void init_features(struct cache_features *cf)
{
cf->mode = CM_WRITE;
cf->io_mode = CM_IO_WRITEBACK;
cf->metadata_version = 1;
}
static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
char **error)
{
static struct dm_arg _args[] = {
{0, 1, "Invalid number of cache feature arguments"},
{0, 2, "Invalid number of cache feature arguments"},
};
int r;
@ -2573,6 +2575,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
else if (!strcasecmp(arg, "passthrough"))
cf->io_mode = CM_IO_PASSTHROUGH;
else if (!strcasecmp(arg, "metadata2"))
cf->metadata_version = 2;
else {
*error = "Unrecognised cache feature requested";
return -EINVAL;
@ -2827,7 +2832,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
ca->block_size, may_format,
dm_cache_policy_get_hint_size(cache->policy));
dm_cache_policy_get_hint_size(cache->policy),
ca->features.metadata_version);
if (IS_ERR(cmd)) {
*error = "Error creating metadata object";
r = PTR_ERR(cmd);
@ -3172,21 +3178,16 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
static int write_dirty_bitset(struct cache *cache)
{
unsigned i, r;
int r;
if (get_cache_mode(cache) >= CM_READ_ONLY)
return -EINVAL;
for (i = 0; i < from_cblock(cache->cache_size); i++) {
r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
is_dirty(cache, to_cblock(i)));
if (r) {
metadata_operation_failed(cache, "dm_cache_set_dirty", r);
return r;
}
}
r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
if (r)
metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
return 0;
return r;
}
static int write_discard_bitset(struct cache *cache)
@ -3562,14 +3563,19 @@ static void cache_status(struct dm_target *ti, status_type_t type,
(unsigned) atomic_read(&cache->stats.promotion),
(unsigned long) atomic_read(&cache->nr_dirty));
if (cache->features.metadata_version == 2)
DMEMIT("2 metadata2 ");
else
DMEMIT("1 ");
if (writethrough_mode(&cache->features))
DMEMIT("1 writethrough ");
DMEMIT("writethrough ");
else if (passthrough_mode(&cache->features))
DMEMIT("1 passthrough ");
DMEMIT("passthrough ");
else if (writeback_mode(&cache->features))
DMEMIT("1 writeback ");
DMEMIT("writeback ");
else {
DMERR("%s: internal error: unknown io mode: %d",
@ -3817,7 +3823,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
.version = {1, 9, 0},
.version = {1, 10, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,