dm table: rework reference counting

Rework table reference counting.

The existing code uses a reference counter. When the last reference is
dropped and the counter reaches zero, the table destructor is called.
Table reference counters are acquired/released from upcalls from other
kernel code (dm_any_congested, dm_merge_bvec, dm_unplug_all).
If the reference counter reaches zero in one of the upcalls, the table
destructor is called from almost random kernel code.

This leads to various problems:
* dm_any_congested being called under a spinlock, which calls the
  destructor, which calls some sleeping function.
* the destructor attempting to take a lock that is already taken by the
  same process.
* a stale reference from some other kernel code keeps the table
  constructed, which keeps some devices open, even after successful
  return from "dmsetup remove". This can confuse lvm and prevent closing
  of underlying devices or reusing device minor numbers.

The patch changes reference counting so that the table destructor can be
called only at predetermined places.

The table always has exactly one reference from either mapped_device->map
or hash_cell->new_map. After this patch, this reference is not counted
in table->holders.  A pair of dm_table_create/dm_table_destroy functions
is used for table creation/destruction.

Temporary references from the other code increase table->holders. A pair
of dm_table_get/dm_table_put functions is used to manipulate it.

When the table is about to be destroyed, we wait for table->holders to
reach 0. Then, we call the table destructor.  We use active waiting with
msleep(1), because the situation happens rarely (to one user in 5 years)
and removing the device isn't a performance-critical task: the user
doesn't care if it takes one tick more or not.

This way, the destructor is called only at specific points
(dm_table_destroy function) and the above problems associated with lazy
destruction can't happen.

Finally remove the temporary protection added to dm_any_congested().

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
This commit is contained in:
Mikulas Patocka 2009-01-06 03:05:10 +00:00 committed by Alasdair G Kergon
parent ab4c142488
commit d58168763f
4 changed files with 34 additions and 21 deletions

View File

@ -233,7 +233,7 @@ static void __hash_remove(struct hash_cell *hc)
}
if (hc->new_map)
dm_table_put(hc->new_map);
dm_table_destroy(hc->new_map);
dm_put(hc->md);
free_cell(hc);
}
@ -827,8 +827,8 @@ static int do_resume(struct dm_ioctl *param)
r = dm_swap_table(md, new_map);
if (r) {
dm_table_destroy(new_map);
dm_put(md);
dm_table_put(new_map);
return r;
}
@ -836,8 +836,6 @@ static int do_resume(struct dm_ioctl *param)
set_disk_ro(dm_disk(md), 0);
else
set_disk_ro(dm_disk(md), 1);
dm_table_put(new_map);
}
if (dm_suspended(md))
@ -1080,7 +1078,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
}
if (hc->new_map)
dm_table_put(hc->new_map);
dm_table_destroy(hc->new_map);
hc->new_map = t;
up_write(&_hash_lock);
@ -1109,7 +1107,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
}
if (hc->new_map) {
dm_table_put(hc->new_map);
dm_table_destroy(hc->new_map);
hc->new_map = NULL;
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (C) 2001 Sistina Software (UK) Limited.
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#define DM_MSG_PREFIX "table"
@ -24,6 +25,19 @@
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
/*
* The table always has exactly one reference from either mapped_device->map
* or hash_cell->new_map. This reference is not counted in table->holders.
* A pair of dm_table_create/dm_table_destroy functions is used for table
* creation/destruction.
*
* Temporary references from the other code increase table->holders. A pair
* of dm_table_get/dm_table_put functions is used to manipulate it.
*
* When the table is about to be destroyed, we wait for table->holders to
* drop to zero.
*/
struct dm_table {
struct mapped_device *md;
atomic_t holders;
@ -228,7 +242,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
return -ENOMEM;
INIT_LIST_HEAD(&t->devices);
atomic_set(&t->holders, 1);
atomic_set(&t->holders, 0);
t->barriers_supported = 1;
if (!num_targets)
@ -259,10 +273,14 @@ static void free_devices(struct list_head *devices)
}
}
static void table_destroy(struct dm_table *t)
void dm_table_destroy(struct dm_table *t)
{
unsigned int i;
while (atomic_read(&t->holders))
msleep(1);
smp_mb();
/* free the indexes (see dm_table_complete) */
if (t->depth >= 2)
vfree(t->index[t->depth - 2]);
@ -300,8 +318,8 @@ void dm_table_put(struct dm_table *t)
if (!t)
return;
if (atomic_dec_and_test(&t->holders))
table_destroy(t);
smp_mb__before_atomic_dec();
atomic_dec(&t->holders);
}
/*

View File

@ -977,8 +977,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
struct mapped_device *md = congested_data;
struct dm_table *map;
atomic_inc(&md->pending);
if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
map = dm_get_table(md);
if (map) {
@ -987,10 +985,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
}
}
if (!atomic_dec_return(&md->pending))
/* nudge anyone waiting on suspend queue */
wake_up(&md->wait);
return r;
}
@ -1250,10 +1244,12 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
if (md->suspended_bdev)
__set_size(md, size);
if (size == 0)
return 0;
dm_table_get(t);
if (!size) {
dm_table_destroy(t);
return 0;
}
dm_table_event_callback(t, event_callback, md);
write_lock(&md->map_lock);
@ -1275,7 +1271,7 @@ static void __unbind(struct mapped_device *md)
write_lock(&md->map_lock);
md->map = NULL;
write_unlock(&md->map_lock);
dm_table_put(map);
dm_table_destroy(map);
}
/*

View File

@ -36,6 +36,7 @@ struct dm_table;
/*-----------------------------------------------------------------
* Internal table functions.
*---------------------------------------------------------------*/
void dm_table_destroy(struct dm_table *t);
void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);