Merge remote-tracking branch 'linus/master' into testing
This commit is contained in:
@@ -532,11 +532,11 @@ config BLK_DEV_RBD
|
||||
If unsure, say N.
|
||||
|
||||
config BLK_DEV_RSXX
|
||||
tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"
|
||||
tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
|
||||
depends on PCI
|
||||
help
|
||||
Device driver for IBM's high speed PCIe SSD
|
||||
storage devices: FlashSystem-70 and FlashSystem-80.
|
||||
storage device: Flash Adapter 900GB Full Height.
|
||||
|
||||
To compile this driver as a module, choose M here: the
|
||||
module will be called rsxx.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
|
||||
#define VERSION "81"
|
||||
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
|
||||
#define VERSION "83"
|
||||
#define AOE_MAJOR 152
|
||||
#define DEVICE_NAME "aoe"
|
||||
|
||||
@@ -196,9 +196,11 @@ struct ktstate {
|
||||
struct completion rendez;
|
||||
struct task_struct *task;
|
||||
wait_queue_head_t *waitq;
|
||||
int (*fn) (void);
|
||||
char *name;
|
||||
int (*fn) (int);
|
||||
char name[12];
|
||||
spinlock_t *lock;
|
||||
int id;
|
||||
int active;
|
||||
};
|
||||
|
||||
int aoeblk_init(void);
|
||||
@@ -222,6 +224,7 @@ int aoecmd_init(void);
|
||||
struct sk_buff *aoecmd_ata_id(struct aoedev *);
|
||||
void aoe_freetframe(struct frame *);
|
||||
void aoe_flush_iocq(void);
|
||||
void aoe_flush_iocq_by_index(int);
|
||||
void aoe_end_request(struct aoedev *, struct request *, int);
|
||||
int aoe_ktstart(struct ktstate *k);
|
||||
void aoe_ktstop(struct ktstate *k);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
|
||||
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
|
||||
/*
|
||||
* aoecmd.c
|
||||
* Filesystem request handling methods
|
||||
@@ -35,14 +35,27 @@ module_param(aoe_maxout, int, 0644);
|
||||
MODULE_PARM_DESC(aoe_maxout,
|
||||
"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
|
||||
|
||||
static wait_queue_head_t ktiowq;
|
||||
static struct ktstate kts;
|
||||
/* The number of online cpus during module initialization gives us a
|
||||
* convenient heuristic cap on the parallelism used for ktio threads
|
||||
* doing I/O completion. It is not important that the cap equal the
|
||||
* actual number of running CPUs at any given time, but because of CPU
|
||||
* hotplug, we take care to use ncpus instead of using
|
||||
* num_online_cpus() after module initialization.
|
||||
*/
|
||||
static int ncpus;
|
||||
|
||||
/* mutex lock used for synchronization while thread spawning */
|
||||
static DEFINE_MUTEX(ktio_spawn_lock);
|
||||
|
||||
static wait_queue_head_t *ktiowq;
|
||||
static struct ktstate *kts;
|
||||
|
||||
/* io completion queue */
|
||||
static struct {
|
||||
struct iocq_ktio {
|
||||
struct list_head head;
|
||||
spinlock_t lock;
|
||||
} iocq;
|
||||
};
|
||||
static struct iocq_ktio *iocq;
|
||||
|
||||
static struct page *empty_page;
|
||||
|
||||
@@ -893,16 +906,10 @@ bio_pageinc(struct bio *bio)
|
||||
int i;
|
||||
|
||||
bio_for_each_segment(bv, bio, i) {
|
||||
page = bv->bv_page;
|
||||
/* Non-zero page count for non-head members of
|
||||
* compound pages is no longer allowed by the kernel,
|
||||
* but this has never been seen here.
|
||||
* compound pages is no longer allowed by the kernel.
|
||||
*/
|
||||
if (unlikely(PageCompound(page)))
|
||||
if (compound_trans_head(page) != page) {
|
||||
pr_crit("page tail used for block I/O\n");
|
||||
BUG();
|
||||
}
|
||||
page = compound_trans_head(bv->bv_page);
|
||||
atomic_inc(&page->_count);
|
||||
}
|
||||
}
|
||||
@@ -911,10 +918,13 @@ static void
|
||||
bio_pagedec(struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bv;
|
||||
struct page *page;
|
||||
int i;
|
||||
|
||||
bio_for_each_segment(bv, bio, i)
|
||||
atomic_dec(&bv->bv_page->_count);
|
||||
bio_for_each_segment(bv, bio, i) {
|
||||
page = compound_trans_head(bv->bv_page);
|
||||
atomic_dec(&page->_count);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1278,23 +1288,36 @@ out:
|
||||
* Returns true iff responses needing processing remain.
|
||||
*/
|
||||
static int
|
||||
ktio(void)
|
||||
ktio(int id)
|
||||
{
|
||||
struct frame *f;
|
||||
struct list_head *pos;
|
||||
int i;
|
||||
int actual_id;
|
||||
|
||||
for (i = 0; ; ++i) {
|
||||
if (i == MAXIOC)
|
||||
return 1;
|
||||
if (list_empty(&iocq.head))
|
||||
if (list_empty(&iocq[id].head))
|
||||
return 0;
|
||||
pos = iocq.head.next;
|
||||
pos = iocq[id].head.next;
|
||||
list_del(pos);
|
||||
spin_unlock_irq(&iocq.lock);
|
||||
f = list_entry(pos, struct frame, head);
|
||||
spin_unlock_irq(&iocq[id].lock);
|
||||
ktiocomplete(f);
|
||||
spin_lock_irq(&iocq.lock);
|
||||
|
||||
/* Figure out if extra threads are required. */
|
||||
actual_id = f->t->d->aoeminor % ncpus;
|
||||
|
||||
if (!kts[actual_id].active) {
|
||||
BUG_ON(id != 0);
|
||||
mutex_lock(&ktio_spawn_lock);
|
||||
if (!kts[actual_id].active
|
||||
&& aoe_ktstart(&kts[actual_id]) == 0)
|
||||
kts[actual_id].active = 1;
|
||||
mutex_unlock(&ktio_spawn_lock);
|
||||
}
|
||||
spin_lock_irq(&iocq[id].lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1311,7 +1334,7 @@ kthread(void *vp)
|
||||
complete(&k->rendez); /* tell spawner we're running */
|
||||
do {
|
||||
spin_lock_irq(k->lock);
|
||||
more = k->fn();
|
||||
more = k->fn(k->id);
|
||||
if (!more) {
|
||||
add_wait_queue(k->waitq, &wait);
|
||||
__set_current_state(TASK_INTERRUPTIBLE);
|
||||
@@ -1340,7 +1363,7 @@ aoe_ktstart(struct ktstate *k)
|
||||
struct task_struct *task;
|
||||
|
||||
init_completion(&k->rendez);
|
||||
task = kthread_run(kthread, k, k->name);
|
||||
task = kthread_run(kthread, k, "%s", k->name);
|
||||
if (task == NULL || IS_ERR(task))
|
||||
return -ENOMEM;
|
||||
k->task = task;
|
||||
@@ -1353,13 +1376,24 @@ aoe_ktstart(struct ktstate *k)
|
||||
static void
|
||||
ktcomplete(struct frame *f, struct sk_buff *skb)
|
||||
{
|
||||
int id;
|
||||
ulong flags;
|
||||
|
||||
f->r_skb = skb;
|
||||
spin_lock_irqsave(&iocq.lock, flags);
|
||||
list_add_tail(&f->head, &iocq.head);
|
||||
spin_unlock_irqrestore(&iocq.lock, flags);
|
||||
wake_up(&ktiowq);
|
||||
id = f->t->d->aoeminor % ncpus;
|
||||
spin_lock_irqsave(&iocq[id].lock, flags);
|
||||
if (!kts[id].active) {
|
||||
spin_unlock_irqrestore(&iocq[id].lock, flags);
|
||||
/* The thread with id has not been spawned yet,
|
||||
* so delegate the work to the main thread and
|
||||
* try spawning a new thread.
|
||||
*/
|
||||
id = 0;
|
||||
spin_lock_irqsave(&iocq[id].lock, flags);
|
||||
}
|
||||
list_add_tail(&f->head, &iocq[id].head);
|
||||
spin_unlock_irqrestore(&iocq[id].lock, flags);
|
||||
wake_up(&ktiowq[id]);
|
||||
}
|
||||
|
||||
struct sk_buff *
|
||||
@@ -1705,6 +1739,17 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
|
||||
|
||||
void
|
||||
aoe_flush_iocq(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncpus; i++) {
|
||||
if (kts[i].active)
|
||||
aoe_flush_iocq_by_index(i);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
aoe_flush_iocq_by_index(int id)
|
||||
{
|
||||
struct frame *f;
|
||||
struct aoedev *d;
|
||||
@@ -1713,9 +1758,9 @@ aoe_flush_iocq(void)
|
||||
struct sk_buff *skb;
|
||||
ulong flags;
|
||||
|
||||
spin_lock_irqsave(&iocq.lock, flags);
|
||||
list_splice_init(&iocq.head, &flist);
|
||||
spin_unlock_irqrestore(&iocq.lock, flags);
|
||||
spin_lock_irqsave(&iocq[id].lock, flags);
|
||||
list_splice_init(&iocq[id].head, &flist);
|
||||
spin_unlock_irqrestore(&iocq[id].lock, flags);
|
||||
while (!list_empty(&flist)) {
|
||||
pos = flist.next;
|
||||
list_del(pos);
|
||||
@@ -1738,6 +1783,8 @@ int __init
|
||||
aoecmd_init(void)
|
||||
{
|
||||
void *p;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
/* get_zeroed_page returns page with ref count 1 */
|
||||
p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
|
||||
@@ -1745,22 +1792,72 @@ aoecmd_init(void)
|
||||
return -ENOMEM;
|
||||
empty_page = virt_to_page(p);
|
||||
|
||||
INIT_LIST_HEAD(&iocq.head);
|
||||
spin_lock_init(&iocq.lock);
|
||||
init_waitqueue_head(&ktiowq);
|
||||
kts.name = "aoe_ktio";
|
||||
kts.fn = ktio;
|
||||
kts.waitq = &ktiowq;
|
||||
kts.lock = &iocq.lock;
|
||||
return aoe_ktstart(&kts);
|
||||
ncpus = num_online_cpus();
|
||||
|
||||
iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
|
||||
if (!iocq)
|
||||
return -ENOMEM;
|
||||
|
||||
kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
|
||||
if (!kts) {
|
||||
ret = -ENOMEM;
|
||||
goto kts_fail;
|
||||
}
|
||||
|
||||
ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
|
||||
if (!ktiowq) {
|
||||
ret = -ENOMEM;
|
||||
goto ktiowq_fail;
|
||||
}
|
||||
|
||||
mutex_init(&ktio_spawn_lock);
|
||||
|
||||
for (i = 0; i < ncpus; i++) {
|
||||
INIT_LIST_HEAD(&iocq[i].head);
|
||||
spin_lock_init(&iocq[i].lock);
|
||||
init_waitqueue_head(&ktiowq[i]);
|
||||
snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
|
||||
kts[i].fn = ktio;
|
||||
kts[i].waitq = &ktiowq[i];
|
||||
kts[i].lock = &iocq[i].lock;
|
||||
kts[i].id = i;
|
||||
kts[i].active = 0;
|
||||
}
|
||||
kts[0].active = 1;
|
||||
if (aoe_ktstart(&kts[0])) {
|
||||
ret = -ENOMEM;
|
||||
goto ktstart_fail;
|
||||
}
|
||||
return 0;
|
||||
|
||||
ktstart_fail:
|
||||
kfree(ktiowq);
|
||||
ktiowq_fail:
|
||||
kfree(kts);
|
||||
kts_fail:
|
||||
kfree(iocq);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
aoecmd_exit(void)
|
||||
{
|
||||
aoe_ktstop(&kts);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncpus; i++)
|
||||
if (kts[i].active)
|
||||
aoe_ktstop(&kts[i]);
|
||||
|
||||
aoe_flush_iocq();
|
||||
|
||||
/* Free up the iocq and thread-specific configuration
|
||||
* allocated during startup.
|
||||
*/
|
||||
kfree(iocq);
|
||||
kfree(kts);
|
||||
kfree(ktiowq);
|
||||
|
||||
free_page((unsigned long) page_address(empty_page));
|
||||
empty_page = NULL;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
|
||||
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
|
||||
/*
|
||||
* aoedev.c
|
||||
* AoE device utility functions; maintains device list.
|
||||
@@ -518,7 +518,6 @@ void
|
||||
aoedev_exit(void)
|
||||
{
|
||||
flush_scheduled_work();
|
||||
aoe_flush_iocq();
|
||||
flush(NULL, 0, EXITING);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
|
||||
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
|
||||
/*
|
||||
* aoenet.c
|
||||
* Ethernet portion of AoE driver
|
||||
@@ -52,7 +52,7 @@ static struct sk_buff_head skbtxq;
|
||||
|
||||
/* enters with txlock held */
|
||||
static int
|
||||
tx(void) __must_hold(&txlock)
|
||||
tx(int id) __must_hold(&txlock)
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
struct net_device *ifp;
|
||||
@@ -205,7 +205,8 @@ aoenet_init(void)
|
||||
kts.lock = &txlock;
|
||||
kts.fn = tx;
|
||||
kts.waitq = &txwq;
|
||||
kts.name = "aoe_tx";
|
||||
kts.id = 0;
|
||||
snprintf(kts.name, sizeof(kts.name), "aoe_tx%d", kts.id);
|
||||
if (aoe_ktstart(&kts))
|
||||
return -EAGAIN;
|
||||
dev_add_pack(&aoe_pt);
|
||||
|
||||
@@ -25,9 +25,9 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/loop.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include "loop.h"
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("loop blockdevice transferfunction adaptor / CryptoAPI");
|
||||
|
||||
@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
|
||||
wake_up(&mdev->al_wait);
|
||||
}
|
||||
|
||||
int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
|
||||
{
|
||||
struct al_transaction_on_disk *al = buffer;
|
||||
struct drbd_md *md = &mdev->ldev->md;
|
||||
sector_t al_base = md->md_offset + md->al_offset;
|
||||
int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
|
||||
int i;
|
||||
|
||||
memset(al, 0, 4096);
|
||||
al->magic = cpu_to_be32(DRBD_AL_MAGIC);
|
||||
al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
|
||||
al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
|
||||
|
||||
for (i = 0; i < al_size_4k; i++) {
|
||||
int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int w_update_odbm(struct drbd_work *w, int unused)
|
||||
{
|
||||
struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
|
||||
|
||||
@@ -832,6 +832,7 @@ struct drbd_tconn { /* is a resource from the config file */
|
||||
unsigned susp_nod:1; /* IO suspended because no data */
|
||||
unsigned susp_fen:1; /* IO suspended because fence peer handler runs */
|
||||
struct mutex cstate_mutex; /* Protects graceful disconnects */
|
||||
unsigned int connect_cnt; /* Inc each time a connection is established */
|
||||
|
||||
unsigned long flags;
|
||||
struct net_conf *net_conf; /* content protected by rcu */
|
||||
@@ -1132,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
|
||||
void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
|
||||
|
||||
extern void conn_md_sync(struct drbd_tconn *tconn);
|
||||
extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
|
||||
extern void drbd_md_sync(struct drbd_conf *mdev);
|
||||
extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
|
||||
extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
|
||||
@@ -1466,8 +1468,16 @@ extern void drbd_suspend_io(struct drbd_conf *mdev);
|
||||
extern void drbd_resume_io(struct drbd_conf *mdev);
|
||||
extern char *ppsize(char *buf, unsigned long long size);
|
||||
extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
|
||||
enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
|
||||
extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
|
||||
enum determine_dev_size {
|
||||
DS_ERROR_SHRINK = -3,
|
||||
DS_ERROR_SPACE_MD = -2,
|
||||
DS_ERROR = -1,
|
||||
DS_UNCHANGED = 0,
|
||||
DS_SHRUNK = 1,
|
||||
DS_GREW = 2
|
||||
};
|
||||
extern enum determine_dev_size
|
||||
drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
|
||||
extern void resync_after_online_grow(struct drbd_conf *);
|
||||
extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
|
||||
extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
|
||||
@@ -1633,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
|
||||
#define drbd_set_out_of_sync(mdev, sector, size) \
|
||||
__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
|
||||
extern void drbd_al_shrink(struct drbd_conf *mdev);
|
||||
extern int drbd_initialize_al(struct drbd_conf *, void *);
|
||||
|
||||
/* drbd_nl.c */
|
||||
/* state info broadcast */
|
||||
|
||||
@@ -2762,8 +2762,6 @@ int __init drbd_init(void)
|
||||
/*
|
||||
* allocate all necessary structs
|
||||
*/
|
||||
err = -ENOMEM;
|
||||
|
||||
init_waitqueue_head(&drbd_pp_wait);
|
||||
|
||||
drbd_proc = NULL; /* play safe for drbd_cleanup */
|
||||
@@ -2773,6 +2771,7 @@ int __init drbd_init(void)
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
err = -ENOMEM;
|
||||
drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
|
||||
if (!drbd_proc) {
|
||||
printk(KERN_ERR "drbd: unable to register proc file\n");
|
||||
@@ -2803,7 +2802,6 @@ int __init drbd_init(void)
|
||||
fail:
|
||||
drbd_cleanup();
|
||||
if (err == -ENOMEM)
|
||||
/* currently always the case */
|
||||
printk(KERN_ERR "drbd: ran out of memory\n");
|
||||
else
|
||||
printk(KERN_ERR "drbd: initialization failure\n");
|
||||
@@ -2881,34 +2879,14 @@ struct meta_data_on_disk {
|
||||
u8 reserved_u8[4096 - (7*8 + 10*4)];
|
||||
} __packed;
|
||||
|
||||
/**
|
||||
* drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
|
||||
* @mdev: DRBD device.
|
||||
*/
|
||||
void drbd_md_sync(struct drbd_conf *mdev)
|
||||
|
||||
|
||||
void drbd_md_write(struct drbd_conf *mdev, void *b)
|
||||
{
|
||||
struct meta_data_on_disk *buffer;
|
||||
struct meta_data_on_disk *buffer = b;
|
||||
sector_t sector;
|
||||
int i;
|
||||
|
||||
/* Don't accidentally change the DRBD meta data layout. */
|
||||
BUILD_BUG_ON(UI_SIZE != 4);
|
||||
BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
|
||||
|
||||
del_timer(&mdev->md_sync_timer);
|
||||
/* timer may be rearmed by drbd_md_mark_dirty() now. */
|
||||
if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
|
||||
return;
|
||||
|
||||
/* We use here D_FAILED and not D_ATTACHING because we try to write
|
||||
* metadata even if we detach due to a disk failure! */
|
||||
if (!get_ldev_if_state(mdev, D_FAILED))
|
||||
return;
|
||||
|
||||
buffer = drbd_md_get_buffer(mdev);
|
||||
if (!buffer)
|
||||
goto out;
|
||||
|
||||
memset(buffer, 0, sizeof(*buffer));
|
||||
|
||||
buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
|
||||
@@ -2937,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev)
|
||||
dev_err(DEV, "meta data update failed!\n");
|
||||
drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
|
||||
* @mdev: DRBD device.
|
||||
*/
|
||||
void drbd_md_sync(struct drbd_conf *mdev)
|
||||
{
|
||||
struct meta_data_on_disk *buffer;
|
||||
|
||||
/* Don't accidentally change the DRBD meta data layout. */
|
||||
BUILD_BUG_ON(UI_SIZE != 4);
|
||||
BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
|
||||
|
||||
del_timer(&mdev->md_sync_timer);
|
||||
/* timer may be rearmed by drbd_md_mark_dirty() now. */
|
||||
if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
|
||||
return;
|
||||
|
||||
/* We use here D_FAILED and not D_ATTACHING because we try to write
|
||||
* metadata even if we detach due to a disk failure! */
|
||||
if (!get_ldev_if_state(mdev, D_FAILED))
|
||||
return;
|
||||
|
||||
buffer = drbd_md_get_buffer(mdev);
|
||||
if (!buffer)
|
||||
goto out;
|
||||
|
||||
drbd_md_write(mdev, buffer);
|
||||
|
||||
/* Update mdev->ldev->md.la_size_sect,
|
||||
* since we updated it on metadata. */
|
||||
|
||||
@@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
|
||||
|
||||
bool conn_try_outdate_peer(struct drbd_tconn *tconn)
|
||||
{
|
||||
unsigned int connect_cnt;
|
||||
union drbd_state mask = { };
|
||||
union drbd_state val = { };
|
||||
enum drbd_fencing_p fp;
|
||||
@@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
|
||||
return false;
|
||||
}
|
||||
|
||||
spin_lock_irq(&tconn->req_lock);
|
||||
connect_cnt = tconn->connect_cnt;
|
||||
spin_unlock_irq(&tconn->req_lock);
|
||||
|
||||
fp = highest_fencing_policy(tconn);
|
||||
switch (fp) {
|
||||
case FP_NOT_AVAIL:
|
||||
@@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
|
||||
here, because we might have been able to re-establish the connection in the
|
||||
meantime. */
|
||||
spin_lock_irq(&tconn->req_lock);
|
||||
if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
|
||||
_conn_request_state(tconn, mask, val, CS_VERBOSE);
|
||||
if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) {
|
||||
if (tconn->connect_cnt != connect_cnt)
|
||||
/* In case the connection was established and dropped
|
||||
while the fence-peer handler was running, ignore it */
|
||||
conn_info(tconn, "Ignoring fence-peer exit code\n");
|
||||
else
|
||||
_conn_request_state(tconn, mask, val, CS_VERBOSE);
|
||||
}
|
||||
spin_unlock_irq(&tconn->req_lock);
|
||||
|
||||
return conn_highest_pdsk(tconn) <= D_OUTDATED;
|
||||
@@ -816,15 +827,20 @@ void drbd_resume_io(struct drbd_conf *mdev)
|
||||
* Returns 0 on success, negative return values indicate errors.
|
||||
* You should call drbd_md_sync() after calling this function.
|
||||
*/
|
||||
enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
|
||||
enum determine_dev_size
|
||||
drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
|
||||
{
|
||||
sector_t prev_first_sect, prev_size; /* previous meta location */
|
||||
sector_t la_size_sect, u_size;
|
||||
struct drbd_md *md = &mdev->ldev->md;
|
||||
u32 prev_al_stripe_size_4k;
|
||||
u32 prev_al_stripes;
|
||||
sector_t size;
|
||||
char ppb[10];
|
||||
void *buffer;
|
||||
|
||||
int md_moved, la_size_changed;
|
||||
enum determine_dev_size rv = unchanged;
|
||||
enum determine_dev_size rv = DS_UNCHANGED;
|
||||
|
||||
/* race:
|
||||
* application request passes inc_ap_bio,
|
||||
@@ -836,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
|
||||
* still lock the act_log to not trigger ASSERTs there.
|
||||
*/
|
||||
drbd_suspend_io(mdev);
|
||||
buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
|
||||
if (!buffer) {
|
||||
drbd_resume_io(mdev);
|
||||
return DS_ERROR;
|
||||
}
|
||||
|
||||
/* no wait necessary anymore, actually we could assert that */
|
||||
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
|
||||
@@ -844,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
|
||||
prev_size = mdev->ldev->md.md_size_sect;
|
||||
la_size_sect = mdev->ldev->md.la_size_sect;
|
||||
|
||||
/* TODO: should only be some assert here, not (re)init... */
|
||||
if (rs) {
|
||||
/* rs is non NULL if we should change the AL layout only */
|
||||
|
||||
prev_al_stripes = md->al_stripes;
|
||||
prev_al_stripe_size_4k = md->al_stripe_size_4k;
|
||||
|
||||
md->al_stripes = rs->al_stripes;
|
||||
md->al_stripe_size_4k = rs->al_stripe_size / 4;
|
||||
md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
|
||||
}
|
||||
|
||||
drbd_md_set_sector_offsets(mdev, mdev->ldev);
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -852,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
|
||||
rcu_read_unlock();
|
||||
size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
|
||||
|
||||
if (size < la_size_sect) {
|
||||
if (rs && u_size == 0) {
|
||||
/* Remove "rs &&" later. This check should always be active, but
|
||||
right now the receiver expects the permissive behavior */
|
||||
dev_warn(DEV, "Implicit shrink not allowed. "
|
||||
"Use --size=%llus for explicit shrink.\n",
|
||||
(unsigned long long)size);
|
||||
rv = DS_ERROR_SHRINK;
|
||||
}
|
||||
if (u_size > size)
|
||||
rv = DS_ERROR_SPACE_MD;
|
||||
if (rv != DS_UNCHANGED)
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
if (drbd_get_capacity(mdev->this_bdev) != size ||
|
||||
drbd_bm_capacity(mdev) != size) {
|
||||
int err;
|
||||
@@ -867,7 +913,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
|
||||
"Leaving size unchanged at size = %lu KB\n",
|
||||
(unsigned long)size);
|
||||
}
|
||||
rv = dev_size_error;
|
||||
rv = DS_ERROR;
|
||||
}
|
||||
/* racy, see comments above. */
|
||||
drbd_set_my_capacity(mdev, size);
|
||||
@@ -875,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
|
||||
dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
|
||||
(unsigned long long)size>>1);
|
||||
}
|
||||
if (rv == dev_size_error)
|
||||
goto out;
|
||||
if (rv <= DS_ERROR)
|
||||
goto err_out;
|
||||
|
||||
la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
|
||||
|
||||
md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
|
||||
|| prev_size != mdev->ldev->md.md_size_sect;
|
||||
|
||||
if (la_size_changed || md_moved) {
|
||||
int err;
|
||||
if (la_size_changed || md_moved || rs) {
|
||||
u32 prev_flags;
|
||||
|
||||
drbd_al_shrink(mdev); /* All extents inactive. */
|
||||
|
||||
prev_flags = md->flags;
|
||||
md->flags &= ~MDF_PRIMARY_IND;
|
||||
drbd_md_write(mdev, buffer);
|
||||
|
||||
dev_info(DEV, "Writing the whole bitmap, %s\n",
|
||||
la_size_changed && md_moved ? "size changed and md moved" :
|
||||
la_size_changed ? "size changed" : "md moved");
|
||||
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
|
||||
err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
|
||||
"size changed", BM_LOCKED_MASK);
|
||||
if (err) {
|
||||
rv = dev_size_error;
|
||||
goto out;
|
||||
}
|
||||
drbd_md_mark_dirty(mdev);
|
||||
drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
|
||||
"size changed", BM_LOCKED_MASK);
|
||||
drbd_initialize_al(mdev, buffer);
|
||||
|
||||
md->flags = prev_flags;
|
||||
drbd_md_write(mdev, buffer);
|
||||
|
||||
if (rs)
|
||||
dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
|
||||
md->al_stripes, md->al_stripe_size_4k * 4);
|
||||
}
|
||||
|
||||
if (size > la_size_sect)
|
||||
rv = grew;
|
||||
rv = DS_GREW;
|
||||
if (size < la_size_sect)
|
||||
rv = shrunk;
|
||||
out:
|
||||
rv = DS_SHRUNK;
|
||||
|
||||
if (0) {
|
||||
err_out:
|
||||
if (rs) {
|
||||
md->al_stripes = prev_al_stripes;
|
||||
md->al_stripe_size_4k = prev_al_stripe_size_4k;
|
||||
md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
|
||||
|
||||
drbd_md_set_sector_offsets(mdev, mdev->ldev);
|
||||
}
|
||||
}
|
||||
lc_unlock(mdev->act_log);
|
||||
wake_up(&mdev->al_wait);
|
||||
drbd_md_put_buffer(mdev);
|
||||
drbd_resume_io(mdev);
|
||||
|
||||
return rv;
|
||||
@@ -1607,11 +1672,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
|
||||
!drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
|
||||
set_bit(USE_DEGR_WFC_T, &mdev->flags);
|
||||
|
||||
dd = drbd_determine_dev_size(mdev, 0);
|
||||
if (dd == dev_size_error) {
|
||||
dd = drbd_determine_dev_size(mdev, 0, NULL);
|
||||
if (dd <= DS_ERROR) {
|
||||
retcode = ERR_NOMEM_BITMAP;
|
||||
goto force_diskless_dec;
|
||||
} else if (dd == grew)
|
||||
} else if (dd == DS_GREW)
|
||||
set_bit(RESYNC_AFTER_NEG, &mdev->flags);
|
||||
|
||||
if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
|
||||
@@ -2305,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
|
||||
struct drbd_conf *mdev;
|
||||
enum drbd_ret_code retcode;
|
||||
enum determine_dev_size dd;
|
||||
bool change_al_layout = false;
|
||||
enum dds_flags ddsf;
|
||||
sector_t u_size;
|
||||
int err;
|
||||
@@ -2315,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
|
||||
if (retcode != NO_ERROR)
|
||||
goto fail;
|
||||
|
||||
mdev = adm_ctx.mdev;
|
||||
if (!get_ldev(mdev)) {
|
||||
retcode = ERR_NO_DISK;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
memset(&rs, 0, sizeof(struct resize_parms));
|
||||
rs.al_stripes = mdev->ldev->md.al_stripes;
|
||||
rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
|
||||
if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
|
||||
err = resize_parms_from_attrs(&rs, info);
|
||||
if (err) {
|
||||
retcode = ERR_MANDATORY_TAG;
|
||||
drbd_msg_put_info(from_attrs_err_to_txt(err));
|
||||
goto fail;
|
||||
goto fail_ldev;
|
||||
}
|
||||
}
|
||||
|
||||
mdev = adm_ctx.mdev;
|
||||
if (mdev->state.conn > C_CONNECTED) {
|
||||
retcode = ERR_RESIZE_RESYNC;
|
||||
goto fail;
|
||||
goto fail_ldev;
|
||||
}
|
||||
|
||||
if (mdev->state.role == R_SECONDARY &&
|
||||
mdev->state.peer == R_SECONDARY) {
|
||||
retcode = ERR_NO_PRIMARY;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!get_ldev(mdev)) {
|
||||
retcode = ERR_NO_DISK;
|
||||
goto fail;
|
||||
goto fail_ldev;
|
||||
}
|
||||
|
||||
if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
|
||||
@@ -2358,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
|
||||
}
|
||||
}
|
||||
|
||||
if (mdev->ldev->md.al_stripes != rs.al_stripes ||
|
||||
mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
|
||||
u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
|
||||
|
||||
if (al_size_k > (16 * 1024 * 1024)) {
|
||||
retcode = ERR_MD_LAYOUT_TOO_BIG;
|
||||
goto fail_ldev;
|
||||
}
|
||||
|
||||
if (al_size_k < MD_32kB_SECT/2) {
|
||||
retcode = ERR_MD_LAYOUT_TOO_SMALL;
|
||||
goto fail_ldev;
|
||||
}
|
||||
|
||||
if (mdev->state.conn != C_CONNECTED) {
|
||||
retcode = ERR_MD_LAYOUT_CONNECTED;
|
||||
goto fail_ldev;
|
||||
}
|
||||
|
||||
change_al_layout = true;
|
||||
}
|
||||
|
||||
if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
|
||||
mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
|
||||
|
||||
@@ -2373,16 +2463,22 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
|
||||
}
|
||||
|
||||
ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
|
||||
dd = drbd_determine_dev_size(mdev, ddsf);
|
||||
dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
|
||||
drbd_md_sync(mdev);
|
||||
put_ldev(mdev);
|
||||
if (dd == dev_size_error) {
|
||||
if (dd == DS_ERROR) {
|
||||
retcode = ERR_NOMEM_BITMAP;
|
||||
goto fail;
|
||||
} else if (dd == DS_ERROR_SPACE_MD) {
|
||||
retcode = ERR_MD_LAYOUT_NO_FIT;
|
||||
goto fail;
|
||||
} else if (dd == DS_ERROR_SHRINK) {
|
||||
retcode = ERR_IMPLICIT_SHRINK;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (mdev->state.conn == C_CONNECTED) {
|
||||
if (dd == grew)
|
||||
if (dd == DS_GREW)
|
||||
set_bit(RESIZE_PENDING, &mdev->flags);
|
||||
|
||||
drbd_send_uuids(mdev);
|
||||
@@ -2658,7 +2754,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
|
||||
const struct sib_info *sib)
|
||||
{
|
||||
struct state_info *si = NULL; /* for sizeof(si->member); */
|
||||
struct net_conf *nc;
|
||||
struct nlattr *nla;
|
||||
int got_ldev;
|
||||
int err = 0;
|
||||
@@ -2688,13 +2783,19 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
|
||||
goto nla_put_failure;
|
||||
|
||||
rcu_read_lock();
|
||||
if (got_ldev)
|
||||
if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
|
||||
goto nla_put_failure;
|
||||
if (got_ldev) {
|
||||
struct disk_conf *disk_conf;
|
||||
|
||||
nc = rcu_dereference(mdev->tconn->net_conf);
|
||||
if (nc)
|
||||
err = net_conf_to_skb(skb, nc, exclude_sensitive);
|
||||
disk_conf = rcu_dereference(mdev->ldev->disk_conf);
|
||||
err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
|
||||
}
|
||||
if (!err) {
|
||||
struct net_conf *nc;
|
||||
|
||||
nc = rcu_dereference(mdev->tconn->net_conf);
|
||||
if (nc)
|
||||
err = net_conf_to_skb(skb, nc, exclude_sensitive);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (err)
|
||||
goto nla_put_failure;
|
||||
|
||||
@@ -1039,6 +1039,8 @@ randomize:
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&tconn->volumes, mdev, vnr) {
|
||||
kref_get(&mdev->kref);
|
||||
rcu_read_unlock();
|
||||
|
||||
/* Prevent a race between resync-handshake and
|
||||
* being promoted to Primary.
|
||||
*
|
||||
@@ -1049,8 +1051,6 @@ randomize:
|
||||
mutex_lock(mdev->state_mutex);
|
||||
mutex_unlock(mdev->state_mutex);
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
if (discard_my_data)
|
||||
set_bit(DISCARD_MY_DATA, &mdev->flags);
|
||||
else
|
||||
@@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
|
||||
{
|
||||
struct drbd_conf *mdev;
|
||||
struct p_sizes *p = pi->data;
|
||||
enum determine_dev_size dd = unchanged;
|
||||
enum determine_dev_size dd = DS_UNCHANGED;
|
||||
sector_t p_size, p_usize, my_usize;
|
||||
int ldsc = 0; /* local disk size changed */
|
||||
enum dds_flags ddsf;
|
||||
@@ -3617,9 +3617,9 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
|
||||
|
||||
ddsf = be16_to_cpu(p->dds_flags);
|
||||
if (get_ldev(mdev)) {
|
||||
dd = drbd_determine_dev_size(mdev, ddsf);
|
||||
dd = drbd_determine_dev_size(mdev, ddsf, NULL);
|
||||
put_ldev(mdev);
|
||||
if (dd == dev_size_error)
|
||||
if (dd == DS_ERROR)
|
||||
return -EIO;
|
||||
drbd_md_sync(mdev);
|
||||
} else {
|
||||
@@ -3647,7 +3647,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
|
||||
drbd_send_sizes(mdev, 0, ddsf);
|
||||
}
|
||||
if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
|
||||
(dd == grew && mdev->state.conn == C_CONNECTED)) {
|
||||
(dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
|
||||
if (mdev->state.pdsk >= D_INCONSISTENT &&
|
||||
mdev->state.disk >= D_INCONSISTENT) {
|
||||
if (ddsf & DDSF_NO_RESYNC)
|
||||
|
||||
@@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
|
||||
drbd_thread_restart_nowait(&mdev->tconn->receiver);
|
||||
|
||||
/* Resume AL writing if we get a connection */
|
||||
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
|
||||
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
|
||||
drbd_resume_al(mdev);
|
||||
mdev->tconn->connect_cnt++;
|
||||
}
|
||||
|
||||
/* remember last attach time so request_timer_fn() won't
|
||||
* kill newly established sessions while we are still trying to thaw
|
||||
|
||||
@@ -63,7 +63,6 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/loop.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/freezer.h>
|
||||
@@ -76,6 +75,7 @@
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/falloc.h>
|
||||
#include "loop.h"
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
|
||||
85
drivers/block/loop.h
Normal file
85
drivers/block/loop.h
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* loop.h
|
||||
*
|
||||
* Written by Theodore Ts'o, 3/29/93.
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
||||
* permitted under the GNU General Public License.
|
||||
*/
|
||||
#ifndef _LINUX_LOOP_H
|
||||
#define _LINUX_LOOP_H
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <uapi/linux/loop.h>
|
||||
|
||||
/* Possible states of device */
|
||||
enum {
	/* Values are spelled out; they match the implicit 0, 1, 2 numbering. */
	Lo_unbound	= 0,
	Lo_bound	= 1,
	Lo_rundown	= 2,
};
|
||||
|
||||
struct loop_func_table;
|
||||
|
||||
struct loop_device {
|
||||
int lo_number;
|
||||
int lo_refcnt;
|
||||
loff_t lo_offset;
|
||||
loff_t lo_sizelimit;
|
||||
int lo_flags;
|
||||
int (*transfer)(struct loop_device *, int cmd,
|
||||
struct page *raw_page, unsigned raw_off,
|
||||
struct page *loop_page, unsigned loop_off,
|
||||
int size, sector_t real_block);
|
||||
char lo_file_name[LO_NAME_SIZE];
|
||||
char lo_crypt_name[LO_NAME_SIZE];
|
||||
char lo_encrypt_key[LO_KEY_SIZE];
|
||||
int lo_encrypt_key_size;
|
||||
struct loop_func_table *lo_encryption;
|
||||
__u32 lo_init[2];
|
||||
kuid_t lo_key_owner; /* Who set the key */
|
||||
int (*ioctl)(struct loop_device *, int cmd,
|
||||
unsigned long arg);
|
||||
|
||||
struct file * lo_backing_file;
|
||||
struct block_device *lo_device;
|
||||
unsigned lo_blocksize;
|
||||
void *key_data;
|
||||
|
||||
gfp_t old_gfp_mask;
|
||||
|
||||
spinlock_t lo_lock;
|
||||
struct bio_list lo_bio_list;
|
||||
unsigned int lo_bio_count;
|
||||
int lo_state;
|
||||
struct mutex lo_ctl_mutex;
|
||||
struct task_struct *lo_thread;
|
||||
wait_queue_head_t lo_event;
|
||||
/* wait queue for incoming requests */
|
||||
wait_queue_head_t lo_req_wait;
|
||||
|
||||
struct request_queue *lo_queue;
|
||||
struct gendisk *lo_disk;
|
||||
};
|
||||
|
||||
/* Support for loadable transfer modules */
|
||||
struct loop_func_table {
|
||||
int number; /* filter type */
|
||||
int (*transfer)(struct loop_device *lo, int cmd,
|
||||
struct page *raw_page, unsigned raw_off,
|
||||
struct page *loop_page, unsigned loop_off,
|
||||
int size, sector_t real_block);
|
||||
int (*init)(struct loop_device *, const struct loop_info64 *);
|
||||
/* release is called from loop_unregister_transfer or clr_fd */
|
||||
int (*release)(struct loop_device *);
|
||||
int (*ioctl)(struct loop_device *, int cmd, unsigned long arg);
|
||||
struct module *owner;
|
||||
};
|
||||
|
||||
int loop_register_transfer(struct loop_func_table *funcs);
|
||||
int loop_unregister_transfer(int number);
|
||||
|
||||
#endif
|
||||
@@ -4087,7 +4087,8 @@ skip_create_disk:
|
||||
start_service_thread:
|
||||
sprintf(thd_name, "mtip_svc_thd_%02d", index);
|
||||
dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
|
||||
dd, dd->numa_node, thd_name);
|
||||
dd, dd->numa_node, "%s",
|
||||
thd_name);
|
||||
|
||||
if (IS_ERR(dd->mtip_svc_handler)) {
|
||||
dev_err(&dd->pdev->dev, "service thread failed to start\n");
|
||||
|
||||
@@ -623,8 +623,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
if (!nbd->sock)
|
||||
return -EINVAL;
|
||||
|
||||
nbd->disconnect = 1;
|
||||
|
||||
nbd_send_req(nbd, &sreq);
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
case NBD_CLEAR_SOCK: {
|
||||
@@ -654,6 +656,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
nbd->sock = SOCKET_I(inode);
|
||||
if (max_part > 0)
|
||||
bdev->bd_invalidated = 1;
|
||||
nbd->disconnect = 0; /* we're connected now */
|
||||
return 0;
|
||||
} else {
|
||||
fput(file);
|
||||
@@ -714,7 +717,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
else
|
||||
blk_queue_flush(nbd->disk->queue, 0);
|
||||
|
||||
thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name);
|
||||
thread = kthread_create(nbd_thread, nbd, "%s",
|
||||
nbd->disk->disk_name);
|
||||
if (IS_ERR(thread)) {
|
||||
mutex_lock(&nbd->tx_lock);
|
||||
return PTR_ERR(thread);
|
||||
@@ -742,6 +746,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
set_capacity(nbd->disk, 0);
|
||||
if (max_part > 0)
|
||||
ioctl_by_bdev(bdev, BLKRRPART, 0);
|
||||
if (nbd->disconnect) /* user requested, ignore socket errors */
|
||||
return 0;
|
||||
return nbd->harderror;
|
||||
}
|
||||
|
||||
@@ -750,7 +756,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
* This is for compatibility only. The queue is always cleared
|
||||
* by NBD_DO_IT or NBD_CLEAR_SOCK.
|
||||
*/
|
||||
BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head));
|
||||
return 0;
|
||||
|
||||
case NBD_PRINT_DEBUG:
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/idr.h>
|
||||
@@ -39,8 +41,9 @@
|
||||
#include "rsxx_cfg.h"
|
||||
|
||||
#define NO_LEGACY 0
|
||||
#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */
|
||||
|
||||
MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
|
||||
MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver");
|
||||
MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_VERSION(DRIVER_VERSION);
|
||||
@@ -49,9 +52,282 @@ static unsigned int force_legacy = NO_LEGACY;
|
||||
module_param(force_legacy, uint, 0444);
|
||||
MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
|
||||
|
||||
static unsigned int sync_start = 1;
|
||||
module_param(sync_start, uint, 0444);
|
||||
MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
|
||||
"until the card startup has completed.");
|
||||
|
||||
static DEFINE_IDA(rsxx_disk_ida);
|
||||
static DEFINE_SPINLOCK(rsxx_ida_lock);
|
||||
|
||||
/* --------------------Debugfs Setup ------------------- */
|
||||
|
||||
struct rsxx_cram {
|
||||
u32 f_pos;
|
||||
u32 offset;
|
||||
void *i_private;
|
||||
};
|
||||
|
||||
static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
|
||||
{
|
||||
struct rsxx_cardinfo *card = m->private;
|
||||
|
||||
seq_printf(m, "HWID 0x%08x\n",
|
||||
ioread32(card->regmap + HWID));
|
||||
seq_printf(m, "SCRATCH 0x%08x\n",
|
||||
ioread32(card->regmap + SCRATCH));
|
||||
seq_printf(m, "IER 0x%08x\n",
|
||||
ioread32(card->regmap + IER));
|
||||
seq_printf(m, "IPR 0x%08x\n",
|
||||
ioread32(card->regmap + IPR));
|
||||
seq_printf(m, "CREG_CMD 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_CMD));
|
||||
seq_printf(m, "CREG_ADD 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_ADD));
|
||||
seq_printf(m, "CREG_CNT 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_CNT));
|
||||
seq_printf(m, "CREG_STAT 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_STAT));
|
||||
seq_printf(m, "CREG_DATA0 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA0));
|
||||
seq_printf(m, "CREG_DATA1 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA1));
|
||||
seq_printf(m, "CREG_DATA2 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA2));
|
||||
seq_printf(m, "CREG_DATA3 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA3));
|
||||
seq_printf(m, "CREG_DATA4 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA4));
|
||||
seq_printf(m, "CREG_DATA5 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA5));
|
||||
seq_printf(m, "CREG_DATA6 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA6));
|
||||
seq_printf(m, "CREG_DATA7 0x%08x\n",
|
||||
ioread32(card->regmap + CREG_DATA7));
|
||||
seq_printf(m, "INTR_COAL 0x%08x\n",
|
||||
ioread32(card->regmap + INTR_COAL));
|
||||
seq_printf(m, "HW_ERROR 0x%08x\n",
|
||||
ioread32(card->regmap + HW_ERROR));
|
||||
seq_printf(m, "DEBUG0 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG0));
|
||||
seq_printf(m, "DEBUG1 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG1));
|
||||
seq_printf(m, "DEBUG2 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG2));
|
||||
seq_printf(m, "DEBUG3 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG3));
|
||||
seq_printf(m, "DEBUG4 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG4));
|
||||
seq_printf(m, "DEBUG5 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG5));
|
||||
seq_printf(m, "DEBUG6 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG6));
|
||||
seq_printf(m, "DEBUG7 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_DEBUG7));
|
||||
seq_printf(m, "RECONFIG 0x%08x\n",
|
||||
ioread32(card->regmap + PCI_RECONFIG));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rsxx_attr_stats_show(struct seq_file *m, void *p)
|
||||
{
|
||||
struct rsxx_cardinfo *card = m->private;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < card->n_targets; i++) {
|
||||
seq_printf(m, "Ctrl %d CRC Errors = %d\n",
|
||||
i, card->ctrl[i].stats.crc_errors);
|
||||
seq_printf(m, "Ctrl %d Hard Errors = %d\n",
|
||||
i, card->ctrl[i].stats.hard_errors);
|
||||
seq_printf(m, "Ctrl %d Soft Errors = %d\n",
|
||||
i, card->ctrl[i].stats.soft_errors);
|
||||
seq_printf(m, "Ctrl %d Writes Issued = %d\n",
|
||||
i, card->ctrl[i].stats.writes_issued);
|
||||
seq_printf(m, "Ctrl %d Writes Failed = %d\n",
|
||||
i, card->ctrl[i].stats.writes_failed);
|
||||
seq_printf(m, "Ctrl %d Reads Issued = %d\n",
|
||||
i, card->ctrl[i].stats.reads_issued);
|
||||
seq_printf(m, "Ctrl %d Reads Failed = %d\n",
|
||||
i, card->ctrl[i].stats.reads_failed);
|
||||
seq_printf(m, "Ctrl %d Reads Retried = %d\n",
|
||||
i, card->ctrl[i].stats.reads_retried);
|
||||
seq_printf(m, "Ctrl %d Discards Issued = %d\n",
|
||||
i, card->ctrl[i].stats.discards_issued);
|
||||
seq_printf(m, "Ctrl %d Discards Failed = %d\n",
|
||||
i, card->ctrl[i].stats.discards_failed);
|
||||
seq_printf(m, "Ctrl %d DMA SW Errors = %d\n",
|
||||
i, card->ctrl[i].stats.dma_sw_err);
|
||||
seq_printf(m, "Ctrl %d DMA HW Faults = %d\n",
|
||||
i, card->ctrl[i].stats.dma_hw_fault);
|
||||
seq_printf(m, "Ctrl %d DMAs Cancelled = %d\n",
|
||||
i, card->ctrl[i].stats.dma_cancelled);
|
||||
seq_printf(m, "Ctrl %d SW Queue Depth = %d\n",
|
||||
i, card->ctrl[i].stats.sw_q_depth);
|
||||
seq_printf(m, "Ctrl %d HW Queue Depth = %d\n",
|
||||
i, atomic_read(&card->ctrl[i].stats.hw_q_depth));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rsxx_attr_stats_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, rsxx_attr_stats_show, inode->i_private);
|
||||
}
|
||||
|
||||
static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, rsxx_attr_pci_regs_show, inode->i_private);
|
||||
}
|
||||
|
||||
/*
 * read() for the "cram" debugfs file.
 *
 * Reads @cnt bytes from the card at CREG_ADD_CRAM + position into a
 * temporary kernel buffer and copies them to @ubuf.  The position is
 * (u32)*ppos plus the running info->offset; *ppos itself is not advanced
 * here, only info->offset is.
 *
 * Returns @cnt on success, -ENOMEM if the bounce buffer cannot be
 * allocated, the rsxx_creg_read() status if the card read fails, or
 * -EFAULT if the user buffer cannot be written.
 */
static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
			      size_t cnt, loff_t *ppos)
{
	struct rsxx_cram *info = fp->private_data;
	struct rsxx_cardinfo *card = info->i_private;
	ssize_t ret;
	char *buf;
	int st;

	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	info->f_pos = (u32)*ppos + info->offset;

	st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
	if (st) {
		ret = st;
		goto out_free;
	}

	/*
	 * copy_to_user() returns the number of bytes it could NOT copy, not
	 * an errno; translate any shortfall into -EFAULT.
	 */
	if (copy_to_user(ubuf, buf, cnt)) {
		ret = -EFAULT;
		goto out_free;
	}

	info->offset += cnt;
	ret = cnt;

out_free:
	/* Single exit so the bounce buffer is released on every path. */
	kfree(buf);
	return ret;
}
|
||||
|
||||
/*
 * write() for the "cram" debugfs file.
 *
 * Copies @cnt bytes from @ubuf into a temporary kernel buffer and writes
 * them to the card at CREG_ADD_CRAM + position.  The position is
 * (u32)*ppos plus the running info->offset; *ppos itself is not advanced
 * here, only info->offset is.
 *
 * Returns @cnt on success, -ENOMEM if the bounce buffer cannot be
 * allocated, -EFAULT if the user buffer cannot be read, or the
 * rsxx_creg_write() status if the card write fails.
 */
static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	struct rsxx_cram *info = fp->private_data;
	struct rsxx_cardinfo *card = info->i_private;
	ssize_t ret;
	char *buf;
	int st;

	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * copy_from_user() returns the number of bytes it could NOT copy,
	 * not an errno; translate any shortfall into -EFAULT.
	 */
	if (copy_from_user(buf, ubuf, cnt)) {
		ret = -EFAULT;
		goto out_free;
	}

	info->f_pos = (u32)*ppos + info->offset;

	st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
	if (st) {
		ret = st;
		goto out_free;
	}

	info->offset += cnt;
	ret = cnt;

out_free:
	/* Single exit so the bounce buffer is released on every path. */
	kfree(buf);
	return ret;
}
|
||||
|
||||
static int rsxx_cram_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
|
||||
if (!info)
|
||||
return -ENOMEM;
|
||||
|
||||
info->i_private = inode->i_private;
|
||||
info->f_pos = file->f_pos;
|
||||
file->private_data = info;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rsxx_cram_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct rsxx_cram *info = file->private_data;
|
||||
|
||||
if (!info)
|
||||
return 0;
|
||||
|
||||
kfree(info);
|
||||
file->private_data = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations debugfs_cram_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = rsxx_cram_open,
|
||||
.read = rsxx_cram_read,
|
||||
.write = rsxx_cram_write,
|
||||
.release = rsxx_cram_release,
|
||||
};
|
||||
|
||||
static const struct file_operations debugfs_stats_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = rsxx_attr_stats_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static const struct file_operations debugfs_pci_regs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = rsxx_attr_pci_regs_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
|
||||
{
|
||||
struct dentry *debugfs_stats;
|
||||
struct dentry *debugfs_pci_regs;
|
||||
struct dentry *debugfs_cram;
|
||||
|
||||
card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL);
|
||||
if (IS_ERR_OR_NULL(card->debugfs_dir))
|
||||
goto failed_debugfs_dir;
|
||||
|
||||
debugfs_stats = debugfs_create_file("stats", S_IRUGO,
|
||||
card->debugfs_dir, card,
|
||||
&debugfs_stats_fops);
|
||||
if (IS_ERR_OR_NULL(debugfs_stats))
|
||||
goto failed_debugfs_stats;
|
||||
|
||||
debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
|
||||
card->debugfs_dir, card,
|
||||
&debugfs_pci_regs_fops);
|
||||
if (IS_ERR_OR_NULL(debugfs_pci_regs))
|
||||
goto failed_debugfs_pci_regs;
|
||||
|
||||
debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
|
||||
card->debugfs_dir, card,
|
||||
&debugfs_cram_fops);
|
||||
if (IS_ERR_OR_NULL(debugfs_cram))
|
||||
goto failed_debugfs_cram;
|
||||
|
||||
return;
|
||||
failed_debugfs_cram:
|
||||
debugfs_remove(debugfs_pci_regs);
|
||||
failed_debugfs_pci_regs:
|
||||
debugfs_remove(debugfs_stats);
|
||||
failed_debugfs_stats:
|
||||
debugfs_remove(card->debugfs_dir);
|
||||
failed_debugfs_dir:
|
||||
card->debugfs_dir = NULL;
|
||||
}
|
||||
|
||||
/*----------------- Interrupt Control & Handling -------------------*/
|
||||
|
||||
static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
|
||||
@@ -163,12 +439,13 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
|
||||
}
|
||||
|
||||
if (isr & CR_INTR_CREG) {
|
||||
schedule_work(&card->creg_ctrl.done_work);
|
||||
queue_work(card->creg_ctrl.creg_wq,
|
||||
&card->creg_ctrl.done_work);
|
||||
handled++;
|
||||
}
|
||||
|
||||
if (isr & CR_INTR_EVENT) {
|
||||
schedule_work(&card->event_work);
|
||||
queue_work(card->event_wq, &card->event_work);
|
||||
rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
|
||||
handled++;
|
||||
}
|
||||
@@ -329,7 +606,7 @@ static int rsxx_eeh_frozen(struct pci_dev *dev)
|
||||
int i;
|
||||
int st;
|
||||
|
||||
dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
|
||||
dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n");
|
||||
|
||||
card->eeh_state = 1;
|
||||
rsxx_mask_interrupts(card);
|
||||
@@ -367,15 +644,26 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
|
||||
{
|
||||
struct rsxx_cardinfo *card = pci_get_drvdata(dev);
|
||||
int i;
|
||||
int cnt = 0;
|
||||
|
||||
dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
|
||||
dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n");
|
||||
|
||||
card->eeh_state = 1;
|
||||
card->halt = 1;
|
||||
|
||||
for (i = 0; i < card->n_targets; i++)
|
||||
del_timer_sync(&card->ctrl[i].activity_timer);
|
||||
for (i = 0; i < card->n_targets; i++) {
|
||||
spin_lock_bh(&card->ctrl[i].queue_lock);
|
||||
cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
|
||||
&card->ctrl[i].queue);
|
||||
spin_unlock_bh(&card->ctrl[i].queue_lock);
|
||||
|
||||
rsxx_eeh_cancel_dmas(card);
|
||||
cnt += rsxx_dma_cancel(&card->ctrl[i]);
|
||||
|
||||
if (cnt)
|
||||
dev_info(CARD_TO_DEV(card),
|
||||
"Freed %d queued DMAs on channel %d\n",
|
||||
cnt, card->ctrl[i].id);
|
||||
}
|
||||
}
|
||||
|
||||
static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
|
||||
@@ -432,7 +720,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
|
||||
int st;
|
||||
|
||||
dev_warn(&dev->dev,
|
||||
"IBM FlashSystem PCI: recovering from slot reset.\n");
|
||||
"IBM Flash Adapter PCI: recovering from slot reset.\n");
|
||||
|
||||
st = pci_enable_device(dev);
|
||||
if (st)
|
||||
@@ -485,7 +773,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
|
||||
&card->ctrl[i].issue_dma_work);
|
||||
}
|
||||
|
||||
dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
|
||||
dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n");
|
||||
|
||||
return PCI_ERS_RESULT_RECOVERED;
|
||||
|
||||
@@ -528,6 +816,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
|
||||
{
|
||||
struct rsxx_cardinfo *card;
|
||||
int st;
|
||||
unsigned int sync_timeout;
|
||||
|
||||
dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
|
||||
|
||||
@@ -610,7 +899,11 @@ static int rsxx_pci_probe(struct pci_dev *dev,
|
||||
}
|
||||
|
||||
/************* Setup Processor Command Interface *************/
|
||||
rsxx_creg_setup(card);
|
||||
st = rsxx_creg_setup(card);
|
||||
if (st) {
|
||||
dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n");
|
||||
goto failed_creg_setup;
|
||||
}
|
||||
|
||||
spin_lock_irq(&card->irq_lock);
|
||||
rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
|
||||
@@ -650,6 +943,12 @@ static int rsxx_pci_probe(struct pci_dev *dev,
|
||||
}
|
||||
|
||||
/************* Setup Card Event Handler *************/
|
||||
card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
|
||||
if (!card->event_wq) {
|
||||
dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
|
||||
goto failed_event_handler;
|
||||
}
|
||||
|
||||
INIT_WORK(&card->event_work, card_event_handler);
|
||||
|
||||
st = rsxx_setup_dev(card);
|
||||
@@ -676,6 +975,33 @@ static int rsxx_pci_probe(struct pci_dev *dev,
|
||||
if (st)
|
||||
dev_crit(CARD_TO_DEV(card),
|
||||
"Failed issuing card startup\n");
|
||||
if (sync_start) {
|
||||
sync_timeout = SYNC_START_TIMEOUT;
|
||||
|
||||
dev_info(CARD_TO_DEV(card),
|
||||
"Waiting for card to startup\n");
|
||||
|
||||
do {
|
||||
ssleep(1);
|
||||
sync_timeout--;
|
||||
|
||||
rsxx_get_card_state(card, &card->state);
|
||||
} while (sync_timeout &&
|
||||
(card->state == CARD_STATE_STARTING));
|
||||
|
||||
if (card->state == CARD_STATE_STARTING) {
|
||||
dev_warn(CARD_TO_DEV(card),
|
||||
"Card startup timed out\n");
|
||||
card->size8 = 0;
|
||||
} else {
|
||||
dev_info(CARD_TO_DEV(card),
|
||||
"card state: %s\n",
|
||||
rsxx_card_state_to_str(card->state));
|
||||
st = rsxx_get_card_size8(card, &card->size8);
|
||||
if (st)
|
||||
card->size8 = 0;
|
||||
}
|
||||
}
|
||||
} else if (card->state == CARD_STATE_GOOD ||
|
||||
card->state == CARD_STATE_RD_ONLY_FAULT) {
|
||||
st = rsxx_get_card_size8(card, &card->size8);
|
||||
@@ -685,12 +1011,21 @@ static int rsxx_pci_probe(struct pci_dev *dev,
|
||||
|
||||
rsxx_attach_dev(card);
|
||||
|
||||
/************* Setup Debugfs *************/
|
||||
rsxx_debugfs_dev_new(card);
|
||||
|
||||
return 0;
|
||||
|
||||
failed_create_dev:
|
||||
destroy_workqueue(card->event_wq);
|
||||
card->event_wq = NULL;
|
||||
failed_event_handler:
|
||||
rsxx_dma_destroy(card);
|
||||
failed_dma_setup:
|
||||
failed_compatiblity_check:
|
||||
destroy_workqueue(card->creg_ctrl.creg_wq);
|
||||
card->creg_ctrl.creg_wq = NULL;
|
||||
failed_creg_setup:
|
||||
spin_lock_irq(&card->irq_lock);
|
||||
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
|
||||
spin_unlock_irq(&card->irq_lock);
|
||||
@@ -756,6 +1091,8 @@ static void rsxx_pci_remove(struct pci_dev *dev)
|
||||
/* Prevent work_structs from re-queuing themselves. */
|
||||
card->halt = 1;
|
||||
|
||||
debugfs_remove_recursive(card->debugfs_dir);
|
||||
|
||||
free_irq(dev->irq, card);
|
||||
|
||||
if (!force_legacy)
|
||||
|
||||
@@ -431,6 +431,15 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card,
|
||||
*hw_stat = completion.creg_status;
|
||||
|
||||
if (completion.st) {
|
||||
/*
|
||||
* This read is needed to verify that there have not been any
|
||||
* extreme errors that might have occurred, i.e. EEH. The
|
||||
* function iowrite32 will not detect EEH errors, so it is
|
||||
* necessary that we recover if such an error is the reason
|
||||
* for the timeout. This is a dummy read.
|
||||
*/
|
||||
ioread32(card->regmap + SCRATCH);
|
||||
|
||||
dev_warn(CARD_TO_DEV(card),
|
||||
"creg command failed(%d x%08x)\n",
|
||||
completion.st, addr);
|
||||
@@ -727,6 +736,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card)
|
||||
{
|
||||
card->creg_ctrl.active_cmd = NULL;
|
||||
|
||||
card->creg_ctrl.creg_wq =
|
||||
create_singlethread_workqueue(DRIVER_NAME"_creg");
|
||||
if (!card->creg_ctrl.creg_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
|
||||
mutex_init(&card->creg_ctrl.reset_lock);
|
||||
INIT_LIST_HEAD(&card->creg_ctrl.queue);
|
||||
|
||||
@@ -155,7 +155,8 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
|
||||
atomic_set(&meta->error, 1);
|
||||
|
||||
if (atomic_dec_and_test(&meta->pending_dmas)) {
|
||||
disk_stats_complete(card, meta->bio, meta->start_time);
|
||||
if (!card->eeh_state && card->gendisk)
|
||||
disk_stats_complete(card, meta->bio, meta->start_time);
|
||||
|
||||
bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
|
||||
kmem_cache_free(bio_meta_pool, meta);
|
||||
@@ -170,6 +171,12 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!card)
|
||||
goto req_err;
|
||||
|
||||
if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
|
||||
goto req_err;
|
||||
|
||||
if (unlikely(card->halt)) {
|
||||
st = -EFAULT;
|
||||
goto req_err;
|
||||
@@ -196,7 +203,8 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
|
||||
atomic_set(&bio_meta->pending_dmas, 0);
|
||||
bio_meta->start_time = jiffies;
|
||||
|
||||
disk_stats_start(card, bio);
|
||||
if (!unlikely(card->halt))
|
||||
disk_stats_start(card, bio);
|
||||
|
||||
dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
|
||||
bio_data_dir(bio) ? 'W' : 'R', bio_meta,
|
||||
@@ -225,24 +233,6 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
|
||||
return (pci_rev >= RSXX_DISCARD_SUPPORT);
|
||||
}
|
||||
|
||||
static unsigned short rsxx_get_logical_block_size(
|
||||
struct rsxx_cardinfo *card)
|
||||
{
|
||||
u32 capabilities = 0;
|
||||
int st;
|
||||
|
||||
st = rsxx_get_card_capabilities(card, &capabilities);
|
||||
if (st)
|
||||
dev_warn(CARD_TO_DEV(card),
|
||||
"Failed reading card capabilities register\n");
|
||||
|
||||
/* Earlier firmware did not have support for 512 byte accesses */
|
||||
if (capabilities & CARD_CAP_SUBPAGE_WRITES)
|
||||
return 512;
|
||||
else
|
||||
return RSXX_HW_BLK_SIZE;
|
||||
}
|
||||
|
||||
int rsxx_attach_dev(struct rsxx_cardinfo *card)
|
||||
{
|
||||
mutex_lock(&card->dev_lock);
|
||||
@@ -305,7 +295,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
blk_size = rsxx_get_logical_block_size(card);
|
||||
blk_size = card->config.data.block_size;
|
||||
|
||||
blk_queue_make_request(card->queue, rsxx_make_request);
|
||||
blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
|
||||
@@ -347,6 +337,7 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
|
||||
card->gendisk = NULL;
|
||||
|
||||
blk_cleanup_queue(card->queue);
|
||||
card->queue->queuedata = NULL;
|
||||
unregister_blkdev(card->major, DRIVER_NAME);
|
||||
}
|
||||
|
||||
|
||||
@@ -245,6 +245,22 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
|
||||
kmem_cache_free(rsxx_dma_pool, dma);
|
||||
}
|
||||
|
||||
int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
|
||||
struct list_head *q)
|
||||
{
|
||||
struct rsxx_dma *dma;
|
||||
struct rsxx_dma *tmp;
|
||||
int cnt = 0;
|
||||
|
||||
list_for_each_entry_safe(dma, tmp, q, list) {
|
||||
list_del(&dma->list);
|
||||
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
|
||||
cnt++;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
|
||||
struct rsxx_dma *dma)
|
||||
{
|
||||
@@ -252,9 +268,10 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
|
||||
* Requeued DMAs go to the front of the queue so they are issued
|
||||
* first.
|
||||
*/
|
||||
spin_lock(&ctrl->queue_lock);
|
||||
spin_lock_bh(&ctrl->queue_lock);
|
||||
ctrl->stats.sw_q_depth++;
|
||||
list_add(&dma->list, &ctrl->queue);
|
||||
spin_unlock(&ctrl->queue_lock);
|
||||
spin_unlock_bh(&ctrl->queue_lock);
|
||||
}
|
||||
|
||||
static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
|
||||
@@ -329,6 +346,7 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
|
||||
static void dma_engine_stalled(unsigned long data)
|
||||
{
|
||||
struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
|
||||
int cnt;
|
||||
|
||||
if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
|
||||
unlikely(ctrl->card->eeh_state))
|
||||
@@ -349,18 +367,28 @@ static void dma_engine_stalled(unsigned long data)
|
||||
"DMA channel %d has stalled, faulting interface.\n",
|
||||
ctrl->id);
|
||||
ctrl->card->dma_fault = 1;
|
||||
|
||||
/* Clean up the DMA queue */
|
||||
spin_lock(&ctrl->queue_lock);
|
||||
cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
|
||||
spin_unlock(&ctrl->queue_lock);
|
||||
|
||||
cnt += rsxx_dma_cancel(ctrl);
|
||||
|
||||
if (cnt)
|
||||
dev_info(CARD_TO_DEV(ctrl->card),
|
||||
"Freed %d queued DMAs on channel %d\n",
|
||||
cnt, ctrl->id);
|
||||
}
|
||||
}
|
||||
|
||||
static void rsxx_issue_dmas(struct work_struct *work)
|
||||
static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
|
||||
{
|
||||
struct rsxx_dma_ctrl *ctrl;
|
||||
struct rsxx_dma *dma;
|
||||
int tag;
|
||||
int cmds_pending = 0;
|
||||
struct hw_cmd *hw_cmd_buf;
|
||||
|
||||
ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
|
||||
hw_cmd_buf = ctrl->cmd.buf;
|
||||
|
||||
if (unlikely(ctrl->card->halt) ||
|
||||
@@ -368,22 +396,22 @@ static void rsxx_issue_dmas(struct work_struct *work)
|
||||
return;
|
||||
|
||||
while (1) {
|
||||
spin_lock(&ctrl->queue_lock);
|
||||
spin_lock_bh(&ctrl->queue_lock);
|
||||
if (list_empty(&ctrl->queue)) {
|
||||
spin_unlock(&ctrl->queue_lock);
|
||||
spin_unlock_bh(&ctrl->queue_lock);
|
||||
break;
|
||||
}
|
||||
spin_unlock(&ctrl->queue_lock);
|
||||
spin_unlock_bh(&ctrl->queue_lock);
|
||||
|
||||
tag = pop_tracker(ctrl->trackers);
|
||||
if (tag == -1)
|
||||
break;
|
||||
|
||||
spin_lock(&ctrl->queue_lock);
|
||||
spin_lock_bh(&ctrl->queue_lock);
|
||||
dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
|
||||
list_del(&dma->list);
|
||||
ctrl->stats.sw_q_depth--;
|
||||
spin_unlock(&ctrl->queue_lock);
|
||||
spin_unlock_bh(&ctrl->queue_lock);
|
||||
|
||||
/*
|
||||
* This will catch any DMAs that slipped in right before the
|
||||
@@ -440,9 +468,8 @@ static void rsxx_issue_dmas(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
static void rsxx_dma_done(struct work_struct *work)
|
||||
static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl)
|
||||
{
|
||||
struct rsxx_dma_ctrl *ctrl;
|
||||
struct rsxx_dma *dma;
|
||||
unsigned long flags;
|
||||
u16 count;
|
||||
@@ -450,7 +477,6 @@ static void rsxx_dma_done(struct work_struct *work)
|
||||
u8 tag;
|
||||
struct hw_status *hw_st_buf;
|
||||
|
||||
ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
|
||||
hw_st_buf = ctrl->status.buf;
|
||||
|
||||
if (unlikely(ctrl->card->halt) ||
|
||||
@@ -520,33 +546,32 @@ static void rsxx_dma_done(struct work_struct *work)
|
||||
rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
|
||||
spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
|
||||
|
||||
spin_lock(&ctrl->queue_lock);
|
||||
spin_lock_bh(&ctrl->queue_lock);
|
||||
if (ctrl->stats.sw_q_depth)
|
||||
queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
|
||||
spin_unlock(&ctrl->queue_lock);
|
||||
spin_unlock_bh(&ctrl->queue_lock);
|
||||
}
|
||||
|
||||
static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card,
|
||||
struct list_head *q)
|
||||
static void rsxx_schedule_issue(struct work_struct *work)
|
||||
{
|
||||
struct rsxx_dma *dma;
|
||||
struct rsxx_dma *tmp;
|
||||
int cnt = 0;
|
||||
struct rsxx_dma_ctrl *ctrl;
|
||||
|
||||
list_for_each_entry_safe(dma, tmp, q, list) {
|
||||
list_del(&dma->list);
|
||||
ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
|
||||
|
||||
if (dma->dma_addr)
|
||||
pci_unmap_page(card->dev, dma->dma_addr,
|
||||
get_dma_size(dma),
|
||||
(dma->cmd == HW_CMD_BLK_WRITE) ?
|
||||
PCI_DMA_TODEVICE :
|
||||
PCI_DMA_FROMDEVICE);
|
||||
kmem_cache_free(rsxx_dma_pool, dma);
|
||||
cnt++;
|
||||
}
|
||||
mutex_lock(&ctrl->work_lock);
|
||||
rsxx_issue_dmas(ctrl);
|
||||
mutex_unlock(&ctrl->work_lock);
|
||||
}
|
||||
|
||||
return cnt;
|
||||
static void rsxx_schedule_done(struct work_struct *work)
|
||||
{
|
||||
struct rsxx_dma_ctrl *ctrl;
|
||||
|
||||
ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
|
||||
|
||||
mutex_lock(&ctrl->work_lock);
|
||||
rsxx_dma_done(ctrl);
|
||||
mutex_unlock(&ctrl->work_lock);
|
||||
}
|
||||
|
||||
static int rsxx_queue_discard(struct rsxx_cardinfo *card,
|
||||
@@ -698,10 +723,10 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
|
||||
|
||||
for (i = 0; i < card->n_targets; i++) {
|
||||
if (!list_empty(&dma_list[i])) {
|
||||
spin_lock(&card->ctrl[i].queue_lock);
|
||||
spin_lock_bh(&card->ctrl[i].queue_lock);
|
||||
card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
|
||||
list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
|
||||
spin_unlock(&card->ctrl[i].queue_lock);
|
||||
spin_unlock_bh(&card->ctrl[i].queue_lock);
|
||||
|
||||
queue_work(card->ctrl[i].issue_wq,
|
||||
&card->ctrl[i].issue_dma_work);
|
||||
@@ -711,8 +736,11 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
|
||||
return 0;
|
||||
|
||||
bvec_err:
|
||||
for (i = 0; i < card->n_targets; i++)
|
||||
rsxx_cleanup_dma_queue(card, &dma_list[i]);
|
||||
for (i = 0; i < card->n_targets; i++) {
|
||||
spin_lock_bh(&card->ctrl[i].queue_lock);
|
||||
rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
|
||||
spin_unlock_bh(&card->ctrl[i].queue_lock);
|
||||
}
|
||||
|
||||
return st;
|
||||
}
|
||||
@@ -780,6 +808,7 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
|
||||
spin_lock_init(&ctrl->trackers->lock);
|
||||
|
||||
spin_lock_init(&ctrl->queue_lock);
|
||||
mutex_init(&ctrl->work_lock);
|
||||
INIT_LIST_HEAD(&ctrl->queue);
|
||||
|
||||
setup_timer(&ctrl->activity_timer, dma_engine_stalled,
|
||||
@@ -793,8 +822,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
|
||||
if (!ctrl->done_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
|
||||
INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
|
||||
INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue);
|
||||
INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done);
|
||||
|
||||
st = rsxx_hw_buffers_init(dev, ctrl);
|
||||
if (st)
|
||||
@@ -918,13 +947,30 @@ failed_dma_setup:
|
||||
return st;
|
||||
}
|
||||
|
||||
int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl)
|
||||
{
|
||||
struct rsxx_dma *dma;
|
||||
int i;
|
||||
int cnt = 0;
|
||||
|
||||
/* Clean up issued DMAs */
|
||||
for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
|
||||
dma = get_tracker_dma(ctrl->trackers, i);
|
||||
if (dma) {
|
||||
atomic_dec(&ctrl->stats.hw_q_depth);
|
||||
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
|
||||
push_tracker(ctrl->trackers, i);
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
void rsxx_dma_destroy(struct rsxx_cardinfo *card)
|
||||
{
|
||||
struct rsxx_dma_ctrl *ctrl;
|
||||
struct rsxx_dma *dma;
|
||||
int i, j;
|
||||
int cnt = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < card->n_targets; i++) {
|
||||
ctrl = &card->ctrl[i];
|
||||
@@ -943,33 +989,11 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
|
||||
del_timer_sync(&ctrl->activity_timer);
|
||||
|
||||
/* Clean up the DMA queue */
|
||||
spin_lock(&ctrl->queue_lock);
|
||||
cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
|
||||
spin_unlock(&ctrl->queue_lock);
|
||||
spin_lock_bh(&ctrl->queue_lock);
|
||||
rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
|
||||
spin_unlock_bh(&ctrl->queue_lock);
|
||||
|
||||
if (cnt)
|
||||
dev_info(CARD_TO_DEV(card),
|
||||
"Freed %d queued DMAs on channel %d\n",
|
||||
cnt, i);
|
||||
|
||||
/* Clean up issued DMAs */
|
||||
for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
|
||||
dma = get_tracker_dma(ctrl->trackers, j);
|
||||
if (dma) {
|
||||
pci_unmap_page(card->dev, dma->dma_addr,
|
||||
get_dma_size(dma),
|
||||
(dma->cmd == HW_CMD_BLK_WRITE) ?
|
||||
PCI_DMA_TODEVICE :
|
||||
PCI_DMA_FROMDEVICE);
|
||||
kmem_cache_free(rsxx_dma_pool, dma);
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
if (cnt)
|
||||
dev_info(CARD_TO_DEV(card),
|
||||
"Freed %d pending DMAs on channel %d\n",
|
||||
cnt, i);
|
||||
rsxx_dma_cancel(ctrl);
|
||||
|
||||
vfree(ctrl->trackers);
|
||||
|
||||
@@ -1013,7 +1037,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
|
||||
cnt++;
|
||||
}
|
||||
|
||||
spin_lock(&card->ctrl[i].queue_lock);
|
||||
spin_lock_bh(&card->ctrl[i].queue_lock);
|
||||
list_splice(&issued_dmas[i], &card->ctrl[i].queue);
|
||||
|
||||
atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
|
||||
@@ -1028,7 +1052,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
|
||||
PCI_DMA_TODEVICE :
|
||||
PCI_DMA_FROMDEVICE);
|
||||
}
|
||||
spin_unlock(&card->ctrl[i].queue_lock);
|
||||
spin_unlock_bh(&card->ctrl[i].queue_lock);
|
||||
}
|
||||
|
||||
kfree(issued_dmas);
|
||||
@@ -1036,30 +1060,13 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card)
|
||||
{
|
||||
struct rsxx_dma *dma;
|
||||
struct rsxx_dma *tmp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < card->n_targets; i++) {
|
||||
spin_lock(&card->ctrl[i].queue_lock);
|
||||
list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) {
|
||||
list_del(&dma->list);
|
||||
|
||||
rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED);
|
||||
}
|
||||
spin_unlock(&card->ctrl[i].queue_lock);
|
||||
}
|
||||
}
|
||||
|
||||
int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
|
||||
{
|
||||
struct rsxx_dma *dma;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < card->n_targets; i++) {
|
||||
spin_lock(&card->ctrl[i].queue_lock);
|
||||
spin_lock_bh(&card->ctrl[i].queue_lock);
|
||||
list_for_each_entry(dma, &card->ctrl[i].queue, list) {
|
||||
dma->dma_addr = pci_map_page(card->dev, dma->page,
|
||||
dma->pg_off, get_dma_size(dma),
|
||||
@@ -1067,12 +1074,12 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
|
||||
PCI_DMA_TODEVICE :
|
||||
PCI_DMA_FROMDEVICE);
|
||||
if (!dma->dma_addr) {
|
||||
spin_unlock(&card->ctrl[i].queue_lock);
|
||||
spin_unlock_bh(&card->ctrl[i].queue_lock);
|
||||
kmem_cache_free(rsxx_dma_pool, dma);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
spin_unlock(&card->ctrl[i].queue_lock);
|
||||
spin_unlock_bh(&card->ctrl[i].queue_lock);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/ioctl.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
#include "rsxx.h"
|
||||
#include "rsxx_cfg.h"
|
||||
@@ -114,6 +115,7 @@ struct rsxx_dma_ctrl {
|
||||
struct timer_list activity_timer;
|
||||
struct dma_tracker_list *trackers;
|
||||
struct rsxx_dma_stats stats;
|
||||
struct mutex work_lock;
|
||||
};
|
||||
|
||||
struct rsxx_cardinfo {
|
||||
@@ -134,6 +136,7 @@ struct rsxx_cardinfo {
|
||||
spinlock_t lock;
|
||||
bool active;
|
||||
struct creg_cmd *active_cmd;
|
||||
struct workqueue_struct *creg_wq;
|
||||
struct work_struct done_work;
|
||||
struct list_head queue;
|
||||
unsigned int q_depth;
|
||||
@@ -154,6 +157,7 @@ struct rsxx_cardinfo {
|
||||
int buf_len;
|
||||
} log;
|
||||
|
||||
struct workqueue_struct *event_wq;
|
||||
struct work_struct event_work;
|
||||
unsigned int state;
|
||||
u64 size8;
|
||||
@@ -181,6 +185,8 @@ struct rsxx_cardinfo {
|
||||
|
||||
int n_targets;
|
||||
struct rsxx_dma_ctrl *ctrl;
|
||||
|
||||
struct dentry *debugfs_dir;
|
||||
};
|
||||
|
||||
enum rsxx_pci_regmap {
|
||||
@@ -283,6 +289,7 @@ enum rsxx_creg_addr {
|
||||
CREG_ADD_CAPABILITIES = 0x80001050,
|
||||
CREG_ADD_LOG = 0x80002000,
|
||||
CREG_ADD_NUM_TARGETS = 0x80003000,
|
||||
CREG_ADD_CRAM = 0xA0000000,
|
||||
CREG_ADD_CONFIG = 0xB0000000,
|
||||
};
|
||||
|
||||
@@ -372,6 +379,8 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
|
||||
int rsxx_dma_setup(struct rsxx_cardinfo *card);
|
||||
void rsxx_dma_destroy(struct rsxx_cardinfo *card);
|
||||
int rsxx_dma_init(void);
|
||||
int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
|
||||
int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
|
||||
void rsxx_dma_cleanup(void);
|
||||
void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
|
||||
int rsxx_dma_configure(struct rsxx_cardinfo *card);
|
||||
@@ -382,7 +391,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
|
||||
void *cb_data);
|
||||
int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
|
||||
int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
|
||||
void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
|
||||
int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
|
||||
|
||||
/***** cregs.c *****/
|
||||
|
||||
@@ -893,7 +893,7 @@ static int swim_probe(struct platform_device *dev)
|
||||
|
||||
swim_base = ioremap(res->start, resource_size(res));
|
||||
if (!swim_base) {
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out_release_io;
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ module_param(use_bio, bool, S_IRUGO);
|
||||
static int major;
|
||||
static DEFINE_IDA(vd_index_ida);
|
||||
|
||||
struct workqueue_struct *virtblk_wq;
|
||||
static struct workqueue_struct *virtblk_wq;
|
||||
|
||||
struct virtio_blk
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -50,6 +50,19 @@
|
||||
__func__, __LINE__, ##args)
|
||||
|
||||
|
||||
/*
 * This is the maximum number of segments that would be allowed in indirect
 * requests. This value will also be passed to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS 256

/* Number of aligned segment descriptors that fit in a single page. */
#define SEGS_PER_INDIRECT_FRAME \
	(PAGE_SIZE / sizeof(struct blkif_request_segment_aligned))
/* Pages needed to hold MAX_INDIRECT_SEGMENTS descriptors, rounded up. */
#define MAX_INDIRECT_PAGES \
	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)
/*
 * Pages needed to hold _segs descriptors, rounded up.  The argument is
 * parenthesised so that an expression argument (e.g. one using ?:) is
 * evaluated as a unit before the addition.
 */
#define INDIRECT_PAGES(_segs) \
	(((_segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)
|
||||
|
||||
/* Not a real protocol. Used to generate ring structs which contain
|
||||
* the elements common to all protocols only. This way we get a
|
||||
* compiler-checkable way to use common struct elements, so we can
|
||||
@@ -83,12 +96,31 @@ struct blkif_x86_32_request_other {
|
||||
uint64_t id; /* private guest value, echoed in resp */
|
||||
} __attribute__((__packed__));
|
||||
|
||||
struct blkif_x86_32_request_indirect {
|
||||
uint8_t indirect_op;
|
||||
uint16_t nr_segments;
|
||||
uint64_t id;
|
||||
blkif_sector_t sector_number;
|
||||
blkif_vdev_t handle;
|
||||
uint16_t _pad1;
|
||||
grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
|
||||
/*
|
||||
* The maximum number of indirect segments (and pages) that will
|
||||
* be used is determined by MAX_INDIRECT_SEGMENTS, this value
|
||||
* is also exported to the guest (via xenstore
|
||||
* feature-max-indirect-segments entry), so the frontend knows how
|
||||
* many indirect segments the backend supports.
|
||||
*/
|
||||
uint64_t _pad2; /* make it 64 byte aligned */
|
||||
} __attribute__((__packed__));
|
||||
|
||||
struct blkif_x86_32_request {
|
||||
uint8_t operation; /* BLKIF_OP_??? */
|
||||
union {
|
||||
struct blkif_x86_32_request_rw rw;
|
||||
struct blkif_x86_32_request_discard discard;
|
||||
struct blkif_x86_32_request_other other;
|
||||
struct blkif_x86_32_request_indirect indirect;
|
||||
} u;
|
||||
} __attribute__((__packed__));
|
||||
|
||||
@@ -127,12 +159,32 @@ struct blkif_x86_64_request_other {
|
||||
uint64_t id; /* private guest value, echoed in resp */
|
||||
} __attribute__((__packed__));
|
||||
|
||||
struct blkif_x86_64_request_indirect {
|
||||
uint8_t indirect_op;
|
||||
uint16_t nr_segments;
|
||||
uint32_t _pad1; /* offsetof(blkif_..,u.indirect.id)==8 */
|
||||
uint64_t id;
|
||||
blkif_sector_t sector_number;
|
||||
blkif_vdev_t handle;
|
||||
uint16_t _pad2;
|
||||
grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
|
||||
/*
|
||||
* The maximum number of indirect segments (and pages) that will
|
||||
* be used is determined by MAX_INDIRECT_SEGMENTS, this value
|
||||
* is also exported to the guest (via xenstore
|
||||
* feature-max-indirect-segments entry), so the frontend knows how
|
||||
* many indirect segments the backend supports.
|
||||
*/
|
||||
uint32_t _pad3; /* make it 64 byte aligned */
|
||||
} __attribute__((__packed__));
|
||||
|
||||
struct blkif_x86_64_request {
|
||||
uint8_t operation; /* BLKIF_OP_??? */
|
||||
union {
|
||||
struct blkif_x86_64_request_rw rw;
|
||||
struct blkif_x86_64_request_discard discard;
|
||||
struct blkif_x86_64_request_other other;
|
||||
struct blkif_x86_64_request_indirect indirect;
|
||||
} u;
|
||||
} __attribute__((__packed__));
|
||||
|
||||
@@ -182,12 +234,26 @@ struct xen_vbd {
|
||||
|
||||
struct backend_info;
|
||||
|
||||
/* Number of available flags */
|
||||
#define PERSISTENT_GNT_FLAGS_SIZE 2
|
||||
/* This persistent grant is currently in use */
|
||||
#define PERSISTENT_GNT_ACTIVE 0
|
||||
/*
|
||||
* This persistent grant has been used, this flag is set when we remove the
|
||||
* PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
|
||||
*/
|
||||
#define PERSISTENT_GNT_WAS_ACTIVE 1
|
||||
|
||||
/* Number of requests that we can fit in a ring */
|
||||
#define XEN_BLKIF_REQS 32
|
||||
|
||||
struct persistent_gnt {
|
||||
struct page *page;
|
||||
grant_ref_t gnt;
|
||||
grant_handle_t handle;
|
||||
DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
|
||||
struct rb_node node;
|
||||
struct list_head remove_node;
|
||||
};
|
||||
|
||||
struct xen_blkif {
|
||||
@@ -219,6 +285,23 @@ struct xen_blkif {
|
||||
/* tree to store persistent grants */
|
||||
struct rb_root persistent_gnts;
|
||||
unsigned int persistent_gnt_c;
|
||||
atomic_t persistent_gnt_in_use;
|
||||
unsigned long next_lru;
|
||||
|
||||
/* used by the kworker that offload work from the persistent purge */
|
||||
struct list_head persistent_purge_list;
|
||||
struct work_struct persistent_purge_work;
|
||||
|
||||
/* buffer of free pages to map grant refs */
|
||||
spinlock_t free_pages_lock;
|
||||
int free_pages_num;
|
||||
struct list_head free_pages;
|
||||
|
||||
/* List of all 'pending_req' available */
|
||||
struct list_head pending_free;
|
||||
/* And its spinlock. */
|
||||
spinlock_t pending_free_lock;
|
||||
wait_queue_head_t pending_free_wq;
|
||||
|
||||
/* statistics */
|
||||
unsigned long st_print;
|
||||
@@ -231,6 +314,41 @@ struct xen_blkif {
|
||||
unsigned long long st_wr_sect;
|
||||
|
||||
wait_queue_head_t waiting_to_free;
|
||||
/* Thread shutdown wait queue. */
|
||||
wait_queue_head_t shutdown_wq;
|
||||
};
|
||||
|
||||
/* Per-segment offset and sector count; pending_req holds an array of these. */
struct seg_buf {
	unsigned long	offset;
	unsigned int	nsec;
};
|
||||
|
||||
struct grant_page {
|
||||
struct page *page;
|
||||
struct persistent_gnt *persistent_gnt;
|
||||
grant_handle_t handle;
|
||||
grant_ref_t gref;
|
||||
};
|
||||
|
||||
/*
|
||||
* Each outstanding request that we've passed to the lower device layers has a
|
||||
* 'pending_req' allocated to it. Each buffer_head that completes decrements
|
||||
* the pendcnt towards zero. When it hits zero, the specified domain has a
|
||||
* response queued for it, with the saved 'id' passed back.
|
||||
*/
|
||||
struct pending_req {
|
||||
struct xen_blkif *blkif;
|
||||
u64 id;
|
||||
int nr_pages;
|
||||
atomic_t pendcnt;
|
||||
unsigned short operation;
|
||||
int status;
|
||||
struct list_head free_list;
|
||||
struct grant_page *segments[MAX_INDIRECT_SEGMENTS];
|
||||
/* Indirect descriptors */
|
||||
struct grant_page *indirect_pages[MAX_INDIRECT_PAGES];
|
||||
struct seg_buf seg[MAX_INDIRECT_SEGMENTS];
|
||||
struct bio *biolist[MAX_INDIRECT_SEGMENTS];
|
||||
};
|
||||
|
||||
|
||||
@@ -257,6 +375,7 @@ int xen_blkif_xenbus_init(void);
|
||||
|
||||
irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
|
||||
int xen_blkif_schedule(void *arg);
|
||||
int xen_blkif_purge_persistent(void *arg);
|
||||
|
||||
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
|
||||
struct backend_info *be, int state);
|
||||
@@ -268,7 +387,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
|
||||
static inline void blkif_get_x86_32_req(struct blkif_request *dst,
|
||||
struct blkif_x86_32_request *src)
|
||||
{
|
||||
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
|
||||
dst->operation = src->operation;
|
||||
switch (src->operation) {
|
||||
case BLKIF_OP_READ:
|
||||
@@ -291,6 +410,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
|
||||
dst->u.discard.sector_number = src->u.discard.sector_number;
|
||||
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
|
||||
break;
|
||||
case BLKIF_OP_INDIRECT:
|
||||
dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
|
||||
dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
|
||||
dst->u.indirect.handle = src->u.indirect.handle;
|
||||
dst->u.indirect.id = src->u.indirect.id;
|
||||
dst->u.indirect.sector_number = src->u.indirect.sector_number;
|
||||
barrier();
|
||||
j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
|
||||
for (i = 0; i < j; i++)
|
||||
dst->u.indirect.indirect_grefs[i] =
|
||||
src->u.indirect.indirect_grefs[i];
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* Don't know how to translate this op. Only get the
|
||||
@@ -304,7 +435,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
|
||||
static inline void blkif_get_x86_64_req(struct blkif_request *dst,
|
||||
struct blkif_x86_64_request *src)
|
||||
{
|
||||
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
|
||||
dst->operation = src->operation;
|
||||
switch (src->operation) {
|
||||
case BLKIF_OP_READ:
|
||||
@@ -327,6 +458,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
|
||||
dst->u.discard.sector_number = src->u.discard.sector_number;
|
||||
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
|
||||
break;
|
||||
case BLKIF_OP_INDIRECT:
|
||||
dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
|
||||
dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
|
||||
dst->u.indirect.handle = src->u.indirect.handle;
|
||||
dst->u.indirect.id = src->u.indirect.id;
|
||||
dst->u.indirect.sector_number = src->u.indirect.sector_number;
|
||||
barrier();
|
||||
j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
|
||||
for (i = 0; i < j; i++)
|
||||
dst->u.indirect.indirect_grefs[i] =
|
||||
src->u.indirect.indirect_grefs[i];
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* Don't know how to translate this op. Only get the
|
||||
|
||||
@@ -93,17 +93,22 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
|
||||
}
|
||||
invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
|
||||
|
||||
blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name);
|
||||
blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
|
||||
if (IS_ERR(blkif->xenblkd)) {
|
||||
err = PTR_ERR(blkif->xenblkd);
|
||||
blkif->xenblkd = NULL;
|
||||
xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
|
||||
{
|
||||
struct xen_blkif *blkif;
|
||||
struct pending_req *req, *n;
|
||||
int i, j;
|
||||
|
||||
BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
|
||||
|
||||
blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
|
||||
if (!blkif)
|
||||
@@ -118,8 +123,57 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
|
||||
blkif->st_print = jiffies;
|
||||
init_waitqueue_head(&blkif->waiting_to_free);
|
||||
blkif->persistent_gnts.rb_node = NULL;
|
||||
spin_lock_init(&blkif->free_pages_lock);
|
||||
INIT_LIST_HEAD(&blkif->free_pages);
|
||||
blkif->free_pages_num = 0;
|
||||
atomic_set(&blkif->persistent_gnt_in_use, 0);
|
||||
|
||||
INIT_LIST_HEAD(&blkif->pending_free);
|
||||
|
||||
for (i = 0; i < XEN_BLKIF_REQS; i++) {
|
||||
req = kzalloc(sizeof(*req), GFP_KERNEL);
|
||||
if (!req)
|
||||
goto fail;
|
||||
list_add_tail(&req->free_list,
|
||||
&blkif->pending_free);
|
||||
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
|
||||
req->segments[j] = kzalloc(sizeof(*req->segments[0]),
|
||||
GFP_KERNEL);
|
||||
if (!req->segments[j])
|
||||
goto fail;
|
||||
}
|
||||
for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
|
||||
req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
|
||||
GFP_KERNEL);
|
||||
if (!req->indirect_pages[j])
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
spin_lock_init(&blkif->pending_free_lock);
|
||||
init_waitqueue_head(&blkif->pending_free_wq);
|
||||
init_waitqueue_head(&blkif->shutdown_wq);
|
||||
|
||||
return blkif;
|
||||
|
||||
fail:
|
||||
list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
|
||||
list_del(&req->free_list);
|
||||
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
|
||||
if (!req->segments[j])
|
||||
break;
|
||||
kfree(req->segments[j]);
|
||||
}
|
||||
for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
|
||||
if (!req->indirect_pages[j])
|
||||
break;
|
||||
kfree(req->indirect_pages[j]);
|
||||
}
|
||||
kfree(req);
|
||||
}
|
||||
|
||||
kmem_cache_free(xen_blkif_cachep, blkif);
|
||||
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
|
||||
@@ -178,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
|
||||
{
|
||||
if (blkif->xenblkd) {
|
||||
kthread_stop(blkif->xenblkd);
|
||||
wake_up(&blkif->shutdown_wq);
|
||||
blkif->xenblkd = NULL;
|
||||
}
|
||||
|
||||
@@ -198,8 +253,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
|
||||
|
||||
static void xen_blkif_free(struct xen_blkif *blkif)
|
||||
{
|
||||
struct pending_req *req, *n;
|
||||
int i = 0, j;
|
||||
|
||||
if (!atomic_dec_and_test(&blkif->refcnt))
|
||||
BUG();
|
||||
|
||||
/* Check that there is no request in use */
|
||||
list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
|
||||
list_del(&req->free_list);
|
||||
|
||||
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
|
||||
kfree(req->segments[j]);
|
||||
|
||||
for (j = 0; j < MAX_INDIRECT_PAGES; j++)
|
||||
kfree(req->indirect_pages[j]);
|
||||
|
||||
kfree(req);
|
||||
i++;
|
||||
}
|
||||
|
||||
WARN_ON(i != XEN_BLKIF_REQS);
|
||||
|
||||
kmem_cache_free(xen_blkif_cachep, blkif);
|
||||
}
|
||||
|
||||
@@ -678,6 +753,11 @@ again:
|
||||
dev->nodename);
|
||||
goto abort;
|
||||
}
|
||||
err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
|
||||
MAX_INDIRECT_SEGMENTS);
|
||||
if (err)
|
||||
dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
|
||||
dev->nodename, err);
|
||||
|
||||
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
|
||||
(unsigned long long)vbd_sz(&be->blkif->vbd));
|
||||
@@ -704,6 +784,11 @@ again:
|
||||
dev->nodename);
|
||||
goto abort;
|
||||
}
|
||||
err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
|
||||
bdev_physical_block_size(be->blkif->vbd.bdev));
|
||||
if (err)
|
||||
xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
|
||||
dev->nodename);
|
||||
|
||||
err = xenbus_transaction_end(xbt, 0);
|
||||
if (err == -EAGAIN)
|
||||
|
||||
@@ -74,12 +74,30 @@ struct grant {
|
||||
struct blk_shadow {
|
||||
struct blkif_request req;
|
||||
struct request *request;
|
||||
struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
struct grant **grants_used;
|
||||
struct grant **indirect_grants;
|
||||
struct scatterlist *sg;
|
||||
};
|
||||
|
||||
struct split_bio {
|
||||
struct bio *bio;
|
||||
atomic_t pending;
|
||||
int err;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(blkfront_mutex);
|
||||
static const struct block_device_operations xlvbd_block_fops;
|
||||
|
||||
/*
|
||||
* Maximum number of segments in indirect requests, the actual value used by
|
||||
* the frontend driver is the minimum of this value and the value provided
|
||||
* by the backend driver.
|
||||
*/
|
||||
|
||||
static unsigned int xen_blkif_max_segments = 32;
|
||||
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
|
||||
|
||||
#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
|
||||
|
||||
/*
|
||||
@@ -98,7 +116,6 @@ struct blkfront_info
|
||||
enum blkif_state connected;
|
||||
int ring_ref;
|
||||
struct blkif_front_ring ring;
|
||||
struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
||||
unsigned int evtchn, irq;
|
||||
struct request_queue *rq;
|
||||
struct work_struct work;
|
||||
@@ -114,6 +131,7 @@ struct blkfront_info
|
||||
unsigned int discard_granularity;
|
||||
unsigned int discard_alignment;
|
||||
unsigned int feature_persistent:1;
|
||||
unsigned int max_indirect_segments;
|
||||
int is_ready;
|
||||
};
|
||||
|
||||
@@ -142,6 +160,13 @@ static DEFINE_SPINLOCK(minor_lock);
|
||||
|
||||
#define DEV_NAME "xvd" /* name in /dev */
|
||||
|
||||
/* Number of aligned segment descriptors that fit in a single page. */
#define SEGS_PER_INDIRECT_FRAME \
	(PAGE_SIZE / sizeof(struct blkif_request_segment_aligned))
/*
 * Indirect pages (one grant reference each) needed to hold _segs segment
 * descriptors, rounded up.  The argument is parenthesised so that an
 * expression argument is evaluated as a unit before the addition.
 */
#define INDIRECT_GREFS(_segs) \
	(((_segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)
|
||||
|
||||
static int blkfront_setup_indirect(struct blkfront_info *info);
|
||||
|
||||
static int get_id_from_freelist(struct blkfront_info *info)
|
||||
{
|
||||
unsigned long free = info->shadow_free;
|
||||
@@ -358,7 +383,8 @@ static int blkif_queue_request(struct request *req)
|
||||
struct blkif_request *ring_req;
|
||||
unsigned long id;
|
||||
unsigned int fsect, lsect;
|
||||
int i, ref;
|
||||
int i, ref, n;
|
||||
struct blkif_request_segment_aligned *segments = NULL;
|
||||
|
||||
/*
|
||||
* Used to store if we are able to queue the request by just using
|
||||
@@ -369,21 +395,27 @@ static int blkif_queue_request(struct request *req)
|
||||
grant_ref_t gref_head;
|
||||
struct grant *gnt_list_entry = NULL;
|
||||
struct scatterlist *sg;
|
||||
int nseg, max_grefs;
|
||||
|
||||
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
|
||||
return 1;
|
||||
|
||||
/* Check if we have enought grants to allocate a requests */
|
||||
if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
|
||||
max_grefs = info->max_indirect_segments ?
|
||||
info->max_indirect_segments +
|
||||
INDIRECT_GREFS(info->max_indirect_segments) :
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
|
||||
/* Check if we have enough grants to allocate a requests */
|
||||
if (info->persistent_gnts_c < max_grefs) {
|
||||
new_persistent_gnts = 1;
|
||||
if (gnttab_alloc_grant_references(
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
|
||||
max_grefs - info->persistent_gnts_c,
|
||||
&gref_head) < 0) {
|
||||
gnttab_request_free_callback(
|
||||
&info->callback,
|
||||
blkif_restart_queue_callback,
|
||||
info,
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
max_grefs);
|
||||
return 1;
|
||||
}
|
||||
} else
|
||||
@@ -394,42 +426,67 @@ static int blkif_queue_request(struct request *req)
|
||||
id = get_id_from_freelist(info);
|
||||
info->shadow[id].request = req;
|
||||
|
||||
ring_req->u.rw.id = id;
|
||||
ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
|
||||
ring_req->u.rw.handle = info->handle;
|
||||
|
||||
ring_req->operation = rq_data_dir(req) ?
|
||||
BLKIF_OP_WRITE : BLKIF_OP_READ;
|
||||
|
||||
if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
|
||||
/*
|
||||
* Ideally we can do an unordered flush-to-disk. In case the
|
||||
* backend onlysupports barriers, use that. A barrier request
|
||||
* a superset of FUA, so we can implement it the same
|
||||
* way. (It's also a FLUSH+FUA, since it is
|
||||
* guaranteed ordered WRT previous writes.)
|
||||
*/
|
||||
ring_req->operation = info->flush_op;
|
||||
}
|
||||
|
||||
if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
|
||||
/* id, sector_number and handle are set above. */
|
||||
ring_req->operation = BLKIF_OP_DISCARD;
|
||||
ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
|
||||
ring_req->u.discard.id = id;
|
||||
ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
|
||||
if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
|
||||
ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
|
||||
else
|
||||
ring_req->u.discard.flag = 0;
|
||||
} else {
|
||||
ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
|
||||
info->sg);
|
||||
BUG_ON(ring_req->u.rw.nr_segments >
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
|
||||
for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
|
||||
BUG_ON(info->max_indirect_segments == 0 &&
|
||||
req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
BUG_ON(info->max_indirect_segments &&
|
||||
req->nr_phys_segments > info->max_indirect_segments);
|
||||
nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
|
||||
ring_req->u.rw.id = id;
|
||||
if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
|
||||
/*
|
||||
* The indirect operation can only be a BLKIF_OP_READ or
|
||||
* BLKIF_OP_WRITE
|
||||
*/
|
||||
BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
|
||||
ring_req->operation = BLKIF_OP_INDIRECT;
|
||||
ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
|
||||
BLKIF_OP_WRITE : BLKIF_OP_READ;
|
||||
ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
|
||||
ring_req->u.indirect.handle = info->handle;
|
||||
ring_req->u.indirect.nr_segments = nseg;
|
||||
} else {
|
||||
ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
|
||||
ring_req->u.rw.handle = info->handle;
|
||||
ring_req->operation = rq_data_dir(req) ?
|
||||
BLKIF_OP_WRITE : BLKIF_OP_READ;
|
||||
if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
|
||||
/*
|
||||
* Ideally we can do an unordered flush-to-disk. In case the
|
||||
* backend only supports barriers, use that. A barrier request is
|
||||
* a superset of FUA, so we can implement it the same
|
||||
* way. (It's also a FLUSH+FUA, since it is
|
||||
* guaranteed ordered WRT previous writes.)
|
||||
*/
|
||||
ring_req->operation = info->flush_op;
|
||||
}
|
||||
ring_req->u.rw.nr_segments = nseg;
|
||||
}
|
||||
for_each_sg(info->shadow[id].sg, sg, nseg, i) {
|
||||
fsect = sg->offset >> 9;
|
||||
lsect = fsect + (sg->length >> 9) - 1;
|
||||
|
||||
if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
|
||||
(i % SEGS_PER_INDIRECT_FRAME == 0)) {
|
||||
if (segments)
|
||||
kunmap_atomic(segments);
|
||||
|
||||
n = i / SEGS_PER_INDIRECT_FRAME;
|
||||
gnt_list_entry = get_grant(&gref_head, info);
|
||||
info->shadow[id].indirect_grants[n] = gnt_list_entry;
|
||||
segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
|
||||
ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
|
||||
}
|
||||
|
||||
gnt_list_entry = get_grant(&gref_head, info);
|
||||
ref = gnt_list_entry->gref;
|
||||
|
||||
@@ -441,8 +498,7 @@ static int blkif_queue_request(struct request *req)
|
||||
|
||||
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
|
||||
|
||||
shared_data = kmap_atomic(
|
||||
pfn_to_page(gnt_list_entry->pfn));
|
||||
shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
|
||||
bvec_data = kmap_atomic(sg_page(sg));
|
||||
|
||||
/*
|
||||
@@ -461,13 +517,23 @@ static int blkif_queue_request(struct request *req)
|
||||
kunmap_atomic(bvec_data);
|
||||
kunmap_atomic(shared_data);
|
||||
}
|
||||
|
||||
ring_req->u.rw.seg[i] =
|
||||
(struct blkif_request_segment) {
|
||||
.gref = ref,
|
||||
.first_sect = fsect,
|
||||
.last_sect = lsect };
|
||||
if (ring_req->operation != BLKIF_OP_INDIRECT) {
|
||||
ring_req->u.rw.seg[i] =
|
||||
(struct blkif_request_segment) {
|
||||
.gref = ref,
|
||||
.first_sect = fsect,
|
||||
.last_sect = lsect };
|
||||
} else {
|
||||
n = i % SEGS_PER_INDIRECT_FRAME;
|
||||
segments[n] =
|
||||
(struct blkif_request_segment_aligned) {
|
||||
.gref = ref,
|
||||
.first_sect = fsect,
|
||||
.last_sect = lsect };
|
||||
}
|
||||
}
|
||||
if (segments)
|
||||
kunmap_atomic(segments);
|
||||
}
|
||||
|
||||
info->ring.req_prod_pvt++;
|
||||
@@ -542,7 +608,9 @@ wait:
|
||||
flush_requests(info);
|
||||
}
|
||||
|
||||
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
|
||||
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
|
||||
unsigned int physical_sector_size,
|
||||
unsigned int segments)
|
||||
{
|
||||
struct request_queue *rq;
|
||||
struct blkfront_info *info = gd->private_data;
|
||||
@@ -564,14 +632,15 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
|
||||
|
||||
/* Hard sector size and max sectors impersonate the equiv. hardware. */
|
||||
blk_queue_logical_block_size(rq, sector_size);
|
||||
blk_queue_max_hw_sectors(rq, 512);
|
||||
blk_queue_physical_block_size(rq, physical_sector_size);
|
||||
blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512);
|
||||
|
||||
/* Each segment in a request is up to an aligned page in size. */
|
||||
blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
|
||||
blk_queue_max_segment_size(rq, PAGE_SIZE);
|
||||
|
||||
/* Ensure a merged request will fit in a single I/O ring slot. */
|
||||
blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
blk_queue_max_segments(rq, segments);
|
||||
|
||||
/* Make sure buffer addresses are sector-aligned. */
|
||||
blk_queue_dma_alignment(rq, 511);
|
||||
@@ -588,13 +657,16 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
|
||||
static void xlvbd_flush(struct blkfront_info *info)
|
||||
{
|
||||
blk_queue_flush(info->rq, info->feature_flush);
|
||||
printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
|
||||
printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n",
|
||||
info->gd->disk_name,
|
||||
info->flush_op == BLKIF_OP_WRITE_BARRIER ?
|
||||
"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
|
||||
"flush diskcache" : "barrier or flush"),
|
||||
info->feature_flush ? "enabled" : "disabled",
|
||||
info->feature_persistent ? "using persistent grants" : "");
|
||||
info->feature_flush ? "enabled;" : "disabled;",
|
||||
"persistent grants:",
|
||||
info->feature_persistent ? "enabled;" : "disabled;",
|
||||
"indirect descriptors:",
|
||||
info->max_indirect_segments ? "enabled;" : "disabled;");
|
||||
}
|
||||
|
||||
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
|
||||
@@ -667,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)
|
||||
|
||||
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
struct blkfront_info *info,
|
||||
u16 vdisk_info, u16 sector_size)
|
||||
u16 vdisk_info, u16 sector_size,
|
||||
unsigned int physical_sector_size)
|
||||
{
|
||||
struct gendisk *gd;
|
||||
int nr_minors = 1;
|
||||
@@ -734,7 +807,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
gd->driverfs_dev = &(info->xbdev->dev);
|
||||
set_capacity(gd, capacity);
|
||||
|
||||
if (xlvbd_init_blk_queue(gd, sector_size)) {
|
||||
if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
|
||||
info->max_indirect_segments ? :
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
|
||||
del_gendisk(gd);
|
||||
goto release;
|
||||
}
|
||||
@@ -818,6 +893,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
|
||||
{
|
||||
struct grant *persistent_gnt;
|
||||
struct grant *n;
|
||||
int i, j, segs;
|
||||
|
||||
/* Prevent new requests being issued until we fix things up. */
|
||||
spin_lock_irq(&info->io_lock);
|
||||
@@ -843,6 +919,47 @@ static void blkif_free(struct blkfront_info *info, int suspend)
|
||||
}
|
||||
BUG_ON(info->persistent_gnts_c != 0);
|
||||
|
||||
for (i = 0; i < BLK_RING_SIZE; i++) {
|
||||
/*
|
||||
* Clear persistent grants present in requests already
|
||||
* on the shared ring
|
||||
*/
|
||||
if (!info->shadow[i].request)
|
||||
goto free_shadow;
|
||||
|
||||
segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
|
||||
info->shadow[i].req.u.indirect.nr_segments :
|
||||
info->shadow[i].req.u.rw.nr_segments;
|
||||
for (j = 0; j < segs; j++) {
|
||||
persistent_gnt = info->shadow[i].grants_used[j];
|
||||
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
|
||||
__free_page(pfn_to_page(persistent_gnt->pfn));
|
||||
kfree(persistent_gnt);
|
||||
}
|
||||
|
||||
if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
|
||||
/*
|
||||
* If this is not an indirect operation don't try to
|
||||
* free indirect segments
|
||||
*/
|
||||
goto free_shadow;
|
||||
|
||||
for (j = 0; j < INDIRECT_GREFS(segs); j++) {
|
||||
persistent_gnt = info->shadow[i].indirect_grants[j];
|
||||
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
|
||||
__free_page(pfn_to_page(persistent_gnt->pfn));
|
||||
kfree(persistent_gnt);
|
||||
}
|
||||
|
||||
free_shadow:
|
||||
kfree(info->shadow[i].grants_used);
|
||||
info->shadow[i].grants_used = NULL;
|
||||
kfree(info->shadow[i].indirect_grants);
|
||||
info->shadow[i].indirect_grants = NULL;
|
||||
kfree(info->shadow[i].sg);
|
||||
info->shadow[i].sg = NULL;
|
||||
}
|
||||
|
||||
/* No more gnttab callback work. */
|
||||
gnttab_cancel_free_callback(&info->callback);
|
||||
spin_unlock_irq(&info->io_lock);
|
||||
@@ -867,12 +984,13 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
|
||||
struct blkif_response *bret)
|
||||
{
|
||||
int i = 0;
|
||||
struct bio_vec *bvec;
|
||||
struct req_iterator iter;
|
||||
unsigned long flags;
|
||||
struct scatterlist *sg;
|
||||
char *bvec_data;
|
||||
void *shared_data;
|
||||
unsigned int offset = 0;
|
||||
int nseg;
|
||||
|
||||
nseg = s->req.operation == BLKIF_OP_INDIRECT ?
|
||||
s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
|
||||
|
||||
if (bret->operation == BLKIF_OP_READ) {
|
||||
/*
|
||||
@@ -881,26 +999,29 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
|
||||
* than PAGE_SIZE, we have to keep track of the current offset,
|
||||
* to be sure we are copying the data from the right shared page.
|
||||
*/
|
||||
rq_for_each_segment(bvec, s->request, iter) {
|
||||
BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
|
||||
if (bvec->bv_offset < offset)
|
||||
i++;
|
||||
BUG_ON(i >= s->req.u.rw.nr_segments);
|
||||
for_each_sg(s->sg, sg, nseg, i) {
|
||||
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
|
||||
shared_data = kmap_atomic(
|
||||
pfn_to_page(s->grants_used[i]->pfn));
|
||||
bvec_data = bvec_kmap_irq(bvec, &flags);
|
||||
memcpy(bvec_data, shared_data + bvec->bv_offset,
|
||||
bvec->bv_len);
|
||||
bvec_kunmap_irq(bvec_data, &flags);
|
||||
bvec_data = kmap_atomic(sg_page(sg));
|
||||
memcpy(bvec_data + sg->offset,
|
||||
shared_data + sg->offset,
|
||||
sg->length);
|
||||
kunmap_atomic(bvec_data);
|
||||
kunmap_atomic(shared_data);
|
||||
offset = bvec->bv_offset + bvec->bv_len;
|
||||
}
|
||||
}
|
||||
/* Add the persistent grant into the list of free grants */
|
||||
for (i = 0; i < s->req.u.rw.nr_segments; i++) {
|
||||
for (i = 0; i < nseg; i++) {
|
||||
list_add(&s->grants_used[i]->node, &info->persistent_gnts);
|
||||
info->persistent_gnts_c++;
|
||||
}
|
||||
if (s->req.operation == BLKIF_OP_INDIRECT) {
|
||||
for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
|
||||
list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
|
||||
info->persistent_gnts_c++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||
@@ -1034,14 +1155,6 @@ static int setup_blkring(struct xenbus_device *dev,
|
||||
SHARED_RING_INIT(sring);
|
||||
FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
|
||||
|
||||
sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
|
||||
/* Allocate memory for grants */
|
||||
err = fill_grant_buffer(info, BLK_RING_SIZE *
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
|
||||
if (err < 0) {
|
||||
free_page((unsigned long)sring);
|
||||
@@ -1223,13 +1336,84 @@ static int blkfront_probe(struct xenbus_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a clone of md_trim_bio, used to split a bio into smaller ones
|
||||
*/
|
||||
static void trim_bio(struct bio *bio, int offset, int size)
|
||||
{
|
||||
/* 'bio' is a cloned bio which we need to trim to match
|
||||
* the given offset and size.
|
||||
* This requires adjusting bi_sector, bi_size, and bi_io_vec
|
||||
*/
|
||||
int i;
|
||||
struct bio_vec *bvec;
|
||||
int sofar = 0;
|
||||
|
||||
size <<= 9;
|
||||
if (offset == 0 && size == bio->bi_size)
|
||||
return;
|
||||
|
||||
bio->bi_sector += offset;
|
||||
bio->bi_size = size;
|
||||
offset <<= 9;
|
||||
clear_bit(BIO_SEG_VALID, &bio->bi_flags);
|
||||
|
||||
while (bio->bi_idx < bio->bi_vcnt &&
|
||||
bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
|
||||
/* remove this whole bio_vec */
|
||||
offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
|
||||
bio->bi_idx++;
|
||||
}
|
||||
if (bio->bi_idx < bio->bi_vcnt) {
|
||||
bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
|
||||
bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
|
||||
}
|
||||
/* avoid any complications with bi_idx being non-zero*/
|
||||
if (bio->bi_idx) {
|
||||
memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
|
||||
(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
|
||||
bio->bi_vcnt -= bio->bi_idx;
|
||||
bio->bi_idx = 0;
|
||||
}
|
||||
/* Make sure vcnt and last bv are not too big */
|
||||
bio_for_each_segment(bvec, bio, i) {
|
||||
if (sofar + bvec->bv_len > size)
|
||||
bvec->bv_len = size - sofar;
|
||||
if (bvec->bv_len == 0) {
|
||||
bio->bi_vcnt = i;
|
||||
break;
|
||||
}
|
||||
sofar += bvec->bv_len;
|
||||
}
|
||||
}
|
||||
|
||||
static void split_bio_end(struct bio *bio, int error)
|
||||
{
|
||||
struct split_bio *split_bio = bio->bi_private;
|
||||
|
||||
if (error)
|
||||
split_bio->err = error;
|
||||
|
||||
if (atomic_dec_and_test(&split_bio->pending)) {
|
||||
split_bio->bio->bi_phys_segments = 0;
|
||||
bio_endio(split_bio->bio, split_bio->err);
|
||||
kfree(split_bio);
|
||||
}
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static int blkif_recover(struct blkfront_info *info)
|
||||
{
|
||||
int i;
|
||||
struct blkif_request *req;
|
||||
struct request *req, *n;
|
||||
struct blk_shadow *copy;
|
||||
int j;
|
||||
int rc;
|
||||
struct bio *bio, *cloned_bio;
|
||||
struct bio_list bio_list, merge_bio;
|
||||
unsigned int segs, offset;
|
||||
int pending, size;
|
||||
struct split_bio *split_bio;
|
||||
struct list_head requests;
|
||||
|
||||
/* Stage 1: Make a safe copy of the shadow state. */
|
||||
copy = kmemdup(info->shadow, sizeof(info->shadow),
|
||||
@@ -1244,36 +1428,64 @@ static int blkif_recover(struct blkfront_info *info)
|
||||
info->shadow_free = info->ring.req_prod_pvt;
|
||||
info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
|
||||
|
||||
/* Stage 3: Find pending requests and requeue them. */
|
||||
rc = blkfront_setup_indirect(info);
|
||||
if (rc) {
|
||||
kfree(copy);
|
||||
return rc;
|
||||
}
|
||||
|
||||
segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
blk_queue_max_segments(info->rq, segs);
|
||||
bio_list_init(&bio_list);
|
||||
INIT_LIST_HEAD(&requests);
|
||||
for (i = 0; i < BLK_RING_SIZE; i++) {
|
||||
/* Not in use? */
|
||||
if (!copy[i].request)
|
||||
continue;
|
||||
|
||||
/* Grab a request slot and copy shadow state into it. */
|
||||
req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
|
||||
*req = copy[i].req;
|
||||
|
||||
/* We get a new request id, and must reset the shadow state. */
|
||||
req->u.rw.id = get_id_from_freelist(info);
|
||||
memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i]));
|
||||
|
||||
if (req->operation != BLKIF_OP_DISCARD) {
|
||||
/* Rewrite any grant references invalidated by susp/resume. */
|
||||
for (j = 0; j < req->u.rw.nr_segments; j++)
|
||||
gnttab_grant_foreign_access_ref(
|
||||
req->u.rw.seg[j].gref,
|
||||
info->xbdev->otherend_id,
|
||||
pfn_to_mfn(copy[i].grants_used[j]->pfn),
|
||||
0);
|
||||
/*
|
||||
* Get the bios in the request so we can re-queue them.
|
||||
*/
|
||||
if (copy[i].request->cmd_flags &
|
||||
(REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
|
||||
/*
|
||||
* Flush operations don't contain bios, so
|
||||
* we need to requeue the whole request
|
||||
*/
|
||||
list_add(©[i].request->queuelist, &requests);
|
||||
continue;
|
||||
}
|
||||
info->shadow[req->u.rw.id].req = *req;
|
||||
|
||||
info->ring.req_prod_pvt++;
|
||||
merge_bio.head = copy[i].request->bio;
|
||||
merge_bio.tail = copy[i].request->biotail;
|
||||
bio_list_merge(&bio_list, &merge_bio);
|
||||
copy[i].request->bio = NULL;
|
||||
blk_put_request(copy[i].request);
|
||||
}
|
||||
|
||||
kfree(copy);
|
||||
|
||||
/*
|
||||
* Empty the queue, this is important because we might have
|
||||
* requests in the queue with more segments than what we
|
||||
* can handle now.
|
||||
*/
|
||||
spin_lock_irq(&info->io_lock);
|
||||
while ((req = blk_fetch_request(info->rq)) != NULL) {
|
||||
if (req->cmd_flags &
|
||||
(REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
|
||||
list_add(&req->queuelist, &requests);
|
||||
continue;
|
||||
}
|
||||
merge_bio.head = req->bio;
|
||||
merge_bio.tail = req->biotail;
|
||||
bio_list_merge(&bio_list, &merge_bio);
|
||||
req->bio = NULL;
|
||||
if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
|
||||
pr_alert("diskcache flush request found!\n");
|
||||
__blk_put_request(info->rq, req);
|
||||
}
|
||||
spin_unlock_irq(&info->io_lock);
|
||||
|
||||
xenbus_switch_state(info->xbdev, XenbusStateConnected);
|
||||
|
||||
spin_lock_irq(&info->io_lock);
|
||||
@@ -1281,14 +1493,50 @@ static int blkif_recover(struct blkfront_info *info)
|
||||
/* Now safe for us to use the shared ring */
|
||||
info->connected = BLKIF_STATE_CONNECTED;
|
||||
|
||||
/* Send off requeued requests */
|
||||
flush_requests(info);
|
||||
|
||||
/* Kick any other new requests queued since we resumed */
|
||||
kick_pending_request_queues(info);
|
||||
|
||||
list_for_each_entry_safe(req, n, &requests, queuelist) {
|
||||
/* Requeue pending requests (flush or discard) */
|
||||
list_del_init(&req->queuelist);
|
||||
BUG_ON(req->nr_phys_segments > segs);
|
||||
blk_requeue_request(info->rq, req);
|
||||
}
|
||||
spin_unlock_irq(&info->io_lock);
|
||||
|
||||
while ((bio = bio_list_pop(&bio_list)) != NULL) {
|
||||
/* Traverse the list of pending bios and re-queue them */
|
||||
if (bio_segments(bio) > segs) {
|
||||
/*
|
||||
* This bio has more segments than what we can
|
||||
* handle, we have to split it.
|
||||
*/
|
||||
pending = (bio_segments(bio) + segs - 1) / segs;
|
||||
split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
|
||||
BUG_ON(split_bio == NULL);
|
||||
atomic_set(&split_bio->pending, pending);
|
||||
split_bio->bio = bio;
|
||||
for (i = 0; i < pending; i++) {
|
||||
offset = (i * segs * PAGE_SIZE) >> 9;
|
||||
size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
|
||||
(unsigned int)(bio->bi_size >> 9) - offset);
|
||||
cloned_bio = bio_clone(bio, GFP_NOIO);
|
||||
BUG_ON(cloned_bio == NULL);
|
||||
trim_bio(cloned_bio, offset, size);
|
||||
cloned_bio->bi_private = split_bio;
|
||||
cloned_bio->bi_end_io = split_bio_end;
|
||||
submit_bio(cloned_bio->bi_rw, cloned_bio);
|
||||
}
|
||||
/*
|
||||
* Now we have to wait for all those smaller bios to
|
||||
* end, so we can also end the "parent" bio.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
/* We don't need to split this bio */
|
||||
submit_bio(bio->bi_rw, bio);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1308,8 +1556,12 @@ static int blkfront_resume(struct xenbus_device *dev)
|
||||
blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
|
||||
|
||||
err = talk_to_blkback(dev, info);
|
||||
if (info->connected == BLKIF_STATE_SUSPENDED && !err)
|
||||
err = blkif_recover(info);
|
||||
|
||||
/*
|
||||
* We have to wait for the backend to switch to
|
||||
* connected state, since we want to read which
|
||||
* features it supports.
|
||||
*/
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -1387,6 +1639,60 @@ static void blkfront_setup_discard(struct blkfront_info *info)
|
||||
kfree(type);
|
||||
}
|
||||
|
||||
static int blkfront_setup_indirect(struct blkfront_info *info)
|
||||
{
|
||||
unsigned int indirect_segments, segs;
|
||||
int err, i;
|
||||
|
||||
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
||||
"feature-max-indirect-segments", "%u", &indirect_segments,
|
||||
NULL);
|
||||
if (err) {
|
||||
info->max_indirect_segments = 0;
|
||||
segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
} else {
|
||||
info->max_indirect_segments = min(indirect_segments,
|
||||
xen_blkif_max_segments);
|
||||
segs = info->max_indirect_segments;
|
||||
}
|
||||
|
||||
err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
|
||||
if (err)
|
||||
goto out_of_memory;
|
||||
|
||||
for (i = 0; i < BLK_RING_SIZE; i++) {
|
||||
info->shadow[i].grants_used = kzalloc(
|
||||
sizeof(info->shadow[i].grants_used[0]) * segs,
|
||||
GFP_NOIO);
|
||||
info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO);
|
||||
if (info->max_indirect_segments)
|
||||
info->shadow[i].indirect_grants = kzalloc(
|
||||
sizeof(info->shadow[i].indirect_grants[0]) *
|
||||
INDIRECT_GREFS(segs),
|
||||
GFP_NOIO);
|
||||
if ((info->shadow[i].grants_used == NULL) ||
|
||||
(info->shadow[i].sg == NULL) ||
|
||||
(info->max_indirect_segments &&
|
||||
(info->shadow[i].indirect_grants == NULL)))
|
||||
goto out_of_memory;
|
||||
sg_init_table(info->shadow[i].sg, segs);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
out_of_memory:
|
||||
for (i = 0; i < BLK_RING_SIZE; i++) {
|
||||
kfree(info->shadow[i].grants_used);
|
||||
info->shadow[i].grants_used = NULL;
|
||||
kfree(info->shadow[i].sg);
|
||||
info->shadow[i].sg = NULL;
|
||||
kfree(info->shadow[i].indirect_grants);
|
||||
info->shadow[i].indirect_grants = NULL;
|
||||
}
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoked when the backend is finally 'ready' (and has produced
|
||||
* the details about the physical device - #sectors, size, etc).
|
||||
@@ -1395,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
{
|
||||
unsigned long long sectors;
|
||||
unsigned long sector_size;
|
||||
unsigned int physical_sector_size;
|
||||
unsigned int binfo;
|
||||
int err;
|
||||
int barrier, flush, discard, persistent;
|
||||
@@ -1414,8 +1721,15 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
set_capacity(info->gd, sectors);
|
||||
revalidate_disk(info->gd);
|
||||
|
||||
/* fall through */
|
||||
return;
|
||||
case BLKIF_STATE_SUSPENDED:
|
||||
/*
|
||||
* If we are recovering from suspension, we need to wait
|
||||
* for the backend to announce its features before
|
||||
* reconnecting, at least we need to know if the backend
|
||||
* supports indirect descriptors, and how many.
|
||||
*/
|
||||
blkif_recover(info);
|
||||
return;
|
||||
|
||||
default:
|
||||
@@ -1437,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* physical-sector-size is a newer field, so old backends may not
|
||||
* provide this. Assume physical sector size to be the same as
|
||||
* sector_size in that case.
|
||||
*/
|
||||
err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
|
||||
"physical-sector-size", "%u", &physical_sector_size);
|
||||
if (err != 1)
|
||||
physical_sector_size = sector_size;
|
||||
|
||||
info->feature_flush = 0;
|
||||
info->flush_op = 0;
|
||||
|
||||
@@ -1483,7 +1807,15 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
else
|
||||
info->feature_persistent = persistent;
|
||||
|
||||
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
|
||||
err = blkfront_setup_indirect(info);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
|
||||
info->xbdev->otherend);
|
||||
return;
|
||||
}
|
||||
|
||||
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
|
||||
physical_sector_size);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
|
||||
info->xbdev->otherend);
|
||||
|
||||
@@ -407,7 +407,7 @@ static void ace_dump_regs(struct ace_device *ace)
|
||||
ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT));
|
||||
}
|
||||
|
||||
void ace_fix_driveid(u16 *id)
|
||||
static void ace_fix_driveid(u16 *id)
|
||||
{
|
||||
#if defined(__BIG_ENDIAN)
|
||||
int i;
|
||||
@@ -463,7 +463,7 @@ static inline void ace_fsm_yieldirq(struct ace_device *ace)
|
||||
}
|
||||
|
||||
/* Get the next read/write request; ending requests that we don't handle */
|
||||
struct request *ace_get_next_request(struct request_queue * q)
|
||||
static struct request *ace_get_next_request(struct request_queue *q)
|
||||
{
|
||||
struct request *req;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user