6e84200c0a
This patch adds virtio-pmem driver for KVM guest. Guest reads the persistent memory range information from Qemu over VIRTIO and registers it on nvdimm_bus. It also creates a nd_region object with the persistent memory range information so that existing 'nvdimm/pmem' driver can reserve this into system memory map. This way 'virtio-pmem' driver uses existing functionality of pmem driver to register persistent memory compatible for DAX capable filesystems. This also provides function to perform guest flush over VIRTIO from 'pmem' driver when userspace performs flush on DAX memory range. Signed-off-by: Pankaj Gupta <pagupta@redhat.com> Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Jakub Staron <jstaron@google.com> Tested-by: Jakub Staron <jstaron@google.com> Reviewed-by: Cornelia Huck <cohuck@redhat.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
126 lines
3.6 KiB
C
126 lines
3.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
 * virtio_pmem.c: Virtio pmem Driver
 *
 * Discovers persistent memory range information
 * from host and provides a virtio based flushing
 * interface.
 */
|
|
#include "virtio_pmem.h"
|
|
#include "nd.h"
|
|
|
|
/* The interrupt handler */
|
|
void virtio_pmem_host_ack(struct virtqueue *vq)
|
|
{
|
|
struct virtio_pmem *vpmem = vq->vdev->priv;
|
|
struct virtio_pmem_request *req_data, *req_buf;
|
|
unsigned long flags;
|
|
unsigned int len;
|
|
|
|
spin_lock_irqsave(&vpmem->pmem_lock, flags);
|
|
while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
|
|
req_data->done = true;
|
|
wake_up(&req_data->host_acked);
|
|
|
|
if (!list_empty(&vpmem->req_list)) {
|
|
req_buf = list_first_entry(&vpmem->req_list,
|
|
struct virtio_pmem_request, list);
|
|
req_buf->wq_buf_avail = true;
|
|
wake_up(&req_buf->wq_buf);
|
|
list_del(&req_buf->list);
|
|
}
|
|
}
|
|
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
|
|
}
|
|
EXPORT_SYMBOL_GPL(virtio_pmem_host_ack);
|
|
|
|
/* The request submission function */
|
|
static int virtio_pmem_flush(struct nd_region *nd_region)
|
|
{
|
|
struct virtio_device *vdev = nd_region->provider_data;
|
|
struct virtio_pmem *vpmem = vdev->priv;
|
|
struct virtio_pmem_request *req_data;
|
|
struct scatterlist *sgs[2], sg, ret;
|
|
unsigned long flags;
|
|
int err, err1;
|
|
|
|
might_sleep();
|
|
req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
|
|
if (!req_data)
|
|
return -ENOMEM;
|
|
|
|
req_data->done = false;
|
|
init_waitqueue_head(&req_data->host_acked);
|
|
init_waitqueue_head(&req_data->wq_buf);
|
|
INIT_LIST_HEAD(&req_data->list);
|
|
req_data->req.type = cpu_to_virtio32(vdev, VIRTIO_PMEM_REQ_TYPE_FLUSH);
|
|
sg_init_one(&sg, &req_data->req, sizeof(req_data->req));
|
|
sgs[0] = &sg;
|
|
sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp));
|
|
sgs[1] = &ret;
|
|
|
|
spin_lock_irqsave(&vpmem->pmem_lock, flags);
|
|
/*
|
|
* If virtqueue_add_sgs returns -ENOSPC then req_vq virtual
|
|
* queue does not have free descriptor. We add the request
|
|
* to req_list and wait for host_ack to wake us up when free
|
|
* slots are available.
|
|
*/
|
|
while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
|
|
GFP_ATOMIC)) == -ENOSPC) {
|
|
|
|
dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
|
|
req_data->wq_buf_avail = false;
|
|
list_add_tail(&req_data->list, &vpmem->req_list);
|
|
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
|
|
|
|
/* A host response results in "host_ack" getting called */
|
|
wait_event(req_data->wq_buf, req_data->wq_buf_avail);
|
|
spin_lock_irqsave(&vpmem->pmem_lock, flags);
|
|
}
|
|
err1 = virtqueue_kick(vpmem->req_vq);
|
|
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
|
|
/*
|
|
* virtqueue_add_sgs failed with error different than -ENOSPC, we can't
|
|
* do anything about that.
|
|
*/
|
|
if (err || !err1) {
|
|
dev_info(&vdev->dev, "failed to send command to virtio pmem device\n");
|
|
err = -EIO;
|
|
} else {
|
|
/* A host repsonse results in "host_ack" getting called */
|
|
wait_event(req_data->host_acked, req_data->done);
|
|
err = virtio32_to_cpu(vdev, req_data->resp.ret);
|
|
}
|
|
|
|
kfree(req_data);
|
|
return err;
|
|
};
|
|
|
|
/* The asynchronous flush callback function */
|
|
int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
|
|
{
|
|
/*
|
|
* Create child bio for asynchronous flush and chain with
|
|
* parent bio. Otherwise directly call nd_region flush.
|
|
*/
|
|
if (bio && bio->bi_iter.bi_sector != -1) {
|
|
struct bio *child = bio_alloc(GFP_ATOMIC, 0);
|
|
|
|
if (!child)
|
|
return -ENOMEM;
|
|
bio_copy_dev(child, bio);
|
|
child->bi_opf = REQ_PREFLUSH;
|
|
child->bi_iter.bi_sector = -1;
|
|
bio_chain(child, bio);
|
|
submit_bio(child);
|
|
return 0;
|
|
}
|
|
if (virtio_pmem_flush(nd_region))
|
|
return -EIO;
|
|
|
|
return 0;
|
|
};
|
|
EXPORT_SYMBOL_GPL(async_pmem_flush);
|
|
MODULE_LICENSE("GPL");
|