vduse: Support registering userspace memory for IOVA regions
Introduce two ioctls, VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM,
to support registering and de-registering userspace memory for IOVA
regions.

For now, only the bounce buffer region used in the virtio-vdpa case
can be registered.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20220803045523.23851-5-xieyongji@bytedance.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
commit 79a463be9e (parent 6c77ed2288)
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -21,6 +21,8 @@
 #include <linux/uio.h>
 #include <linux/vdpa.h>
 #include <linux/nospec.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
 #include <uapi/linux/vduse.h>
 #include <uapi/linux/vdpa.h>
 #include <uapi/linux/virtio_config.h>
@@ -64,6 +66,13 @@ struct vduse_vdpa {
 	struct vduse_dev *dev;
 };
 
+struct vduse_umem {
+	unsigned long iova;
+	unsigned long npages;
+	struct page **pages;
+	struct mm_struct *mm;
+};
+
 struct vduse_dev {
 	struct vduse_vdpa *vdev;
 	struct device *dev;
@@ -95,6 +104,8 @@ struct vduse_dev {
 	u8 status;
 	u32 vq_num;
 	u32 vq_align;
+	struct vduse_umem *umem;
+	struct mutex mem_lock;
 };
 
 struct vduse_dev_msg {
@@ -917,6 +928,102 @@ unlock:
 	return ret;
 }
 
+static int vduse_dev_dereg_umem(struct vduse_dev *dev,
+				u64 iova, u64 size)
+{
+	int ret;
+
+	mutex_lock(&dev->mem_lock);
+	ret = -ENOENT;
+	if (!dev->umem)
+		goto unlock;
+
+	ret = -EINVAL;
+	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
+		goto unlock;
+
+	vduse_domain_remove_user_bounce_pages(dev->domain);
+	unpin_user_pages_dirty_lock(dev->umem->pages,
+				    dev->umem->npages, true);
+	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
+	mmdrop(dev->umem->mm);
+	vfree(dev->umem->pages);
+	kfree(dev->umem);
+	dev->umem = NULL;
+	ret = 0;
+unlock:
+	mutex_unlock(&dev->mem_lock);
+	return ret;
+}
+
+static int vduse_dev_reg_umem(struct vduse_dev *dev,
+			      u64 iova, u64 uaddr, u64 size)
+{
+	struct page **page_list = NULL;
+	struct vduse_umem *umem = NULL;
+	long pinned = 0;
+	unsigned long npages, lock_limit;
+	int ret;
+
+	if (!dev->domain->bounce_map ||
+	    size != dev->domain->bounce_size ||
+	    iova != 0 || uaddr & ~PAGE_MASK)
+		return -EINVAL;
+
+	mutex_lock(&dev->mem_lock);
+	ret = -EEXIST;
+	if (dev->umem)
+		goto unlock;
+
+	ret = -ENOMEM;
+	npages = size >> PAGE_SHIFT;
+	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
+			      GFP_KERNEL_ACCOUNT);
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!page_list || !umem)
+		goto unlock;
+
+	mmap_read_lock(current->mm);
+
+	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
+	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
+		goto out;
+
+	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
+				page_list, NULL);
+	if (pinned != npages) {
+		ret = pinned < 0 ? pinned : -ENOMEM;
+		goto out;
+	}
+
+	ret = vduse_domain_add_user_bounce_pages(dev->domain,
+						 page_list, pinned);
+	if (ret)
+		goto out;
+
+	atomic64_add(npages, &current->mm->pinned_vm);
+
+	umem->pages = page_list;
+	umem->npages = pinned;
+	umem->iova = iova;
+	umem->mm = current->mm;
+	mmgrab(current->mm);
+
+	dev->umem = umem;
+out:
+	if (ret && pinned > 0)
+		unpin_user_pages(page_list, pinned);
+
+	mmap_read_unlock(current->mm);
+unlock:
+	if (ret) {
+		vfree(page_list);
+		kfree(umem);
+	}
+	mutex_unlock(&dev->mem_lock);
+	return ret;
+}
+
 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
@@ -1089,6 +1196,38 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
 		break;
 	}
+	case VDUSE_IOTLB_REG_UMEM: {
+		struct vduse_iova_umem umem;
+
+		ret = -EFAULT;
+		if (copy_from_user(&umem, argp, sizeof(umem)))
+			break;
+
+		ret = -EINVAL;
+		if (!is_mem_zero((const char *)umem.reserved,
+				 sizeof(umem.reserved)))
+			break;
+
+		ret = vduse_dev_reg_umem(dev, umem.iova,
+					 umem.uaddr, umem.size);
+		break;
+	}
+	case VDUSE_IOTLB_DEREG_UMEM: {
+		struct vduse_iova_umem umem;
+
+		ret = -EFAULT;
+		if (copy_from_user(&umem, argp, sizeof(umem)))
+			break;
+
+		ret = -EINVAL;
+		if (!is_mem_zero((const char *)umem.reserved,
+				 sizeof(umem.reserved)))
+			break;
+
+		ret = vduse_dev_dereg_umem(dev, umem.iova,
+					   umem.size);
+		break;
+	}
 	default:
 		ret = -ENOIOCTLCMD;
 		break;
@@ -1101,6 +1240,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
 {
 	struct vduse_dev *dev = file->private_data;
 
+	vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
 	spin_lock(&dev->msg_lock);
 	/* Make sure the inflight messages can processed after reconncection */
 	list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1163,6 +1303,7 @@ static struct vduse_dev *vduse_dev_create(void)
 		return NULL;
 
 	mutex_init(&dev->lock);
+	mutex_init(&dev->mem_lock);
 	spin_lock_init(&dev->msg_lock);
 	INIT_LIST_HEAD(&dev->send_list);
 	INIT_LIST_HEAD(&dev->recv_list);
--- a/include/uapi/linux/vduse.h
+++ b/include/uapi/linux/vduse.h
@@ -210,6 +210,29 @@ struct vduse_vq_eventfd {
  */
 #define VDUSE_VQ_INJECT_IRQ	_IOW(VDUSE_BASE, 0x17, __u32)
 
+/**
+ * struct vduse_iova_umem - userspace memory configuration for one IOVA region
+ * @uaddr: start address of userspace memory, it must be aligned to page size
+ * @iova: start of the IOVA region
+ * @size: size of the IOVA region
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
+ * ioctls to register/de-register userspace memory for IOVA regions
+ */
+struct vduse_iova_umem {
+	__u64 uaddr;
+	__u64 iova;
+	__u64 size;
+	__u64 reserved[3];
+};
+
+/* Register userspace memory for IOVA regions */
+#define VDUSE_IOTLB_REG_UMEM	_IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem)
+
+/* De-register the userspace memory. Caller should set iova and size field. */
+#define VDUSE_IOTLB_DEREG_UMEM	_IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem)
+
 /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
 
 /**
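For context, here is a minimal userspace sketch of how a VDUSE daemon might exercise the two new ioctls on its per-device fd. The fd setup, error handling, and the 64 MB bounce size are illustrative assumptions, not part of the patch; the requirements visible in vduse_dev_reg_umem() and the ioctl handler are that uaddr is page aligned, iova is 0, size equals the domain's bounce size, and reserved[] is zeroed.

/* Hypothetical usage sketch, not part of the patch. Assumes the per-device
 * VDUSE fd is already open and that BOUNCE_SIZE matches the device's bounce
 * buffer size. Requires a uapi linux/vduse.h that includes this patch. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vduse.h>

#define BOUNCE_SIZE (64UL << 20)	/* assumption: must equal the domain bounce size */

static int reg_bounce_umem(int dev_fd, void **buf_out)
{
	struct vduse_iova_umem umem;
	void *buf;

	/* uaddr must be page aligned (uaddr & ~PAGE_MASK check in the kernel) */
	if (posix_memalign(&buf, sysconf(_SC_PAGESIZE), BOUNCE_SIZE))
		return -1;

	memset(&umem, 0, sizeof(umem));		/* reserved[] must be zero */
	umem.uaddr = (uint64_t)(uintptr_t)buf;
	umem.iova = 0;				/* bounce region starts at IOVA 0 */
	umem.size = BOUNCE_SIZE;

	if (ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem)) {
		free(buf);
		return -1;
	}
	*buf_out = buf;
	return 0;
}

static void dereg_bounce_umem(int dev_fd, void *buf)
{
	struct vduse_iova_umem umem;

	memset(&umem, 0, sizeof(umem));
	umem.iova = 0;				/* must match the registered region */
	umem.size = BOUNCE_SIZE;

	ioctl(dev_fd, VDUSE_IOTLB_DEREG_UMEM, &umem);
	free(buf);
}

Note that the registered pages are long-term pinned and charged against the caller's RLIMIT_MEMLOCK via mm->pinned_vm, so a daemon registering a large bounce region may need its memlock limit raised.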