forked from Minki/linux
fs,io_uring: add infrastructure for uring-cmd
file_operations->uring_cmd is a file private handler. This is somewhat similar to ioctl but hopefully a lot more sane and useful as it can be used to enable many io_uring capabilities for the underlying operation.

IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't know what is in this command type, it's for the provider of ->uring_cmd() to deal with.

Co-developed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-2-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
2bb04df7c2
commit
ee692a21e9
135
fs/io_uring.c
135
fs/io_uring.c
@ -202,13 +202,6 @@ struct io_rings {
|
|||||||
struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
|
struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum io_uring_cmd_flags {
|
|
||||||
IO_URING_F_COMPLETE_DEFER = 1,
|
|
||||||
IO_URING_F_UNLOCKED = 2,
|
|
||||||
/* int's last bit, sign checks are usually faster than a bit test */
|
|
||||||
IO_URING_F_NONBLOCK = INT_MIN,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct io_mapped_ubuf {
|
struct io_mapped_ubuf {
|
||||||
u64 ubuf;
|
u64 ubuf;
|
||||||
u64 ubuf_end;
|
u64 ubuf_end;
|
||||||
@ -972,6 +965,7 @@ struct io_kiocb {
|
|||||||
struct io_xattr xattr;
|
struct io_xattr xattr;
|
||||||
struct io_socket sock;
|
struct io_socket sock;
|
||||||
struct io_nop nop;
|
struct io_nop nop;
|
||||||
|
struct io_uring_cmd uring_cmd;
|
||||||
};
|
};
|
||||||
|
|
||||||
u8 opcode;
|
u8 opcode;
|
||||||
@ -1050,6 +1044,14 @@ struct io_cancel_data {
|
|||||||
int seq;
|
int seq;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
 * the following sqe if SQE128 is used.
 *
 * uring_cmd_pdu_size() evaluates to the number of payload bytes available:
 * the part of one SQE from 'cmd' to its end, plus one further whole SQE when
 * @is_sqe128 is non-zero. The argument is normalised with !!, so a raw
 * flag-mask test may be passed directly.
 */
#define uring_cmd_pdu_size(is_sqe128)				\
	((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -	\
		offsetof(struct io_uring_sqe, cmd))
|
||||||
|
|
||||||
struct io_op_def {
|
struct io_op_def {
|
||||||
/* needs req->file assigned */
|
/* needs req->file assigned */
|
||||||
unsigned needs_file : 1;
|
unsigned needs_file : 1;
|
||||||
@ -1289,6 +1291,12 @@ static const struct io_op_def io_op_defs[] = {
|
|||||||
[IORING_OP_SOCKET] = {
|
[IORING_OP_SOCKET] = {
|
||||||
.audit_skip = 1,
|
.audit_skip = 1,
|
||||||
},
|
},
|
||||||
|
[IORING_OP_URING_CMD] = {
|
||||||
|
.needs_file = 1,
|
||||||
|
.plug = 1,
|
||||||
|
.needs_async_setup = 1,
|
||||||
|
.async_size = uring_cmd_pdu_size(1),
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
/* requests with any of those set should undergo io_disarm_next() */
|
/* requests with any of those set should undergo io_disarm_next() */
|
||||||
@ -1428,6 +1436,8 @@ const char *io_uring_get_opcode(u8 opcode)
|
|||||||
return "GETXATTR";
|
return "GETXATTR";
|
||||||
case IORING_OP_SOCKET:
|
case IORING_OP_SOCKET:
|
||||||
return "SOCKET";
|
return "SOCKET";
|
||||||
|
case IORING_OP_URING_CMD:
|
||||||
|
return "URING_CMD";
|
||||||
case IORING_OP_LAST:
|
case IORING_OP_LAST:
|
||||||
return "INVALID";
|
return "INVALID";
|
||||||
}
|
}
|
||||||
@ -4507,10 +4517,6 @@ static int __io_getxattr_prep(struct io_kiocb *req,
|
|||||||
const char __user *name;
|
const char __user *name;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
|
|
||||||
return -EINVAL;
|
|
||||||
if (unlikely(sqe->ioprio))
|
|
||||||
return -EINVAL;
|
|
||||||
if (unlikely(req->flags & REQ_F_FIXED_FILE))
|
if (unlikely(req->flags & REQ_F_FIXED_FILE))
|
||||||
return -EBADF;
|
return -EBADF;
|
||||||
|
|
||||||
@ -4620,10 +4626,6 @@ static int __io_setxattr_prep(struct io_kiocb *req,
|
|||||||
const char __user *name;
|
const char __user *name;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
|
|
||||||
return -EINVAL;
|
|
||||||
if (unlikely(sqe->ioprio))
|
|
||||||
return -EINVAL;
|
|
||||||
if (unlikely(req->flags & REQ_F_FIXED_FILE))
|
if (unlikely(req->flags & REQ_F_FIXED_FILE))
|
||||||
return -EBADF;
|
return -EBADF;
|
||||||
|
|
||||||
@ -4910,6 +4912,96 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
|
||||||
|
{
|
||||||
|
req->uring_cmd.task_work_cb(&req->uring_cmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||||
|
void (*task_work_cb)(struct io_uring_cmd *))
|
||||||
|
{
|
||||||
|
struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
|
||||||
|
|
||||||
|
req->uring_cmd.task_work_cb = task_work_cb;
|
||||||
|
req->io_task_work.func = io_uring_cmd_work;
|
||||||
|
io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called by consumers of io_uring_cmd, if they originally returned
|
||||||
|
* -EIOCBQUEUED upon receiving the command.
|
||||||
|
*/
|
||||||
|
void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
|
||||||
|
{
|
||||||
|
struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
|
||||||
|
|
||||||
|
if (ret < 0)
|
||||||
|
req_set_fail(req);
|
||||||
|
if (req->ctx->flags & IORING_SETUP_CQE32)
|
||||||
|
__io_req_complete32(req, 0, ret, 0, res2, 0);
|
||||||
|
else
|
||||||
|
io_req_complete(req, ret);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
|
||||||
|
|
||||||
|
static int io_uring_cmd_prep_async(struct io_kiocb *req)
|
||||||
|
{
|
||||||
|
size_t cmd_size;
|
||||||
|
|
||||||
|
cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
|
||||||
|
|
||||||
|
memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int io_uring_cmd_prep(struct io_kiocb *req,
|
||||||
|
const struct io_uring_sqe *sqe)
|
||||||
|
{
|
||||||
|
struct io_uring_cmd *ioucmd = &req->uring_cmd;
|
||||||
|
|
||||||
|
if (sqe->rw_flags)
|
||||||
|
return -EINVAL;
|
||||||
|
ioucmd->cmd = sqe->cmd;
|
||||||
|
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
|
||||||
|
{
|
||||||
|
struct io_uring_cmd *ioucmd = &req->uring_cmd;
|
||||||
|
struct io_ring_ctx *ctx = req->ctx;
|
||||||
|
struct file *file = req->file;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!req->file->f_op->uring_cmd)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
|
if (ctx->flags & IORING_SETUP_SQE128)
|
||||||
|
issue_flags |= IO_URING_F_SQE128;
|
||||||
|
if (ctx->flags & IORING_SETUP_CQE32)
|
||||||
|
issue_flags |= IO_URING_F_CQE32;
|
||||||
|
if (ctx->flags & IORING_SETUP_IOPOLL)
|
||||||
|
issue_flags |= IO_URING_F_IOPOLL;
|
||||||
|
|
||||||
|
if (req_has_async_data(req))
|
||||||
|
ioucmd->cmd = req->async_data;
|
||||||
|
|
||||||
|
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
|
||||||
|
if (ret == -EAGAIN) {
|
||||||
|
if (!req_has_async_data(req)) {
|
||||||
|
if (io_alloc_async_data(req))
|
||||||
|
return -ENOMEM;
|
||||||
|
io_uring_cmd_prep_async(req);
|
||||||
|
}
|
||||||
|
return -EAGAIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret != -EIOCBQUEUED)
|
||||||
|
io_uring_cmd_done(ioucmd, ret, 0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int io_shutdown_prep(struct io_kiocb *req,
|
static int io_shutdown_prep(struct io_kiocb *req,
|
||||||
const struct io_uring_sqe *sqe)
|
const struct io_uring_sqe *sqe)
|
||||||
{
|
{
|
||||||
@ -6305,9 +6397,7 @@ static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||||||
{
|
{
|
||||||
struct io_socket *sock = &req->sock;
|
struct io_socket *sock = &req->sock;
|
||||||
|
|
||||||
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
|
if (sqe->addr || sqe->rw_flags || sqe->buf_index)
|
||||||
return -EINVAL;
|
|
||||||
if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
|
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
sock->domain = READ_ONCE(sqe->fd);
|
sock->domain = READ_ONCE(sqe->fd);
|
||||||
@ -7755,6 +7845,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||||||
return io_getxattr_prep(req, sqe);
|
return io_getxattr_prep(req, sqe);
|
||||||
case IORING_OP_SOCKET:
|
case IORING_OP_SOCKET:
|
||||||
return io_socket_prep(req, sqe);
|
return io_socket_prep(req, sqe);
|
||||||
|
case IORING_OP_URING_CMD:
|
||||||
|
return io_uring_cmd_prep(req, sqe);
|
||||||
}
|
}
|
||||||
|
|
||||||
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
|
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
|
||||||
@ -7787,6 +7879,8 @@ static int io_req_prep_async(struct io_kiocb *req)
|
|||||||
return io_recvmsg_prep_async(req);
|
return io_recvmsg_prep_async(req);
|
||||||
case IORING_OP_CONNECT:
|
case IORING_OP_CONNECT:
|
||||||
return io_connect_prep_async(req);
|
return io_connect_prep_async(req);
|
||||||
|
case IORING_OP_URING_CMD:
|
||||||
|
return io_uring_cmd_prep_async(req);
|
||||||
}
|
}
|
||||||
printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
|
printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
|
||||||
req->opcode);
|
req->opcode);
|
||||||
@ -8081,6 +8175,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
|||||||
case IORING_OP_SOCKET:
|
case IORING_OP_SOCKET:
|
||||||
ret = io_socket(req, issue_flags);
|
ret = io_socket(req, issue_flags);
|
||||||
break;
|
break;
|
||||||
|
case IORING_OP_URING_CMD:
|
||||||
|
ret = io_uring_cmd(req, issue_flags);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
break;
|
break;
|
||||||
@ -12699,6 +12796,8 @@ static int __init io_uring_init(void)
|
|||||||
|
|
||||||
BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
|
BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
|
||||||
|
|
||||||
|
BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
|
||||||
|
|
||||||
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
|
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
|
||||||
SLAB_ACCOUNT);
|
SLAB_ACCOUNT);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1953,6 +1953,7 @@ struct dir_context {
|
|||||||
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
|
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
|
||||||
|
|
||||||
struct iov_iter;
|
struct iov_iter;
|
||||||
|
struct io_uring_cmd;
|
||||||
|
|
||||||
struct file_operations {
|
struct file_operations {
|
||||||
struct module *owner;
|
struct module *owner;
|
||||||
@ -1995,6 +1996,7 @@ struct file_operations {
|
|||||||
struct file *file_out, loff_t pos_out,
|
struct file *file_out, loff_t pos_out,
|
||||||
loff_t len, unsigned int remap_flags);
|
loff_t len, unsigned int remap_flags);
|
||||||
int (*fadvise)(struct file *, loff_t, loff_t, int);
|
int (*fadvise)(struct file *, loff_t, loff_t, int);
|
||||||
|
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
|
||||||
} __randomize_layout;
|
} __randomize_layout;
|
||||||
|
|
||||||
struct inode_operations {
|
struct inode_operations {
|
||||||
|
@ -5,7 +5,32 @@
|
|||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/xarray.h>
|
#include <linux/xarray.h>
|
||||||
|
|
||||||
|
/*
 * Bits carried in the 'issue_flags' argument of issue handlers, including
 * file_operations->uring_cmd().
 *
 * The values are distinct single bits and may be OR-ed together.
 */
enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1,
	IO_URING_F_UNLOCKED		= 2,
	/* int's last bit, sign checks are usually faster than a bit test */
	IO_URING_F_NONBLOCK		= INT_MIN,

	/* ctx state flags, for URING_CMD */
	IO_URING_F_SQE128		= 4,
	IO_URING_F_CQE32		= 8,
	IO_URING_F_IOPOLL		= 16,
};
|
||||||
|
|
||||||
|
struct io_uring_cmd {
|
||||||
|
struct file *file;
|
||||||
|
const void *cmd;
|
||||||
|
/* callback to defer completions to task context */
|
||||||
|
void (*task_work_cb)(struct io_uring_cmd *cmd);
|
||||||
|
u32 cmd_op;
|
||||||
|
u32 pad;
|
||||||
|
u8 pdu[32]; /* available inline for free use */
|
||||||
|
};
|
||||||
|
|
||||||
#if defined(CONFIG_IO_URING)
|
#if defined(CONFIG_IO_URING)
|
||||||
|
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
|
||||||
|
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||||
|
void (*task_work_cb)(struct io_uring_cmd *));
|
||||||
struct sock *io_uring_get_socket(struct file *file);
|
struct sock *io_uring_get_socket(struct file *file);
|
||||||
void __io_uring_cancel(bool cancel_all);
|
void __io_uring_cancel(bool cancel_all);
|
||||||
void __io_uring_free(struct task_struct *tsk);
|
void __io_uring_free(struct task_struct *tsk);
|
||||||
@ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
|
|||||||
__io_uring_free(tsk);
|
__io_uring_free(tsk);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
/*
 * CONFIG_IO_URING=n stubs: both helpers compile to nothing so that providers
 * of ->uring_cmd() can call them unconditionally. Parameter names match the
 * real prototypes.
 */
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
		ssize_t res2)
{
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *))
{
}
|
||||||
static inline struct sock *io_uring_get_socket(struct file *file)
|
static inline struct sock *io_uring_get_socket(struct file *file)
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -22,6 +22,7 @@ struct io_uring_sqe {
|
|||||||
union {
|
union {
|
||||||
__u64 off; /* offset into file */
|
__u64 off; /* offset into file */
|
||||||
__u64 addr2;
|
__u64 addr2;
|
||||||
|
__u32 cmd_op;
|
||||||
};
|
};
|
||||||
union {
|
union {
|
||||||
__u64 addr; /* pointer to buffer or iovecs */
|
__u64 addr; /* pointer to buffer or iovecs */
|
||||||
@ -61,14 +62,17 @@ struct io_uring_sqe {
|
|||||||
__s32 splice_fd_in;
|
__s32 splice_fd_in;
|
||||||
__u32 file_index;
|
__u32 file_index;
|
||||||
};
|
};
|
||||||
__u64 addr3;
|
union {
|
||||||
__u64 __pad2[1];
|
struct {
|
||||||
|
__u64 addr3;
|
||||||
/*
|
__u64 __pad2[1];
|
||||||
* If the ring is initialized with IORING_SETUP_SQE128, then this field
|
};
|
||||||
* contains 64-bytes of padding, doubling the size of the SQE.
|
/*
|
||||||
*/
|
* If the ring is initialized with IORING_SETUP_SQE128, then
|
||||||
__u64 __big_sqe_pad[0];
|
* this field is used for 80 bytes of arbitrary command data
|
||||||
|
*/
|
||||||
|
__u8 cmd[0];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
@ -175,6 +179,7 @@ enum io_uring_op {
|
|||||||
IORING_OP_FGETXATTR,
|
IORING_OP_FGETXATTR,
|
||||||
IORING_OP_GETXATTR,
|
IORING_OP_GETXATTR,
|
||||||
IORING_OP_SOCKET,
|
IORING_OP_SOCKET,
|
||||||
|
IORING_OP_URING_CMD,
|
||||||
|
|
||||||
/* this goes last, obviously */
|
/* this goes last, obviously */
|
||||||
IORING_OP_LAST,
|
IORING_OP_LAST,
|
||||||
|
Loading…
Reference in New Issue
Block a user