fs,io_uring: add infrastructure for uring-cmd

file_operations->uring_cmd is a file-private handler.
This is somewhat similar to ioctl, but hopefully a lot more sane and
useful, as it can be used to enable many io_uring capabilities for the
underlying operation.

IORING_OP_URING_CMD is a file-private kind of request. io_uring doesn't
know what is in this command type; it's up to the provider of
->uring_cmd() to deal with it.

Co-developed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-2-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2022-05-11 11:17:45 +05:30
parent 2bb04df7c2
commit ee692a21e9
4 changed files with 165 additions and 26 deletions

View File

@ -202,13 +202,6 @@ struct io_rings {
struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
};
/*
 * Flag bits for an issue_flags value; presumably the 'unsigned int
 * issue_flags' argument taken by the issue handlers in this file.
 * NOTE(review): an enum with the same name and extra members appears later
 * in this change, and the hunk header suggests this copy is the one being
 * removed - confirm before relying on it.
 */
enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1,
IO_URING_F_UNLOCKED = 2,
/* int's last bit, sign checks are usually faster than a bit test */
IO_URING_F_NONBLOCK = INT_MIN,
};
struct io_mapped_ubuf {
u64 ubuf;
u64 ubuf_end;
@ -972,6 +965,7 @@ struct io_kiocb {
struct io_xattr xattr;
struct io_socket sock;
struct io_nop nop;
struct io_uring_cmd uring_cmd;
};
u8 opcode;
@ -1050,6 +1044,14 @@ struct io_cancel_data {
int seq;
};
/*
 * Number of URING_CMD payload bytes available to the command provider.
 * The payload begins at the 'cmd' member of the first sqe; when SQE128 is
 * in use it additionally covers the whole of the sqe that follows.
 */
#define uring_cmd_pdu_size(is_sqe128)					\
	(sizeof(struct io_uring_sqe) -					\
	 offsetof(struct io_uring_sqe, cmd) +				\
	 ((is_sqe128) ? sizeof(struct io_uring_sqe) : 0))
struct io_op_def {
/* needs req->file assigned */
unsigned needs_file : 1;
@ -1289,6 +1291,12 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_SOCKET] = {
.audit_skip = 1,
},
[IORING_OP_URING_CMD] = {
.needs_file = 1,
.plug = 1,
.needs_async_setup = 1,
.async_size = uring_cmd_pdu_size(1),
},
};
/* requests with any of those set should undergo io_disarm_next() */
@ -1428,6 +1436,8 @@ const char *io_uring_get_opcode(u8 opcode)
return "GETXATTR";
case IORING_OP_SOCKET:
return "SOCKET";
case IORING_OP_URING_CMD:
return "URING_CMD";
case IORING_OP_LAST:
return "INVALID";
}
@ -4507,10 +4517,6 @@ static int __io_getxattr_prep(struct io_kiocb *req,
const char __user *name;
int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->ioprio))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@ -4620,10 +4626,6 @@ static int __io_setxattr_prep(struct io_kiocb *req,
const char __user *name;
int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->ioprio))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@ -4910,6 +4912,96 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
/*
 * task_work handler installed by io_uring_cmd_complete_in_task(): hands the
 * request's io_uring_cmd to the callback stored in it. @locked is not used.
 */
static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
{
	struct io_uring_cmd *ioucmd = &req->uring_cmd;

	ioucmd->task_work_cb(ioucmd);
}
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *))
{
struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
req->uring_cmd.task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
}
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
/*
 * Completion entry point for consumers of io_uring_cmd: to be called once
 * the command has finished, if ->uring_cmd() originally returned
 * -EIOCBQUEUED for it. io_uring_cmd() also uses it for results that are
 * available immediately.
 *
 * A negative @ret marks the request as failed. @res2 is only forwarded
 * when the ring was set up with IORING_SETUP_CQE32.
 */
void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
{
	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);

	if (ret < 0)
		req_set_fail(req);

	if (!(req->ctx->flags & IORING_SETUP_CQE32)) {
		io_req_complete(req, ret);
		return;
	}
	__io_req_complete32(req, 0, ret, 0, res2, 0);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
static int io_uring_cmd_prep_async(struct io_kiocb *req)
{
size_t cmd_size;
cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
return 0;
}
/*
 * Prepare an IORING_OP_URING_CMD request from its sqe.
 *
 * Returns -EINVAL if sqe->rw_flags is set. Otherwise records a pointer to
 * the payload inside the sqe plus the command opcode, and returns 0.
 */
static int io_uring_cmd_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe)
{
	if (sqe->rw_flags)
		return -EINVAL;

	/* still points into the sqe; copied out later if issue returns -EAGAIN */
	req->uring_cmd.cmd = sqe->cmd;
	req->uring_cmd.cmd_op = READ_ONCE(sqe->cmd_op);
	return 0;
}
/*
 * Issue an IORING_OP_URING_CMD request by calling the file's ->uring_cmd()
 * handler.
 *
 * Ring setup state (SQE128, CQE32, IOPOLL) is added to @issue_flags before
 * the handler is called. Returns:
 *   -EOPNOTSUPP  the file has no ->uring_cmd() handler
 *   -EAGAIN      the handler asked for a retry; the payload has been saved
 *                in req->async_data if it was not there already
 *   -ENOMEM      allocating the async data for that copy failed
 *   0            the handler returned -EIOCBQUEUED, or it returned a final
 *                result which has been passed to io_uring_cmd_done()
 */
static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_uring_cmd *ioucmd = &req->uring_cmd;
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = req->file;
	int ret;

	if (!file->f_op->uring_cmd)
		return -EOPNOTSUPP;

	if (ctx->flags & IORING_SETUP_SQE128)
		issue_flags |= IO_URING_F_SQE128;
	if (ctx->flags & IORING_SETUP_CQE32)
		issue_flags |= IO_URING_F_CQE32;
	if (ctx->flags & IORING_SETUP_IOPOLL)
		issue_flags |= IO_URING_F_IOPOLL;

	/* use the saved copy of the payload when one exists */
	if (req_has_async_data(req))
		ioucmd->cmd = req->async_data;

	ret = file->f_op->uring_cmd(ioucmd, issue_flags);

	/* the provider will call io_uring_cmd_done() itself later */
	if (ret == -EIOCBQUEUED)
		return 0;

	if (ret != -EAGAIN) {
		io_uring_cmd_done(ioucmd, ret, 0);
		return 0;
	}

	/* retry requested: make sure the payload is saved in async data */
	if (req_has_async_data(req))
		return -EAGAIN;
	if (io_alloc_async_data(req))
		return -ENOMEM;
	io_uring_cmd_prep_async(req);
	return -EAGAIN;
}
static int io_shutdown_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@ -6305,9 +6397,7 @@ static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_socket *sock = &req->sock;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
if (sqe->addr || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
sock->domain = READ_ONCE(sqe->fd);
@ -7755,6 +7845,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_getxattr_prep(req, sqe);
case IORING_OP_SOCKET:
return io_socket_prep(req, sqe);
case IORING_OP_URING_CMD:
return io_uring_cmd_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@ -7787,6 +7879,8 @@ static int io_req_prep_async(struct io_kiocb *req)
return io_recvmsg_prep_async(req);
case IORING_OP_CONNECT:
return io_connect_prep_async(req);
case IORING_OP_URING_CMD:
return io_uring_cmd_prep_async(req);
}
printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
req->opcode);
@ -8081,6 +8175,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_SOCKET:
ret = io_socket(req, issue_flags);
break;
case IORING_OP_URING_CMD:
ret = io_uring_cmd(req, issue_flags);
break;
default:
ret = -EINVAL;
break;
@ -12699,6 +12796,8 @@ static int __init io_uring_init(void)
BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT);
return 0;

View File

@ -1953,6 +1953,7 @@ struct dir_context {
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
struct iov_iter;
struct io_uring_cmd;
struct file_operations {
struct module *owner;
@ -1995,6 +1996,7 @@ struct file_operations {
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
} __randomize_layout;
struct inode_operations {

View File

@ -5,7 +5,32 @@
#include <linux/sched.h>
#include <linux/xarray.h>
/*
 * Bits carried in the 'unsigned int issue_flags' value that is handed to
 * file_operations->uring_cmd().
 */
enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1 << 0,
	IO_URING_F_UNLOCKED		= 1 << 1,

	/* ring (ctx) state, passed along for URING_CMD */
	IO_URING_F_SQE128		= 1 << 2,
	IO_URING_F_CQE32		= 1 << 3,
	IO_URING_F_IOPOLL		= 1 << 4,

	/* the last (sign) bit of an int: sign checks are usually faster than a bit test */
	IO_URING_F_NONBLOCK		= INT_MIN,
};
/*
 * Per-request state for IORING_OP_URING_CMD; a pointer to it is what
 * file_operations->uring_cmd() receives. io_uring_init() carries a
 * BUILD_BUG_ON() that keeps this structure at no more than 64 bytes.
 */
struct io_uring_cmd {
/* NOTE(review): presumably the file the command targets - not set in the code shown here, confirm */
struct file *file;
/* command payload: sqe->cmd after prep, or req->async_data once a saved copy exists */
const void *cmd;
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd);
/* copied from sqe->cmd_op at prep time */
u32 cmd_op;
u32 pad;
u8 pdu[32]; /* available inline for free use */
};
#if defined(CONFIG_IO_URING)
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *));
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
@ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
__io_uring_free(tsk);
}
#else
/*
 * No-op stand-in for io_uring_cmd_done() when CONFIG_IO_URING is not
 * defined. Parameter names match the real prototype.
 */
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
		ssize_t res2)
{
}
/*
 * No-op stand-in for io_uring_cmd_complete_in_task() when CONFIG_IO_URING
 * is not defined; @task_work_cb is never called.
 */
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *))
{
}
static inline struct sock *io_uring_get_socket(struct file *file)
{
return NULL;

View File

@ -22,6 +22,7 @@ struct io_uring_sqe {
union {
__u64 off; /* offset into file */
__u64 addr2;
__u32 cmd_op;
};
union {
__u64 addr; /* pointer to buffer or iovecs */
@ -61,14 +62,17 @@ struct io_uring_sqe {
__s32 splice_fd_in;
__u32 file_index;
};
union {
struct {
__u64 addr3;
__u64 __pad2[1];
};
/*
* If the ring is initialized with IORING_SETUP_SQE128, then this field
* contains 64-bytes of padding, doubling the size of the SQE.
* If the ring is initialized with IORING_SETUP_SQE128, then
* this field is used for 80 bytes of arbitrary command data
*/
__u64 __big_sqe_pad[0];
__u8 cmd[0];
};
};
enum {
@ -175,6 +179,7 @@ enum io_uring_op {
IORING_OP_FGETXATTR,
IORING_OP_GETXATTR,
IORING_OP_SOCKET,
IORING_OP_URING_CMD,
/* this goes last, obviously */
IORING_OP_LAST,