mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
io_uring: move struct io_kiocb from task_struct to io_uring_task
Rather than store the task_struct itself in struct io_kiocb, store the io_uring specific task_struct. The lifetimes are the same in terms of io_uring, and this avoids doing some dereferences through the task_struct. For the hot path of putting local task references, we can deref req->tctx instead, which we'll need anyway in that function regardless of whether it's local or remote references. This is mostly straightforward, except the original task PF_EXITING check needs a bit of tweaking. task_work is _always_ run from the originating task, except in the fallback case, where it's run from a kernel thread. Replace the potentially racy (in case of fallback work) checks for req->task->flags with current->flags. It's either still the original task, in which case PF_EXITING will be sane, or it has PF_KTHREAD set, in which case it's fallback work. Both cases should prevent moving forward with the given request. Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
6ed368cc5d
commit
b6f58a3f4a
@ -110,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
|||||||
|
|
||||||
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
||||||
{
|
{
|
||||||
return cmd_to_io_kiocb(cmd)->task;
|
return cmd_to_io_kiocb(cmd)->tctx->task;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* _LINUX_IO_URING_CMD_H */
|
#endif /* _LINUX_IO_URING_CMD_H */
|
||||||
|
@ -84,6 +84,7 @@ struct io_uring_task {
|
|||||||
/* submission side */
|
/* submission side */
|
||||||
int cached_refs;
|
int cached_refs;
|
||||||
const struct io_ring_ctx *last;
|
const struct io_ring_ctx *last;
|
||||||
|
struct task_struct *task;
|
||||||
struct io_wq *io_wq;
|
struct io_wq *io_wq;
|
||||||
struct file *registered_rings[IO_RINGFD_REG_MAX];
|
struct file *registered_rings[IO_RINGFD_REG_MAX];
|
||||||
|
|
||||||
@ -625,7 +626,7 @@ struct io_kiocb {
|
|||||||
struct io_cqe cqe;
|
struct io_cqe cqe;
|
||||||
|
|
||||||
struct io_ring_ctx *ctx;
|
struct io_ring_ctx *ctx;
|
||||||
struct task_struct *task;
|
struct io_uring_task *tctx;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
|
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
|
||||||
|
@ -205,7 +205,7 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
|
|||||||
.opcode = cancel->opcode,
|
.opcode = cancel->opcode,
|
||||||
.seq = atomic_inc_return(&req->ctx->cancel_seq),
|
.seq = atomic_inc_return(&req->ctx->cancel_seq),
|
||||||
};
|
};
|
||||||
struct io_uring_task *tctx = req->task->io_uring;
|
struct io_uring_task *tctx = req->tctx;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (cd.flags & IORING_ASYNC_CANCEL_FD) {
|
if (cd.flags & IORING_ASYNC_CANCEL_FD) {
|
||||||
|
@ -203,7 +203,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
|
|||||||
|
|
||||||
hlist_for_each_entry(req, &hb->list, hash_node)
|
hlist_for_each_entry(req, &hb->list, hash_node)
|
||||||
seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
|
seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
|
||||||
task_work_pending(req->task));
|
task_work_pending(req->tctx->task));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_lock)
|
if (has_lock)
|
||||||
|
@ -206,7 +206,7 @@ bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
|
|||||||
{
|
{
|
||||||
bool matched;
|
bool matched;
|
||||||
|
|
||||||
if (tctx && head->task->io_uring != tctx)
|
if (tctx && head->tctx != tctx)
|
||||||
return false;
|
return false;
|
||||||
if (cancel_all)
|
if (cancel_all)
|
||||||
return true;
|
return true;
|
||||||
@ -407,11 +407,8 @@ static void io_clean_op(struct io_kiocb *req)
|
|||||||
kfree(req->apoll);
|
kfree(req->apoll);
|
||||||
req->apoll = NULL;
|
req->apoll = NULL;
|
||||||
}
|
}
|
||||||
if (req->flags & REQ_F_INFLIGHT) {
|
if (req->flags & REQ_F_INFLIGHT)
|
||||||
struct io_uring_task *tctx = req->task->io_uring;
|
atomic_dec(&req->tctx->inflight_tracked);
|
||||||
|
|
||||||
atomic_dec(&tctx->inflight_tracked);
|
|
||||||
}
|
|
||||||
if (req->flags & REQ_F_CREDS)
|
if (req->flags & REQ_F_CREDS)
|
||||||
put_cred(req->creds);
|
put_cred(req->creds);
|
||||||
if (req->flags & REQ_F_ASYNC_DATA) {
|
if (req->flags & REQ_F_ASYNC_DATA) {
|
||||||
@ -425,7 +422,7 @@ static inline void io_req_track_inflight(struct io_kiocb *req)
|
|||||||
{
|
{
|
||||||
if (!(req->flags & REQ_F_INFLIGHT)) {
|
if (!(req->flags & REQ_F_INFLIGHT)) {
|
||||||
req->flags |= REQ_F_INFLIGHT;
|
req->flags |= REQ_F_INFLIGHT;
|
||||||
atomic_inc(&req->task->io_uring->inflight_tracked);
|
atomic_inc(&req->tctx->inflight_tracked);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -514,7 +511,7 @@ static void io_prep_async_link(struct io_kiocb *req)
|
|||||||
static void io_queue_iowq(struct io_kiocb *req)
|
static void io_queue_iowq(struct io_kiocb *req)
|
||||||
{
|
{
|
||||||
struct io_kiocb *link = io_prep_linked_timeout(req);
|
struct io_kiocb *link = io_prep_linked_timeout(req);
|
||||||
struct io_uring_task *tctx = req->task->io_uring;
|
struct io_uring_task *tctx = req->tctx;
|
||||||
|
|
||||||
BUG_ON(!tctx);
|
BUG_ON(!tctx);
|
||||||
BUG_ON(!tctx->io_wq);
|
BUG_ON(!tctx->io_wq);
|
||||||
@ -529,7 +526,7 @@ static void io_queue_iowq(struct io_kiocb *req)
|
|||||||
* procedure rather than attempt to run this request (or create a new
|
* procedure rather than attempt to run this request (or create a new
|
||||||
* worker for it).
|
* worker for it).
|
||||||
*/
|
*/
|
||||||
if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
|
if (WARN_ON_ONCE(!same_thread_group(tctx->task, current)))
|
||||||
atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags);
|
atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags);
|
||||||
|
|
||||||
trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
|
trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
|
||||||
@ -678,17 +675,17 @@ static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* must to be called somewhat shortly after putting a request */
|
/* must to be called somewhat shortly after putting a request */
|
||||||
static inline void io_put_task(struct task_struct *task)
|
static inline void io_put_task(struct io_kiocb *req)
|
||||||
{
|
{
|
||||||
struct io_uring_task *tctx = task->io_uring;
|
struct io_uring_task *tctx = req->tctx;
|
||||||
|
|
||||||
if (likely(task == current)) {
|
if (likely(tctx->task == current)) {
|
||||||
tctx->cached_refs++;
|
tctx->cached_refs++;
|
||||||
} else {
|
} else {
|
||||||
percpu_counter_sub(&tctx->inflight, 1);
|
percpu_counter_sub(&tctx->inflight, 1);
|
||||||
if (unlikely(atomic_read(&tctx->in_cancel)))
|
if (unlikely(atomic_read(&tctx->in_cancel)))
|
||||||
wake_up(&tctx->wait);
|
wake_up(&tctx->wait);
|
||||||
put_task_struct(task);
|
put_task_struct(tctx->task);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1207,7 +1204,7 @@ static inline void io_req_local_work_add(struct io_kiocb *req,
|
|||||||
|
|
||||||
static void io_req_normal_work_add(struct io_kiocb *req)
|
static void io_req_normal_work_add(struct io_kiocb *req)
|
||||||
{
|
{
|
||||||
struct io_uring_task *tctx = req->task->io_uring;
|
struct io_uring_task *tctx = req->tctx;
|
||||||
struct io_ring_ctx *ctx = req->ctx;
|
struct io_ring_ctx *ctx = req->ctx;
|
||||||
|
|
||||||
/* task_work already pending, we're done */
|
/* task_work already pending, we're done */
|
||||||
@ -1226,7 +1223,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
|
if (likely(!task_work_add(tctx->task, &tctx->task_work, ctx->notify_method)))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
io_fallback_tw(tctx, false);
|
io_fallback_tw(tctx, false);
|
||||||
@ -1343,8 +1340,7 @@ static void io_req_task_cancel(struct io_kiocb *req, struct io_tw_state *ts)
|
|||||||
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
|
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
|
||||||
{
|
{
|
||||||
io_tw_lock(req->ctx, ts);
|
io_tw_lock(req->ctx, ts);
|
||||||
/* req->task == current here, checking PF_EXITING is safe */
|
if (unlikely(io_should_terminate_tw()))
|
||||||
if (unlikely(req->task->flags & PF_EXITING))
|
|
||||||
io_req_defer_failed(req, -EFAULT);
|
io_req_defer_failed(req, -EFAULT);
|
||||||
else if (req->flags & REQ_F_FORCE_ASYNC)
|
else if (req->flags & REQ_F_FORCE_ASYNC)
|
||||||
io_queue_iowq(req);
|
io_queue_iowq(req);
|
||||||
@ -1403,7 +1399,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
|
|||||||
}
|
}
|
||||||
io_put_file(req);
|
io_put_file(req);
|
||||||
io_req_put_rsrc_nodes(req);
|
io_req_put_rsrc_nodes(req);
|
||||||
io_put_task(req->task);
|
io_put_task(req);
|
||||||
|
|
||||||
node = req->comp_list.next;
|
node = req->comp_list.next;
|
||||||
io_req_add_to_cache(req, ctx);
|
io_req_add_to_cache(req, ctx);
|
||||||
@ -2019,7 +2015,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
|||||||
req->flags = (__force io_req_flags_t) sqe_flags;
|
req->flags = (__force io_req_flags_t) sqe_flags;
|
||||||
req->cqe.user_data = READ_ONCE(sqe->user_data);
|
req->cqe.user_data = READ_ONCE(sqe->user_data);
|
||||||
req->file = NULL;
|
req->file = NULL;
|
||||||
req->task = current;
|
req->tctx = current->io_uring;
|
||||||
req->cancel_seq_set = false;
|
req->cancel_seq_set = false;
|
||||||
|
|
||||||
if (unlikely(opcode >= IORING_OP_LAST)) {
|
if (unlikely(opcode >= IORING_OP_LAST)) {
|
||||||
|
@ -426,6 +426,19 @@ static inline bool io_allowed_run_tw(struct io_ring_ctx *ctx)
|
|||||||
ctx->submitter_task == current);
|
ctx->submitter_task == current);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Terminate the request if either of these conditions are true:
|
||||||
|
*
|
||||||
|
* 1) It's being executed by the original task, but that task is marked
|
||||||
|
* with PF_EXITING as it's exiting.
|
||||||
|
* 2) PF_KTHREAD is set, in which case the invoker of the task_work is
|
||||||
|
* our fallback task_work.
|
||||||
|
*/
|
||||||
|
static inline bool io_should_terminate_tw(void)
|
||||||
|
{
|
||||||
|
return current->flags & (PF_KTHREAD | PF_EXITING);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
|
static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
|
||||||
{
|
{
|
||||||
io_req_set_res(req, res, 0);
|
io_req_set_res(req, res, 0);
|
||||||
|
@ -89,8 +89,8 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
|||||||
static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
||||||
int res, u32 cflags, u64 user_data)
|
int res, u32 cflags, u64 user_data)
|
||||||
{
|
{
|
||||||
req->task = READ_ONCE(ctx->submitter_task);
|
req->tctx = READ_ONCE(ctx->submitter_task->io_uring);
|
||||||
if (!req->task) {
|
if (!req->tctx) {
|
||||||
kmem_cache_free(req_cachep, req);
|
kmem_cache_free(req_cachep, req);
|
||||||
return -EOWNERDEAD;
|
return -EOWNERDEAD;
|
||||||
}
|
}
|
||||||
|
@ -89,7 +89,7 @@ static int io_link_skb(struct sk_buff *skb, struct ubuf_info *uarg)
|
|||||||
|
|
||||||
/* make sure all noifications can be finished in the same task_work */
|
/* make sure all noifications can be finished in the same task_work */
|
||||||
if (unlikely(notif->ctx != prev_notif->ctx ||
|
if (unlikely(notif->ctx != prev_notif->ctx ||
|
||||||
notif->task != prev_notif->task))
|
notif->tctx != prev_notif->tctx))
|
||||||
return -EEXIST;
|
return -EEXIST;
|
||||||
|
|
||||||
nd->head = prev_nd->head;
|
nd->head = prev_nd->head;
|
||||||
@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
|
|||||||
notif->opcode = IORING_OP_NOP;
|
notif->opcode = IORING_OP_NOP;
|
||||||
notif->flags = 0;
|
notif->flags = 0;
|
||||||
notif->file = NULL;
|
notif->file = NULL;
|
||||||
notif->task = current;
|
notif->tctx = current->io_uring;
|
||||||
io_get_task_refs(1);
|
io_get_task_refs(1);
|
||||||
notif->file_node = NULL;
|
notif->file_node = NULL;
|
||||||
notif->buf_node = NULL;
|
notif->buf_node = NULL;
|
||||||
|
@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
|
|||||||
{
|
{
|
||||||
int v;
|
int v;
|
||||||
|
|
||||||
/* req->task == current here, checking PF_EXITING is safe */
|
if (unlikely(io_should_terminate_tw()))
|
||||||
if (unlikely(req->task->flags & PF_EXITING))
|
|
||||||
return -ECANCELED;
|
return -ECANCELED;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
@ -435,7 +435,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
|
|||||||
* Play it safe and assume not safe to re-import and reissue if we're
|
* Play it safe and assume not safe to re-import and reissue if we're
|
||||||
* not in the original thread group (or in task context).
|
* not in the original thread group (or in task context).
|
||||||
*/
|
*/
|
||||||
if (!same_thread_group(req->task, current) || !in_task())
|
if (!same_thread_group(req->tctx->task, current) || !in_task())
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -81,6 +81,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tctx->task = task;
|
||||||
xa_init(&tctx->xa);
|
xa_init(&tctx->xa);
|
||||||
init_waitqueue_head(&tctx->wait);
|
init_waitqueue_head(&tctx->wait);
|
||||||
atomic_set(&tctx->in_cancel, 0);
|
atomic_set(&tctx->in_cancel, 0);
|
||||||
|
@ -300,16 +300,18 @@ static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *t
|
|||||||
{
|
{
|
||||||
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
||||||
struct io_kiocb *prev = timeout->prev;
|
struct io_kiocb *prev = timeout->prev;
|
||||||
int ret = -ENOENT;
|
int ret;
|
||||||
|
|
||||||
if (prev) {
|
if (prev) {
|
||||||
if (!(req->task->flags & PF_EXITING)) {
|
if (!io_should_terminate_tw()) {
|
||||||
struct io_cancel_data cd = {
|
struct io_cancel_data cd = {
|
||||||
.ctx = req->ctx,
|
.ctx = req->ctx,
|
||||||
.data = prev->cqe.user_data,
|
.data = prev->cqe.user_data,
|
||||||
};
|
};
|
||||||
|
|
||||||
ret = io_try_cancel(req->task->io_uring, &cd, 0);
|
ret = io_try_cancel(req->tctx, &cd, 0);
|
||||||
|
} else {
|
||||||
|
ret = -ECANCELED;
|
||||||
}
|
}
|
||||||
io_req_set_res(req, ret ?: -ETIME, 0);
|
io_req_set_res(req, ret ?: -ETIME, 0);
|
||||||
io_req_task_complete(req, ts);
|
io_req_task_complete(req, ts);
|
||||||
@ -643,7 +645,7 @@ static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
|
|||||||
{
|
{
|
||||||
struct io_kiocb *req;
|
struct io_kiocb *req;
|
||||||
|
|
||||||
if (tctx && head->task->io_uring != tctx)
|
if (tctx && head->tctx != tctx)
|
||||||
return false;
|
return false;
|
||||||
if (cancel_all)
|
if (cancel_all)
|
||||||
return true;
|
return true;
|
||||||
|
@ -61,7 +61,7 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
|
|||||||
struct io_uring_cmd);
|
struct io_uring_cmd);
|
||||||
struct file *file = req->file;
|
struct file *file = req->file;
|
||||||
|
|
||||||
if (!cancel_all && req->task->io_uring != tctx)
|
if (!cancel_all && req->tctx != tctx)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
|
if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
|
||||||
|
@ -331,7 +331,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
|
|||||||
hlist_add_head(&req->hash_node, &ctx->waitid_list);
|
hlist_add_head(&req->hash_node, &ctx->waitid_list);
|
||||||
|
|
||||||
init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
|
init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
|
||||||
iwa->wo.child_wait.private = req->task;
|
iwa->wo.child_wait.private = req->tctx->task;
|
||||||
iw->head = &current->signal->wait_chldexit;
|
iw->head = &current->signal->wait_chldexit;
|
||||||
add_wait_queue(iw->head, &iwa->wo.child_wait);
|
add_wait_queue(iw->head, &iwa->wo.child_wait);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user