diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ab7458033ee3..2df3225b562f 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -222,7 +222,7 @@ enum io_uring_op {
 
 /*
  * sqe->uring_cmd_flags
- * IORING_URING_CMD_FIXED	use registered buffer; pass thig flag
+ * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
  */
 #define IORING_URING_CMD_FIXED	(1U << 0)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index ac8c488e3077..4a1e482747cc 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -176,6 +176,11 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
+static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
+{
+	return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
+}
+
 static bool io_match_linked(struct io_kiocb *head)
 {
 	struct io_kiocb *req;
@@ -2315,7 +2320,7 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
 static inline bool io_should_wake(struct io_wait_queue *iowq)
 {
 	struct io_ring_ctx *ctx = iowq->ctx;
-	int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
+	int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
 
 	/*
 	 * Wake up if we have enough events, or if a timeout occurred since we
@@ -2399,7 +2404,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 		io_cqring_overflow_flush(ctx);
 
-		if (io_cqring_events(ctx) >= min_events)
+		/* if user messes with these they will just get an early return */
+		if (__io_cqring_events_user(ctx) >= min_events)
 			return 0;
 	} while (ret > 0);
 
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 25cd724ade18..e2c46889d5fa 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -346,6 +346,8 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	tmp = READ_ONCE(sqe->off);
 	if (tmp > USHRT_MAX)
 		return -E2BIG;
+	if (tmp + p->nbufs >= USHRT_MAX)
+		return -EINVAL;
 	p->bid = tmp;
 	return 0;
 }
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 0d9f49c575e0..f500506984ec 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -116,6 +116,8 @@ static void io_poll_req_insert_locked(struct io_kiocb *req)
 	struct io_hash_table *table = &req->ctx->cancel_table_locked;
 	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
 
+	lockdep_assert_held(&req->ctx->uring_lock);
+
 	hlist_add_head(&req->hash_node, &table->hbs[index].list);
 }
 
@@ -394,7 +396,8 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	return 1;
 }
 
-static void io_poll_double_prepare(struct io_kiocb *req)
+/* fails only when polling is already being completed by the first entry */
+static bool io_poll_double_prepare(struct io_kiocb *req)
 {
 	struct wait_queue_head *head;
 	struct io_poll *poll = io_poll_get_single(req);
@@ -403,20 +406,20 @@ static void io_poll_double_prepare(struct io_kiocb *req)
 	rcu_read_lock();
 	head = smp_load_acquire(&poll->head);
 	/*
-	 * poll arm may not hold ownership and so race with
-	 * io_poll_wake() by modifying req->flags. There is only one
-	 * poll entry queued, serialise with it by taking its head lock.
+	 * poll arm might not hold ownership and so race for req->flags with
+	 * io_poll_wake(). There is only one poll entry queued, serialise with
+	 * it by taking its head lock. As we're still arming, the tw handler
+	 * is not going to be run, so there are no races with it.
 	 */
-	if (head)
+	if (head) {
 		spin_lock_irq(&head->lock);
-
-	req->flags |= REQ_F_DOUBLE_POLL;
-	if (req->opcode == IORING_OP_POLL_ADD)
-		req->flags |= REQ_F_ASYNC_DATA;
-
-	if (head)
+		req->flags |= REQ_F_DOUBLE_POLL;
+		if (req->opcode == IORING_OP_POLL_ADD)
+			req->flags |= REQ_F_ASYNC_DATA;
 		spin_unlock_irq(&head->lock);
+	}
 	rcu_read_unlock();
+	return !!head;
 }
 
 static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
@@ -454,7 +457,11 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
 		/* mark as double wq entry */
 		wqe_private |= IO_WQE_F_DOUBLE;
 		io_init_poll_iocb(poll, first->events, first->wait.func);
-		io_poll_double_prepare(req);
+		if (!io_poll_double_prepare(req)) {
+			/* the request is completing, just back off */
+			kfree(poll);
+			return;
+		}
 		*poll_ptr = poll;
 	} else {
 		/* fine to modify, there is no poll queued to race with us */
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
index 32aa6e9dacc2..9ac4456d48fc 100755
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -29,7 +29,7 @@ if [[ "$#" -eq "0" ]]; then
 	for IP in "${IPs[@]}"; do
 		for mode in $(seq 1 3); do
 			$0 "$IP" udp -m "$mode" -t 1 -n 32
-			$0 "$IP" tcp -m "$mode" -t 1 -n 32
+			$0 "$IP" tcp -m "$mode" -t 1 -n 1
 		done
 	done
 