forked from Minki/linux
0fc8c2acbf
io_cqring_wait (and its wake function io_has_work) used cached_cq_tail in
order to calculate the number of CQEs. cached_cq_tail is set strictly
before the user-visible rings->cq.tail.
However, as far as userspace is concerned, if io_uring_enter(2) is called
with a minimum number of events, the caller will verify the result by checking
rings->cq.tail.
It is therefore possible for io_uring_enter(2) to return early with fewer
events visible to the user.
Instead, make the wait functions read from the user-visible value, so there
will be no discrepancy.
This is triggered eventually by the following reproducer:
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
unsigned int cqe_ready;
struct io_uring ring;
int ret, i;
ret = io_uring_queue_init(N, &ring, 0);
assert(!ret);
while(true) {
for (i = 0; i < N; i++) {
sqe = io_uring_get_sqe(&ring);
io_uring_prep_nop(sqe);
sqe->flags |= IOSQE_ASYNC;
}
ret = io_uring_submit(&ring);
assert(ret == N);
do {
ret = io_uring_wait_cqes(&ring, &cqe, N, NULL, NULL);
} while(ret == -EINTR);
cqe_ready = io_uring_cq_ready(&ring);
assert(!ret);
assert(cqe_ready == N);
io_uring_cq_advance(&ring, N);
}
Fixes:
|
||
---|---|---|
.. | ||
advise.c | ||
advise.h | ||
alloc_cache.h | ||
cancel.c | ||
cancel.h | ||
epoll.c | ||
epoll.h | ||
fdinfo.c | ||
fdinfo.h | ||
filetable.c | ||
filetable.h | ||
fs.c | ||
fs.h | ||
io_uring.c | ||
io_uring.h | ||
io-wq.c | ||
io-wq.h | ||
kbuf.c | ||
kbuf.h | ||
Makefile | ||
msg_ring.c | ||
msg_ring.h | ||
net.c | ||
net.h | ||
nop.c | ||
nop.h | ||
notif.c | ||
notif.h | ||
opdef.c | ||
opdef.h | ||
openclose.c | ||
openclose.h | ||
poll.c | ||
poll.h | ||
refs.h | ||
rsrc.c | ||
rsrc.h | ||
rw.c | ||
rw.h | ||
slist.h | ||
splice.c | ||
splice.h | ||
sqpoll.c | ||
sqpoll.h | ||
statx.c | ||
statx.h | ||
sync.c | ||
sync.h | ||
tctx.c | ||
tctx.h | ||
timeout.c | ||
timeout.h | ||
uring_cmd.c | ||
uring_cmd.h | ||
xattr.c | ||
xattr.h |