io_uring/napi: use ktime in busy polling

It's more natural to use ktime/ns instead of keeping around usec,
especially since we're comparing it against user provided timers,
so convert napi busy poll internal handling to ktime. It's also nicer
since the type (ktime_t vs unsigned long) now tells the unit of measure.

Keep everything as ktime, which we convert to/from microseconds for
IORING_[UN]REGISTER_NAPI. The net/ busy polling seems to work with
usec, however it's not real usec as a shift by 10 is used to get it from
nsecs, see busy_loop_current_time(), so it's easy to get the truncated
nsec back, and we get better precision.

Note, we can further improve it later by removing the truncation and
maybe convincing net/ to use ktime/ns instead.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/95e7ec8d095069a3ed5d40a4bc6f8b586698bc7e.1722003776.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov 2024-07-26 15:24:30 +01:00 committed by Jens Axboe
parent 0db4618e8f
commit 342b2e395d
4 changed files with 30 additions and 24 deletions

View File

@ -404,7 +404,7 @@ struct io_ring_ctx {
spinlock_t napi_lock; /* napi_list lock */ spinlock_t napi_lock; /* napi_list lock */
/* napi busy poll default timeout */ /* napi busy poll default timeout */
unsigned int napi_busy_poll_to; ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll; bool napi_prefer_busy_poll;
bool napi_enabled; bool napi_enabled;

View File

@ -43,7 +43,7 @@ struct io_wait_queue {
ktime_t timeout; ktime_t timeout;
#ifdef CONFIG_NET_RX_BUSY_POLL #ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_busy_poll_to; ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll; bool napi_prefer_busy_poll;
#endif #endif
}; };

View File

@ -33,6 +33,12 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
return NULL; return NULL;
} }
/*
 * Convert a timestamp in the units returned by busy_loop_current_time()
 * back into a ktime_t.
 *
 * The net/ busy-poll clock is nanoseconds shifted right by 10 (an
 * approximation of usecs), so shifting left by 10 reverses it, minus the
 * truncated low bits. Widen to 64 bits before shifting: where unsigned long
 * is 32 bits, shifting in the native width would drop the top 10 bits of @t.
 */
static inline ktime_t net_to_ktime(unsigned long t)
{
	/* napi approximating usecs, reverse busy_loop_current_time */
	return ns_to_ktime((unsigned long long)t << 10);
}
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{ {
struct hlist_head *hash_list; struct hlist_head *hash_list;
@ -102,14 +108,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
__io_napi_remove_stale(ctx); __io_napi_remove_stale(ctx);
} }
static inline bool io_napi_busy_loop_timeout(unsigned long start_time, static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
unsigned long bp_usec) ktime_t bp)
{ {
if (bp_usec) { if (bp) {
unsigned long end_time = start_time + bp_usec; ktime_t end_time = ktime_add(start_time, bp);
unsigned long now = busy_loop_current_time(); ktime_t now = net_to_ktime(busy_loop_current_time());
return time_after(now, end_time); return ktime_after(now, end_time);
} }
return true; return true;
@ -124,7 +130,8 @@ static bool io_napi_busy_loop_should_end(void *data,
return true; return true;
if (io_should_wake(iowq) || io_has_work(iowq->ctx)) if (io_should_wake(iowq) || io_has_work(iowq->ctx))
return true; return true;
if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to)) if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
iowq->napi_busy_poll_dt))
return true; return true;
return false; return false;
@ -181,10 +188,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
*/ */
void io_napi_init(struct io_ring_ctx *ctx) void io_napi_init(struct io_ring_ctx *ctx)
{ {
u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;
INIT_LIST_HEAD(&ctx->napi_list); INIT_LIST_HEAD(&ctx->napi_list);
spin_lock_init(&ctx->napi_lock); spin_lock_init(&ctx->napi_lock);
ctx->napi_prefer_busy_poll = false; ctx->napi_prefer_busy_poll = false;
ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll); ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
} }
/* /*
@ -217,7 +226,7 @@ void io_napi_free(struct io_ring_ctx *ctx)
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{ {
const struct io_uring_napi curr = { const struct io_uring_napi curr = {
.busy_poll_to = ctx->napi_busy_poll_to, .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
.prefer_busy_poll = ctx->napi_prefer_busy_poll .prefer_busy_poll = ctx->napi_prefer_busy_poll
}; };
struct io_uring_napi napi; struct io_uring_napi napi;
@ -232,7 +241,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
if (copy_to_user(arg, &curr, sizeof(curr))) if (copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT; return -EFAULT;
WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to); WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
WRITE_ONCE(ctx->napi_enabled, true); WRITE_ONCE(ctx->napi_enabled, true);
return 0; return 0;
@ -249,14 +258,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{ {
const struct io_uring_napi curr = { const struct io_uring_napi curr = {
.busy_poll_to = ctx->napi_busy_poll_to, .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
.prefer_busy_poll = ctx->napi_prefer_busy_poll .prefer_busy_poll = ctx->napi_prefer_busy_poll
}; };
if (arg && copy_to_user(arg, &curr, sizeof(curr))) if (arg && copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT; return -EFAULT;
WRITE_ONCE(ctx->napi_busy_poll_to, 0); WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
WRITE_ONCE(ctx->napi_prefer_busy_poll, false); WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
WRITE_ONCE(ctx->napi_enabled, false); WRITE_ONCE(ctx->napi_enabled, false);
return 0; return 0;
@ -275,23 +284,20 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
struct timespec64 *ts) struct timespec64 *ts)
{ {
unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to); ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
if (ts) { if (ts) {
struct timespec64 poll_to_ts; struct timespec64 poll_to_ts;
poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to); poll_to_ts = ns_to_timespec64(ktime_to_ns(poll_dt));
if (timespec64_compare(ts, &poll_to_ts) < 0) { if (timespec64_compare(ts, &poll_to_ts) < 0) {
s64 poll_to_ns = timespec64_to_ns(ts); s64 poll_to_ns = timespec64_to_ns(ts);
if (poll_to_ns > 0) { if (poll_to_ns > 0)
u64 val = poll_to_ns + 999; poll_dt = ns_to_ktime(poll_to_ns);
do_div(val, 1000);
poll_to = val;
}
} }
} }
iowq->napi_busy_poll_to = poll_to; iowq->napi_busy_poll_dt = poll_dt;
} }
/* /*
@ -320,7 +326,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
LIST_HEAD(napi_list); LIST_HEAD(napi_list);
bool is_stale = false; bool is_stale = false;
if (!READ_ONCE(ctx->napi_busy_poll_to)) if (!READ_ONCE(ctx->napi_busy_poll_dt))
return 0; return 0;
if (list_empty_careful(&ctx->napi_list)) if (list_empty_careful(&ctx->napi_list))
return 0; return 0;

View File

@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
struct socket *sock; struct socket *sock;
if (!READ_ONCE(ctx->napi_busy_poll_to)) if (!READ_ONCE(ctx->napi_busy_poll_dt))
return; return;
sock = sock_from_file(req->file); sock = sock_from_file(req->file);