2019-05-29 23:57:49 +00:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2013-06-10 08:39:50 +00:00
|
|
|
/*
|
2013-07-10 14:13:26 +00:00
|
|
|
* net busy poll support
|
2013-06-10 08:39:50 +00:00
|
|
|
* Copyright(c) 2013 Intel Corporation.
|
|
|
|
*
|
|
|
|
* Author: Eliezer Tamir
|
|
|
|
*
|
|
|
|
* Contact Information:
|
|
|
|
* e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
|
|
|
*/
|
|
|
|
|
2013-07-10 14:13:26 +00:00
|
|
|
#ifndef _LINUX_NET_BUSY_POLL_H
|
|
|
|
#define _LINUX_NET_BUSY_POLL_H
|
2013-06-10 08:39:50 +00:00
|
|
|
|
|
|
|
#include <linux/netdevice.h>
|
2017-02-01 15:36:40 +00:00
|
|
|
#include <linux/sched/clock.h>
|
2017-02-02 18:15:33 +00:00
|
|
|
#include <linux/sched/signal.h>
|
2013-06-10 08:39:50 +00:00
|
|
|
#include <net/ip.h>
|
|
|
|
|
2017-03-24 17:07:53 +00:00
|
|
|
/* 0 - Reserved to indicate value not set
|
|
|
|
* 1..NR_CPUS - Reserved for sender_cpu
|
|
|
|
* NR_CPUS+1..~0 - Region available for NAPI IDs
|
|
|
|
*/
|
|
|
|
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
|
|
|
|
|
2020-11-30 18:51:57 +00:00
|
|
|
/* Default budget that sk_busy_loop() hands to napi_busy_loop() when the
 * socket's sk_busy_poll_budget is zero.
 */
#define BUSY_POLL_BUDGET 8
|
|
|
|
|
2017-08-11 16:31:24 +00:00
|
|
|
#ifdef CONFIG_NET_RX_BUSY_POLL
|
|
|
|
|
|
|
|
struct napi_struct;
|
|
|
|
extern unsigned int sysctl_net_busy_read __read_mostly;
|
|
|
|
extern unsigned int sysctl_net_busy_poll __read_mostly;
|
|
|
|
|
2013-07-08 13:20:34 +00:00
|
|
|
static inline bool net_busy_loop_on(void)
|
2013-06-28 12:59:35 +00:00
|
|
|
{
|
2013-07-10 14:13:36 +00:00
|
|
|
return sysctl_net_busy_poll;
|
2013-06-28 12:59:35 +00:00
|
|
|
}
|
|
|
|
|
2017-03-24 17:08:18 +00:00
|
|
|
static inline bool sk_can_busy_loop(const struct sock *sk)
|
2013-06-28 12:59:26 +00:00
|
|
|
{
|
2021-06-29 14:12:45 +00:00
|
|
|
return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current);
|
2013-06-28 12:59:26 +00:00
|
|
|
}
|
|
|
|
|
2017-03-24 17:08:24 +00:00
|
|
|
/* Loop-termination callback: sk_busy_loop() passes this to napi_busy_loop()
 * as loop_end, with the socket as loop_end_arg, when called with a zero
 * nonblock argument. Only declared in this header.
 */
bool sk_busy_loop_end(void *p, unsigned long start_time);
|
|
|
|
|
|
|
|
void napi_busy_loop(unsigned int napi_id,
|
|
|
|
bool (*loop_end)(void *, unsigned long),
|
2020-11-30 18:51:57 +00:00
|
|
|
void *loop_end_arg, bool prefer_busy_poll, u16 budget);
|
2013-06-14 13:33:35 +00:00
|
|
|
|
2017-03-24 17:08:18 +00:00
|
|
|
#else /* CONFIG_NET_RX_BUSY_POLL */
|
|
|
|
/* Stub used when CONFIG_NET_RX_BUSY_POLL is not set: busy polling is never
 * enabled. Returns bool so that both variants of this helper share the
 * same prototype.
 */
static inline bool net_busy_loop_on(void)
{
	return false;
}
|
|
|
|
|
2017-03-24 17:08:18 +00:00
|
|
|
/* Stub used when CONFIG_NET_RX_BUSY_POLL is not set: no socket can busy
 * poll. @sk is never dereferenced and is const-qualified to match the
 * CONFIG_NET_RX_BUSY_POLL variant of this helper.
 */
static inline bool sk_can_busy_loop(const struct sock *sk)
{
	return false;
}
|
|
|
|
|
2017-03-24 17:08:18 +00:00
|
|
|
#endif /* CONFIG_NET_RX_BUSY_POLL */
|
2013-06-10 08:39:50 +00:00
|
|
|
|
2017-03-24 17:08:18 +00:00
|
|
|
/* Timestamp used by the busy-poll timeout helpers.
 *
 * With CONFIG_NET_RX_BUSY_POLL this is local_clock() shifted right by 10
 * bits; the timeout helpers add their usec budgets directly to it.
 * Without CONFIG_NET_RX_BUSY_POLL the result is always 0.
 */
static inline unsigned long busy_loop_current_time(void)
{
#ifndef CONFIG_NET_RX_BUSY_POLL
	return 0;
#else
	return (unsigned long)(local_clock() >> 10);
#endif
}
|
2013-06-10 08:39:50 +00:00
|
|
|
|
2017-03-24 17:08:18 +00:00
|
|
|
/* in poll/select we use the global sysctl_net_busy_poll value */
|
|
|
|
static inline bool busy_loop_timeout(unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned long budget_usec = READ_ONCE(sysctl_net_busy_poll);
	unsigned long deadline;
	unsigned long now;

	/* A zero budget leaves nothing to wait for: report expiry. */
	if (!budget_usec)
		return true;

	/* Expired once the clock has moved past start_time + budget. */
	deadline = start_time + budget_usec;
	now = busy_loop_current_time();

	return time_after(now, deadline);
#else
	/* Busy polling is compiled out: the timeout is always reached. */
	return true;
#endif
}
|
|
|
|
|
2017-03-24 17:08:18 +00:00
|
|
|
/* Per-socket counterpart of busy_loop_timeout(): the budget is taken from
 * @sk's sk_ll_usec instead of the global sysctl.
 *
 * Returns true when the budget is zero, when busy_loop_current_time() is
 * past @start_time plus the budget, or when CONFIG_NET_RX_BUSY_POLL is not
 * set.
 */
static inline bool sk_busy_loop_timeout(struct sock *sk,
					unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned long budget_usec = READ_ONCE(sk->sk_ll_usec);
	unsigned long deadline;
	unsigned long now;

	if (!budget_usec)
		return true;

	deadline = start_time + budget_usec;
	now = busy_loop_current_time();

	return time_after(now, deadline);
#else
	return true;
#endif
}
|
2016-11-16 17:10:42 +00:00
|
|
|
|
2017-03-24 17:08:24 +00:00
|
|
|
/* Busy poll the NAPI context recorded on @sk, if there is one.
 *
 * @sk:       socket whose sk_napi_id selects the NAPI context to poll
 * @nonblock: when non-zero, no loop_end callback is handed to
 *            napi_busy_loop(); otherwise sk_busy_loop_end() is used
 *
 * Nothing happens unless sk_napi_id is a real NAPI ID (>= MIN_NAPI_ID),
 * or when CONFIG_NET_RX_BUSY_POLL is not set.
 *
 * sk_prefer_busy_poll is forwarded as the prefer_busy_poll argument
 * (presumably the SO_PREFER_BUSY_POLL socket option). Preferred busy
 * polling works in concert with the napi_defer_hard_irqs and
 * gro_flush_timeout knobs: softirq NAPI processing exits early so the work
 * can be done by busy polling, and regular softirq handling resumes once
 * the gro_flush_timeout watchdog fires because the application stopped
 * busy polling.
 *
 * A zero sk_busy_poll_budget falls back to BUSY_POLL_BUDGET.
 */
static inline void sk_busy_loop(struct sock *sk, int nonblock)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int napi_id = READ_ONCE(sk->sk_napi_id);

	if (napi_id >= MIN_NAPI_ID)
		napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
			       READ_ONCE(sk->sk_prefer_busy_poll),
			       READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
#endif
}
|
|
|
|
|
2017-03-24 17:08:06 +00:00
|
|
|
/* used in the NIC receive handler to mark the skb */
|
|
|
|
static inline void skb_mark_napi_id(struct sk_buff *skb,
				    struct napi_struct *napi)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* An skb that already carries a valid NAPI ID keeps it. */
	if (skb->napi_id >= MIN_NAPI_ID)
		return;

	skb->napi_id = napi->napi_id;
#endif
}
|
|
|
|
|
2016-11-16 17:10:42 +00:00
|
|
|
/* used in the protocol handler to propagate the napi_id to the socket */
|
|
|
|
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	const unsigned int skb_napi_id = skb->napi_id;

	/* Only store when the socket's recorded ID actually differs. */
	if (unlikely(READ_ONCE(sk->sk_napi_id) != skb_napi_id))
		WRITE_ONCE(sk->sk_napi_id, skb_napi_id);
#endif
	/* Done regardless of CONFIG_NET_RX_BUSY_POLL. */
	sk_rx_queue_update(sk, skb);
}
|
|
|
|
|
2021-12-02 23:37:24 +00:00
|
|
|
/* Variant of sk_mark_napi_id() for passive flow setup,
|
|
|
|
* as sk->sk_napi_id and sk->sk_rx_queue_mapping content
|
|
|
|
* needs to be set.
|
|
|
|
*/
|
|
|
|
static inline void sk_mark_napi_id_set(struct sock *sk,
				       const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* Unconditional store: no comparison with the current value. */
	WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
	/* Done regardless of CONFIG_NET_RX_BUSY_POLL. */
	sk_rx_queue_set(sk, skb);
}
|
|
|
|
|
2020-12-01 14:22:59 +00:00
|
|
|
/* Record @napi_id on @sk only if the socket has no NAPI ID yet, i.e. its
 * sk_napi_id is still zero. Does nothing when CONFIG_NET_RX_BUSY_POLL is
 * not set.
 */
static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* An ID is already recorded: leave it alone. */
	if (READ_ONCE(sk->sk_napi_id))
		return;

	WRITE_ONCE(sk->sk_napi_id, napi_id);
#endif
}
|
|
|
|
|
2020-11-30 18:52:01 +00:00
|
|
|
/* variant used for unconnected sockets */
|
|
|
|
static inline void sk_mark_napi_id_once(struct sock *sk,
					const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int napi_id = skb->napi_id;

	/* Takes effect only while the socket's sk_napi_id is still zero. */
	__sk_mark_napi_id_once(sk, napi_id);
#endif
}
|
|
|
|
|
|
|
|
/* Same as sk_mark_napi_id_once(), but the NAPI ID comes from the receive
 * queue attached to an XDP buffer (xdp->rxq->napi_id) instead of an skb.
 */
static inline void sk_mark_napi_id_once_xdp(struct sock *sk,
					    const struct xdp_buff *xdp)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int napi_id = xdp->rxq->napi_id;

	__sk_mark_napi_id_once(sk, napi_id);
#endif
}
|
|
|
|
|
2013-07-10 14:13:26 +00:00
|
|
|
#endif /* _LINUX_NET_BUSY_POLL_H */
|