Alexei Starovoitov says:
====================
pull-request: bpf-next 2020-12-03
The main changes are:
1) Support BTF in kernel modules, from Andrii.
2) Introduce preferred busy-polling, from Björn.
3) bpf_ima_inode_hash() and bpf_bprm_opts_set() helpers, from KP Singh.
4) Memcg-based memory accounting for bpf objects, from Roman.
5) Allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks, from Stanislav.
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (118 commits)
selftests/bpf: Fix invalid use of strncat in test_sockmap
libbpf: Use memcpy instead of strncpy to please GCC
selftests/bpf: Add fentry/fexit/fmod_ret selftest for kernel module
selftests/bpf: Add tp_btf CO-RE reloc test for modules
libbpf: Support attachment of BPF tracing programs to kernel modules
libbpf: Factor out low-level BPF program loading helper
bpf: Allow to specify kernel module BTFs when attaching BPF programs
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF
selftests/bpf: Add support for marking sub-tests as skipped
selftests/bpf: Add bpf_testmod kernel module for testing
libbpf: Add kernel module BTF support for CO-RE relocations
libbpf: Refactor CO-RE relocs to not assume a single BTF object
libbpf: Add internal helper to load BTF data by FD
bpf: Keep module's btf_data_size intact after load
bpf: Fix bpf_put_raw_tracepoint()'s use of __module_address()
selftests/bpf: Add Userspace tests for TCP_WINDOW_CLAMP
bpf: Adds support for setting window clamp
samples/bpf: Fix spelling mistake "recieving" -> "receiving"
bpf: Fix cold build of test_progs-no_alu32
...
====================
Link: https://lore.kernel.org/r/20201204021936.85653-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -6448,7 +6448,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 
 		WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
 
-		new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
+		new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+			      NAPIF_STATE_PREFER_BUSY_POLL);
 
 		/* If STATE_MISSED was set, leave STATE_SCHED set,
 		 * because we will call napi->poll() one more time.
@@ -6485,10 +6486,30 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
 
 #define BUSY_POLL_BUDGET 8
 
-static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
+static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
 {
+	if (!skip_schedule) {
+		gro_normal_list(napi);
+		__napi_schedule(napi);
+		return;
+	}
+
+	if (napi->gro_bitmask) {
+		/* flush too old packets
+		 * If HZ < 1000, flush all packets.
+		 */
+		napi_gro_flush(napi, HZ >= 1000);
+	}
+
+	gro_normal_list(napi);
+	clear_bit(NAPI_STATE_SCHED, &napi->state);
+}
+
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
+			   u16 budget)
+{
+	bool skip_schedule = false;
+	unsigned long timeout;
 	int rc;
 
 	/* Busy polling means there is a high chance device driver hard irq
@@ -6505,29 +6526,33 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 
 	local_bh_disable();
 
+	if (prefer_busy_poll) {
+		napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
+		timeout = READ_ONCE(napi->dev->gro_flush_timeout);
+		if (napi->defer_hard_irqs_count && timeout) {
+			hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
+			skip_schedule = true;
+		}
+	}
+
 	/* All we really want here is to re-enable device interrupts.
 	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 	 */
-	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	rc = napi->poll(napi, budget);
 	/* We can't gro_normal_list() here, because napi->poll() might have
 	 * rearmed the napi (napi_complete_done()) in which case it could
 	 * already be running on another CPU.
 	 */
-	trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+	trace_napi_poll(napi, rc, budget);
 	netpoll_poll_unlock(have_poll_lock);
-	if (rc == BUSY_POLL_BUDGET) {
-		/* As the whole budget was spent, we still own the napi so can
-		 * safely handle the rx_list.
-		 */
-		gro_normal_list(napi);
-		__napi_schedule(napi);
-	}
+	if (rc == budget)
+		__busy_poll_stop(napi, skip_schedule);
 	local_bh_enable();
 }
 
 void napi_busy_loop(unsigned int napi_id,
 		    bool (*loop_end)(void *, unsigned long),
-		    void *loop_end_arg)
+		    void *loop_end_arg, bool prefer_busy_poll, u16 budget)
 {
 	unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
 	int (*napi_poll)(struct napi_struct *napi, int budget);
@@ -6555,17 +6580,23 @@ restart:
 			 * we avoid dirtying napi->state as much as we can.
 			 */
 			if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
-				   NAPIF_STATE_IN_BUSY_POLL))
+				   NAPIF_STATE_IN_BUSY_POLL)) {
+				if (prefer_busy_poll)
+					set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
 				goto count;
+			}
 			if (cmpxchg(&napi->state, val,
 				    val | NAPIF_STATE_IN_BUSY_POLL |
-					  NAPIF_STATE_SCHED) != val)
+					  NAPIF_STATE_SCHED) != val) {
+				if (prefer_busy_poll)
+					set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
 				goto count;
+			}
 			have_poll_lock = netpoll_poll_lock(napi);
 			napi_poll = napi->poll;
 		}
-		work = napi_poll(napi, BUSY_POLL_BUDGET);
-		trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+		work = napi_poll(napi, budget);
+		trace_napi_poll(napi, work, budget);
 		gro_normal_list(napi);
 count:
 		if (work > 0)
@@ -6578,7 +6609,7 @@ count:
 
 		if (unlikely(need_resched())) {
 			if (napi_poll)
-				busy_poll_stop(napi, have_poll_lock);
+				busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
 			preempt_enable();
 			rcu_read_unlock();
 			cond_resched();
@@ -6589,7 +6620,7 @@ count:
 		cpu_relax();
 	}
 	if (napi_poll)
-		busy_poll_stop(napi, have_poll_lock);
+		busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
 	preempt_enable();
 out:
 	rcu_read_unlock();
@@ -6640,8 +6671,10 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
 	if (!napi_disable_pending(napi) &&
-	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
+	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) {
+		clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
 		__napi_schedule_irqoff(napi);
+	}
 
 	return HRTIMER_NORESTART;
 }
@@ -6699,6 +6732,7 @@ void napi_disable(struct napi_struct *n)
 
 	hrtimer_cancel(&n->timer);
 
+	clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
 	clear_bit(NAPI_STATE_DISABLE, &n->state);
 }
 EXPORT_SYMBOL(napi_disable);
@@ -6771,6 +6805,19 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
+	/* The NAPI context has more processing work, but busy-polling
+	 * is preferred. Exit early.
+	 */
+	if (napi_prefer_busy_poll(n)) {
+		if (napi_complete_done(n, work)) {
+			/* If timeout is not set, we need to make sure
+			 * that the NAPI is re-scheduled.
+			 */
+			napi_schedule(n);
+		}
+		goto out_unlock;
+	}
+
 	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
@@ -9753,7 +9800,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 		rx[i].dev = dev;
 
 		/* XDP RX-queue setup */
-		err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+		err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0);
 		if (err < 0)
 			goto err_rxq_info;
 	}
 
Reference in New Issue
Block a user