Merge branch 'net-skb-defer-freeing-polish'
Eric Dumazet says: ==================== net: polish skb defer freeing While testing this recently added feature on a variety of platforms/configurations, I found the following issues: 1) A race leading to concurrent calls to smp_call_function_single_async() 2) Missed opportunity to use napi_consume_skb() 3) Need to limit the max length of the per-cpu lists. 4) Process the per-cpu list more frequently, for the (unusual) case where net_rx_action() has multiple napi_poll() to process per round. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
ee3398c787
@ -322,6 +322,14 @@ a leaked reference faster. A larger value may be useful to prevent false
|
||||
warnings on slow/loaded systems.
|
||||
Default value is 10, minimum 1, maximum 3600.
|
||||
|
||||
skb_defer_max
-------------

Max size (in skbs) of the per-cpu list of skbs being freed
by the cpu which allocated them. Used by TCP stack so far.

Default: 64
|
||||
|
||||
optmem_max
|
||||
----------
|
||||
|
||||
|
@ -3136,6 +3136,7 @@ struct softnet_data {
|
||||
/* Another possibly contended cache line */
|
||||
spinlock_t defer_lock ____cacheline_aligned_in_smp;
|
||||
int defer_count;
|
||||
int defer_ipi_scheduled;
|
||||
struct sk_buff *defer_list;
|
||||
call_single_data_t defer_csd;
|
||||
};
|
||||
|
@ -4330,6 +4330,7 @@ int netdev_max_backlog __read_mostly = 1000;
|
||||
EXPORT_SYMBOL(netdev_max_backlog);
|
||||
|
||||
int netdev_tstamp_prequeue __read_mostly = 1;
|
||||
unsigned int sysctl_skb_defer_max __read_mostly = 64;
|
||||
int netdev_budget __read_mostly = 300;
|
||||
/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
|
||||
unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
|
||||
@ -4582,9 +4583,12 @@ static void rps_trigger_softirq(void *data)
|
||||
#endif /* CONFIG_RPS */
|
||||
|
||||
/* Called from hardirq (IPI) context */
|
||||
static void trigger_rx_softirq(void *data __always_unused)
|
||||
static void trigger_rx_softirq(void *data)
|
||||
{
|
||||
struct softnet_data *sd = data;
|
||||
|
||||
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
|
||||
smp_store_release(&sd->defer_ipi_scheduled, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -6630,7 +6634,7 @@ static void skb_defer_free_flush(struct softnet_data *sd)
|
||||
|
||||
while (skb != NULL) {
|
||||
next = skb->next;
|
||||
__kfree_skb(skb);
|
||||
napi_consume_skb(skb, 1);
|
||||
skb = next;
|
||||
}
|
||||
}
|
||||
@ -6651,6 +6655,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
|
||||
for (;;) {
|
||||
struct napi_struct *n;
|
||||
|
||||
skb_defer_free_flush(sd);
|
||||
|
||||
if (list_empty(&list)) {
|
||||
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
|
||||
goto end;
|
||||
@ -6680,8 +6686,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
|
||||
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
|
||||
|
||||
net_rps_action_and_irq_enable(sd);
|
||||
end:
|
||||
skb_defer_free_flush(sd);
|
||||
end:;
|
||||
}
|
||||
|
||||
struct netdev_adjacent {
|
||||
@ -11382,7 +11387,7 @@ static int __init net_dev_init(void)
|
||||
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
|
||||
sd->cpu = i;
|
||||
#endif
|
||||
INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL);
|
||||
INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
|
||||
spin_lock_init(&sd->defer_lock);
|
||||
|
||||
init_gro_hash(&sd->backlog);
|
||||
|
@ -39,7 +39,7 @@ void dev_addr_check(struct net_device *dev);
|
||||
/* sysctls not referred to from outside net/core/ */
|
||||
extern int netdev_budget;
|
||||
extern unsigned int netdev_budget_usecs;
|
||||
|
||||
extern unsigned int sysctl_skb_defer_max;
|
||||
extern int netdev_tstamp_prequeue;
|
||||
extern int netdev_unregister_timeout_secs;
|
||||
extern int weight_p;
|
||||
|
@ -80,6 +80,7 @@
|
||||
#include <linux/user_namespace.h>
|
||||
#include <linux/indirect_call_wrapper.h>
|
||||
|
||||
#include "dev.h"
|
||||
#include "sock_destructor.h"
|
||||
|
||||
struct kmem_cache *skbuff_head_cache __ro_after_init;
|
||||
@ -6496,16 +6497,21 @@ void skb_attempt_defer_free(struct sk_buff *skb)
|
||||
int cpu = skb->alloc_cpu;
|
||||
struct softnet_data *sd;
|
||||
unsigned long flags;
|
||||
unsigned int defer_max;
|
||||
bool kick;
|
||||
|
||||
if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
|
||||
!cpu_online(cpu) ||
|
||||
cpu == raw_smp_processor_id()) {
|
||||
__kfree_skb(skb);
|
||||
nodefer: __kfree_skb(skb);
|
||||
return;
|
||||
}
|
||||
|
||||
sd = &per_cpu(softnet_data, cpu);
|
||||
defer_max = READ_ONCE(sysctl_skb_defer_max);
|
||||
if (READ_ONCE(sd->defer_count) >= defer_max)
|
||||
goto nodefer;
|
||||
|
||||
/* We do not send an IPI or any signal.
|
||||
* Remote cpu will eventually call skb_defer_free_flush()
|
||||
*/
|
||||
@ -6515,18 +6521,14 @@ void skb_attempt_defer_free(struct sk_buff *skb)
|
||||
WRITE_ONCE(sd->defer_list, skb);
|
||||
sd->defer_count++;
|
||||
|
||||
/* kick every time queue length reaches 128.
|
||||
* This should avoid blocking in smp_call_function_single_async().
|
||||
* This condition should hardly be bit under normal conditions,
|
||||
* unless cpu suddenly stopped to receive NIC interrupts.
|
||||
*/
|
||||
kick = sd->defer_count == 128;
|
||||
/* Send an IPI every time queue reaches half capacity. */
|
||||
kick = sd->defer_count == (defer_max >> 1);
|
||||
|
||||
spin_unlock_irqrestore(&sd->defer_lock, flags);
|
||||
|
||||
/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
|
||||
* if we are unlucky enough (this seems very unlikely).
|
||||
*/
|
||||
if (unlikely(kick))
|
||||
if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
|
||||
smp_call_function_single_async(cpu, &sd->defer_csd);
|
||||
}
|
||||
|
@ -578,6 +578,14 @@ static struct ctl_table net_core_table[] = {
|
||||
.extra1 = SYSCTL_ONE,
|
||||
.extra2 = &int_3600,
|
||||
},
|
||||
{
|
||||
.procname = "skb_defer_max",
|
||||
.data = &sysctl_skb_defer_max,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user