net: add skb_defer_max sysctl

commit 68822bdf76 ("net: generalize skb freeing
deferral to per-cpu lists") added another per-cpu
cache of skbs. It was expected to be small,
and an IPI was forced whenever the list reached 128
skbs.

We might need to control the queue capacity, and the
added latency, more precisely.

An IPI is now generated whenever the queue reaches half
capacity.

The default value of the new limit is 64.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 39564c3fdc
parent 2db60eed1a
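For scale: with the new default of 64, the producer kicks the remote CPU once
the 32nd skb is queued (64 >> 1), and the list is now hard-capped at 64
entries, whereas previously the IPI fired only at 128 and there was no cap at
all. Raising net.core.skb_defer_max trades a longer per-cpu free list (and
more added latency before skbs are freed) for fewer IPIs.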
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -322,6 +322,14 @@ a leaked reference faster. A larger value may be useful to prevent false
 warnings on slow/loaded systems.
 Default value is 10, minimum 1, maximum 3600.
 
+skb_defer_max
+-------------
+
+Max size (in skbs) of the per-cpu list of skbs being freed
+by the cpu which allocated them. Used by TCP stack so far.
+
+Default: 64
+
 optmem_max
 ----------
 
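The knob is exposed as /proc/sys/net/core/skb_defer_max (sysctl name
net.core.skb_defer_max, per the table registration at the end of this diff).
A minimal, illustrative userspace snippet for reading the current value; it is
not part of the patch and assumes only a kernel that carries it:

/* Illustrative reader for net.core.skb_defer_max; not part of the patch. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/net/core/skb_defer_max", "r");
        unsigned int defer_max;

        if (!f) {
                perror("fopen"); /* kernel lacks this patch, or no procfs */
                return 1;
        }
        if (fscanf(f, "%u", &defer_max) != 1) {
                fclose(f);
                return 1;
        }
        fclose(f);
        printf("skb_defer_max = %u\n", defer_max);
        return 0;
}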
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4330,6 +4330,7 @@ int netdev_max_backlog __read_mostly = 1000;
 EXPORT_SYMBOL(netdev_max_backlog);
 
 int netdev_tstamp_prequeue __read_mostly = 1;
+unsigned int sysctl_skb_defer_max __read_mostly = 64;
 int netdev_budget __read_mostly = 300;
 /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
 unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -39,7 +39,8 @@ void dev_addr_check(struct net_device *dev);
 
 /* sysctls not referred to from outside net/core/ */
 extern int netdev_budget;
 extern unsigned int netdev_budget_usecs;
+extern unsigned int sysctl_skb_defer_max;
 extern int netdev_tstamp_prequeue;
 extern int netdev_unregister_timeout_secs;
 extern int weight_p;
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,6 +80,7 @@
 #include <linux/user_namespace.h>
 #include <linux/indirect_call_wrapper.h>
 
+#include "dev.h"
 #include "sock_destructor.h"
 
 struct kmem_cache *skbuff_head_cache __ro_after_init;
@@ -6496,16 +6497,21 @@ void skb_attempt_defer_free(struct sk_buff *skb)
 	int cpu = skb->alloc_cpu;
 	struct softnet_data *sd;
 	unsigned long flags;
+	unsigned int defer_max;
 	bool kick;
 
 	if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
 	    !cpu_online(cpu) ||
 	    cpu == raw_smp_processor_id()) {
-		__kfree_skb(skb);
+nodefer:	__kfree_skb(skb);
 		return;
 	}
 
 	sd = &per_cpu(softnet_data, cpu);
+	defer_max = READ_ONCE(sysctl_skb_defer_max);
+	if (READ_ONCE(sd->defer_count) >= defer_max)
+		goto nodefer;
+
 	/* We do not send an IPI or any signal.
 	 * Remote cpu will eventually call skb_defer_free_flush()
 	 */
@@ -6515,11 +6521,8 @@ void skb_attempt_defer_free(struct sk_buff *skb)
 	WRITE_ONCE(sd->defer_list, skb);
 	sd->defer_count++;
 
-	/* kick every time queue length reaches 128.
-	 * This condition should hardly be hit under normal conditions,
-	 * unless cpu suddenly stopped to receive NIC interrupts.
-	 */
-	kick = sd->defer_count == 128;
+	/* Send an IPI every time queue reaches half capacity. */
+	kick = sd->defer_count == (defer_max >> 1);
 
 	spin_unlock_irqrestore(&sd->defer_lock, flags);
 
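For reference, the consumer side named in the comment above is not changed by
this patch. A simplified sketch of the drain path, modeled on
skb_defer_free_flush() as introduced by commit 68822bdf76 (not verbatim kernel
source):

/* Sketch of the remote-CPU drain path; modeled on skb_defer_free_flush()
 * from commit 68822bdf76, simplified and not verbatim.
 */
static void skb_defer_free_flush(struct softnet_data *sd)
{
	struct sk_buff *skb, *next;
	unsigned long flags;

	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
	if (!READ_ONCE(sd->defer_list))
		return;

	/* Detach the whole list under the lock... */
	spin_lock_irqsave(&sd->defer_lock, flags);
	skb = sd->defer_list;
	sd->defer_list = NULL;
	sd->defer_count = 0;
	spin_unlock_irqrestore(&sd->defer_lock, flags);

	/* ...then free every skb outside of it. */
	while (skb != NULL) {
		next = skb->next;
		__kfree_skb(skb);
		skb = next;
	}
}

When kick is true, the producer schedules sd->defer_csd via
smp_call_function_single_async(cpu, &sd->defer_csd), so the remote CPU runs
its NET_RX softirq and performs the flush; the exact guard around that call
varies across kernel versions.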
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -578,6 +578,14 @@ static struct ctl_table net_core_table[] = {
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &int_3600,
 	},
+	{
+		.procname	= "skb_defer_max",
+		.data		= &sysctl_skb_defer_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
 	{ }
 };
 
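Note that the table entry sets .extra1 = SYSCTL_ZERO with no .extra2, so any
non-negative value is accepted. Writing 0 effectively disables deferral:
READ_ONCE(sd->defer_count) >= defer_max is then always true, and
skb_attempt_defer_free() takes the nodefer path, freeing the skb on the
current CPU immediately.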