forked from Minki/linux
net: ipv4: add sysctl for nexthop api compatibility mode
The current route nexthop API maintains user space compatibility with the old route API by default: dumps and netlink notifications support both the new and the old API format. On systems which have moved to the new API, this compatibility mode cancels some of the performance benefits provided by the new nexthop API. This patch adds a new sysctl, nexthop_compat_mode, which is on by default but provides the ability to turn off compatibility mode, allowing systems to run entirely with the new routing API. Old route API behaviour and support are not modified by this sysctl. It uses a single sysctl to cover both ipv4 and ipv6, following other sysctls, and covers dumps and delete notifications as suggested by David Ahern. Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> Reviewed-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
11dd74b338
commit
4f80116d3d
@ -1560,6 +1560,18 @@ skip_notify_on_dev_down - BOOLEAN
|
||||
on userspace caches to track link events and evict routes.
|
||||
Default: false (generate message)
|
||||
|
||||
nexthop_compat_mode - BOOLEAN
|
||||
New nexthop API provides a means for managing nexthops independent of
|
||||
prefixes. Backwards compatibility with old route format is enabled by
|
||||
default which means route dumps and notifications contain the new
|
||||
nexthop attribute but also the full, expanded nexthop definition.
|
||||
Further, updates or deletes of a nexthop configuration generate route
|
||||
notifications for each fib entry using the nexthop. Once a system
|
||||
understands the new API, this sysctl can be disabled to achieve full
|
||||
performance benefits of the new API by disabling the nexthop expansion
|
||||
and extraneous notifications.
|
||||
Default: true (backward compat mode)
|
||||
|
||||
IPv6 Fragmentation:
|
||||
|
||||
ip6frag_high_thresh - INTEGER
|
||||
|
@ -111,6 +111,8 @@ struct netns_ipv4 {
|
||||
int sysctl_tcp_early_demux;
|
||||
int sysctl_udp_early_demux;
|
||||
|
||||
int sysctl_nexthop_compat_mode;
|
||||
|
||||
int sysctl_fwmark_reflect;
|
||||
int sysctl_tcp_fwmark_accept;
|
||||
#ifdef CONFIG_NET_L3_MASTER_DEV
|
||||
|
@ -1835,6 +1835,7 @@ static __net_init int inet_init_net(struct net *net)
|
||||
net->ipv4.sysctl_ip_early_demux = 1;
|
||||
net->ipv4.sysctl_udp_early_demux = 1;
|
||||
net->ipv4.sysctl_tcp_early_demux = 1;
|
||||
net->ipv4.sysctl_nexthop_compat_mode = 1;
|
||||
#ifdef CONFIG_SYSCTL
|
||||
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
|
||||
#endif
|
||||
|
@ -1780,6 +1780,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
|
||||
goto nla_put_failure;
|
||||
if (nexthop_is_blackhole(fi->nh))
|
||||
rtm->rtm_type = RTN_BLACKHOLE;
|
||||
if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
|
||||
goto offload;
|
||||
}
|
||||
|
||||
if (nhs == 1) {
|
||||
@ -1805,6 +1807,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
|
||||
goto nla_put_failure;
|
||||
}
|
||||
|
||||
offload:
|
||||
if (fri->offload)
|
||||
rtm->rtm_flags |= RTM_F_OFFLOAD;
|
||||
if (fri->trap)
|
||||
|
@ -784,7 +784,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
|
||||
list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
|
||||
/* __ip6_del_rt does a release, so do a hold here */
|
||||
fib6_info_hold(f6i);
|
||||
ipv6_stub->ip6_del_rt(net, f6i, false);
|
||||
ipv6_stub->ip6_del_rt(net, f6i,
|
||||
!net->ipv4.sysctl_nexthop_compat_mode);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1041,7 +1042,7 @@ out:
|
||||
if (!rc) {
|
||||
nh_base_seq_inc(net);
|
||||
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
|
||||
if (replace_notify)
|
||||
if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
|
||||
nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
|
||||
}
|
||||
|
||||
|
@ -710,6 +710,15 @@ static struct ctl_table ipv4_net_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_tcp_early_demux
|
||||
},
|
||||
{
|
||||
.procname = "nexthop_compat_mode",
|
||||
.data = &init_net.ipv4.sysctl_nexthop_compat_mode,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
{
|
||||
.procname = "ip_default_ttl",
|
||||
.data = &init_net.ipv4.sysctl_ip_default_ttl,
|
||||
|
@ -5557,7 +5557,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
|
||||
if (nexthop_is_blackhole(rt->nh))
|
||||
rtm->rtm_type = RTN_BLACKHOLE;
|
||||
|
||||
if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
|
||||
if (net->ipv4.sysctl_nexthop_compat_mode &&
|
||||
rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
|
||||
goto nla_put_failure;
|
||||
|
||||
rtm->rtm_flags |= nh_flags;
|
||||
|
Loading…
Reference in New Issue
Block a user