mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 04:02:20 +00:00
Merge branch 'allow-configuration-of-multipath-hash-seed'
Petr Machata says: ==================== Allow configuration of multipath hash seed Let me just quote the commit message of patch #2 here to inform the motivation and some of the implementation: When calculating hashes for the purpose of multipath forwarding, both IPv4 and IPv6 code currently fall back on flow_hash_from_keys(). That uses a randomly-generated seed. That's a fine choice by default, but unfortunately some deployments may need a tighter control over the seed used. In this patchset, make the seed configurable by adding a new sysctl key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used specifically for multipath forwarding and not for the other concerns that flow_hash_from_keys() is used for, such as queue selection. Expose the knob as sysctl because other such settings, such as headers to hash, are also handled that way. Despite being placed in the net.ipv4 namespace, the multipath seed sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of TCP variables. Like those, the multipath hash seed is a per-netns variable. The seed used by flow_hash_from_keys() is a 128-bit quantity. However it seems that usually the seed is a much more modest value. 32 bits seem typical (Cisco, Cumulus), some systems go even lower. For that reason, and to decouple the user interface from implementation details, go with a 32-bit quantity, which is then quadruplicated to form the siphash key. One example of use of this interface is avoiding hash polarization, where two ECMP routers, one behind the other, happen to make consistent hashing decisions, and as a result, part of the ECMP space of the latter router is never used. Another is a load balancer where several machines forward traffic to one of a number of leaves, and the forwarding decisions need to be made consistently. (This is a case of a desired hash polarization, mentioned e.g. in chapter 6.3 of [0].) 
There has already been a proposal to include a hash seed control interface in the past[1]. - Patches #1-#2 contain the substance of the work - Patch #3 is an mlxsw offload - Patches #4 and #5 are a selftest [0] https://www.usenix.org/system/files/conference/nsdi18/nsdi18-araujo.pdf [1] https://lore.kernel.org/netdev/YIlVpYMCn%2F8WfE1P@rnd/ ==================== Link: https://lore.kernel.org/r/20240607151357.421181-1-petrm@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
05f43db7f0
@ -131,6 +131,20 @@ fib_multipath_hash_fields - UNSIGNED INTEGER
|
||||
|
||||
Default: 0x0007 (source IP, destination IP and IP protocol)
|
||||
|
||||
fib_multipath_hash_seed - UNSIGNED INTEGER
|
||||
The seed value used when calculating hash for multipath routes. Applies
|
||||
to both IPv4 and IPv6 datapath. Only present for kernels built with
|
||||
CONFIG_IP_ROUTE_MULTIPATH enabled.
|
||||
|
||||
When set to 0, the seed value used for multipath routing defaults to an
|
||||
internally generated random one.
|
||||
|
||||
The actual hashing algorithm is not specified -- there is no guarantee
|
||||
that the next-hop distribution produced by a given seed will remain stable
|
||||
across kernel versions.
|
||||
|
||||
Default: 0 (random)
|
||||
|
||||
fib_sync_mem - UNSIGNED INTEGER
|
||||
Amount of dirty memory from fib entries that can be backlogged before
|
||||
synchronize_rcu is forced.
|
||||
|
@ -11450,12 +11450,16 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
|
||||
{
|
||||
bool old_inc_parsing_depth, new_inc_parsing_depth;
|
||||
struct mlxsw_sp_mp_hash_config config = {};
|
||||
struct net *net = mlxsw_sp_net(mlxsw_sp);
|
||||
char recr2_pl[MLXSW_REG_RECR2_LEN];
|
||||
unsigned long bit;
|
||||
u32 seed;
|
||||
int err;
|
||||
|
||||
seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
|
||||
seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).user_seed;
|
||||
if (!seed)
|
||||
seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
|
||||
|
||||
mlxsw_reg_recr2_pack(recr2_pl, seed);
|
||||
mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
|
||||
mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
|
||||
|
@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
|
||||
}
|
||||
|
||||
u32 flow_hash_from_keys(struct flow_keys *keys);
|
||||
u32 flow_hash_from_keys_seed(struct flow_keys *keys,
|
||||
const siphash_key_t *keyval);
|
||||
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
|
||||
struct flow_dissector_key_icmp *key_icmp,
|
||||
const void *data, int thoff, int hlen);
|
||||
|
@ -520,7 +520,35 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
|
||||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
||||
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
|
||||
const struct sk_buff *skb, struct flow_keys *flkeys);
|
||||
|
||||
static void
|
||||
fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed)
|
||||
{
|
||||
u64 mp_seed_64 = mp_seed;
|
||||
|
||||
key->key[0] = (mp_seed_64 << 32) | mp_seed_64;
|
||||
key->key[1] = key->key[0];
|
||||
}
|
||||
|
||||
static inline u32 fib_multipath_hash_from_keys(const struct net *net,
|
||||
struct flow_keys *keys)
|
||||
{
|
||||
siphash_aligned_key_t hash_key;
|
||||
u32 mp_seed;
|
||||
|
||||
mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
|
||||
fib_multipath_hash_construct_key(&hash_key, mp_seed);
|
||||
|
||||
return flow_hash_from_keys_seed(keys, &hash_key);
|
||||
}
|
||||
#else
|
||||
/*
 * Variant used when CONFIG_IP_ROUTE_MULTIPATH is not set: there is no
 * per-netns seed to consult, so @net is ignored and @keys are hashed with
 * flow_hash_from_keys().
 */
static inline u32 fib_multipath_hash_from_keys(const struct net *net,
					       struct flow_keys *keys)
{
	u32 hash = flow_hash_from_keys(keys);

	return hash;
}
|
||||
#endif
|
||||
|
||||
int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
|
||||
struct netlink_ext_ack *extack);
|
||||
void fib_select_multipath(struct fib_result *res, int hash);
|
||||
|
@ -40,6 +40,13 @@ struct inet_timewait_death_row {
|
||||
|
||||
struct tcp_fastopen_context;
|
||||
|
||||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
||||
/* Multipath hash seed state kept in struct netns_ipv4. */
struct sysctl_fib_multipath_hash_seed {
|
||||
/* Seed as written through the sysctl; 0 selects the random seed. */
u32 user_seed;
|
||||
/* Seed fed to the multipath hash: user_seed, or the random seed when user_seed is 0. */
u32 mp_seed;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct netns_ipv4 {
|
||||
/* Cacheline organization can be found documented in
|
||||
* Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
|
||||
@ -246,6 +253,7 @@ struct netns_ipv4 {
|
||||
#endif
|
||||
#endif
|
||||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
||||
struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed;
|
||||
u32 sysctl_fib_multipath_hash_fields;
|
||||
u8 sysctl_fib_multipath_use_neigh;
|
||||
u8 sysctl_fib_multipath_hash_policy;
|
||||
|
@ -1806,6 +1806,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys)
|
||||
}
|
||||
EXPORT_SYMBOL(flow_hash_from_keys);
|
||||
|
||||
u32 flow_hash_from_keys_seed(struct flow_keys *keys,
|
||||
const siphash_key_t *keyval)
|
||||
{
|
||||
return __flow_hash_from_keys(keys, keyval);
|
||||
}
|
||||
EXPORT_SYMBOL(flow_hash_from_keys_seed);
|
||||
|
||||
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
|
||||
struct flow_keys *keys,
|
||||
const siphash_key_t *keyval)
|
||||
|
@ -1923,7 +1923,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
|
||||
hash_keys.ports.dst = keys.ports.dst;
|
||||
|
||||
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
|
||||
return flow_hash_from_keys(&hash_keys);
|
||||
return fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
}
|
||||
|
||||
static u32 fib_multipath_custom_hash_inner(const struct net *net,
|
||||
@ -1972,7 +1972,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
|
||||
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
|
||||
hash_keys.ports.dst = keys.ports.dst;
|
||||
|
||||
return flow_hash_from_keys(&hash_keys);
|
||||
return fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
}
|
||||
|
||||
static u32 fib_multipath_custom_hash_skb(const struct net *net,
|
||||
@ -2009,7 +2009,7 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net,
|
||||
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
|
||||
hash_keys.ports.dst = fl4->fl4_dport;
|
||||
|
||||
return flow_hash_from_keys(&hash_keys);
|
||||
return fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
}
|
||||
|
||||
/* if skb is set it will be used and fl4 can be NULL */
|
||||
@ -2030,7 +2030,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
|
||||
hash_keys.addrs.v4addrs.src = fl4->saddr;
|
||||
hash_keys.addrs.v4addrs.dst = fl4->daddr;
|
||||
}
|
||||
mhash = flow_hash_from_keys(&hash_keys);
|
||||
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
break;
|
||||
case 1:
|
||||
/* skb is currently provided only when forwarding */
|
||||
@ -2064,7 +2064,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
|
||||
hash_keys.ports.dst = fl4->fl4_dport;
|
||||
hash_keys.basic.ip_proto = fl4->flowi4_proto;
|
||||
}
|
||||
mhash = flow_hash_from_keys(&hash_keys);
|
||||
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
break;
|
||||
case 2:
|
||||
memset(&hash_keys, 0, sizeof(hash_keys));
|
||||
@ -2095,7 +2095,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
|
||||
hash_keys.addrs.v4addrs.src = fl4->saddr;
|
||||
hash_keys.addrs.v4addrs.dst = fl4->daddr;
|
||||
}
|
||||
mhash = flow_hash_from_keys(&hash_keys);
|
||||
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
break;
|
||||
case 3:
|
||||
if (skb)
|
||||
|
@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Random seed used as the effective multipath seed whenever the
 * user-configured seed is 0. Filled in by
 * proc_fib_multipath_hash_init_rand_seed().
 */
static u32 proc_fib_multipath_hash_rand_seed __ro_after_init;
|
||||
|
||||
static void proc_fib_multipath_hash_init_rand_seed(void)
|
||||
{
|
||||
get_random_bytes(&proc_fib_multipath_hash_rand_seed,
|
||||
sizeof(proc_fib_multipath_hash_rand_seed));
|
||||
}
|
||||
|
||||
/*
 * Record a new multipath hash seed for @net.
 *
 * @net:       network namespace to update.
 * @user_seed: seed requested by the user; 0 means "use the random seed".
 *
 * user_seed is stored as given so that it can be reported back, while
 * mp_seed receives the value actually used for hashing.
 */
static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
{
	u32 mp_seed = user_seed ? user_seed :
				  proc_fib_multipath_hash_rand_seed;

	/*
	 * Store the two u32 members individually. A WRITE_ONCE() of the whole
	 * 8-byte structure is not a single store on 32-bit targets, and no
	 * reader consumes both members together, so per-member stores give
	 * each field a well-defined, untorn update.
	 */
	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed,
		   user_seed);
	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed, mp_seed);
}
|
||||
|
||||
static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write,
|
||||
void *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct sysctl_fib_multipath_hash_seed *mphs;
|
||||
struct net *net = table->data;
|
||||
struct ctl_table tmp;
|
||||
u32 user_seed;
|
||||
int ret;
|
||||
|
||||
mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
|
||||
user_seed = mphs->user_seed;
|
||||
|
||||
tmp = *table;
|
||||
tmp.data = &user_seed;
|
||||
|
||||
ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);
|
||||
|
||||
if (write && ret == 0) {
|
||||
proc_fib_multipath_hash_set_seed(net, user_seed);
|
||||
call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
|
||||
/*
 * No-op variant for the #else branch of the enclosing conditional
 * (presumably builds without CONFIG_IP_ROUTE_MULTIPATH -- the opening #if is
 * not visible here). Lets callers invoke it unconditionally.
 */
static void proc_fib_multipath_hash_init_rand_seed(void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * No-op variant for the #else branch of the enclosing conditional
 * (presumably builds without CONFIG_IP_ROUTE_MULTIPATH -- the opening #if is
 * not visible here). Both arguments are ignored.
 */
static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static struct ctl_table ipv4_table[] = {
|
||||
@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = {
|
||||
.extra1 = SYSCTL_ONE,
|
||||
.extra2 = &fib_multipath_hash_fields_all_mask,
|
||||
},
|
||||
{
|
||||
.procname = "fib_multipath_hash_seed",
|
||||
.data = &init_net,
|
||||
.maxlen = sizeof(u32),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_fib_multipath_hash_seed,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.procname = "ip_unprivileged_port_start",
|
||||
@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
|
||||
if (!net->ipv4.sysctl_local_reserved_ports)
|
||||
goto err_ports;
|
||||
|
||||
proc_fib_multipath_hash_set_seed(net, 0);
|
||||
|
||||
return 0;
|
||||
|
||||
err_ports:
|
||||
@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void)
|
||||
if (!hdr)
|
||||
return -ENOMEM;
|
||||
|
||||
proc_fib_multipath_hash_init_rand_seed();
|
||||
|
||||
if (register_pernet_subsys(&ipv4_sysctl_ops)) {
|
||||
unregister_net_sysctl_table(hdr);
|
||||
return -ENOMEM;
|
||||
|
@ -2372,7 +2372,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net,
|
||||
hash_keys.ports.dst = keys.ports.dst;
|
||||
|
||||
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
|
||||
return flow_hash_from_keys(&hash_keys);
|
||||
return fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
}
|
||||
|
||||
static u32 rt6_multipath_custom_hash_inner(const struct net *net,
|
||||
@ -2421,7 +2421,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net,
|
||||
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
|
||||
hash_keys.ports.dst = keys.ports.dst;
|
||||
|
||||
return flow_hash_from_keys(&hash_keys);
|
||||
return fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
}
|
||||
|
||||
static u32 rt6_multipath_custom_hash_skb(const struct net *net,
|
||||
@ -2460,7 +2460,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
|
||||
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
|
||||
hash_keys.ports.dst = fl6->fl6_dport;
|
||||
|
||||
return flow_hash_from_keys(&hash_keys);
|
||||
return fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
}
|
||||
|
||||
/* if skb is set it will be used and fl6 can be NULL */
|
||||
@ -2482,7 +2482,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
|
||||
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
|
||||
hash_keys.basic.ip_proto = fl6->flowi6_proto;
|
||||
}
|
||||
mhash = flow_hash_from_keys(&hash_keys);
|
||||
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
break;
|
||||
case 1:
|
||||
if (skb) {
|
||||
@ -2514,7 +2514,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
|
||||
hash_keys.ports.dst = fl6->fl6_dport;
|
||||
hash_keys.basic.ip_proto = fl6->flowi6_proto;
|
||||
}
|
||||
mhash = flow_hash_from_keys(&hash_keys);
|
||||
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
break;
|
||||
case 2:
|
||||
memset(&hash_keys, 0, sizeof(hash_keys));
|
||||
@ -2551,7 +2551,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
|
||||
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
|
||||
hash_keys.basic.ip_proto = fl6->flowi6_proto;
|
||||
}
|
||||
mhash = flow_hash_from_keys(&hash_keys);
|
||||
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
|
||||
break;
|
||||
case 3:
|
||||
if (skb)
|
||||
|
@ -70,6 +70,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
|
||||
router_broadcast.sh \
|
||||
router_mpath_nh_res.sh \
|
||||
router_mpath_nh.sh \
|
||||
router_mpath_seed.sh \
|
||||
router_multicast.sh \
|
||||
router_multipath.sh \
|
||||
router_nh.sh \
|
||||
|
@ -1134,12 +1134,19 @@ bridge_ageing_time_get()
|
||||
}
|
||||
|
||||
declare -A SYSCTL_ORIG
|
||||
# Record the current value of sysctl $1 in SYSCTL_ORIG, keyed by its name.
sysctl_save()
{
	local key=$1; shift
	local current

	current=$(sysctl -n $key)
	SYSCTL_ORIG[$key]=$current
}
|
||||
|
||||
# Set sysctl $1 to value $2, first recording its current value through
# sysctl_save. The save is done exactly once: sysctl_save already stores the
# value in SYSCTL_ORIG, so a second direct assignment here would be redundant.
sysctl_set()
{
	local key=$1; shift
	local value=$1; shift

	sysctl_save "$key"
	sysctl -qw $key="$value"
}
|
||||
|
||||
|
333
tools/testing/selftests/net/forwarding/router_mpath_seed.sh
Executable file
333
tools/testing/selftests/net/forwarding/router_mpath_seed.sh
Executable file
@ -0,0 +1,333 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# +-------------------------+ +-------------------------+
|
||||
# | H1 | | H2 |
|
||||
# | $h1 + | | + $h2 |
|
||||
# | 192.0.2.1/28 | | | | 192.0.2.34/28 |
|
||||
# | 2001:db8:1::1/64 | | | | 2001:db8:3::2/64 |
|
||||
# +-------------------|-----+ +-|-----------------------+
|
||||
# | |
|
||||
# +-------------------|-----+ +-|-----------------------+
|
||||
# | R1 | | | | R2 |
|
||||
# | $rp11 + | | + $rp21 |
|
||||
# | 192.0.2.2/28 | | 192.0.2.33/28 |
|
||||
# | 2001:db8:1::2/64 | | 2001:db8:3::1/64 |
|
||||
# | | | |
|
||||
# | $rp12 + | | + $rp22 |
|
||||
# | 192.0.2.17/28 | | | | 192.0.2.18..27/28 |
|
||||
# | 2001:db8:2::17/64 | | | | 2001:db8:2::18..27/64 |
|
||||
# +-------------------|-----+ +-|-----------------------+
|
||||
# | |
|
||||
# `----------'
|
||||
|
||||
# Names of the test functions defined in this script. Presumably consumed by
# tests_run from lib.sh -- confirm there.
ALL_TESTS="
|
||||
ping_ipv4
|
||||
ping_ipv6
|
||||
test_mpath_seed_stability_ipv4
|
||||
test_mpath_seed_stability_ipv6
|
||||
test_mpath_seed_get
|
||||
test_mpath_seed_ipv4
|
||||
test_mpath_seed_ipv6
|
||||
"
|
||||
# setup_prepare below reads NETIFS[p1] through NETIFS[p6].
NUM_NETIFS=6
|
||||
source lib.sh
|
||||
|
||||
# Configure $h1 with its addresses and add the IPv4/IPv6 routes towards the
# H2-side subnets via 192.0.2.2 / 2001:db8:1::2.
h1_create()
{
	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64

	local gw4=192.0.2.2
	local gw6=2001:db8:1::2

	ip -4 route add 192.0.2.32/28 vrf v$h1 nexthop via $gw4
	ip -6 route add 2001:db8:3::/64 vrf v$h1 nexthop via $gw6
}
|
||||
|
||||
# Undo h1_create: delete the routes in reverse order, then tear down $h1.
h1_destroy()
{
	local gw4=192.0.2.2
	local gw6=2001:db8:1::2

	ip -6 route del 2001:db8:3::/64 vrf v$h1 nexthop via $gw6
	ip -4 route del 192.0.2.32/28 vrf v$h1 nexthop via $gw4

	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
}
|
||||
|
||||
# Configure $h2 with its addresses and add the IPv4/IPv6 routes towards the
# H1-side subnets via 192.0.2.33 / 2001:db8:3::1.
h2_create()
{
	simple_if_init $h2 192.0.2.34/28 2001:db8:3::2/64

	local gw4=192.0.2.33
	local gw6=2001:db8:3::1

	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via $gw4
	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via $gw6
}
|
||||
|
||||
# Undo h2_create: delete the routes in reverse order, then tear down $h2.
h2_destroy()
{
	local gw4=192.0.2.33
	local gw6=2001:db8:3::1

	ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via $gw6
	ip -4 route del 192.0.2.0/28 vrf v$h2 nexthop via $gw4

	simple_if_fini $h2 192.0.2.34/28 2001:db8:3::2/64
}
|
||||
|
||||
# Configure the two R1 ports; $rp12 is initialised against v$rp11.
router1_create()
{
	local vrf=v$rp11

	simple_if_init $rp11 192.0.2.2/28 2001:db8:1::2/64
	__simple_if_init $rp12 $vrf 192.0.2.17/28 2001:db8:2::17/64
}
|
||||
|
||||
# Undo router1_create, tearing the ports down in reverse order.
router1_destroy()
{
	# $rp12 first: it was set up after $rp11.
	__simple_if_fini $rp12 192.0.2.17/28 2001:db8:2::17/64

	simple_if_fini $rp11 192.0.2.2/28 2001:db8:1::2/64
}
|
||||
|
||||
# Configure the two R2 ports ($rp22 is initialised against v$rp21) and add
# the return routes towards the H1-side subnets via 192.0.2.17 /
# 2001:db8:2::17.
router2_create()
{
	local vrf=v$rp21

	simple_if_init $rp21 192.0.2.33/28 2001:db8:3::1/64
	__simple_if_init $rp22 $vrf 192.0.2.18/28 2001:db8:2::18/64

	ip -4 route add 192.0.2.0/28 vrf $vrf nexthop via 192.0.2.17
	ip -6 route add 2001:db8:1::/64 vrf $vrf nexthop via 2001:db8:2::17
}
|
||||
|
||||
# Undo router2_create: delete the return routes, then tear down the ports in
# reverse order.
router2_destroy()
{
	local vrf=v$rp21

	ip -6 route del 2001:db8:1::/64 vrf $vrf nexthop via 2001:db8:2::17
	ip -4 route del 192.0.2.0/28 vrf $vrf nexthop via 192.0.2.17

	__simple_if_fini $rp22 192.0.2.18/28 2001:db8:2::18/64
	simple_if_fini $rp21 192.0.2.33/28 2001:db8:3::1/64
}
|
||||
|
||||
# Create ten IPv4 next hops (ids 1001..1010) and ten IPv6 next hops (ids
# 2001..2010) on $rp12, collect them into groups 1000 and 2000 with hw_stats
# enabled, and route the H2-side subnets through those groups.
nexthops_create()
{
	local nh

	for ((nh = 1; nh <= 10; nh++)); do
		ip nexthop add id $((1000 + nh)) via 192.0.2.18 dev $rp12
		ip nexthop add id $((2000 + nh)) via 2001:db8:2::18 dev $rp12
	done

	# "seq -s /" yields the slash-separated member list, e.g. 1001/1002/...
	ip nexthop add id 1000 group $(seq -s / 1001 1010) hw_stats on
	ip nexthop add id 2000 group $(seq -s / 2001 2010) hw_stats on

	ip -4 route add 192.0.2.32/28 vrf v$rp11 nhid 1000
	ip -6 route add 2001:db8:3::/64 vrf v$rp11 nhid 2000
}
|
||||
|
||||
# Undo nexthops_create in reverse order: routes, then the two groups, then
# the individual next hops from id +10 down to +1.
nexthops_destroy()
{
	local nh

	ip -6 route del 2001:db8:3::/64 vrf v$rp11 nhid 2000
	ip -4 route del 192.0.2.32/28 vrf v$rp11 nhid 1000

	ip nexthop del id 2000
	ip nexthop del id 1000

	for ((nh = 10; nh >= 1; nh--)); do
		ip nexthop del id $((2000 + nh))
		ip nexthop del id $((1000 + nh))
	done
}
|
||||
|
||||
# Map the six NETIFS entries onto the topology ports, remember the current
# multipath hash seed, and bring up hosts and routers.
setup_prepare()
{
	# H1 <-> R1
	h1=${NETIFS[p1]}
	rp11=${NETIFS[p2]}

	# R1 <-> R2
	rp12=${NETIFS[p3]}
	rp22=${NETIFS[p4]}

	# R2 <-> H2
	rp21=${NETIFS[p5]}
	h2=${NETIFS[p6]}

	# Saved here; cleanup calls sysctl_restore for the same key.
	sysctl_save net.ipv4.fib_multipath_hash_seed

	vrf_prepare

	h1_create
	h2_create
	router1_create
	router2_create

	forwarding_enable
}
|
||||
|
||||
# Tear everything down in the reverse order of setup, ending with the
# restoration of the multipath hash seed sysctl.
cleanup()
{
	pre_cleanup

	forwarding_restore

	# Next hops are created after setup_prepare, so they go first.
	nexthops_destroy

	router2_destroy
	router1_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup

	sysctl_restore net.ipv4.fib_multipath_hash_seed
}
|
||||
|
||||
# Run ping_test from $h1 to the IPv4 address configured on $h2.
ping_ipv4()
{
	local h2_addr=192.0.2.34

	ping_test $h1 $h2_addr
}
|
||||
|
||||
# Run ping6_test from $h1 to the IPv6 address configured on $h2.
ping_ipv6()
{
	local h2_addr=2001:db8:3::2

	ping6_test $h1 $h2_addr
}
|
||||
|
||||
# Write 100 different seeds (multiples of 999331, starting at 0) to the
# sysctl and check that each one reads back unchanged.
test_mpath_seed_get()
{
	RET=0

	local knob=net.ipv4.fib_multipath_hash_seed
	local n
	local seed_w
	local seed_r

	for ((n = 0; n < 100; n++)); do
		seed_w=$((999331 * n))

		sysctl -qw $knob=$seed_w
		seed_r=$(sysctl -n $knob)

		# check_err receives the status of the comparison right above it.
		((seed_r == seed_w))
		check_err $? "mpath seed written as $seed_w, but read as $seed_r"
	done

	log_test "mpath seed set/get"
}
|
||||
|
||||
# Print, as a compact JSON array, the packet counters of all members of
# next-hop group $1, ordered by member id.
nh_stats_snapshot()
{
	local group_id=$1; shift
	local filter='[.[].group_stats | sort_by(.id) | .[].packets]'

	ip -j -s -s nexthop show id $group_id | jq -c "$filter"
}
|
||||
|
||||
# Given two counter snapshots ($1 taken before, $2 after) in the format
# printed by nh_stats_snapshot, print the index of the first next hop whose
# counter grew by more than 8 packets.
get_active_nh()
{
	local s0=$1; shift
	local s1=$1; shift

	# Per-index delta -> 1/0 flag for "more than 8 packets" -> first 1.
	local filter='
		[range($s0 | length)] |
		map($s1[.] - $s0[.]) |
		map(if . > 8 then 1 else 0 end) |
		index(1)
	'

	jq -n --argjson s0 "$s0" --argjson s1 "$s1" "$filter"
}
|
||||
|
||||
# Run the traffic command given after the group id and print which next hop
# of group $1 carried it, by comparing counter snapshots taken before and
# after the command.
probe_nh()
{
	local group_id=$1; shift
	local -a mz=("$@")
	local before
	local after

	before=$(nh_stats_snapshot $group_id)
	"${mz[@]}"
	after=$(nh_stats_snapshot $group_id)

	get_active_nh "$before" "$after"
}
|
||||
|
||||
# Set the multipath hash seed to $2, then probe group $1 with the traffic
# command given by the remaining arguments (see probe_nh).
probe_seed()
{
	local group_id=$1; shift
	local seed=$1; shift
	local -a mz=("$@")
	local knob=net.ipv4.fib_multipath_hash_seed

	sysctl -qw $knob=$seed
	probe_nh "$group_id" "${mz[@]}"
}
|
||||
|
||||
# Check that varying the seed spreads one fixed flow over all ten next hops.
#
# $1   - next-hop group id to probe
# $2   - label used in the test name (e.g. IPv4)
# $3.. - traffic command to run for every probe
#
# The same flow is sent under 100 different seeds; each of the ten next hops
# must be selected at least once. The seed is reset to 0 afterwards.
test_mpath_seed()
{
	local group_id=$1; shift
	local what=$1; shift
	local -a mz=("$@")
	local ii

	RET=0

	local -a tally=(0 0 0 0 0 0 0 0 0 0)
	for ((ii = 0; ii < 100; ii++)); do
		local seed=$((999331 * ii))
		local act=$(probe_seed $group_id $seed "${mz[@]}")

		# When no next hop crossed the packet threshold, jq's index()
		# prints "null". Used as an arithmetic subscript that would
		# evaluate to 0 and be silently booked as a hit on NH #0, so
		# report it as a failure instead of counting it.
		if [[ ! $act =~ ^[0-9]+$ ]]; then
			check_err 1 "seed $seed: no active NH detected, got '$act'"
			continue
		fi

		((tally[act]++))
	done

	local tally_str="${tally[@]}"
	for ((ii = 0; ii < ${#tally[@]}; ii++)); do
		((tally[ii] > 0))
		check_err $? "NH #$ii not hit, tally='$tally_str'"
	done

	log_test "mpath seed $what"
	sysctl -qw net.ipv4.fib_multipath_hash_seed=0
}
|
||||
|
||||
# Seed-spread test for the IPv4 group (id 1000), using a fixed UDP flow sent
# with $MZ from $h1.
test_mpath_seed_ipv4()
{
	local -a cmd=(
		$MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q
		-p 64 -d 0 -c 10 -t udp
	)

	test_mpath_seed 1000 IPv4 "${cmd[@]}"
}
|
||||
|
||||
# Seed-spread test for the IPv6 group (id 2000), using a fixed UDP flow sent
# with $MZ -6 from $h1.
test_mpath_seed_ipv6()
{
	local -a cmd=(
		$MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q
		-p 64 -d 0 -c 10 -t udp
	)

	test_mpath_seed 2000 IPv6 "${cmd[@]}"
}
|
||||
|
||||
# Flag an error through check_err unless the two observed next-hop indices
# ($2 and $3) are equal. $1 is only used to label the error message.
check_mpath_seed_stability()
{
	local seed=$1; shift
	local act_0=$1; shift
	local act_1=$1; shift
	local rc

	((act_0 == act_1))
	rc=$?

	check_err $rc "seed $seed: active NH moved from $act_0 to $act_1 after seed change"
}
|
||||
|
||||
# Check that a given seed always selects the same next hop.
#
# $1   - next-hop group id to probe
# $2   - label used in the test name (e.g. IPv4)
# $3.. - traffic command to run for every probe
#
# The flow is probed once before the seed is touched, then twice under each
# of three seeds (0 and two fixed non-zero values), cycling through all
# seeds between the two rounds. The untouched state must match seed 0, and
# each seed must give the same next hop in both rounds. The seed is reset to
# 0 afterwards.
test_mpath_seed_stability()
{
	local group_id=$1; shift
	local what=$1; shift
	local -a mz=("$@")

	RET=0

	local -a seeds=(0 3221338814 3735928559)
	local -a round_0=()
	local -a round_1=()
	local seed
	local idx

	# Initial active NH before touching the seed at all.
	local act_ini=$(probe_nh $group_id "${mz[@]}")

	for seed in "${seeds[@]}"; do
		round_0+=("$(probe_seed $group_id $seed "${mz[@]}")")
	done

	for seed in "${seeds[@]}"; do
		round_1+=("$(probe_seed $group_id $seed "${mz[@]}")")
	done

	check_mpath_seed_stability initial $act_ini ${round_0[0]}
	for ((idx = 0; idx < ${#seeds[@]}; idx++)); do
		check_mpath_seed_stability ${seeds[idx]} \
			${round_0[idx]} ${round_1[idx]}
	done

	log_test "mpath seed stability $what"
	sysctl -qw net.ipv4.fib_multipath_hash_seed=0
}
|
||||
|
||||
# Seed-stability test for the IPv4 group (id 1000), using a fixed UDP flow
# sent with $MZ from $h1.
test_mpath_seed_stability_ipv4()
{
	local -a cmd=(
		$MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q
		-p 64 -d 0 -c 10 -t udp
	)

	test_mpath_seed_stability 1000 IPv4 "${cmd[@]}"
}
|
||||
|
||||
# Seed-stability test for the IPv6 group (id 2000), using a fixed UDP flow
# sent with $MZ -6 from $h1.
test_mpath_seed_stability_ipv6()
{
	local -a cmd=(
		$MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q
		-p 64 -d 0 -c 10 -t udp
	)

	test_mpath_seed_stability 2000 IPv6 "${cmd[@]}"
}
|
||||
|
||||
# Run cleanup whenever the script exits, including after a failed setup.
trap cleanup EXIT
|
||||
|
||||
setup_prepare
|
||||
setup_wait
|
||||
# Next hops are created only after setup_wait has returned.
nexthops_create
|
||||
|
||||
tests_run
|
||||
|
||||
exit $EXIT_STATUS
|
Loading…
Reference in New Issue
Block a user