Merge branch 'ipv4-handle-tos-and-scope-properly-for-icmp-redirects-and-pmtu-updates'
Guillaume Nault says: ==================== ipv4: Handle TOS and scope properly for ICMP redirects and PMTU updates ICMPv4 PMTU and redirect handlers didn't properly initialise the struct flowi4 they used for route lookups: * ECN bits sometimes weren't cleared from ->flowi4_tos. * The RTO_ONLINK flag wasn't taken into account for ->flowi4_scope. In some special cases, this resulted in ICMP redirects and PMTU updates not being taken into account because fib_lookup() couldn't retrieve the correct route. ==================== Link: https://lore.kernel.org/r/cover.1647519748.git.gnault@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -499,6 +499,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
|
||||
}
|
||||
EXPORT_SYMBOL(__ip_select_ident);
|
||||
|
||||
static void ip_rt_fix_tos(struct flowi4 *fl4)
|
||||
{
|
||||
__u8 tos = RT_FL_TOS(fl4);
|
||||
|
||||
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
|
||||
fl4->flowi4_scope = tos & RTO_ONLINK ?
|
||||
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
|
||||
}
|
||||
|
||||
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
|
||||
const struct sock *sk,
|
||||
const struct iphdr *iph,
|
||||
@@ -824,6 +833,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
|
||||
rt = (struct rtable *) dst;
|
||||
|
||||
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
|
||||
ip_rt_fix_tos(&fl4);
|
||||
__ip_do_redirect(rt, skb, &fl4, true);
|
||||
}
|
||||
|
||||
@@ -1048,6 +1058,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
|
||||
struct flowi4 fl4;
|
||||
|
||||
ip_rt_build_flow_key(&fl4, sk, skb);
|
||||
ip_rt_fix_tos(&fl4);
|
||||
|
||||
/* Don't make lookup fail for bridged encapsulations */
|
||||
if (skb && netif_is_any_bridge_port(skb->dev))
|
||||
@@ -1122,6 +1133,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
|
||||
goto out;
|
||||
|
||||
new = true;
|
||||
} else {
|
||||
ip_rt_fix_tos(&fl4);
|
||||
}
|
||||
|
||||
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
|
||||
@@ -2603,7 +2616,6 @@ add:
|
||||
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
__u8 tos = RT_FL_TOS(fl4);
|
||||
struct fib_result res = {
|
||||
.type = RTN_UNSPEC,
|
||||
.fi = NULL,
|
||||
@@ -2613,9 +2625,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
|
||||
struct rtable *rth;
|
||||
|
||||
fl4->flowi4_iif = LOOPBACK_IFINDEX;
|
||||
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
|
||||
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
|
||||
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
|
||||
ip_rt_fix_tos(fl4);
|
||||
|
||||
rcu_read_lock();
|
||||
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
|
||||
|
||||
@@ -26,6 +26,15 @@
|
||||
# - pmtu_ipv6
|
||||
# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
|
||||
#
|
||||
# - pmtu_ipv4_dscp_icmp_exception
|
||||
# Set up the same network topology as pmtu_ipv4, but use non-default
|
||||
# routing table in A. A fib-rule is used to jump to this routing table
|
||||
# based on DSCP. Send ICMPv4 packets with the expected DSCP value and
|
||||
# verify that ECN doesn't interfere with the creation of PMTU exceptions.
|
||||
#
|
||||
# - pmtu_ipv4_dscp_udp_exception
|
||||
# Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
|
||||
#
|
||||
# - pmtu_ipv4_vxlan4_exception
|
||||
# Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
|
||||
# over IPv4 between A and B, routed via R1. On the link between R1 and B,
|
||||
@@ -203,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
|
||||
tests="
|
||||
pmtu_ipv4_exception ipv4: PMTU exceptions 1
|
||||
pmtu_ipv6_exception ipv6: PMTU exceptions 1
|
||||
pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1
|
||||
pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1
|
||||
pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1
|
||||
pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1
|
||||
pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1
|
||||
@@ -323,6 +334,9 @@ routes_nh="
|
||||
B 6 default 61
|
||||
"
|
||||
|
||||
policy_mark=0x04
|
||||
rt_table=main
|
||||
|
||||
veth4_a_addr="192.168.1.1"
|
||||
veth4_b_addr="192.168.1.2"
|
||||
veth4_c_addr="192.168.2.10"
|
||||
@@ -346,6 +360,7 @@ dummy6_mask="64"
|
||||
err_buf=
|
||||
tcpdump_pids=
|
||||
nettest_pids=
|
||||
socat_pids=
|
||||
|
||||
err() {
|
||||
err_buf="${err_buf}${1}
|
||||
@@ -723,7 +738,7 @@ setup_routing_old() {
|
||||
|
||||
ns_name="$(nsname ${ns})"
|
||||
|
||||
ip -n ${ns_name} route add ${addr} via ${gw}
|
||||
ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"
|
||||
|
||||
ns=""; addr=""; gw=""
|
||||
done
|
||||
@@ -753,7 +768,7 @@ setup_routing_new() {
|
||||
|
||||
ns_name="$(nsname ${ns})"
|
||||
|
||||
ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
|
||||
ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"
|
||||
|
||||
ns=""; fam=""; addr=""; nhid=""
|
||||
done
|
||||
@@ -798,6 +813,24 @@ setup_routing() {
|
||||
return 0
|
||||
}
|
||||
|
||||
setup_policy_routing() {
|
||||
setup_routing
|
||||
|
||||
ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
|
||||
table "${rt_table}"
|
||||
|
||||
# Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
|
||||
# have an option do to it.
|
||||
tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
|
||||
tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
|
||||
tc -netns "${NS_A}" filter add dev veth_A-R1 \
|
||||
protocol ipv4 flower ip_proto udp \
|
||||
action pedit ex munge ip df set 0x40 pipe csum ip and udp
|
||||
tc -netns "${NS_A}" filter add dev veth_A-R2 \
|
||||
protocol ipv4 flower ip_proto udp \
|
||||
action pedit ex munge ip df set 0x40 pipe csum ip and udp
|
||||
}
|
||||
|
||||
setup_bridge() {
|
||||
run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
|
||||
run_cmd ${ns_a} ip link set br0 up
|
||||
@@ -903,6 +936,11 @@ cleanup() {
|
||||
done
|
||||
nettest_pids=
|
||||
|
||||
for pid in ${socat_pids}; do
|
||||
kill "${pid}"
|
||||
done
|
||||
socat_pids=
|
||||
|
||||
for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
|
||||
ip netns del ${n} 2> /dev/null
|
||||
done
|
||||
@@ -950,15 +988,21 @@ link_get_mtu() {
|
||||
route_get_dst_exception() {
|
||||
ns_cmd="${1}"
|
||||
dst="${2}"
|
||||
dsfield="${3}"
|
||||
|
||||
${ns_cmd} ip route get "${dst}"
|
||||
if [ -z "${dsfield}" ]; then
|
||||
dsfield=0
|
||||
fi
|
||||
|
||||
${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
|
||||
}
|
||||
|
||||
route_get_dst_pmtu_from_exception() {
|
||||
ns_cmd="${1}"
|
||||
dst="${2}"
|
||||
dsfield="${3}"
|
||||
|
||||
mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
|
||||
mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
|
||||
}
|
||||
|
||||
check_pmtu_value() {
|
||||
@@ -1068,6 +1112,95 @@ test_pmtu_ipv6_exception() {
|
||||
test_pmtu_ipvX 6
|
||||
}
|
||||
|
||||
test_pmtu_ipv4_dscp_icmp_exception() {
|
||||
rt_table=100
|
||||
|
||||
setup namespaces policy_routing || return $ksft_skip
|
||||
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
|
||||
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
|
||||
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
|
||||
"${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
|
||||
|
||||
# Set up initial MTU values
|
||||
mtu "${ns_a}" veth_A-R1 2000
|
||||
mtu "${ns_r1}" veth_R1-A 2000
|
||||
mtu "${ns_r1}" veth_R1-B 1400
|
||||
mtu "${ns_b}" veth_B-R1 1400
|
||||
|
||||
mtu "${ns_a}" veth_A-R2 2000
|
||||
mtu "${ns_r2}" veth_R2-A 2000
|
||||
mtu "${ns_r2}" veth_R2-B 1500
|
||||
mtu "${ns_b}" veth_B-R2 1500
|
||||
|
||||
len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
|
||||
|
||||
dst1="${prefix4}.${b_r1}.1"
|
||||
dst2="${prefix4}.${b_r2}.1"
|
||||
|
||||
# Create route exceptions
|
||||
dsfield=${policy_mark} # No ECN bit set (Not-ECT)
|
||||
run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
|
||||
|
||||
dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
|
||||
run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
|
||||
|
||||
# Check that exceptions have been created with the correct PMTU
|
||||
pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
|
||||
check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
|
||||
|
||||
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
|
||||
check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
|
||||
}
|
||||
|
||||
test_pmtu_ipv4_dscp_udp_exception() {
|
||||
rt_table=100
|
||||
|
||||
if ! which socat > /dev/null 2>&1; then
|
||||
echo "'socat' command not found; skipping tests"
|
||||
return $ksft_skip
|
||||
fi
|
||||
|
||||
setup namespaces policy_routing || return $ksft_skip
|
||||
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
|
||||
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
|
||||
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
|
||||
"${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
|
||||
|
||||
# Set up initial MTU values
|
||||
mtu "${ns_a}" veth_A-R1 2000
|
||||
mtu "${ns_r1}" veth_R1-A 2000
|
||||
mtu "${ns_r1}" veth_R1-B 1400
|
||||
mtu "${ns_b}" veth_B-R1 1400
|
||||
|
||||
mtu "${ns_a}" veth_A-R2 2000
|
||||
mtu "${ns_r2}" veth_R2-A 2000
|
||||
mtu "${ns_r2}" veth_R2-B 1500
|
||||
mtu "${ns_b}" veth_B-R2 1500
|
||||
|
||||
len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
|
||||
|
||||
dst1="${prefix4}.${b_r1}.1"
|
||||
dst2="${prefix4}.${b_r2}.1"
|
||||
|
||||
# Create route exceptions
|
||||
run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
|
||||
socat_pids="${socat_pids} $!"
|
||||
|
||||
dsfield=${policy_mark} # No ECN bit set (Not-ECT)
|
||||
run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
|
||||
UDP:"${dst1}":50000,tos="${dsfield}"
|
||||
|
||||
dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
|
||||
run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
|
||||
UDP:"${dst2}":50000,tos="${dsfield}"
|
||||
|
||||
# Check that exceptions have been created with the correct PMTU
|
||||
pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
|
||||
check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
|
||||
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
|
||||
check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
|
||||
}
|
||||
|
||||
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
|
||||
type=${1}
|
||||
family=${2}
|
||||
|
||||
Reference in New Issue
Block a user