Merge branch 'unmask-dscp-part-four'

Ido Schimmel says:

====================
Unmask upper DSCP bits - part 4 (last)

tl;dr - This patchset finishes unmasking the upper DSCP bits in the IPv4
flow key in preparation for allowing IPv4 FIB rules to match on DSCP. No
functional changes are expected.

The TOS field in the IPv4 flow key ('flowi4_tos') is used during FIB
lookup to match against the TOS selector in FIB rules and routes.

It is currently impossible for user space to configure FIB rules that
match on the DSCP value, as the upper DSCP bits are masked either in the
various call sites that initialize the IPv4 flow key or along the path
to the FIB core.

In preparation for adding a DSCP selector to IPv4 and IPv6 FIB rules, we
need to make sure the entire DSCP value is present in the IPv4 flow key.
This patchset finishes unmasking the upper DSCP bits by adjusting all
the callers of ip_route_output_key() to properly initialize the full
DSCP value in the IPv4 flow key.

No functional changes are expected as commit 1fa3314c14 ("ipv4:
Centralize TOS matching") moved the masking of the upper DSCP bits to
the core where 'flowi4_tos' is matched against the TOS selector.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2024-09-09 14:14:54 +01:00
commit bfba7bc8b7
10 changed files with 23 additions and 14 deletions

View File

@ -36,6 +36,7 @@
#include <net/route.h> #include <net/route.h>
#include <net/netfilter/br_netfilter.h> #include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/inet_dscp.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "br_private.h" #include "br_private.h"
@ -402,7 +403,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
goto free_skb; goto free_skb;
rt = ip_route_output(net, iph->daddr, 0, rt = ip_route_output(net, iph->daddr, 0,
RT_TOS(iph->tos), 0, iph->tos & INET_DSCP_MASK, 0,
RT_SCOPE_UNIVERSE); RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) { if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't /* - Bridged-and-DNAT'ed traffic doesn't

View File

@ -12,6 +12,7 @@
#include <net/gre.h> #include <net/gre.h>
#include <net/ip6_route.h> #include <net/ip6_route.h>
#include <net/ipv6_stubs.h> #include <net/ipv6_stubs.h>
#include <net/inet_dscp.h>
struct bpf_lwt_prog { struct bpf_lwt_prog {
struct bpf_prog *prog; struct bpf_prog *prog;
@ -205,7 +206,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
fl4.flowi4_oif = oif; fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark; fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk); fl4.flowi4_uid = sock_net_uid(net, sk);
fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol; fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr; fl4.daddr = iph->daddr;

View File

@ -445,7 +445,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr; fl4.saddr = saddr;
fl4.flowi4_mark = mark; fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_uid = sock_net_uid(net, NULL);
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK;
fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));

View File

@ -44,6 +44,7 @@
#include <net/gre.h> #include <net/gre.h>
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
#include <net/erspan.h> #include <net/erspan.h>
#include <net/inet_dscp.h>
/* /*
Problems & solutions Problems & solutions
@ -930,7 +931,7 @@ static int ipgre_open(struct net_device *dev)
t->parms.iph.daddr, t->parms.iph.daddr,
t->parms.iph.saddr, t->parms.iph.saddr,
t->parms.o_key, t->parms.o_key,
RT_TOS(t->parms.iph.tos), t->parms.iph.tos & INET_DSCP_MASK,
t->parms.link); t->parms.link);
if (IS_ERR(rt)) if (IS_ERR(rt))
return -EADDRNOTAVAIL; return -EADDRNOTAVAIL;

View File

@ -43,6 +43,7 @@
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/udp.h> #include <net/udp.h>
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h> #include <net/ipv6.h>
@ -293,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key, iph->saddr, tunnel->parms.o_key,
RT_TOS(iph->tos), dev_net(dev), iph->tos & INET_DSCP_MASK, dev_net(dev),
tunnel->parms.link, tunnel->fwmark, 0, 0); tunnel->parms.link, tunnel->fwmark, 0, 0);
rt = ip_route_output_key(tunnel->net, &fl4); rt = ip_route_output_key(tunnel->net, &fl4);
@ -609,9 +610,9 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
} }
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id), RT_TOS(tos), tunnel_id_to_key32(key->tun_id),
dev_net(dev), 0, skb->mark, skb_get_hash(skb), tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark,
key->flow_flags); skb_get_hash(skb), key->flow_flags);
if (!tunnel_hlen) if (!tunnel_hlen)
tunnel_hlen = ip_encap_hlen(&tun_info->encap); tunnel_hlen = ip_encap_hlen(&tun_info->encap);
@ -772,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
} }
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.o_key, tos & INET_DSCP_MASK,
dev_net(dev), READ_ONCE(tunnel->parms.link), dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0); tunnel->fwmark, skb_get_hash(skb), 0);

View File

@ -14,6 +14,7 @@
#include <net/route.h> #include <net/route.h>
#include <net/xfrm.h> #include <net/xfrm.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/inet_dscp.h>
#include <net/netfilter/nf_queue.h> #include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
@ -43,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/ */
fl4.daddr = iph->daddr; fl4.daddr = iph->daddr;
fl4.saddr = saddr; fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0; fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev); fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark; fl4.flowi4_mark = skb->mark;

View File

@ -15,6 +15,7 @@
#include <net/icmp.h> #include <net/icmp.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/route.h> #include <net/route.h>
#include <net/inet_dscp.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h> #include <net/netfilter/ipv4/nf_dup_ipv4.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK) #if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack.h>
@ -32,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif; fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr; fl4.daddr = gw->s_addr;
fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4); rt = ip_route_output_key(net, &fl4);

View File

@ -6,6 +6,7 @@
#include <net/dst_metadata.h> #include <net/dst_metadata.h>
#include <net/udp.h> #include <net/udp.h>
#include <net/udp_tunnel.h> #include <net/udp_tunnel.h>
#include <net/inet_dscp.h>
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp) struct socket **sockp)
@ -232,7 +233,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
fl4.saddr = key->u.ipv4.src; fl4.saddr = key->u.ipv4.src;
fl4.fl4_dport = dport; fl4.fl4_dport = dport;
fl4.fl4_sport = sport; fl4.fl4_sport = sport;
fl4.flowi4_tos = RT_TOS(tos); fl4.flowi4_tos = tos & INET_DSCP_MASK;
fl4.flowi4_flags = key->flow_flags; fl4.flowi4_flags = key->flow_flags;
rt = ip_route_output_key(net, &fl4); rt = ip_route_output_key(net, &fl4);

View File

@ -9,6 +9,7 @@
#include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h> #include <linux/netfilter/nf_tables.h>
#include <net/ip.h> /* for ipv4 options. */ #include <net/ip.h> /* for ipv4 options. */
#include <net/inet_dscp.h>
#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_core.h>
@ -235,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos); fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK;
fl.u.ip4.flowi4_mark = pkt->skb->mark; fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break; break;

View File

@ -44,6 +44,7 @@
#include <net/inet_common.h> #include <net/inet_common.h>
#include <net/inet_ecn.h> #include <net/inet_ecn.h>
#include <net/udp_tunnel.h> #include <net/udp_tunnel.h>
#include <net/inet_dscp.h>
#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) #define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
@ -435,7 +436,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port; fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP; fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) { if (asoc) {
fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk); fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if; fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port); fl4->fl4_sport = htons(asoc->base.bind_addr.port);