mirror of
https://github.com/torvalds/linux.git
synced 2024-11-28 07:01:32 +00:00
[INET]: Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath. Signed-off-by: John Heffner <jheffner@psc.edu> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
b881ef7603
commit
628a5c5618
@ -83,6 +83,7 @@ struct in_addr {
|
|||||||
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
|
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
|
||||||
#define IP_PMTUDISC_WANT 1 /* Use per route hints */
|
#define IP_PMTUDISC_WANT 1 /* Use per route hints */
|
||||||
#define IP_PMTUDISC_DO 2 /* Always DF */
|
#define IP_PMTUDISC_DO 2 /* Always DF */
|
||||||
|
#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
|
||||||
|
|
||||||
#define IP_MULTICAST_IF 32
|
#define IP_MULTICAST_IF 32
|
||||||
#define IP_MULTICAST_TTL 33
|
#define IP_MULTICAST_TTL 33
|
||||||
|
@ -179,6 +179,7 @@ struct in6_flowlabel_req
|
|||||||
#define IPV6_PMTUDISC_DONT 0
|
#define IPV6_PMTUDISC_DONT 0
|
||||||
#define IPV6_PMTUDISC_WANT 1
|
#define IPV6_PMTUDISC_WANT 1
|
||||||
#define IPV6_PMTUDISC_DO 2
|
#define IPV6_PMTUDISC_DO 2
|
||||||
|
#define IPV6_PMTUDISC_PROBE 3
|
||||||
|
|
||||||
/* Flowlabel */
|
/* Flowlabel */
|
||||||
#define IPV6_FLOWLABEL_MGR 32
|
#define IPV6_FLOWLABEL_MGR 32
|
||||||
|
@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Pick the MTU that output-path size checks should use for this skb.
 *
 * If the skb has an owning socket (skb->sk is non-NULL) and that socket's
 * pmtudisc setting is IP_PMTUDISC_PROBE, return the MTU of the outgoing
 * device (skb->dst->dev->mtu). In every other case, including skbs with
 * no socket attached, return dst_mtu(skb->dst).
 */
static inline int ip_skb_dst_mtu(struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
|
||||||
|
|
||||||
|
return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
|
||||||
|
skb->dst->dev->mtu : dst_mtu(skb->dst);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int ip_finish_output(struct sk_buff *skb)
|
static inline int ip_finish_output(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
|
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
|
||||||
@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
|
|||||||
return dst_output(skb);
|
return dst_output(skb);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
|
if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
|
||||||
return ip_fragment(skb, ip_finish_output2);
|
return ip_fragment(skb, ip_finish_output2);
|
||||||
else
|
else
|
||||||
return ip_finish_output2(skb);
|
return ip_finish_output2(skb);
|
||||||
@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
|
|||||||
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
|
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
|
||||||
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
|
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
|
||||||
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
|
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
|
||||||
htonl(dst_mtu(&rt->u.dst)));
|
htonl(ip_skb_dst_mtu(skb)));
|
||||||
kfree_skb(skb);
|
kfree_skb(skb);
|
||||||
return -EMSGSIZE;
|
return -EMSGSIZE;
|
||||||
}
|
}
|
||||||
@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
|
|||||||
inet->cork.addr = ipc->addr;
|
inet->cork.addr = ipc->addr;
|
||||||
}
|
}
|
||||||
dst_hold(&rt->u.dst);
|
dst_hold(&rt->u.dst);
|
||||||
inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
|
inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
|
||||||
|
rt->u.dst.dev->mtu :
|
||||||
|
dst_mtu(rt->u.dst.path);
|
||||||
inet->cork.rt = rt;
|
inet->cork.rt = rt;
|
||||||
inet->cork.length = 0;
|
inet->cork.length = 0;
|
||||||
sk->sk_sndmsg_page = NULL;
|
sk->sk_sndmsg_page = NULL;
|
||||||
@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
|
|||||||
* to fragment the frame generated here. No matter, what transforms
|
* to fragment the frame generated here. No matter, what transforms
|
||||||
* how transforms change size of the packet, it will come out.
|
* how transforms change size of the packet, it will come out.
|
||||||
*/
|
*/
|
||||||
if (inet->pmtudisc != IP_PMTUDISC_DO)
|
if (inet->pmtudisc < IP_PMTUDISC_DO)
|
||||||
skb->local_df = 1;
|
skb->local_df = 1;
|
||||||
|
|
||||||
/* DF bit is set when we want to see DF on outgoing frames.
|
/* DF bit is set when we want to see DF on outgoing frames.
|
||||||
* If local_df is set too, we still allow to fragment this frame
|
* If local_df is set too, we still allow to fragment this frame
|
||||||
* locally. */
|
* locally. */
|
||||||
if (inet->pmtudisc == IP_PMTUDISC_DO ||
|
if (inet->pmtudisc >= IP_PMTUDISC_DO ||
|
||||||
(skb->len <= dst_mtu(&rt->u.dst) &&
|
(skb->len <= dst_mtu(&rt->u.dst) &&
|
||||||
ip_dont_fragment(sk, &rt->u.dst)))
|
ip_dont_fragment(sk, &rt->u.dst)))
|
||||||
df = htons(IP_DF);
|
df = htons(IP_DF);
|
||||||
|
@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
|
|||||||
inet->hdrincl = val ? 1 : 0;
|
inet->hdrincl = val ? 1 : 0;
|
||||||
break;
|
break;
|
||||||
case IP_MTU_DISCOVER:
|
case IP_MTU_DISCOVER:
|
||||||
if (val<0 || val>2)
|
if (val<0 || val>3)
|
||||||
goto e_inval;
|
goto e_inval;
|
||||||
inet->pmtudisc = val;
|
inet->pmtudisc = val;
|
||||||
break;
|
break;
|
||||||
|
@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
|
|||||||
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
|
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * IPv6 counterpart of the MTU selection used by the output path.
 *
 * If the skb has an owning socket (skb->sk is non-NULL) and that socket's
 * IPv6 pmtudisc setting is IPV6_PMTUDISC_PROBE, return the MTU of the
 * outgoing device (skb->dst->dev->mtu). Otherwise, including when no
 * socket is attached, return dst_mtu(skb->dst).
 */
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
|
||||||
|
|
||||||
|
return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
|
||||||
|
skb->dst->dev->mtu : dst_mtu(skb->dst);
|
||||||
|
}
|
||||||
|
|
||||||
int ip6_output(struct sk_buff *skb)
|
int ip6_output(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
|
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
|
||||||
dst_allfrag(skb->dst))
|
dst_allfrag(skb->dst))
|
||||||
return ip6_fragment(skb, ip6_output2);
|
return ip6_fragment(skb, ip6_output2);
|
||||||
else
|
else
|
||||||
@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
|
|||||||
hlen = ip6_find_1stfragopt(skb, &prevhdr);
|
hlen = ip6_find_1stfragopt(skb, &prevhdr);
|
||||||
nexthdr = *prevhdr;
|
nexthdr = *prevhdr;
|
||||||
|
|
||||||
mtu = dst_mtu(&rt->u.dst);
|
mtu = ip6_skb_dst_mtu(skb);
|
||||||
|
|
||||||
/* We must not fragment if the socket is set to force MTU discovery
|
/* We must not fragment if the socket is set to force MTU discovery
|
||||||
* or if the skb it not generated by a local socket. (This last
|
* or if the skb it not generated by a local socket. (This last
|
||||||
@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
|
|||||||
inet->cork.fl = *fl;
|
inet->cork.fl = *fl;
|
||||||
np->cork.hop_limit = hlimit;
|
np->cork.hop_limit = hlimit;
|
||||||
np->cork.tclass = tclass;
|
np->cork.tclass = tclass;
|
||||||
mtu = dst_mtu(rt->u.dst.path);
|
mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
|
||||||
|
rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
|
||||||
if (np->frag_size < mtu) {
|
if (np->frag_size < mtu) {
|
||||||
if (np->frag_size)
|
if (np->frag_size)
|
||||||
mtu = np->frag_size;
|
mtu = np->frag_size;
|
||||||
|
@ -694,7 +694,7 @@ done:
|
|||||||
retv = ip6_ra_control(sk, val, NULL);
|
retv = ip6_ra_control(sk, val, NULL);
|
||||||
break;
|
break;
|
||||||
case IPV6_MTU_DISCOVER:
|
case IPV6_MTU_DISCOVER:
|
||||||
if (val<0 || val>2)
|
if (val<0 || val>3)
|
||||||
goto e_inval;
|
goto e_inval;
|
||||||
np->pmtudisc = val;
|
np->pmtudisc = val;
|
||||||
retv = 0;
|
retv = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user