net: Add skb->mono_delivery_time to distinguish mono delivery_time from (rcv) timestamp
skb->tstamp was first used as the (rcv) timestamp.
The major usage is to report it to the user (e.g. SO_TIMESTAMP).
Later, skb->tstamp is also set as the (future) delivery_time (e.g. EDT in TCP)
during egress and used by the qdisc (e.g. sch_fq) to decide when
the skb can be passed to the dev.
Currently, there is no way to tell whether skb->tstamp holds the (rcv)
timestamp or the delivery_time, so it is always reset to 0 whenever the
skb is forwarded between egress and ingress.
While it makes sense to always clear the (rcv) timestamp in skb->tstamp
to avoid confusing sch_fq that expects the delivery_time, it is a
performance issue [0] to clear the delivery_time if the skb finally
egresses to a fq@phy-dev. For example, when forwarding from egress to
ingress and then finally back to egress:
tcp-sender => veth@netns => veth@hostns => fq@eth0@hostns
                     ^              ^
                     reset          reset
This patch adds one bit, skb->mono_delivery_time, to flag that skb->tstamp
is storing the mono delivery_time (EDT) instead of the (rcv) timestamp.
The current use case is to keep the TCP mono delivery_time (EDT) and
to be used with sch_fq. A later patch will also allow tc-bpf@ingress
to read and change the mono delivery_time.
In the future, another bit (e.g. skb->user_delivery_time) can be added
for the SCM_TXTIME where the clock base is tracked by sk->sk_clockid.
[ This patch is prep work. The following patches will
get the other parts of the stack ready first. Then another patch
after that will finally set the skb->mono_delivery_time. ]
skb_set_delivery_time() function is added. It is used by the tcp_output.c
and during ip[6] fragmentation to assign the delivery_time to
the skb->tstamp and also set the skb->mono_delivery_time.
A note on the change in ip_send_unicast_reply() in ip_output.c.
It is only used by TCP to send reset/ack out of a ctl_sk.
Like the new skb_set_delivery_time(), this patch sets
the skb->mono_delivery_time to 0 for now as a
placeholder. It will be enabled in a later patch.
A similar case in tcp_ipv6 can be done with
skb_set_delivery_time() in tcp_v6_send_response().
[0] (slide 22): https://linuxplumbersconf.org/event/11/contributions/953/attachments/867/1658/LPC_2021_BPF_Datapath_Extensions.pdf
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
6fb8661c8f
commit
a1ac9c8ace
@@ -1253,7 +1253,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
||||
tp = tcp_sk(sk);
|
||||
prior_wstamp = tp->tcp_wstamp_ns;
|
||||
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
|
||||
skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
|
||||
if (clone_it) {
|
||||
oskb = skb;
|
||||
|
||||
@@ -1589,7 +1589,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
|
||||
|
||||
skb_split(skb, buff, len);
|
||||
|
||||
buff->tstamp = skb->tstamp;
|
||||
skb_set_delivery_time(buff, skb->tstamp, true);
|
||||
tcp_fragment_tstamp(skb, buff);
|
||||
|
||||
old_factor = tcp_skb_pcount(skb);
|
||||
@@ -2616,7 +2616,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
|
||||
|
||||
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
|
||||
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
|
||||
skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
|
||||
tp->tcp_wstamp_ns = tp->tcp_clock_cache;
|
||||
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
|
||||
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
|
||||
tcp_init_tso_segs(skb, mss_now);
|
||||
goto repair; /* Skip network transmission */
|
||||
@@ -3541,11 +3542,12 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
|
||||
now = tcp_clock_ns();
|
||||
#ifdef CONFIG_SYN_COOKIES
|
||||
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
|
||||
skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
|
||||
skb_set_delivery_time(skb, cookie_init_timestamp(req, now),
|
||||
true);
|
||||
else
|
||||
#endif
|
||||
{
|
||||
skb->skb_mstamp_ns = now;
|
||||
skb_set_delivery_time(skb, now, true);
|
||||
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
|
||||
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
|
||||
}
|
||||
@@ -3594,7 +3596,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
|
||||
bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
|
||||
synack_type, &opts);
|
||||
|
||||
skb->skb_mstamp_ns = now;
|
||||
skb_set_delivery_time(skb, now, true);
|
||||
tcp_add_tx_delay(skb, tp);
|
||||
|
||||
return skb;
|
||||
@@ -3771,7 +3773,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
|
||||
|
||||
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
|
||||
|
||||
syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
|
||||
skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true);
|
||||
|
||||
/* Now full SYN+DATA was cloned and sent (or not),
|
||||
* remove the SYN from the original skb (syn_data)
|
||||
|
||||
Reference in New Issue
Block a user