linux/net/sched
Eric Dumazet 54ff8ad69c net_sched: sch_fq: struct sched_data reorg
q->flows can be often modified, and q->timer_slack is read mostly.

Exchange the two fields, so that cache line countaining
quantum, initial_quantum, and other critical parameters
stay clean (read-mostly).

Move q->watchdog next to q->stat_throttled

Add comments explaining how the structure is split in
three different parts.

pahole output before the patch:

struct fq_sched_data {
	struct fq_flow_head        new_flows;            /*     0  0x10 */
	struct fq_flow_head        old_flows;            /*  0x10  0x10 */
	struct rb_root             delayed;              /*  0x20   0x8 */
	u64                        time_next_delayed_flow; /*  0x28   0x8 */
	u64                        ktime_cache;          /*  0x30   0x8 */
	unsigned long              unthrottle_latency_ns; /*  0x38   0x8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct fq_flow             internal __attribute__((__aligned__(64))); /*  0x40  0x80 */

	/* XXX last struct has 16 bytes of padding */

	/* --- cacheline 3 boundary (192 bytes) --- */
	u32                        quantum;              /*  0xc0   0x4 */
	u32                        initial_quantum;      /*  0xc4   0x4 */
	u32                        flow_refill_delay;    /*  0xc8   0x4 */
	u32                        flow_plimit;          /*  0xcc   0x4 */
	unsigned long              flow_max_rate;        /*  0xd0   0x8 */
	u64                        ce_threshold;         /*  0xd8   0x8 */
	u64                        horizon;              /*  0xe0   0x8 */
	u32                        orphan_mask;          /*  0xe8   0x4 */
	u32                        low_rate_threshold;   /*  0xec   0x4 */
	struct rb_root *           fq_root;              /*  0xf0   0x8 */
	u8                         rate_enable;          /*  0xf8   0x1 */
	u8                         fq_trees_log;         /*  0xf9   0x1 */
	u8                         horizon_drop;         /*  0xfa   0x1 */

	/* XXX 1 byte hole, try to pack */

<bad>	u32                        flows;                /*  0xfc   0x4 */
	/* --- cacheline 4 boundary (256 bytes) --- */
	u32                        inactive_flows;       /* 0x100   0x4 */
	u32                        throttled_flows;      /* 0x104   0x4 */
	u64                        stat_gc_flows;        /* 0x108   0x8 */
	u64                        stat_internal_packets; /* 0x110   0x8 */
	u64                        stat_throttled;       /* 0x118   0x8 */
	u64                        stat_ce_mark;         /* 0x120   0x8 */
	u64                        stat_horizon_drops;   /* 0x128   0x8 */
	u64                        stat_horizon_caps;    /* 0x130   0x8 */
	u64                        stat_flows_plimit;    /* 0x138   0x8 */
	/* --- cacheline 5 boundary (320 bytes) --- */
	u64                        stat_pkts_too_long;   /* 0x140   0x8 */
	u64                        stat_allocation_errors; /* 0x148   0x8 */
<bad>	u32                        timer_slack;          /* 0x150   0x4 */

	/* XXX 4 bytes hole, try to pack */

	struct qdisc_watchdog      watchdog;             /* 0x158  0x48 */

	/* size: 448, cachelines: 7, members: 34 */
	/* sum members: 411, holes: 2, sum holes: 5 */
	/* padding: 32 */
	/* paddings: 1, sum paddings: 16 */
	/* forced alignments: 1 */
};

pahole output after the patch:

struct fq_sched_data {
	struct fq_flow_head        new_flows;            /*     0  0x10 */
	struct fq_flow_head        old_flows;            /*  0x10  0x10 */
	struct rb_root             delayed;              /*  0x20   0x8 */
	u64                        time_next_delayed_flow; /*  0x28   0x8 */
	u64                        ktime_cache;          /*  0x30   0x8 */
	unsigned long              unthrottle_latency_ns; /*  0x38   0x8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct fq_flow             internal __attribute__((__aligned__(64))); /*  0x40  0x80 */

	/* XXX last struct has 16 bytes of padding */

	/* --- cacheline 3 boundary (192 bytes) --- */
	u32                        quantum;              /*  0xc0   0x4 */
	u32                        initial_quantum;      /*  0xc4   0x4 */
	u32                        flow_refill_delay;    /*  0xc8   0x4 */
	u32                        flow_plimit;          /*  0xcc   0x4 */
	unsigned long              flow_max_rate;        /*  0xd0   0x8 */
	u64                        ce_threshold;         /*  0xd8   0x8 */
	u64                        horizon;              /*  0xe0   0x8 */
	u32                        orphan_mask;          /*  0xe8   0x4 */
	u32                        low_rate_threshold;   /*  0xec   0x4 */
	struct rb_root *           fq_root;              /*  0xf0   0x8 */
	u8                         rate_enable;          /*  0xf8   0x1 */
	u8                         fq_trees_log;         /*  0xf9   0x1 */
	u8                         horizon_drop;         /*  0xfa   0x1 */

	/* XXX 1 byte hole, try to pack */

<good>	u32                        timer_slack;          /*  0xfc   0x4 */
	/* --- cacheline 4 boundary (256 bytes) --- */
<good>	u32                        flows;                /* 0x100   0x4 */
	u32                        inactive_flows;       /* 0x104   0x4 */
	u32                        throttled_flows;      /* 0x108   0x4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        stat_throttled;       /* 0x110   0x8 */
<better> struct qdisc_watchdog     watchdog;             /* 0x118  0x48 */
	/* --- cacheline 5 boundary (320 bytes) was 32 bytes ago --- */
	u64                        stat_gc_flows;        /* 0x160   0x8 */
	u64                        stat_internal_packets; /* 0x168   0x8 */
	u64                        stat_ce_mark;         /* 0x170   0x8 */
	u64                        stat_horizon_drops;   /* 0x178   0x8 */
	/* --- cacheline 6 boundary (384 bytes) --- */
	u64                        stat_horizon_caps;    /* 0x180   0x8 */
	u64                        stat_flows_plimit;    /* 0x188   0x8 */
	u64                        stat_pkts_too_long;   /* 0x190   0x8 */
	u64                        stat_allocation_errors; /* 0x198   0x8 */

	/* Force padding: */
	u64                        :64;
	u64                        :64;
	u64                        :64;
	u64                        :64;

	/* size: 448, cachelines: 7, members: 34 */
	/* sum members: 411, holes: 2, sum holes: 5 */
	/* padding: 32 */
	/* paddings: 1, sum paddings: 16 */
	/* forced alignments: 1 */
};

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-10-01 13:20:36 +01:00
..
act_api.c net: sched: Replace strlcpy with strscpy 2023-07-10 08:23:53 +01:00
act_bpf.c net/sched: avoid indirect act functions on retpoline kernels 2022-12-09 09:18:07 +00:00
act_connmark.c net/sched: act_connmark: handle errno on tcf_idr_check_alloc 2023-03-01 08:19:09 +00:00
act_csum.c net: skbuff: hide csum_not_inet when CONFIG_IP_SCTP not set 2023-04-19 13:04:30 +01:00
act_ct.c net: sched: set IPS_CONFIRMED in tmpl status only when commit is set in act_ct 2023-07-20 10:06:36 +02:00
act_ctinfo.c net/sched: act_ctinfo: use percpu stats 2023-02-13 20:09:01 -08:00
act_gact.c Networking changes for 6.2. 2022-12-13 15:47:48 -08:00
act_gate.c net/sched: act_gate: use percpu stats 2023-02-16 10:39:28 +01:00
act_ife.c net/sched: avoid indirect act functions on retpoline kernels 2022-12-09 09:18:07 +00:00
act_ipt.c net/sched: act_ipt: zero skb->cb before calling target 2023-06-29 12:10:37 +02:00
act_meta_mark.c
act_meta_skbprio.c
act_meta_skbtcindex.c
act_mirred.c net/sched: act_mirred: Add carrier check 2023-05-01 07:26:10 +01:00
act_mpls.c net/sched: remove two skb_mac_header() uses 2023-03-22 22:43:23 -07:00
act_nat.c net/sched: act_nat: transition to percpu stats and rcu 2023-02-16 10:39:28 +01:00
act_pedit.c net/sched: act_pedit: Add size check for TCA_PEDIT_PARMS_EX 2023-07-04 10:31:38 +02:00
act_police.c net: move gso declarations and functions to their own files 2023-06-10 00:11:41 -07:00
act_sample.c net/sched: act_sample: fix action bind logic 2023-02-26 18:27:45 +00:00
act_simple.c net/sched: avoid indirect act functions on retpoline kernels 2022-12-09 09:18:07 +00:00
act_skbedit.c net/sched: avoid indirect act functions on retpoline kernels 2022-12-09 09:18:07 +00:00
act_skbmod.c net/sched: avoid indirect act functions on retpoline kernels 2022-12-09 09:18:07 +00:00
act_tunnel_key.c net/sched: act_tunnel_key: add support for "don't fragment" 2023-03-30 23:24:24 -07:00
act_vlan.c net/sched: avoid indirect act functions on retpoline kernels 2022-12-09 09:18:07 +00:00
cls_api.c net/sched: cls_api: Fix lockup on flushing explicitly created chain 2023-06-14 23:03:16 -07:00
cls_basic.c net/sched: avoid indirect classify functions on retpoline kernels 2022-12-09 09:18:07 +00:00
cls_bpf.c net: sched: cls_bpf: Undo tcf_bind_filter in case of an error 2023-07-17 07:33:39 +01:00
cls_cgroup.c net/sched: avoid indirect classify functions on retpoline kernels 2022-12-09 09:18:07 +00:00
cls_flow.c treewide: Convert del_timer*() to timer_shutdown*() 2022-12-25 13:38:09 -08:00
cls_flower.c Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net 2023-08-03 14:34:37 -07:00
cls_fw.c net/sched: cls_fw: No longer copy tcf_result on update to avoid use-after-free 2023-07-31 20:10:36 -07:00
cls_matchall.c net: sched: cls_matchall: Undo tcf_bind_filter in case of failure after mall_set_parms 2023-07-17 07:33:38 +01:00
cls_route.c net/sched: cls_route: make netlink errors meaningful 2023-09-13 12:38:52 +01:00
cls_u32.c net/sched: cls_u32: No longer copy tcf_result on update to avoid use-after-free 2023-07-31 20:10:36 -07:00
em_canid.c
em_cmp.c net: sched: fix misspellings using misspell-fixer tool 2020-11-10 17:00:28 -08:00
em_ipset.c
em_ipt.c
em_meta.c net: annotate data-races around sk->sk_lingertime 2023-08-21 07:41:57 +01:00
em_nbyte.c net: sched: Return the correct errno code 2021-02-06 11:15:28 -08:00
em_text.c
em_u32.c
ematch.c net_sched: reject TCF_EM_SIMPLE case for complex ematch module 2022-12-19 09:43:18 +00:00
Kconfig bpf: Add fd-based tcx multi-prog infra with link support 2023-07-19 10:07:27 -07:00
Makefile net/sched: Retire rsvp classifier 2023-02-16 09:27:07 +01:00
sch_api.c net/sched: fix a qdisc modification with ambiguous command request 2023-08-23 09:44:48 +01:00
sch_blackhole.c Revert "net: sched: Pass root lock to Qdisc_ops.enqueue" 2020-07-16 16:48:34 -07:00
sch_cake.c net: move gso declarations and functions to their own files 2023-06-10 00:11:41 -07:00
sch_cbs.c net/sched: use tc_qdisc_stats_dump() in qdisc 2022-09-22 17:34:10 -07:00
sch_choke.c treewide: use get_random_u32_below() instead of deprecated function 2022-11-18 02:15:15 +01:00
sch_codel.c net: sched: remove redundant NULL check in change hook function 2022-09-01 08:06:45 +02:00
sch_drr.c net/sched: sch_drr: warn about class in use while deleting 2023-08-01 10:47:24 +02:00
sch_etf.c net: sched: etf: remove true check in etf_enable_offload() 2022-09-01 20:08:32 -07:00
sch_ets.c net/sched: use tc_qdisc_stats_dump() in qdisc 2022-09-22 17:34:10 -07:00
sch_fifo.c net_sched: fix NULL deref in fifo_set_limit() 2021-10-01 14:59:10 -07:00
sch_fq_codel.c Revert "net: sched: fq_codel: remove redundant resource cleanup in fq_codel_init()" 2022-10-19 13:47:09 +01:00
sch_fq_pie.c net/sched: fq_pie: avoid stalls in fq_pie_timer() 2023-08-31 11:21:52 +02:00
sch_fq.c net_sched: sch_fq: struct sched_data reorg 2023-10-01 13:20:36 +01:00
sch_frag.c net: dst: remove unnecessary input parameter in dst_alloc and dst_init 2023-09-12 11:42:25 +02:00
sch_generic.c net/sched: qdisc_destroy() old ingress and clsact Qdiscs before grafting 2023-06-14 10:31:39 +02:00
sch_gred.c net: sched: gred: prevent races when adding offloads to stats 2023-01-18 20:28:25 -08:00
sch_hfsc.c Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net 2023-08-29 07:44:56 +02:00
sch_hhf.c net: sched: remove redundant NULL check in change hook function 2022-09-01 08:06:45 +02:00
sch_htb.c net/sched: sch_htb: warn about class in use while deleting 2023-08-01 10:47:24 +02:00
sch_ingress.c tcx: Fix splat in ingress_destroy upon tcx_entry_free 2023-07-24 11:42:35 -07:00
sch_mq.c net: sched: add rcu annotations around qdisc->qdisc_sleeping 2023-06-07 10:25:39 +01:00
sch_mqprio_lib.c net/sched: mqprio: allow per-TC user input of FP adminStatus 2023-04-13 22:22:10 -07:00
sch_mqprio_lib.h net/sched: mqprio: allow per-TC user input of FP adminStatus 2023-04-13 22:22:10 -07:00
sch_mqprio.c net/sched: mqprio: Add length check for TCA_MQPRIO_{MAX/MIN}_RATE64 2023-07-26 22:08:14 -07:00
sch_multiq.c net/sched: use tc_qdisc_stats_dump() in qdisc 2022-09-22 17:34:10 -07:00
sch_netem.c netem: use seeded PRNG for correlated loss events 2023-08-17 19:15:06 -07:00
sch_pie.c net: sched: add rcu annotations around qdisc->qdisc_sleeping 2023-06-07 10:25:39 +01:00
sch_plug.c net: sched: sch_qfq: Fix UAF in qfq_dequeue() 2023-09-05 08:54:12 +02:00
sch_prio.c net/sched: use tc_qdisc_stats_dump() in qdisc 2022-09-22 17:34:10 -07:00
sch_qfq.c net: sched: sch_qfq: Fix UAF in qfq_dequeue() 2023-09-05 08:54:12 +02:00
sch_red.c net: sched: add rcu annotations around qdisc->qdisc_sleeping 2023-06-07 10:25:39 +01:00
sch_sfb.c Networking fixes for 6.1-rc2, including fixes from netfilter 2022-10-20 17:24:59 -07:00
sch_sfq.c net: sched: add rcu annotations around qdisc->qdisc_sleeping 2023-06-07 10:25:39 +01:00
sch_skbprio.c net/sched: use tc_qdisc_stats_dump() in qdisc 2022-09-22 17:34:10 -07:00
sch_taprio.c net/sched: taprio: dump class stats for the actual q->qdiscs[] 2023-08-09 15:59:21 -07:00
sch_tbf.c net: move gso declarations and functions to their own files 2023-06-10 00:11:41 -07:00
sch_teql.c net: sched: add rcu annotations around qdisc->qdisc_sleeping 2023-06-07 10:25:39 +01:00