mirror of
https://github.com/torvalds/linux.git
synced 2024-12-05 18:41:23 +00:00
38a6f08657
This patch allows users to pick a queue_mapping range from A to B,
so that packets can be load balanced across tx queues A to B.
Each bound of the range is an unsigned 16-bit value in decimal
format.
$ tc filter ... action skbedit queue_mapping skbhash A B
"skbedit queue_mapping QUEUE_MAPPING" (from "man 8 tc-skbedit")
is enhanced with flags: SKBEDIT_F_TXQ_SKBHASH
+----+      +----+      +----+
| P1 |      | P2 |      | Pn |
+----+      +----+      +----+
  |           |           |
  +-----------+-----------+
              |
              | clsact/skbedit
              |      MQ
              v
  +-----------+-----------+
  | q0        | qn        | qm
  v           v           v
HTB/FQ       FIFO   ...  FIFO
For example:
If P1 sends packets to different Pods on another host and we want
to distribute its flows across queues qn - qm, we can use skb->hash
as the hash.
setup commands:
$ NETDEV=eth0
$ ip netns add n1
$ ip link add ipv1 link $NETDEV type ipvlan mode l2
$ ip link set ipv1 netns n1
$ ip netns exec n1 ifconfig ipv1 2.2.2.100/24 up
$ tc qdisc add dev $NETDEV clsact
$ tc filter add dev $NETDEV egress protocol ip prio 1 \
flower skip_hw src_ip 2.2.2.100 action skbedit queue_mapping skbhash 2 6
$ tc qdisc add dev $NETDEV handle 1: root mq
$ tc qdisc add dev $NETDEV parent 1:1 handle 2: htb
$ tc class add dev $NETDEV parent 2: classid 2:1 htb rate 100kbit
$ tc class add dev $NETDEV parent 2: classid 2:2 htb rate 200kbit
$ tc qdisc add dev $NETDEV parent 1:2 tbf rate 100mbit burst 100mb latency 1
$ tc qdisc add dev $NETDEV parent 1:3 pfifo
$ tc qdisc add dev $NETDEV parent 1:4 pfifo
$ tc qdisc add dev $NETDEV parent 1:5 pfifo
$ tc qdisc add dev $NETDEV parent 1:6 pfifo
$ tc qdisc add dev $NETDEV parent 1:7 pfifo
$ ip netns exec n1 iperf3 -c 2.2.2.1 -i 1 -t 10 -P 10
pick txqueue from 2 - 6:
$ ethtool -S $NETDEV | grep -i tx_queue_[0-9]_bytes
tx_queue_0_bytes: 42
tx_queue_1_bytes: 0
tx_queue_2_bytes: 11442586444
tx_queue_3_bytes: 7383615334
tx_queue_4_bytes: 3981365579
tx_queue_5_bytes: 3983235051
tx_queue_6_bytes: 6706236461
tx_queue_7_bytes: 42
tx_queue_8_bytes: 0
tx_queue_9_bytes: 0
txqueues 2 - 6 are mapped to classid 1:3 - 1:7
$ tc -s class show dev $NETDEV
...
class mq 1:3 root leaf 8002:
Sent 11949133672 bytes 7929798 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
class mq 1:4 root leaf 8003:
Sent 7710449050 bytes 5117279 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
class mq 1:5 root leaf 8004:
Sent 4157648675 bytes 2758990 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
class mq 1:6 root leaf 8005:
Sent 4159632195 bytes 2759990 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
class mq 1:7 root leaf 8006:
Sent 7003169603 bytes 4646912 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
...
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Lobakin <alobakin@pm.me>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Talal Ahmad <talalahmad@google.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Antoine Tenart <atenart@kernel.org>
Cc: Wei Wang <weiwan@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
111 lines
2.4 KiB
C
111 lines
2.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2008, Intel Corporation.
 *
 * Author: Alexander Duyck <alexander.h.duyck@intel.com>
 */
|
|
|
|
#ifndef __NET_TC_SKBEDIT_H
|
|
#define __NET_TC_SKBEDIT_H
|
|
|
|
#include <net/act_api.h>
|
|
#include <linux/tc_act/tc_skbedit.h>
|
|
|
|
struct tcf_skbedit_params {
|
|
u32 flags;
|
|
u32 priority;
|
|
u32 mark;
|
|
u32 mask;
|
|
u16 queue_mapping;
|
|
u16 mapping_mod;
|
|
u16 ptype;
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
struct tcf_skbedit {
|
|
struct tc_action common;
|
|
struct tcf_skbedit_params __rcu *params;
|
|
};
|
|
/*
 * Reinterpret a struct tc_action pointer as the struct tcf_skbedit that
 * embeds it as its first member.  The cast also drops any const qualifier
 * on the argument.
 *
 * The argument is parenthesized so that an expression argument (for example
 * pointer arithmetic) is fully evaluated before the cast is applied; a cast
 * binds tighter than binary operators.
 */
#define to_skbedit(a) ((struct tcf_skbedit *)(a))
|
|
|
|
/* Return true iff action is the one identified by FLAG. */
|
|
static inline bool is_tcf_skbedit_with_flag(const struct tc_action *a, u32 flag)
|
|
{
|
|
#ifdef CONFIG_NET_CLS_ACT
|
|
u32 flags;
|
|
|
|
if (a->ops && a->ops->id == TCA_ID_SKBEDIT) {
|
|
rcu_read_lock();
|
|
flags = rcu_dereference(to_skbedit(a)->params)->flags;
|
|
rcu_read_unlock();
|
|
return flags == flag;
|
|
}
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
/* Return true iff action is mark */
|
|
static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
|
|
{
|
|
return is_tcf_skbedit_with_flag(a, SKBEDIT_F_MARK);
|
|
}
|
|
|
|
static inline u32 tcf_skbedit_mark(const struct tc_action *a)
|
|
{
|
|
u32 mark;
|
|
|
|
rcu_read_lock();
|
|
mark = rcu_dereference(to_skbedit(a)->params)->mark;
|
|
rcu_read_unlock();
|
|
|
|
return mark;
|
|
}
|
|
|
|
/* Return true iff action is ptype */
|
|
static inline bool is_tcf_skbedit_ptype(const struct tc_action *a)
|
|
{
|
|
return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PTYPE);
|
|
}
|
|
|
|
static inline u32 tcf_skbedit_ptype(const struct tc_action *a)
|
|
{
|
|
u16 ptype;
|
|
|
|
rcu_read_lock();
|
|
ptype = rcu_dereference(to_skbedit(a)->params)->ptype;
|
|
rcu_read_unlock();
|
|
|
|
return ptype;
|
|
}
|
|
|
|
/* Return true iff action is priority */
|
|
static inline bool is_tcf_skbedit_priority(const struct tc_action *a)
|
|
{
|
|
return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PRIORITY);
|
|
}
|
|
|
|
static inline u32 tcf_skbedit_priority(const struct tc_action *a)
|
|
{
|
|
u32 priority;
|
|
|
|
rcu_read_lock();
|
|
priority = rcu_dereference(to_skbedit(a)->params)->priority;
|
|
rcu_read_unlock();
|
|
|
|
return priority;
|
|
}
|
|
|
|
/* Return true iff action is queue_mapping */
|
|
static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a)
|
|
{
|
|
return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING);
|
|
}
|
|
|
|
/* Return true iff action is inheritdsfield */
|
|
static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a)
|
|
{
|
|
return is_tcf_skbedit_with_flag(a, SKBEDIT_F_INHERITDSFIELD);
|
|
}
|
|
|
|
#endif /* __NET_TC_SKBEDIT_H */
|