Files
linux/net/bridge/br_forward.c
Tobias Waldekranz 472111920f net: bridge: switchdev: allow the TX data plane forwarding to be offloaded
Allow switchdevs to forward frames from the CPU in accordance with the
bridge configuration in the same way as is done between bridge
ports. This means that the bridge will only send a single skb towards
one of the ports under the switchdev's control, and expects the driver
to deliver the packet to all eligible ports in its domain.

Primarily this improves the performance of multicast flows with
multiple subscribers, as it allows the hardware to perform the frame
replication.

The basic flow between the driver and the bridge is as follows:

- When joining a bridge port, the switchdev driver calls
  switchdev_bridge_port_offload() with tx_fwd_offload = true.

- The bridge sends offloadable skbs to one of the ports under the
  switchdev's control using skb->offload_fwd_mark = true.

- The switchdev driver checks the skb->offload_fwd_mark field and lets
  its FDB lookup select the destination port mask for this packet.

v1->v2:
- convert br_input_skb_cb::fwd_hwdoms to a plain unsigned long
- introduce a static key "br_switchdev_fwd_offload_used" to minimize the
  impact of the newly introduced feature on all the setups which don't
  have hardware that can make use of it
- introduce a check for nbp->flags & BR_FWD_OFFLOAD to optimize cache
  line access
- reorder nbp_switchdev_frame_mark_accel() and br_handle_vlan() in
  __br_forward()
- do not strip VLAN on egress if forwarding offload on VLAN-aware bridge
  is being used
- propagate errors from .ndo_dfwd_add_station() if not EOPNOTSUPP

v2->v3:
- replace the solution based on .ndo_dfwd_add_station with a solution
  based on switchdev_bridge_port_offload
- rename BR_FWD_OFFLOAD to BR_TX_FWD_OFFLOAD
v3->v4: rebase
v4->v5:
- make sure the static key is decremented on bridge port unoffload
- more function and variable renaming and comments for them:
  br_switchdev_fwd_offload_used to br_switchdev_tx_fwd_offload
  br_switchdev_accels_skb to br_switchdev_frame_uses_tx_fwd_offload
  nbp_switchdev_frame_mark_tx_fwd to nbp_switchdev_frame_mark_tx_fwd_to_hwdom
  nbp_switchdev_frame_mark_accel to nbp_switchdev_frame_mark_tx_fwd_offload
  fwd_accel to tx_fwd_offload

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-23 16:32:37 +01:00

344 lines
7.8 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Forwarding decision
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*/
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/netpoll.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/netfilter_bridge.h>
#include "br_private.h"
/* Don't forward packets to originating port or forwarding disabled */
static inline int should_deliver(const struct net_bridge_port *p,
const struct sk_buff *skb)
{
struct net_bridge_vlan_group *vg;
vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
p->state == BR_STATE_FORWARDING && br_allowed_egress(vg, skb) &&
nbp_switchdev_allowed_egress(p, skb) &&
!br_skb_isolated(p, skb);
}
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_push(skb, ETH_HLEN);
if (!is_skb_forwardable(skb->dev, skb))
goto drop;
br_drop_fake_rtable(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL &&
eth_type_vlan(skb->protocol)) {
int depth;
if (!__vlan_get_protocol(skb, skb->protocol, &depth))
goto drop;
skb_set_network_header(skb, depth);
}
skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
dev_queue_xmit(skb);
return 0;
drop:
kfree_skb(skb);
return 0;
}
EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->tstamp = 0;
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
}
EXPORT_SYMBOL_GPL(br_forward_finish);
static void __br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, bool local_orig)
{
struct net_bridge_vlan_group *vg;
struct net_device *indev;
struct net *net;
int br_hook;
/* Mark the skb for forwarding offload early so that br_handle_vlan()
* can know whether to pop the VLAN header on egress or keep it.
*/
nbp_switchdev_frame_mark_tx_fwd_offload(to, skb);
vg = nbp_vlan_group_rcu(to);
skb = br_handle_vlan(to->br, to, vg, skb);
if (!skb)
return;
indev = skb->dev;
skb->dev = to->dev;
if (!local_orig) {
if (skb_warn_if_lro(skb)) {
kfree_skb(skb);
return;
}
br_hook = NF_BR_FORWARD;
skb_forward_csum(skb);
net = dev_net(indev);
} else {
if (unlikely(netpoll_tx_running(to->br->dev))) {
skb_push(skb, ETH_HLEN);
if (!is_skb_forwardable(skb->dev, skb))
kfree_skb(skb);
else
br_netpoll_send_skb(to, skb);
return;
}
br_hook = NF_BR_LOCAL_OUT;
net = dev_net(skb->dev);
indev = NULL;
}
NF_HOOK(NFPROTO_BRIDGE, br_hook,
net, NULL, skb, indev, skb->dev,
br_forward_finish);
}
static int deliver_clone(const struct net_bridge_port *prev,
struct sk_buff *skb, bool local_orig)
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb) {
dev->stats.tx_dropped++;
return -ENOMEM;
}
__br_forward(prev, skb, local_orig);
return 0;
}
/**
* br_forward - forward a packet to a specific port
* @to: destination port
* @skb: packet being forwarded
* @local_rcv: packet will be received locally after forwarding
* @local_orig: packet is locally originated
*
* Should be called with rcu_read_lock.
*/
void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, bool local_rcv, bool local_orig)
{
if (unlikely(!to))
goto out;
/* redirect to backup link if the destination port is down */
if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) {
struct net_bridge_port *backup_port;
backup_port = rcu_dereference(to->backup_port);
if (unlikely(!backup_port))
goto out;
to = backup_port;
}
if (should_deliver(to, skb)) {
if (local_rcv)
deliver_clone(to, skb, local_orig);
else
__br_forward(to, skb, local_orig);
return;
}
out:
if (!local_rcv)
kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(br_forward);
static struct net_bridge_port *maybe_deliver(
struct net_bridge_port *prev, struct net_bridge_port *p,
struct sk_buff *skb, bool local_orig)
{
u8 igmp_type = br_multicast_igmp_type(skb);
int err;
if (!should_deliver(p, skb))
return prev;
nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb);
if (!prev)
goto out;
err = deliver_clone(prev, skb, local_orig);
if (err)
return ERR_PTR(err);
out:
br_multicast_count(p->br, p, skb, igmp_type, BR_MCAST_DIR_TX);
return p;
}
/* called under rcu_read_lock */
void br_flood(struct net_bridge *br, struct sk_buff *skb,
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
{
struct net_bridge_port *prev = NULL;
struct net_bridge_port *p;
list_for_each_entry_rcu(p, &br->port_list, list) {
/* Do not flood unicast traffic to ports that turn it off, nor
* other traffic if flood off, except for traffic we originate
*/
switch (pkt_type) {
case BR_PKT_UNICAST:
if (!(p->flags & BR_FLOOD))
continue;
break;
case BR_PKT_MULTICAST:
if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
continue;
break;
case BR_PKT_BROADCAST:
if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev)
continue;
break;
}
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
continue;
if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
BR_INPUT_SKB_CB(skb)->proxyarp_replied)
continue;
prev = maybe_deliver(prev, p, skb, local_orig);
if (IS_ERR(prev))
goto out;
}
if (!prev)
goto out;
if (local_rcv)
deliver_clone(prev, skb, local_orig);
else
__br_forward(prev, skb, local_orig);
return;
out:
if (!local_rcv)
kfree_skb(skb);
}
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
const unsigned char *addr, bool local_orig)
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
const unsigned char *src = eth_hdr(skb)->h_source;
if (!should_deliver(p, skb))
return;
/* Even with hairpin, no soliloquies - prevent breaking IPv6 DAD */
if (skb->dev == p->dev && ether_addr_equal(src, addr))
return;
skb = skb_copy(skb, GFP_ATOMIC);
if (!skb) {
dev->stats.tx_dropped++;
return;
}
if (!is_broadcast_ether_addr(addr))
memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN);
__br_forward(p, skb, local_orig);
}
/* called with rcu_read_lock */
void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
struct net_bridge_mcast *brmctx,
bool local_rcv, bool local_orig)
{
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
bool allow_mode_include = true;
struct hlist_node *rp;
rp = br_multicast_get_first_rport_node(brmctx, skb);
if (mdst) {
p = rcu_dereference(mdst->ports);
if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) &&
br_multicast_is_star_g(&mdst->addr))
allow_mode_include = false;
} else {
p = NULL;
}
while (p || rp) {
struct net_bridge_port *port, *lport, *rport;
lport = p ? p->key.port : NULL;
rport = br_multicast_rport_from_node_skb(rp, skb);
if ((unsigned long)lport > (unsigned long)rport) {
port = lport;
if (port->flags & BR_MULTICAST_TO_UNICAST) {
maybe_deliver_addr(lport, skb, p->eth_addr,
local_orig);
goto delivered;
}
if ((!allow_mode_include &&
p->filter_mode == MCAST_INCLUDE) ||
(p->flags & MDB_PG_FLAGS_BLOCKED))
goto delivered;
} else {
port = rport;
}
prev = maybe_deliver(prev, port, skb, local_orig);
if (IS_ERR(prev))
goto out;
delivered:
if ((unsigned long)lport >= (unsigned long)port)
p = rcu_dereference(p->next);
if ((unsigned long)rport >= (unsigned long)port)
rp = rcu_dereference(hlist_next_rcu(rp));
}
if (!prev)
goto out;
if (local_rcv)
deliver_clone(prev, skb, local_orig);
else
__br_forward(prev, skb, local_orig);
return;
out:
if (!local_rcv)
kfree_skb(skb);
}
#endif