forked from Minki/linux
6bc506b4fb
switchdev_port_fwd_mark_set() is used to set the 'offload_fwd_mark' of port netdevs so that packets being flooded by the device won't be flooded twice. It works by assigning a unique identifier (the ifindex of the first bridge port) to bridge ports sharing the same parent ID. This prevents packets from being flooded twice by the same switch, but will flood packets through bridge ports belonging to a different switch. This method is problematic when stacked devices are taken into account, such as VLANs. In such cases, a physical port netdev can have upper devices being members in two different bridges, thus requiring two different 'offload_fwd_mark's to be configured on the port netdev, which is impossible. The main problem is that packet and netdev marking is performed at the physical netdev level, whereas flooding occurs between bridge ports, which are not necessarily port netdevs. Instead, packet and netdev marking should really be done in the bridge driver with the switch driver only telling it which packets it already forwarded. The bridge driver will mark such packets using the mark assigned to the ingress bridge port and will prevent the packet from being forwarded through any bridge port sharing the same mark (i.e. having the same parent ID). Remove the current switchdev 'offload_fwd_mark' implementation and instead implement the proposed method. In addition, make rocker - the sole user of the mark - use the proposed method. Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
335 lines
8.4 KiB
C
335 lines
8.4 KiB
C
/*
|
|
* Handle incoming frames
|
|
* Linux ethernet bridge
|
|
*
|
|
* Authors:
|
|
* Lennert Buytenhek <buytenh@gnu.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/slab.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/etherdevice.h>
|
|
#include <linux/netfilter_bridge.h>
|
|
#include <linux/neighbour.h>
|
|
#include <net/arp.h>
|
|
#include <linux/export.h>
|
|
#include <linux/rculist.h>
|
|
#include "br_private.h"
|
|
|
|
/* Hook for brouter */
|
|
br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
|
|
EXPORT_SYMBOL(br_should_route_hook);
|
|
|
|
/*
 * Completion callback given to NF_HOOK() by br_pass_frame_up(): feed the
 * frame to netif_receive_skb().  @net and @sk are not used here; they exist
 * only so the function has the callback signature NF_HOOK() is given.
 */
static int br_netif_receive_skb(struct net *net, struct sock *sk,
				struct sk_buff *skb)
{
	return netif_receive_skb(skb);
}
|
|
|
|
static int br_pass_frame_up(struct sk_buff *skb)
|
|
{
|
|
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
|
|
struct net_bridge *br = netdev_priv(brdev);
|
|
struct net_bridge_vlan_group *vg;
|
|
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
|
|
|
|
u64_stats_update_begin(&brstats->syncp);
|
|
brstats->rx_packets++;
|
|
brstats->rx_bytes += skb->len;
|
|
u64_stats_update_end(&brstats->syncp);
|
|
|
|
vg = br_vlan_group_rcu(br);
|
|
/* Bridge is just like any other port. Make sure the
|
|
* packet is allowed except in promisc modue when someone
|
|
* may be running packet capture.
|
|
*/
|
|
if (!(brdev->flags & IFF_PROMISC) &&
|
|
!br_allowed_egress(vg, skb)) {
|
|
kfree_skb(skb);
|
|
return NET_RX_DROP;
|
|
}
|
|
|
|
indev = skb->dev;
|
|
skb->dev = brdev;
|
|
skb = br_handle_vlan(br, vg, skb);
|
|
if (!skb)
|
|
return NET_RX_DROP;
|
|
/* update the multicast stats if the packet is IGMP/MLD */
|
|
br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
|
|
BR_MCAST_DIR_TX);
|
|
|
|
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
|
|
dev_net(indev), NULL, skb, indev, NULL,
|
|
br_netif_receive_skb);
|
|
}
|
|
|
|
static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
|
|
u16 vid, struct net_bridge_port *p)
|
|
{
|
|
struct net_device *dev = br->dev;
|
|
struct neighbour *n;
|
|
struct arphdr *parp;
|
|
u8 *arpptr, *sha;
|
|
__be32 sip, tip;
|
|
|
|
BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
|
|
|
|
if (dev->flags & IFF_NOARP)
|
|
return;
|
|
|
|
if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
|
|
dev->stats.tx_dropped++;
|
|
return;
|
|
}
|
|
parp = arp_hdr(skb);
|
|
|
|
if (parp->ar_pro != htons(ETH_P_IP) ||
|
|
parp->ar_op != htons(ARPOP_REQUEST) ||
|
|
parp->ar_hln != dev->addr_len ||
|
|
parp->ar_pln != 4)
|
|
return;
|
|
|
|
arpptr = (u8 *)parp + sizeof(struct arphdr);
|
|
sha = arpptr;
|
|
arpptr += dev->addr_len; /* sha */
|
|
memcpy(&sip, arpptr, sizeof(sip));
|
|
arpptr += sizeof(sip);
|
|
arpptr += dev->addr_len; /* tha */
|
|
memcpy(&tip, arpptr, sizeof(tip));
|
|
|
|
if (ipv4_is_loopback(tip) ||
|
|
ipv4_is_multicast(tip))
|
|
return;
|
|
|
|
n = neigh_lookup(&arp_tbl, &tip, dev);
|
|
if (n) {
|
|
struct net_bridge_fdb_entry *f;
|
|
|
|
if (!(n->nud_state & NUD_VALID)) {
|
|
neigh_release(n);
|
|
return;
|
|
}
|
|
|
|
f = __br_fdb_get(br, n->ha, vid);
|
|
if (f && ((p->flags & BR_PROXYARP) ||
|
|
(f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
|
|
arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
|
|
sha, n->ha, sha);
|
|
BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
|
|
}
|
|
|
|
neigh_release(n);
|
|
}
|
|
}
|
|
|
|
/* note: already called with rcu_read_lock */
|
|
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
bool local_rcv = false, mcast_hit = false, unicast = true;
|
|
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
|
|
const unsigned char *dest = eth_hdr(skb)->h_dest;
|
|
struct net_bridge_fdb_entry *dst = NULL;
|
|
struct net_bridge_mdb_entry *mdst;
|
|
struct net_bridge *br;
|
|
u16 vid = 0;
|
|
|
|
if (!p || p->state == BR_STATE_DISABLED)
|
|
goto drop;
|
|
|
|
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
|
|
goto out;
|
|
|
|
nbp_switchdev_frame_mark(p, skb);
|
|
|
|
/* insert into forwarding database after filtering to avoid spoofing */
|
|
br = p->br;
|
|
if (p->flags & BR_LEARNING)
|
|
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
|
|
|
|
if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
|
|
br_multicast_rcv(br, p, skb, vid))
|
|
goto drop;
|
|
|
|
if (p->state == BR_STATE_LEARNING)
|
|
goto drop;
|
|
|
|
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
|
|
|
|
local_rcv = !!(br->dev->flags & IFF_PROMISC);
|
|
|
|
if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
|
|
br_do_proxy_arp(skb, br, vid, p);
|
|
|
|
if (is_broadcast_ether_addr(dest)) {
|
|
local_rcv = true;
|
|
unicast = false;
|
|
} else if (is_multicast_ether_addr(dest)) {
|
|
mdst = br_mdb_get(br, skb, vid);
|
|
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
|
|
br_multicast_querier_exists(br, eth_hdr(skb))) {
|
|
if ((mdst && mdst->mglist) ||
|
|
br_multicast_is_router(br)) {
|
|
local_rcv = true;
|
|
br->dev->stats.multicast++;
|
|
}
|
|
mcast_hit = true;
|
|
} else {
|
|
local_rcv = true;
|
|
br->dev->stats.multicast++;
|
|
}
|
|
unicast = false;
|
|
} else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) {
|
|
/* Do not forward the packet since it's local. */
|
|
return br_pass_frame_up(skb);
|
|
}
|
|
|
|
if (dst) {
|
|
dst->used = jiffies;
|
|
br_forward(dst->dst, skb, local_rcv, false);
|
|
} else {
|
|
if (!mcast_hit)
|
|
br_flood(br, skb, unicast, local_rcv, false);
|
|
else
|
|
br_multicast_flood(mdst, skb, local_rcv, false);
|
|
}
|
|
|
|
if (local_rcv)
|
|
return br_pass_frame_up(skb);
|
|
|
|
out:
|
|
return 0;
|
|
drop:
|
|
kfree_skb(skb);
|
|
goto out;
|
|
}
|
|
EXPORT_SYMBOL_GPL(br_handle_frame_finish);
|
|
|
|
static void __br_handle_local_finish(struct sk_buff *skb)
|
|
{
|
|
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
|
|
u16 vid = 0;
|
|
|
|
/* check if vlan is allowed, to avoid spoofing */
|
|
if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
|
|
br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
|
|
}
|
|
|
|
/* note: already called with rcu_read_lock */
|
|
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
|
|
{
|
|
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
|
|
|
|
__br_handle_local_finish(skb);
|
|
|
|
BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
|
|
br_pass_frame_up(skb);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Return NULL if skb is handled
|
|
* note: already called with rcu_read_lock
|
|
*/
|
|
rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
|
|
{
|
|
struct net_bridge_port *p;
|
|
struct sk_buff *skb = *pskb;
|
|
const unsigned char *dest = eth_hdr(skb)->h_dest;
|
|
br_should_route_hook_t *rhook;
|
|
|
|
if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
|
|
return RX_HANDLER_PASS;
|
|
|
|
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
|
|
goto drop;
|
|
|
|
skb = skb_share_check(skb, GFP_ATOMIC);
|
|
if (!skb)
|
|
return RX_HANDLER_CONSUMED;
|
|
|
|
p = br_port_get_rcu(skb->dev);
|
|
|
|
if (unlikely(is_link_local_ether_addr(dest))) {
|
|
u16 fwd_mask = p->br->group_fwd_mask_required;
|
|
|
|
/*
|
|
* See IEEE 802.1D Table 7-10 Reserved addresses
|
|
*
|
|
* Assignment Value
|
|
* Bridge Group Address 01-80-C2-00-00-00
|
|
* (MAC Control) 802.3 01-80-C2-00-00-01
|
|
* (Link Aggregation) 802.3 01-80-C2-00-00-02
|
|
* 802.1X PAE address 01-80-C2-00-00-03
|
|
*
|
|
* 802.1AB LLDP 01-80-C2-00-00-0E
|
|
*
|
|
* Others reserved for future standardization
|
|
*/
|
|
switch (dest[5]) {
|
|
case 0x00: /* Bridge Group Address */
|
|
/* If STP is turned off,
|
|
then must forward to keep loop detection */
|
|
if (p->br->stp_enabled == BR_NO_STP ||
|
|
fwd_mask & (1u << dest[5]))
|
|
goto forward;
|
|
*pskb = skb;
|
|
__br_handle_local_finish(skb);
|
|
return RX_HANDLER_PASS;
|
|
|
|
case 0x01: /* IEEE MAC (Pause) */
|
|
goto drop;
|
|
|
|
case 0x0E: /* 802.1AB LLDP */
|
|
fwd_mask |= p->br->group_fwd_mask;
|
|
if (fwd_mask & (1u << dest[5]))
|
|
goto forward;
|
|
*pskb = skb;
|
|
__br_handle_local_finish(skb);
|
|
return RX_HANDLER_PASS;
|
|
|
|
default:
|
|
/* Allow selective forwarding for most other protocols */
|
|
fwd_mask |= p->br->group_fwd_mask;
|
|
if (fwd_mask & (1u << dest[5]))
|
|
goto forward;
|
|
}
|
|
|
|
/* Deliver packet to local host only */
|
|
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
|
|
NULL, skb, skb->dev, NULL, br_handle_local_finish);
|
|
return RX_HANDLER_CONSUMED;
|
|
}
|
|
|
|
forward:
|
|
switch (p->state) {
|
|
case BR_STATE_FORWARDING:
|
|
rhook = rcu_dereference(br_should_route_hook);
|
|
if (rhook) {
|
|
if ((*rhook)(skb)) {
|
|
*pskb = skb;
|
|
return RX_HANDLER_PASS;
|
|
}
|
|
dest = eth_hdr(skb)->h_dest;
|
|
}
|
|
/* fall through */
|
|
case BR_STATE_LEARNING:
|
|
if (ether_addr_equal(p->br->dev->dev_addr, dest))
|
|
skb->pkt_type = PACKET_HOST;
|
|
|
|
NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
|
|
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
|
|
br_handle_frame_finish);
|
|
break;
|
|
default:
|
|
drop:
|
|
kfree_skb(skb);
|
|
}
|
|
return RX_HANDLER_CONSUMED;
|
|
}
|