linux/net/bridge/br_private.h

815 lines
22 KiB
C
Raw Normal View History

/*
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _BR_PRIVATE_H
#define _BR_PRIVATE_H
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
#include <linux/u64_stats_sync.h>
#include <net/route.h>
#include <linux/if_vlan.h>
#define BR_HASH_BITS 8
#define BR_HASH_SIZE (1 << BR_HASH_BITS)
#define BR_HOLD_TIME (1*HZ)
#define BR_PORT_BITS 10
#define BR_MAX_PORTS (1<<BR_PORT_BITS)
#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID)
#define BR_VERSION "2.3"
/* Control of forwarding link local multicast */
#define BR_GROUPFWD_DEFAULT 0
/* Don't allow forwarding control protocols like STP and LLDP */
#define BR_GROUPFWD_RESTRICTED 0x4007u
/* Path to usermode spanning tree program */
#define BR_STP_PROG "/sbin/bridge-stp"
typedef struct bridge_id bridge_id;
typedef struct mac_addr mac_addr;
typedef __u16 port_id;
struct bridge_id
{
unsigned char prio[2];
unsigned char addr[6];
};
struct mac_addr
{
unsigned char addr[6];
};
struct br_ip
{
union {
__be32 ip4;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ip6;
#endif
} u;
__be16 proto;
__u16 vid;
};
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
/* our own querier */
struct bridge_mcast_query {
struct timer_list timer;
u32 startup_sent;
};
/* other querier */
struct bridge_mcast_querier {
struct timer_list timer;
unsigned long delay_time;
};
#endif
struct net_port_vlans {
u16 port_idx;
u16 pvid;
union {
struct net_bridge_port *port;
struct net_bridge *br;
} parent;
struct rcu_head rcu;
unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN];
unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN];
u16 num_vlans;
};
struct net_bridge_fdb_entry
{
struct hlist_node hlist;
struct net_bridge_port *dst;
struct rcu_head rcu;
unsigned long updated;
unsigned long used;
mac_addr addr;
unsigned char is_local;
unsigned char is_static;
__u16 vlan_id;
};
struct net_bridge_port_group {
struct net_bridge_port *port;
struct net_bridge_port_group __rcu *next;
struct hlist_node mglist;
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
unsigned char state;
};
struct net_bridge_mdb_entry
{
struct hlist_node hlist[2];
struct net_bridge *br;
struct net_bridge_port_group __rcu *ports;
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
bool mglist;
bool timer_armed;
};
struct net_bridge_mdb_htable
{
struct hlist_head *mhash;
struct rcu_head rcu;
struct net_bridge_mdb_htable *old;
u32 size;
u32 max;
u32 secret;
u32 ver;
};
struct net_bridge_port
{
struct net_bridge *br;
struct net_device *dev;
struct list_head list;
/* STP */
u8 priority;
u8 state;
u16 port_no;
unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id;
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
u32 path_cost;
u32 designated_cost;
unsigned long designated_age;
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj;
struct rcu_head rcu;
unsigned long flags;
#define BR_HAIRPIN_MODE 0x00000001
#define BR_BPDU_GUARD 0x00000002
#define BR_ROOT_BLOCK 0x00000004
#define BR_MULTICAST_FAST_LEAVE 0x00000008
#define BR_ADMIN_COST 0x00000010
#define BR_LEARNING 0x00000020
#define BR_FLOOD 0x00000040
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
struct bridge_mcast_query ip4_query;
#if IS_ENABLED(CONFIG_IPV6)
struct bridge_mcast_query ip6_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
struct timer_list multicast_router_timer;
struct hlist_head mglist;
struct hlist_node rlist;
#endif
#ifdef CONFIG_SYSFS
char sysfs_name[IFNAMSIZ];
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *np;
#endif
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_port_vlans __rcu *vlan_info;
#endif
};
#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
{
net: set and query VEB/VEPA bridge mode via PF_BRIDGE Hardware switches may support enabling and disabling the loopback switch which puts the device in a VEPA mode defined in the IEEE 802.1Qbg specification. In this mode frames are not switched in the hardware but sent directly to the switch. SR-IOV capable NICs will likely support this mode I am aware of at least two such devices. Also I am told (but don't have any of this hardware available) that there are devices that only support VEPA modes. In these cases it is important at a minimum to be able to query these attributes. This patch adds an additional IFLA_BRIDGE_MODE attribute that can be set and dumped via the PF_BRIDGE:{SET|GET}LINK operations. Also anticipating bridge attributes that may be common for both embedded bridges and software bridges this adds a flags attribute IFLA_BRIDGE_FLAGS currently used to determine if the command or event is being generated to/from an embedded bridge or software bridge. Finally, the event generation is pulled out of the bridge module and into rtnetlink proper. For example using the macvlan driver in VEPA mode on top of an embedded switch requires putting the embedded switch into a VEPA mode to get the expected results. -------- -------- | VEPA | | VEPA | <-- macvlan vepa edge relays -------- -------- | | | | ------------------ | VEPA | <-- embedded switch in NIC ------------------ | | ------------------- | external switch | <-- shiny new physical ------------------- switch with VEPA support A packet sent from the macvlan VEPA at the top could be loopbacked on the embedded switch and never seen by the external switch. So in order for this to work the embedded switch needs to be set in the VEPA state via the above described commands. By making these attributes nested in IFLA_AF_SPEC we allow future extensions to be made as needed. CC: Lennert Buytenhek <buytenh@wantstofly.org> CC: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-10-24 08:13:03 +00:00
struct net_bridge_port *port =
rcu_dereference_rtnl(dev->rx_handler_data);
return br_port_exists(dev) ? port : NULL;
}
static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
{
return br_port_exists(dev) ?
rtnl_dereference(dev->rx_handler_data) : NULL;
}
struct br_cpu_netstats {
u64 rx_packets;
u64 rx_bytes;
u64 tx_packets;
u64 tx_bytes;
struct u64_stats_sync syncp;
};
struct net_bridge
{
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
struct br_cpu_netstats __percpu *stats;
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
#ifdef CONFIG_BRIDGE_NETFILTER
struct rtable fake_rtable;
bool nf_call_iptables;
bool nf_call_ip6tables;
bool nf_call_arptables;
#endif
u16 group_fwd_mask;
/* STP */
bridge_id designated_root;
bridge_id bridge_id;
u32 root_path_cost;
unsigned long max_age;
unsigned long hello_time;
unsigned long forward_delay;
unsigned long bridge_max_age;
unsigned long ageing_time;
unsigned long bridge_hello_time;
unsigned long bridge_forward_delay;
u8 group_addr[ETH_ALEN];
u16 root_port;
enum {
BR_NO_STP, /* no spanning tree */
BR_KERNEL_STP, /* old STP in kernel */
BR_USER_STP, /* new RSTP in userspace */
} stp_enabled;
unsigned char topology_change;
unsigned char topology_change_detected;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
unsigned char multicast_router;
u8 multicast_disabled:1;
u8 multicast_querier:1;
u8 multicast_query_use_ifaddr:1;
u32 hash_elasticity;
u32 hash_max;
u32 multicast_last_member_count;
u32 multicast_startup_query_count;
unsigned long multicast_last_member_interval;
unsigned long multicast_membership_interval;
unsigned long multicast_querier_interval;
unsigned long multicast_query_interval;
unsigned long multicast_query_response_interval;
unsigned long multicast_startup_query_interval;
spinlock_t multicast_lock;
struct net_bridge_mdb_htable __rcu *mdb;
struct hlist_head router_list;
struct timer_list multicast_router_timer;
struct bridge_mcast_querier ip4_querier;
struct bridge_mcast_query ip4_query;
#if IS_ENABLED(CONFIG_IPV6)
struct bridge_mcast_querier ip6_querier;
struct bridge_mcast_query ip6_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif
struct timer_list hello_timer;
struct timer_list tcn_timer;
struct timer_list topology_change_timer;
struct timer_list gc_timer;
struct kobject *ifobj;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_enabled;
struct net_port_vlans __rcu *vlan_info;
#endif
};
struct br_input_skb_cb {
struct net_device *brdev;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
int igmp;
int mrouters_only;
#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only)
#else
# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0)
#endif
#define br_printk(level, br, format, args...) \
printk(level "%s: " format, (br)->dev->name, ##args)
#define br_err(__br, format, args...) \
br_printk(KERN_ERR, __br, format, ##args)
#define br_warn(__br, format, args...) \
br_printk(KERN_WARNING, __br, format, ##args)
#define br_notice(__br, format, args...) \
br_printk(KERN_NOTICE, __br, format, ##args)
#define br_info(__br, format, args...) \
br_printk(KERN_INFO, __br, format, ##args)
#define br_debug(br, format, args...) \
pr_debug("%s: " format, (br)->dev->name, ##args)
extern struct notifier_block br_device_notifier;
/* called under bridge lock */
static inline int br_is_root_bridge(const struct net_bridge *br)
{
return !memcmp(&br->bridge_id, &br->designated_root, 8);
}
/* br_device.c */
extern void br_dev_setup(struct net_device *dev);
extern void br_dev_delete(struct net_device *dev, struct list_head *list);
extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
struct net_device *dev);
#ifdef CONFIG_NET_POLL_CONTROLLER
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
struct sk_buff *skb)
{
struct netpoll *np = p->np;
if (np)
netpoll_send_skb(np, skb);
}
extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
extern void br_netpoll_disable(struct net_bridge_port *p);
#else
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
struct sk_buff *skb)
{
}
static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
{
return 0;
}
static inline void br_netpoll_disable(struct net_bridge_port *p)
{
}
#endif
/* br_fdb.c */
extern int br_fdb_init(void);
extern void br_fdb_fini(void);
extern void br_fdb_flush(struct net_bridge *br);
extern void br_fdb_changeaddr(struct net_bridge_port *p,
const unsigned char *newaddr);
extern void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
extern void br_fdb_cleanup(unsigned long arg);
extern void br_fdb_delete_by_port(struct net_bridge *br,
const struct net_bridge_port *p, int do_all);
extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
const unsigned char *addr,
__u16 vid);
extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
unsigned long count, unsigned long off);
extern int br_fdb_insert(struct net_bridge *br,
struct net_bridge_port *source,
const unsigned char *addr,
u16 vid);
extern void br_fdb_update(struct net_bridge *br,
struct net_bridge_port *source,
const unsigned char *addr,
u16 vid);
extern int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
struct net_device *dev,
const unsigned char *addr);
extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[],
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
struct net_device *dev,
const unsigned char *addr,
net: add generic PF_BRIDGE:RTM_ FDB hooks This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected. The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge. Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev' this allows user space to easily keep the embedded bridge and software bridge in sync. There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs. To support this new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior. A basic setup with a SR-IOV enabled NIC looks like this, veth0 veth2 | | ------------ | bridge0 | <---- software bridging ------------ / / ethx.y ethx VF PF \ \ <---- propagate FDB entries to HW \ \ -------------------- | Embedded Bridge | <---- hardware offloaded switching -------------------- In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space. This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues. Examples session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver: # br fdb add 22:35:19:ac:60:59 dev eth3 # br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static #br fdb add 22:35:19:ac:60:59 embedded dev eth3 #br fdb port mac addr flags veth0 22:35:19:ac:60:58 static veth0 9a:5f:81:f7:f6:ec local eth3 00:1b:21:55:23:59 local eth3 22:35:19:ac:60:59 static veth0 22:35:19:ac:60:57 static eth3 22:35:19:ac:60:59 local embedded #br fdb del 22:35:19:ac:60:59 embedded dev eth3 I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing. [1] 'br' tool was published as an RFC here and will be renamed 'bridge' http://patchwork.ozlabs.org/patch/117664/ Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review. v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-15 06:43:56 +00:00
u16 nlh_flags);
extern int br_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
int idx);
/* br_forward.c */
extern void br_deliver(const struct net_bridge_port *to,
struct sk_buff *skb);
extern int br_dev_queue_push_xmit(struct sk_buff *skb);
extern void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, struct sk_buff *skb0);
extern int br_forward_finish(struct sk_buff *skb);
extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb,
bool unicast);
extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
struct sk_buff *skb2, bool unicast);
/* br_if.c */
extern void br_port_carrier_check(struct net_bridge_port *p);
extern int br_add_bridge(struct net *net, const char *name);
extern int br_del_bridge(struct net *net, const char *name);
extern void br_net_exit(struct net *net);
extern int br_add_if(struct net_bridge *br,
struct net_device *dev);
extern int br_del_if(struct net_bridge *br,
struct net_device *dev);
extern int br_min_mtu(const struct net_bridge *br);
extern netdev_features_t br_features_recompute(struct net_bridge *br,
netdev_features_t features);
/* br_input.c */
extern int br_handle_frame_finish(struct sk_buff *skb);
extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
/* br_ioctl.c */
extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
[NET]: Make the device list and device lookups per namespace. This patch makes most of the generic device layer network namespace safe. This patch makes dev_base_head a network namespace variable, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anthing in the core of the network stack that was affected to by the change of dev_base was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use &init_net the initial network namespace. This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundametally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-09-17 18:56:21 +00:00
extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg);
/* br_multicast.c */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
extern unsigned int br_mdb_rehash_seq;
extern int br_multicast_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb);
extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb, u16 vid);
extern void br_multicast_add_port(struct net_bridge_port *port);
extern void br_multicast_del_port(struct net_bridge_port *port);
extern void br_multicast_enable_port(struct net_bridge_port *port);
extern void br_multicast_disable_port(struct net_bridge_port *port);
extern void br_multicast_init(struct net_bridge *br);
extern void br_multicast_open(struct net_bridge *br);
extern void br_multicast_stop(struct net_bridge *br);
extern void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb);
extern void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb, struct sk_buff *skb2);
extern int br_multicast_set_router(struct net_bridge *br, unsigned long val);
extern int br_multicast_set_port_router(struct net_bridge_port *p,
unsigned long val);
extern int br_multicast_toggle(struct net_bridge *br, unsigned long val);
extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
extern struct net_bridge_mdb_entry *br_mdb_ip_get(
struct net_bridge_mdb_htable *mdb,
struct br_ip *dst);
extern struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
struct net_bridge_port *port, struct br_ip *group);
extern void br_multicast_free_pg(struct rcu_head *head);
extern struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port *port,
struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char state);
extern void br_mdb_init(void);
extern void br_mdb_uninit(void);
extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_ip *group, int type);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
static inline bool br_multicast_is_router(struct net_bridge *br)
{
return br->multicast_router == 2 ||
(br->multicast_router == 1 &&
timer_pending(&br->multicast_router_timer));
}
static inline bool
__br_multicast_querier_exists(struct net_bridge *br,
struct bridge_mcast_querier *querier)
{
return time_is_before_jiffies(querier->delay_time) &&
(br->multicast_querier || timer_pending(&querier->timer));
}
static inline bool br_multicast_querier_exists(struct net_bridge *br,
struct ethhdr *eth)
{
switch (eth->h_proto) {
case (htons(ETH_P_IP)):
return __br_multicast_querier_exists(br, &br->ip4_querier);
#if IS_ENABLED(CONFIG_IPV6)
case (htons(ETH_P_IPV6)):
return __br_multicast_querier_exists(br, &br->ip6_querier);
#endif
default:
return false;
}
}
#else
static inline int br_multicast_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb)
{
return 0;
}
static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb, u16 vid)
{
return NULL;
}
static inline void br_multicast_add_port(struct net_bridge_port *port)
{
}
static inline void br_multicast_del_port(struct net_bridge_port *port)
{
}
static inline void br_multicast_enable_port(struct net_bridge_port *port)
{
}
static inline void br_multicast_disable_port(struct net_bridge_port *port)
{
}
static inline void br_multicast_init(struct net_bridge *br)
{
}
static inline void br_multicast_open(struct net_bridge *br)
{
}
static inline void br_multicast_stop(struct net_bridge *br)
{
}
static inline void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb)
{
}
static inline void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
struct sk_buff *skb2)
{
}
static inline bool br_multicast_is_router(struct net_bridge *br)
{
return 0;
}
static inline bool br_multicast_querier_exists(struct net_bridge *br,
struct ethhdr *eth)
{
return false;
}
static inline void br_mdb_init(void)
{
}
static inline void br_mdb_uninit(void)
{
}
#endif
/* br_vlan.c */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
struct sk_buff *skb, u16 *vid);
extern bool br_allowed_egress(struct net_bridge *br,
const struct net_port_vlans *v,
const struct sk_buff *skb);
extern struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb);
extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
extern int br_vlan_delete(struct net_bridge *br, u16 vid);
extern void br_vlan_flush(struct net_bridge *br);
extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
extern void nbp_vlan_flush(struct net_bridge_port *port);
extern bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
static inline struct net_port_vlans *br_get_vlan_info(
const struct net_bridge *br)
{
return rcu_dereference_rtnl(br->vlan_info);
}
static inline struct net_port_vlans *nbp_get_vlan_info(
const struct net_bridge_port *p)
{
return rcu_dereference_rtnl(p->vlan_info);
}
/* Since bridge now depends on 8021Q module, but the time bridge sees the
* skb, the vlan tag will always be present if the frame was tagged.
*/
static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
{
int err = 0;
if (vlan_tx_tag_present(skb))
*vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK;
else {
*vid = 0;
err = -EINVAL;
}
return err;
}
static inline u16 br_get_pvid(const struct net_port_vlans *v)
{
/* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if
* vid wasn't set
*/
smp_rmb();
return (v->pvid & VLAN_TAG_PRESENT) ?
(v->pvid & ~VLAN_TAG_PRESENT) :
VLAN_N_VID;
}
#else
static inline bool br_allowed_ingress(struct net_bridge *br,
struct net_port_vlans *v,
struct sk_buff *skb,
u16 *vid)
{
return true;
}
static inline bool br_allowed_egress(struct net_bridge *br,
const struct net_port_vlans *v,
const struct sk_buff *skb)
{
return true;
}
static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_port_vlans *v,
struct sk_buff *skb)
{
return skb;
}
static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
{
return -EOPNOTSUPP;
}
static inline int br_vlan_delete(struct net_bridge *br, u16 vid)
{
return -EOPNOTSUPP;
}
static inline void br_vlan_flush(struct net_bridge *br)
{
}
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
return -EOPNOTSUPP;
}
static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
{
return -EOPNOTSUPP;
}
static inline void nbp_vlan_flush(struct net_bridge_port *port)
{
}
static inline struct net_port_vlans *br_get_vlan_info(
const struct net_bridge *br)
{
return NULL;
}
static inline struct net_port_vlans *nbp_get_vlan_info(
const struct net_bridge_port *p)
{
return NULL;
}
static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
{
return false;
}
static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag)
{
return 0;
}
static inline u16 br_get_pvid(const struct net_port_vlans *v)
{
return VLAN_N_VID; /* Returns invalid vid */
}
#endif
/* br_netfilter.c */
#ifdef CONFIG_BRIDGE_NETFILTER
extern int br_netfilter_init(void);
extern void br_netfilter_fini(void);
extern void br_netfilter_rtable_init(struct net_bridge *);
#else
#define br_netfilter_init() (0)
#define br_netfilter_fini() do { } while(0)
#define br_netfilter_rtable_init(x)
#endif
/* br_stp.c */
extern void br_log_state(const struct net_bridge_port *p);
extern struct net_bridge_port *br_get_port(struct net_bridge *br,
u16 port_no);
extern void br_init_port(struct net_bridge_port *p);
extern void br_become_designated_port(struct net_bridge_port *p);
extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
extern int br_set_max_age(struct net_bridge *br, unsigned long x);
/* br_stp_if.c */
extern void br_stp_enable_bridge(struct net_bridge *br);
extern void br_stp_disable_bridge(struct net_bridge *br);
extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
extern void br_stp_enable_port(struct net_bridge_port *p);
extern void br_stp_disable_port(struct net_bridge_port *p);
extern bool br_stp_recalculate_bridge_id(struct net_bridge *br);
extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
extern void br_stp_set_bridge_priority(struct net_bridge *br,
u16 newprio);
extern int br_stp_set_port_priority(struct net_bridge_port *p,
unsigned long newprio);
extern int br_stp_set_path_cost(struct net_bridge_port *p,
unsigned long path_cost);
extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
/* br_stp_bpdu.c */
struct stp_proto;
extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
struct net_device *dev);
/* br_stp_timer.c */
extern void br_stp_timer_init(struct net_bridge *br);
extern void br_stp_port_timer_init(struct net_bridge_port *p);
extern unsigned long br_timer_value(const struct timer_list *timer);
/* br.c */
#if IS_ENABLED(CONFIG_ATM_LANE)
extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr);
#endif
/* br_netlink.c */
extern struct rtnl_link_ops br_link_ops;
extern int br_netlink_init(void);
extern void br_netlink_fini(void);
extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u32 filter_mask);
#ifdef CONFIG_SYSFS
/* br_sysfs_if.c */
extern const struct sysfs_ops brport_sysfs_ops;
extern int br_sysfs_addif(struct net_bridge_port *p);
extern int br_sysfs_renameif(struct net_bridge_port *p);
/* br_sysfs_br.c */
extern int br_sysfs_addbr(struct net_device *dev);
extern void br_sysfs_delbr(struct net_device *dev);
#else
static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; }
static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; }
static inline int br_sysfs_addbr(struct net_device *dev) { return 0; }
static inline void br_sysfs_delbr(struct net_device *dev) { return; }
#endif /* CONFIG_SYSFS */
#endif