linux/net/bridge/br.c

399 lines
8.6 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Generic parts
* Linux ethernet bridge
*
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/llc.h>
#include <net/llc.h>
#include <net/stp.h>
#include <net/switchdev.h>
#include "br_private.h"
/*
* Handle changes in state of network devices enslaved to a bridge.
*
* Note: don't care about up/down if bridge itself is down, because
* port state is checked when bridge is brought up.
*/
static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
{
struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
struct netdev_notifier_pre_changeaddr_info *prechaddr_info;
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
bool notified = false;
bool changed_addr;
int err;
if (dev->priv_flags & IFF_EBRIDGE) {
net: bridge: move default pvid init/deinit to NETDEV_REGISTER/UNREGISTER Most of the bridge device's vlan init bugs come from the fact that its default pvid is created at the wrong time, way too early in ndo_init() before the device is even assigned an ifindex. It introduces a bug when the bridge's dev_addr is added as fdb during the initial default pvid creation the notification has ifindex/NDA_MASTER both equal to 0 (see example below) which really makes no sense for user-space[0] and is wrong. Usually user-space software would ignore such entries, but they are actually valid and will eventually have all necessary attributes. It makes much more sense to send a notification *after* the device has registered and has a proper ifindex allocated rather than before when there's a chance that the registration might still fail or to receive it with ifindex/NDA_MASTER == 0. Note that we can remove the fdb flush from br_vlan_flush() since that case can no longer happen. At NETDEV_REGISTER br->default_pvid is always == 1 as it's initialized by br_vlan_init() before that and at NETDEV_UNREGISTER it can be anything depending why it was called (if called due to NETDEV_REGISTER error it'll still be == 1, otherwise it could be any value changed during the device life time). For the demonstration below a small change to iproute2 for printing all fdb notifications is added, because it contained a workaround not to show entries with ifindex == 0. Command executed while monitoring: $ ip l add br0 type bridge Before (both ifindex and master == 0): $ bridge monitor fdb 36:7e:8a:b3:56:ba dev * vlan 1 master * permanent After (proper br0 ifindex): $ bridge monitor fdb e6:2a:ae:7a:b7:48 dev br0 vlan 1 master br0 permanent v4: move only the default pvid init/deinit to NETDEV_REGISTER/UNREGISTER v3: send the correct v2 patch with all changes (stub should return 0) v2: on error in br_vlan_init set br->vlgrp to NULL and return 0 in the br_vlan_bridge_event stub when bridge vlans are disabled [0] https://bugzilla.kernel.org/show_bug.cgi?id=204389 Reported-by: michael-dev <michael-dev@fami-braun.de> Fixes: 5be5a2df40f0 ("bridge: Add filtering support for default_pvid") Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-08-02 10:57:36 +00:00
err = br_vlan_bridge_event(dev, event, ptr);
if (err)
return notifier_from_errno(err);
if (event == NETDEV_REGISTER) {
/* register of bridge completed, add sysfs entries */
br_sysfs_addbr(dev);
return NOTIFY_DONE;
}
}
/* not a port of a bridge */
p = br_port_get_rtnl(dev);
if (!p)
return NOTIFY_DONE;
br = p->br;
switch (event) {
case NETDEV_CHANGEMTU:
br_mtu_auto_adjust(br);
break;
case NETDEV_PRE_CHANGEADDR:
if (br->dev->addr_assign_type == NET_ADDR_SET)
break;
prechaddr_info = ptr;
err = dev_pre_changeaddr_notify(br->dev,
prechaddr_info->dev_addr,
extack);
if (err)
return notifier_from_errno(err);
break;
case NETDEV_CHANGEADDR:
spin_lock_bh(&br->lock);
br_fdb_changeaddr(p, dev->dev_addr);
changed_addr = br_stp_recalculate_bridge_id(br);
spin_unlock_bh(&br->lock);
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
break;
case NETDEV_CHANGE:
br_port_carrier_check(p, &notified);
break;
case NETDEV_FEAT_CHANGE:
netdev_update_features(br->dev);
break;
case NETDEV_DOWN:
spin_lock_bh(&br->lock);
if (br->dev->flags & IFF_UP) {
br_stp_disable_port(p);
notified = true;
}
spin_unlock_bh(&br->lock);
break;
case NETDEV_UP:
if (netif_running(br->dev) && netif_oper_up(dev)) {
spin_lock_bh(&br->lock);
br_stp_enable_port(p);
notified = true;
spin_unlock_bh(&br->lock);
}
break;
case NETDEV_UNREGISTER:
br_del_if(br, dev);
break;
case NETDEV_CHANGENAME:
err = br_sysfs_renameif(p);
if (err)
return notifier_from_errno(err);
break;
case NETDEV_PRE_TYPE_CHANGE:
/* Forbid underlaying device to change its type. */
return NOTIFY_BAD;
case NETDEV_RESEND_IGMP:
/* Propagate to master device */
call_netdevice_notifiers(event, br->dev);
break;
}
bridge: Fix possible use-after-free when deleting bridge port When a bridge port is being deleted, do not dereference it later in br_vlan_port_event() as it can result in a use-after-free [1] if the RCU callback was executed before invoking the function. [1] [ 129.638551] ================================================================== [ 129.646904] BUG: KASAN: use-after-free in br_vlan_port_event+0x53c/0x5fd [ 129.654406] Read of size 8 at addr ffff8881e4aa1ae8 by task ip/483 [ 129.663008] CPU: 0 PID: 483 Comm: ip Not tainted 5.1.0-rc5-custom-02265-ga946bd73daac #1383 [ 129.672359] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 129.682484] Call Trace: [ 129.685242] dump_stack+0xa9/0x10e [ 129.689068] print_address_description.cold.2+0x9/0x25e [ 129.694930] kasan_report.cold.3+0x78/0x9d [ 129.704420] br_vlan_port_event+0x53c/0x5fd [ 129.728300] br_device_event+0x2c7/0x7a0 [ 129.741505] notifier_call_chain+0xb5/0x1c0 [ 129.746202] rollback_registered_many+0x895/0xe90 [ 129.793119] unregister_netdevice_many+0x48/0x210 [ 129.803384] rtnl_delete_link+0xe1/0x140 [ 129.815906] rtnl_dellink+0x2a3/0x820 [ 129.844166] rtnetlink_rcv_msg+0x397/0x910 [ 129.868517] netlink_rcv_skb+0x137/0x3a0 [ 129.882013] netlink_unicast+0x49b/0x660 [ 129.900019] netlink_sendmsg+0x755/0xc90 [ 129.915758] ___sys_sendmsg+0x761/0x8e0 [ 129.966315] __sys_sendmsg+0xf0/0x1c0 [ 129.988918] do_syscall_64+0xa4/0x470 [ 129.993032] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 129.998696] RIP: 0033:0x7ff578104b58 ... [ 130.073811] Allocated by task 479: [ 130.077633] __kasan_kmalloc.constprop.5+0xc1/0xd0 [ 130.083008] kmem_cache_alloc_trace+0x152/0x320 [ 130.088090] br_add_if+0x39c/0x1580 [ 130.092005] do_set_master+0x1aa/0x210 [ 130.096211] do_setlink+0x985/0x3100 [ 130.100224] __rtnl_newlink+0xc52/0x1380 [ 130.104625] rtnl_newlink+0x6b/0xa0 [ 130.108541] rtnetlink_rcv_msg+0x397/0x910 [ 130.113136] netlink_rcv_skb+0x137/0x3a0 [ 130.117538] netlink_unicast+0x49b/0x660 [ 130.121939] netlink_sendmsg+0x755/0xc90 [ 130.126340] ___sys_sendmsg+0x761/0x8e0 [ 130.130645] __sys_sendmsg+0xf0/0x1c0 [ 130.134753] do_syscall_64+0xa4/0x470 [ 130.138864] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 130.146195] Freed by task 0: [ 130.149421] __kasan_slab_free+0x125/0x170 [ 130.154016] kfree+0xf3/0x310 [ 130.157349] kobject_put+0x1a8/0x4c0 [ 130.161363] rcu_core+0x859/0x19b0 [ 130.165175] __do_softirq+0x250/0xa26 [ 130.170956] The buggy address belongs to the object at ffff8881e4aa1ae8 which belongs to the cache kmalloc-1k of size 1024 [ 130.184972] The buggy address is located 0 bytes inside of 1024-byte region [ffff8881e4aa1ae8, ffff8881e4aa1ee8) Fixes: 9c0ec2e7182a ("bridge: support binding vlan dev link state to vlan member bridge ports") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Cc: Mike Manning <mmanning@vyatta.att-mail.com> Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Acked-by: Mike Manning <mmanning@vyatta.att-mail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-22 09:33:19 +00:00
if (event != NETDEV_UNREGISTER)
br_vlan_port_event(p, event);
/* Events that may cause spanning tree to refresh */
if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
event == NETDEV_CHANGE || event == NETDEV_DOWN))
br_ifinfo_notify(RTM_NEWLINK, NULL, p);
return NOTIFY_DONE;
}
static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
/* called with RTNL or RCU */
static int br_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
struct switchdev_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
p = br_port_get_rtnl_rcu(dev);
if (!p)
goto out;
br = p->br;
switch (event) {
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
fdb_info->vid, false);
if (err) {
err = notifier_from_errno(err);
break;
}
br_fdb_offloaded_set(br, p, fdb_info->addr,
fdb_info->vid, true);
break;
case SWITCHDEV_FDB_DEL_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_del(br, p, fdb_info->addr,
fdb_info->vid, false);
if (err)
err = notifier_from_errno(err);
break;
case SWITCHDEV_FDB_OFFLOADED:
fdb_info = ptr;
br_fdb_offloaded_set(br, p, fdb_info->addr,
fdb_info->vid, fdb_info->offloaded);
break;
}
out:
return err;
}
static struct notifier_block br_switchdev_notifier = {
.notifier_call = br_switchdev_event,
};
net: bridge: add support for user-controlled bool options We have been adding many new bridge options, a big number of which are boolean but still take up netlink attribute ids and waste space in the skb. Recently we discussed learning from link-local packets[1] and decided yet another new boolean option will be needed, thus introducing this API to save some bridge nl space. The API supports changing the value of multiple boolean options at once via the br_boolopt_multi struct which has an optmask (which options to set, bit per opt) and optval (options' new values). Future boolean options will only be added to the br_boolopt_id enum and then will have to be handled in br_boolopt_toggle/get. The API will automatically add the ability to change and export them via netlink, sysfs can use the single boolopt function versions to do the same. The behaviour with failing/succeeding is the same as with normal netlink option changing. If an option requires mapping to internal kernel flag or needs special configuration to be enabled then it should be handled in br_boolopt_toggle. It should also be able to retrieve an option's current state via br_boolopt_get. v2: WARN_ON() on unsupported option as that shouldn't be possible and also will help catch people who add new options without handling them for both set and get. Pass down extack so if an option desires it could set it on error and be more user-friendly. [1] https://www.spinics.net/lists/netdev/msg532698.html Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-24 02:34:20 +00:00
/* br_boolopt_toggle - change user-controlled boolean option
*
* @br: bridge device
* @opt: id of the option to change
* @on: new option value
* @extack: extack for error messages
*
* Changes the value of the respective boolean option to @on taking care of
* any internal option value mapping and configuration.
*/
int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
struct netlink_ext_ack *extack)
{
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
break;
net: bridge: add support for user-controlled bool options We have been adding many new bridge options, a big number of which are boolean but still take up netlink attribute ids and waste space in the skb. Recently we discussed learning from link-local packets[1] and decided yet another new boolean option will be needed, thus introducing this API to save some bridge nl space. The API supports changing the value of multiple boolean options at once via the br_boolopt_multi struct which has an optmask (which options to set, bit per opt) and optval (options' new values). Future boolean options will only be added to the br_boolopt_id enum and then will have to be handled in br_boolopt_toggle/get. The API will automatically add the ability to change and export them via netlink, sysfs can use the single boolopt function versions to do the same. The behaviour with failing/succeeding is the same as with normal netlink option changing. If an option requires mapping to internal kernel flag or needs special configuration to be enabled then it should be handled in br_boolopt_toggle. It should also be able to retrieve an option's current state via br_boolopt_get. v2: WARN_ON() on unsupported option as that shouldn't be possible and also will help catch people who add new options without handling them for both set and get. Pass down extack so if an option desires it could set it on error and be more user-friendly. [1] https://www.spinics.net/lists/netdev/msg532698.html Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-24 02:34:20 +00:00
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
break;
}
return 0;
}
int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
{
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
return br_opt_get(br, BROPT_NO_LL_LEARN);
net: bridge: add support for user-controlled bool options We have been adding many new bridge options, a big number of which are boolean but still take up netlink attribute ids and waste space in the skb. Recently we discussed learning from link-local packets[1] and decided yet another new boolean option will be needed, thus introducing this API to save some bridge nl space. The API supports changing the value of multiple boolean options at once via the br_boolopt_multi struct which has an optmask (which options to set, bit per opt) and optval (options' new values). Future boolean options will only be added to the br_boolopt_id enum and then will have to be handled in br_boolopt_toggle/get. The API will automatically add the ability to change and export them via netlink, sysfs can use the single boolopt function versions to do the same. The behaviour with failing/succeeding is the same as with normal netlink option changing. If an option requires mapping to internal kernel flag or needs special configuration to be enabled then it should be handled in br_boolopt_toggle. It should also be able to retrieve an option's current state via br_boolopt_get. v2: WARN_ON() on unsupported option as that shouldn't be possible and also will help catch people who add new options without handling them for both set and get. Pass down extack so if an option desires it could set it on error and be more user-friendly. [1] https://www.spinics.net/lists/netdev/msg532698.html Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-24 02:34:20 +00:00
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
break;
}
return 0;
}
int br_boolopt_multi_toggle(struct net_bridge *br,
struct br_boolopt_multi *bm,
struct netlink_ext_ack *extack)
{
unsigned long bitmap = bm->optmask;
int err = 0;
int opt_id;
for_each_set_bit(opt_id, &bitmap, BR_BOOLOPT_MAX) {
bool on = !!(bm->optval & BIT(opt_id));
err = br_boolopt_toggle(br, opt_id, on, extack);
if (err) {
br_debug(br, "boolopt multi-toggle error: option: %d current: %d new: %d error: %d\n",
opt_id, br_boolopt_get(br, opt_id), on, err);
break;
}
}
return err;
}
void br_boolopt_multi_get(const struct net_bridge *br,
struct br_boolopt_multi *bm)
{
u32 optval = 0;
int opt_id;
for (opt_id = 0; opt_id < BR_BOOLOPT_MAX; opt_id++)
optval |= (br_boolopt_get(br, opt_id) << opt_id);
bm->optval = optval;
bm->optmask = GENMASK((BR_BOOLOPT_MAX - 1), 0);
net: bridge: add support for user-controlled bool options We have been adding many new bridge options, a big number of which are boolean but still take up netlink attribute ids and waste space in the skb. Recently we discussed learning from link-local packets[1] and decided yet another new boolean option will be needed, thus introducing this API to save some bridge nl space. The API supports changing the value of multiple boolean options at once via the br_boolopt_multi struct which has an optmask (which options to set, bit per opt) and optval (options' new values). Future boolean options will only be added to the br_boolopt_id enum and then will have to be handled in br_boolopt_toggle/get. The API will automatically add the ability to change and export them via netlink, sysfs can use the single boolopt function versions to do the same. The behaviour with failing/succeeding is the same as with normal netlink option changing. If an option requires mapping to internal kernel flag or needs special configuration to be enabled then it should be handled in br_boolopt_toggle. It should also be able to retrieve an option's current state via br_boolopt_get. v2: WARN_ON() on unsupported option as that shouldn't be possible and also will help catch people who add new options without handling them for both set and get. Pass down extack so if an option desires it could set it on error and be more user-friendly. [1] https://www.spinics.net/lists/netdev/msg532698.html Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-24 02:34:20 +00:00
}
/* private bridge options, controlled by the kernel */
void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
{
bool cur = !!br_opt_get(br, opt);
br_debug(br, "toggle option: %d state: %d -> %d\n",
opt, cur, on);
if (cur == on)
return;
if (on)
set_bit(opt, &br->options);
else
clear_bit(opt, &br->options);
}
static void __net_exit br_net_exit(struct net *net)
{
struct net_device *dev;
LIST_HEAD(list);
rtnl_lock();
for_each_netdev(net, dev)
if (dev->priv_flags & IFF_EBRIDGE)
br_dev_delete(dev, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations br_net_ops = {
.exit = br_net_exit,
};
static const struct stp_proto br_stp_proto = {
.rcv = br_stp_rcv,
};
static int __init br_init(void)
{
int err;
BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
err = stp_proto_register(&br_stp_proto);
if (err < 0) {
pr_err("bridge: can't register sap for STP\n");
return err;
}
err = br_fdb_init();
if (err)
goto err_out;
err = register_pernet_subsys(&br_net_ops);
if (err)
goto err_out1;
err = br_nf_core_init();
if (err)
goto err_out2;
err = register_netdevice_notifier(&br_device_notifier);
if (err)
goto err_out3;
err = register_switchdev_notifier(&br_switchdev_notifier);
if (err)
goto err_out4;
err = br_netlink_init();
if (err)
goto err_out5;
brioctl_set(br_ioctl_deviceless_stub);
#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
#if IS_MODULE(CONFIG_BRIDGE_NETFILTER)
pr_info("bridge: filtering via arp/ip/ip6tables is no longer available "
"by default. Update your scripts to load br_netfilter if you "
"need this.\n");
#endif
return 0;
err_out5:
unregister_switchdev_notifier(&br_switchdev_notifier);
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
br_nf_core_fini();
err_out2:
unregister_pernet_subsys(&br_net_ops);
err_out1:
br_fdb_fini();
err_out:
stp_proto_unregister(&br_stp_proto);
return err;
}
static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
br_netlink_fini();
unregister_switchdev_notifier(&br_switchdev_notifier);
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
unregister_pernet_subsys(&br_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
br_nf_core_fini();
#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = NULL;
#endif
br_fdb_fini();
}
module_init(br_init)
module_exit(br_deinit)
MODULE_LICENSE("GPL");
MODULE_VERSION(BR_VERSION);
MODULE_ALIAS_RTNL_LINK("bridge");