From b03804e7c3ad41c265c0ca21ddb306b252b4f99f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Dec 2015 12:12:03 +0100 Subject: [PATCH 01/28] net: Check CHANGEUPPER notifier return value switchdev drivers reflect the newly requested topology to hardware when CHANGEUPPER is received, after software links were already formed. However, the operation can fail and user will not be notified, as the return value of the notifier is not checked. Add this check and rollback software links if necessary. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5df6cbce727c..939cd1b1da15 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5490,8 +5490,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback_lower_mesh; } - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, - &changeupper_info.info); + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + goto rollback_lower_mesh; + return 0; rollback_lower_mesh: From c39d0454ec9b703d3540dd10a2e9692f89aa48ab Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Dec 2015 12:12:04 +0100 Subject: [PATCH 02/28] net: Add support for CHANGEUPPER notifier error injection Since CHANGEUPPER can now fail, add support for it in the newly introduced netdev notifier error injection infrastructure. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- Documentation/fault-injection/notifier-error-inject.txt | 1 + lib/netdev-notifier-error-inject.c | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/fault-injection/notifier-error-inject.txt b/Documentation/fault-injection/notifier-error-inject.txt index 71e638a4c497..83d3f4e43e91 100644 --- a/Documentation/fault-injection/notifier-error-inject.txt +++ b/Documentation/fault-injection/notifier-error-inject.txt @@ -103,6 +103,7 @@ Netdevice notifier events which can be failed are: * NETDEV_POST_INIT * NETDEV_PRECHANGEMTU * NETDEV_PRECHANGEUPPER + * NETDEV_CHANGEUPPER Example: Inject netdevice mtu change error (-22 == -EINVAL) diff --git a/lib/netdev-notifier-error-inject.c b/lib/netdev-notifier-error-inject.c index b2b856675d61..13e9c62e216f 100644 --- a/lib/netdev-notifier-error-inject.c +++ b/lib/netdev-notifier-error-inject.c @@ -18,6 +18,7 @@ static struct notifier_err_inject netdev_notifier_err_inject = { { NOTIFIER_ERR_INJECT_ACTION(NETDEV_POST_INIT) }, { NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRECHANGEMTU) }, { NOTIFIER_ERR_INJECT_ACTION(NETDEV_PRECHANGEUPPER) }, + { NOTIFIER_ERR_INJECT_ACTION(NETDEV_CHANGEUPPER) }, {} } }; From 3952af4d50343728e54bf93880e0ecb9c42c47aa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:05 +0100 Subject: [PATCH 03/28] bonding: add 802.3ad support for 100G speeds Similar to other speeds, add 100G to bonding 802.3ad code. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 940e2ebbdea8..4cbb8b27a891 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -93,7 +93,8 @@ enum ad_link_speed_type { AD_LINK_SPEED_10000MBPS, AD_LINK_SPEED_20000MBPS, AD_LINK_SPEED_40000MBPS, - AD_LINK_SPEED_56000MBPS + AD_LINK_SPEED_56000MBPS, + AD_LINK_SPEED_100000MBPS, }; /* compare MAC addresses */ @@ -258,6 +259,7 @@ static inline int __check_agg_selection_timer(struct port *port) * %AD_LINK_SPEED_20000MBPS * %AD_LINK_SPEED_40000MBPS * %AD_LINK_SPEED_56000MBPS + * %AD_LINK_SPEED_100000MBPS */ static u16 __get_link_speed(struct port *port) { @@ -305,6 +307,10 @@ static u16 __get_link_speed(struct port *port) speed = AD_LINK_SPEED_56000MBPS; break; + case SPEED_100000: + speed = AD_LINK_SPEED_100000MBPS; + break; + default: /* unknown speed value from ethtool. shouldn't happen */ speed = 0; @@ -681,6 +687,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator) case AD_LINK_SPEED_56000MBPS: bandwidth = aggregator->num_of_ports * 56000; break; + case AD_LINK_SPEED_100000MBPS: + bandwidth = aggregator->num_of_ports * 100000; + break; default: bandwidth = 0; /* to silence the compiler */ } From c981e4213e9d2d4ec79501bd607722ec712742a2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:06 +0100 Subject: [PATCH 04/28] net: add netif_is_team_master helper Similar to other helpers, caller can use this to find out if device is team master. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 1 + include/linux/netdevice.h | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 651d35ea22c5..d2f3ee832c47 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2054,6 +2054,7 @@ static void team_setup(struct net_device *dev) dev->flags |= IFF_MULTICAST; dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_TEAM; /* * Indicate we support unicast address filtering. That way core won't diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fcbc5259c630..2b889be65d88 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1273,6 +1273,7 @@ struct net_device_ops { * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device + * @IFF_TEAM: device is a team device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1299,6 +1300,7 @@ enum netdev_priv_flags { IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, + IFF_TEAM = 1<<24, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1325,6 +1327,7 @@ enum netdev_priv_flags { #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE +#define IFF_TEAM IFF_TEAM /** * struct net_device - The DEVICE structure. @@ -3889,6 +3892,11 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } +static inline bool netif_is_team_master(struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { From f7f019ee6d117de5007d0b10e7960696bbf111eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:07 +0100 Subject: [PATCH 05/28] net: add netif_is_team_port helper Similar to other helpers, caller can use this to find out if device is team port. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2b889be65d88..b3601f8a9b42 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3897,6 +3897,11 @@ static inline bool netif_is_team_master(struct net_device *dev) return dev->priv_flags & IFF_TEAM; } +static inline bool netif_is_team_port(struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM_PORT; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { From 7be61833042e7757745345eedc7b0efee240c189 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:08 +0100 Subject: [PATCH 06/28] net: add netif_is_lag_master helper Some code does not mind if the master is bond or team and treats them the same, as generic LAG. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3601f8a9b42..3ca083efa560 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3902,6 +3902,11 @@ static inline bool netif_is_team_port(struct net_device *dev) return dev->priv_flags & IFF_TEAM_PORT; } +static inline bool netif_is_lag_master(struct net_device *dev) +{ + return netif_is_bond_master(dev) || netif_is_team_master(dev); +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { From e0ba1414f310c83bf425fe26fa2cd5f1befcd6dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:09 +0100 Subject: [PATCH 07/28] net: add netif_is_lag_port helper Some code does not mind if a device is bond slave or team port and treats them the same, as generic LAG ports. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ca083efa560..1506be58c59a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3907,6 +3907,11 @@ static inline bool netif_is_lag_master(struct net_device *dev) return netif_is_bond_master(dev) || netif_is_team_master(dev); } +static inline bool netif_is_lag_port(struct net_device *dev) +{ + return netif_is_bond_slave(dev) || netif_is_team_port(dev); +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { From 6dffb0447c25476f499d205dfceb1972e8dae919 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:10 +0100 Subject: [PATCH 08/28] net: propagate upper priv via netdev_master_upper_dev_link Eliminate netdev_master_upper_dev_link_private and pass priv directly as a parameter of netdev_master_upper_dev_link. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/vrf.c | 2 +- include/linux/netdevice.h | 6 ++---- net/batman-adv/hard-interface.c | 3 ++- net/bridge/br_if.c | 2 +- net/core/dev.c | 18 ++++++------------ net/openvswitch/vport-netdev.c | 2 +- 8 files changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9e0f8a7ef8b1..924015729b2d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,7 +1204,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, { int err; - err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); + err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave); if (err) return err; slave_dev->flags |= IFF_SLAVE; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index d2f3ee832c47..b37f8d14dca0 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1083,7 +1083,7 @@ static int team_upper_dev_link(struct net_device *dev, { int err; - err = netdev_master_upper_dev_link(port_dev, dev); + err = netdev_master_upper_dev_link(port_dev, dev, NULL); if (err) return err; port_dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index c2d54c4ed556..59c5bddeaedd 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) goto out_fail; } - ret = netdev_master_upper_dev_link(port_dev, dev); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL); if (ret < 0) goto out_unregister; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1506be58c59a..939b8f3de810 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3619,10 +3619,8 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private); + struct net_device *upper_dev, + void *upper_priv); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f11345e163d7..a7f4f1085dbb 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -464,7 +464,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); - ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, + soft_iface, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ec02f5869a78..781abc34667a 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index 939cd1b1da15..27d052bb78bc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *private) + void *upper_priv) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5452,7 +5452,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv, master); if (ret) return ret; @@ -5557,6 +5557,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * netdev_master_upper_dev_link - Add a master link to the upper device * @dev: device * @upper_dev: new upper device + * @upper_priv: upper device private * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5565,20 +5566,13 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * counts are adjusted and the function returns zero. */ int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + void *upper_priv) { - return __netdev_upper_dev_link(dev, upper_dev, true, NULL); + return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private) -{ - return __netdev_upper_dev_link(dev, upper_dev, true, private); -} -EXPORT_SYMBOL(netdev_master_upper_dev_link_private); - /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b327368a3848..3ee3df1edeae 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp)); + get_dpdev(vport->dp), NULL); if (err) goto error_unlock; From 29bf24afb29042f568fa67b1b0eee46796725ed2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:11 +0100 Subject: [PATCH 09/28] net: add possibility to pass information about upper device via notifier Sometimes the drivers and other code would find it handy to know some internal information about upper device being changed. So allow upper-code to pass information down to notifier listeners during linking. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/vrf.c | 2 +- include/linux/netdevice.h | 3 ++- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 11 +++++++---- net/openvswitch/vport-netdev.c | 2 +- 8 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 924015729b2d..fa3ed1d8a12d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1204,7 +1204,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev, { int err; - err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave); + err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave, NULL); if (err) return err; slave_dev->flags |= IFF_SLAVE; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b37f8d14dca0..f7b6ff7948b8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1083,7 +1083,7 @@ static int team_upper_dev_link(struct net_device *dev, { int err; - err = netdev_master_upper_dev_link(port_dev, dev, NULL); + err = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (err) return err; port_dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 59c5bddeaedd..8944a49cda15 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -624,7 +624,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) goto out_fail; } - ret = netdev_master_upper_dev_link(port_dev, dev, NULL); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) goto out_unregister; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 939b8f3de810..aea556c64f2c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2163,6 +2163,7 @@ struct netdev_notifier_changeupper_info { struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ bool linking; /* is the nofication for link or unlink */ + void *upper_info; /* upper dev info */ }; static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, @@ -3620,7 +3621,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv); + void *upper_priv, void *upper_info); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index a7f4f1085dbb..aa8867e1d983 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -465,7 +465,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL); + soft_iface, NULL, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 781abc34667a..8d1d4a22c50d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index 27d052bb78bc..8ed886663c6d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv) + void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5445,6 +5445,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.upper_dev = upper_dev; changeupper_info.master = master; changeupper_info.linking = true; + changeupper_info.upper_info = upper_info; ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, &changeupper_info.info); @@ -5549,7 +5550,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -5558,6 +5559,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * @dev: device * @upper_dev: new upper device * @upper_priv: upper device private + * @upper_info: upper info to be passed down via notifier * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5567,9 +5569,10 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv) + void *upper_priv, void *upper_info) { - return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); + return __netdev_upper_dev_link(dev, upper_dev, true, + upper_priv, upper_info); } EXPORT_SYMBOL(netdev_master_upper_dev_link); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 3ee3df1edeae..8f4dd4c39bfe 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp), NULL); + get_dpdev(vport->dp), NULL, NULL); if (err) goto error_unlock; From 764f5e544118508add420724789f46e04dba91eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:12 +0100 Subject: [PATCH 10/28] net: add info struct for LAG changeupper This struct will be shared by bonding and team to pass internal information to notifier listeners. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aea556c64f2c..3ab90ea0ed03 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2110,6 +2110,19 @@ struct pcpu_sw_netstats { #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL); +enum netdev_lag_tx_type { + NETDEV_LAG_TX_TYPE_UNKNOWN, + NETDEV_LAG_TX_TYPE_RANDOM, + NETDEV_LAG_TX_TYPE_BROADCAST, + NETDEV_LAG_TX_TYPE_ROUNDROBIN, + NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, + NETDEV_LAG_TX_TYPE_HASH, +}; + +struct netdev_lag_upper_info { + enum netdev_lag_tx_type tx_type; +}; + #include /* netdevice notifier chain. Please remember to update the rtnetlink From 8fd728566a354f7bc9cb6e781f185b8c39cf505b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:13 +0100 Subject: [PATCH 11/28] team: fill-up LAG changeupper info struct and pass it along Initialize netdev_lag_upper_info structure by TX type according to current team mode and pass it along via netdev_master_upper_dev_link. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 23 ++++++++++++----------- drivers/net/team/team_mode_activebackup.c | 1 + drivers/net/team/team_mode_broadcast.c | 1 + drivers/net/team/team_mode_loadbalance.c | 1 + drivers/net/team/team_mode_random.c | 1 + drivers/net/team/team_mode_roundrobin.c | 1 + include/linux/if_team.h | 1 + 7 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f7b6ff7948b8..dd1504bbb4a7 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1078,23 +1078,24 @@ static void team_port_disable_netpoll(struct team_port *port) } #endif -static int team_upper_dev_link(struct net_device *dev, - struct net_device *port_dev) +static int team_upper_dev_link(struct team *team, struct team_port *port) { + struct netdev_lag_upper_info lag_upper_info; int err; - err = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); + lag_upper_info.tx_type = team->mode->lag_tx_type; + err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, + &lag_upper_info); if (err) return err; - port_dev->priv_flags |= IFF_TEAM_PORT; + port->dev->priv_flags |= IFF_TEAM_PORT; return 0; } -static void team_upper_dev_unlink(struct net_device *dev, - struct net_device *port_dev) +static void team_upper_dev_unlink(struct team *team, struct team_port *port) { - netdev_upper_dev_unlink(port_dev, dev); - port_dev->priv_flags &= ~IFF_TEAM_PORT; + netdev_upper_dev_unlink(port->dev, team->dev); + port->dev->priv_flags &= ~IFF_TEAM_PORT; } static void __team_port_change_port_added(struct team_port *port, bool linkup); @@ -1194,7 +1195,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_handler_register; } - err = team_upper_dev_link(dev, port_dev); + err = team_upper_dev_link(team, port); if (err) { netdev_err(dev, "Device %s failed to set upper link\n", portname); @@ -1220,7 +1221,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return 0; err_option_port_add: - team_upper_dev_unlink(dev, port_dev); + team_upper_dev_unlink(team, port); err_set_upper_link: netdev_rx_handler_unregister(port_dev); @@ -1264,7 +1265,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev) team_port_disable(team, port); list_del_rcu(&port->list); - team_upper_dev_unlink(dev, port_dev); + team_upper_dev_unlink(team, port); netdev_rx_handler_unregister(port_dev); team_port_disable_netpoll(port); vlan_vids_del_by_dev(port_dev, dev); diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c index 40fd3381b693..3f189823ba3b 100644 --- a/drivers/net/team/team_mode_activebackup.c +++ b/drivers/net/team/team_mode_activebackup.c @@ -127,6 +127,7 @@ static const struct team_mode ab_mode = { .owner = THIS_MODULE, .priv_size = sizeof(struct ab_priv), .ops = &ab_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, }; static int __init ab_init_module(void) diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c index c366cd299c06..302ff35b0cbc 100644 --- a/drivers/net/team/team_mode_broadcast.c +++ b/drivers/net/team/team_mode_broadcast.c @@ -56,6 +56,7 @@ static const struct team_mode bc_mode = { .kind = "broadcast", .owner = THIS_MODULE, .ops = &bc_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_BROADCAST, }; static int __init bc_init_module(void) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index a1536d0d83a9..cdb19b385d42 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -661,6 +661,7 @@ static const struct team_mode lb_mode = { .priv_size = sizeof(struct lb_priv), .port_priv_size = sizeof(struct lb_port_priv), .ops = &lb_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_HASH, }; static int __init lb_init_module(void) diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c index cd2f692b8074..215f845782db 100644 --- a/drivers/net/team/team_mode_random.c +++ b/drivers/net/team/team_mode_random.c @@ -46,6 +46,7 @@ static const struct team_mode rnd_mode = { .kind = "random", .owner = THIS_MODULE, .ops = &rnd_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_RANDOM, }; static int __init rnd_init_module(void) diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index 53665850b59e..0aa234118c03 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -58,6 +58,7 @@ static const struct team_mode rr_mode = { .owner = THIS_MODULE, .priv_size = sizeof(struct rr_priv), .ops = &rr_mode_ops, + .lag_tx_type = NETDEV_LAG_TX_TYPE_ROUNDROBIN, }; static int __init rr_init_module(void) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index a6aa970758a2..b84e49c3a738 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -164,6 +164,7 @@ struct team_mode { size_t priv_size; size_t port_priv_size; const struct team_mode_ops *ops; + enum netdev_lag_tx_type lag_tx_type; }; #define TEAM_PORT_HASHBITS 4 From 41f0b0496466d6e0e8245f94a79889234cde5e3c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:14 +0100 Subject: [PATCH 12/28] bonding: fill-up LAG changeupper info struct and pass it along Initialize netdev_lag_upper_info structure by TX type according to current bonding mode and pass it along via netdev_master_upper_dev_link. Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 45 +++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fa3ed1d8a12d..e94e79ad757c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1198,26 +1198,43 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) return ret; } -static int bond_master_upper_dev_link(struct net_device *bond_dev, - struct net_device *slave_dev, - struct slave *slave) +static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond) { + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + return NETDEV_LAG_TX_TYPE_ROUNDROBIN; + case BOND_MODE_ACTIVEBACKUP: + return NETDEV_LAG_TX_TYPE_ACTIVEBACKUP; + case BOND_MODE_BROADCAST: + return NETDEV_LAG_TX_TYPE_BROADCAST; + case BOND_MODE_XOR: + case BOND_MODE_8023AD: + return NETDEV_LAG_TX_TYPE_HASH; + default: + return NETDEV_LAG_TX_TYPE_UNKNOWN; + } +} + +static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave) +{ + struct netdev_lag_upper_info lag_upper_info; int err; - err = netdev_master_upper_dev_link(slave_dev, bond_dev, slave, NULL); + lag_upper_info.tx_type = bond_lag_tx_type(bond); + err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave, + &lag_upper_info); if (err) return err; - slave_dev->flags |= IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); + slave->dev->flags |= IFF_SLAVE; + rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); return 0; } -static void bond_upper_dev_unlink(struct net_device *bond_dev, - struct net_device *slave_dev) +static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave) { - netdev_upper_dev_unlink(slave_dev, bond_dev); - slave_dev->flags &= ~IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); + netdev_upper_dev_unlink(slave->dev, bond->dev); + slave->dev->flags &= ~IFF_SLAVE; + rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); } static struct slave *bond_alloc_slave(struct bonding *bond) @@ -1662,7 +1679,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_detach; } - res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); + res = bond_master_upper_dev_link(bond, new_slave); if (res) { netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res); goto err_unregister; @@ -1698,7 +1715,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) /* Undo stages on error */ err_upper_unlink: - bond_upper_dev_unlink(bond_dev, slave_dev); + bond_upper_dev_unlink(bond, new_slave); err_unregister: netdev_rx_handler_unregister(slave_dev); @@ -1804,7 +1821,7 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); - bond_upper_dev_unlink(bond_dev, slave_dev); + bond_upper_dev_unlink(bond, slave); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ From 04d482660a07039fc4e9a42bb3517db236d98f96 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:15 +0100 Subject: [PATCH 13/28] net: introduce change lower state notifier When lower device like bonding slave, team/bridge port, etc changes its state, it is useful for others to notice this change. Currently this is implemented specificly for bonding as NETDEV_BONDING_INFO notifier. This patch aims to replace this specific usage and make this more generic to be used for all upper-lower devices. Introduce NETDEV_CHANGELOWERSTATE netdev notifier type and netdev_lower_state_changed() helper. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++++ net/core/dev.c | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ab90ea0ed03..ad69f237aa78 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2158,6 +2158,7 @@ struct netdev_lag_upper_info { #define NETDEV_CHANGEINFODATA 0x0018 #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A +#define NETDEV_CHANGELOWERSTATE 0x001B int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -2179,6 +2180,11 @@ struct netdev_notifier_changeupper_info { void *upper_info; /* upper dev info */ }; +struct netdev_notifier_changelowerstate_info { + struct netdev_notifier_info info; /* must be first */ + void *lower_state_info; /* is lower dev state */ +}; + static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { @@ -3640,6 +3646,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 diff --git a/net/core/dev.c b/net/core/dev.c index 8ed886663c6d..d1706e88fbeb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5756,6 +5756,26 @@ int dev_get_nest_level(struct net_device *dev, } EXPORT_SYMBOL(dev_get_nest_level); +/** + * netdev_lower_change - Dispatch event about lower device state change + * @lower_dev: device + * @lower_state_info: state to dispatch + * + * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info. + * The caller must hold the RTNL lock. + */ +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info) +{ + struct netdev_notifier_changelowerstate_info changelowerstate_info; + + ASSERT_RTNL(); + changelowerstate_info.lower_state_info = lower_state_info; + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + &changelowerstate_info.info); +} +EXPORT_SYMBOL(netdev_lower_state_changed); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; From fb1b2e3ce53aef80b3cef71f3885d584cdbdc6b8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:16 +0100 Subject: [PATCH 14/28] net: introduce lower state changed info structure for LAG lowers This is shared info structure for bonding and team. Serves to pass down info about link state and port activity to notification listeners. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ad69f237aa78..fa84b59eb197 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2123,6 +2123,11 @@ struct netdev_lag_upper_info { enum netdev_lag_tx_type tx_type; }; +struct netdev_lag_lower_state_info { + u8 link_up : 1, + tx_enabled : 1; +}; + #include /* netdevice notifier chain. Please remember to update the rtnetlink From 7d259505299cb6f4642eac4eebb191d6c2ebe558 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:17 +0100 Subject: [PATCH 15/28] team: rtnl_lock for options set During options set, there will be needed to hold rtnl_mutex in order to safely call netdev notifiers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index dd1504bbb4a7..98141338d0ce 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2422,9 +2422,13 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) struct nlattr *nl_option; LIST_HEAD(opt_inst_list); + rtnl_lock(); + team = team_nl_team_get(info); - if (!team) - return -EINVAL; + if (!team) { + err = -EINVAL; + goto rtnl_unlock; + } err = -EINVAL; if (!info->attrs[TEAM_ATTR_LIST_OPTION]) { @@ -2551,7 +2555,8 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) team_put: team_nl_team_put(team); - +rtnl_unlock: + rtnl_unlock(); return err; } From 3a0d6c5af5e52622a58c68d7c276db9ec38ee2a4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:18 +0100 Subject: [PATCH 16/28] team: implement lower state change propagation Let netdev notifier listeners know about link-up and port-enable state changes. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 98141338d0ce..059c0f60a2b2 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -91,10 +91,24 @@ void team_modeop_port_change_dev_addr(struct team *team, } EXPORT_SYMBOL(team_modeop_port_change_dev_addr); +static void team_lower_state_changed(struct team_port *port) +{ + struct netdev_lag_lower_state_info info; + + info.link_up = port->linkup; + info.tx_enabled = team_port_enabled(port); + netdev_lower_state_changed(port->dev, &info); +} + static void team_refresh_port_linkup(struct team_port *port) { - port->linkup = port->user.linkup_enabled ? port->user.linkup : - port->state.linkup; + bool new_linkup = port->user.linkup_enabled ? port->user.linkup : + port->state.linkup; + + if (port->linkup != new_linkup) { + port->linkup = new_linkup; + team_lower_state_changed(port); + } } @@ -932,6 +946,7 @@ static void team_port_enable(struct team *team, team->ops.port_enabled(team, port); team_notify_peers(team); team_mcast_rejoin(team); + team_lower_state_changed(port); } static void __reconstruct_port_hlist(struct team *team, int rm_index) @@ -963,6 +978,7 @@ static void team_port_disable(struct team *team, team_adjust_ops(team); team_notify_peers(team); team_mcast_rejoin(team); + team_lower_state_changed(port); } #define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ From 5d397061ca2081d8a99e4bee5792122faa6aaf86 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:19 +0100 Subject: [PATCH 17/28] bonding: allow notifications for bond_set_slave_link_state Similar to state notifications. We allow caller to indicate if the notification should happen now or later, depending on if he holds rtnl mutex or not. Introduce bond_slave_link_notify function (similar to bond_slave_state_notify) which is later on called with rtnl mutex and goes over slaves and executes delayed notification. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 54 ++++++++++++++++++++++----------- include/net/bonding.h | 32 +++++++++++++++++-- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e94e79ad757c..7695490061ec 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -830,7 +830,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } new_active->delay = 0; - bond_set_slave_link_state(new_active, BOND_LINK_UP); + bond_set_slave_link_state(new_active, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1580,21 +1581,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { bond_set_slave_link_state(new_slave, - BOND_LINK_BACK); + BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_NOW); new_slave->delay = bond->params.updelay; } else { bond_set_slave_link_state(new_slave, - BOND_LINK_UP); + BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); } } else { - bond_set_slave_link_state(new_slave, BOND_LINK_DOWN); + bond_set_slave_link_state(new_slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); } } else if (bond->params.arp_interval) { bond_set_slave_link_state(new_slave, (netif_carrier_ok(slave_dev) ? - BOND_LINK_UP : BOND_LINK_DOWN)); + BOND_LINK_UP : BOND_LINK_DOWN), + BOND_SLAVE_NOTIFY_NOW); } else { - bond_set_slave_link_state(new_slave, BOND_LINK_UP); + bond_set_slave_link_state(new_slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); } if (new_slave->link != BOND_LINK_DOWN) @@ -2013,7 +2019,8 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_FAIL); + bond_set_slave_link_state(slave, BOND_LINK_FAIL, + BOND_SLAVE_NOTIFY_LATER); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -2028,7 +2035,8 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_LATER); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", (bond->params.downdelay - slave->delay) * @@ -2050,7 +2058,8 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_BACK); + bond_set_slave_link_state(slave, BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_LATER); slave->delay = bond->params.updelay; if (slave->delay) { @@ -2064,7 +2073,8 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_BACK: if (!link_state) { bond_set_slave_link_state(slave, - BOND_LINK_DOWN); + BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_LATER); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -2102,7 +2112,8 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); slave->last_link_up = jiffies; primary = rtnl_dereference(bond->primary_slave); @@ -2142,7 +2153,8 @@ static void bond_miimon_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_8023AD) @@ -2725,7 +2737,8 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *current_arp_slave; current_arp_slave = rtnl_dereference(bond->current_arp_slave); - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); if (current_arp_slave) { bond_set_slave_inactive_flags( current_arp_slave, @@ -2748,7 +2761,8 @@ static void bond_ab_arp_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); @@ -2827,7 +2841,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) * up when it is actually down */ if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_LATER); if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2847,7 +2862,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (!new_slave) goto check_state; - bond_set_slave_link_state(new_slave, BOND_LINK_BACK); + bond_set_slave_link_state(new_slave, BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_LATER); bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); bond_arp_send_all(bond, new_slave); new_slave->last_link_up = jiffies; @@ -2855,7 +2871,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) check_state: bond_for_each_slave_rcu(bond, slave, iter) { - if (slave->should_notify) { + if (slave->should_notify || slave->should_notify_link) { should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; break; } @@ -2910,8 +2926,10 @@ re_arm: if (should_notify_peers) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - if (should_notify_rtnl) + if (should_notify_rtnl) { bond_slave_state_notify(bond); + bond_slave_link_notify(bond); + } rtnl_unlock(); } diff --git a/include/net/bonding.h b/include/net/bonding.h index c1740a2794a3..1df437715e2f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -165,7 +165,8 @@ struct slave { u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ - should_notify:1; /* indicateds whether the state changed */ + should_notify:1, /* indicates whether the state changed */ + should_notify_link:1; /* indicates whether the link changed */ u8 duplex; u32 original_mtu; u32 link_failure_count; @@ -504,10 +505,35 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } -static inline void bond_set_slave_link_state(struct slave *slave, int state) +static inline void bond_set_slave_link_state(struct slave *slave, int state, + bool notify) { + if (slave->link == state) + return; + slave->link = state; - bond_queue_slave_event(slave); + if (notify) { + bond_queue_slave_event(slave); + slave->should_notify_link = 0; + } else { + if (slave->should_notify_link) + slave->should_notify_link = 0; + else + slave->should_notify_link = 1; + } +} + +static inline void bond_slave_link_notify(struct bonding *bond) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave(bond, tmp, iter) { + if (tmp->should_notify_link) { + bond_queue_slave_event(tmp); + tmp->should_notify_link = 0; + } + } } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) From f7c7eb7f7af7f87e0fc150994785fd139576e43a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:20 +0100 Subject: [PATCH 18/28] bonding: implement lower state change propagation Let netdev notifier listeners know about link and slave state change. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 10 ++++++++++ include/net/bonding.h | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7695490061ec..2f1145063c60 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1317,6 +1317,16 @@ void bond_queue_slave_event(struct slave *slave) queue_delayed_work(slave->bond->wq, &nnw->work, 0); } +void bond_lower_state_changed(struct slave *slave) +{ + struct netdev_lag_lower_state_info info; + + info.link_up = slave->link == BOND_LINK_UP || + slave->link == BOND_LINK_FAIL; + info.tx_enabled = bond_is_active_slave(slave); + netdev_lower_state_changed(slave->dev, &info); +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { diff --git a/include/net/bonding.h b/include/net/bonding.h index 1df437715e2f..ee6c52053aa3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -247,6 +247,7 @@ struct bonding { ((struct slave *) rtnl_dereference(dev->rx_handler_data)) void bond_queue_slave_event(struct slave *slave); +void bond_lower_state_changed(struct slave *slave); struct bond_vlan_tag { __be16 vlan_proto; @@ -328,6 +329,7 @@ static inline void bond_set_active_slave(struct slave *slave) if (slave->backup) { slave->backup = 0; bond_queue_slave_event(slave); + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -337,6 +339,7 @@ static inline void bond_set_backup_slave(struct slave *slave) if (!slave->backup) { slave->backup = 1; bond_queue_slave_event(slave); + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -349,6 +352,7 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); bond_queue_slave_event(slave); slave->should_notify = 0; @@ -380,6 +384,7 @@ static inline void bond_slave_state_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { + bond_lower_state_changed(tmp); rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); tmp->should_notify = 0; } @@ -514,6 +519,7 @@ static inline void bond_set_slave_link_state(struct slave *slave, int state, slave->link = state; if (notify) { bond_queue_slave_event(slave); + bond_lower_state_changed(slave); slave->should_notify_link = 0; } else { if (slave->should_notify_link) @@ -531,6 +537,7 @@ static inline void bond_slave_link_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify_link) { bond_queue_slave_event(tmp); + bond_lower_state_changed(tmp); tmp->should_notify_link = 0; } } From 57beaca8ecd77c85087270ef15ff319767994f6d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:21 +0100 Subject: [PATCH 19/28] bonding: set inactive flags on release Be correct and symmetric to enslave and set inactive flags during release. That gives LAG offload drivers - lower state change listeners - possibility to do proper cleanup. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2f1145063c60..5a7de43a09f8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1832,6 +1832,8 @@ static int __bond_release_one(struct net_device *bond_dev, return -EINVAL; } + bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); + bond_sysfs_slave_del(slave); /* recompute stats just before removing the slave */ From c5b9b518adab8641d9e718e629c88698cc35e7fa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:22 +0100 Subject: [PATCH 20/28] mlxsw: spectrum: Add set_rx_mode ndo stub Add just a stub for now. This allows to pass check in dev_ifsioc, SIOCADDMULTI and SIOCDELMULTI cases. Teamd is using these to add LACP slow MAC. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 14a9a9ff89ed..a397cc18693d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -417,6 +417,10 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static void mlxsw_sp_set_rx_mode(struct net_device *dev) +{ +} + static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -725,6 +729,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, From 8060646a0fd1b1685a16694f2eb2f461e5eb46cc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:23 +0100 Subject: [PATCH 21/28] mlxsw: core: Add support for packets received from LAG port Lower layer (pci) has information if the packet is received via LAG port. If that is the case, it fills up rx_info accordingly. However upper layer does not care about lag_id/port_index for received packets so convert it to local_port before passing it up. For that conversion, lag mapping array is introduced. Upper layer is responsible for setting up the mapping according to what is set in HW. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 79 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlxsw/core.h | 14 +++- drivers/net/ethernet/mellanox/mlxsw/pci.c | 4 +- 3 files changed, 92 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 1ecb4aabbdc7..af8a48b3b3ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -105,6 +105,9 @@ struct mlxsw_core { struct debugfs_blob_wrapper vsd_blob; struct debugfs_blob_wrapper psid_blob; } dbg; + struct { + u8 *mapping; /* lag_id+port_index to local_port mapping */ + } lag; struct mlxsw_hwmon *hwmon; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ @@ -815,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } + if (mlxsw_driver->profile->used_max_lag && + mlxsw_driver->profile->used_max_port_per_lag) { + alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * + mlxsw_driver->profile->max_port_per_lag; + mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_core->lag.mapping) { + err = -ENOMEM; + goto err_alloc_lag_mapping; + } + } + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); if (err) goto err_bus_init; @@ -847,6 +861,8 @@ err_hwmon_init: err_emad_init: mlxsw_bus->fini(bus_priv); err_bus_init: + kfree(mlxsw_core->lag.mapping); +err_alloc_lag_mapping: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: kfree(mlxsw_core); @@ -865,6 +881,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) mlxsw_hwmon_fini(mlxsw_core->hwmon); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + kfree(mlxsw_core->lag.mapping); free_percpu(mlxsw_core->pcpu_stats); kfree(mlxsw_core); mlxsw_core_driver_put(device_kind); @@ -1196,11 +1213,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_listener_item *rxl_item; const struct mlxsw_rx_listener *rxl; struct mlxsw_core_pcpu_stats *pcpu_stats; - u8 local_port = rx_info->sys_port; + u8 local_port; bool found = false; - dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n", - __func__, rx_info->sys_port, rx_info->trap_id); + if (rx_info->is_lag) { + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n", + __func__, rx_info->u.lag_id, + rx_info->trap_id); + /* Upper layer does not care if the skb came from LAG or not, + * so just get the local_port for the lag port and push it up. + */ + local_port = mlxsw_core_lag_mapping_get(mlxsw_core, + rx_info->u.lag_id, + rx_info->lag_port_index); + } else { + local_port = rx_info->u.sys_port; + } + + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n", + __func__, local_port, rx_info->trap_id); if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || (local_port >= MLXSW_PORT_MAX_PORTS)) @@ -1244,6 +1275,48 @@ drop: } EXPORT_SYMBOL(mlxsw_core_skb_receive); +static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + port_index; +} + +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + mlxsw_core->lag.mapping[index] = local_port; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_set); + +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + return mlxsw_core->lag.mapping[index]; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_get); + +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port) +{ + int i; + + for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, i); + + if (mlxsw_core->lag.mapping[index] == local_port) + mlxsw_core->lag.mapping[index] = 0; + } +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 5ac952956d55..4833fb33ce07 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); struct mlxsw_rx_info { - u16 sys_port; + bool is_lag; + union { + u16 sys_port; + u16 lag_id; + } u; + u8 lag_port_index; int trap_id; }; void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_info *rx_info); +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port); +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index); +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index de69e719dc9d..8ca66a01b097 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -690,7 +690,9 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (mlxsw_pci_cqe_lag_get(cqe)) goto drop; - rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + rx_info.is_lag = false; + rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); byte_count = mlxsw_pci_cqe_byte_count_get(cqe); From d2292e8761be2346732e3cc562ad2f38a19c325e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:24 +0100 Subject: [PATCH 22/28] mlxsw: pci: Implement LAG processing for received packets Completion queue element for receive queue provides information if the packet was received via LAG port. Extract this info and pass it along to core. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 19 ++++++++----------- drivers/net/ethernet/mellanox/mlxsw/pci.h | 4 +++- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 8ca66a01b097..d2102e572b1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -686,12 +686,14 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); - /* We do not support lag now */ - if (mlxsw_pci_cqe_lag_get(cqe)) - goto drop; - - rx_info.is_lag = false; - rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + if (mlxsw_pci_cqe_lag_get(cqe)) { + rx_info.is_lag = true; + rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe); + rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe); + } else { + rx_info.is_lag = false; + rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + } rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); @@ -701,7 +703,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); -put_new_skb: memset(wqe, 0, q->elem_size); err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); if (err) @@ -710,10 +711,6 @@ put_new_skb: q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return; - -drop: - dev_kfree_skb_any(skb); - goto put_new_skb; } static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 142f33d978c5..912106054ff2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -129,13 +129,15 @@ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); */ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); -/* pci_cqe_system_port +/* pci_cqe_system_port/lag_id * When lag=0: System port on which the packet was received * When lag=1: * bits [15:4] LAG ID on which the packet was received * bits [3:0] sub_port on which the packet was received */ MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); +MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); /* pci_cqe_wqe_counter * WQE count of the WQEs completed on the associated dqn From d1d40be084c60f2ba2ca369dedd27b12ae840aa1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:25 +0100 Subject: [PATCH 23/28] mlxsw: reg: Add link aggregation configuration registers definitions Add definitions of SLDR, SLCR2, SLCOR registers that are used to configure LAG. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 293 ++++++++++++++++++++++ 1 file changed, 293 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index f89419393a7f..049abbb8bcc2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -865,6 +865,293 @@ static inline void mlxsw_reg_sftr_pack(char *payload, mlxsw_reg_sftr_port_mask_set(payload, port, 1); } +/* SLDR - Switch LAG Descriptor Register + * ----------------------------------------- + * The switch LAG descriptor register is populated by LAG descriptors. + * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to + * max_lag-1. + */ +#define MLXSW_REG_SLDR_ID 0x2014 +#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */ + +static const struct mlxsw_reg_info mlxsw_reg_sldr = { + .id = MLXSW_REG_SLDR_ID, + .len = MLXSW_REG_SLDR_LEN, +}; + +enum mlxsw_reg_sldr_op { + /* Indicates a creation of a new LAG-ID, lag_id must be valid */ + MLXSW_REG_SLDR_OP_LAG_CREATE, + MLXSW_REG_SLDR_OP_LAG_DESTROY, + /* Ports that appear in the list have the Distributor enabled */ + MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST, + /* Removes ports from the disributor list */ + MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST, +}; + +/* reg_sldr_op + * Operation. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3); + +/* reg_sldr_lag_id + * LAG identifier. The lag_id is the index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10); + +static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +/* reg_sldr_num_ports + * The number of member ports of the LAG. + * Reserved for Create / Destroy operations + * For Add / Remove operations - indicates the number of ports in the list. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8); + +/* reg_sldr_system_port + * System port. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +/* SLCR - Switch LAG Configuration 2 Register + * ------------------------------------------- + * The Switch LAG Configuration register is used for configuring the + * LAG properties of the switch. + */ +#define MLXSW_REG_SLCR_ID 0x2015 +#define MLXSW_REG_SLCR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcr = { + .id = MLXSW_REG_SLCR_ID, + .len = MLXSW_REG_SLCR_LEN, +}; + +enum mlxsw_reg_slcr_pp { + /* Global Configuration (for all ports) */ + MLXSW_REG_SLCR_PP_GLOBAL, + /* Per port configuration, based on local_port field */ + MLXSW_REG_SLCR_PP_PER_PORT, +}; + +/* reg_slcr_pp + * Per Port Configuration + * Note: Reading at Global mode results in reading port 1 configuration. + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1); + +/* reg_slcr_local_port + * Local port number + * Supported from CPU port + * Not supported from router port + * Reserved when pp = Global Configuration + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_slcr_type { + MLXSW_REG_SLCR_TYPE_CRC, /* default */ + MLXSW_REG_SLCR_TYPE_XOR, + MLXSW_REG_SLCR_TYPE_RANDOM, +}; + +/* reg_slcr_type + * Hash type + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4); + +/* Ingress port */ +#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0) +/* SMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1) +/* SMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2) +#define MLXSW_REG_SLCR_LAG_HASH_SMAC \ + (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP) +/* DMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3) +/* DMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4) +#define MLXSW_REG_SLCR_LAG_HASH_DMAC \ + (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP) +/* Ethertype - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5) +/* Ethertype - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6) +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \ + (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \ + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP) +/* VLAN ID - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7) +/* VLAN ID - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8) +#define MLXSW_REG_SLCR_LAG_HASH_VLANID \ + (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \ + MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP) +/* Source IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9) +/* Destination IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10) +/* TCP/UDP source port */ +#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11) +/* TCP/UDP destination port*/ +#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12) +/* IPv4 Protocol/IPv6 Next Header */ +#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13) +/* IPv6 Flow label */ +#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14) +/* SID - FCoE source ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15) +/* DID - FCoE destination ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16) +/* OXID - FCoE originator exchange ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17) +/* Destination QP number - for RoCE packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19) + +/* reg_slcr_lag_hash + * LAG hashing configuration. This is a bitmask, in which each set + * bit includes the corresponding item in the LAG hash calculation. + * The default lag_hash contains SMAC, DMAC, VLANID and + * Ethertype (for all packet types). + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20); + +static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) +{ + MLXSW_REG_ZERO(slcr, payload); + mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); +} + +/* SLCOR - Switch LAG Collector Register + * ------------------------------------- + * The Switch LAG Collector register controls the Local Port membership + * in a LAG and enablement of the collector. + */ +#define MLXSW_REG_SLCOR_ID 0x2016 +#define MLXSW_REG_SLCOR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcor = { + .id = MLXSW_REG_SLCOR_ID, + .len = MLXSW_REG_SLCOR_LEN, +}; + +enum mlxsw_reg_slcor_col { + /* Port is added with collector disabled */ + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT, +}; + +/* reg_slcor_col + * Collector configuration + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2); + +/* reg_slcor_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8); + +/* reg_slcor_lag_id + * LAG Identifier. Index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10); + +/* reg_slcor_port_index + * Port index in the LAG list. Only valid on Add Port to LAG col. + * Valid range is from 0 to cap_max_lag_members-1 + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10); + +static inline void mlxsw_reg_slcor_pack(char *payload, + u8 local_port, u16 lag_id, + enum mlxsw_reg_slcor_col col) +{ + MLXSW_REG_ZERO(slcor, payload); + mlxsw_reg_slcor_col_set(payload, col); + mlxsw_reg_slcor_local_port_set(payload, local_port); + mlxsw_reg_slcor_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_slcor_port_add_pack(char *payload, + u8 local_port, u16 lag_id, + u8 port_index) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT); + mlxsw_reg_slcor_port_index_set(payload, port_index); +} + +static inline void mlxsw_reg_slcor_port_remove_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT); +} + +static inline void mlxsw_reg_slcor_col_enable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + +static inline void mlxsw_reg_slcor_col_disable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + /* SPMLR - Switch Port MAC Learning Register * ----------------------------------------- * Controls the Switch MAC learning policy per port. @@ -2653,6 +2940,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFGC"; case MLXSW_REG_SFTR_ID: return "SFTR"; + case MLXSW_REG_SLDR_ID: + return "SLDR"; + case MLXSW_REG_SLCR_ID: + return "SLCR"; + case MLXSW_REG_SLCOR_ID: + return "SLCOR"; case MLXSW_REG_SPMLR_ID: return "SPMLR"; case MLXSW_REG_SVFA_ID: From e4bfbae29a1978a855f7c35bbcab1fe7ed0602d2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:26 +0100 Subject: [PATCH 24/28] mlxsw: reg: Add definition of LAG unicast record for SFD register LAG-related records have specific format in SFD register. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 79 ++++++++++++++++++++--- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 049abbb8bcc2..431b28e2b420 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -286,6 +286,7 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8, enum mlxsw_reg_sfd_rec_type { MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1, }; /* reg_sfd_rec_type @@ -376,24 +377,34 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x0C, false); -static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, - enum mlxsw_reg_sfd_rec_policy policy, - const char *mac, u16 vid, - enum mlxsw_reg_sfd_rec_action action, - u8 local_port) +static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_type rec_type, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, + enum mlxsw_reg_sfd_rec_action action) { u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload); if (rec_index >= num_rec) mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1); mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0); - mlxsw_reg_sfd_rec_type_set(payload, rec_index, - MLXSW_REG_SFD_REC_TYPE_UNICAST); + mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type); mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac); + mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); +} + +static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u8 local_port) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST, + policy, mac, action); mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid); - mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); } @@ -406,6 +417,58 @@ static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index); } +/* reg_sfd_uc_lag_sub_port + * LAG sub port. + * Must be 0 if multichannel VEPA is not enabled. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_lag_id + * LAG Identifier - pointer into the LAG descriptor table. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u16 lag_id) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG, + policy, mac, action); + mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id); +} + +static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index); +} + /* SFN - Switch FDB Notification Register * ------------------------------------------- * The switch provides notifications on newly learned FDB entries and From 3b71571c0123ec52c5b73233b4f012fa59aed61a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:27 +0100 Subject: [PATCH 25/28] mlxsw: reg: Add definition of LAG unicast record for SFN register LAG-related records have specific format in SFN register. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 431b28e2b420..4e4e4dcf054f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -519,8 +519,12 @@ MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8, enum mlxsw_reg_sfn_rec_type { /* MAC addresses learned on a regular port. */ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5, - /* Aged-out MAC address on a regular port */ + /* MAC addresses learned on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6, + /* Aged-out MAC address on a regular port. */ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, + /* Aged-out MAC address on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8, }; /* reg_sfn_rec_type @@ -568,6 +572,22 @@ static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index); } +/* reg_sfn_mac_lag_lag_id + * LAG ID (pointer into the LAG descriptor table). + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index); +} + /* SPMS - Switch Port MSTP/RSTP State Register * ------------------------------------------- * Configures the spanning tree state of a physical port. From 0d65fc13042fce6a2d6de58ff0dc9531e8523c07 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:28 +0100 Subject: [PATCH 26/28] mlxsw: spectrum: Implement LAG port join/leave Implement basic procedures for joining/leaving port to/from LAG. That includes HW setup of collector, core LAG mapping setup. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 288 +++++++++++++++++- .../net/ethernet/mellanox/mlxsw/spectrum.h | 35 ++- 2 files changed, 306 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a397cc18693d..7d1f0a82a090 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1712,6 +1712,22 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) +{ + char slcr_pl[MLXSW_REG_SLCR_LEN]; + + mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | + MLXSW_REG_SLCR_LAG_HASH_DMAC | + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | + MLXSW_REG_SLCR_LAG_HASH_VLANID | + MLXSW_REG_SLCR_LAG_HASH_SIP | + MLXSW_REG_SLCR_LAG_HASH_DIP | + MLXSW_REG_SLCR_LAG_HASH_SPORT | + MLXSW_REG_SLCR_LAG_HASH_DPORT | + MLXSW_REG_SLCR_LAG_HASH_IPPROTO); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); +} + static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -1757,6 +1773,12 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, goto err_buffers_init; } + err = mlxsw_sp_lag_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n"); + goto err_lag_init; + } + err = mlxsw_sp_switchdev_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); @@ -1766,6 +1788,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, return 0; err_switchdev_init: +err_lag_init: err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); @@ -1793,9 +1816,9 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_lag = 1, - .max_lag = 64, + .max_lag = MLXSW_SP_LAG_MAX, .used_max_port_per_lag = 1, - .max_port_per_lag = 16, + .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, @@ -1894,19 +1917,206 @@ static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->master_bridge.dev = NULL; } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id, u8 port_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id, port_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + u16 *p_lag_id) +{ + struct mlxsw_sp_upper *lag; + int free_lag_id = -1; + int i; + + for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + lag = mlxsw_sp_lag_get(mlxsw_sp, i); + if (lag->ref_count) { + if (lag->dev == lag_dev) { + *p_lag_id = i; + return 0; + } + } else if (free_lag_id < 0) { + free_lag_id = i; + } + } + if (free_lag_id < 0) + return -EBUSY; + *p_lag_id = free_lag_id; + return 0; +} + +static bool +mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + struct netdev_lag_upper_info *lag_upper_info) +{ + u16 lag_id; + + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + return false; + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return false; + return true; +} + +static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, + u16 lag_id, u8 *p_port_index) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { + *p_port_index = i; + return 0; + } + } + return -EBUSY; +} + +static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id; + u8 port_index; + int err; + + err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id); + if (err) + return err; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + if (!lag->ref_count) { + err = mlxsw_sp_lag_create(mlxsw_sp, lag_id); + if (err) + return err; + lag->dev = lag_dev; + } + + err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index); + if (err) + return err; + err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); + if (err) + goto err_col_port_add; + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); + if (err) + goto err_col_port_enable; + + mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lag_id = lag_id; + mlxsw_sp_port->lagged = 1; + lag->ref_count++; + return 0; + +err_col_port_add: + if (!lag->ref_count) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); +err_col_port_enable: + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + return err; +} + +static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id = mlxsw_sp_port->lag_id; + int err; + + if (!mlxsw_sp_port->lagged) + return 0; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + WARN_ON(lag->ref_count == 0); + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); + if (err) + return err; + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + if (err) + return err; + + if (lag->ref_count == 1) { + err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); + if (err) + return err; + } + + mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lagged = 0; + lag->ref_count--; + return 0; +} + +static int mlxsw_sp_netdevice_port_event(struct net_device *dev, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err; - if (!mlxsw_sp_port_dev_check(dev)) - return NOTIFY_DONE; - mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; @@ -1914,16 +2124,22 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; + if (!info->master || !info->linking) + break; /* HW limitation forbids to put ports to multiple bridges. */ - if (info->master && info->linking && - netif_is_bridge_master(upper_dev) && + if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) return NOTIFY_BAD; + if (netif_is_lag_master(upper_dev) && + !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, + info->upper_info)) + return NOTIFY_BAD; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (info->master && - netif_is_bridge_master(upper_dev)) { + if (!info->master) + break; + if (netif_is_bridge_master(upper_dev)) { if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); if (err) @@ -1937,6 +2153,22 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, mlxsw_sp_port->bridged = 0; mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); } + } else if (netif_is_lag_master(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_lag_join(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to join link aggregation\n"); + return NOTIFY_BAD; + } + } else { + err = mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to leave link aggregation\n"); + return NOTIFY_BAD; + } + } } break; } @@ -1944,6 +2176,38 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return NOTIFY_DONE; } +static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, + unsigned long event, void *ptr) +{ + struct net_device *dev; + struct list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); + if (ret == NOTIFY_BAD) + return ret; + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_netdevice_port_event(dev, event, ptr); + + if (netif_is_lag_master(dev)) + return mlxsw_sp_netdevice_lag_event(dev, event, ptr); + + return NOTIFY_DONE; +} + static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4365c8bccc6d..48be5a63b9b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -46,9 +46,16 @@ #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID +#define MLXSW_SP_LAG_MAX 64 +#define MLXSW_SP_PORT_PER_LAG_MAX 16 struct mlxsw_sp_port; +struct mlxsw_sp_upper { + struct net_device *dev; + unsigned int ref_count; +}; + struct mlxsw_sp { unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; @@ -63,12 +70,16 @@ struct mlxsw_sp { } fdb_notify; #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; - struct { - struct net_device *dev; - unsigned int ref_count; - } master_bridge; + struct mlxsw_sp_upper master_bridge; + struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; }; +static inline struct mlxsw_sp_upper * +mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + return &mlxsw_sp->lags[lag_id]; +} + struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -87,8 +98,10 @@ struct mlxsw_sp_port { u8 learning:1, learning_sync:1, uc_flood:1, - bridged:1; + bridged:1, + lagged:1; u16 pvid; + u16 lag_id; /* 802.1Q bridge VLANs */ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; /* VLAN interfaces */ @@ -96,6 +109,18 @@ struct mlxsw_sp_port { u16 nr_vfids; }; +static inline struct mlxsw_sp_port * +mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u8 local_port; + + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + lag_id, port_index); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, From 8a1ab5d766396aad0e60cc8796646a1171b419c8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:29 +0100 Subject: [PATCH 27/28] mlxsw: spectrum: Implement FDB add/remove/dump for LAG Implement FDB offloading for lagged ports, including learning LAG FDB entries, adding/removing static FDB entries and dumping existing LAG FDB entries. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_switchdev.c | 182 +++++++++++++++--- 1 file changed, 150 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f21e23983a1a..406dab2f6b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -490,32 +490,56 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, untagged_flag, pvid_flag); } -static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port, - const char *mac, u16 vid, bool adding, - bool dynamic) +static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) { - enum mlxsw_reg_sfd_rec_policy policy; - enum mlxsw_reg_sfd_op op; + return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; +} + +static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) +{ + return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : + MLXSW_REG_SFD_OP_WRITE_REMOVE; +} + +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; int err; - if (!vid) - vid = mlxsw_sp_port->pvid; - sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); if (!sfd_pl) return -ENOMEM; - policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; - op = adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : - MLXSW_REG_SFD_OP_WRITE_REMOVE; - mlxsw_reg_sfd_pack(sfd_pl, op, 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, mlxsw_sp_port->local_port); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd), - sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + kfree(sfd_pl); + + return err; +} + +static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + char *sfd_pl; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), + mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, + lag_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; @@ -526,11 +550,21 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { + u16 vid = fdb->vid; + if (switchdev_trans_ph_prepare(trans)) return 0; - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - true, false); + if (!vid) + vid = mlxsw_sp_port->pvid; + + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + fdb->addr, vid, true, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, vid, true, false); } static int mlxsw_sp_port_obj_add(struct net_device *dev, @@ -645,8 +679,15 @@ static int mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb) { - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - false, false); + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + fdb->addr, fdb->vid, + false, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, fdb->vid, + false, false); } static int mlxsw_sp_port_obj_del(struct net_device *dev, @@ -672,14 +713,30 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, return err; } +static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (mlxsw_sp_port) + return mlxsw_sp_port; + } + return NULL; +} + static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; char mac[ETH_ALEN]; u16 vid; u8 local_port; + u16 lag_id; u8 num_rec; int stored_err = 0; int i; @@ -692,8 +749,7 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); - err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sfd), sfd_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) goto out; @@ -718,6 +774,20 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (err) stored_err = err; } + break; + case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: + mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, + mac, &vid, &lag_id); + if (mlxsw_sp_port == + mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) { + ether_addr_copy(fdb->addr, mac); + fdb->ndm_state = NUD_REACHABLE; + fdb->vid = vid; + err = cb(&fdb->obj); + if (err) + stored_err = err; + } + break; } } } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); @@ -779,6 +849,21 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, }; +static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync, + bool adding, char *mac, u16 vid, + struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + if (learning && learning_sync) { + info.addr = mac; + info.vid = vid; + notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; + call_switchdev_notifiers(notifier_type, dev, &info.info); + } +} + static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) @@ -796,24 +881,49 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, return; } - err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid, - adding && mlxsw_sp_port->learning, true); + err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid, + adding && mlxsw_sp_port->learning, true); if (err) { if (net_ratelimit()) netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); return; } - if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) { - struct switchdev_notifier_fdb_info info; - unsigned long notifier_type; + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, mlxsw_sp_port->dev); +} - info.addr = mac; - info.vid = vid; - notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; - call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev, - &info.info); +static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + char mac[ETH_ALEN]; + u16 lag_id; + u16 vid; + int err; + + mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id); + mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n"); + return; } + + err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid, + adding && mlxsw_sp_port->learning, + true); + if (err) { + if (net_ratelimit()) + netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + return; + } + + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, + mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev); } static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, @@ -828,6 +938,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, rec_index, false); break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; } } From 745812065cf302ad11bf63bcbdbd0ff41af0e36a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:30 +0100 Subject: [PATCH 28/28] mlxsw: spectrum: Implement LAG tx enabled lower state change Enabling/disabling TX on a LAG port means enabling/disabling distribution in our HW. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 81 ++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7d1f0a82a090..3ec07b9a458d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2108,8 +2108,47 @@ static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } -static int mlxsw_sp_netdevice_port_event(struct net_device *dev, - unsigned long event, void *ptr) +static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool lag_tx_enabled) +{ + if (lag_tx_enabled) + return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + else + return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); +} + +static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, + struct netdev_lag_lower_state_info *info) +{ + return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); +} + +static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, + unsigned long event, void *ptr) { struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; @@ -2176,6 +2215,44 @@ static int mlxsw_sp_netdevice_port_event(struct net_device *dev, return NOTIFY_DONE; } +static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = netdev_priv(dev); + info = ptr; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) { + err = mlxsw_sp_port_lag_changed(mlxsw_sp_port, + info->lower_state_info); + if (err) + netdev_err(dev, "Failed to reflect link aggregation lower state change\n"); + } + break; + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGEUPPER: + return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr); + case NETDEV_CHANGELOWERSTATE: + return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); + } + + return NOTIFY_DONE; +} + static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, unsigned long event, void *ptr) {