From 24c0df82ef7919e4d10cf2e4e65d368eb2e8ea21 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Dec 2017 12:01:21 +0100 Subject: [PATCH 01/56] netfilter: nf_tables: fix chain filter in nf_tables_dump_rules() ctx->chain may be null now that we have very large object names, so we cannot check for ctx->chain[0] here. Fixes: b7263e071aba7 ("netfilter: nf_tables: Allow table names of up to 255 chars") Signed-off-by: Pablo Neira Ayuso Acked-by: Phil Sutter --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 10798b357481..8d4526651661 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain[0] && + if (ctx && ctx->chain && strcmp(ctx->chain, chain->name) != 0) continue; From 4c82fd0abb87e20d0d68ef5237e74732352806c8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Dec 2017 12:08:33 +0100 Subject: [PATCH 02/56] netfilter: uapi: correct UNTRACKED conntrack state bit number nft_ct exposes this bit to userspace. This used to be #define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) (IP_CT_NUMBER is 5, so this was 0x40) .. but this got changed to 8 (0x100) when the untracked object got removed. Replace this with a literal 6 to prevent further incompatible changes in case IP_CT_NUMBER ever increases. 
Fixes: cc41c84b7e7f2 ("netfilter: kill the fake untracked conntrack objects") Reported-by: Li Shuang Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 3fea7709a441..57ccfb32e87f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -36,7 +36,7 @@ enum ip_conntrack_info { #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << 6) /* Bitset representing status of connection. */ enum ip_conntrack_status { From 8bea728dce8972e534e6b99fd550f7b5cc3864e8 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 25 Dec 2017 11:34:54 +0800 Subject: [PATCH 03/56] netfilter: nf_tables: fix potential NULL-ptr deref in nf_tables_dump_obj_done() If there is no NFTA_OBJ_TABLE and NFTA_OBJ_TYPE, the c.data will be NULL in nf_tables_getobj(). So before free filter->table in nf_tables_dump_obj_done(), we need to check if filter is NULL first. 
Fixes: e46abbcc05aa ("netfilter: nf_tables: Allow table names of up to 255 chars") Signed-off-by: Hangbin Liu Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8d4526651661..07bd4138c84e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_filter *filter = cb->data; - kfree(filter->table); - kfree(filter); + if (filter) { + kfree(filter->table); + kfree(filter); + } return 0; } From 55a5ec9b77106ffc05e8c40d7568432bf4696d7b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 2 Jan 2018 11:45:07 -0500 Subject: [PATCH 04/56] Revert "net: core: dev_get_valid_name is now the same as dev_alloc_name_ns" This reverts commit 87c320e51519a83c496ab7bfb4e96c8f9c001e89. Changing the error return code in some situations turns out to be harmful in practice. In particular Michael Ellerman reports that DHCP fails on his powerpc machines, and this revert gets things working again. Johannes Berg agrees that this revert is the best course of action for now. Fixes: 029b6d140550 ("Revert "net: core: maybe return -EEXIST in __dev_alloc_name"") Reported-by: Michael Ellerman Signed-off-by: David S. 
Miller --- net/core/dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 01ee854454a8..0e0ba36eeac9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name); int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - return dev_alloc_name_ns(net, dev, name); + BUG_ON(!net); + + if (!dev_valid_name(name)) + return -EINVAL; + + if (strchr(name, '%')) + return dev_alloc_name_ns(net, dev, name); + else if (__dev_get_by_name(net, name)) + return -EEXIST; + else if (dev->name != name) + strlcpy(dev->name, name, IFNAMSIZ); + + return 0; } EXPORT_SYMBOL(dev_get_valid_name); From 23263ec86a5f44312d2899323872468752324107 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Mon, 25 Dec 2017 10:43:49 +0800 Subject: [PATCH 05/56] ip6_tunnel: disable dst caching if tunnel is dual-stack When an ip6_tunnel is in mode 'any', where the transport layer protocol can be either 4 or 41, dst_cache must be disabled. This is because xfrm policies might apply to only one of the two protocols. Caching dst would cause xfrm policies for one protocol incorrectly used for the other. Signed-off-by: Eli Cooper Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 931c38f6ff4a..b263c809d8d4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1074,10 +1074,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } - } else if (!(t->parms.flags & - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { - /* enable the cache only only if the routing decision does - * not depend on the current inner header value + } else if (t->parms.proto != 0 && !(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | + IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only if neither the outer protocol nor the + * routing decision depends on the current inner header value */ use_cache = true; } From 52a589d51f1008f62569bf89e95b26221ee76690 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Dec 2017 14:43:58 +0800 Subject: [PATCH 06/56] geneve: update skb dst pmtu on tx path Commit a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") has fixed a performance issue caused by the change of lower dev's mtu for vxlan. The same thing needs to be done for geneve as well. Note that geneve cannot adjust it's mtu according to lower dev's mtu when creating it. The performance is very low later when netperfing over it without fixing the mtu manually. This patch could also avoid this issue. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b718a02a6bb6..0a48b3073d3d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -825,6 +825,13 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); + if (skb_dst(skb)) { + int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); @@ -864,6 +871,13 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); + if (skb_dst(skb)) { + int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); From 2fa771be953a17f8e0a9c39103464c2574444c62 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Dec 2017 14:45:12 +0800 Subject: [PATCH 07/56] ip6_tunnel: allow ip6gre dev mtu to be set below 1280 Commit 582442d6d5bc ("ipv6: Allow the MTU of ipip6 tunnel to be set below 1280") fixed a mtu setting issue. It works for ipip6 tunnel. But ip6gre dev updates the mtu also with ip6_tnl_change_mtu. Since the inner packet over ip6gre can be ipv4 and it's mtu should also be allowed to set below 1280, the same issue also exists on ip6gre. This patch is to fix it by simply changing to check if parms.proto is IPPROTO_IPV6 in ip6_tnl_change_mtu instead, to make ip6gre to go to 'else' branch. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b263c809d8d4..9a7cf355bc8c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1677,11 +1677,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); - if (tnl->parms.proto == IPPROTO_IPIP) { - if (new_mtu < ETH_MIN_MTU) + if (tnl->parms.proto == IPPROTO_IPV6) { + if (new_mtu < IPV6_MIN_MTU) return -EINVAL; } else { - if (new_mtu < IPV6_MIN_MTU) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; } if (new_mtu > 0xFFF8 - dev->hard_header_len) From 8764a8267b128405cf383157d5e9a4a3735d2409 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Dec 2017 08:57:35 +0100 Subject: [PATCH 08/56] mlxsw: spectrum_router: Fix NULL pointer deref When we remove the neighbour associated with a nexthop we should always refuse to write the nexthop to the adjacency table. Regardless if it is already present in the table or not. Otherwise, we risk dereferencing the NULL pointer that was set instead of the neighbour. Fixes: a7ff87acd995 ("mlxsw: spectrum_router: Implement next-hop routing") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index be657b8533f0..434b3922b34f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3228,7 +3228,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, { if (!removing) nh->should_offload = 1; - else if (nh->offloaded) + else nh->should_offload = 0; nh->update = 1; } From 90045fc9c78855bdc625a0ab185d97b72a937613 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Dec 2017 09:05:33 +0100 Subject: [PATCH 09/56] mlxsw: spectrum: Relax sanity checks during enslavement Since commit 25cc72a33835 ("mlxsw: spectrum: Forbid linking to devices that have uppers") the driver forbids enslavement to netdevs that already have uppers of their own, as this can result in various ordering problems. This requirement proved to be too strict for some users who need to be able to enslave ports to a bridge that already has uppers. In this case, we can allow the enslavement if the bridge is already known to us, as any configuration performed on top of the bridge was already reflected to the device. Fixes: 25cc72a33835 ("mlxsw: spectrum: Forbid linking to devices that have uppers") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Tested-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 11 +++++++++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 6 ++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9bd8d28de152..c3837ca7a705 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4376,7 +4376,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; @@ -4504,6 +4507,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; struct net_device *upper_dev; @@ -4520,7 +4524,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 432ab9b12b7f..05ce1befd9b3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -365,6 +365,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev); +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7b8548e25ae7..593ad31be749 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -152,6 +152,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, return NULL; } +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) From 3bb23421a504f01551b7cb9dff0e41dbf16656b0 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 26 Dec 2017 07:48:51 +0200 Subject: [PATCH 10/56] net/sched: Fix update of lastuse in act modules implementing stats_update We need to update lastuse to the most updated value between what is already set and the new value. If HW matching fails, i.e. because of an issue, the stats are not updated but it could be that software did match and updated lastuse. Fixes: 5712bf9c5c30 ("net/sched: act_mirred: Use passed lastuse argument") Fixes: 9fea47d93bcc ("net/sched: act_gact: Update statistics when offloaded to hardware") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/act_gact.c | 2 +- net/sched/act_mirred.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e29a48ef7fc3..a0ac42b3ed06 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8b3e59388480..08b61849c2a2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, From d02fd6e7d2933ede6478a15f9e4ce8a93845824e Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 26 Dec 2017 21:44:32 +0800 Subject: [PATCH 11/56] macvlan: Fix one possible double free Because the macvlan_uninit would free the macvlan port, so there is one double free case in macvlan_common_newlink. When the macvlan port is just created, then register_netdevice or netdev_upper_dev_link failed and they would invoke macvlan_uninit. Then it would reach the macvlan_port_destroy which triggers the double free. Signed-off-by: Gao Feng Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a178c5efd33e..a0f2be81d52e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1444,9 +1444,14 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, return 0; unregister_netdev: + /* macvlan_uninit would free the macvlan port */ unregister_netdevice(dev); + return err; destroy_macvlan_port: - if (create) + /* the macvlan port may be freed by macvlan_uninit when fail to register. + * so we destroy the macvlan port only when it's valid. + */ + if (create && macvlan_port_get_rtnl(dev)) macvlan_port_destroy(port->dev); return err; } From ac817f5ad066697e4d4d35ec68c974eba2c5f17a Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 Dec 2017 23:15:12 +0000 Subject: [PATCH 12/56] phylink: ensure we report link down when LOS asserted Although we disable the netdev carrier, we fail to report in the kernel log that the link went down. Fix this. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phylink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 827f3f92560e..150cd95a6e1e 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1429,9 +1429,8 @@ static void phylink_sfp_link_down(void *upstream) WARN_ON(!lockdep_rtnl_is_held()); set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); + queue_work(system_power_efficient_wq, &pl->resolve); flush_work(&pl->resolve); - - netif_carrier_off(pl->netdev); } static void phylink_sfp_link_up(void *upstream) From 0b2122e4934c7783d336397864e34ee53aad0965 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 Dec 2017 23:15:17 +0000 Subject: [PATCH 13/56] sfp: fix sfp-bus oops when removing socket/upstream When we remove a socket or upstream, and the other side isn't registered, we dereference a NULL pointer, causing a kernel oops. Fix this. Fixes: ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices and sfp cages") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/sfp-bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 8a1b1f4c1b7c..ab64a142b832 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -356,7 +356,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream); void sfp_unregister_upstream(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->sfp) + sfp_unregister_bus(bus); bus->upstream = NULL; bus->netdev = NULL; rtnl_unlock(); @@ -459,7 +460,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket); void sfp_unregister_socket(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->netdev) + sfp_unregister_bus(bus); bus->sfp_dev = NULL; bus->sfp = NULL; bus->socket_ops = NULL; From 0b76aae741abb9d16d2c0e67f8b1e766576f897d Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Wed, 6 Dec 2017 02:26:29 +0530 Subject: [PATCH 14/56] e1000: fix disabling already-disabled warning This patch adds check so that driver does not disable already disabled device. [ 44.637743] advantechwdt: Unexpected close, not stopping watchdog! 
[ 44.997548] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input6 [ 45.013419] e1000 0000:00:03.0: disabling already-disabled device [ 45.013447] ------------[ cut here ]------------ [ 45.014868] WARNING: CPU: 1 PID: 71 at drivers/pci/pci.c:1641 pci_disable_device+0xa1/0x105: pci_disable_device at drivers/pci/pci.c:1640 [ 45.016171] CPU: 1 PID: 71 Comm: rcu_perf_shutdo Not tainted 4.14.0-01330-g3c07399 #1 [ 45.017197] task: ffff88011bee9e40 task.stack: ffffc90000860000 [ 45.017987] RIP: 0010:pci_disable_device+0xa1/0x105: pci_disable_device at drivers/pci/pci.c:1640 [ 45.018603] RSP: 0000:ffffc90000863e30 EFLAGS: 00010286 [ 45.019282] RAX: 0000000000000035 RBX: ffff88013a230008 RCX: 0000000000000000 [ 45.020182] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000203 [ 45.021084] RBP: ffff88013a3f31e8 R08: 0000000000000001 R09: 0000000000000000 [ 45.021986] R10: ffffffff827ec29c R11: 0000000000000002 R12: 0000000000000001 [ 45.022946] R13: ffff88013a230008 R14: ffff880117802b20 R15: ffffc90000863e8f [ 45.023842] FS: 0000000000000000(0000) GS:ffff88013fd00000(0000) knlGS:0000000000000000 [ 45.024863] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 45.025583] CR2: ffffc900006d4000 CR3: 000000000220f000 CR4: 00000000000006a0 [ 45.026478] Call Trace: [ 45.026811] __e1000_shutdown+0x1d4/0x1e2: __e1000_shutdown at drivers/net/ethernet/intel/e1000/e1000_main.c:5162 [ 45.027344] ? rcu_perf_cleanup+0x2a1/0x2a1: rcu_perf_shutdown at kernel/rcu/rcuperf.c:627 [ 45.027883] e1000_shutdown+0x14/0x3a: e1000_shutdown at drivers/net/ethernet/intel/e1000/e1000_main.c:5235 [ 45.028351] device_shutdown+0x110/0x1aa: device_shutdown at drivers/base/core.c:2807 [ 45.028858] kernel_power_off+0x31/0x64: kernel_power_off at kernel/reboot.c:260 [ 45.029343] rcu_perf_shutdown+0x9b/0xa7: rcu_perf_shutdown at kernel/rcu/rcuperf.c:637 [ 45.029852] ? 
__wake_up_common_lock+0xa2/0xa2: autoremove_wake_function at kernel/sched/wait.c:376 [ 45.030414] kthread+0x126/0x12e: kthread at kernel/kthread.c:233 [ 45.030834] ? __kthread_bind_mask+0x8e/0x8e: kthread at kernel/kthread.c:190 [ 45.031399] ? ret_from_fork+0x1f/0x30: ret_from_fork at arch/x86/entry/entry_64.S:443 [ 45.031883] ? kernel_init+0xa/0xf5: kernel_init at init/main.c:997 [ 45.032325] ret_from_fork+0x1f/0x30: ret_from_fork at arch/x86/entry/entry_64.S:443 [ 45.032777] Code: 00 48 85 ed 75 07 48 8b ab a8 00 00 00 48 8d bb 98 00 00 00 e8 aa d1 11 00 48 89 ea 48 89 c6 48 c7 c7 d8 e4 0b 82 e8 55 7d da ff <0f> ff b9 01 00 00 00 31 d2 be 01 00 00 00 48 c7 c7 f0 b1 61 82 [ 45.035222] ---[ end trace c257137b1b1976ef ]--- [ 45.037838] ACPI: Preparing to enter system sleep state S5 Signed-off-by: Tushar Dave Tested-by: Fengguang Wu Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 3 ++- drivers/net/ethernet/intel/e1000/e1000_main.c | 27 +++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index d7bdea79e9fa..8fd2458060a0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -331,7 +331,8 @@ struct e1000_adapter { enum e1000_state_t { __E1000_TESTING, __E1000_RESETTING, - __E1000_DOWN + __E1000_DOWN, + __E1000_DISABLED }; #undef pr_fmt diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 1982f7917a8d..3dd4aeb2706d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -945,7 +945,7 @@ static int e1000_init_hw_struct(struct e1000_adapter *adapter, static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; - struct e1000_adapter *adapter; + struct e1000_adapter *adapter = NULL; struct e1000_hw *hw; static int 
cards_found; @@ -955,6 +955,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 tmp = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; int bars, need_ioport; + bool disable_dev = false; /* do not allocate ioport bars when not needed */ need_ioport = e1000_is_need_ioport(pdev); @@ -1259,11 +1260,13 @@ err_mdio_ioremap: iounmap(hw->ce4100_gbe_mdio_base_virt); iounmap(hw->hw_addr); err_ioremap: + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, bars); err_pci_reg: - pci_disable_device(pdev); + if (!adapter || disable_dev) + pci_disable_device(pdev); return err; } @@ -1281,6 +1284,7 @@ static void e1000_remove(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + bool disable_dev; e1000_down_and_stop(adapter); e1000_release_manageability(adapter); @@ -1299,9 +1303,11 @@ static void e1000_remove(struct pci_dev *pdev) iounmap(hw->flash_address); pci_release_selected_regions(pdev, adapter->bars); + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); - pci_disable_device(pdev); + if (disable_dev) + pci_disable_device(pdev); } /** @@ -5156,7 +5162,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) if (netif_running(netdev)) e1000_free_irq(adapter); - pci_disable_device(pdev); + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); return 0; } @@ -5200,6 +5207,10 @@ static int e1000_resume(struct pci_dev *pdev) pr_err("Cannot enable PCI device from suspend\n"); return err; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); @@ -5274,7 +5285,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, if 
(netif_running(netdev)) e1000_down(adapter); - pci_disable_device(pdev); + + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -5302,6 +5315,10 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) pr_err("Cannot re-enable PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); From 4110e02eb45ea447ec6f5459c9934de0a273fb91 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 11 Dec 2017 16:26:40 +0900 Subject: [PATCH 15/56] e1000e: Fix e1000_check_for_copper_link_ich8lan return value. e1000e_check_for_copper_link() and e1000_check_for_copper_link_ich8lan() are the two functions that may be assigned to mac.ops.check_for_link when phy.media_type == e1000_media_type_copper. Commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") changed the meaning of the return value of check_for_link for copper media but only adjusted the first function. This patch adjusts the second function likewise. Reported-by: Christian Hesse Reported-by: Gabriel C Link: https://bugzilla.kernel.org/show_bug.cgi?id=198047 Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Tested-by: Christian Hesse Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d6d4ed7acf03..31277d3bb7dc 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1367,6 +1367,9 @@ out: * Checks to see of the link status of the hardware has changed. 
If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) From bd30ffc414e55194ed6149fad69a145550cb7c18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZ=20Lin=20=28=E6=9E=97=E4=B8=8A=E6=99=BA=29?= Date: Fri, 29 Dec 2017 17:02:17 +0800 Subject: [PATCH 16/56] NET: usb: qmi_wwan: add support for YUGA CLM920-NC5 PID 0x9625 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for PID 0x9625 of YUGA CLM920-NC5. YUGA CLM920-NC5 needs to enable QMI_WWAN_QUIRK_DTR before QMI operation. qmicli -d /dev/cdc-wdm0 -p --dms-get-revision [/dev/cdc-wdm0] Device revision retrieved: Revision: 'CLM920_NC5-V1 1 [Oct 23 2016 19:00:00]' Signed-off-by: SZ Lin (林上智) Acked-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3000ddd1c7e2..728819feab44 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1100,6 +1100,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, + {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ From 71891e2dab6b55a870f8f7735e44a2963860b5c6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Dec 2017 10:02:52 -0800 Subject: [PATCH 17/56] ethtool: do not print warning for applications using legacy API In kernel log this message appears on every boot: "warning: `NetworkChangeNo' uses legacy ethtool link settings API, link modes are only partially reported" When ethtool link settings API changed, it started complaining about usages of old API. Ironically, the original patch was from google but the application using the legacy API is chrome. Linux ABI is fixed as much as possible. The kernel must not break it and should not complain about applications using legacy APIs. This patch just removes the warning since using legacy APIs in Linux is perfectly acceptable. Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") Signed-off-by: Stephen Hemminger Signed-off-by: David Decotigny Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f8fcf450a36e..8225416911ae 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -static void -warn_incomplete_ethtool_legacy_settings_conversion(const char *details) -{ - char name[sizeof(current->comm)]; - - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", - get_task_comm(name, current), details); -} - /* Query device for its ethtool_cmd settings. * * Backward compatibility note: for compatibility with legacy ethtool, @@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) &link_ksettings); if (err < 0) return err; - if (!convert_link_ksettings_to_legacy_settings(&cmd, - &link_ksettings)) - warn_incomplete_ethtool_legacy_settings_conversion( - "link modes are only partially reported"); + convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings); /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; From f9c935db8086231a35b7f5c2a53e3f1e10f388ee Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 29 Dec 2017 19:48:02 +0100 Subject: [PATCH 18/56] tipc: fix problems with multipoint-to-point flow control In commit 04d7b574b245 ("tipc: add multipoint-to-point flow control") we introduced a protocol for preventing buffer overflow when many group members try to simultaneously send messages to the same receiving member. Stress test of this mechanism has revealed a couple of related bugs: - When the receiving member receives an advertisement REMIT message from one of the senders, it will sometimes prematurely activate a pending member and send it the remitted advertisement, although the upper limit for active senders has been reached. 
This leads to accumulation of illegal advertisements, and eventually to messages being dropped because of receive buffer overflow. - When the receiving member leaves REMITTED state while a received message is being read, we miss to look at the pending queue, to activate the oldest pending peer. This leads to some pending senders being starved out, and never getting the opportunity to profit from the remitted advertisement. We fix the former in the function tipc_group_proto_rcv() by returning directly from the function once it becomes clear that the remitting peer cannot leave REMITTED state at that point. We fix the latter in the function tipc_group_update_rcv_win() by looking up and activate the longest pending peer when it becomes clear that the remitting peer now can leave REMITTED state. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 8e12ab55346b..5f4ffae807ee 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, static void tipc_group_decr_active(struct tipc_group *grp, struct tipc_member *m) { - if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || + m->state == MBR_REMITTED) grp->active_cnt--; } @@ -562,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, int max_active = grp->max_active; int reclaim_limit = max_active * 3 / 4; int active_cnt = grp->active_cnt; - struct tipc_member *m, *rm; + struct tipc_member *m, *rm, *pm; m = tipc_group_find_member(grp, node, port); if (!m) @@ -605,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); } + grp->active_cnt--; + 
list_del_init(&m->list); + if (list_empty(&grp->pending)) + return; + + /* Set oldest pending member to active and advertise */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); break; case MBR_RECLAIMING: case MBR_DISCOVERED: @@ -742,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (!m || m->state != MBR_RECLAIMING) return; - list_del_init(&m->list); - grp->active_cnt--; remitted = msg_grp_remitted(hdr); /* Messages preceding the REMIT still in receive queue */ if (m->advertised > remitted) { m->state = MBR_REMITTED; in_flight = m->advertised - remitted; + m->advertised = ADV_IDLE + in_flight; + return; } /* All messages preceding the REMIT have been read */ if (m->advertised <= remitted) { @@ -761,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); m->advertised = ADV_IDLE + in_flight; + grp->active_cnt--; + list_del_init(&m->list); /* Set oldest pending member to active and advertise */ if (list_empty(&grp->pending)) From c0bace798436bca0fdc221ff61143f1376a9c3de Mon Sep 17 00:00:00 2001 From: Felix Janda Date: Mon, 1 Jan 2018 19:33:20 +0100 Subject: [PATCH 19/56] uapi libc compat: add fallback for unsupported libcs libc-compat.h aims to prevent symbol collisions between uapi and libc headers for each supported libc. This requires continuous coordination between them. The goal of this commit is to improve the situation for libcs (such as musl) which are not yet supported and/or do not wish to be explicitly supported, while not affecting supported libcs. More precisely, with this commit, unsupported libcs can request the suppression of any specific uapi definition by defining the corresponding __UAPI_DEF_* macro as 0. 
This can fix symbol collisions for them, as long as the libc headers are included before the uapi headers. Inclusion in the other order is outside the scope of this commit. All infrastructure in order to enable this fallback for unsupported libcs is already in place, except that libc-compat.h unconditionally defines all __UAPI_DEF_* macros to 1 for all unsupported libcs so that any previous definitions are ignored. In order to fix this, this commit merely makes these definitions conditional. This commit together with the musl libc commit http://git.musl-libc.org/cgit/musl/commit/?id=04983f2272382af92eb8f8838964ff944fbb8258 fixes for example the following compiler errors when <linux/in6.h> is included after musl's <netinet/in.h>: ./linux/in6.h:32:8: error: redefinition of 'struct in6_addr' ./linux/in6.h:49:8: error: redefinition of 'struct sockaddr_in6' ./linux/in6.h:59:8: error: redefinition of 'struct ipv6_mreq' The comments referencing glibc are still correct, but this file is not only used for glibc any more. Signed-off-by: Felix Janda Reviewed-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/uapi/linux/libc-compat.h | 55 +++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 282875cf8056..8254c937c9f4 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -168,46 +168,99 @@ /* If we did not see any headers from any supported C libraries, * or we are being included in the kernel, then define everything - * that we need. */ + * that we need. Check for previous __UAPI_* definitions to give + * unsupported C libraries a way to opt out of any kernel definition. 
*/ #else /* !defined(__GLIBC__) */ /* Definitions for if.h */ +#ifndef __UAPI_DEF_IF_IFCONF #define __UAPI_DEF_IF_IFCONF 1 +#endif +#ifndef __UAPI_DEF_IF_IFMAP #define __UAPI_DEF_IF_IFMAP 1 +#endif +#ifndef __UAPI_DEF_IF_IFNAMSIZ #define __UAPI_DEF_IF_IFNAMSIZ 1 +#endif +#ifndef __UAPI_DEF_IF_IFREQ #define __UAPI_DEF_IF_IFREQ 1 +#endif /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +#endif /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* Definitions for in.h */ +#ifndef __UAPI_DEF_IN_ADDR #define __UAPI_DEF_IN_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN_IPPROTO #define __UAPI_DEF_IN_IPPROTO 1 +#endif +#ifndef __UAPI_DEF_IN_PKTINFO #define __UAPI_DEF_IN_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP_MREQ #define __UAPI_DEF_IP_MREQ 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN #define __UAPI_DEF_SOCKADDR_IN 1 +#endif +#ifndef __UAPI_DEF_IN_CLASS #define __UAPI_DEF_IN_CLASS 1 +#endif /* Definitions for in6.h */ +#ifndef __UAPI_DEF_IN6_ADDR #define __UAPI_DEF_IN6_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN6_ADDR_ALT #define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN6 #define __UAPI_DEF_SOCKADDR_IN6 1 +#endif +#ifndef __UAPI_DEF_IPV6_MREQ #define __UAPI_DEF_IPV6_MREQ 1 +#endif +#ifndef __UAPI_DEF_IPPROTO_V6 #define __UAPI_DEF_IPPROTO_V6 1 +#endif +#ifndef __UAPI_DEF_IPV6_OPTIONS #define __UAPI_DEF_IPV6_OPTIONS 1 +#endif +#ifndef __UAPI_DEF_IN6_PKTINFO #define __UAPI_DEF_IN6_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP6_MTUINFO #define __UAPI_DEF_IP6_MTUINFO 1 +#endif /* Definitions for ipx.h */ +#ifndef __UAPI_DEF_SOCKADDR_IPX #define __UAPI_DEF_SOCKADDR_IPX 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION #define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION #define 
__UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_CONFIG_DATA #define __UAPI_DEF_IPX_CONFIG_DATA 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEF #define __UAPI_DEF_IPX_ROUTE_DEF 1 +#endif /* Definitions for xattr.h */ +#ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 +#endif #endif /* __GLIBC__ */ From c095508770aebf1b9218e77026e48345d719b17c Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 2 Jan 2018 19:44:34 +0000 Subject: [PATCH 20/56] RDS: Heap OOB write in rds_message_alloc_sgs() When args->nr_local is 0, nr_pages gets also 0 due to some size calculation via rds_rm_size(), which is later used to allocate pages for DMA, this bug produces a heap Out-Of-Bound write access to a specific memory region. Signed-off-by: Mohamed Ghannam Signed-off-by: David S. Miller --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index bc2f1e0977d6..94729d9da437 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + if (args->nr_local == 0) + return -EINVAL; + /* figure out the number of pages in the vector */ for (i = 0; i < args->nr_local; i++) { if (copy_from_user(&vec, &local_vec[i], From 79d0895140e937ba111e6420b4cd83ee75efa788 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 2 Jan 2018 19:44:37 -0200 Subject: [PATCH 21/56] sctp: fix error path in sctp_stream_init syzbot noticed a NULL pointer dereference panic in sctp_stream_free() which was caused by an incomplete error handling in sctp_stream_init(). By not clearing stream->outcnt, it made a for() in sctp_stream_free() think that it had elements to free when it did not, leading to the panic. As suggested by Xin Long, this patch also simplifies the error path by moving it to the only if() that uses it. 
See-also: https://www.spinics.net/lists/netdev/msg473756.html See-also: https://www.spinics.net/lists/netdev/msg465024.html Reported-by: syzbot Fixes: f952be79cebd ("sctp: introduce struct sctp_stream_out_ext") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/stream.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 76ea66be0bbe..524dfeb94c41 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, sctp_stream_outq_migrate(stream, NULL, outcnt); sched->sched_all(stream); - i = sctp_stream_alloc_out(stream, outcnt, gfp); - if (i) - return i; + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) + goto out; stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) @@ -170,19 +170,17 @@ in: if (!incnt) goto out; - i = sctp_stream_alloc_in(stream, incnt, gfp); - if (i) { - ret = -ENOMEM; - goto free; + ret = sctp_stream_alloc_in(stream, incnt, gfp); + if (ret) { + sched->free(stream); + kfree(stream->out); + stream->out = NULL; + stream->outcnt = 0; + goto out; } stream->incnt = incnt; - goto out; -free: - sched->free(stream); - kfree(stream->out); - stream->out = NULL; out: return ret; } From f1c8d3720f2e6c8c2b209120678236debd0360e5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 2 Jan 2018 14:05:19 -0800 Subject: [PATCH 22/56] vxlan: trivial indenting fix. Fix indentation of reserved_flags2 field in vxlanhdr_gpe. Fixes: e1e5314de08b ("vxlan: implement GPE") Signed-off-by: William Tu Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/net/vxlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 13223396dc64..f96391e84a8a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -146,7 +146,7 @@ struct vxlanhdr_gpe { np_applied:1, instance_applied:1, version:2, -reserved_flags2:2; + reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, From 64e711ca59ef9b7873d77ef06bc174aa01af9115 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 17 Nov 2017 15:51:47 -0800 Subject: [PATCH 23/56] i40e: Remove UDP support for big buffer Since UDP based filters are not supported via big buffer cloud filters, remove UDP support. Also change a few return types to indicate unsupported vs invalid configuration. Signed-off-by: Amritha Nambiar Acked-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 321d8be80871..fffd4868defb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6038,8 +6038,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) /* Set Bit 7 to be valid */ mode = I40E_AQ_SET_SWITCH_BIT7_VALID; - /* Set L4type to both TCP and UDP support */ - mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH; + /* Set L4type for TCP support */ + mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP; /* Set cloud filter mode */ mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; @@ -6969,18 +6969,18 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_valid_ether_addr(filter->src_mac)) || (is_multicast_ether_addr(filter->dst_mac) && is_multicast_ether_addr(filter->src_mac))) - return -EINVAL; + return -EOPNOTSUPP; - /* Make sure port is specified, otherwise bail out, for channel - * 
specific cloud filter needs 'L4 port' to be non-zero + /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP + * ports are not supported via big buffer now. */ - if (!filter->dst_port) - return -EINVAL; + if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP) + return -EOPNOTSUPP; /* adding filter using src_port/src_ip is not supported at this stage */ if (filter->src_port || filter->src_ipv4 || !ipv6_addr_any(&filter->ip.v6.src_ip6)) - return -EINVAL; + return -EOPNOTSUPP; /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter.element); @@ -6991,7 +6991,7 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_multicast_ether_addr(filter->src_mac)) { /* MAC + IP : unsupported mode */ if (filter->dst_ipv4) - return -EINVAL; + return -EOPNOTSUPP; /* since we validated that L4 port must be valid before * we get here, start with respective "flags" value From e90f686b4358d7d7e5dbaa48b8e78c9a4e41826e Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 3 Jan 2018 10:39:29 +0800 Subject: [PATCH 24/56] net: fec: restore dev_id in the cases of probe error The static variable dev_id is always incremented by one before the netdev is registered. The dev_id value should be restored in the case of a probe error. Signed-off-by: Fugang Duan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8184d2fca9be..6a4fc2b35488 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3556,6 +3556,7 @@ failed_phy: of_node_put(phy_node); failed_ioremap: free_netdev(ndev); + dev_id--; return ret; } From 3f38c683033a9a0a2738e7067f449deefabfa3ef Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 3 Jan 2018 10:39:30 +0800 Subject: [PATCH 25/56] net: fec: defer probe if regulator is not ready Defer probe if regulator is not ready. E.g. some regulator is fixed regulator controlled by i2c expander gpio, the i2c device may be probed after the driver, then it should handle the case of defer probe error. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 6a4fc2b35488..19f198e22e15 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3469,6 +3469,10 @@ fec_probe(struct platform_device *pdev) goto failed_regulator; } } else { + if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto failed_regulator; + } fep->reg_phy = NULL; } From 248de22e638f10bd5bfc7624a357f940f66ba137 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Dec 2017 10:55:04 -0800 Subject: [PATCH 26/56] i40e/i40evf: Account for frags split over multiple descriptors in check linearize The original code for __i40e_chk_linearize didn't take into account the fact that if a fragment is 16K in size or larger it has to be split over 2 descriptors and the smaller of those 2 descriptors will be on the trailing edge of the transmit. 
As a result we can get into situations where we didn't catch requests that could result in a Tx hang. This patch takes care of that by subtracting the length of all but the trailing edge of the stale fragment before we test for sum. By doing this we can guarantee that we have all cases covered, including the case of a fragment that spans multiple descriptors. We don't need to worry about checking the inner portions of this since 12K is the maximum aligned DMA size and that is larger than any MSS will ever be since the MTU limit for jumbos is something on the order of 9K. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 26 ++++++++++++++++--- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 26 ++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 4566d66ffc7c..5bc2748ac468 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3047,10 +3047,30 @@ bool __i40e_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. 
+ */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -3058,7 +3078,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 50864f99446d..1ba29bb85b67 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -2012,10 +2012,30 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. 
+ */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -2023,7 +2043,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; From 458867b2ca0c987445c5d9adccd1642970e1ba07 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 20 Dec 2017 11:04:36 -0500 Subject: [PATCH 27/56] i40e: don't remove netdev->dev_addr when syncing uc list In some circumstances, such as with bridging, it is possible that the stack will add a device's own MAC address to its unicast address list. If, later, the stack deletes this address, then the i40e driver will receive a request to remove this address. The driver stores its current MAC address as part of the MAC/VLAN hash array, since it is convenient and matches exactly how the hardware expects to be told which traffic to receive. This causes a problem, since for most devices, the MAC address is stored separately, and requests to delete a unicast address should not have the ability to remove the filter for the MAC address. Fix this by forcing a check on every address sync to ensure we do not remove the device address. There is a very narrow possibility of a race between .set_mac and .set_rx_mode, if we don't change netdev->dev_addr before updating our internal MAC list in .set_mac. This might be possible if .set_rx_mode is going to remove MAC "XYZ" from the list, at the same time as .set_mac changes our dev_addr to MAC "XYZ", we might possibly queue a delete, then an add in .set_mac, then queue a delete in .set_rx_mode's dev_uc_sync and then update netdev->dev_addr. 
We can avoid this by moving the copy into dev_addr prior to the changes to the MAC filter list. A similar race on the other side does not cause problems, as if we're changing our MAC from A to B, and we race with .set_rx_mode, it could queue a delete from A, we'd update our address, and allow the delete. This seems like a race, but in reality we're about to queue a delete of A anyways, so it would not cause any issues. A race in the initialization code is unlikely because the netdevice has not yet been fully initialized and the stack should not be adding or removing addresses yet. Note that we don't (yet) need similar code for the VF driver because it does not make use of __dev_uc_sync and __dev_mc_sync, but instead rolls its own method for handling updates to the MAC/VLAN list, which already has code to protect against removal of the hardware address. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index fffd4868defb..9e4b78e447f8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1573,11 +1573,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p) else netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); + /* Copy the address first, so that we avoid a possible race with + * .set_rx_mode(). If we copy after changing the address in the filter + * list, we might open ourselves to a narrow race window where + * .set_rx_mode could delete our dev_addr filter and prevent traffic + * from passing. 
+ */ + ether_addr_copy(netdev->dev_addr, addr->sa_data); + spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); i40e_add_mac_filter(vsi, addr->sa_data); spin_unlock_bh(&vsi->mac_filter_hash_lock); - ether_addr_copy(netdev->dev_addr, addr->sa_data); if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; @@ -1923,6 +1930,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + i40e_del_mac_filter(vsi, addr); return 0; From bc4244c6e33f96b48c4986ce4653df4673c6a08e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 22 Dec 2017 12:45:16 +0100 Subject: [PATCH 28/56] i40e: flower: Fix return value for unsupported offload When filter configuration is not supported, drivers should return -EOPNOTSUPP so the core can react correctly. 
Fixes: 2f4b411a3d67 ("i40e: Enable cloud filters via tc-flower") Signed-off-by: Jiri Pirko Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9e4b78e447f8..42dcaefc4c19 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7371,7 +7371,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, if (tc < 0) { dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); - return -EINVAL; + return -EOPNOTSUPP; } if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || From 15962a18284552b5ec58982ff60a5e92e0c5c92b Mon Sep 17 00:00:00 2001 From: Arjun Vynipadath Date: Wed, 3 Jan 2018 11:44:07 +0530 Subject: [PATCH 29/56] cxgb4: Fix FW flash errors commit 96ac18f14a5a ("cxgb4: Add support for new flash parts") removed initialization of adapter->params.sf_fw_start causing issues while flashing firmware to card. We no longer need sf_fw_start in adapter->params as we already have macros defined for FW flash addresses. Fixes: 96ac18f14a5a ("cxgb4: Add support for new flash parts") Signed-off-by: Arjun Vynipadath Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 - drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 17 ++++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 6f9fa6e3c42a..d8424ed16c33 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -344,7 +344,6 @@ struct adapter_params { unsigned int sf_size; /* serial flash size in bytes */ unsigned int sf_nsec; /* # of flash sectors */ - unsigned int sf_fw_start; /* start of FW image in flash */ unsigned int fw_vers; /* firmware version */ unsigned int bs_vers; /* bootstrap version */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index f63210f15579..375ef86a84da 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2844,8 +2844,6 @@ enum { SF_RD_DATA_FAST = 0xb, /* read flash */ SF_RD_ID = 0x9f, /* read ID */ SF_ERASE_SECTOR = 0xd8, /* erase sector */ - - FW_MAX_SIZE = 16 * SF_SEC_SIZE, }; /** @@ -3558,8 +3556,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) const __be32 *p = (const __be32 *)fw_data; const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data; unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; - unsigned int fw_img_start = adap->params.sf_fw_start; - unsigned int fw_start_sec = fw_img_start / sf_sec_size; + unsigned int fw_start_sec = FLASH_FW_START_SEC; + unsigned int fw_size = FLASH_FW_MAX_SIZE; + unsigned int fw_start = FLASH_FW_START; if (!size) { dev_err(adap->pdev_dev, "FW image has no data\n"); @@ -3575,9 +3574,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) "FW image size differs from size in FW header\n"); return -EINVAL; } - if (size > FW_MAX_SIZE) { + if (size > fw_size) { dev_err(adap->pdev_dev, "FW image too large, max is %u 
bytes\n", - FW_MAX_SIZE); + fw_size); return -EFBIG; } if (!t4_fw_matches_chip(adap, hdr)) @@ -3604,11 +3603,11 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); if (ret) goto out; - addr = fw_img_start; + addr = fw_start; for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; @@ -3618,7 +3617,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) } ret = t4_write_flash(adap, - fw_img_start + offsetof(struct fw_hdr, fw_ver), + fw_start + offsetof(struct fw_hdr, fw_ver), sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); out: if (ret) From 7853b49ce8e0ef6364d24512b287463841d71bd3 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 3 Jan 2018 06:17:29 +0000 Subject: [PATCH 30/56] net: ena: unmask MSI-X only after device initialization is completed Under certain conditions MSI-X interrupt might arrive right after it was unmasked in ena_up(). There is a chance it would be processed by the driver before device ENA_FLAG_DEV_UP flag is set. In such a case the interrupt is ignored. ENA device operates in auto-masked mode, therefore ignoring interrupt leaves it masked for good. Moving unmask of interrupt to be the last step in ena_up(). Signed-off-by: Netanel Belgazal Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 97c5a89a9cf7..6fb28fd43eb3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1565,7 +1565,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) static int ena_up_complete(struct ena_adapter *adapter) { - int rc, i; + int rc; rc = ena_rss_configure(adapter); if (rc) @@ -1584,17 +1584,6 @@ static int ena_up_complete(struct ena_adapter *adapter) ena_napi_enable_all(adapter); - /* Enable completion queues interrupt */ - for (i = 0; i < adapter->num_queues; i++) - ena_unmask_interrupt(&adapter->tx_ring[i], - &adapter->rx_ring[i]); - - /* schedule napi in case we had pending packets - * from the last time we disable napi - */ - for (i = 0; i < adapter->num_queues; i++) - napi_schedule(&adapter->ena_napi[i].napi); - return 0; } @@ -1731,7 +1720,7 @@ create_err: static int ena_up(struct ena_adapter *adapter) { - int rc; + int rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); @@ -1774,6 +1763,17 @@ static int ena_up(struct ena_adapter *adapter) set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + /* Enable completion queues interrupt */ + for (i = 0; i < adapter->num_queues; i++) + ena_unmask_interrupt(&adapter->tx_ring[i], + &adapter->rx_ring[i]); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + return rc; err_up: From ee4552aaf3fef5345199b8a82e40be7245b289fb Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 3 Jan 2018 06:17:30 +0000 Subject: [PATCH 31/56] net: ena: fix error handling in ena_down() sequence ENA admin command queue errors are not handled as part of ena_down(). 
As a result, in case of error admin queue transitions to non-running state and aborts all subsequent commands including those coming from ena_up(). Reset scheduled by the driver from the timer service context would not proceed due to sharing rtnl with ena_up()/ena_down() Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6fb28fd43eb3..fbe21a817bd8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -75,6 +75,9 @@ static struct workqueue_struct *ena_wq; MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); +static void check_for_admin_com_state(struct ena_adapter *adapter); +static void ena_destroy_device(struct ena_adapter *adapter); +static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev) { @@ -1884,6 +1887,17 @@ static int ena_close(struct net_device *netdev) if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) ena_down(adapter); + /* Check for device status and issue reset if needed*/ + check_for_admin_com_state(adapter); + if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, ifdown, adapter->netdev, + "Destroy failure, restarting device\n"); + ena_dump_stats_to_dmesg(adapter); + /* rtnl lock already obtained in dev_ioctl() layer */ + ena_destroy_device(adapter); + ena_restore_device(adapter); + } + return 0; } @@ -2544,11 +2558,12 @@ static void ena_destroy_device(struct ena_adapter *adapter) ena_com_set_admin_running_state(ena_dev, false); - ena_close(netdev); + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); /* Before releasing the ENA resources, a device reset is required. 
* (to prevent the device from accessing them). - * In case the reset flag is set and the device is up, ena_close + * In case the reset flag is set and the device is up, ena_down() * already perform the reset, so it can be skipped. */ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) From ee4aa8df70fa6d76bd776c025dc0d8d746c18317 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 3 Jan 2018 13:09:23 -0500 Subject: [PATCH 32/56] 3c59x: fix missing dma_mapping_error check and bad ring refill logic A few spots in 3c59x missed calls to dma_mapping_error checks, causing WARN_ONs to trigger. Clean those up. While we're at it, refactor the refill code a bit so that if skb allocation or dma mapping fails, we recycle the existing buffer. This prevents holes in the rx ring, and makes for much simpler logic. Note: This is compile only tested. Ted, if you could run this and confirm that it continues to work properly, I would appreciate it, as I currently don't have access to this hardware. Signed-off-by: Neil Horman CC: Steffen Klassert CC: "David S. Miller" Reported-by: tedheadster@gmail.com Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c59x.c | 90 +++++++++++++------------------ 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index f4e13a7014bd..36c8950dbd2d 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -602,7 +602,7 @@ struct vortex_private { struct sk_buff* rx_skbuff[RX_RING_SIZE]; struct sk_buff* tx_skbuff[TX_RING_SIZE]; unsigned int cur_rx, cur_tx; /* The next free ring entry */ - unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ + unsigned int dirty_tx; /* The ring entries to be free()ed. */ struct vortex_extra_stats xstats; /* NIC-specific extra stats */ struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl.
*/ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */ @@ -618,7 +618,6 @@ struct vortex_private { /* The remainder are related to chip state, mostly media selection. */ struct timer_list timer; /* Media selection timer. */ - struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */ int options; /* User-settable misc. driver options. */ unsigned int media_override:4, /* Passed-in media type. */ default_media:4, /* Read from the EEPROM/Wn3_Config. */ @@ -760,7 +759,6 @@ static void mdio_sync(struct vortex_private *vp, int bits); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *vp, int phy_id, int location, int value); static void vortex_timer(struct timer_list *t); -static void rx_oom_timer(struct timer_list *t); static netdev_tx_t vortex_start_xmit(struct sk_buff *skb, struct net_device *dev); static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb, @@ -1601,7 +1599,6 @@ vortex_up(struct net_device *dev) timer_setup(&vp->timer, vortex_timer, 0); mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait)); - timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0); if (vortex_debug > 1) pr_debug("%s: Initial media type %s.\n", @@ -1676,7 +1673,7 @@ vortex_up(struct net_device *dev) window_write16(vp, 0x0040, 4, Wn4_NetDiag); if (vp->full_bus_master_rx) { /* Boomerang bus master. */ - vp->cur_rx = vp->dirty_rx = 0; + vp->cur_rx = 0; /* Initialize the RxEarly register as recommended. */ iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); iowrite32(0x0020, ioaddr + PktStatus); @@ -1729,6 +1726,7 @@ vortex_open(struct net_device *dev) struct vortex_private *vp = netdev_priv(dev); int i; int retval; + dma_addr_t dma; /* Use the now-standard shared IRQ implementation. */ if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ? @@ -1753,7 +1751,11 @@ vortex_open(struct net_device *dev) break; /* Bad news! 
*/ skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ - vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); + dma = pci_map_single(VORTEX_PCI(vp), skb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma)) + break; + vp->rx_ring[i].addr = cpu_to_le32(dma); } if (i != RX_RING_SIZE) { pr_emerg("%s: no memory for rx ring\n", dev->name); @@ -2067,6 +2069,12 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev) int len = (skb->len + 3) & ~3; vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) { + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + spin_lock_irq(&vp->window_lock); window_set(vp, 7); iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr); @@ -2593,7 +2601,7 @@ boomerang_rx(struct net_device *dev) int entry = vp->cur_rx % RX_RING_SIZE; void __iomem *ioaddr = vp->ioaddr; int rx_status; - int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; + int rx_work_limit = RX_RING_SIZE; if (vortex_debug > 5) pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS)); @@ -2614,7 +2622,8 @@ boomerang_rx(struct net_device *dev) } else { /* The packet length: up to 4.5K!. */ int pkt_len = rx_status & 0x1fff; - struct sk_buff *skb; + struct sk_buff *skb, *newskb; + dma_addr_t newdma; dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); if (vortex_debug > 4) @@ -2633,9 +2642,27 @@ boomerang_rx(struct net_device *dev) pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_copy++; } else { + /* Pre-allocate the replacement skb. 
If it or its + * mapping fails then recycle the buffer thats already + * in place + */ + newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); + if (!newskb) { + dev->stats.rx_dropped++; + goto clear_complete; + } + newdma = pci_map_single(VORTEX_PCI(vp), newskb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) { + dev->stats.rx_dropped++; + consume_skb(newskb); + goto clear_complete; + } + /* Pass up the skbuff already on the Rx ring. */ skb = vp->rx_skbuff[entry]; - vp->rx_skbuff[entry] = NULL; + vp->rx_skbuff[entry] = newskb; + vp->rx_ring[entry].addr = cpu_to_le32(newdma); skb_put(skb, pkt_len); pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_nocopy++; @@ -2653,55 +2680,15 @@ boomerang_rx(struct net_device *dev) netif_rx(skb); dev->stats.rx_packets++; } - entry = (++vp->cur_rx) % RX_RING_SIZE; - } - /* Refill the Rx ring buffers. */ - for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) { - struct sk_buff *skb; - entry = vp->dirty_rx % RX_RING_SIZE; - if (vp->rx_skbuff[entry] == NULL) { - skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); - if (skb == NULL) { - static unsigned long last_jif; - if (time_after(jiffies, last_jif + 10 * HZ)) { - pr_warn("%s: memory shortage\n", - dev->name); - last_jif = jiffies; - } - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) - mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1)); - break; /* Bad news! */ - } - vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); - vp->rx_skbuff[entry] = skb; - } +clear_complete: vp->rx_ring[entry].status = 0; /* Clear complete bit. */ iowrite16(UpUnstall, ioaddr + EL3_CMD); + entry = (++vp->cur_rx) % RX_RING_SIZE; } return 0; } -/* - * If we've hit a total OOM refilling the Rx ring we poll once a second - * for some memory. Otherwise there is no way to restart the rx process. 
- */ -static void -rx_oom_timer(struct timer_list *t) -{ - struct vortex_private *vp = from_timer(vp, t, rx_oom_timer); - struct net_device *dev = vp->mii.dev; - - spin_lock_irq(&vp->lock); - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */ - boomerang_rx(dev); - if (vortex_debug > 1) { - pr_debug("%s: rx_oom_timer %s\n", dev->name, - ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying"); - } - spin_unlock_irq(&vp->lock); -} - static void vortex_down(struct net_device *dev, int final_down) { @@ -2711,7 +2698,6 @@ vortex_down(struct net_device *dev, int final_down) netdev_reset_queue(dev); netif_stop_queue(dev); - del_timer_sync(&vp->rx_oom_timer); del_timer_sync(&vp->timer); /* Turn off statistics ASAP. We update dev->stats below. */ From 3ea15452ee85754f70f3b9fa1f23165ef2e77ba7 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Wed, 3 Jan 2018 11:00:31 +0800 Subject: [PATCH 33/56] nl80211: Check for the required netlink attribute presence nl80211_nan_add_func() does not check if the required attribute NL80211_NAN_FUNC_FOLLOW_UP_DEST is present when processing NL80211_CMD_ADD_NAN_FUNCTION request. This request can be issued by users with CAP_NET_ADMIN privilege and may result in NULL dereference and a system crash. Add a check for the required attribute presence. 
Signed-off-by: Hao Chen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 213d0c498c97..2b3dbcd40e46 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11361,7 +11361,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb, break; case NL80211_NAN_FUNC_FOLLOW_UP: if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || - !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) { err = -EINVAL; goto out; } From 736a80bbfda709fb3631f5f62056f250a38e5804 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Jan 2018 15:51:53 +0100 Subject: [PATCH 34/56] mac80211: mesh: drop frames appearing to be from us If there are multiple mesh stations with the same MAC address, they will both get confused and start throwing warnings. Obviously in this case nothing can actually work anyway, so just drop frames that look like they're from ourselves early on. 
Reported-by: Gui Iribarren Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 70e9d2ca8bbe..4daafb07602f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) } return true; case NL80211_IFTYPE_MESH_POINT: + if (ether_addr_equal(sdata->vif.addr, hdr->addr2)) + return false; if (multicast) return true; return ether_addr_equal(sdata->vif.addr, hdr->addr1); From 54e98b5d663fcd8e3279c2391537b1a1f7bfe344 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 3 Jan 2018 22:02:29 -0800 Subject: [PATCH 35/56] net: dsa: b53: Turn off Broadcom tags for more switches Models such as BCM5395/97/98 and BCM53125/24/53115 and compatible require that we turn on managed mode to actually act on Broadcom tags, otherwise they just pass them through on ingress (host -> switch) and don't insert them in egress (switch -> host). Turning on managed mode is simple, but requires us to properly support ARL misses on multicast addresses which is a much more involved set of changes not suitable for a bug fix for this release. Reported-by: Jochen Friedrich Fixes: 7edc58d614d4 ("net: dsa: b53: Turn on Broadcom tags") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index f5a8dd96fd75..4498ab897d94 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1500,10 +1500,13 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, { struct b53_device *dev = ds->priv; - /* Older models support a different tag format that we do not - * support in net/dsa/tag_brcm.c yet. 
+ /* Older models (5325, 5365) support a different tag format that we do + * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed + * mode to be turned on which means we need to specifically manage ARL + * misses on multicast addresses (TBD). */ - if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port)) + if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) || + !b53_can_enable_brcm_tags(ds, port)) return DSA_TAG_PROTO_NONE; /* Broadcom BCM58xx chips have a flow accelerator on Port 8 From b4c2951a4833e66f1bbfe65ddcd4fdcdfafe5e8f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 2 Dec 2017 18:48:52 +0100 Subject: [PATCH 36/56] can: vxcan: improve handling of missing peer name attribute Picking up the patch from Serhey Popovych (commit 191cdb3822e5df6b3c8, "veth: Be more robust on network device creation when no attributes"). When the peer name attribute is not provided the former implementation tries to register the given device name twice ... which leads to -EEXIST. If only one device name is given apply an automatic generated and valid name for the peer. 
Cc: Serhey Popovych Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/vxcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 8404e8852a0f..b4c4a2c76437 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, tbp = peer_tb; } - if (tbp[IFLA_IFNAME]) { + if (ifmp && tbp[IFLA_IFNAME]) { nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { From d5b42e6607661b198d8b26a0c30969605b1bf5c7 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Wed, 13 Dec 2017 19:52:23 +0100 Subject: [PATCH 37/56] can: gs_usb: fix return value of the "set_bittiming" callback The "set_bittiming" callback treats a positive return value as error! For that reason "can_changelink()" will quit silently after setting the bittiming values without processing ctrlmode, restart-ms, etc. Signed-off-by: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 68ac3e88a8ce..8bf80ad9dc44 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", rc); - return rc; + return (rc > 0) ? 0 : rc; } static void gs_usb_xmit_callback(struct urb *urb) From 13454c14550065fcc1705d6bd4ee6d40e057099f Mon Sep 17 00:00:00 2001 From: Luu An Phu Date: Tue, 2 Jan 2018 10:44:18 +0700 Subject: [PATCH 38/56] can: flex_can: Correct the checking for frame length in flexcan_start_xmit() The flexcan_start_xmit() function compares the frame length with data register length to write frame content into data[0] and data[1] register. 
Data register length is 4 bytes and frame maximum length is 8 bytes. Fix the check that compares the frame length with 3: because the register length is 4, the second data register only needs to be written when the frame length exceeds 4. Signed-off-by: Luu An Phu Reviewed-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 0626dcfd1f3d..760d2c07e3a2 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -526,7 +526,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) data = be32_to_cpup((__be32 *)&cf->data[0]); flexcan_write(data, &priv->tx_mb->data[0]); } - if (cf->can_dlc > 3) { + if (cf->can_dlc > 4) { data = be32_to_cpup((__be32 *)&cf->data[4]); flexcan_write(data, &priv->tx_mb->data[1]); } From 6ebc5e8fe85286c7392f1777a3dba9e1fd6d0253 Mon Sep 17 00:00:00 2001 From: Martin Lederhilger Date: Thu, 21 Dec 2017 14:42:44 +0000 Subject: [PATCH 39/56] can: ems_usb: improve error reporting for error warning and error passive This patch adds the missing CAN_ERR_CRTL to cf->can_id in case of CAN_STATE_ERROR_WARNING or CAN_STATE_ERROR_PASSIVE Signed-off-by: Martin Lederhilger Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b00358297424..12ff0020ecd6 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -395,6 +395,7 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) if (dev->can.state == CAN_STATE_ERROR_WARNING || dev->can.state == CAN_STATE_ERROR_PASSIVE) { + cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (txerr > rxerr) ? CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE; } From fb32dd3abf7a8fc13271d0d1c45ffc66df28dd15 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 2 Jan 2018 20:14:42 -0800 Subject: [PATCH 40/56] MAINTAINERS: Update my email address.
Signed-off-by: Pravin Shelar Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a6e86e20761e..1e6872b4c6e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10137,7 +10137,7 @@ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* OPENVSWITCH -M: Pravin Shelar +M: Pravin B Shelar L: netdev@vger.kernel.org L: dev@openvswitch.org W: http://openvswitch.org From f428fe4a04cc339166c8bbd489789760de3a0cee Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 2 Jan 2018 23:27:33 -0800 Subject: [PATCH 41/56] rtnetlink: give a user socket to get_target_net() This function is used from two places: rtnl_dump_ifinfo and rtnl_getlink. In rtnl_getlink(), we give a request skb into get_target_net(), but in rtnl_dump_ifinfo, we give a response skb into get_target_net(). The problem here is that NETLINK_CB() isn't initialized for the response skb. In both cases we can get a user socket and give it instead of skb into get_target_net(). 
This bug was found by syzkaller with this call-trace: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3149 Comm: syzkaller140561 Not tainted 4.15.0-rc4-mm1+ #47 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__netlink_ns_capable+0x8b/0x120 net/netlink/af_netlink.c:868 RSP: 0018:ffff8801c880f348 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8443f900 RDX: 000000000000007b RSI: ffffffff86510f40 RDI: 00000000000003d8 RBP: ffff8801c880f360 R08: 0000000000000000 R09: 1ffff10039101e4f R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff86510f40 R13: 000000000000000c R14: 0000000000000004 R15: 0000000000000011 FS: 0000000001a1a880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020151000 CR3: 00000001c9511005 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: netlink_ns_capable+0x26/0x30 net/netlink/af_netlink.c:886 get_target_net+0x9d/0x120 net/core/rtnetlink.c:1765 rtnl_dump_ifinfo+0x2e5/0xee0 net/core/rtnetlink.c:1806 netlink_dump+0x48c/0xce0 net/netlink/af_netlink.c:2222 __netlink_dump_start+0x4f0/0x6d0 net/netlink/af_netlink.c:2319 netlink_dump_start include/linux/netlink.h:214 [inline] rtnetlink_rcv_msg+0x7f0/0xb10 net/core/rtnetlink.c:4485 netlink_rcv_skb+0x21e/0x460 net/netlink/af_netlink.c:2441 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4540 netlink_unicast_kernel net/netlink/af_netlink.c:1308 [inline] netlink_unicast+0x4be/0x6a0 net/netlink/af_netlink.c:1334 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1897 Cc: Jiri Benc Fixes: 79e1ad148c84 ("rtnetlink: use netnsid to query interface") Signed-off-by: Andrei Vagin Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dabba2a91fc8..778d7f03404a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev, return false; } -static struct net *get_target_net(struct sk_buff *skb, int netnsid) +static struct net *get_target_net(struct sock *sk, int netnsid) { struct net *net; - net = get_net_ns_by_id(sock_net(skb->sk), netnsid); + net = get_net_ns_by_id(sock_net(sk), netnsid); if (!net) return ERR_PTR(-EINVAL); /* For now, the caller is required to have CAP_NET_ADMIN in * the user namespace owning the target net ns. */ - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EACCES); } @@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) ifla_policy, NULL) >= 0) { if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(skb->sk, netnsid); if (IS_ERR(tgt_net)) { tgt_net = net; netnsid = -1; @@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid); if (IS_ERR(tgt_net)) return PTR_ERR(tgt_net); } From 879626e3a52630316d817cbda7cec9a5446d1d82 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 3 Jan 2018 16:46:29 +0100 Subject: [PATCH 42/56] net: stmmac: enable EEE in MII, GMII or RGMII only Note in the databook - Section 4.4 - EEE : " The EEE feature is not supported when the MAC is configured to use the TBI, RTBI, SMII, RMII or SGMII single PHY interface. 
Even if the MAC supports multiple PHY interfaces, you should activate the EEE mode only when the MAC is operating with GMII, MII, or RGMII interface." Applying this restriction solves a stability issue observed on Amlogic gxl platforms operating with RMII interface and the internal PHY. Fixes: 83bf79b6bb64 ("stmmac: disable at run-time the EEE if not supported") Signed-off-by: Jerome Brunet Tested-by: Arnaud Patard Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 337d53d12e94..c0af0bc4e714 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) bool stmmac_eee_init(struct stmmac_priv *priv) { struct net_device *ndev = priv->dev; + int interface = priv->plat->interface; unsigned long flags; bool ret = false; + if ((interface != PHY_INTERFACE_MODE_MII) && + (interface != PHY_INTERFACE_MODE_GMII) && + !phy_interface_mode_is_rgmii(interface)) + goto out; + /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. */ From dfe8266b8dd10e12a731c985b725fcf7f0e537f0 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 3 Jan 2018 20:09:49 +0300 Subject: [PATCH 43/56] sh_eth: fix TSU resource handling When switching the driver to the managed device API, I managed to break the case of a dual Ether devices sharing a single TSU: the 2nd Ether port wouldn't probe. Iwamatsu-san has tried to fix this but his patch was buggy and he then dropped the ball... The solution is to limit calling devm_request_mem_region() to the first of the two ports sharing the same TSU, so devm_ioremap_resource() can't be used anymore for the TSU resource... 
Fixes: d5e07e69218f ("sh_eth: use managed device API") Reported-by: Nobuhiro Iwamatsu Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 75323000c364..1bdd67a8a869 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3225,10 +3225,29 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* ioremap the TSU registers */ if (mdp->cd->tsu) { struct resource *rtsu; + rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); - if (IS_ERR(mdp->tsu_addr)) { - ret = PTR_ERR(mdp->tsu_addr); + if (!rtsu) { + dev_err(&pdev->dev, "no TSU resource\n"); + ret = -ENODEV; + goto out_release; + } + /* We can only request the TSU region for the first port + * of the two sharing this TSU for the probe to succeed... + */ + if (devno % 2 == 0 && + !devm_request_mem_region(&pdev->dev, rtsu->start, + resource_size(rtsu), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "can't request TSU resource.\n"); + ret = -EBUSY; + goto out_release; + } + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, + resource_size(rtsu)); + if (!mdp->tsu_addr) { + dev_err(&pdev->dev, "TSU region ioremap() failed.\n"); + ret = -ENOMEM; goto out_release; } mdp->port = devno % 2; From 7d11f77f84b27cef452cee332f4e469503084737 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Wed, 3 Jan 2018 21:06:06 +0000 Subject: [PATCH 44/56] RDS: null pointer dereference in rds_atomic_free_op set rm->atomic.op_active to 0 when rds_pin_pages() fails or the user supplied address is invalid, this prevents a NULL pointer usage in rds_atomic_free_op() Signed-off-by: Mohamed Ghannam Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 94729d9da437..634cfcb7bba6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -877,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, err: if (page) put_page(page); + rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); return ret; From 7bbfe00e025240505db3e04c3b296d7c023b2a26 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 3 Jan 2018 14:11:59 -0800 Subject: [PATCH 45/56] ipv6: fix general protection fault in fib6_add() In fib6_add(), pn could be NULL if fib6_add_1() failed to return a fib6 node. Checking pn != fn before accessing pn->leaf makes sure pn is not NULL. This fixes the following GPF reported by syzkaller: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 3201 Comm: syzkaller001778 Not tainted 4.15.0-rc5+ #151 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:fib6_add+0x736/0x15a0 net/ipv6/ip6_fib.c:1244 RSP: 0018:ffff8801c7626a70 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000020 RCX: ffffffff84794465 RDX: 0000000000000004 RSI: ffff8801d38935f0 RDI: 0000000000000282 RBP: ffff8801c7626da0 R08: 1ffff10038ec4c35 R09: 0000000000000000 R10: ffff8801c7626c68 R11: 0000000000000000 R12: 00000000fffffffe R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000009 FS: 0000000000000000(0000) GS:ffff8801db200000(0063) knlGS:0000000009b70840 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000020be1000 CR3: 00000001d585a006 CR4: 00000000001606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ip6_ins_rt+0x6c/0x90 net/ipv6/route.c:1006 ip6_route_multipath_add+0xd14/0x16c0 net/ipv6/route.c:3833 inet6_rtm_newroute+0xdc/0x160 net/ipv6/route.c:3957 rtnetlink_rcv_msg+0x733/0x1020 
net/core/rtnetlink.c:4411 netlink_rcv_skb+0x21e/0x460 net/netlink/af_netlink.c:2408 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4423 netlink_unicast_kernel net/netlink/af_netlink.c:1275 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1301 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1864 sock_sendmsg_nosec net/socket.c:636 [inline] sock_sendmsg+0xca/0x110 net/socket.c:646 sock_write_iter+0x31a/0x5d0 net/socket.c:915 call_write_iter include/linux/fs.h:1772 [inline] do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653 do_iter_write+0x154/0x540 fs/read_write.c:932 compat_writev+0x225/0x420 fs/read_write.c:1246 do_compat_writev+0x115/0x220 fs/read_write.c:1267 C_SYSC_writev fs/read_write.c:1278 [inline] compat_SyS_writev+0x26/0x30 fs/read_write.c:1274 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x54/0x63 arch/x86/entry/entry_64_compat.S:125 Reported-by: syzbot Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Signed-off-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f5285f4e1d08..d11a5578e4f8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1241,23 +1241,28 @@ out: * If fib6_add_1 has cleared the old leaf pointer in the * super-tree leaf node we have to find a new one for it. 
*/ - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, - lockdep_is_held(&table->tb6_lock)); - if (pn != fn && pn_leaf == rt) { - pn_leaf = NULL; - RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); - } - if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); - pn_leaf = info->nl_net->ipv6.ip6_null_entry; + if (pn != fn) { + struct rt6_info *pn_leaf = + rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + if (pn_leaf == rt) { + pn_leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); + atomic_dec(&rt->rt6i_ref); } + if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn_leaf = fib6_find_prefix(info->nl_net, table, + pn); +#if RT6_DEBUG >= 2 + if (!pn_leaf) { + WARN_ON(!pn_leaf); + pn_leaf = + info->nl_net->ipv6.ip6_null_entry; + } #endif - atomic_inc(&pn_leaf->rt6i_ref); - rcu_assign_pointer(pn->leaf, pn_leaf); + atomic_inc(&pn_leaf->rt6i_ref); + rcu_assign_pointer(pn->leaf, pn_leaf); + } } #endif goto failure; From 6926e041a8920c8ec27e4e155efa760aa01551fd Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 3 Jan 2018 23:14:21 +0100 Subject: [PATCH 46/56] uapi/if_ether.h: prevent redefinition of struct ethhdr Musl provides its own ethhdr struct definition. Add a guard to prevent its definition if the appropriate musl header has already been included. glibc does not implement this header, but when glibc implements it, they can just define __UAPI_DEF_ETHHDR 0 to make it work with the kernel. Signed-off-by: Hauke Mehrtens Signed-off-by: David S.
Miller --- include/uapi/linux/if_ether.h | 3 +++ include/uapi/linux/libc-compat.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3ee3bf7c8526..144de4d2f385 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,6 +23,7 @@ #define _UAPI_LINUX_IF_ETHER_H #include <linux/types.h> +#include <linux/libc-compat.h> /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -149,11 +150,13 @@ * This is an Ethernet frame header. */ +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 8254c937c9f4..fc29efaa918c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -264,4 +264,10 @@ #endif /* __GLIBC__ */ +/* Definitions for if_ether.h */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #endif /* _UAPI_LIBC_COMPAT_H */ From d1616f07e8f1a4a490d1791316d4a68906b284aa Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Thu, 4 Jan 2018 10:47:20 +0800 Subject: [PATCH 47/56] net: fec: free/restore resource in related probe error paths Fixes in probe error path: - Restore dev_id before failed_ioremap path. Fixes: ("net: fec: restore dev_id in the cases of probe error") - Call of_node_put(phy_node) before failed_phy path. Fixes: ("net: fec: Support phys probed from devicetree and fixed-link") Signed-off-by: Fugang Duan Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 19f198e22e15..a74300a4459c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3556,11 +3556,11 @@ failed_clk_ipg: failed_clk: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); -failed_phy: of_node_put(phy_node); +failed_phy: + dev_id--; failed_ioremap: free_netdev(ndev); - dev_id--; return ret; } From 5133550296d43236439494aa955bfb765a89f615 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 4 Jan 2018 21:06:49 +0300 Subject: [PATCH 48/56] sh_eth: fix SH7757 GEther initialization Renesas SH7757 has 2 Fast and 2 Gigabit Ether controllers, while the 'sh_eth' driver can only reset and initialize TSU of the first controller pair. Shimoda-san tried to solve that adding the 'needs_init' member to the 'struct sh_eth_plat_data', however the platform code still never sets this flag. I think that we can infer this information from the 'devno' variable (set to 'platform_device::id') and reset/init the Ether controller pair only for an even 'devno'; therefore 'sh_eth_plat_data::needs_init' can be removed... Fixes: 150647fb2c31 ("net: sh_eth: change the condition of initialization") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- include/linux/sh_eth.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 1bdd67a8a869..f21c1db91c3f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3254,8 +3254,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index ff3642d267f7..94081e9a5010 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -17,7 +17,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif From 7deea450eb912f269d999de62c8ab922d1461748 Mon Sep 17 00:00:00 2001 From: Sunil Challa Date: Thu, 4 Jan 2018 18:46:54 -0500 Subject: [PATCH 49/56] bnxt_en: Fix population of flow_type in bnxt_hwrm_cfa_flow_alloc() flow_type in HWRM_FLOW_ALLOC is not being populated correctly due to incorrect passing of pointer and size of l3_mask argument of is_wildcard(). Fixed this. Fixes: db1d36a27324 ("bnxt_en: add TC flower offload flow_alloc/free FW cmds") Signed-off-by: Sunil Challa Reviewed-by: Sathya Perla Reviewed-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 3d201d7324bd..d8fee26cd45e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -421,7 +421,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, } /* If all IP and L4 fields are wildcarded then this is an L2 flow */ - if (is_wildcard(&l3_mask, sizeof(l3_mask)) && + if (is_wildcard(l3_mask, sizeof(*l3_mask)) && is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) { flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2; } else { From 78f300049335ae81a5cc6b4b232481dc5e1f9d41 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Thu, 4 Jan 2018 18:46:55 -0500 Subject: [PATCH 50/56] bnxt_en: Fix the 'Invalid VF' id check in bnxt_vf_ndo_prep routine. In bnxt_vf_ndo_prep (which is called by bnxt_get_vf_config ndo), there is a check for "Invalid VF id". Currently, the check is done against max_vfs. However, the user doesn't always create max_vfs. So, the check should be against the created number of VFs. The number of bnxt_vf_info structures that are allocated in bnxt_alloc_vf_resources routine is the "number of requested VFs". So, if an "invalid VF id" falls between the requested number of VFs and the max_vfs, the driver will be dereferencing an invalid pointer. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Venkat Devvuru Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 5ee18660bc33..c9617675f934 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -70,7 +70,7 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) netdev_err(bp->dev, "vf ndo called though sriov is disabled\n"); return -EINVAL; } - if (vf_id >= bp->pf.max_vfs) { + if (vf_id >= bp->pf.active_vfs) { netdev_err(bp->dev, "Invalid VF id %d\n", vf_id); return -EINVAL; } From b707fda2df4070785d0fa8a278aa13944c5f51f8 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Fri, 5 Jan 2018 09:42:16 +0100 Subject: [PATCH 51/56] xen-netfront: enable device after manual module load When loading the module after unloading it, the network interface would not be enabled and thus wouldn't have a backend counterpart and unable to be used by the guest. The guest would face errors like: [root@guest ~]# ethtool -i eth0 Cannot get driver information: No such device [root@guest ~]# ifconfig eth0 eth0: error fetching interface information: Device not found This patch initializes the state of the netfront device whenever it is loaded manually, this state would communicate the netback to create its device and establish the connection between them. Signed-off-by: Eduardo Otubo Reviewed-by: Boris Ostrovsky Signed-off-by: David S. 
Miller --- drivers/net/xen-netfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c5a34671abda..9bd7ddeeb6a5 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1326,6 +1326,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); + xenbus_switch_state(dev, XenbusStateInitialising); return netdev; exit: From cc35c3d1edf7a8373a1a5daa80a912dec96a9cd5 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 5 Jan 2018 11:17:17 -0200 Subject: [PATCH 52/56] sctp: do not retransmit upon FragNeeded if PMTU discovery is disabled Currently, if PMTU discovery is disabled on a given transport, but the configured value is higher than the actual PMTU, it is likely that we will get some icmp Frag Needed. The issue is, if PMTU discovery is disabled, we won't update the information and will issue a retransmission immediately, which may very well trigger another ICMP, and another retransmission, leading to a loop. The fix is to simply not trigger immediate retransmissions if PMTU discovery is disabled on the given transport. Changes from v2: - updated stale comment, noticed by Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 621b5ca3fd1c..9320661cc41d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -399,20 +399,20 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; } - if (t->param_flags & SPP_PMTUD_ENABLE) { - /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); + if (!(t->param_flags & SPP_PMTUD_ENABLE)) + /* We can't allow retransmitting in such case, as the + * retransmission would be sized just as before, and thus we + * would get another icmp, and retransmit again. 
+ */ + return; - /* Update association pmtu. */ - sctp_assoc_sync_pmtu(asoc); - } + /* Update transports view of the MTU */ + sctp_transport_update_pmtu(t, pmtu); - /* Retransmit with the new pmtu setting. - * Normally, if PMTU discovery is disabled, an ICMP Fragmentation - * Needed will never be sent, but if a message was sent before - * PMTU discovery was disabled that was larger than the PMTU, it - * would not be fragmented, so it must be re-transmitted fragmented. - */ + /* Update association pmtu. */ + sctp_assoc_sync_pmtu(asoc); + + /* Retransmit with the new pmtu setting. */ sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } From b6c5734db07079c9410147b32407f2366d584e6c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 5 Jan 2018 11:17:18 -0200 Subject: [PATCH 53/56] sctp: fix the handling of ICMP Frag Needed for too small MTUs syzbot reported a hang involving SCTP, on which it kept flooding dmesg with the message: [ 246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too low, using default minimum of 512 That happened because whenever SCTP hits an ICMP Frag Needed, it tries to adjust to the new MTU and triggers an immediate retransmission. But it didn't consider the fact that MTUs smaller than the SCTP minimum MTU allowed (512) would not cause the PMTU to change, and issued the retransmission anyway (thus leading to another ICMP Frag Needed, and so on). As IPv4 (ip_rt_min_pmtu=556) and IPv6 (IPV6_MIN_MTU=1280) minimum MTU are higher than that, sctp_transport_update_pmtu() is changed to re-fetch the PMTU that got set after our request, and with that, detect if there was an actual change or not. The fix, thus, skips the immediate retransmission if the received ICMP resulted in no change, in the hope that SCTP will select another path. Note: The value being used for the minimum MTU (512, SCTP_DEFAULT_MINSEGMENT) is not right and instead it should be (576, SCTP_MIN_PMTU), but such change belongs to another patch. 
Changes from v1: - do not disable PMTU discovery, in the light of commit 06ad391919b2 ("[SCTP] Don't disable PMTU discovery when mtu is small") and as suggested by Xin Long. - changed the way to break the rtx loop by detecting if the icmp resulted in a change or not Changes from v2: none See-also: https://lkml.org/lkml/2017/12/22/811 Reported-by: syzbot Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/input.c | 8 ++++++-- net/sctp/transport.c | 29 +++++++++++++++++++---------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2f8f93da5dc2..9a5ccf03a59b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -966,7 +966,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *t); -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); diff --git a/net/sctp/input.c b/net/sctp/input.c index 9320661cc41d..141c9c466ec1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -406,8 +406,12 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, */ return; - /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); + /* Update transports view of the MTU. Return if no update was needed. + * If an update wasn't needed/possible, it also doesn't make sense to + * try to retransmit now. + */ + if (!sctp_transport_update_pmtu(t, pmtu)) + return; /* Update association pmtu. 
*/ sctp_assoc_sync_pmtu(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1e5a22430cf5..47f82bd794d9 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); - /* Use default minimum segment size and disable - * pmtu discovery on this transport. - */ - t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - } else { - t->pathmtu = pmtu; + pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment instead */ + pmtu = SCTP_DEFAULT_MINSEGMENT; } + pmtu = SCTP_TRUNC4(pmtu); if (dst) { dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); } - if (!dst) + if (!dst) { t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + dst = t->dst; + } + + if (dst) { + /* Re-fetch, as under layers may have a higher minimum size */ + pmtu = SCTP_TRUNC4(dst_mtu(dst)); + change = t->pathmtu != pmtu; + } + t->pathmtu = pmtu; + + return change; } /* Caches the dst entry and source address for a transport's destination From 46cd75036415d94e9cf451e6606a099945d54cc6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 5 Jan 2018 11:23:45 -0600 Subject: [PATCH 54/56] phylink: mark expected switch fall-throughs in phylink_mii_ioctl In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. 
Addresses-Coverity-ID: 1463447 ("Missing break in switch") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 150cd95a6e1e..249ce5cbea22 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1296,6 +1296,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = pl->phydev->mdio.addr; + /* fall through */ case SIOCGMIIREG: ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num); @@ -1318,6 +1319,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = 0; + /* fall through */ case SIOCGMIIREG: ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num); From 56c0290202ab94a2f2780c449395d4ae8495fab4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Jan 2018 09:00:09 +0100 Subject: [PATCH 55/56] mdio-sun4i: Fix a memory leak If the probing of the regulator is deferred, the memory allocated by 'mdiobus_alloc_size()' will be leaking. It should be freed before the next call to 'sun4i_mdio_probe()' which will reallocate it. Fixes: 4bdcb1dd9feb ("net: Add MDIO bus driver for the Allwinner EMAC") Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio-sun4i.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index 135296508a7e..6425ce04d3f9 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -118,8 +118,10 @@ static int sun4i_mdio_probe(struct platform_device *pdev) data->regulator = devm_regulator_get(&pdev->dev, "phy"); if (IS_ERR(data->regulator)) { - if (PTR_ERR(data->regulator) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(data->regulator) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err_out_free_mdiobus; + } dev_info(&pdev->dev, "no regulator found\n"); data->regulator = NULL; From 50f3d740d376f664f6accc7e86c9afd8f1c7e1e4 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 7 Jan 2018 00:26:47 +0300 Subject: [PATCH 56/56] sh_eth: fix TXALCR1 offsets The TXALCR1 offsets are incorrect in the register offset tables, most probably due to copy&paste error. Luckily, the driver never uses this register. :-) Fixes: 4a55530f38e4 ("net: sh_eth: modify the definitions of register") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f21c1db91c3f..b9e2846589f8 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -147,7 +147,7 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0, @@ -399,7 +399,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0,