From 04b1d4e50e82536c12da00ee04a77510c459c844 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:11 +0200 Subject: [PATCH 01/21] net: core: Make the FIB notification chain generic The FIB notification chain is currently soley used by IPv4 code. However, we're going to introduce IPv6 FIB offload support, which requires these notification as well. As explained in commit c3852ef7f2f8 ("ipv4: fib: Replay events when registering FIB notifier"), upon registration to the chain, the callee receives a full dump of the FIB tables and rules by traversing all the net namespaces. The integrity of the dump is ensured by a per-namespace sequence counter that is incremented whenever a change to the tables or rules occurs. In order to allow more address families to use the chain, each family is expected to register its fib_notifier_ops in its pernet init. These operations allow the common code to read the family's sequence counter as well as dump its tables and rules in the given net namespace. Additionally, a 'family' parameter is added to sent notifications, so that listeners could distinguish between the different families. Implement the common code that allows listeners to register to the chain and for address families to register their fib_notifier_ops. Subsequent patches will implement these operations in IPv6. In the future, ipmr and ip6mr will be extended to provide these notifications as well. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 1 + drivers/net/ethernet/rocker/rocker_main.c | 1 + include/net/fib_notifier.h | 44 +++++ include/net/ip_fib.h | 30 +--- include/net/net_namespace.h | 1 + include/net/netns/ipv4.h | 1 + net/core/Makefile | 3 +- net/core/fib_notifier.c | 164 ++++++++++++++++++ net/ipv4/fib_frontend.c | 17 +- net/ipv4/fib_notifier.c | 98 +++++------ net/ipv4/fib_rules.c | 5 +- net/ipv4/fib_semantics.c | 9 +- net/ipv4/fib_trie.c | 5 +- 13 files changed, 284 insertions(+), 95 deletions(-) create mode 100644 include/net/fib_notifier.h create mode 100644 net/core/fib_notifier.c diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2f03c7e71584..b79f9b67f285 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "spectrum.h" #include "core.h" diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index b1e5c07099fa..ef38c1a41bdd 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h new file mode 100644 index 000000000000..241475224f74 --- /dev/null +++ b/include/net/fib_notifier.h @@ -0,0 +1,44 @@ +#ifndef __NET_FIB_NOTIFIER_H +#define __NET_FIB_NOTIFIER_H + +#include +#include +#include + +struct fib_notifier_info { + struct net *net; + int family; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_REPLACE, + FIB_EVENT_ENTRY_APPEND, + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, + FIB_EVENT_NH_ADD, + FIB_EVENT_NH_DEL, +}; + +struct fib_notifier_ops { + int family; + struct list_head list; + unsigned int (*fib_seq_read)(struct net *net); + int (*fib_dump)(struct net *net, struct notifier_block *nb); + struct rcu_head rcu; +}; + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)); +int unregister_fib_notifier(struct notifier_block *nb); +struct fib_notifier_ops * +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net); +void fib_notifier_ops_unregister(struct fib_notifier_ops *ops); + +#endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ef8992d49bc3..c0295c3ec5f3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) -struct fib_notifier_info { - struct net *net; -}; - struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; @@ -212,25 +209,14 @@ struct fib_nh_notifier_info { struct fib_nh *fib_nh; }; -enum fib_event_type { - FIB_EVENT_ENTRY_REPLACE, - FIB_EVENT_ENTRY_APPEND, - FIB_EVENT_ENTRY_ADD, - FIB_EVENT_ENTRY_DEL, - FIB_EVENT_RULE_ADD, - FIB_EVENT_RULE_DEL, - FIB_EVENT_NH_ADD, - FIB_EVENT_NH_DEL, -}; - -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)); -int unregister_fib_notifier(struct notifier_block *nb); -int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info); -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, +int call_fib4_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, struct fib_notifier_info *info); +int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib4_notifier_init(struct net *net); +void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); #ifdef CONFIG_IP_MULTIPLE_TABLES diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1c401bd4c2e0..57faa375eab9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -88,6 +88,7 @@ struct net { /* core fib_rules */ struct list_head rules_ops; + struct list_head fib_notifier_ops; /* protected by net_mutex */ struct net_device *loopback_dev; /* The loopback */ struct netns_core core; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9a14a0850b0e..20d061c805e3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -159,6 +159,7 @@ struct netns_ipv4 { int sysctl_fib_multipath_hash_policy; #endif + struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; diff --git a/net/core/Makefile b/net/core/Makefile index d501c4278015..56d771a887b6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o tso.o sock_reuseport.o + sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ + fib_notifier.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c new file mode 100644 index 000000000000..292aab83702f --- /dev/null +++ b/net/core/fib_notifier.c @@ -0,0 +1,164 @@ +#include +#include +#include +#include +#include +#include +#include + +static ATOMIC_NOTIFIER_HEAD(fib_chain); + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return nb->notifier_call(nb, event_type, info); +} +EXPORT_SYMBOL(call_fib_notifier); + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return atomic_notifier_call_chain(&fib_chain, event_type, info); +} +EXPORT_SYMBOL(call_fib_notifiers); + +static unsigned int fib_seq_sum(void) +{ + struct fib_notifier_ops *ops; + unsigned int fib_seq = 0; + struct net *net; + + rtnl_lock(); + for_each_net(net) { + list_for_each_entry(ops, &net->fib_notifier_ops, list) + fib_seq += ops->fib_seq_read(net); + } + rtnl_unlock(); + + return fib_seq; +} + +static int fib_net_dump(struct net *net, struct notifier_block *nb) +{ + struct fib_notifier_ops *ops; + + list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + int err = ops->fib_dump(net, nb); + + if (err) + return err; + } + + return 0; +} + +static bool fib_dump_is_consistent(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + unsigned int fib_seq) +{ + atomic_notifier_chain_register(&fib_chain, nb); + if (fib_seq == fib_seq_sum()) + return true; + atomic_notifier_chain_unregister(&fib_chain, nb); + if (cb) + cb(nb); + return false; +} + +#define FIB_DUMP_MAX_RETRIES 5 +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)) +{ + int retries = 0; + int err; + + do { + unsigned int fib_seq = fib_seq_sum(); + struct net *net; + + rcu_read_lock(); + for_each_net_rcu(net) { + err = fib_net_dump(net, nb); + if (err) + goto err_fib_net_dump; + } + rcu_read_unlock(); + + if (fib_dump_is_consistent(nb, cb, fib_seq)) + return 0; + } while (++retries < FIB_DUMP_MAX_RETRIES); + + return -EBUSY; + +err_fib_net_dump: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); + +static int __fib_notifier_ops_register(struct fib_notifier_ops *ops, + struct net *net) +{ + struct fib_notifier_ops *o; + + list_for_each_entry(o, &net->fib_notifier_ops, list) + if (ops->family == o->family) + return -EEXIST; + list_add_tail_rcu(&ops->list, &net->fib_notifier_ops); + return 0; +} + +struct fib_notifier_ops * +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net) +{ + struct fib_notifier_ops *ops; + int err; + + ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); + if (!ops) + return ERR_PTR(-ENOMEM); + + err = __fib_notifier_ops_register(ops, net); + if (err) + goto err_register; + + return ops; + +err_register: + kfree(ops); + return ERR_PTR(err); +} +EXPORT_SYMBOL(fib_notifier_ops_register); + +void fib_notifier_ops_unregister(struct fib_notifier_ops *ops) +{ + list_del_rcu(&ops->list); + kfree_rcu(ops, rcu); +} +EXPORT_SYMBOL(fib_notifier_ops_unregister); + +static int __net_init fib_notifier_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->fib_notifier_ops); + return 0; +} + +static struct pernet_operations fib_notifier_net_ops = { + .init = fib_notifier_net_init, +}; + +static int __init fib_notifier_init(void) +{ + return register_pernet_subsys(&fib_notifier_net_ops); +} + +subsys_initcall(fib_notifier_init); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 044d2a159a3c..2cba559f14df 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net) int err; size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; - net->ipv4.fib_seq = 0; + err = fib4_notifier_init(net); + if (err) + return err; /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); - if (!net->ipv4.fib_table_hash) - return -ENOMEM; + if (!net->ipv4.fib_table_hash) { + err = -ENOMEM; + goto err_table_hash_alloc; + } err = fib4_rules_init(net); if (err < 0) - goto fail; + goto err_rules_init; return 0; -fail: +err_rules_init: kfree(net->ipv4.fib_table_hash); +err_table_hash_alloc: + fib4_notifier_exit(net); return err; } @@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net) #endif rtnl_unlock(); kfree(net->ipv4.fib_table_hash); + fib4_notifier_exit(net); } static int __net_init fib_net_init(struct net *net) diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index e0714d975947..7cf1954bbadc 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -1,86 +1,66 @@ #include #include -#include +#include #include #include +#include #include #include -static ATOMIC_NOTIFIER_HEAD(fib_chain); - -int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info) -{ - info->net = net; - return nb->notifier_call(nb, event_type, info); -} - -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, +int call_fib4_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, struct fib_notifier_info *info) { + info->family = AF_INET; + return call_fib_notifier(nb, net, event_type, info); +} + +int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + ASSERT_RTNL(); + + info->family = AF_INET; net->ipv4.fib_seq++; - info->net = net; - return atomic_notifier_call_chain(&fib_chain, event_type, info); + return call_fib_notifiers(net, event_type, info); } -static unsigned int fib_seq_sum(void) +static unsigned int fib4_seq_read(struct net *net) { - unsigned int fib_seq = 0; - struct net *net; + ASSERT_RTNL(); - rtnl_lock(); - for_each_net(net) - fib_seq += net->ipv4.fib_seq; - rtnl_unlock(); - - return fib_seq; + return net->ipv4.fib_seq; } -static bool fib_dump_is_consistent(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb), - unsigned int fib_seq) +static int fib4_dump(struct net *net, struct notifier_block *nb) { - atomic_notifier_chain_register(&fib_chain, nb); - if (fib_seq == fib_seq_sum()) - return true; - atomic_notifier_chain_unregister(&fib_chain, nb); - if (cb) - cb(nb); - return false; + fib_rules_notify(net, nb); + fib_notify(net, nb); + + return 0; } -#define FIB_DUMP_MAX_RETRIES 5 -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)) +static const struct fib_notifier_ops fib4_notifier_ops_template = { + .family = AF_INET, + .fib_seq_read = fib4_seq_read, + .fib_dump = fib4_dump, +}; + +int __net_init fib4_notifier_init(struct net *net) { - int retries = 0; + struct fib_notifier_ops *ops; - do { - unsigned int fib_seq = fib_seq_sum(); - struct net *net; + net->ipv4.fib_seq = 0; - /* Mutex semantics guarantee that every change done to - * FIB tries before we read the change sequence counter - * is now visible to us. - */ - rcu_read_lock(); - for_each_net_rcu(net) { - fib_rules_notify(net, nb); - fib_notify(net, nb); - } - rcu_read_unlock(); + ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv4.notifier_ops = ops; - if (fib_dump_is_consistent(nb, cb, fib_seq)) - return 0; - } while (++retries < FIB_DUMP_MAX_RETRIES); - - return -EBUSY; + return 0; } -EXPORT_SYMBOL(register_fib_notifier); -int unregister_fib_notifier(struct notifier_block *nb) +void __net_exit fib4_notifier_exit(struct net *net) { - return atomic_notifier_chain_unregister(&fib_chain, nb); + fib_notifier_ops_unregister(net->ipv4.notifier_ops); } -EXPORT_SYMBOL(unregister_fib_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 778ecf977eb2..acdbf5a24ac9 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -32,6 +32,7 @@ #include #include #include +#include struct fib4_rule { struct fib_rule common; @@ -193,7 +194,7 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, .rule = rule, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib4_notifier(nb, net, event_type, &info.info); } static int call_fib_rule_notifiers(struct net *net, @@ -204,7 +205,7 @@ static int call_fib_rule_notifiers(struct net *net, .rule = rule, }; - return call_fib_notifiers(net, event_type, &info.info); + return call_fib4_notifiers(net, event_type, &info.info); } /* Called with rcu_read_lock() */ diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f62dc2463280..632b454ce77c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "fib_lookup.h" @@ -1451,14 +1452,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) break; - return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, - &info.info); + return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type, + &info.info); case FIB_EVENT_NH_DEL: if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) || (fib_nh->nh_flags & RTNH_F_DEAD)) - return call_fib_notifiers(dev_net(fib_nh->nh_dev), - event_type, &info.info); + return call_fib4_notifiers(dev_net(fib_nh->nh_dev), + event_type, &info.info); default: break; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 64668c69dda6..1a6ffb0dab9c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include "fib_lookup.h" @@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .type = type, .tb_id = tb_id, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib4_notifier(nb, net, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, @@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net, .type = type, .tb_id = tb_id, }; - return call_fib_notifiers(net, event_type, &info.info); + return call_fib4_notifiers(net, event_type, &info.info); } #define MAX_STAT_DEPTH 32 From 64e5e8252d69c68ae76258328ac7e5d2e5e923b0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:12 +0200 Subject: [PATCH 02/21] mlxsw: spectrum_router: Ignore address families other than IPv4 We're about to add IPv6 notifications in the FIB notification chain, but the driver currently doesn't support these, so ignore them. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index b79f9b67f285..78c19512250d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3093,7 +3093,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; - if (!net_eq(info->net, &init_net)) + if (!net_eq(info->net, &init_net) || info->family != AF_INET) return NOTIFY_DONE; fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); From d371ac1e1b1124e22bc9a82a5d170ea721a73bef Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:13 +0200 Subject: [PATCH 03/21] rocker: Ignore address families other than IPv4 As in previous patch, ignore IPv6 notifications since the driver doesn't support these. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index ef38c1a41bdd..fc8f8bdf6579 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2192,6 +2192,10 @@ static int rocker_router_fib_event(struct notifier_block *nb, { struct rocker *rocker = container_of(nb, struct rocker, fib_nb); struct rocker_fib_event_work *fib_work; + struct fib_notifier_info *info = ptr; + + if (info->family != AF_INET) + return NOTIFY_DONE; fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); if (WARN_ON(!fib_work)) From 1b2a4440858857f2f93bb2ec5bb3a60f4fcc25be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:14 +0200 Subject: [PATCH 04/21] net: fib_rules: Implement notification logic in core Unlike the routing tables, the FIB rules share a common core, so instead of replicating the same logic for each address family we can simply dump the rules and send notifications from the core itself. To protect the integrity of the dump, a rules-specific sequence counter is added for each address family and incremented whenever a rule is added or deleted (under RTNL). Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_rules.h | 9 ++++++ include/net/ip_fib.h | 24 ++++++++-------- net/core/fib_rules.c | 63 +++++++++++++++++++++++++++++++++++++++++ net/ipv4/fib_notifier.c | 9 ++++-- net/ipv4/fib_rules.c | 45 +++++++---------------------- 5 files changed, 101 insertions(+), 49 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index c487bfa2f479..3d7f1cefc6f5 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,7 @@ #include #include #include +#include struct fib_kuid_range { kuid_t start; @@ -57,6 +58,7 @@ struct fib_rules_ops { int addr_size; int unresolved_rules; int nr_goto_rules; + unsigned int fib_rules_seq; int (*action)(struct fib_rule *, struct flowi *, int, @@ -89,6 +91,11 @@ struct fib_rules_ops { struct rcu_head rcu; }; +struct fib_rule_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_rule *rule; +}; + #define FRA_GENERIC_POLICY \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ @@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); +unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c0295c3ec5f3..1a7f7e424320 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -199,11 +199,6 @@ struct fib_entry_notifier_info { u32 tb_id; }; -struct fib_rule_notifier_info { - struct fib_notifier_info info; /* must be first */ - struct fib_rule *rule; -}; - struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; @@ -219,13 +214,6 @@ int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); -#ifdef CONFIG_IP_MULTIPLE_TABLES -void fib_rules_notify(struct net *net, struct notifier_block *nb); -#else -static inline void fib_rules_notify(struct net *net, struct notifier_block *nb) -{ -} -#endif struct fib_table { struct hlist_node tb_hlist; @@ -298,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule) return true; } +static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static inline unsigned int fib4_rules_seq_read(struct net *net) +{ + return 0; +} + #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -343,6 +341,8 @@ out: } bool fib4_rule_default(const struct fib_rule *rule); +int fib4_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib4_rules_seq_read(struct net *net); #endif /* CONFIG_IP_MULTIPLE_TABLES */ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index fdcb1bcd2afa..fc0b65093417 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -299,6 +299,67 @@ out: } EXPORT_SYMBOL_GPL(fib_rules_lookup); +static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule, int family) +{ + struct fib_rule_notifier_info info = { + .info.family = family, + .rule = rule, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static int call_fib_rule_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule, + struct fib_rules_ops *ops) +{ + struct fib_rule_notifier_info info = { + .info.family = ops->family, + .rule = rule, + }; + + ops->fib_rules_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + +/* Called with rcu_read_lock() */ +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family) +{ + struct fib_rules_ops *ops; + struct fib_rule *rule; + + ops = lookup_rules_ops(net, family); + if (!ops) + return -EAFNOSUPPORT; + list_for_each_entry_rcu(rule, &ops->rules_list, list) + call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule, + family); + rules_ops_put(ops); + + return 0; +} +EXPORT_SYMBOL_GPL(fib_rules_dump); + +unsigned int fib_rules_seq_read(struct net *net, int family) +{ + unsigned int fib_rules_seq; + struct fib_rules_ops *ops; + + ASSERT_RTNL(); + + ops = lookup_rules_ops(net, family); + if (!ops) + return 0; + fib_rules_seq = ops->fib_rules_seq; + rules_ops_put(ops); + + return fib_rules_seq; +} +EXPORT_SYMBOL_GPL(fib_rules_seq_read); + static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rules_ops *ops) { @@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); @@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops); notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index 7cf1954bbadc..5d7afb145562 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -29,12 +29,17 @@ static unsigned int fib4_seq_read(struct net *net) { ASSERT_RTNL(); - return net->ipv4.fib_seq; + return net->ipv4.fib_seq + fib4_rules_seq_read(net); } static int fib4_dump(struct net *net, struct notifier_block *nb) { - fib_rules_notify(net, nb); + int err; + + err = fib4_rules_dump(net, nb); + if (err) + return err; + fib_notify(net, nb); return 0; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index acdbf5a24ac9..35d646a62ad4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -32,7 +32,6 @@ #include #include #include -#include struct fib4_rule { struct fib_rule common; @@ -69,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib4_rule_default); +int fib4_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, AF_INET); +} + +unsigned int fib4_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, AF_INET); +} + int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { @@ -186,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } -static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_rule *rule) -{ - struct fib_rule_notifier_info info = { - .rule = rule, - }; - - return call_fib4_notifier(nb, net, event_type, &info.info); -} - -static int call_fib_rule_notifiers(struct net *net, - enum fib_event_type event_type, - struct fib_rule *rule) -{ - struct fib_rule_notifier_info info = { - .rule = rule, - }; - - return call_fib4_notifiers(net, event_type, &info.info); -} - -/* Called with rcu_read_lock() */ -void fib_rules_notify(struct net *net, struct notifier_block *nb) -{ - struct fib_rules_ops *ops = net->ipv4.rules_ops; - struct fib_rule *rule; - - list_for_each_entry_rcu(rule, &ops->rules_list, list) - call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule); -} - static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, @@ -274,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule); err = 0; errout: @@ -296,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule); errout: return err; } From e3ea973159d53559c5ae9a9dbc824da9aba6cac0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:15 +0200 Subject: [PATCH 05/21] ipv6: fib_rules: Check if rule is a default rule As explained in commit 3c71006d15fd ("ipv4: fib_rules: Check if rule is a default rule"), drivers supporting IPv6 FIB offload need to be able to sanitize the rules they don't support and potentially flush their tables. Add an IPv6 helper to check if a FIB rule is a default rule. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ net/ipv6/fib6_rules.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1a88008cc6f5..6000b0dc51ee 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -295,6 +295,7 @@ int ipv6_route_open(struct inode *inode, struct file *file); #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); +bool fib6_rule_default(const struct fib_rule *rule); #else static inline int fib6_rules_init(void) { @@ -304,5 +305,9 @@ static inline void fib6_rules_cleanup(void) { return ; } +static inline bool fib6_rule_default(const struct fib_rule *rule) +{ + return true; +} #endif #endif diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ec849d88a662..ef1fcee6bf16 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -29,6 +29,26 @@ struct fib6_rule { u8 tclass; }; +static bool fib6_rule_matchall(const struct fib_rule *rule) +{ + struct fib6_rule *r = container_of(rule, struct fib6_rule, common); + + if (r->dst.plen || r->src.plen || r->tclass) + return false; + return fib_rule_matchall(rule); +} + +bool fib6_rule_default(const struct fib_rule *rule) +{ + if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || + rule->l3mdev) + return false; + if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib6_rule_default); + struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { From 16ab6d7d4d8cc037bb4be12c2b849ac92787e1ff Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:16 +0200 Subject: [PATCH 06/21] ipv6: fib: Add FIB notifiers callbacks We're about to add IPv6 FIB offload support, so implement the necessary callbacks in IPv6 code, which will later allow us to add routes and rules notifications. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 11 ++++++++ include/net/netns/ipv6.h | 1 + net/ipv6/Makefile | 2 +- net/ipv6/fib6_notifier.c | 55 ++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_fib.c | 7 +++++ 5 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 net/ipv6/fib6_notifier.c diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6000b0dc51ee..be8ddf3253dc 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include #include +#include #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -292,6 +294,15 @@ int fib6_init(void); int ipv6_route_open(struct inode *inode, struct file *file); +int call_fib6_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib6_notifier_init(struct net *net); +void __net_exit fib6_notifier_exit(struct net *net); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index de7745e2edcc..abdf3b40303b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -86,6 +86,7 @@ struct netns_ipv6 { atomic_t dev_addr_genid; atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; + struct fib_notifier_ops *notifier_ops; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 217e9ff0e24b..f8b24c2e0d77 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o + udp_offload.o seg6.o fib6_notifier.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c new file mode 100644 index 000000000000..c2bb1ab5b5eb --- /dev/null +++ b/net/ipv6/fib6_notifier.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include +#include + +int call_fib6_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->family = AF_INET6; + return call_fib_notifier(nb, net, event_type, info); +} + +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->family = AF_INET6; + return call_fib_notifiers(net, event_type, info); +} + +static unsigned int fib6_seq_read(struct net *net) +{ + return 0; +} + +static int fib6_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static const struct fib_notifier_ops fib6_notifier_ops_template = { + .family = AF_INET6, + .fib_seq_read = fib6_seq_read, + .fib_dump = fib6_dump, +}; + +int __net_init fib6_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; + + ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv6.notifier_ops = ops; + + return 0; +} + +void __net_exit fib6_notifier_exit(struct net *net) +{ + fib_notifier_ops_unregister(net->ipv6.notifier_ops); +} diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ebb299cf72b7..f93976e3f65c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1839,6 +1839,11 @@ static void fib6_gc_timer_cb(unsigned long arg) static int __net_init fib6_net_init(struct net *net) { size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; + int err; + + err = fib6_notifier_init(net); + if (err) + return err; spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); @@ -1891,6 +1896,7 @@ out_fib_table_hash: out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: + fib6_notifier_exit(net); return -ENOMEM; } @@ -1907,6 +1913,7 @@ static void fib6_net_exit(struct net *net) kfree(net->ipv6.fib6_main_tbl); kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); + fib6_notifier_exit(net); } static struct pernet_operations fib6_net_ops = { From df77fe4d9865c6354372876632bcbceeda84f6c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:17 +0200 Subject: [PATCH 07/21] ipv6: fib: Add in-kernel notifications for route add / delete As with IPv4, allow listeners of the FIB notification chain to receive notifications whenever a route is added, replaced or deleted. This is done by placing calls to the FIB notification chain in the two lowest level functions that end up performing these operations - namely, fib6_add_rt2node() and fib6_del_route(). Unlike IPv4, APPEND notifications aren't sent as the kernel doesn't distinguish between "append" (NLM_F_CREATE|NLM_F_APPEND) and "prepend" (NLM_F_CREATE). If NLM_F_EXCL isn't set, duplicate routes are always added after the existing duplicate routes. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ net/ipv6/ip6_fib.c | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index be8ddf3253dc..e2b292b79e99 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -258,6 +258,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, int); +struct fib6_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct rt6_info *rt; +}; + /* * exported functions */ diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f93976e3f65c..595a57cbbc7b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -302,6 +303,17 @@ static void __net_init fib6_tables_init(struct net *net) #endif +static int call_fib6_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct rt6_info *rt) +{ + struct fib6_entry_notifier_info info = { + .rt = rt, + }; + + return call_fib6_notifiers(net, event_type, &info.info); +} + static int fib6_dump_node(struct fib6_walker *w) { int res; @@ -879,6 +891,8 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, + rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; @@ -906,6 +920,8 @@ add: rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, + rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { @@ -1459,6 +1475,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fib6_purge_rt(rt, fn, net); + call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); rt6_release(rt); From dcb18f762f6ac83a6dc9cdc26dd694dcc167beb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:18 +0200 Subject: [PATCH 08/21] ipv6: fib_rules: Dump rules during registration to FIB chain Allow users of the FIB notification chain to receive a complete view of the IPv6 FIB rules upon registration to the chain. The integrity of the dump is ensured by a per-family sequence counter that is incremented (under RTNL) whenever a rule is added or deleted. All the sequence counters are read (under RTNL) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 10 ++++++++++ net/ipv6/fib6_notifier.c | 4 ++-- net/ipv6/fib6_rules.c | 11 +++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e2b292b79e99..dbe5537809f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -312,6 +312,8 @@ void __net_exit fib6_notifier_exit(struct net *net); int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); +int fib6_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib6_rules_seq_read(struct net *net); #else static inline int fib6_rules_init(void) { @@ -325,5 +327,13 @@ static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} +static inline unsigned int fib6_rules_seq_read(struct net *net) +{ + return 0; +} #endif #endif diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index c2bb1ab5b5eb..298efc678f3b 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -23,12 +23,12 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, static unsigned int fib6_seq_read(struct net *net) { - return 0; + return fib6_rules_seq_read(net); } static int fib6_dump(struct net *net, struct notifier_block *nb) { - return 0; + return fib6_rules_dump(net, nb); } static const struct fib_notifier_ops fib6_notifier_ops_template = { diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ef1fcee6bf16..2f29e4e33bd3 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -14,6 +14,7 @@ */ #include +#include #include #include @@ -49,6 +50,16 @@ bool fib6_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib6_rule_default); +int fib6_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, AF_INET6); +} + +unsigned int fib6_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, AF_INET6); +} + struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { From e1ee0a5ba35d999caef94d659b4cb842e63aeb68 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:19 +0200 Subject: [PATCH 09/21] ipv6: fib: Dump tables during registration to FIB chain Dump all the FIB tables in each net namespace upon registration to the FIB notification chain so that the callee will have a complete view of the tables. The integrity of the dump is ensured by a per-table sequence counter that is incremented (under write lock) whenever a route is added or deleted from the table. All the sequence counters are read (under each table's read lock) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. While it's possible for a table to be modified after its counter has been read, this isn't really a problem. In case it happened before it was read the second time, then the comparison at the end will fail. If it happened afterwards, then we're guaranteed to be notified about the change, as the notification block is registered prior to the second read. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ++ net/ipv6/fib6_notifier.c | 10 ++++- net/ipv6/ip6_fib.c | 92 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index dbe5537809f5..0b3052157e6b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -235,6 +235,7 @@ struct fib6_table { struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; + unsigned int fib_seq; #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -308,6 +309,9 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); +unsigned int fib6_tables_seq_read(struct net *net); +int fib6_tables_dump(struct net *net, struct notifier_block *nb); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index 298efc678f3b..66a103ef7e86 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -23,12 +23,18 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, static unsigned int fib6_seq_read(struct net *net) { - return fib6_rules_seq_read(net); + return fib6_tables_seq_read(net) + fib6_rules_seq_read(net); } static int fib6_dump(struct net *net, struct notifier_block *nb) { - return fib6_rules_dump(net, nb); + int err; + + err = fib6_rules_dump(net, nb); + if (err) + return err; + + return fib6_tables_dump(net, nb); } static const struct fib_notifier_ops fib6_notifier_ops_template = { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 595a57cbbc7b..719c10480c74 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -303,6 +303,37 @@ static void __net_init fib6_tables_init(struct net *net) #endif +unsigned int fib6_tables_seq_read(struct net *net) +{ + unsigned int h, fib_seq = 0; + + rcu_read_lock(); + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + struct fib6_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { + read_lock_bh(&tb->tb6_lock); + fib_seq += tb->fib_seq; + read_unlock_bh(&tb->tb6_lock); + } + } + rcu_read_unlock(); + + return fib_seq; +} + +static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct rt6_info *rt) +{ + struct fib6_entry_notifier_info info = { + .rt = rt, + }; + + return call_fib6_notifier(nb, net, event_type, &info.info); +} + static int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct rt6_info *rt) @@ -311,9 +342,70 @@ static int call_fib6_entry_notifiers(struct net *net, .rt = rt, }; + rt->rt6i_table->fib_seq++; return call_fib6_notifiers(net, event_type, &info.info); } +struct fib6_dump_arg { + struct net *net; + struct notifier_block *nb; +}; + +static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg) +{ + if (rt == arg->net->ipv6.ip6_null_entry) + return; + call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); +} + +static int fib6_node_dump(struct fib6_walker *w) +{ + struct rt6_info *rt; + + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) + fib6_rt_dump(rt, w->args); + w->leaf = NULL; + return 0; +} + +static void fib6_table_dump(struct net *net, struct fib6_table *tb, + struct fib6_walker *w) +{ + w->root = &tb->tb6_root; + read_lock_bh(&tb->tb6_lock); + fib6_walk(net, w); + read_unlock_bh(&tb->tb6_lock); +} + +/* Called with rcu_read_lock() */ +int fib6_tables_dump(struct net *net, struct notifier_block *nb) +{ + struct fib6_dump_arg arg; + struct fib6_walker *w; + unsigned int h; + + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return -ENOMEM; + + w->func = fib6_node_dump; + arg.net = net; + arg.nb = nb; + w->args = &arg; + + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + struct fib6_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb6_hlist) + fib6_table_dump(net, tb, w); + } + + kfree(w); + + return 0; +} + static int fib6_dump_node(struct fib6_walker *w) { int res; From 61e4d01e16acddadb9723143637a20417fa67ac9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:20 +0200 Subject: [PATCH 10/21] ipv6: fib: Add offload indication to routes Allow user space applications to see which routes are offloaded and which aren't by setting the RTNH_F_OFFLOAD flag when dumping them. To be consistent with IPv4, offload indication is provided on a per-nexthop basis. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/ipv6_route.h | 1 + net/ipv6/route.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index d496c02e14bc..33e2a5732bd1 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -35,6 +35,7 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 +#define RTF_OFFLOAD 0x20000000 /* offloaded route */ #define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ #define RTF_LOCAL 0x80000000 diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d30c96a819d..aba07fce67fb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1820,6 +1820,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + if (cfg->fc_flags & RTF_OFFLOAD) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@ -3327,6 +3332,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } + if (rt->rt6i_flags & RTF_OFFLOAD) + *flags |= RTNH_F_OFFLOAD; + /* not needed for multipath encoding b/c it has a rtnexthop struct */ if (!skip_oif && rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) From c5b12410fa591acb1d48e167b9bd0d2a7a38498d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:21 +0200 Subject: [PATCH 11/21] ipv6: fib: Don't assume only nodes hold a reference on routes The code currently assumes that only FIB nodes can hold a reference on routes. Therefore, after fib6_purge_rt() has run and the route is no longer present in any intermediate nodes, it's assumed that its reference count would be 1 - taken by the node where it's currently stored. However, we're going to allow users other than the FIB to take a reference on a route, so this assumption is no longer valid and the BUG_ON() needs to be removed. Note that purging only takes place if the initial reference count is different than 1. I've left that check intact, as in the majority of systems (where routes are only referenced by the FIB), it does actually mean the route is present in intermediate nodes. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 719c10480c74..fa27905de92e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -837,8 +837,6 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, } fn = fn->parent; } - /* No more references are possible at this point. */ - BUG_ON(atomic_read(&rt->rt6i_ref) != 1); } } From 7483cea79957312e9f8e9cf760a1bc5d6c507113 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:22 +0200 Subject: [PATCH 12/21] ipv6: fib: Unlink replaced routes from their nodes When a route is deleted its node pointer is set to NULL to indicate it's no longer linked to its node. Do the same for routes that are replaced. This will later allow us to test if a route is still in the FIB by checking its node pointer instead of its reference count. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fa27905de92e..fe193b48ef61 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1019,6 +1019,7 @@ add: fn->fn_flags |= RTN_RTINFO; } nsiblings = iter->rt6i_nsiblings; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); rt6_release(iter); @@ -1031,6 +1032,7 @@ add: break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); rt6_release(iter); nsiblings--; From 9217d8c2fe743f02a3ce6d430fe3b5d514fd5f1c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:23 +0200 Subject: [PATCH 13/21] ipv6: Regenerate host route according to node pointer upon loopback up When the loopback device is brought back up we need to check if the host route attached to the address is still in the FIB and regenerate one in case it's not. Host routes using the loopback device are always inserted into and removed from the FIB under RTNL (under which this function is called), so we can test their node pointer instead of the reference count in order to check if the route is in the FIB or not. Tested using the following script from Nicolas mentioned in commit a220445f9f43 ("ipv6: correctly add local routes when lo goes up"): $ ip link add dummy1 type dummy $ ip link set dummy1 up $ ip link set lo down ; ip link set lo up The host route is correctly regenerated. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3c46e9513a31..e8e4f867b994 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3066,7 +3066,7 @@ static void init_loopback(struct net_device *dev) * lo device down, release this obsolete dst and * reallocate a new router for ifa. */ - if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { + if (!sp_ifa->rt->rt6i_node) { ip6_rt_put(sp_ifa->rt); sp_ifa->rt = NULL; } else { From fc882fcff1ee774cb6be9d3c714ae5ab9eec5aa4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:24 +0200 Subject: [PATCH 14/21] ipv6: Regenerate host route according to node pointer upon interface up When an interface is brought back up, the kernel tries to restore the host routes tied to its permanent addresses. However, if the host route was removed from the FIB, then we need to reinsert it. This is done by releasing the current dst and allocating a new, so as to not reuse a dst with obsolete values. Since this function is called under RTNL and using the same explanation from the previous patch, we can test if the route is in the FIB by checking its node pointer instead of its reference count. Tested using the following script and Andrey's reproducer mentioned in commit 8048ced9beb2 ("net: ipv6: regenerate host route if moved to gc list") and linked below: $ ip link set dev lo up $ ip link add dummy1 type dummy $ ip -6 address add cafe::1/64 dev dummy1 $ ip link set dev lo down # cafe::1/128 is removed $ ip link set dev dummy1 up $ ip link set dev lo up The host route is correctly regenerated. Signed-off-by: Ido Schimmel Link: http://lkml.kernel.org/r/CAAeHK+zSe82vc5gCRgr_EoUwiALPnWVdWJBPwJZBpbxYz=kGJw@mail.gmail.com Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e8e4f867b994..30ee23eef268 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3321,11 +3321,11 @@ static void addrconf_gre_config(struct net_device *dev) static int fixup_permanent_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - /* rt6i_ref == 0 means the host route was removed from the + /* !rt6i_node means the host route was removed from the * FIB, for example, if 'lo' device is taken down. In that * case regenerate the host route. */ - if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) { + if (!ifp->rt || !ifp->rt->rt6i_node) { struct rt6_info *rt, *prev; rt = addrconf_dst_alloc(idev, &ifp->addr, false); From a460aa83963b185a32a6377eb486b6e613ac8e38 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:25 +0200 Subject: [PATCH 15/21] ipv6: fib: Add helpers to hold / drop a reference on rt6_info Similar to commit 1c677b3d2828 ("ipv4: fib: Add fib_info_hold() helper") and commit b423cb10807b ("ipv4: fib: Export free_fib_info()") add an helper to hold a reference on rt6_info and export rt6_release() to drop it and potentially release the route. This is needed so that drivers capable of FIB offload could hold a reference on the route before queueing it for offload and drop it after the route has been programmed to the device's tables. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 16 ++++++++++++++++ net/ipv6/ip6_fib.c | 12 ++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0b3052157e6b..1d790ea40ea7 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -187,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); + +static inline void rt6_hold(struct rt6_info *rt) +{ + atomic_inc(&rt->rt6i_ref); +} + +static inline void rt6_release(struct rt6_info *rt) +{ + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); + } +} + enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fe193b48ef61..69ed0043d117 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -154,7 +154,7 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } -static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -177,15 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } - -static void rt6_release(struct rt6_info *rt) -{ - if (atomic_dec_and_test(&rt->rt6i_ref)) { - rt6_free_pcpu(rt); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } -} +EXPORT_SYMBOL_GPL(rt6_free_pcpu); static void fib6_link_table(struct net *net, struct fib6_table *tb) { From 66a5763ac180d43f4a16770791669dc1e085cd5d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:26 +0200 Subject: [PATCH 16/21] mlxsw: spectrum_router: Demultiplex FIB event based on family The FIB notification block currently only handles IPv4 events, but we want to start handling IPv6 events soon, so lay the groundwork now. Do that by preparing the work item and process it according to the notified address family. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 65 +++++++++++++------ 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 78c19512250d..166ecf5d58f1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3040,7 +3040,7 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; -static void mlxsw_sp_router_fib_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); @@ -3085,6 +3085,42 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) kfree(fib_work); } +static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) +{ +} + +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info)); + /* Take referece on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. + */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info)); + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + } +} + +static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ +} + /* Called with rcu_read_lock() */ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) @@ -3100,31 +3136,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, if (WARN_ON(!fib_work)) return NOTIFY_BAD; - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work); router = container_of(nb, struct mlxsw_sp_router, fib_nb); fib_work->mlxsw_sp = router->mlxsw_sp; fib_work->event = event; - switch (event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ - case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info)); - /* Take referece on fib_info to prevent it from being - * freed while work is queued. Release it afterwards. - */ - fib_info_hold(fib_work->fen_info.fi); + switch (info->family) { + case AF_INET: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); + mlxsw_sp_router_fib4_event(fib_work, info); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); - fib_rule_get(fib_work->fr_info.rule); - break; - case FIB_EVENT_NH_ADD: /* fall through */ - case FIB_EVENT_NH_DEL: - memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); - fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + case AF_INET6: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); + mlxsw_sp_router_fib6_event(fib_work, info); break; } From 583419fdf22bd2fc39e49520b29960f206b7ab44 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:27 +0200 Subject: [PATCH 17/21] mlxsw: spectrum_router: Sanitize IPv6 FIB rules We only allow FIB offload in the presence of default rules or an l3mdev rule. In a similar fashion to IPv4 FIB rules, sanitize IPv6 rules. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 166ecf5d58f1..6c7fc6a66aca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -3087,6 +3088,23 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) { + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; + + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rule = fib_work->fr_info.rule; + if (!fib6_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib_abort(mlxsw_sp); + fib_rule_put(rule); + break; + } + rtnl_unlock(); + kfree(fib_work); } static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, @@ -3119,6 +3137,13 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { + switch (fib_work->event) { + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + } } /* Called with rcu_read_lock() */ From 428b851f565f11c483cfa62021d674b6fb9d6ddc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:28 +0200 Subject: [PATCH 18/21] mlxsw: spectrum_router: Add support for IPv6 routes addition / deletion Allow directly connected and remote unicast IPv6 routes to be programmed to the device's tables. As with IPv4, identical routes - sharing the same destination prefix - are ordered in a FIB node according to their table ID and then the metric. While the kernel doesn't share the same trie for the local and main table, this does happen in the device, so ordering according to table ID is needed. Since individual nexthops can be added and deleted in IPv6, each FIB entry stores a linked list of the rt6_info structs it represents. Upon the addition or deletion of a nexthop, a new nexthop group is allocated according to the new configuration and the old one is destroyed. Identical groups aren't currently consolidated, but will be in a follow-up patchset. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/Kconfig | 1 + .../ethernet/mellanox/mlxsw/spectrum_router.c | 726 +++++++++++++++++- 2 files changed, 724 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 695adff89d71..d56eea310509 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -75,6 +75,7 @@ config MLXSW_SPECTRUM depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q depends on PSAMPLE || PSAMPLE=n depends on BRIDGE || BRIDGE=n + depends on IPV6 || IPV6=n select PARMAN select MLXFW default m diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6c7fc6a66aca..2345c00ef706 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -406,6 +407,17 @@ struct mlxsw_sp_fib4_entry { u8 type; }; +struct mlxsw_sp_fib6_entry { + struct mlxsw_sp_fib_entry common; + struct list_head rt6_list; + unsigned int nrt6; +}; + +struct mlxsw_sp_rt6 { + struct list_head list; + struct rt6_info *rt; +}; + enum mlxsw_sp_l3proto { MLXSW_SP_L3_PROTO_IPV4, MLXSW_SP_L3_PROTO_IPV6, @@ -2126,6 +2138,26 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) } } +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + if (nh->rif && nh->rif->dev == rt->dst.dev && + ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, + &rt->rt6i_gateway)) + return nh; + continue; + } + + return NULL; +} + static void mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) { @@ -2160,6 +2192,48 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) } } +static void +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { + list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt->rt6i_flags |= RTF_OFFLOAD; + return; + } + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + mlxsw_sp_rt6->rt->rt6i_flags |= RTF_OFFLOAD; + else + mlxsw_sp_rt6->rt->rt6i_flags &= ~RTF_OFFLOAD; + } +} + +static void +mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + rt->rt6i_flags &= ~RTF_OFFLOAD; + } +} + static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { @@ -2167,7 +2241,8 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) mlxsw_sp_fib4_entry_offload_set(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_set(fib_entry); + break; } } @@ -2179,7 +2254,8 @@ mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) mlxsw_sp_fib4_entry_offload_unset(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_unset(fib_entry); + break; } } @@ -2887,6 +2963,615 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } +static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) +{ + /* Packets with link-local destination IP arriving to the router + * are trapped to the CPU, so no need to program specific routes + * for them. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL) + return true; + + /* Multicast routes aren't supported, so ignore them. Neighbour + * Discovery packets are specifically trapped. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) + return true; + + /* Cloned routes are irrelevant in the forwarding path. */ + if (rt->rt6i_flags & RTF_CACHE) + return true; + + return false; +} + +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL); + if (!mlxsw_sp_rt6) + return ERR_PTR(-ENOMEM); + + /* In case of route replace, replaced route is deleted with + * no notification. Take reference to prevent accessing freed + * memory. + */ + mlxsw_sp_rt6->rt = rt; + rt6_hold(rt); + + return mlxsw_sp_rt6; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ + rt6_release(rt); +} +#else +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ +} +#endif + +static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); + kfree(mlxsw_sp_rt6); +} + +static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt) +{ + /* RTF_CACHE routes are ignored */ + return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; +} + +static struct rt6_info * +mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt; +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + if (!mlxsw_sp_fib6_rt_can_mp(nrt)) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same + * virtual router. + */ + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (rt->rt6i_metric < nrt->rt6i_metric) + continue; + if (rt->rt6i_metric == nrt->rt6i_metric && + mlxsw_sp_fib6_rt_can_mp(rt)) + return fib6_entry; + if (rt->rt6i_metric > nrt->rt6i_metric) + break; + } + + return NULL; +} + +static struct mlxsw_sp_rt6 * +mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, + const struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + if (mlxsw_sp_rt6->rt == rt) + return mlxsw_sp_rt6; + } + + return NULL; +} + +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct rt6_info *rt) +{ + struct net_device *dev = rt->dst.dev; + struct mlxsw_sp_rif *rif; + int err; + + nh->nh_grp = nh_grp; + memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + + if (!dev) + return 0; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + mlxsw_sp_nexthop_rif_init(nh, rif); + + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_nexthop_neigh_init; + + return 0; + +err_nexthop_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct mlxsw_sp_nexthop *nh; + size_t alloc_size; + int i = 0; + int err; + + alloc_size = sizeof(*nh_grp) + + fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); +#if IS_ENABLED(CONFIG_IPV6) + nh_grp->neigh_tbl = &nd_tbl; +#endif + mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + nh_grp->gateway = !!(mlxsw_sp_rt6->rt->rt6i_flags & RTF_GATEWAY); + nh_grp->count = fib6_entry->nrt6; + for (i = 0; i < nh_grp->count; i++) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + nh = &nh_grp->nexthops[i]; + err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); + if (err) + goto err_nexthop6_init; + mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i = nh_grp->count; + + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON(nh_grp->adj_index_valid); + kfree(nh_grp); +} + +static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + /* For now, don't consolidate nexthop groups */ + nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + + list_add_tail(&fib6_entry->common.nexthop_group_node, + &nh_grp->fib_list); + fib6_entry->common.nh_group = nh_grp; + + return 0; +} + +static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; + int err; + + fib6_entry->common.nh_group = NULL; + list_del(&fib6_entry->common.nexthop_group_node); + + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + /* In case this entry is offloaded, then the adjacency index + * currently associated with it in the device's table is that + * of the old group. Start using the new one instead. + */ + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + if (list_empty(&old_nh_grp->fib_list)) + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); +err_nexthop6_group_get: + list_add_tail(&fib6_entry->common.nexthop_group_node, + &old_nh_grp->fib_list); + fib6_entry->common.nh_group = old_nh_grp; + return err; +} + +static int +mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) + return PTR_ERR(mlxsw_sp_rt6); + + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_update; + + return 0; + +err_nexthop6_group_update: + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + return err; +} + +static void +mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); + if (WARN_ON(!mlxsw_sp_rt6)) + return; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +} + +static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry, + const struct rt6_info *rt) +{ + /* Packets hitting RTF_REJECT routes need to be discarded by the + * stack. We can rely on their destination device not having a + * RIF (it's the loopback device) and can thus use action type + * local, which will cause them to be trapped with a lower + * priority than packets that need to be locally received. + */ + if (rt->rt6i_flags & RTF_LOCAL) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->rt6i_flags & RTF_REJECT) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + else if (rt->rt6i_flags & RTF_GATEWAY) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; +} + +static void +mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp; + + list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list, + list) { + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); + if (!fib6_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib6_entry->common; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + + mlxsw_sp_fib6_entry_type_set(fib_entry, mlxsw_sp_rt6->rt); + + INIT_LIST_HEAD(&fib6_entry->rt6_list); + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6 = 1; + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + fib_entry->fib_node = fib_node; + + return fib6_entry; + +err_nexthop6_group_get: + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +err_rt6_create: + kfree(fib6_entry); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); + WARN_ON(fib6_entry->nrt6); + kfree(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (rt->rt6i_metric > nrt->rt6i_metric) + return fib6_entry; + } + + return NULL; +} + +static int +mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry) +{ + struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; + struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); + struct mlxsw_sp_fib6_entry *fib6_entry; + + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt); + + if (fib6_entry) { + list_add_tail(&new6_entry->common.list, + &fib6_entry->common.list); + } else { + struct mlxsw_sp_fib6_entry *last; + + list_for_each_entry(last, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last); + + if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id) + break; + fib6_entry = last; + } + + if (fib6_entry) + list_add(&new6_entry->common.list, + &fib6_entry->common.list); + else + list_add(&new6_entry->common.list, + &fib_node->entry_list); + } + + return 0; +} + +static void +mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + list_del(&fib6_entry->common.list); +} + +static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + int err; + + err = mlxsw_sp_fib6_node_list_insert(fib6_entry); + if (err) + return err; + + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_fib6_node_list_remove(fib6_entry); + return err; +} + +static void +mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_node_list_remove(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6); + + fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen); + if (!fib_node) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id && + rt->rt6i_metric == iter_rt->rt6i_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; + } + + return NULL; +} + +static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + int err; + + if (mlxsw_sp->router->aborted) + return 0; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id, + &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + /* Before creating a new entry, try to append route to an existing + * multipath entry. + */ + fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt); + if (fib6_entry) { + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + if (err) + goto err_fib6_entry_nexthop_add; + return 0; + } + + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + if (IS_ERR(fib6_entry)) { + err = PTR_ERR(fib6_entry); + goto err_fib6_entry_create; + } + + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry); + if (err) + goto err_fib6_node_entry_link; + + return 0; + +err_fib6_node_entry_link: + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); +err_fib6_entry_create: +err_fib6_entry_nexthop_add: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + + if (mlxsw_sp->router->aborted) + return; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return; + + fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); + if (WARN_ON(!fib6_entry)) + return; + + /* If route is part of a multipath entry, but not the last one + * removed, then only reduce its nexthop group. + */ + if (!list_is_singular(&fib6_entry->rt6_list)) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + return; + } + + fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, enum mlxsw_reg_ralxx_protocol proto, u8 tree_id) @@ -2967,6 +3652,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, } } +static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; + + list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + if (do_break) + break; + } +} + static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -2975,7 +3677,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node); break; } } @@ -3033,6 +3735,7 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_fib_event_work { struct work_struct work; union { + struct fib6_entry_notifier_info fen6_info; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -3092,9 +3795,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; struct fib_rule *rule; + int err; rtnl_lock(); switch (fib_work->event) { + case FIB_EVENT_ENTRY_ADD: + err = mlxsw_sp_router_fib6_add(mlxsw_sp, + fib_work->fen6_info.rt); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: rule = fib_work->fr_info.rule; @@ -3138,6 +3853,11 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { switch (fib_work->event) { + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info)); + rt6_hold(fib_work->fen6_info.rt); + break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); From 0a7fd1ac2a6b316ceeb9a57a41ce0c45f6bff549 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:29 +0200 Subject: [PATCH 19/21] mlxsw: spectrum_router: Add support for route replace In case we got a replace event, then the replaced route must exist. If the route isn't capable of multipath, then replace first matching non-multipath capable route. If the route is capable of multipath and matching multipath capable route is found, then replace it. Otherwise, replace first matching non-multipath capable route. The new route is inserted before the replaced one. In case the replaced route is currently offloaded, then it's overwritten in the device's table by the new route and later deleted, thus not impacting routed traffic. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 63 ++++++++++++++----- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2345c00ef706..cded8e8039bd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3035,11 +3035,11 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt) + const struct rt6_info *nrt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; - if (!mlxsw_sp_fib6_rt_can_mp(nrt)) + if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) return NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { @@ -3371,9 +3371,9 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt) + const struct rt6_info *nrt, bool replace) { - struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); @@ -3382,21 +3382,32 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, continue; if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) break; + if (replace && rt->rt6i_metric == nrt->rt6i_metric) { + if (mlxsw_sp_fib6_rt_can_mp(rt) == + mlxsw_sp_fib6_rt_can_mp(nrt)) + return fib6_entry; + if (mlxsw_sp_fib6_rt_can_mp(nrt)) + fallback = fallback ?: fib6_entry; + } if (rt->rt6i_metric > nrt->rt6i_metric) - return fib6_entry; + return fallback ?: fib6_entry; } - return NULL; + return fallback; } static int -mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry) +mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, + bool replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt); + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + + if (replace && WARN_ON(!fib6_entry)) + return -EINVAL; if (fib6_entry) { list_add_tail(&new6_entry->common.list, @@ -3430,11 +3441,12 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) } static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) { int err; - err = mlxsw_sp_fib6_node_list_insert(fib6_entry); + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); if (err) return err; @@ -3489,8 +3501,25 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, return NULL; } +static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + struct mlxsw_sp_fib6_entry *replaced; + + if (!replace) + return; + + replaced = list_next_entry(fib6_entry, common.list); + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct rt6_info *rt) + struct rt6_info *rt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -3513,7 +3542,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, /* Before creating a new entry, try to append route to an existing * multipath entry. */ - fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt); + fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); if (fib6_entry) { err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); if (err) @@ -3527,10 +3556,12 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry); + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); if (err) goto err_fib6_node_entry_link; + mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + return 0; err_fib6_node_entry_link: @@ -3795,13 +3826,16 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; struct fib_rule *rule; + bool replace; int err; rtnl_lock(); switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fen6_info.rt); + fib_work->fen6_info.rt, replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); mlxsw_sp_rt6_release(fib_work->fen6_info.rt); @@ -3853,6 +3887,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info)); From f36f5ac677d184a62404169c781339c0cc64ea87 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:30 +0200 Subject: [PATCH 20/21] mlxsw: spectrum_router: Abort on source-specific routes Without resorting to ACLs, the device performs route lookup solely based on the destination IP address. In case source-specific routing is needed, an error is returned and the abort mechanism is activated, thus allowing the kernel to take over forwarding decisions. Instead of aborting, we can trap specific destination prefixes where source-specific routes are present, but this will result in a lot more code that is unlikely to ever be used. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cded8e8039bd..45cf32ca4126 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3528,6 +3528,9 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp->router->aborted) return 0; + if (rt->rt6i_src.plen) + return -EINVAL; + if (mlxsw_sp_fib6_rt_should_ignore(rt)) return 0; From 65e65ec137f4abe78b6c90c72c0a6ca7474e9ae6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:31 +0200 Subject: [PATCH 21/21] mlxsw: spectrum_router: Don't ignore IPv6 notifications We now have all the necessary IPv6 infrastructure in place, so stop ignoring these notifications. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 45cf32ca4126..93b6da88e79c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3912,7 +3912,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; - if (!net_eq(info->net, &init_net) || info->family != AF_INET) + if (!net_eq(info->net, &init_net)) return NOTIFY_DONE; fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);