mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 20:51:44 +00:00
0eeb075fad
This feature is only enabled with the new per-interface or ipv4 global sysctls called 'ignore_routes_with_linkdown'.

net.ipv4.conf.all.ignore_routes_with_linkdown = 0
net.ipv4.conf.default.ignore_routes_with_linkdown = 0
net.ipv4.conf.lo.ignore_routes_with_linkdown = 0
...

When the above sysctls are set, the kernel will report to userspace that a route is dead and will no longer resolve to this nexthop when performing a fib lookup. This will signal to userspace that the route will not be selected. The signalling of a RTNH_F_DEAD is only passed to userspace if the sysctl is enabled and the link is down. This was done as without it the netlink listeners would have no idea whether or not a nexthop would be selected. The kernel only sets RTNH_F_DEAD internally if the interface has IFF_UP cleared.

With the new sysctl set, the following behavior can be observed (interface p8p1 is link-down):

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15
70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1
80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 dead linkdown
90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 dead linkdown
90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2
90.0.0.1 via 70.0.0.2 dev p7p1 src 70.0.0.1
    cache
local 80.0.0.1 dev lo src 80.0.0.1
    cache <local>
80.0.0.2 via 10.0.5.2 dev p9p1 src 10.0.5.15
    cache

While the route does remain in the table (so it can be modified if needed rather than being wiped away as it would be if IFF_UP was cleared), the proper next-hop is chosen automatically when the link is down.
Now interface p8p1 is linked-up:

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15
70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1
80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1
90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1
90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2
192.168.56.0/24 dev p2p1 proto kernel scope link src 192.168.56.2
90.0.0.1 via 80.0.0.2 dev p8p1 src 80.0.0.1
    cache
local 80.0.0.1 dev lo src 80.0.0.1
    cache <local>
80.0.0.2 dev p8p1 src 80.0.0.1
    cache

and the output changes to what one would expect.

If the sysctl is not set, the following output would be expected when p8p1 is down:

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15
70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1
80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 linkdown
90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 linkdown
90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2

Since the dead flag does not appear, there should be no expectation that the kernel would skip using this route due to link being down.

v2: Split kernel changes into 2 patches, this actually makes a behavioral change if the sysctl is set. Also took suggestion from Alex to simplify code by only checking sysctl during fib lookup and suggestion from Scott to add a per-interface sysctl.
v3: Code clean-ups to make it more readable and efficient as well as a reverse path check fix.
v4: Drop binary sysctl
v5: Whitespace fixups from Dave
v6: Style changes from Dave and checkpatch suggestions
v7: One more checkpatch fixup

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
122 lines
3.1 KiB
C
122 lines
3.1 KiB
C
#ifndef __NET_FIB_RULES_H
|
|
#define __NET_FIB_RULES_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/fib_rules.h>
|
|
#include <net/flow.h>
|
|
#include <net/rtnetlink.h>
|
|
|
|
struct fib_rule {
|
|
struct list_head list;
|
|
int iifindex;
|
|
int oifindex;
|
|
u32 mark;
|
|
u32 mark_mask;
|
|
u32 flags;
|
|
u32 table;
|
|
u8 action;
|
|
/* 3 bytes hole, try to use */
|
|
u32 target;
|
|
struct fib_rule __rcu *ctarget;
|
|
struct net *fr_net;
|
|
|
|
atomic_t refcnt;
|
|
u32 pref;
|
|
int suppress_ifgroup;
|
|
int suppress_prefixlen;
|
|
char iifname[IFNAMSIZ];
|
|
char oifname[IFNAMSIZ];
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
/*
 * struct fib_lookup_arg - argument bundle for a rules lookup.
 *
 * A pointer to this structure is taken by fib_rules_lookup() and by the
 * ->action() and ->suppress() callbacks of struct fib_rules_ops.
 */
struct fib_lookup_arg {
	void			*lookup_ptr;
	void			*result;
	struct fib_rule		*rule;
	int			flags;	/* FIB_LOOKUP_* values below (presumably OR-able bits) */
#define FIB_LOOKUP_NOREF		1
#define FIB_LOOKUP_IGNORE_LINKSTATE	2
};
|
|
|
|
struct fib_rules_ops {
|
|
int family;
|
|
struct list_head list;
|
|
int rule_size;
|
|
int addr_size;
|
|
int unresolved_rules;
|
|
int nr_goto_rules;
|
|
|
|
int (*action)(struct fib_rule *,
|
|
struct flowi *, int,
|
|
struct fib_lookup_arg *);
|
|
bool (*suppress)(struct fib_rule *,
|
|
struct fib_lookup_arg *);
|
|
int (*match)(struct fib_rule *,
|
|
struct flowi *, int);
|
|
int (*configure)(struct fib_rule *,
|
|
struct sk_buff *,
|
|
struct fib_rule_hdr *,
|
|
struct nlattr **);
|
|
int (*delete)(struct fib_rule *);
|
|
int (*compare)(struct fib_rule *,
|
|
struct fib_rule_hdr *,
|
|
struct nlattr **);
|
|
int (*fill)(struct fib_rule *, struct sk_buff *,
|
|
struct fib_rule_hdr *);
|
|
u32 (*default_pref)(struct fib_rules_ops *ops);
|
|
size_t (*nlmsg_payload)(struct fib_rule *);
|
|
|
|
/* Called after modifications to the rules set, must flush
|
|
* the route cache if one exists. */
|
|
void (*flush_cache)(struct fib_rules_ops *ops);
|
|
|
|
int nlgroup;
|
|
const struct nla_policy *policy;
|
|
struct list_head rules_list;
|
|
struct module *owner;
|
|
struct net *fro_net;
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
/*
 * Designated-initializer entries for the family-independent FRA_*
 * attributes. The expansion has no trailing comma, so the user decides
 * what follows. Interface names are capped at IFNAMSIZ - 1 bytes,
 * presumably so that the name plus a terminating NUL fits the
 * IFNAMSIZ-sized name buffers in struct fib_rule - confirm against the
 * netlink policy semantics.
 */
#define FRA_GENERIC_POLICY \
	[FRA_IIFNAME]            = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
	[FRA_OIFNAME]            = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
	[FRA_PRIORITY]           = { .type = NLA_U32 }, \
	[FRA_FWMARK]             = { .type = NLA_U32 }, \
	[FRA_FWMASK]             = { .type = NLA_U32 }, \
	[FRA_TABLE]              = { .type = NLA_U32 }, \
	[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
	[FRA_SUPPRESS_IFGROUP]   = { .type = NLA_U32 }, \
	[FRA_GOTO]               = { .type = NLA_U32 }
|
|
|
|
static inline void fib_rule_get(struct fib_rule *rule)
|
|
{
|
|
atomic_inc(&rule->refcnt);
|
|
}
|
|
|
|
static inline void fib_rule_put(struct fib_rule *rule)
|
|
{
|
|
if (atomic_dec_and_test(&rule->refcnt))
|
|
kfree_rcu(rule, rcu);
|
|
}
|
|
|
|
static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
|
|
{
|
|
if (nla[FRA_TABLE])
|
|
return nla_get_u32(nla[FRA_TABLE]);
|
|
return frh->table;
|
|
}
|
|
|
|
struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
|
|
struct net *);
|
|
void fib_rules_unregister(struct fib_rules_ops *);
|
|
|
|
int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
|
|
struct fib_lookup_arg *);
|
|
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
|
|
u32 flags);
|
|
u32 fib_default_rule_pref(struct fib_rules_ops *ops);
|
|
#endif
|