From 9c29a2f55ec05cc8b525ee3b2d75d3cd37911123 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 11 Dec 2018 18:57:21 -0700 Subject: [PATCH 1/5] neighbor: Fix locking order for gc_list changes Lock checker noted an inverted lock order between neigh_change_state (neighbor lock then table lock) and neigh_periodic_work (table lock and then neighbor lock) resulting in: [ 121.057652] ====================================================== [ 121.058740] WARNING: possible circular locking dependency detected [ 121.059861] 4.20.0-rc6+ #43 Not tainted [ 121.060546] ------------------------------------------------------ [ 121.061630] kworker/0:2/65 is trying to acquire lock: [ 121.062519] (____ptrval____) (&n->lock){++--}, at: neigh_periodic_work+0x237/0x324 [ 121.063894] [ 121.063894] but task is already holding lock: [ 121.064920] (____ptrval____) (&tbl->lock){+.-.}, at: neigh_periodic_work+0x194/0x324 [ 121.066274] [ 121.066274] which lock already depends on the new lock. [ 121.066274] [ 121.067693] [ 121.067693] the existing dependency chain (in reverse order) is: ... Fix by renaming neigh_change_state to neigh_update_gc_list, changing it to only manage whether an entry should be on the gc_list and taking locks in the same order as neigh_periodic_work. Invoke at the end of neigh_update only if diff between old or new states has the PERMANENT flag set. Fixes: 8cc196d6ef86 ("neighbor: gc_list changes should be protected by table lock") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 03fdc5ae66b0..010784123bc1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -127,30 +127,30 @@ static void neigh_mark_dead(struct neighbour *n) } } -static void neigh_change_state(struct neighbour *n, u8 new) +static void neigh_update_gc_list(struct neighbour *n) { - bool on_gc_list = !list_empty(&n->gc_list); - bool new_is_perm = new & NUD_PERMANENT; + bool on_gc_list, new_is_perm; - n->nud_state = new; + write_lock_bh(&n->tbl->lock); + write_lock(&n->lock); /* remove from the gc list if new state is permanent; * add to the gc list if new state is not permanent */ - if (new_is_perm && on_gc_list) { - write_lock_bh(&n->tbl->lock); - list_del_init(&n->gc_list); - write_unlock_bh(&n->tbl->lock); + new_is_perm = n->nud_state & NUD_PERMANENT; + on_gc_list = !list_empty(&n->gc_list); + if (new_is_perm && on_gc_list) { + list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } else if (!new_is_perm && !on_gc_list) { /* add entries to the tail; cleaning removes from the front */ - write_lock_bh(&n->tbl->lock); list_add_tail(&n->gc_list, &n->tbl->gc_list); - write_unlock_bh(&n->tbl->lock); - atomic_inc(&n->tbl->gc_entries); } + + write_unlock(&n->lock); + write_unlock_bh(&n->tbl->lock); } static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags, @@ -1220,7 +1220,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); - neigh_change_state(neigh, new); + neigh->nud_state = new; err = 0; notify = old & NUD_VALID; if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && @@ -1299,7 +1299,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0))); - neigh_change_state(neigh, new); + neigh->nud_state = new; notify = 1; } @@ -1360,6 +1360,9 @@ out: neigh_update_is_router(neigh, flags, ¬ify); write_unlock_bh(&neigh->lock); + if ((new ^ old) & NUD_PERMANENT) + neigh_update_gc_list(neigh); + if (notify) neigh_update_notify(neigh, nlmsg_pid); From 758a7f0b32ab890831d321145c3fe72bb85c0350 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 11 Dec 2018 18:57:22 -0700 Subject: [PATCH 2/5] neighbor: Fix state check in neigh_forced_gc PERMANENT entries are not on the gc_list so the state check is now redundant. Also, the move to not purge entries until after 5 seconds should not apply to FAILED entries; those can be removed immediately to make way for newer ones. This restores the previous logic prior to the gc_list. Fixes: 58956317c8de ("neighbor: Improve garbage collection") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 010784123bc1..acaa1a64150d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -204,7 +204,6 @@ static int neigh_forced_gc(struct neigh_table *tbl) unsigned long tref = jiffies - 5 * HZ; u8 flags = NTF_EXT_LEARNED; struct neighbour *n, *tmp; - u8 state = NUD_PERMANENT; int shrunk = 0; NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); @@ -216,8 +215,8 @@ static int neigh_forced_gc(struct neigh_table *tbl) bool remove = false; write_lock(&n->lock); - if (!(n->nud_state & state) && !(n->flags & flags) && - time_after(tref, n->updated)) + if ((n->nud_state == NUD_FAILED) || + (!(n->flags & flags) && time_after(tref, n->updated))) remove = true; write_unlock(&n->lock); From 7e6f182bec7debb420a2c12ae0ea1813645a7ac4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 11 Dec 2018 18:57:23 -0700 Subject: [PATCH 3/5] neighbor: Remove state and flags arguments to neigh_del neigh_del now only has 1 caller, and the state and flags arguments are both 0. Remove them and simplify neigh_del. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index acaa1a64150d..bb6f9ca7a3ce 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -153,14 +153,13 @@ static void neigh_update_gc_list(struct neighbour *n) write_unlock_bh(&n->tbl->lock); } -static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags, - struct neighbour __rcu **np, struct neigh_table *tbl) +static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, + struct neigh_table *tbl) { bool retval = false; write_lock(&n->lock); - if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) && - !(n->flags & flags)) { + if (refcount_read(&n->refcnt) == 1) { struct neighbour *neigh; neigh = rcu_dereference_protected(n->next, @@ -192,7 +191,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock)))) { if (n == ndel) - return neigh_del(n, 0, 0, np, tbl); + return neigh_del(n, np, tbl); np = &n->next; } return false; From 526f1b587cf826d78c3e522428ce6b24a8da0d65 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 11 Dec 2018 18:57:24 -0700 Subject: [PATCH 4/5] neighbor: Move neigh_update_ext_learned to core file neigh_update_ext_learned has one caller in neighbour.c so does not need to be defined in the header. Move it and in the process remove the intialization of ndm_flags and just set it based on the flags check. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/neighbour.h | 18 ------------------ net/core/neighbour.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index f886b58956a6..ef0a60448a96 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -549,24 +549,6 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, } while (read_seqretry(&n->ha_lock, seq)); } -static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, - int *notify) -{ - u8 ndm_flags = 0; - - if (!(flags & NEIGH_UPDATE_F_ADMIN)) - return; - - ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; - if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { - if (ndm_flags & NTF_EXT_LEARNED) - neigh->flags |= NTF_EXT_LEARNED; - else - neigh->flags &= ~NTF_EXT_LEARNED; - *notify = 1; - } -} - static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, int *notify) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index bb6f9ca7a3ce..2401040f799b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -153,6 +153,24 @@ static void neigh_update_gc_list(struct neighbour *n) write_unlock_bh(&n->tbl->lock); } +static void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, + int *notify) +{ + u8 ndm_flags; + + if (!(flags & NEIGH_UPDATE_F_ADMIN)) + return; + + ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; + if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { + if (ndm_flags & NTF_EXT_LEARNED) + neigh->flags |= NTF_EXT_LEARNED; + else + neigh->flags &= ~NTF_EXT_LEARNED; + *notify = 1; + } +} + static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, struct neigh_table *tbl) { From e997f8a20a57cae16ed0c7a2bff6d3ab75f58123 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 11 Dec 2018 18:57:25 -0700 Subject: [PATCH 5/5] neighbor: Remove externally learned entries from gc_list Externally learned entries are similar to PERMANENT entries in the sense they are managed by userspace and can not be garbage collected. As such remove them from the gc_list, remove the flags check from neigh_forced_gc and skip threshold checks in neigh_alloc. As with PERMANENT entries, this allows unlimited number of NTF_EXT_LEARNED entries. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 49 +++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2401040f799b..42b413774370 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -129,21 +129,22 @@ static void neigh_mark_dead(struct neighbour *n) static void neigh_update_gc_list(struct neighbour *n) { - bool on_gc_list, new_is_perm; + bool on_gc_list, exempt_from_gc; write_lock_bh(&n->tbl->lock); write_lock(&n->lock); - /* remove from the gc list if new state is permanent; - * add to the gc list if new state is not permanent + /* remove from the gc list if new state is permanent or if neighbor + * is externally learned; otherwise entry should be on the gc list */ - new_is_perm = n->nud_state & NUD_PERMANENT; + exempt_from_gc = n->nud_state & NUD_PERMANENT || + n->flags & NTF_EXT_LEARNED; on_gc_list = !list_empty(&n->gc_list); - if (new_is_perm && on_gc_list) { + if (exempt_from_gc && on_gc_list) { list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); - } else if (!new_is_perm && !on_gc_list) { + } else if (!exempt_from_gc && !on_gc_list) { /* add entries to the tail; cleaning removes from the front */ list_add_tail(&n->gc_list, &n->tbl->gc_list); atomic_inc(&n->tbl->gc_entries); @@ -153,13 +154,14 @@ static void neigh_update_gc_list(struct neighbour *n) write_unlock_bh(&n->tbl->lock); } -static void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, +static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, int *notify) { + bool rc = false; u8 ndm_flags; if (!(flags & NEIGH_UPDATE_F_ADMIN)) - return; + return rc; ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { @@ -167,8 +169,11 @@ static void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, neigh->flags |= NTF_EXT_LEARNED; else neigh->flags &= ~NTF_EXT_LEARNED; + rc = true; *notify = 1; } + + return rc; } static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, @@ -219,7 +224,6 @@ static int neigh_forced_gc(struct neigh_table *tbl) { int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2; unsigned long tref = jiffies - 5 * HZ; - u8 flags = NTF_EXT_LEARNED; struct neighbour *n, *tmp; int shrunk = 0; @@ -233,7 +237,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || - (!(n->flags & flags) && time_after(tref, n->updated))) + time_after(tref, n->updated)) remove = true; write_unlock(&n->lock); @@ -371,13 +375,13 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, - bool permanent) + bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; int entries; - if (permanent) + if (exempt_from_gc) goto do_alloc; entries = atomic_inc_return(&tbl->gc_entries) - 1; @@ -419,7 +423,7 @@ out: return n; out_entries: - if (!permanent) + if (!exempt_from_gc) atomic_dec(&tbl->gc_entries); goto out; } @@ -566,9 +570,9 @@ EXPORT_SYMBOL(neigh_lookup_nodev); static struct neighbour *___neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, - bool permanent, bool want_ref) + bool exempt_from_gc, bool want_ref) { - struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, permanent); + struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); u32 hash_val; unsigned int key_len = tbl->key_len; int error; @@ -634,7 +638,7 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl, } n->dead = 0; - if (!permanent) + if (!exempt_from_gc) list_add_tail(&n->gc_list, &n->tbl->gc_list); if (want_ref) @@ -1210,6 +1214,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid, struct netlink_ext_ack *extack) { + bool ext_learn_change = false; u8 old; int err; int notify = 0; @@ -1230,7 +1235,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, goto out; } - neigh_update_ext_learned(neigh, flags, ¬ify); + ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1376,7 +1381,7 @@ out: neigh_update_is_router(neigh, flags, ¬ify); write_unlock_bh(&neigh->lock); - if ((new ^ old) & NUD_PERMANENT) + if (((new ^ old) & NUD_PERMANENT) || ext_learn_change) neigh_update_gc_list(neigh); if (notify) @@ -1881,14 +1886,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { + bool exempt_from_gc; + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; goto out; } - neigh = ___neigh_create(tbl, dst, dev, - ndm->ndm_state & NUD_PERMANENT, - true); + exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || + ndm->ndm_flags & NTF_EXT_LEARNED; + neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out;