mirror of
https://github.com/torvalds/linux.git
synced 2024-12-05 02:23:16 +00:00
netfilter: ipt_CLUSTERIP: fix deadlock in netns exit routine
When network namespace is destroyed, cleanup_net() is called.
cleanup_net() holds pernet_ops_rwsem then calls each ->exit callback.
So that clusterip_tg_destroy() is called by cleanup_net().
And clusterip_tg_destroy() calls unregister_netdevice_notifier().
But both cleanup_net() and clusterip_tg_destroy() hold same
lock(pernet_ops_rwsem). hence deadlock occurrs.
After this patch, only 1 notifier is registered when module is inserted.
And all of configs are added to per-net list.
test commands:
%ip netns add vm1
%ip netns exec vm1 bash
%ip link set lo up
%iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \
-j CLUSTERIP --new --hashmode sourceip \
--clustermac 01:00:5e:00:00:20 --total-nodes 2 --local-node 1
%exit
%ip netns del vm1
splat looks like:
[ 341.809674] ============================================
[ 341.809674] WARNING: possible recursive locking detected
[ 341.809674] 4.19.0-rc5+ #16 Tainted: G W
[ 341.809674] --------------------------------------------
[ 341.809674] kworker/u4:2/87 is trying to acquire lock:
[ 341.809674] 000000005da2d519 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x8c/0x460
[ 341.809674]
[ 341.809674] but task is already holding lock:
[ 341.809674] 000000005da2d519 (pernet_ops_rwsem){++++}, at: cleanup_net+0x119/0x900
[ 341.809674]
[ 341.809674] other info that might help us debug this:
[ 341.809674] Possible unsafe locking scenario:
[ 341.809674]
[ 341.809674] CPU0
[ 341.809674] ----
[ 341.809674] lock(pernet_ops_rwsem);
[ 341.809674] lock(pernet_ops_rwsem);
[ 341.809674]
[ 341.809674] *** DEADLOCK ***
[ 341.809674]
[ 341.809674] May be due to missing lock nesting notation
[ 341.809674]
[ 341.809674] 3 locks held by kworker/u4:2/87:
[ 341.809674] #0: 00000000d9df6c92 ((wq_completion)"%s""netns"){+.+.}, at: process_one_work+0xafe/0x1de0
[ 341.809674] #1: 00000000c2cbcee2 (net_cleanup_work){+.+.}, at: process_one_work+0xb60/0x1de0
[ 341.809674] #2: 000000005da2d519 (pernet_ops_rwsem){++++}, at: cleanup_net+0x119/0x900
[ 341.809674]
[ 341.809674] stack backtrace:
[ 341.809674] CPU: 1 PID: 87 Comm: kworker/u4:2 Tainted: G W 4.19.0-rc5+ #16
[ 341.809674] Workqueue: netns cleanup_net
[ 341.809674] Call Trace:
[ ... ]
[ 342.070196] down_write+0x93/0x160
[ 342.070196] ? unregister_netdevice_notifier+0x8c/0x460
[ 342.070196] ? down_read+0x1e0/0x1e0
[ 342.070196] ? sched_clock_cpu+0x126/0x170
[ 342.070196] ? find_held_lock+0x39/0x1c0
[ 342.070196] unregister_netdevice_notifier+0x8c/0x460
[ 342.070196] ? register_netdevice_notifier+0x790/0x790
[ 342.070196] ? __local_bh_enable_ip+0xe9/0x1b0
[ 342.070196] ? __local_bh_enable_ip+0xe9/0x1b0
[ 342.070196] ? clusterip_tg_destroy+0x372/0x650 [ipt_CLUSTERIP]
[ 342.070196] ? trace_hardirqs_on+0x93/0x210
[ 342.070196] ? __bpf_trace_preemptirq_template+0x10/0x10
[ 342.070196] ? clusterip_tg_destroy+0x372/0x650 [ipt_CLUSTERIP]
[ 342.123094] clusterip_tg_destroy+0x3ad/0x650 [ipt_CLUSTERIP]
[ 342.123094] ? clusterip_net_init+0x3d0/0x3d0 [ipt_CLUSTERIP]
[ 342.123094] ? cleanup_match+0x17d/0x200 [ip_tables]
[ 342.123094] ? xt_unregister_table+0x215/0x300 [x_tables]
[ 342.123094] ? kfree+0xe2/0x2a0
[ 342.123094] cleanup_entry+0x1d5/0x2f0 [ip_tables]
[ 342.123094] ? cleanup_match+0x200/0x200 [ip_tables]
[ 342.123094] __ipt_unregister_table+0x9b/0x1a0 [ip_tables]
[ 342.123094] iptable_filter_net_exit+0x43/0x80 [iptable_filter]
[ 342.123094] ops_exit_list.isra.10+0x94/0x140
[ 342.123094] cleanup_net+0x45b/0x900
[ ... ]
Fixes: 202f59afd4
("netfilter: ipt_CLUSTERIP: do not hold dev")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
parent
241faeceb8
commit
5a86d68bcf
@ -57,17 +57,14 @@ struct clusterip_config {
|
|||||||
enum clusterip_hashmode hash_mode; /* which hashing mode */
|
enum clusterip_hashmode hash_mode; /* which hashing mode */
|
||||||
u_int32_t hash_initval; /* hash initialization */
|
u_int32_t hash_initval; /* hash initialization */
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
|
struct net *net; /* netns for pernet list */
|
||||||
char ifname[IFNAMSIZ]; /* device ifname */
|
char ifname[IFNAMSIZ]; /* device ifname */
|
||||||
struct notifier_block notifier; /* refresh c->ifindex in it */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
static const struct file_operations clusterip_proc_fops;
|
static const struct file_operations clusterip_proc_fops;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static unsigned int clusterip_net_id __read_mostly;
|
|
||||||
|
|
||||||
struct clusterip_net {
|
struct clusterip_net {
|
||||||
struct list_head configs;
|
struct list_head configs;
|
||||||
/* lock protects the configs list */
|
/* lock protects the configs list */
|
||||||
@ -78,16 +75,30 @@ struct clusterip_net {
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static unsigned int clusterip_net_id __read_mostly;
|
||||||
|
static inline struct clusterip_net *clusterip_pernet(struct net *net)
|
||||||
|
{
|
||||||
|
return net_generic(net, clusterip_net_id);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
clusterip_config_get(struct clusterip_config *c)
|
clusterip_config_get(struct clusterip_config *c)
|
||||||
{
|
{
|
||||||
refcount_inc(&c->refcount);
|
refcount_inc(&c->refcount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void clusterip_config_rcu_free(struct rcu_head *head)
|
static void clusterip_config_rcu_free(struct rcu_head *head)
|
||||||
{
|
{
|
||||||
kfree(container_of(head, struct clusterip_config, rcu));
|
struct clusterip_config *config;
|
||||||
|
struct net_device *dev;
|
||||||
|
|
||||||
|
config = container_of(head, struct clusterip_config, rcu);
|
||||||
|
dev = dev_get_by_name(config->net, config->ifname);
|
||||||
|
if (dev) {
|
||||||
|
dev_mc_del(dev, config->clustermac);
|
||||||
|
dev_put(dev);
|
||||||
|
}
|
||||||
|
kfree(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
@ -101,9 +112,9 @@ clusterip_config_put(struct clusterip_config *c)
|
|||||||
* entry(rule) is removed, remove the config from lists, but don't free it
|
* entry(rule) is removed, remove the config from lists, but don't free it
|
||||||
* yet, since proc-files could still be holding references */
|
* yet, since proc-files could still be holding references */
|
||||||
static inline void
|
static inline void
|
||||||
clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
|
clusterip_config_entry_put(struct clusterip_config *c)
|
||||||
{
|
{
|
||||||
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
|
struct clusterip_net *cn = clusterip_pernet(c->net);
|
||||||
|
|
||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
|
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
|
||||||
@ -118,8 +129,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
|
|||||||
spin_unlock(&cn->lock);
|
spin_unlock(&cn->lock);
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
|
|
||||||
unregister_netdevice_notifier(&c->notifier);
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
@ -129,7 +138,7 @@ static struct clusterip_config *
|
|||||||
__clusterip_config_find(struct net *net, __be32 clusterip)
|
__clusterip_config_find(struct net *net, __be32 clusterip)
|
||||||
{
|
{
|
||||||
struct clusterip_config *c;
|
struct clusterip_config *c;
|
||||||
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
|
struct clusterip_net *cn = clusterip_pernet(net);
|
||||||
|
|
||||||
list_for_each_entry_rcu(c, &cn->configs, list) {
|
list_for_each_entry_rcu(c, &cn->configs, list) {
|
||||||
if (c->clusterip == clusterip)
|
if (c->clusterip == clusterip)
|
||||||
@ -181,32 +190,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event,
|
|||||||
void *ptr)
|
void *ptr)
|
||||||
{
|
{
|
||||||
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
struct net *net = dev_net(dev);
|
||||||
|
struct clusterip_net *cn = clusterip_pernet(net);
|
||||||
struct clusterip_config *c;
|
struct clusterip_config *c;
|
||||||
|
|
||||||
c = container_of(this, struct clusterip_config, notifier);
|
spin_lock_bh(&cn->lock);
|
||||||
switch (event) {
|
list_for_each_entry_rcu(c, &cn->configs, list) {
|
||||||
case NETDEV_REGISTER:
|
switch (event) {
|
||||||
if (!strcmp(dev->name, c->ifname)) {
|
case NETDEV_REGISTER:
|
||||||
c->ifindex = dev->ifindex;
|
if (!strcmp(dev->name, c->ifname)) {
|
||||||
dev_mc_add(dev, c->clustermac);
|
c->ifindex = dev->ifindex;
|
||||||
|
dev_mc_add(dev, c->clustermac);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case NETDEV_UNREGISTER:
|
||||||
|
if (dev->ifindex == c->ifindex) {
|
||||||
|
dev_mc_del(dev, c->clustermac);
|
||||||
|
c->ifindex = -1;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case NETDEV_CHANGENAME:
|
||||||
|
if (!strcmp(dev->name, c->ifname)) {
|
||||||
|
c->ifindex = dev->ifindex;
|
||||||
|
dev_mc_add(dev, c->clustermac);
|
||||||
|
} else if (dev->ifindex == c->ifindex) {
|
||||||
|
dev_mc_del(dev, c->clustermac);
|
||||||
|
c->ifindex = -1;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
case NETDEV_UNREGISTER:
|
|
||||||
if (dev->ifindex == c->ifindex) {
|
|
||||||
dev_mc_del(dev, c->clustermac);
|
|
||||||
c->ifindex = -1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case NETDEV_CHANGENAME:
|
|
||||||
if (!strcmp(dev->name, c->ifname)) {
|
|
||||||
c->ifindex = dev->ifindex;
|
|
||||||
dev_mc_add(dev, c->clustermac);
|
|
||||||
} else if (dev->ifindex == c->ifindex) {
|
|
||||||
dev_mc_del(dev, c->clustermac);
|
|
||||||
c->ifindex = -1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
spin_unlock_bh(&cn->lock);
|
||||||
|
|
||||||
return NOTIFY_DONE;
|
return NOTIFY_DONE;
|
||||||
}
|
}
|
||||||
@ -215,30 +229,44 @@ static struct clusterip_config *
|
|||||||
clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
|
clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
|
||||||
__be32 ip, const char *iniface)
|
__be32 ip, const char *iniface)
|
||||||
{
|
{
|
||||||
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
|
struct clusterip_net *cn = clusterip_pernet(net);
|
||||||
struct clusterip_config *c;
|
struct clusterip_config *c;
|
||||||
|
struct net_device *dev;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
if (iniface[0] == '\0') {
|
||||||
|
pr_info("Please specify an interface name\n");
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
}
|
||||||
|
|
||||||
c = kzalloc(sizeof(*c), GFP_ATOMIC);
|
c = kzalloc(sizeof(*c), GFP_ATOMIC);
|
||||||
if (!c)
|
if (!c)
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
strcpy(c->ifname, iniface);
|
dev = dev_get_by_name(net, iniface);
|
||||||
c->ifindex = -1;
|
if (!dev) {
|
||||||
c->clusterip = ip;
|
pr_info("no such interface %s\n", iniface);
|
||||||
|
kfree(c);
|
||||||
|
return ERR_PTR(-ENOENT);
|
||||||
|
}
|
||||||
|
c->ifindex = dev->ifindex;
|
||||||
|
strcpy(c->ifname, dev->name);
|
||||||
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
|
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
|
||||||
|
dev_mc_add(dev, c->clustermac);
|
||||||
|
dev_put(dev);
|
||||||
|
|
||||||
|
c->clusterip = ip;
|
||||||
c->num_total_nodes = i->num_total_nodes;
|
c->num_total_nodes = i->num_total_nodes;
|
||||||
clusterip_config_init_nodelist(c, i);
|
clusterip_config_init_nodelist(c, i);
|
||||||
c->hash_mode = i->hash_mode;
|
c->hash_mode = i->hash_mode;
|
||||||
c->hash_initval = i->hash_initval;
|
c->hash_initval = i->hash_initval;
|
||||||
|
c->net = net;
|
||||||
refcount_set(&c->refcount, 1);
|
refcount_set(&c->refcount, 1);
|
||||||
|
|
||||||
spin_lock_bh(&cn->lock);
|
spin_lock_bh(&cn->lock);
|
||||||
if (__clusterip_config_find(net, ip)) {
|
if (__clusterip_config_find(net, ip)) {
|
||||||
spin_unlock_bh(&cn->lock);
|
err = -EBUSY;
|
||||||
kfree(c);
|
goto out_config_put;
|
||||||
|
|
||||||
return ERR_PTR(-EBUSY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
list_add_rcu(&c->list, &cn->configs);
|
list_add_rcu(&c->list, &cn->configs);
|
||||||
@ -260,22 +288,17 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
c->notifier.notifier_call = clusterip_netdev_event;
|
refcount_set(&c->entries, 1);
|
||||||
err = register_netdevice_notifier(&c->notifier);
|
return c;
|
||||||
if (!err) {
|
|
||||||
refcount_set(&c->entries, 1);
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
proc_remove(c->pde);
|
|
||||||
err:
|
err:
|
||||||
#endif
|
#endif
|
||||||
spin_lock_bh(&cn->lock);
|
spin_lock_bh(&cn->lock);
|
||||||
list_del_rcu(&c->list);
|
list_del_rcu(&c->list);
|
||||||
|
out_config_put:
|
||||||
spin_unlock_bh(&cn->lock);
|
spin_unlock_bh(&cn->lock);
|
||||||
clusterip_config_put(c);
|
clusterip_config_put(c);
|
||||||
|
|
||||||
return ERR_PTR(err);
|
return ERR_PTR(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -475,21 +498,6 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
|
|||||||
&e->ip.dst.s_addr);
|
&e->ip.dst.s_addr);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
} else {
|
} else {
|
||||||
struct net_device *dev;
|
|
||||||
|
|
||||||
if (e->ip.iniface[0] == '\0') {
|
|
||||||
pr_info("Please specify an interface name\n");
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
dev = dev_get_by_name(par->net, e->ip.iniface);
|
|
||||||
if (!dev) {
|
|
||||||
pr_info("no such interface %s\n",
|
|
||||||
e->ip.iniface);
|
|
||||||
return -ENOENT;
|
|
||||||
}
|
|
||||||
dev_put(dev);
|
|
||||||
|
|
||||||
config = clusterip_config_init(par->net, cipinfo,
|
config = clusterip_config_init(par->net, cipinfo,
|
||||||
e->ip.dst.s_addr,
|
e->ip.dst.s_addr,
|
||||||
e->ip.iniface);
|
e->ip.iniface);
|
||||||
@ -502,7 +510,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
|
|||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
pr_info("cannot load conntrack support for proto=%u\n",
|
pr_info("cannot load conntrack support for proto=%u\n",
|
||||||
par->family);
|
par->family);
|
||||||
clusterip_config_entry_put(par->net, config);
|
clusterip_config_entry_put(config);
|
||||||
clusterip_config_put(config);
|
clusterip_config_put(config);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -524,7 +532,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
|
|||||||
|
|
||||||
/* if no more entries are referencing the config, remove it
|
/* if no more entries are referencing the config, remove it
|
||||||
* from the list and destroy the proc entry */
|
* from the list and destroy the proc entry */
|
||||||
clusterip_config_entry_put(par->net, cipinfo->config);
|
clusterip_config_entry_put(cipinfo->config);
|
||||||
|
|
||||||
clusterip_config_put(cipinfo->config);
|
clusterip_config_put(cipinfo->config);
|
||||||
|
|
||||||
@ -806,7 +814,7 @@ static const struct file_operations clusterip_proc_fops = {
|
|||||||
|
|
||||||
static int clusterip_net_init(struct net *net)
|
static int clusterip_net_init(struct net *net)
|
||||||
{
|
{
|
||||||
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
|
struct clusterip_net *cn = clusterip_pernet(net);
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&cn->configs);
|
INIT_LIST_HEAD(&cn->configs);
|
||||||
@ -831,7 +839,7 @@ static int clusterip_net_init(struct net *net)
|
|||||||
|
|
||||||
static void clusterip_net_exit(struct net *net)
|
static void clusterip_net_exit(struct net *net)
|
||||||
{
|
{
|
||||||
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
|
struct clusterip_net *cn = clusterip_pernet(net);
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
proc_remove(cn->procdir);
|
proc_remove(cn->procdir);
|
||||||
cn->procdir = NULL;
|
cn->procdir = NULL;
|
||||||
@ -847,6 +855,10 @@ static struct pernet_operations clusterip_net_ops = {
|
|||||||
.size = sizeof(struct clusterip_net),
|
.size = sizeof(struct clusterip_net),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct notifier_block cip_netdev_notifier = {
|
||||||
|
.notifier_call = clusterip_netdev_event
|
||||||
|
};
|
||||||
|
|
||||||
static int __init clusterip_tg_init(void)
|
static int __init clusterip_tg_init(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
@ -859,11 +871,17 @@ static int __init clusterip_tg_init(void)
|
|||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto cleanup_subsys;
|
goto cleanup_subsys;
|
||||||
|
|
||||||
|
ret = register_netdevice_notifier(&cip_netdev_notifier);
|
||||||
|
if (ret < 0)
|
||||||
|
goto unregister_target;
|
||||||
|
|
||||||
pr_info("ClusterIP Version %s loaded successfully\n",
|
pr_info("ClusterIP Version %s loaded successfully\n",
|
||||||
CLUSTERIP_VERSION);
|
CLUSTERIP_VERSION);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
unregister_target:
|
||||||
|
xt_unregister_target(&clusterip_tg_reg);
|
||||||
cleanup_subsys:
|
cleanup_subsys:
|
||||||
unregister_pernet_subsys(&clusterip_net_ops);
|
unregister_pernet_subsys(&clusterip_net_ops);
|
||||||
return ret;
|
return ret;
|
||||||
@ -873,6 +891,7 @@ static void __exit clusterip_tg_exit(void)
|
|||||||
{
|
{
|
||||||
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
|
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
|
||||||
|
|
||||||
|
unregister_netdevice_notifier(&cip_netdev_notifier);
|
||||||
xt_unregister_target(&clusterip_tg_reg);
|
xt_unregister_target(&clusterip_tg_reg);
|
||||||
unregister_pernet_subsys(&clusterip_net_ops);
|
unregister_pernet_subsys(&clusterip_net_ops);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user