Merge branch 'net-speedup-netns-dismantles'

Eric Dumazet says:

====================
net: speedup netns dismantles

From: Eric Dumazet <edumazet@google.com>

In this series, I made network namespace deletions more scalable,
by 4x on the little benchmark described in this cover letter.

- Remove the bottleneck on ipv6 addrconf by replacing a global
  hash table with a per-netns one.

- Convert many (struct pernet_operations)->exit() handlers into
  exit_batch() ones. This removes many rtnl acquisitions
  and gives cleanup_net() a kind of priority over rtnl
  ownership.

Tested on a host with 24 cpus (48 HT)

Test script:

for nr in {1..10}
do
  (for i in {1..10000}; do unshare -n /bin/bash -c "ifconfig lo up"; done) &
done
wait

for i in {1..10}
do
  sleep 1
  echo 3 >/proc/sys/vm/drop_caches
  grep net_namespace /proc/slabinfo
done

Before: We can see the host struggles to clean up the netns,
even after there are no new creations.
Memory cost is high, because each netns consumes a good amount of memory.

time ./unshare10.sh
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      37214  37792   3968    1    1 : tunables   24   12    8 : slabdata  37214  37792    192

real	6m57.766s
user	3m37.277s
sys	40m4.826s

After: We can see the script completes much faster, and
the kernel thread doing the cleanup_net() keeps up just fine.
Memory cost is not too big.

time ./unshare10.sh
net_namespace       9945   9945   4096    1    1 : tunables   24   12    8 : slabdata   9945   9945      0
net_namespace       4087   4665   4096    1    1 : tunables   24   12    8 : slabdata   4087   4665    192
net_namespace       4082   4607   4096    1    1 : tunables   24   12    8 : slabdata   4082   4607    192
net_namespace        234    761   4096    1    1 : tunables   24   12    8 : slabdata    234    761    192
net_namespace        224    751   4096    1    1 : tunables   24   12    8 : slabdata    224    751    192
net_namespace        218    745   4096    1    1 : tunables   24   12    8 : slabdata    218    745    192
net_namespace        193    667   4096    1    1 : tunables   24   12    8 : slabdata    193    667    172
net_namespace        167    609   4096    1    1 : tunables   24   12    8 : slabdata    167    609    152
net_namespace        167    609   4096    1    1 : tunables   24   12    8 : slabdata    167    609    152
net_namespace        157    609   4096    1    1 : tunables   24   12    8 : slabdata    157    609    152

real    1m43.876s
user    3m39.728s
sys 7m36.342s
====================

Link: https://lore.kernel.org/r/20220208045038.2635826-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2022-02-08 20:41:46 -08:00
commit 4caaf75888
11 changed files with 172 additions and 113 deletions

View File

@ -6048,27 +6048,38 @@ static int __net_init bond_net_init(struct net *net)
return 0;
}
static void __net_exit bond_net_exit(struct net *net)
static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
struct bond_net *bn = net_generic(net, bond_net_id);
struct bonding *bond, *tmp_bond;
struct bond_net *bn;
struct net *net;
LIST_HEAD(list);
bond_destroy_sysfs(bn);
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
bond_destroy_sysfs(bn);
}
/* Kill off any bonds created after unregistering bond rtnl ops */
rtnl_lock();
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
unregister_netdevice_queue(bond->dev, &list);
list_for_each_entry(net, net_list, exit_list) {
struct bonding *bond, *tmp_bond;
bn = net_generic(net, bond_net_id);
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
unregister_netdevice_queue(bond->dev, &list);
}
unregister_netdevice_many(&list);
rtnl_unlock();
bond_destroy_proc_dir(bn);
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
bond_destroy_proc_dir(bn);
}
}
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
.exit = bond_net_exit,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
};

View File

@ -307,7 +307,6 @@ void __net_init bond_create_proc_dir(struct bond_net *bn)
}
/* Destroy the bonding directory under /proc/net, if empty.
* Caller must hold rtnl_lock.
*/
void __net_exit bond_destroy_proc_dir(struct bond_net *bn)
{

View File

@ -92,6 +92,11 @@ struct netns_ipv6 {
struct sock *tcp_sk;
struct sock *igmp_sk;
struct sock *mc_autojoin_sk;
struct hlist_head *inet6_addr_lst;
spinlock_t addrconf_hash_lock;
struct delayed_work addr_chk_work;
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
struct mr_table *mrt6;

View File

@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
return 0;
}
static void __net_exit cangw_pernet_exit(struct net *net)
static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
{
struct net *net;
rtnl_lock();
cgw_remove_all_jobs(net);
list_for_each_entry(net, net_list, exit_list)
cgw_remove_all_jobs(net);
rtnl_unlock();
}
static struct pernet_operations cangw_pernet_ops = {
.init = cangw_pernet_init,
.exit = cangw_pernet_exit,
.exit_batch = cangw_pernet_exit_batch,
};
static __init int cgw_module_init(void)

View File

@ -10850,14 +10850,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
.exit = netdev_exit,
};
static void __net_exit default_device_exit(struct net *net)
static void __net_exit default_device_exit_net(struct net *net)
{
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
* initial network namespace
*/
rtnl_lock();
ASSERT_RTNL();
for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
@ -10881,22 +10881,22 @@ static void __net_exit default_device_exit(struct net *net)
BUG();
}
}
rtnl_unlock();
}
static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
/* Return with the rtnl_lock held when there are no network
/* Return (with the rtnl_lock held) when there are no network
* devices unregistering in any network namespace in net_list.
*/
struct net *net;
bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
bool unregistering;
struct net *net;
ASSERT_RTNL();
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
unregistering = false;
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
if (net->dev_unreg_count > 0) {
unregistering = true;
@ -10908,6 +10908,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
__rtnl_unlock();
wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
rtnl_lock();
}
remove_wait_queue(&netdev_unregistering_wq, &wait);
}
@ -10923,6 +10924,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
default_device_exit_net(net);
cond_resched();
}
/* To prevent network device cleanup code from dereferencing
* loopback devices or network devices that have been freed
* wait here for all pending unregistrations to complete,
@ -10935,6 +10941,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
* default_device_exit_batch.
*/
rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
@ -10948,7 +10955,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
};

View File

@ -1556,7 +1556,7 @@ static void ip_fib_net_exit(struct net *net)
{
int i;
rtnl_lock();
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
@ -1581,7 +1581,7 @@ static void ip_fib_net_exit(struct net *net)
#ifdef CONFIG_IP_MULTIPLE_TABLES
fib4_rules_exit(net);
#endif
rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
fib4_notifier_exit(net);
}
@ -1608,7 +1608,9 @@ out:
out_proc:
nl_fib_lookup_exit(net);
out_nlfl:
rtnl_lock();
ip_fib_net_exit(net);
rtnl_unlock();
goto out;
}
@ -1616,12 +1618,23 @@ static void __net_exit fib_net_exit(struct net *net)
{
fib_proc_exit(net);
nl_fib_lookup_exit(net);
ip_fib_net_exit(net);
}
static void __net_exit fib_net_exit_batch(struct list_head *net_list)
{
struct net *net;
rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
ip_fib_net_exit(net);
rtnl_unlock();
}
static struct pernet_operations fib_net_ops = {
.init = fib_net_init,
.exit = fib_net_exit,
.exit_batch = fib_net_exit_batch,
};
void __init ip_fib_init(void)

View File

@ -266,13 +266,12 @@ static void __net_exit ipmr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
rtnl_lock();
ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@ -328,10 +327,9 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
rtnl_lock();
ASSERT_RTNL();
ipmr_free_table(net->ipv4.mrt);
net->ipv4.mrt = NULL;
rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@ -3075,7 +3073,9 @@ static int __net_init ipmr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
rtnl_lock();
ipmr_rules_exit(net);
rtnl_unlock();
#endif
ipmr_rules_fail:
ipmr_notifier_exit(net);
@ -3090,12 +3090,22 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
ipmr_notifier_exit(net);
ipmr_rules_exit(net);
}
static void __net_exit ipmr_net_exit_batch(struct list_head *net_list)
{
struct net *net;
rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
ipmr_rules_exit(net);
rtnl_unlock();
}
static struct pernet_operations ipmr_net_ops = {
.init = ipmr_net_init,
.exit = ipmr_net_exit,
.exit_batch = ipmr_net_exit_batch,
};
int __init ip_mr_init(void)

View File

@ -3733,12 +3733,16 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
static void __net_exit nexthop_net_exit(struct net *net)
static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
{
struct net *net;
rtnl_lock();
flush_all_nexthops(net);
list_for_each_entry(net, net_list, exit_list) {
flush_all_nexthops(net);
kfree(net->nexthop.devhash);
}
rtnl_unlock();
kfree(net->nexthop.devhash);
}
static int __net_init nexthop_net_init(struct net *net)
@ -3756,7 +3760,7 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
.exit = nexthop_net_exit,
.exit_batch = nexthop_net_exit_batch,
};
static int __init nexthop_init(void)

View File

@ -146,18 +146,11 @@ static int ipv6_generate_stable_address(struct in6_addr *addr,
#define IN6_ADDR_HSIZE_SHIFT 8
#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
/*
* Configured unicast address hash table
*/
static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(addrconf_hash_lock);
static void addrconf_verify(void);
static void addrconf_verify_rtnl(void);
static void addrconf_verify_work(struct work_struct *);
static void addrconf_verify(struct net *net);
static void addrconf_verify_rtnl(struct net *net);
static struct workqueue_struct *addrconf_wq;
static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@ -1011,9 +1004,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev)
return true;
@ -1024,20 +1015,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
{
unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
struct net *net = dev_net(dev);
unsigned int hash = inet6_addr_hash(net, &ifa->addr);
int err = 0;
spin_lock(&addrconf_hash_lock);
spin_lock(&net->ipv6.addrconf_hash_lock);
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) {
netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]);
}
spin_unlock(&addrconf_hash_lock);
spin_unlock(&net->ipv6.addrconf_hash_lock);
return err;
}
@ -1261,9 +1253,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
int state;
enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
struct net *net = dev_net(ifp->idev->dev);
unsigned long expires;
int state;
ASSERT_RTNL();
@ -1275,9 +1268,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (state == INET6_IFADDR_STATE_DEAD)
goto out;
spin_lock_bh(&addrconf_hash_lock);
spin_lock_bh(&net->ipv6.addrconf_hash_lock);
hlist_del_init_rcu(&ifp->addr_lst);
spin_unlock_bh(&addrconf_hash_lock);
spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
write_lock_bh(&ifp->idev->lock);
@ -1920,10 +1913,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
if (skip_dev_check)
dev = NULL;
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
ndev = ifp->idev->dev;
if (!net_eq(dev_net(ndev), net))
continue;
if (l3mdev_master_dev_rcu(ndev) != l3mdev)
continue;
@ -2027,9 +2018,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
struct inet6_ifaddr *ifp, *result = NULL;
rcu_read_lock();
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
@ -2096,7 +2085,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net *net = dev_net(ifp->idev->dev);
struct net *net = dev_net(idev->dev);
if (addrconf_dad_end(ifp)) {
in6_ifa_put(ifp);
@ -2675,7 +2664,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
create, now);
in6_ifa_put(ifp);
addrconf_verify();
addrconf_verify(net);
}
return 0;
@ -2987,7 +2976,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
manage_tempaddrs(idev, ifp, cfg->valid_lft,
cfg->preferred_lft, true, jiffies);
in6_ifa_put(ifp);
addrconf_verify_rtnl();
addrconf_verify_rtnl(net);
return 0;
} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
@ -3027,7 +3016,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
manage_tempaddrs(idev, ifp, 0, 0, false,
jiffies);
ipv6_del_addr(ifp);
addrconf_verify_rtnl();
addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
false, pfx, dev->ifindex);
@ -3772,9 +3761,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
struct hlist_head *h = &inet6_addr_lst[i];
struct hlist_head *h = &net->ipv6.inet6_addr_lst[i];
spin_lock_bh(&addrconf_hash_lock);
spin_lock_bh(&net->ipv6.addrconf_hash_lock);
restart:
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
@ -3790,7 +3779,7 @@ restart:
}
}
}
spin_unlock_bh(&addrconf_hash_lock);
spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
}
write_lock_bh(&idev->lock);
@ -4246,7 +4235,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
* before this temporary address becomes deprecated.
*/
if (ifp->flags & IFA_F_TEMPORARY)
addrconf_verify_rtnl();
addrconf_verify_rtnl(dev_net(dev));
}
static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
@ -4288,10 +4277,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
}
for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket],
addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
/* sync with offset */
if (p < state->offset) {
p++;
@ -4314,8 +4301,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
state->offset++;
return ifa;
}
@ -4323,9 +4308,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
state->offset = 0;
while (++state->bucket < IN6_ADDR_HSIZE) {
hlist_for_each_entry_rcu(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
&net->ipv6.inet6_addr_lst[state->bucket], addr_lst) {
return ifa;
}
}
@ -4413,9 +4396,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
int ret = 0;
rcu_read_lock();
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr) &&
(ifp->flags & IFA_F_HOMEADDRESS)) {
ret = 1;
@ -4453,9 +4434,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
hash = inet6_addr_hash(net, addr);
hash_found = false;
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
hash_found = true;
@ -4484,7 +4463,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
* Periodic address status verification
*/
static void addrconf_verify_rtnl(void)
static void addrconf_verify_rtnl(struct net *net)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
@ -4496,11 +4475,11 @@ static void addrconf_verify_rtnl(void)
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
cancel_delayed_work(&addr_chk_work);
cancel_delayed_work(&net->ipv6.addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) {
unsigned long age;
/* When setting preferred_lft to a value not zero or
@ -4599,20 +4578,23 @@ restart:
pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched);
mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
static void addrconf_verify_work(struct work_struct *w)
{
struct net *net = container_of(to_delayed_work(w), struct net,
ipv6.addr_chk_work);
rtnl_lock();
addrconf_verify_rtnl();
addrconf_verify_rtnl(net);
rtnl_unlock();
}
static void addrconf_verify(void)
static void addrconf_verify(struct net *net)
{
mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0);
}
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
@ -4708,7 +4690,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
return 0;
}
static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
struct ifa6_config *cfg)
{
u32 flags;
clock_t expires;
@ -4822,7 +4805,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
jiffies);
}
addrconf_verify_rtnl();
addrconf_verify_rtnl(net);
return 0;
}
@ -4909,7 +4892,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
!(nlh->nlmsg_flags & NLM_F_REPLACE))
err = -EEXIST;
else
err = inet6_addr_modify(ifa, &cfg);
err = inet6_addr_modify(net, ifa, &cfg);
in6_ifa_put(ifa);
@ -5794,7 +5777,7 @@ update_lft:
write_unlock_bh(&idev->lock);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
addrconf_verify_rtnl();
addrconf_verify_rtnl(dev_net(dev));
return 0;
}
@ -7111,6 +7094,14 @@ static int __net_init addrconf_init_net(struct net *net)
int err = -ENOMEM;
struct ipv6_devconf *all, *dflt;
spin_lock_init(&net->ipv6.addrconf_hash_lock);
INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work);
net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE,
sizeof(struct hlist_head),
GFP_KERNEL);
if (!net->ipv6.inet6_addr_lst)
goto err_alloc_addr;
all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
@ -7172,11 +7163,15 @@ err_reg_all:
err_alloc_dflt:
kfree(all);
err_alloc_all:
kfree(net->ipv6.inet6_addr_lst);
err_alloc_addr:
return err;
}
static void __net_exit addrconf_exit_net(struct net *net)
{
int i;
#ifdef CONFIG_SYSCTL
__addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
NETCONFA_IFINDEX_DEFAULT);
@ -7187,6 +7182,16 @@ static void __net_exit addrconf_exit_net(struct net *net)
net->ipv6.devconf_dflt = NULL;
kfree(net->ipv6.devconf_all);
net->ipv6.devconf_all = NULL;
cancel_delayed_work(&net->ipv6.addr_chk_work);
/*
* Check hash table, then free it.
*/
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i]));
kfree(net->ipv6.inet6_addr_lst);
net->ipv6.inet6_addr_lst = NULL;
}
static struct pernet_operations addrconf_ops = {
@ -7209,7 +7214,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
int __init addrconf_init(void)
{
struct inet6_dev *idev;
int i, err;
int err;
err = ipv6_addr_label_init();
if (err < 0) {
@ -7256,12 +7261,9 @@ int __init addrconf_init(void)
ip6_route_init_special_entries();
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
register_netdevice_notifier(&ipv6_dev_notf);
addrconf_verify();
addrconf_verify(&init_net);
rtnl_af_register(&inet6_ops);
@ -7319,7 +7321,6 @@ out:
void addrconf_cleanup(void)
{
struct net_device *dev;
int i;
unregister_netdevice_notifier(&ipv6_dev_notf);
unregister_pernet_subsys(&addrconf_ops);
@ -7337,14 +7338,6 @@ void addrconf_cleanup(void)
}
addrconf_ifdown(init_net.loopback_dev, true);
/*
* Check hash table.
*/
spin_lock_bh(&addrconf_hash_lock);
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
spin_unlock_bh(&addrconf_hash_lock);
cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
destroy_workqueue(addrconf_wq);

View File

@ -493,16 +493,21 @@ out_fib6_rules_ops:
goto out;
}
static void __net_exit fib6_rules_net_exit(struct net *net)
static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list)
{
struct net *net;
rtnl_lock();
fib_rules_unregister(net->ipv6.fib6_rules_ops);
list_for_each_entry(net, net_list, exit_list) {
fib_rules_unregister(net->ipv6.fib6_rules_ops);
cond_resched();
}
rtnl_unlock();
}
static struct pernet_operations fib6_rules_net_ops = {
.init = fib6_rules_net_init,
.exit = fib6_rules_net_exit,
.exit_batch = fib6_rules_net_exit_batch,
};
int __init fib6_rules_init(void)

View File

@ -253,13 +253,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
rtnl_lock();
ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
list_del(&mrt->list);
ip6mr_free_table(mrt);
}
fib_rules_unregister(net->ipv6.mr6_rules_ops);
rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@ -316,10 +315,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
static void __net_exit ip6mr_rules_exit(struct net *net)
{
rtnl_lock();
ASSERT_RTNL();
ip6mr_free_table(net->ipv6.mrt6);
net->ipv6.mrt6 = NULL;
rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@ -1323,7 +1321,9 @@ static int __net_init ip6mr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
rtnl_lock();
ip6mr_rules_exit(net);
rtnl_unlock();
#endif
ip6mr_rules_fail:
ip6mr_notifier_exit(net);
@ -1336,13 +1336,23 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_cache", net->proc_net);
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
ip6mr_notifier_exit(net);
}
static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
{
struct net *net;
rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
ip6mr_rules_exit(net);
rtnl_unlock();
}
static struct pernet_operations ip6mr_net_ops = {
.init = ip6mr_net_init,
.exit = ip6mr_net_exit,
.exit_batch = ip6mr_net_exit_batch,
};
int __init ip6_mr_init(void)