cgroup: fix spurious warnings on cgroup_is_dead() from cgroup_sk_alloc()

cgroup_get() expected to be called only on live cgroups and triggers
warning on a dead cgroup; however, cgroup_sk_alloc() may be called
while cloning a socket which is left in an empty and removed cgroup
and thus may legitimately duplicate its reference on a dead cgroup.
This currently triggers the following warning spuriously.

 WARNING: CPU: 14 PID: 0 at kernel/cgroup.c:490 cgroup_get+0x55/0x60
 ...
  [<ffffffff8107e123>] __warn+0xd3/0xf0
  [<ffffffff8107e20e>] warn_slowpath_null+0x1e/0x20
  [<ffffffff810ff465>] cgroup_get+0x55/0x60
  [<ffffffff81106061>] cgroup_sk_alloc+0x51/0xe0
  [<ffffffff81761beb>] sk_clone_lock+0x2db/0x390
  [<ffffffff817cce06>] inet_csk_clone_lock+0x16/0xc0
  [<ffffffff817e8173>] tcp_create_openreq_child+0x23/0x4b0
  [<ffffffff818601a1>] tcp_v6_syn_recv_sock+0x91/0x670
  [<ffffffff817e8b16>] tcp_check_req+0x3a6/0x4e0
  [<ffffffff81861ba3>] tcp_v6_rcv+0x693/0xa00
  [<ffffffff81837429>] ip6_input_finish+0x59/0x3e0
  [<ffffffff81837cb2>] ip6_input+0x32/0xb0
  [<ffffffff81837387>] ip6_rcv_finish+0x57/0xa0
  [<ffffffff81837ac8>] ipv6_rcv+0x318/0x4d0
  [<ffffffff817778c7>] __netif_receive_skb_core+0x2d7/0x9a0
  [<ffffffff81777fa6>] __netif_receive_skb+0x16/0x70
  [<ffffffff81778023>] netif_receive_skb_internal+0x23/0x80
  [<ffffffff817787d8>] napi_gro_frags+0x208/0x270
  [<ffffffff8168a9ec>] mlx4_en_process_rx_cq+0x74c/0xf40
  [<ffffffff8168b270>] mlx4_en_poll_rx_cq+0x30/0x90
  [<ffffffff81778b30>] net_rx_action+0x210/0x350
  [<ffffffff8188c426>] __do_softirq+0x106/0x2c7
  [<ffffffff81082bad>] irq_exit+0x9d/0xa0 [<ffffffff8188c0e4>] do_IRQ+0x54/0xd0
  [<ffffffff8188a63f>] common_interrupt+0x7f/0x7f <EOI>
  [<ffffffff8173d7e7>] cpuidle_enter+0x17/0x20
  [<ffffffff810bdfd9>] cpu_startup_entry+0x2a9/0x2f0
  [<ffffffff8103edd1>] start_secondary+0xf1/0x100

This patch renames the existing cgroup_get() with the dead cgroup
warning to cgroup_get_live() after cgroup_kn_lock_live() and
introduces the new cgroup_get() which doesn't check whether the cgroup
is live or dead.

All existing cgroup_get() users except for cgroup_sk_alloc() are
converted to use cgroup_get_live().

Fixes: d979a39d72 ("cgroup: duplicate cgroup reference when cloning sockets")
Cc: stable@vger.kernel.org # v4.5+
Cc: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
Tejun Heo 2017-04-28 15:14:55 -04:00
parent b8b1a2e5ec
commit a590b90d47

View File

@ -437,6 +437,11 @@ out_unlock:
} }
static void cgroup_get(struct cgroup *cgrp) static void cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
}
static void cgroup_get_live(struct cgroup *cgrp)
{ {
WARN_ON_ONCE(cgroup_is_dead(cgrp)); WARN_ON_ONCE(cgroup_is_dead(cgrp));
css_get(&cgrp->self); css_get(&cgrp->self);
@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
list_add_tail(&link->cgrp_link, &cset->cgrp_links); list_add_tail(&link->cgrp_link, &cset->cgrp_links);
if (cgroup_parent(cgrp)) if (cgroup_parent(cgrp))
cgroup_get(cgrp); cgroup_get_live(cgrp);
} }
/** /**
@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
cgrp_dfl_visible = true; cgrp_dfl_visible = true;
cgroup_get(&cgrp_dfl_root.cgrp); cgroup_get_live(&cgrp_dfl_root.cgrp);
dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
CGROUP2_SUPER_MAGIC, ns); CGROUP2_SUPER_MAGIC, ns);
@ -2575,7 +2580,7 @@ restart:
if (!css || !percpu_ref_is_dying(&css->refcnt)) if (!css || !percpu_ref_is_dying(&css->refcnt))
continue; continue;
cgroup_get(dsct); cgroup_get_live(dsct);
prepare_to_wait(&dsct->offline_waitq, &wait, prepare_to_wait(&dsct->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
@ -3946,7 +3951,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
{ {
lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&cgroup_mutex);
cgroup_get(cgrp); cgroup_get_live(cgrp);
memset(css, 0, sizeof(*css)); memset(css, 0, sizeof(*css));
css->cgroup = cgrp; css->cgroup = cgrp;
@ -4122,7 +4127,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
/* allocation complete, commit to creation */ /* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
atomic_inc(&root->nr_cgrps); atomic_inc(&root->nr_cgrps);
cgroup_get(parent); cgroup_get_live(parent);
/* /*
* @cgrp is now fully operational. If something fails after this * @cgrp is now fully operational. If something fails after this
@ -4946,7 +4951,7 @@ struct cgroup *cgroup_get_from_path(const char *path)
if (kn) { if (kn) {
if (kernfs_type(kn) == KERNFS_DIR) { if (kernfs_type(kn) == KERNFS_DIR) {
cgrp = kn->priv; cgrp = kn->priv;
cgroup_get(cgrp); cgroup_get_live(cgrp);
} else { } else {
cgrp = ERR_PTR(-ENOTDIR); cgrp = ERR_PTR(-ENOTDIR);
} }
@ -5026,6 +5031,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
/* Socket clone path */ /* Socket clone path */
if (skcd->val) { if (skcd->val) {
/*
* We might be cloning a socket which is left in an empty
* cgroup and the cgroup might have already been rmdir'd.
* Don't use cgroup_get_live().
*/
cgroup_get(sock_cgroup_ptr(skcd)); cgroup_get(sock_cgroup_ptr(skcd));
return; return;
} }