From 82a470f1119eb7d2e4941b915bf9cd6fd8d54494 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 5 Oct 2014 21:27:53 -0700 Subject: [PATCH 1/3] net: sched: remove tcf_proto from ematch calls This removes the tcf_proto argument from the ematch code paths that only need it to reference the net namespace. This allows simplifying qdisc code paths especially when we need to tear down the ematch from an RCU callback. In this case we can not guarentee that the tcf_proto structure is still valid. Signed-off-by: John Fastabend Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 10 +++++----- net/sched/cls_basic.c | 2 +- net/sched/cls_cgroup.c | 4 ++-- net/sched/cls_flow.c | 4 ++-- net/sched/em_canid.c | 4 ++-- net/sched/em_ipset.c | 7 +++---- net/sched/em_meta.c | 4 ++-- net/sched/em_nbyte.c | 2 +- net/sched/em_text.c | 4 ++-- net/sched/ematch.c | 10 ++++++---- 10 files changed, 26 insertions(+), 25 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ef44ad9a6426..bc49967e1a68 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -166,6 +166,7 @@ struct tcf_ematch { unsigned int datalen; u16 matchid; u16 flags; + struct net *net; }; static inline int tcf_em_is_container(struct tcf_ematch *em) @@ -229,12 +230,11 @@ struct tcf_ematch_tree { struct tcf_ematch_ops { int kind; int datalen; - int (*change)(struct tcf_proto *, void *, + int (*change)(struct net *net, void *, int, struct tcf_ematch *); int (*match)(struct sk_buff *, struct tcf_ematch *, struct tcf_pkt_info *); - void (*destroy)(struct tcf_proto *, - struct tcf_ematch *); + void (*destroy)(struct tcf_ematch *); int (*dump)(struct sk_buff *, struct tcf_ematch *); struct module *owner; struct list_head link; @@ -244,7 +244,7 @@ int tcf_em_register(struct tcf_ematch_ops *); void tcf_em_unregister(struct tcf_ematch_ops *); int tcf_em_tree_validate(struct tcf_proto *, struct nlattr *, struct tcf_ematch_tree *); -void tcf_em_tree_destroy(struct tcf_proto *, struct tcf_ematch_tree *); +void tcf_em_tree_destroy(struct tcf_ematch_tree *); int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int); int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, struct tcf_pkt_info *); @@ -301,7 +301,7 @@ struct tcf_ematch_tree { }; #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0) -#define tcf_em_tree_destroy(tp, t) do { (void)(t); } while(0) +#define tcf_em_tree_destroy(t) do { (void)(t); } while(0) #define tcf_em_tree_dump(skb, t, tlv) (0) #define tcf_em_tree_change(tp, dst, src) do { } while(0) #define tcf_em_tree_match(skb, t, info) ((void)(info), 1) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index fe20826a79e7..90647a8af8ca 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -95,7 +95,7 @@ static void basic_delete_filter(struct rcu_head *head) tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(&f->exts); - tcf_em_tree_destroy(tp, &f->ematches); + tcf_em_tree_destroy(&f->ematches); kfree(f); } diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 3409f168225f..2f77a89655dc 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -87,7 +87,7 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root) rcu); tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(head->tp, &head->ematches); + tcf_em_tree_destroy(&head->ematches); kfree(head); } @@ -157,7 +157,7 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) if (head) { tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(tp, &head->ematches); + tcf_em_tree_destroy(&head->ematches); RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index f18d27f7b5f2..a5d2b20db560 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -355,7 +355,7 @@ static void flow_destroy_filter(struct rcu_head *head) del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); - tcf_em_tree_destroy(f->tp, &f->ematches); + tcf_em_tree_destroy(&f->ematches); kfree(f); } @@ -530,7 +530,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return 0; err2: - tcf_em_tree_destroy(tp, &t); + tcf_em_tree_destroy(&t); kfree(fnew); err1: tcf_exts_destroy(&e); diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c index 7c292d474f47..ddd883ca55b2 100644 --- a/net/sched/em_canid.c +++ b/net/sched/em_canid.c @@ -120,7 +120,7 @@ static int em_canid_match(struct sk_buff *skb, struct tcf_ematch *m, return match; } -static int em_canid_change(struct tcf_proto *tp, void *data, int len, +static int em_canid_change(struct net *net, void *data, int len, struct tcf_ematch *m) { struct can_filter *conf = data; /* Array with rules */ @@ -183,7 +183,7 @@ static int em_canid_change(struct tcf_proto *tp, void *data, int len, return 0; } -static void em_canid_destroy(struct tcf_proto *tp, struct tcf_ematch *m) +static void em_canid_destroy(struct tcf_ematch *m) { struct canid_match *cm = em_canid_priv(m); diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 527aeb7a3ff0..5b4a4efe468c 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -19,12 +19,11 @@ #include #include -static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, +static int em_ipset_change(struct net *net, void *data, int data_len, struct tcf_ematch *em) { struct xt_set_info *set = data; ip_set_id_t index; - struct net *net = dev_net(qdisc_dev(tp->q)); if (data_len != sizeof(*set)) return -EINVAL; @@ -42,11 +41,11 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, return -ENOMEM; } -static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) +static void em_ipset_destroy(struct tcf_ematch *em) { const struct xt_set_info *set = (const void *) em->data; if (set) { - ip_set_nfnl_put(dev_net(qdisc_dev(p->q)), set->index); + ip_set_nfnl_put(em->net, set->index); kfree((void *) em->data); } } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 9b8c0b0e60d7..c8f8c399b99a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -856,7 +856,7 @@ static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { [TCA_EM_META_HDR] = { .len = sizeof(struct tcf_meta_hdr) }, }; -static int em_meta_change(struct tcf_proto *tp, void *data, int len, +static int em_meta_change(struct net *net, void *data, int len, struct tcf_ematch *m) { int err; @@ -908,7 +908,7 @@ errout: return err; } -static void em_meta_destroy(struct tcf_proto *tp, struct tcf_ematch *m) +static void em_meta_destroy(struct tcf_ematch *m) { if (m) meta_delete((struct meta_match *) m->data); diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index a3bed07a008b..df3110d69585 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -23,7 +23,7 @@ struct nbyte_data { char pattern[0]; }; -static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len, +static int em_nbyte_change(struct net *net, void *data, int data_len, struct tcf_ematch *em) { struct tcf_em_nbyte *nbyte = data; diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 15d353d2e4be..f03c3de16c27 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -45,7 +45,7 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX; } -static int em_text_change(struct tcf_proto *tp, void *data, int len, +static int em_text_change(struct net *net, void *data, int len, struct tcf_ematch *m) { struct text_match *tm; @@ -100,7 +100,7 @@ retry: return 0; } -static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) +static void em_text_destroy(struct tcf_ematch *m) { if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) textsearch_destroy(EM_TEXT_PRIV(m)->config); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index ad57f4444b9c..8250c36543d8 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -178,6 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp, struct tcf_ematch_hdr *em_hdr = nla_data(nla); int data_len = nla_len(nla) - sizeof(*em_hdr); void *data = (void *) em_hdr + sizeof(*em_hdr); + struct net *net = dev_net(qdisc_dev(tp->q)); if (!TCF_EM_REL_VALID(em_hdr->flags)) goto errout; @@ -240,7 +241,7 @@ static int tcf_em_validate(struct tcf_proto *tp, goto errout; if (em->ops->change) { - err = em->ops->change(tp, data, data_len, em); + err = em->ops->change(net, data, data_len, em); if (err < 0) goto errout; } else if (data_len > 0) { @@ -271,6 +272,7 @@ static int tcf_em_validate(struct tcf_proto *tp, em->matchid = em_hdr->matchid; em->flags = em_hdr->flags; em->datalen = data_len; + em->net = net; err = 0; errout: @@ -378,7 +380,7 @@ errout: return err; errout_abort: - tcf_em_tree_destroy(tp, tree); + tcf_em_tree_destroy(tree); return err; } EXPORT_SYMBOL(tcf_em_tree_validate); @@ -393,7 +395,7 @@ EXPORT_SYMBOL(tcf_em_tree_validate); * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that * the ematch tree is not in use before calling this function. */ -void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree) +void tcf_em_tree_destroy(struct tcf_ematch_tree *tree) { int i; @@ -405,7 +407,7 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree) if (em->ops) { if (em->ops->destroy) - em->ops->destroy(tp, em); + em->ops->destroy(em); else if (!tcf_em_is_simple(em)) kfree((void *) em->data); module_put(em->ops->owner); From 13990f8156862fe945a1a226850a6550c8988a33 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 5 Oct 2014 21:28:20 -0700 Subject: [PATCH 2/3] net: sched: cls_cgroup tear down exts and ematch from rcu callback It is not RCU safe to destroy the action chain while there is a possibility of readers accessing it. Move this code into the rcu callback using the same rcu callback used in the code patch to make a change to head. Signed-off-by: John Fastabend Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 2f77a89655dc..d61a801222c1 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -156,10 +156,8 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) struct cls_cgroup_head *head = rtnl_dereference(tp->root); if (head) { - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); RCU_INIT_POINTER(tp->root, NULL); - kfree_rcu(head, rcu); + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); } } From 18cdb37ebf4c986d9502405cbd16b0ac29770c25 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 5 Oct 2014 21:28:52 -0700 Subject: [PATCH 3/3] net: sched: do not use tcf_proto 'tp' argument from call_rcu Using the tcf_proto pointer 'tp' from inside the classifiers callback is not valid because it may have been cleaned up by another call_rcu occuring on another CPU. 'tp' is currently being used by tcf_unbind_filter() in this patch we move instances of tcf_unbind_filter outside of the call_rcu() context. This is safe to do because any running schedulers will either read the valid class field or it will be zeroed. And all schedulers today when the class is 0 do a lookup using the same call used by the tcf_exts_bind(). So even if we have a running classifier hit the null class pointer it will do a lookup and get to the same result. This is particularly fragile at the moment because the only way to verify this is to audit the schedulers call sites. Reported-by: Cong Wang Signed-off-by: John Fastabend Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 5 +++-- net/sched/cls_bpf.c | 4 +++- net/sched/cls_fw.c | 5 +++-- net/sched/cls_route.c | 8 +++++--- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 90647a8af8ca..cd61280941e5 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -91,9 +91,7 @@ static int basic_init(struct tcf_proto *tp) static void basic_delete_filter(struct rcu_head *head) { struct basic_filter *f = container_of(head, struct basic_filter, rcu); - struct tcf_proto *tp = f->tp; - tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); kfree(f); @@ -106,6 +104,7 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); + tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, basic_delete_filter); } RCU_INIT_POINTER(tp->root, NULL); @@ -120,6 +119,7 @@ static int basic_delete(struct tcf_proto *tp, unsigned long arg) list_for_each_entry(t, &head->flist, link) if (t == f) { list_del_rcu(&t->link); + tcf_unbind_filter(tp, &t->res); call_rcu(&t->rcu, basic_delete_filter); return 0; } @@ -222,6 +222,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (fold) { list_replace_rcu(&fold->link, &fnew->link); + tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, basic_delete_filter); } else { list_add_rcu(&fnew->link, &head->flist); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 4318d067b0a0..eed49d1d0878 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -92,7 +92,6 @@ static int cls_bpf_init(struct tcf_proto *tp) static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) { - tcf_unbind_filter(tp, &prog->res); tcf_exts_destroy(&prog->exts); bpf_prog_destroy(prog->filter); @@ -116,6 +115,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) list_for_each_entry(prog, &head->plist, link) { if (prog == todel) { list_del_rcu(&prog->link); + tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); return 0; } @@ -131,6 +131,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp) list_for_each_entry_safe(prog, tmp, &head->plist, link) { list_del_rcu(&prog->link); + tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); } @@ -282,6 +283,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (oldprog) { list_replace_rcu(&prog->link, &oldprog->link); + tcf_unbind_filter(tp, &oldprog->res); call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); } else { list_add_rcu(&prog->link, &head->plist); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index da805aeeb65c..dbfdfd1f1a9f 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -123,9 +123,7 @@ static int fw_init(struct tcf_proto *tp) static void fw_delete_filter(struct rcu_head *head) { struct fw_filter *f = container_of(head, struct fw_filter, rcu); - struct tcf_proto *tp = f->tp; - tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(&f->exts); kfree(f); } @@ -143,6 +141,7 @@ static void fw_destroy(struct tcf_proto *tp) while ((f = rtnl_dereference(head->ht[h])) != NULL) { RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); + tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, fw_delete_filter); } } @@ -166,6 +165,7 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg) fp = &pfp->next, pfp = rtnl_dereference(*fp)) { if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); + tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, fw_delete_filter); return 0; } @@ -280,6 +280,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); + tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, fw_delete_filter); *arg = (unsigned long)fnew; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index b665aee661f7..6f22baae0afa 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -269,9 +269,7 @@ static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = container_of(head, struct route4_filter, rcu); - struct tcf_proto *tp = f->tp; - tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(&f->exts); kfree(f); } @@ -297,6 +295,7 @@ static void route4_destroy(struct tcf_proto *tp) next = rtnl_dereference(f->next); RCU_INIT_POINTER(b->ht[h2], next); + tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, route4_delete_filter); } } @@ -338,6 +337,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) route4_reset_fastmap(head); /* Delete it */ + tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, route4_delete_filter); /* Strip RTNL protected tree */ @@ -545,8 +545,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, route4_reset_fastmap(head); *arg = (unsigned long)f; - if (fold) + if (fold) { + tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, route4_delete_filter); + } return 0; errout: