forked from Minki/linux
e89d469e3b
The below commit added a call to ->destroy() on init failure, but multiq still frees ->queues on error in init, but ->queues is also freed by ->destroy() thus we get double free and corrupted memory. Very easy to reproduce (eth0 not multiqueue): $ tc qdisc add dev eth0 root multiq RTNETLINK answers: Operation not supported $ ip l add dumdum type dummy (crash) Trace log: [ 3929.467747] general protection fault: 0000 [#1] SMP [ 3929.468083] Modules linked in: [ 3929.468302] CPU: 3 PID: 967 Comm: ip Not tainted 4.13.0-rc6+ #56 [ 3929.468625] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 3929.469124] task: ffff88003716a700 task.stack: ffff88005872c000 [ 3929.469449] RIP: 0010:__kmalloc_track_caller+0x117/0x1be [ 3929.469746] RSP: 0018:ffff88005872f6a0 EFLAGS: 00010246 [ 3929.470042] RAX: 00000000000002de RBX: 0000000058a59000 RCX: 00000000000002df [ 3929.470406] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff821f7020 [ 3929.470770] RBP: ffff88005872f6e8 R08: 000000000001f010 R09: 0000000000000000 [ 3929.471133] R10: ffff88005872f730 R11: 0000000000008cdd R12: ff006d75646d7564 [ 3929.471496] R13: 00000000014000c0 R14: ffff88005b403c00 R15: ffff88005b403c00 [ 3929.471869] FS: 00007f0b70480740(0000) GS:ffff88005d980000(0000) knlGS:0000000000000000 [ 3929.472286] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3929.472677] CR2: 00007ffcee4f3000 CR3: 0000000059d45000 CR4: 00000000000406e0 [ 3929.473209] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 3929.474109] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 3929.474873] Call Trace: [ 3929.475337] ? 
kstrdup_const+0x23/0x25 [ 3929.475863] kstrdup+0x2e/0x4b [ 3929.476338] kstrdup_const+0x23/0x25 [ 3929.478084] __kernfs_new_node+0x28/0xbc [ 3929.478478] kernfs_new_node+0x35/0x55 [ 3929.478929] kernfs_create_link+0x23/0x76 [ 3929.479478] sysfs_do_create_link_sd.isra.2+0x85/0xd7 [ 3929.480096] sysfs_create_link+0x33/0x35 [ 3929.480649] device_add+0x200/0x589 [ 3929.481184] netdev_register_kobject+0x7c/0x12f [ 3929.481711] register_netdevice+0x373/0x471 [ 3929.482174] rtnl_newlink+0x614/0x729 [ 3929.482610] ? rtnl_newlink+0x17f/0x729 [ 3929.483080] rtnetlink_rcv_msg+0x188/0x197 [ 3929.483533] ? rcu_read_unlock+0x3e/0x5f [ 3929.483984] ? rtnl_newlink+0x729/0x729 [ 3929.484420] netlink_rcv_skb+0x6c/0xce [ 3929.484858] rtnetlink_rcv+0x23/0x2a [ 3929.485291] netlink_unicast+0x103/0x181 [ 3929.485735] netlink_sendmsg+0x326/0x337 [ 3929.486181] sock_sendmsg_nosec+0x14/0x3f [ 3929.486614] sock_sendmsg+0x29/0x2e [ 3929.486973] ___sys_sendmsg+0x209/0x28b [ 3929.487340] ? do_raw_spin_unlock+0xcd/0xf8 [ 3929.487719] ? _raw_spin_unlock+0x27/0x31 [ 3929.488092] ? __handle_mm_fault+0x651/0xdb1 [ 3929.488471] ? check_chain_key+0xb0/0xfd [ 3929.488847] __sys_sendmsg+0x45/0x63 [ 3929.489206] ? __sys_sendmsg+0x45/0x63 [ 3929.489576] SyS_sendmsg+0x19/0x1b [ 3929.489901] entry_SYSCALL_64_fastpath+0x23/0xc2 [ 3929.490172] RIP: 0033:0x7f0b6fb93690 [ 3929.490423] RSP: 002b:00007ffcee4ed588 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 3929.490881] RAX: ffffffffffffffda RBX: ffffffff810d278c RCX: 00007f0b6fb93690 [ 3929.491198] RDX: 0000000000000000 RSI: 00007ffcee4ed5d0 RDI: 0000000000000003 [ 3929.491521] RBP: ffff88005872ff98 R08: 0000000000000001 R09: 0000000000000000 [ 3929.491801] R10: 00007ffcee4ed350 R11: 0000000000000246 R12: 0000000000000002 [ 3929.492075] R13: 000000000066f1a0 R14: 00007ffcee4f5680 R15: 0000000000000000 [ 3929.492352] ? 
trace_hardirqs_off_caller+0xa7/0xcf [ 3929.492590] Code: 8b 45 c0 48 8b 45 b8 74 17 48 8b 4d c8 83 ca ff 44 89 ee 4c 89 f7 e8 83 ca ff ff 49 89 c4 eb 49 49 63 56 20 48 8d 48 01 4d 8b 06 <49> 8b 1c 14 48 89 c2 4c 89 e0 65 49 0f c7 08 0f 94 c0 83 f0 01 [ 3929.493335] RIP: __kmalloc_track_caller+0x117/0x1be RSP: ffff88005872f6a0 Fixes:87b60cfacf
("net_sched: fix error recovery at qdisc creation") Fixes:f07d150129
("multiq: Further multiqueue cleanup") Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
423 lines
9.1 KiB
C
423 lines
9.1 KiB
C
/*
|
|
* Copyright (c) 2008, Intel Corporation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with
|
|
* this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
* Author: Alexander Duyck <alexander.h.duyck@intel.com>
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/string.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/netlink.h>
|
|
#include <net/pkt_sched.h>
|
|
#include <net/pkt_cls.h>
|
|
|
|
struct multiq_sched_data {
|
|
u16 bands;
|
|
u16 max_bands;
|
|
u16 curband;
|
|
struct tcf_proto __rcu *filter_list;
|
|
struct tcf_block *block;
|
|
struct Qdisc **queues;
|
|
};
|
|
|
|
|
|
static struct Qdisc *
|
|
multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
u32 band;
|
|
struct tcf_result res;
|
|
struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
|
|
int err;
|
|
|
|
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
|
|
err = tcf_classify(skb, fl, &res, false);
|
|
#ifdef CONFIG_NET_CLS_ACT
|
|
switch (err) {
|
|
case TC_ACT_STOLEN:
|
|
case TC_ACT_QUEUED:
|
|
case TC_ACT_TRAP:
|
|
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
|
|
case TC_ACT_SHOT:
|
|
return NULL;
|
|
}
|
|
#endif
|
|
band = skb_get_queue_mapping(skb);
|
|
|
|
if (band >= q->bands)
|
|
return q->queues[0];
|
|
|
|
return q->queues[band];
|
|
}
|
|
|
|
static int
|
|
multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
|
|
struct sk_buff **to_free)
|
|
{
|
|
struct Qdisc *qdisc;
|
|
int ret;
|
|
|
|
qdisc = multiq_classify(skb, sch, &ret);
|
|
#ifdef CONFIG_NET_CLS_ACT
|
|
if (qdisc == NULL) {
|
|
|
|
if (ret & __NET_XMIT_BYPASS)
|
|
qdisc_qstats_drop(sch);
|
|
__qdisc_drop(skb, to_free);
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
ret = qdisc_enqueue(skb, qdisc, to_free);
|
|
if (ret == NET_XMIT_SUCCESS) {
|
|
sch->q.qlen++;
|
|
return NET_XMIT_SUCCESS;
|
|
}
|
|
if (net_xmit_drop_count(ret))
|
|
qdisc_qstats_drop(sch);
|
|
return ret;
|
|
}
|
|
|
|
static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
struct Qdisc *qdisc;
|
|
struct sk_buff *skb;
|
|
int band;
|
|
|
|
for (band = 0; band < q->bands; band++) {
|
|
/* cycle through bands to ensure fairness */
|
|
q->curband++;
|
|
if (q->curband >= q->bands)
|
|
q->curband = 0;
|
|
|
|
/* Check that target subqueue is available before
|
|
* pulling an skb to avoid head-of-line blocking.
|
|
*/
|
|
if (!netif_xmit_stopped(
|
|
netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
|
|
qdisc = q->queues[q->curband];
|
|
skb = qdisc->dequeue(qdisc);
|
|
if (skb) {
|
|
qdisc_bstats_update(sch, skb);
|
|
sch->q.qlen--;
|
|
return skb;
|
|
}
|
|
}
|
|
}
|
|
return NULL;
|
|
|
|
}
|
|
|
|
static struct sk_buff *multiq_peek(struct Qdisc *sch)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
unsigned int curband = q->curband;
|
|
struct Qdisc *qdisc;
|
|
struct sk_buff *skb;
|
|
int band;
|
|
|
|
for (band = 0; band < q->bands; band++) {
|
|
/* cycle through bands to ensure fairness */
|
|
curband++;
|
|
if (curband >= q->bands)
|
|
curband = 0;
|
|
|
|
/* Check that target subqueue is available before
|
|
* pulling an skb to avoid head-of-line blocking.
|
|
*/
|
|
if (!netif_xmit_stopped(
|
|
netdev_get_tx_queue(qdisc_dev(sch), curband))) {
|
|
qdisc = q->queues[curband];
|
|
skb = qdisc->ops->peek(qdisc);
|
|
if (skb)
|
|
return skb;
|
|
}
|
|
}
|
|
return NULL;
|
|
|
|
}
|
|
|
|
static void
|
|
multiq_reset(struct Qdisc *sch)
|
|
{
|
|
u16 band;
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
|
|
for (band = 0; band < q->bands; band++)
|
|
qdisc_reset(q->queues[band]);
|
|
sch->q.qlen = 0;
|
|
q->curband = 0;
|
|
}
|
|
|
|
static void
|
|
multiq_destroy(struct Qdisc *sch)
|
|
{
|
|
int band;
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
|
|
tcf_block_put(q->block);
|
|
for (band = 0; band < q->bands; band++)
|
|
qdisc_destroy(q->queues[band]);
|
|
|
|
kfree(q->queues);
|
|
}
|
|
|
|
static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
struct tc_multiq_qopt *qopt;
|
|
int i;
|
|
|
|
if (!netif_is_multiqueue(qdisc_dev(sch)))
|
|
return -EOPNOTSUPP;
|
|
if (nla_len(opt) < sizeof(*qopt))
|
|
return -EINVAL;
|
|
|
|
qopt = nla_data(opt);
|
|
|
|
qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
|
|
|
|
sch_tree_lock(sch);
|
|
q->bands = qopt->bands;
|
|
for (i = q->bands; i < q->max_bands; i++) {
|
|
if (q->queues[i] != &noop_qdisc) {
|
|
struct Qdisc *child = q->queues[i];
|
|
q->queues[i] = &noop_qdisc;
|
|
qdisc_tree_reduce_backlog(child, child->q.qlen,
|
|
child->qstats.backlog);
|
|
qdisc_destroy(child);
|
|
}
|
|
}
|
|
|
|
sch_tree_unlock(sch);
|
|
|
|
for (i = 0; i < q->bands; i++) {
|
|
if (q->queues[i] == &noop_qdisc) {
|
|
struct Qdisc *child, *old;
|
|
child = qdisc_create_dflt(sch->dev_queue,
|
|
&pfifo_qdisc_ops,
|
|
TC_H_MAKE(sch->handle,
|
|
i + 1));
|
|
if (child) {
|
|
sch_tree_lock(sch);
|
|
old = q->queues[i];
|
|
q->queues[i] = child;
|
|
if (child != &noop_qdisc)
|
|
qdisc_hash_add(child, true);
|
|
|
|
if (old != &noop_qdisc) {
|
|
qdisc_tree_reduce_backlog(old,
|
|
old->q.qlen,
|
|
old->qstats.backlog);
|
|
qdisc_destroy(old);
|
|
}
|
|
sch_tree_unlock(sch);
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
int i, err;
|
|
|
|
q->queues = NULL;
|
|
|
|
if (opt == NULL)
|
|
return -EINVAL;
|
|
|
|
err = tcf_block_get(&q->block, &q->filter_list);
|
|
if (err)
|
|
return err;
|
|
|
|
q->max_bands = qdisc_dev(sch)->num_tx_queues;
|
|
|
|
q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
|
|
if (!q->queues)
|
|
return -ENOBUFS;
|
|
for (i = 0; i < q->max_bands; i++)
|
|
q->queues[i] = &noop_qdisc;
|
|
|
|
return multiq_tune(sch, opt);
|
|
}
|
|
|
|
static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
unsigned char *b = skb_tail_pointer(skb);
|
|
struct tc_multiq_qopt opt;
|
|
|
|
opt.bands = q->bands;
|
|
opt.max_bands = q->max_bands;
|
|
|
|
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
|
|
goto nla_put_failure;
|
|
|
|
return skb->len;
|
|
|
|
nla_put_failure:
|
|
nlmsg_trim(skb, b);
|
|
return -1;
|
|
}
|
|
|
|
static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
|
|
struct Qdisc **old)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
unsigned long band = arg - 1;
|
|
|
|
if (new == NULL)
|
|
new = &noop_qdisc;
|
|
|
|
*old = qdisc_replace(sch, new, &q->queues[band]);
|
|
return 0;
|
|
}
|
|
|
|
static struct Qdisc *
|
|
multiq_leaf(struct Qdisc *sch, unsigned long arg)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
unsigned long band = arg - 1;
|
|
|
|
return q->queues[band];
|
|
}
|
|
|
|
/*
 * multiq_get - translate a classid into a class handle.
 * @sch:     the multiq qdisc
 * @classid: class identifier; only its minor part is used
 *
 * Returns the minor number (band + 1) when it names an active band, and 0
 * otherwise. A minor of 0 wraps around in the unsigned subtraction and is
 * therefore rejected as well.
 */
static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
{
	struct multiq_sched_data *priv = qdisc_priv(sch);
	unsigned long minor = TC_H_MIN(classid);

	if (minor - 1 < priv->bands)
		return minor;

	return 0;
}
|
|
|
|
static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
|
|
u32 classid)
|
|
{
|
|
return multiq_get(sch, classid);
|
|
}
|
|
|
|
|
|
/*
 * multiq_put - release a class handle.
 *
 * Intentionally empty. It is installed as both .put and .unbind_tcf in
 * multiq_class_ops.
 */
static void multiq_put(struct Qdisc *q, unsigned long cl)
{
	/* nothing to do */
}
|
|
|
|
static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
|
|
struct sk_buff *skb, struct tcmsg *tcm)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
|
|
tcm->tcm_handle |= TC_H_MIN(cl);
|
|
tcm->tcm_info = q->queues[cl - 1]->handle;
|
|
return 0;
|
|
}
|
|
|
|
static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
|
|
struct gnet_dump *d)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
struct Qdisc *cl_q;
|
|
|
|
cl_q = q->queues[cl - 1];
|
|
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
|
|
d, NULL, &cl_q->bstats) < 0 ||
|
|
gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
int band;
|
|
|
|
if (arg->stop)
|
|
return;
|
|
|
|
for (band = 0; band < q->bands; band++) {
|
|
if (arg->count < arg->skip) {
|
|
arg->count++;
|
|
continue;
|
|
}
|
|
if (arg->fn(sch, band + 1, arg) < 0) {
|
|
arg->stop = 1;
|
|
break;
|
|
}
|
|
arg->count++;
|
|
}
|
|
}
|
|
|
|
static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
struct multiq_sched_data *q = qdisc_priv(sch);
|
|
|
|
if (cl)
|
|
return NULL;
|
|
return q->block;
|
|
}
|
|
|
|
static const struct Qdisc_class_ops multiq_class_ops = {
|
|
.graft = multiq_graft,
|
|
.leaf = multiq_leaf,
|
|
.get = multiq_get,
|
|
.put = multiq_put,
|
|
.walk = multiq_walk,
|
|
.tcf_block = multiq_tcf_block,
|
|
.bind_tcf = multiq_bind,
|
|
.unbind_tcf = multiq_put,
|
|
.dump = multiq_dump_class,
|
|
.dump_stats = multiq_dump_class_stats,
|
|
};
|
|
|
|
/*
 * Qdisc operations table registered under the id "multiq".
 * multiq_tune() is used for .change and is also called from multiq_init().
 */
static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &multiq_class_ops,
	.id		= "multiq",
	.priv_size	= sizeof(struct multiq_sched_data),
	.enqueue	= multiq_enqueue,
	.dequeue	= multiq_dequeue,
	.peek		= multiq_peek,
	.init		= multiq_init,
	.reset		= multiq_reset,
	.destroy	= multiq_destroy,
	.change		= multiq_tune,
	.dump		= multiq_dump,
	.owner		= THIS_MODULE,
};
|
|
|
|
/* Module entry point: register the multiq qdisc ops and pass on the result. */
static int __init multiq_module_init(void)
{
	return register_qdisc(&multiq_qdisc_ops);
}
|
|
|
|
/* Module exit point: unregister the multiq qdisc ops. */
static void __exit multiq_module_exit(void)
{
	unregister_qdisc(&multiq_qdisc_ops);
}
|
|
|
|
module_init(multiq_module_init)
|
|
module_exit(multiq_module_exit)
|
|
|
|
MODULE_LICENSE("GPL");
|