Fix qlen underrun when doing duplication with netem. If netem is used as leaf discipline, then the parent needs to be tweaked when packets are duplicated. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
		
			
				
	
	
		
			1298 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1298 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * net/sched/sch_api.c	Packet scheduler API.
 | 
						|
 *
 | 
						|
 *		This program is free software; you can redistribute it and/or
 | 
						|
 *		modify it under the terms of the GNU General Public License
 | 
						|
 *		as published by the Free Software Foundation; either version
 | 
						|
 *		2 of the License, or (at your option) any later version.
 | 
						|
 *
 | 
						|
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 | 
						|
 *
 | 
						|
 * Fixes:
 | 
						|
 *
 | 
						|
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 | 
						|
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 | 
						|
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/config.h>
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/types.h>
 | 
						|
#include <linux/kernel.h>
 | 
						|
#include <linux/sched.h>
 | 
						|
#include <linux/string.h>
 | 
						|
#include <linux/mm.h>
 | 
						|
#include <linux/socket.h>
 | 
						|
#include <linux/sockios.h>
 | 
						|
#include <linux/in.h>
 | 
						|
#include <linux/errno.h>
 | 
						|
#include <linux/interrupt.h>
 | 
						|
#include <linux/netdevice.h>
 | 
						|
#include <linux/skbuff.h>
 | 
						|
#include <linux/rtnetlink.h>
 | 
						|
#include <linux/init.h>
 | 
						|
#include <linux/proc_fs.h>
 | 
						|
#include <linux/seq_file.h>
 | 
						|
#include <linux/kmod.h>
 | 
						|
#include <linux/list.h>
 | 
						|
#include <linux/bitops.h>
 | 
						|
 | 
						|
#include <net/sock.h>
 | 
						|
#include <net/pkt_sched.h>
 | 
						|
 | 
						|
#include <asm/processor.h>
 | 
						|
#include <asm/uaccess.h>
 | 
						|
#include <asm/system.h>
 | 
						|
 | 
						|
/* Forward declarations of file-local helpers that are referenced before
 * their definitions appear.
 */
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
 | 
						|
 | 
						|
/*
 | 
						|
 | 
						|
   Short review.
 | 
						|
   -------------
 | 
						|
 | 
						|
   This file consists of two interrelated parts:
 | 
						|
 | 
						|
   1. queueing disciplines manager frontend.
 | 
						|
   2. traffic classes manager frontend.
 | 
						|
 | 
						|
   Generally, queueing discipline ("qdisc") is a black box,
 | 
						|
   which is able to enqueue packets and to dequeue them (when
 | 
						|
   device is ready to send something) in order and at times
 | 
						|
   determined by algorithm hidden in it.
 | 
						|
 | 
						|
   Qdiscs are divided into two categories:
 | 
						|
   - "queues", which have no internal structure visible from outside.
 | 
						|
   - "schedulers", which split all the packets to "traffic classes",
 | 
						|
     using "packet classifiers" (look at cls_api.c)
 | 
						|
 | 
						|
   In turn, classes may have child qdiscs (as rule, queues)
 | 
						|
   attached to them etc. etc. etc.
 | 
						|
 | 
						|
   The goal of the routines in this file is to translate
 | 
						|
   information supplied by user in the form of handles
 | 
						|
   into a form more intelligible to the kernel, to make some sanity
 | 
						|
   checks, to do the part of the work that is common to all qdiscs,
 | 
						|
   and to provide rtnetlink notifications.
 | 
						|
 | 
						|
   All real intelligent work is done inside qdisc modules.
 | 
						|
 | 
						|
 | 
						|
 | 
						|
   Every discipline has two major routines: enqueue and dequeue.
 | 
						|
 | 
						|
   ---dequeue
 | 
						|
 | 
						|
   dequeue usually returns a skb to send. It is allowed to return NULL,
 | 
						|
   but it does not mean that queue is empty, it just means that
 | 
						|
   discipline does not want to send anything this time.
 | 
						|
   Queue is really empty if q->q.qlen == 0.
 | 
						|
   For complicated disciplines with multiple queues q->q is not
 | 
						|
   real packet queue, but however q->q.qlen must be valid.
 | 
						|
 | 
						|
   ---enqueue
 | 
						|
 | 
						|
   enqueue returns 0, if packet was enqueued successfully.
 | 
						|
   If packet (this one or another one) was dropped, it returns
 | 
						|
   a non-zero error code.
 | 
						|
   NET_XMIT_DROP 	- this packet dropped
 | 
						|
     Expected action: do not backoff, but wait until queue will clear.
 | 
						|
   NET_XMIT_CN	 	- probably this packet enqueued, but another one dropped.
 | 
						|
     Expected action: backoff or ignore
 | 
						|
   NET_XMIT_POLICED	- dropped by police.
 | 
						|
     Expected action: backoff or error to real-time apps.
 | 
						|
 | 
						|
   Auxiliary routines:
 | 
						|
 | 
						|
   ---requeue
 | 
						|
 | 
						|
   requeues a previously dequeued packet. It is used for non-standard or
 | 
						|
   just buggy devices, which can defer output even if dev->tbusy=0.
 | 
						|
 | 
						|
   ---reset
 | 
						|
 | 
						|
   returns qdisc to initial state: purge all buffers, clear all
 | 
						|
   timers, counters (except for statistics) etc.
 | 
						|
 | 
						|
   ---init
 | 
						|
 | 
						|
   initializes newly created qdisc.
 | 
						|
 | 
						|
   ---destroy
 | 
						|
 | 
						|
   destroys resources allocated by init and during lifetime of qdisc.
 | 
						|
 | 
						|
   ---change
 | 
						|
 | 
						|
   changes qdisc parameters.
 | 
						|
 */
 | 
						|
 | 
						|
/*
 * Guards the list of registered qdisc operations (qdisc_base).
 * register_qdisc()/unregister_qdisc() take it for writing and
 * qdisc_lookup_ops() takes it for reading.  It is a pure SMP lock.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);
 | 
						|
 | 
						|
 | 
						|
/************************************************
 | 
						|
 *	Queueing disciplines manipulation.	*
 | 
						|
 ************************************************/
 | 
						|
 | 
						|
 | 
						|
/*
 * Head of the singly linked list (chained through ->next) of all
 * installed queueing discipline operations.
 */
static struct Qdisc_ops *qdisc_base;
 | 
						|
 | 
						|
/* Register/unregister queueing discipline */
 | 
						|
 | 
						|
int register_qdisc(struct Qdisc_ops *qops)
 | 
						|
{
 | 
						|
	struct Qdisc_ops *q, **qp;
 | 
						|
	int rc = -EEXIST;
 | 
						|
 | 
						|
	write_lock(&qdisc_mod_lock);
 | 
						|
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
 | 
						|
		if (!strcmp(qops->id, q->id))
 | 
						|
			goto out;
 | 
						|
 | 
						|
	if (qops->enqueue == NULL)
 | 
						|
		qops->enqueue = noop_qdisc_ops.enqueue;
 | 
						|
	if (qops->requeue == NULL)
 | 
						|
		qops->requeue = noop_qdisc_ops.requeue;
 | 
						|
	if (qops->dequeue == NULL)
 | 
						|
		qops->dequeue = noop_qdisc_ops.dequeue;
 | 
						|
 | 
						|
	qops->next = NULL;
 | 
						|
	*qp = qops;
 | 
						|
	rc = 0;
 | 
						|
out:
 | 
						|
	write_unlock(&qdisc_mod_lock);
 | 
						|
	return rc;
 | 
						|
}
 | 
						|
 | 
						|
int unregister_qdisc(struct Qdisc_ops *qops)
 | 
						|
{
 | 
						|
	struct Qdisc_ops *q, **qp;
 | 
						|
	int err = -ENOENT;
 | 
						|
 | 
						|
	write_lock(&qdisc_mod_lock);
 | 
						|
	for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
 | 
						|
		if (q == qops)
 | 
						|
			break;
 | 
						|
	if (q) {
 | 
						|
		*qp = q->next;
 | 
						|
		q->next = NULL;
 | 
						|
		err = 0;
 | 
						|
	}
 | 
						|
	write_unlock(&qdisc_mod_lock);
 | 
						|
	return err;
 | 
						|
}
 | 
						|
 | 
						|
/* We know handle. Find qdisc among all qdisc's attached to device
 | 
						|
   (root qdisc, all its children, children of children etc.)
 | 
						|
 */
 | 
						|
 | 
						|
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 | 
						|
{
 | 
						|
	struct Qdisc *q;
 | 
						|
 | 
						|
	read_lock_bh(&qdisc_tree_lock);
 | 
						|
	list_for_each_entry(q, &dev->qdisc_list, list) {
 | 
						|
		if (q->handle == handle) {
 | 
						|
			read_unlock_bh(&qdisc_tree_lock);
 | 
						|
			return q;
 | 
						|
		}
 | 
						|
	}
 | 
						|
	read_unlock_bh(&qdisc_tree_lock);
 | 
						|
	return NULL;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Return the qdisc attached as leaf to class @classid of qdisc @p.
 *
 * Returns NULL when @p has no class operations, when the class cannot be
 * obtained via cl_ops->get(), or when cl_ops->leaf() itself yields NULL.
 * The class obtained with ->get() is released with ->put() before
 * returning.
 */
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	struct Qdisc_class_ops *cops = p->ops->cl_ops;
	struct Qdisc *child = NULL;
	unsigned long cl;

	if (cops != NULL) {
		cl = cops->get(p, classid);
		if (cl != 0) {
			child = cops->leaf(p, cl);
			cops->put(p, cl);
		}
	}

	return child;
}
 | 
						|
 | 
						|
/* Find queueing discipline by name */
 | 
						|
 | 
						|
static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
 | 
						|
{
 | 
						|
	struct Qdisc_ops *q = NULL;
 | 
						|
 | 
						|
	if (kind) {
 | 
						|
		read_lock(&qdisc_mod_lock);
 | 
						|
		for (q = qdisc_base; q; q = q->next) {
 | 
						|
			if (rtattr_strcmp(kind, q->id) == 0) {
 | 
						|
				if (!try_module_get(q->owner))
 | 
						|
					q = NULL;
 | 
						|
				break;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		read_unlock(&qdisc_mod_lock);
 | 
						|
	}
 | 
						|
	return q;
 | 
						|
}
 | 
						|
 | 
						|
/* Head of the list of rate tables handed out by qdisc_get_rtab() and
 * released through qdisc_put_rtab(); entries are chained via ->next.
 */
static struct qdisc_rate_table *qdisc_rtab_list;
 | 
						|
 | 
						|
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
 | 
						|
{
 | 
						|
	struct qdisc_rate_table *rtab;
 | 
						|
 | 
						|
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
 | 
						|
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
 | 
						|
			rtab->refcnt++;
 | 
						|
			return rtab;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
 | 
						|
		return NULL;
 | 
						|
 | 
						|
	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
 | 
						|
	if (rtab) {
 | 
						|
		rtab->rate = *r;
 | 
						|
		rtab->refcnt = 1;
 | 
						|
		memcpy(rtab->data, RTA_DATA(tab), 1024);
 | 
						|
		rtab->next = qdisc_rtab_list;
 | 
						|
		qdisc_rtab_list = rtab;
 | 
						|
	}
 | 
						|
	return rtab;
 | 
						|
}
 | 
						|
 | 
						|
void qdisc_put_rtab(struct qdisc_rate_table *tab)
 | 
						|
{
 | 
						|
	struct qdisc_rate_table *rtab, **rtabp;
 | 
						|
 | 
						|
	if (!tab || --tab->refcnt)
 | 
						|
		return;
 | 
						|
 | 
						|
	for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
 | 
						|
		if (rtab == tab) {
 | 
						|
			*rtabp = rtab->next;
 | 
						|
			kfree(rtab);
 | 
						|
			return;
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Allocate an unique handle from space managed by kernel */
 | 
						|
 | 
						|
static u32 qdisc_alloc_handle(struct net_device *dev)
 | 
						|
{
 | 
						|
	int i = 0x10000;
 | 
						|
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
 | 
						|
 | 
						|
	do {
 | 
						|
		autohandle += TC_H_MAKE(0x10000U, 0);
 | 
						|
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
 | 
						|
			autohandle = TC_H_MAKE(0x80000000U, 0);
 | 
						|
	} while	(qdisc_lookup(dev, autohandle) && --i > 0);
 | 
						|
 | 
						|
	return i>0 ? autohandle : 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Attach toplevel qdisc to device dev */
 | 
						|
 | 
						|
static struct Qdisc *
 | 
						|
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
 | 
						|
{
 | 
						|
	struct Qdisc *oqdisc;
 | 
						|
 | 
						|
	if (dev->flags & IFF_UP)
 | 
						|
		dev_deactivate(dev);
 | 
						|
 | 
						|
	qdisc_lock_tree(dev);
 | 
						|
	if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
 | 
						|
		oqdisc = dev->qdisc_ingress;
 | 
						|
		/* Prune old scheduler */
 | 
						|
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
 | 
						|
			/* delete */
 | 
						|
			qdisc_reset(oqdisc);
 | 
						|
			dev->qdisc_ingress = NULL;
 | 
						|
		} else {  /* new */
 | 
						|
			dev->qdisc_ingress = qdisc;
 | 
						|
		}
 | 
						|
 | 
						|
	} else {
 | 
						|
 | 
						|
		oqdisc = dev->qdisc_sleeping;
 | 
						|
 | 
						|
		/* Prune old scheduler */
 | 
						|
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
 | 
						|
			qdisc_reset(oqdisc);
 | 
						|
 | 
						|
		/* ... and graft new one */
 | 
						|
		if (qdisc == NULL)
 | 
						|
			qdisc = &noop_qdisc;
 | 
						|
		dev->qdisc_sleeping = qdisc;
 | 
						|
		dev->qdisc = &noop_qdisc;
 | 
						|
	}
 | 
						|
 | 
						|
	qdisc_unlock_tree(dev);
 | 
						|
 | 
						|
	if (dev->flags & IFF_UP)
 | 
						|
		dev_activate(dev);
 | 
						|
 | 
						|
	return oqdisc;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 | 
						|
   to device "dev".
 | 
						|
 | 
						|
   Old qdisc is not destroyed but returned in *old.
 | 
						|
 */
 | 
						|
 | 
						|
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 | 
						|
		       u32 classid,
 | 
						|
		       struct Qdisc *new, struct Qdisc **old)
 | 
						|
{
 | 
						|
	int err = 0;
 | 
						|
	struct Qdisc *q = *old;
 | 
						|
 | 
						|
 | 
						|
	if (parent == NULL) { 
 | 
						|
		if (q && q->flags&TCQ_F_INGRESS) {
 | 
						|
			*old = dev_graft_qdisc(dev, q);
 | 
						|
		} else {
 | 
						|
			*old = dev_graft_qdisc(dev, new);
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		struct Qdisc_class_ops *cops = parent->ops->cl_ops;
 | 
						|
 | 
						|
		err = -EINVAL;
 | 
						|
 | 
						|
		if (cops) {
 | 
						|
			unsigned long cl = cops->get(parent, classid);
 | 
						|
			if (cl) {
 | 
						|
				err = cops->graft(parent, cl, new, old);
 | 
						|
				if (new)
 | 
						|
					new->parent = classid;
 | 
						|
				cops->put(parent, cl);
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return err;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
   Allocate and initialize new qdisc.
 | 
						|
 | 
						|
   Parameters are passed via opt.
 | 
						|
 */
 | 
						|
 | 
						|
static struct Qdisc *
 | 
						|
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 | 
						|
{
 | 
						|
	int err;
 | 
						|
	struct rtattr *kind = tca[TCA_KIND-1];
 | 
						|
	void *p = NULL;
 | 
						|
	struct Qdisc *sch;
 | 
						|
	struct Qdisc_ops *ops;
 | 
						|
	int size;
 | 
						|
 | 
						|
	ops = qdisc_lookup_ops(kind);
 | 
						|
#ifdef CONFIG_KMOD
 | 
						|
	if (ops == NULL && kind != NULL) {
 | 
						|
		char name[IFNAMSIZ];
 | 
						|
		if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
 | 
						|
			/* We dropped the RTNL semaphore in order to
 | 
						|
			 * perform the module load.  So, even if we
 | 
						|
			 * succeeded in loading the module we have to
 | 
						|
			 * tell the caller to replay the request.  We
 | 
						|
			 * indicate this using -EAGAIN.
 | 
						|
			 * We replay the request because the device may
 | 
						|
			 * go away in the mean time.
 | 
						|
			 */
 | 
						|
			rtnl_unlock();
 | 
						|
			request_module("sch_%s", name);
 | 
						|
			rtnl_lock();
 | 
						|
			ops = qdisc_lookup_ops(kind);
 | 
						|
			if (ops != NULL) {
 | 
						|
				/* We will try again qdisc_lookup_ops,
 | 
						|
				 * so don't keep a reference.
 | 
						|
				 */
 | 
						|
				module_put(ops->owner);
 | 
						|
				err = -EAGAIN;
 | 
						|
				goto err_out;
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
#endif
 | 
						|
 | 
						|
	err = -EINVAL;
 | 
						|
	if (ops == NULL)
 | 
						|
		goto err_out;
 | 
						|
 | 
						|
	/* ensure that the Qdisc and the private data are 32-byte aligned */
 | 
						|
	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
 | 
						|
	size += ops->priv_size + QDISC_ALIGN_CONST;
 | 
						|
 | 
						|
	p = kmalloc(size, GFP_KERNEL);
 | 
						|
	err = -ENOBUFS;
 | 
						|
	if (!p)
 | 
						|
		goto err_out2;
 | 
						|
	memset(p, 0, size);
 | 
						|
	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
 | 
						|
	                       & ~QDISC_ALIGN_CONST);
 | 
						|
	sch->padded = (char *)sch - (char *)p;
 | 
						|
 | 
						|
	INIT_LIST_HEAD(&sch->list);
 | 
						|
	skb_queue_head_init(&sch->q);
 | 
						|
 | 
						|
	if (handle == TC_H_INGRESS)
 | 
						|
		sch->flags |= TCQ_F_INGRESS;
 | 
						|
 | 
						|
	sch->ops = ops;
 | 
						|
	sch->enqueue = ops->enqueue;
 | 
						|
	sch->dequeue = ops->dequeue;
 | 
						|
	sch->dev = dev;
 | 
						|
	dev_hold(dev);
 | 
						|
	atomic_set(&sch->refcnt, 1);
 | 
						|
	sch->stats_lock = &dev->queue_lock;
 | 
						|
	if (handle == 0) {
 | 
						|
		handle = qdisc_alloc_handle(dev);
 | 
						|
		err = -ENOMEM;
 | 
						|
		if (handle == 0)
 | 
						|
			goto err_out3;
 | 
						|
	}
 | 
						|
 | 
						|
	if (handle == TC_H_INGRESS)
 | 
						|
                sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
 | 
						|
        else
 | 
						|
                sch->handle = handle;
 | 
						|
 | 
						|
	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
 | 
						|
		qdisc_lock_tree(dev);
 | 
						|
		list_add_tail(&sch->list, &dev->qdisc_list);
 | 
						|
		qdisc_unlock_tree(dev);
 | 
						|
 | 
						|
#ifdef CONFIG_NET_ESTIMATOR
 | 
						|
		if (tca[TCA_RATE-1])
 | 
						|
			gen_new_estimator(&sch->bstats, &sch->rate_est,
 | 
						|
				sch->stats_lock, tca[TCA_RATE-1]);
 | 
						|
#endif
 | 
						|
		return sch;
 | 
						|
	}
 | 
						|
err_out3:
 | 
						|
	dev_put(dev);
 | 
						|
err_out2:
 | 
						|
	module_put(ops->owner);
 | 
						|
err_out:
 | 
						|
	*errp = err;
 | 
						|
	if (p)
 | 
						|
		kfree(p);
 | 
						|
	return NULL;
 | 
						|
}
 | 
						|
 | 
						|
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
 | 
						|
{
 | 
						|
	if (tca[TCA_OPTIONS-1]) {
 | 
						|
		int err;
 | 
						|
 | 
						|
		if (sch->ops->change == NULL)
 | 
						|
			return -EINVAL;
 | 
						|
		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
 | 
						|
		if (err)
 | 
						|
			return err;
 | 
						|
	}
 | 
						|
#ifdef CONFIG_NET_ESTIMATOR
 | 
						|
	if (tca[TCA_RATE-1])
 | 
						|
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
 | 
						|
			sch->stats_lock, tca[TCA_RATE-1]);
 | 
						|
#endif
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
struct check_loop_arg
 | 
						|
{
 | 
						|
	struct qdisc_walker 	w;
 | 
						|
	struct Qdisc		*p;
 | 
						|
	int			depth;
 | 
						|
};
 | 
						|
 | 
						|
/* Per-class walker callback; declared here because check_loop() and
 * check_loop_fn() call each other.
 */
static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
 | 
						|
 | 
						|
static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
 | 
						|
{
 | 
						|
	struct check_loop_arg	arg;
 | 
						|
 | 
						|
	if (q->ops->cl_ops == NULL)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	arg.w.stop = arg.w.skip = arg.w.count = 0;
 | 
						|
	arg.w.fn = check_loop_fn;
 | 
						|
	arg.depth = depth;
 | 
						|
	arg.p = p;
 | 
						|
	q->ops->cl_ops->walk(q, &arg.w);
 | 
						|
	return arg.w.stop ? -ELOOP : 0;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
 | 
						|
{
 | 
						|
	struct Qdisc *leaf;
 | 
						|
	struct Qdisc_class_ops *cops = q->ops->cl_ops;
 | 
						|
	struct check_loop_arg *arg = (struct check_loop_arg *)w;
 | 
						|
 | 
						|
	leaf = cops->leaf(q, cl);
 | 
						|
	if (leaf) {
 | 
						|
		if (leaf == arg->p || arg->depth > 7)
 | 
						|
			return -ELOOP;
 | 
						|
		return check_loop(leaf, arg->p, arg->depth + 1);
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Delete/get qdisc.
 | 
						|
 */
 | 
						|
 | 
						|
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 | 
						|
{
 | 
						|
	struct tcmsg *tcm = NLMSG_DATA(n);
 | 
						|
	struct rtattr **tca = arg;
 | 
						|
	struct net_device *dev;
 | 
						|
	u32 clid = tcm->tcm_parent;
 | 
						|
	struct Qdisc *q = NULL;
 | 
						|
	struct Qdisc *p = NULL;
 | 
						|
	int err;
 | 
						|
 | 
						|
	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
 | 
						|
		return -ENODEV;
 | 
						|
 | 
						|
	if (clid) {
 | 
						|
		if (clid != TC_H_ROOT) {
 | 
						|
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
 | 
						|
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
 | 
						|
					return -ENOENT;
 | 
						|
				q = qdisc_leaf(p, clid);
 | 
						|
			} else { /* ingress */
 | 
						|
				q = dev->qdisc_ingress;
 | 
						|
                        }
 | 
						|
		} else {
 | 
						|
			q = dev->qdisc_sleeping;
 | 
						|
		}
 | 
						|
		if (!q)
 | 
						|
			return -ENOENT;
 | 
						|
 | 
						|
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
 | 
						|
			return -EINVAL;
 | 
						|
	} else {
 | 
						|
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
 | 
						|
			return -ENOENT;
 | 
						|
	}
 | 
						|
 | 
						|
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
 | 
						|
		return -EINVAL;
 | 
						|
 | 
						|
	if (n->nlmsg_type == RTM_DELQDISC) {
 | 
						|
		if (!clid)
 | 
						|
			return -EINVAL;
 | 
						|
		if (q->handle == 0)
 | 
						|
			return -ENOENT;
 | 
						|
		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
 | 
						|
			return err;
 | 
						|
		if (q) {
 | 
						|
			qdisc_notify(skb, n, clid, q, NULL);
 | 
						|
			spin_lock_bh(&dev->queue_lock);
 | 
						|
			qdisc_destroy(q);
 | 
						|
			spin_unlock_bh(&dev->queue_lock);
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		qdisc_notify(skb, n, clid, NULL, q);
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
   Create/change qdisc.
 | 
						|
 */
 | 
						|
 | 
						|
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 | 
						|
{
 | 
						|
	struct tcmsg *tcm;
 | 
						|
	struct rtattr **tca;
 | 
						|
	struct net_device *dev;
 | 
						|
	u32 clid;
 | 
						|
	struct Qdisc *q, *p;
 | 
						|
	int err;
 | 
						|
 | 
						|
replay:
 | 
						|
	/* Reinit, just in case something touches this. */
 | 
						|
	tcm = NLMSG_DATA(n);
 | 
						|
	tca = arg;
 | 
						|
	clid = tcm->tcm_parent;
 | 
						|
	q = p = NULL;
 | 
						|
 | 
						|
	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
 | 
						|
		return -ENODEV;
 | 
						|
 | 
						|
	if (clid) {
 | 
						|
		if (clid != TC_H_ROOT) {
 | 
						|
			if (clid != TC_H_INGRESS) {
 | 
						|
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
 | 
						|
					return -ENOENT;
 | 
						|
				q = qdisc_leaf(p, clid);
 | 
						|
			} else { /*ingress */
 | 
						|
				q = dev->qdisc_ingress;
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			q = dev->qdisc_sleeping;
 | 
						|
		}
 | 
						|
 | 
						|
		/* It may be default qdisc, ignore it */
 | 
						|
		if (q && q->handle == 0)
 | 
						|
			q = NULL;
 | 
						|
 | 
						|
		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
 | 
						|
			if (tcm->tcm_handle) {
 | 
						|
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
 | 
						|
					return -EEXIST;
 | 
						|
				if (TC_H_MIN(tcm->tcm_handle))
 | 
						|
					return -EINVAL;
 | 
						|
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
 | 
						|
					goto create_n_graft;
 | 
						|
				if (n->nlmsg_flags&NLM_F_EXCL)
 | 
						|
					return -EEXIST;
 | 
						|
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
 | 
						|
					return -EINVAL;
 | 
						|
				if (q == p ||
 | 
						|
				    (p && check_loop(q, p, 0)))
 | 
						|
					return -ELOOP;
 | 
						|
				atomic_inc(&q->refcnt);
 | 
						|
				goto graft;
 | 
						|
			} else {
 | 
						|
				if (q == NULL)
 | 
						|
					goto create_n_graft;
 | 
						|
 | 
						|
				/* This magic test requires explanation.
 | 
						|
				 *
 | 
						|
				 *   We know, that some child q is already
 | 
						|
				 *   attached to this parent and have choice:
 | 
						|
				 *   either to change it or to create/graft new one.
 | 
						|
				 *
 | 
						|
				 *   1. We are allowed to create/graft only
 | 
						|
				 *   if CREATE and REPLACE flags are set.
 | 
						|
				 *
 | 
						|
				 *   2. If EXCL is set, requestor wanted to say,
 | 
						|
				 *   that qdisc tcm_handle is not expected
 | 
						|
				 *   to exist, so that we choose create/graft too.
 | 
						|
				 *
 | 
						|
				 *   3. The last case is when no flags are set.
 | 
						|
				 *   Alas, it is sort of hole in API, we
 | 
						|
				 *   cannot decide what to do unambiguously.
 | 
						|
				 *   For now we select create/graft, if
 | 
						|
				 *   user gave KIND, which does not match existing.
 | 
						|
				 */
 | 
						|
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
 | 
						|
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
 | 
						|
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
 | 
						|
				     (tca[TCA_KIND-1] &&
 | 
						|
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
 | 
						|
					goto create_n_graft;
 | 
						|
			}
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		if (!tcm->tcm_handle)
 | 
						|
			return -EINVAL;
 | 
						|
		q = qdisc_lookup(dev, tcm->tcm_handle);
 | 
						|
	}
 | 
						|
 | 
						|
	/* Change qdisc parameters */
 | 
						|
	if (q == NULL)
 | 
						|
		return -ENOENT;
 | 
						|
	if (n->nlmsg_flags&NLM_F_EXCL)
 | 
						|
		return -EEXIST;
 | 
						|
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
 | 
						|
		return -EINVAL;
 | 
						|
	err = qdisc_change(q, tca);
 | 
						|
	if (err == 0)
 | 
						|
		qdisc_notify(skb, n, clid, NULL, q);
 | 
						|
	return err;
 | 
						|
 | 
						|
create_n_graft:
 | 
						|
	if (!(n->nlmsg_flags&NLM_F_CREATE))
 | 
						|
		return -ENOENT;
 | 
						|
	if (clid == TC_H_INGRESS)
 | 
						|
		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
 | 
						|
        else
 | 
						|
		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
 | 
						|
	if (q == NULL) {
 | 
						|
		if (err == -EAGAIN)
 | 
						|
			goto replay;
 | 
						|
		return err;
 | 
						|
	}
 | 
						|
 | 
						|
graft:
 | 
						|
	if (1) {
 | 
						|
		struct Qdisc *old_q = NULL;
 | 
						|
		err = qdisc_graft(dev, p, clid, q, &old_q);
 | 
						|
		if (err) {
 | 
						|
			if (q) {
 | 
						|
				spin_lock_bh(&dev->queue_lock);
 | 
						|
				qdisc_destroy(q);
 | 
						|
				spin_unlock_bh(&dev->queue_lock);
 | 
						|
			}
 | 
						|
			return err;
 | 
						|
		}
 | 
						|
		qdisc_notify(skb, n, clid, old_q, q);
 | 
						|
		if (old_q) {
 | 
						|
			spin_lock_bh(&dev->queue_lock);
 | 
						|
			qdisc_destroy(old_q);
 | 
						|
			spin_unlock_bh(&dev->queue_lock);
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Append one netlink message describing qdisc @q to @skb.
 *
 * The message carries a tcmsg header (device index, @clid as parent, the
 * qdisc handle, and the current refcnt in tcm_info), the TCA_KIND
 * attribute, the qdisc's own dump (if it has a ->dump hook) and its
 * statistics.
 *
 * Returns skb->len on success.  On any failure the skb is trimmed back
 * to where this message started and -1 is returned.
 *
 * The nlmsg_failure and rtattr_failure labels are used from within this
 * function explicitly and are presumably also the jump targets of the
 * NLMSG_PUT()/RTA_PUT() macros; keep both.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, unsigned flags, int event)
{
	unsigned char *start = skb->tail;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	struct gnet_dump dump;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;

	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);

	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);

	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto rtattr_failure;

	/* Refresh the queue length snapshot before dumping statistics. */
	q->qstats.qlen = q->q.qlen;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
			TCA_XSTATS, q->stats_lock, &dump) < 0)
		goto rtattr_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &dump) < 0)
		goto rtattr_failure;

	if (gnet_stats_copy_basic(&dump, &q->bstats) < 0)
		goto rtattr_failure;
#ifdef CONFIG_NET_ESTIMATOR
	if (gnet_stats_copy_rate_est(&dump, &q->rate_est) < 0)
		goto rtattr_failure;
#endif
	if (gnet_stats_copy_queue(&dump, &q->qstats) < 0)
		goto rtattr_failure;

	if (gnet_stats_finish_copy(&dump) < 0)
		goto rtattr_failure;

	nlh->nlmsg_len = skb->tail - start;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, start - skb->data);
	return -1;
}
 | 
						|
 | 
						|
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 | 
						|
			u32 clid, struct Qdisc *old, struct Qdisc *new)
 | 
						|
{
 | 
						|
	struct sk_buff *skb;
 | 
						|
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
 | 
						|
 | 
						|
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 | 
						|
	if (!skb)
 | 
						|
		return -ENOBUFS;
 | 
						|
 | 
						|
	if (old && old->handle) {
 | 
						|
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
 | 
						|
			goto err_out;
 | 
						|
	}
 | 
						|
	if (new) {
 | 
						|
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 | 
						|
			goto err_out;
 | 
						|
	}
 | 
						|
 | 
						|
	if (skb->len)
 | 
						|
		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 | 
						|
 | 
						|
err_out:
 | 
						|
	kfree_skb(skb);
 | 
						|
	return -EINVAL;
 | 
						|
}
 | 
						|
 | 
						|
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 | 
						|
{
 | 
						|
	int idx, q_idx;
 | 
						|
	int s_idx, s_q_idx;
 | 
						|
	struct net_device *dev;
 | 
						|
	struct Qdisc *q;
 | 
						|
 | 
						|
	s_idx = cb->args[0];
 | 
						|
	s_q_idx = q_idx = cb->args[1];
 | 
						|
	read_lock(&dev_base_lock);
 | 
						|
	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 | 
						|
		if (idx < s_idx)
 | 
						|
			continue;
 | 
						|
		if (idx > s_idx)
 | 
						|
			s_q_idx = 0;
 | 
						|
		read_lock_bh(&qdisc_tree_lock);
 | 
						|
		q_idx = 0;
 | 
						|
		list_for_each_entry(q, &dev->qdisc_list, list) {
 | 
						|
			if (q_idx < s_q_idx) {
 | 
						|
				q_idx++;
 | 
						|
				continue;
 | 
						|
			}
 | 
						|
			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
 | 
						|
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
 | 
						|
				read_unlock_bh(&qdisc_tree_lock);
 | 
						|
				goto done;
 | 
						|
			}
 | 
						|
			q_idx++;
 | 
						|
		}
 | 
						|
		read_unlock_bh(&qdisc_tree_lock);
 | 
						|
	}
 | 
						|
 | 
						|
done:
 | 
						|
	read_unlock(&dev_base_lock);
 | 
						|
 | 
						|
	cb->args[0] = idx;
 | 
						|
	cb->args[1] = q_idx;
 | 
						|
 | 
						|
	return skb->len;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/************************************************
 | 
						|
 *	Traffic classes manipulation.		*
 | 
						|
 ************************************************/
 | 
						|
 | 
						|
 | 
						|
 | 
						|
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 | 
						|
{
 | 
						|
	struct tcmsg *tcm = NLMSG_DATA(n);
 | 
						|
	struct rtattr **tca = arg;
 | 
						|
	struct net_device *dev;
 | 
						|
	struct Qdisc *q = NULL;
 | 
						|
	struct Qdisc_class_ops *cops;
 | 
						|
	unsigned long cl = 0;
 | 
						|
	unsigned long new_cl;
 | 
						|
	u32 pid = tcm->tcm_parent;
 | 
						|
	u32 clid = tcm->tcm_handle;
 | 
						|
	u32 qid = TC_H_MAJ(clid);
 | 
						|
	int err;
 | 
						|
 | 
						|
	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
 | 
						|
		return -ENODEV;
 | 
						|
 | 
						|
	/*
 | 
						|
	   parent == TC_H_UNSPEC - unspecified parent.
 | 
						|
	   parent == TC_H_ROOT   - class is root, which has no parent.
 | 
						|
	   parent == X:0	 - parent is root class.
 | 
						|
	   parent == X:Y	 - parent is a node in hierarchy.
 | 
						|
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
 | 
						|
 | 
						|
	   handle == 0:0	 - generate handle from kernel pool.
 | 
						|
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
 | 
						|
	   handle == X:Y	 - clear.
 | 
						|
	   handle == X:0	 - root class.
 | 
						|
	 */
 | 
						|
 | 
						|
	/* Step 1. Determine qdisc handle X:0 */
 | 
						|
 | 
						|
	if (pid != TC_H_ROOT) {
 | 
						|
		u32 qid1 = TC_H_MAJ(pid);
 | 
						|
 | 
						|
		if (qid && qid1) {
 | 
						|
			/* If both majors are known, they must be identical. */
 | 
						|
			if (qid != qid1)
 | 
						|
				return -EINVAL;
 | 
						|
		} else if (qid1) {
 | 
						|
			qid = qid1;
 | 
						|
		} else if (qid == 0)
 | 
						|
			qid = dev->qdisc_sleeping->handle;
 | 
						|
 | 
						|
		/* Now qid is genuine qdisc handle consistent
 | 
						|
		   both with parent and child.
 | 
						|
 | 
						|
		   TC_H_MAJ(pid) still may be unspecified, complete it now.
 | 
						|
		 */
 | 
						|
		if (pid)
 | 
						|
			pid = TC_H_MAKE(qid, pid);
 | 
						|
	} else {
 | 
						|
		if (qid == 0)
 | 
						|
			qid = dev->qdisc_sleeping->handle;
 | 
						|
	}
 | 
						|
 | 
						|
	/* OK. Locate qdisc */
 | 
						|
	if ((q = qdisc_lookup(dev, qid)) == NULL) 
 | 
						|
		return -ENOENT;
 | 
						|
 | 
						|
	/* An check that it supports classes */
 | 
						|
	cops = q->ops->cl_ops;
 | 
						|
	if (cops == NULL)
 | 
						|
		return -EINVAL;
 | 
						|
 | 
						|
	/* Now try to get class */
 | 
						|
	if (clid == 0) {
 | 
						|
		if (pid == TC_H_ROOT)
 | 
						|
			clid = qid;
 | 
						|
	} else
 | 
						|
		clid = TC_H_MAKE(qid, clid);
 | 
						|
 | 
						|
	if (clid)
 | 
						|
		cl = cops->get(q, clid);
 | 
						|
 | 
						|
	if (cl == 0) {
 | 
						|
		err = -ENOENT;
 | 
						|
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
 | 
						|
			goto out;
 | 
						|
	} else {
 | 
						|
		switch (n->nlmsg_type) {
 | 
						|
		case RTM_NEWTCLASS:	
 | 
						|
			err = -EEXIST;
 | 
						|
			if (n->nlmsg_flags&NLM_F_EXCL)
 | 
						|
				goto out;
 | 
						|
			break;
 | 
						|
		case RTM_DELTCLASS:
 | 
						|
			err = cops->delete(q, cl);
 | 
						|
			if (err == 0)
 | 
						|
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
 | 
						|
			goto out;
 | 
						|
		case RTM_GETTCLASS:
 | 
						|
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
 | 
						|
			goto out;
 | 
						|
		default:
 | 
						|
			err = -EINVAL;
 | 
						|
			goto out;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	new_cl = cl;
 | 
						|
	err = cops->change(q, clid, pid, tca, &new_cl);
 | 
						|
	if (err == 0)
 | 
						|
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
 | 
						|
 | 
						|
out:
 | 
						|
	if (cl)
 | 
						|
		cops->put(q, cl);
 | 
						|
 | 
						|
	return err;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/*
 * Append one traffic-class netlink message describing class @cl of qdisc @q
 * to @skb.
 *
 * @pid, @seq, @event and @flags go into the netlink header.  The tcmsg
 * parent and handle are both initialised to q->handle before the optional
 * cl_ops->dump() callback, which receives the tcmsg, is invoked.  Statistics
 * are added through the gnet_stats helpers and the optional
 * cl_ops->dump_stats() callback.
 *
 * Returns skb->len on success.  On any failure the skb is trimmed back to
 * where this message started and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, unsigned flags, int event)
{
	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
	unsigned char *b = skb->tail;	/* start of our message */
	struct gnet_dump d;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;

	/* NLMSG_PUT jumps to nlmsg_failure when the skb has no room. */
	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;

	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_handle = q->handle;
	tcm->tcm_parent = q->handle;
	tcm->tcm_info = 0;

	/* RTA_PUT jumps to rtattr_failure when the skb has no room. */
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);

	if (cl_ops->dump) {
		if (cl_ops->dump(q, cl, skb, tcm) < 0)
			goto rtattr_failure;
	}

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, q->stats_lock, &d) < 0)
		goto rtattr_failure;

	if (cl_ops->dump_stats) {
		if (cl_ops->dump_stats(q, cl, &d) < 0)
			goto rtattr_failure;
	}

	if (gnet_stats_finish_copy(&d) < 0)
		goto rtattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Drop whatever part of the message was already written. */
	skb_trim(skb, b - skb->data);
	return -1;
}
 | 
						|
 | 
						|
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 | 
						|
			  struct Qdisc *q, unsigned long cl, int event)
 | 
						|
{
 | 
						|
	struct sk_buff *skb;
 | 
						|
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
 | 
						|
 | 
						|
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 | 
						|
	if (!skb)
 | 
						|
		return -ENOBUFS;
 | 
						|
 | 
						|
	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
 | 
						|
		kfree_skb(skb);
 | 
						|
		return -EINVAL;
 | 
						|
	}
 | 
						|
 | 
						|
	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 | 
						|
}
 | 
						|
 | 
						|
struct qdisc_dump_args
 | 
						|
{
 | 
						|
	struct qdisc_walker w;
 | 
						|
	struct sk_buff *skb;
 | 
						|
	struct netlink_callback *cb;
 | 
						|
};
 | 
						|
 | 
						|
static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
 | 
						|
{
 | 
						|
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
 | 
						|
 | 
						|
	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
 | 
						|
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
 | 
						|
}
 | 
						|
 | 
						|
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 | 
						|
{
 | 
						|
	int t;
 | 
						|
	int s_t;
 | 
						|
	struct net_device *dev;
 | 
						|
	struct Qdisc *q;
 | 
						|
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
 | 
						|
	struct qdisc_dump_args arg;
 | 
						|
 | 
						|
	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 | 
						|
		return 0;
 | 
						|
	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	s_t = cb->args[0];
 | 
						|
	t = 0;
 | 
						|
 | 
						|
	read_lock_bh(&qdisc_tree_lock);
 | 
						|
	list_for_each_entry(q, &dev->qdisc_list, list) {
 | 
						|
		if (t < s_t || !q->ops->cl_ops ||
 | 
						|
		    (tcm->tcm_parent &&
 | 
						|
		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
 | 
						|
			t++;
 | 
						|
			continue;
 | 
						|
		}
 | 
						|
		if (t > s_t)
 | 
						|
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
 | 
						|
		arg.w.fn = qdisc_class_dump;
 | 
						|
		arg.skb = skb;
 | 
						|
		arg.cb = cb;
 | 
						|
		arg.w.stop  = 0;
 | 
						|
		arg.w.skip = cb->args[1];
 | 
						|
		arg.w.count = 0;
 | 
						|
		q->ops->cl_ops->walk(q, &arg.w);
 | 
						|
		cb->args[1] = arg.w.count;
 | 
						|
		if (arg.w.stop)
 | 
						|
			break;
 | 
						|
		t++;
 | 
						|
	}
 | 
						|
	read_unlock_bh(&qdisc_tree_lock);
 | 
						|
 | 
						|
	cb->args[0] = t;
 | 
						|
 | 
						|
	dev_put(dev);
 | 
						|
	return skb->len;
 | 
						|
}
 | 
						|
 | 
						|
/* Main classifier routine: scans classifier chain attached
 | 
						|
   to this qdisc, (optionally) tests for protocol and asks
 | 
						|
   specific classifiers.
 | 
						|
 */
 | 
						|
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
 | 
						|
	struct tcf_result *res)
 | 
						|
{
 | 
						|
	int err = 0;
 | 
						|
	u32 protocol = skb->protocol;
 | 
						|
#ifdef CONFIG_NET_CLS_ACT
 | 
						|
	struct tcf_proto *otp = tp;
 | 
						|
reclassify:
 | 
						|
#endif
 | 
						|
	protocol = skb->protocol;
 | 
						|
 | 
						|
	for ( ; tp; tp = tp->next) {
 | 
						|
		if ((tp->protocol == protocol ||
 | 
						|
			tp->protocol == __constant_htons(ETH_P_ALL)) &&
 | 
						|
			(err = tp->classify(skb, tp, res)) >= 0) {
 | 
						|
#ifdef CONFIG_NET_CLS_ACT
 | 
						|
			if ( TC_ACT_RECLASSIFY == err) {
 | 
						|
				__u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
 | 
						|
				tp = otp;
 | 
						|
 | 
						|
				if (MAX_REC_LOOP < verd++) {
 | 
						|
					printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
 | 
						|
						tp->prio&0xffff, ntohs(tp->protocol));
 | 
						|
					return TC_ACT_SHOT;
 | 
						|
				}
 | 
						|
				skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd);
 | 
						|
				goto reclassify;
 | 
						|
			} else {
 | 
						|
				if (skb->tc_verd) 
 | 
						|
					skb->tc_verd = SET_TC_VERD(skb->tc_verd,0);
 | 
						|
				return err;
 | 
						|
			}
 | 
						|
#else
 | 
						|
 | 
						|
			return err;
 | 
						|
#endif
 | 
						|
		}
 | 
						|
 | 
						|
	}
 | 
						|
	return -1;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Clock conversion pair reported by psched_show().  Both default to 1 and
 * are overwritten during initialisation when a CPU or jiffies clock source
 * is configured.
 */
static int psched_tick_per_us = 1;
static int psched_us_per_tick = 1;
 | 
						|
 | 
						|
#ifdef CONFIG_PROC_FS
 | 
						|
static int psched_show(struct seq_file *seq, void *v)
 | 
						|
{
 | 
						|
	seq_printf(seq, "%08x %08x %08x %08x\n",
 | 
						|
		      psched_tick_per_us, psched_us_per_tick,
 | 
						|
		      1000000, HZ);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static int psched_open(struct inode *inode, struct file *file)
 | 
						|
{
 | 
						|
	return single_open(file, psched_show, PDE(inode)->data);
 | 
						|
}
 | 
						|
 | 
						|
static struct file_operations psched_fops = {
 | 
						|
	.owner = THIS_MODULE,
 | 
						|
	.open = psched_open,
 | 
						|
	.read  = seq_read,
 | 
						|
	.llseek = seq_lseek,
 | 
						|
	.release = single_release,
 | 
						|
};	
 | 
						|
#endif
 | 
						|
 | 
						|
#ifdef CONFIG_NET_SCH_CLK_CPU
 | 
						|
psched_tdiff_t psched_clock_per_hz;
 | 
						|
int psched_clock_scale;
 | 
						|
EXPORT_SYMBOL(psched_clock_per_hz);
 | 
						|
EXPORT_SYMBOL(psched_clock_scale);
 | 
						|
 | 
						|
psched_time_t psched_time_base;
 | 
						|
cycles_t psched_time_mark;
 | 
						|
EXPORT_SYMBOL(psched_time_mark);
 | 
						|
EXPORT_SYMBOL(psched_time_base);
 | 
						|
 | 
						|
/*
 | 
						|
 * Periodically adjust psched_time_base to avoid overflow
 | 
						|
 * with 32-bit get_cycles(). Safe up to 4GHz CPU.
 | 
						|
 */
 | 
						|
static void psched_tick(unsigned long);
 | 
						|
static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);
 | 
						|
 | 
						|
static void psched_tick(unsigned long dummy)
 | 
						|
{
 | 
						|
	if (sizeof(cycles_t) == sizeof(u32)) {
 | 
						|
		psched_time_t dummy_stamp;
 | 
						|
		PSCHED_GET_TIME(dummy_stamp);
 | 
						|
		psched_timer.expires = jiffies + 1*HZ;
 | 
						|
		add_timer(&psched_timer);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Calibrate the CPU-cycle scheduler clock against the wall clock.
 *
 * Busy-waits for HZ/10 jiffies, sampling both the scheduler clock and
 * do_gettimeofday() before and after, and derives from the ratio:
 *   psched_tick_per_us  - scheduler clock ticks per measured microsecond
 *   psched_clock_scale  - incremented once per halving of that ratio
 *   psched_us_per_tick  - 1 << psched_clock_scale
 *   psched_clock_per_hz - scaled ticks per 1/HZ second
 *
 * Returns 0 on success and -1 if the measurement is unusable: the
 * wall-clock interval is not positive, or it exceeds the scheduler-clock
 * interval (fewer than one tick per microsecond).
 */
int __init psched_calibrate_clock(void)
{
	psched_time_t stamp, stamp1;
	struct timeval tv, tv1;
	psched_tdiff_t delay;
	long rdelay;
	unsigned long stop;

	/* Start the periodic psched_time_base refresh (32-bit cycles only). */
	psched_tick(0);

	stop = jiffies + HZ/10;
	PSCHED_GET_TIME(stamp);
	do_gettimeofday(&tv);
	while (time_before(jiffies, stop)) {
		barrier();
		cpu_relax();
	}
	PSCHED_GET_TIME(stamp1);
	do_gettimeofday(&tv1);

	delay = PSCHED_TDIFF(stamp1, stamp);
	rdelay = tv1.tv_usec - tv.tv_usec;
	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;

	/*
	 * A zero wall-clock interval would make the division below fault.
	 * A negative one (time stepped backwards) would produce a
	 * non-positive ratio; with a signed psched_tdiff_t the shift loop
	 * below might then never reach zero.  Treat both as a failed
	 * calibration, like the "slower than 1 tick/us" case.
	 */
	if (rdelay <= 0 || rdelay > delay)
		return -1;

	delay /= rdelay;
	psched_tick_per_us = delay;
	while ((delay>>=1) != 0)
		psched_clock_scale++;
	psched_us_per_tick = 1<<psched_clock_scale;
	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
	return 0;
}
 | 
						|
#endif
 | 
						|
 | 
						|
/*
 * Packet scheduler subsystem initialisation.
 *
 * Sets up the scheduler clock parameters for the configured clock source,
 * installs the qdisc and traffic-class handlers into the PF_UNSPEC
 * rtnetlink table, registers the pfifo and bfifo qdiscs and creates the
 * "psched" proc entry.
 *
 * Returns 0, or -1 if CPU clock calibration fails.
 */
static int __init pktsched_init(void)
{
	struct rtnetlink_link *link_p;

#ifdef CONFIG_NET_SCH_CLK_CPU
	if (psched_calibrate_clock() < 0)
		return -1;
#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
	psched_tick_per_us = HZ<<PSCHED_JSCALE;
	psched_us_per_tick = 1000000;
#endif

	/*
	 * Setup rtnetlink links. It is made here to avoid
	 * exporting large number of public symbols.
	 */
	link_p = rtnetlink_links[PF_UNSPEC];
	if (link_p) {
		/* Queueing disciplines. */
		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;

		/* Traffic classes: one handler serves new, delete and get. */
		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
	}

	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create("psched", 0, &psched_fops);

	return 0;
}

subsys_initcall(pktsched_init);
 | 
						|
 | 
						|
/* Symbols made available to other kernel modules. */

/* qdisc lookup and registration */
EXPORT_SYMBOL(qdisc_lookup);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);

/* rate table handling */
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);

/* classification */
EXPORT_SYMBOL(tc_classify);
 |