21 files changed, 453 insertions, 508 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 6767e54155db..36543b6fcef3 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -316,6 +316,17 @@ config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.
 
+config NET_CLS_CGROUP
+	bool "Control Group Classifier"
+	select NET_CLS
+	depends on CGROUPS
+	---help---
+	  Say Y here if you want to classify packets based on the control
+	  cgroup of their process.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e60c9925b269..70b35f8708c3 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 000000000000..53ada2c0e41c
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,290 @@
+/*
+ * net/sched/cls_cgroup.c	Control Group Classifier
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+
+struct cgroup_cls_state
+{
+	struct cgroup_subsys_state css;
+	u32 classid;
+};
+
+static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp)
+{
+	return (struct cgroup_cls_state *)
+		cgroup_subsys_state(cgrp, net_cls_subsys_id);
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+						 struct cgroup *cgrp)
+{
+	struct cgroup_cls_state *cs;
+
+	if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+
+	if (cgrp->parent)
+		cs->classid = net_cls_state(cgrp->parent)->classid;
+
+	return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	kfree(ss);
+}
+
+static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+{
+	return net_cls_state(cgrp)->classid;
+}
+
+static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+{
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
+	net_cls_state(cgrp)->classid = (u32) value;
+
+	cgroup_unlock();
+
+	return 0;
+}
+
+static struct cftype ss_files[] = {
+	{
+		.name = "classid",
+		.read_u64 = read_classid,
+		.write_u64 = write_classid,
+	},
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+struct cgroup_subsys net_cls_subsys = {
+	.name		= "net_cls",
+	.create		= cgrp_create,
+	.destroy	= cgrp_destroy,
+	.populate	= cgrp_populate,
+	.subsys_id	= net_cls_subsys_id,
+};
+
+struct cls_cgroup_head
+{
+	u32			handle;
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+};
+
+static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			       struct tcf_result *res)
+{
+	struct cls_cgroup_head *head = tp->root;
+	struct cgroup_cls_state *cs;
+	int ret = 0;
+
+	/*
+	 * Due to the nature of the classifier it is required to ignore all
+	 * packets originating from softirq context as accessing `current'
+	 * would lead to false results.
+	 *
+	 * This test assumes that all callers of dev_queue_xmit() explicitely
+	 * disable bh. Knowing this, it is possible to detect softirq based
+	 * calls by looking at the number of nested bh disable calls because
+	 * softirqs always disables bh.
+	 */
+	if (softirq_count() != SOFTIRQ_OFFSET)
+		return -1;
+
+	rcu_read_lock();
+	cs = (struct cgroup_cls_state *) task_subsys_state(current,
+							   net_cls_subsys_id);
+	if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
+		res->classid = cs->classid;
+		res->class = 0;
+		ret = tcf_exts_exec(skb, &head->exts, res);
+	} else
+		ret = -1;
+
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+	return 0UL;
+}
+
+static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int cls_cgroup_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static const struct tcf_ext_map cgroup_ext_map = {
+	.action = TCA_CGROUP_ACT,
+	.police = TCA_CGROUP_POLICE,
+};
+
+static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
+	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
+};
+
+static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
+			     u32 handle, struct nlattr **tca,
+			     unsigned long *arg)
+{
+	struct nlattr *tb[TCA_CGROUP_MAX+1];
+	struct cls_cgroup_head *head = tp->root;
+	struct tcf_ematch_tree t;
+	struct tcf_exts e;
+	int err;
+
+	if (head == NULL) {
+		if (!handle)
+			return -EINVAL;
+
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
+		if (head == NULL)
+			return -ENOBUFS;
+
+		head->handle = handle;
+
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	}
+
+	if (handle != head->handle)
+		return -ENOENT;
+
+	err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
+			       cgroup_policy);
+	if (err < 0)
+		return err;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	if (err < 0)
+		return err;
+
+	tcf_exts_change(tp, &head->exts, &e);
+	tcf_em_tree_change(tp, &head->ematches, &t);
+
+	return 0;
+}
+
+static void cls_cgroup_destroy(struct tcf_proto *tp)
+{
+	struct cls_cgroup_head *head;
+
+	head = (struct cls_cgroup_head *)xchg(&tp->root, NULL);
+
+	if (head) {
+		tcf_exts_destroy(tp, &head->exts);
+		tcf_em_tree_destroy(tp, &head->ematches);
+		kfree(head);
+	}
+}
+
+static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_cgroup_head *head = tp->root;
+
+	if (arg->count < arg->skip)
+		goto skip;
+
+	if (arg->fn(tp, (unsigned long) head, arg) < 0) {
+		arg->stop = 1;
+		return;
+	}
+skip:
+	arg->count++;
+}
+
+static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+			   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_cgroup_head *head = tp->root;
+	unsigned char *b = skb_tail_pointer(skb);
+	struct nlattr *nest;
+
+	t->tcm_handle = head->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+	    tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
+	.kind		=	"cgroup",
+	.init		=	cls_cgroup_init,
+	.change		=	cls_cgroup_change,
+	.classify	=	cls_cgroup_classify,
+	.destroy	=	cls_cgroup_destroy,
+	.get		=	cls_cgroup_get,
+	.put		=	cls_cgroup_put,
+	.delete		=	cls_cgroup_delete,
+	.walk		=	cls_cgroup_walk,
+	.dump		=	cls_cgroup_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_cgroup_cls(void)
+{
+	return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup_cls(void)
+{
+	unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup_cls);
+module_exit(exit_cgroup_cls);
+MODULE_LICENSE("GPL");
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e82519e548d7..aab59409728b 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -71,7 +71,7 @@
  *
  *      static void __exit exit_my_ematch(void)
  *      {
- *      	return tcf_em_unregister(&my_ops);
+ *      	tcf_em_unregister(&my_ops);
  *      }
  *
  *      module_init(init_my_ematch);
@@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register);
  *
  * Returns -ENOENT if no matching ematch was found.
  */
-int tcf_em_unregister(struct tcf_ematch_ops *ops)
+void tcf_em_unregister(struct tcf_ematch_ops *ops)
 {
-	int err = 0;
-	struct tcf_ematch_ops *e;
-
 	write_lock(&ematch_mod_lock);
-	list_for_each_entry(e, &ematch_ops, link) {
-		if (e == ops) {
-			list_del(&e->link);
-			goto out;
-		}
-	}
-
-	err = -ENOENT;
-out:
+	list_del(&ops->link);
 	write_unlock(&ematch_mod_lock);
-	return err;
 }
 EXPORT_SYMBOL(tcf_em_unregister);
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b16ad2972c6b..5bcef13408c8 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -97,10 +97,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 
    Auxiliary routines:
 
-   ---requeue
+   ---peek
 
-   requeues once dequeued packet. It is used for non-standard or
-   just buggy devices, which can defer output even if netif_queue_stopped()=0.
+   like dequeue but without removing a packet from the queue
 
    ---reset
 
@@ -147,8 +146,14 @@ int register_qdisc(struct Qdisc_ops *qops)
 
 	if (qops->enqueue == NULL)
 		qops->enqueue = noop_qdisc_ops.enqueue;
-	if (qops->requeue == NULL)
-		qops->requeue = noop_qdisc_ops.requeue;
+	if (qops->peek == NULL) {
+		if (qops->dequeue == NULL) {
+			qops->peek = noop_qdisc_ops.peek;
+		} else {
+			rc = -EINVAL;
+			goto out;
+		}
+	}
 	if (qops->dequeue == NULL)
 		qops->dequeue = noop_qdisc_ops.dequeue;
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 43d37256c15e..ca90f6e59aee 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -62,7 +62,7 @@ struct atm_qdisc_data {
 	struct atm_flow_data	link;		/* unclassified skbs go here */
 	struct atm_flow_data	*flows;		/* NB: "link" is also on this
 						   list */
-	struct tasklet_struct	task;		/* requeue tasklet */
+	struct tasklet_struct	task;		/* dequeue tasklet */
 };
 
 /* ------------------------- Class/flow operations ------------------------- */
@@ -480,11 +480,14 @@ static void sch_atm_dequeue(unsigned long data)
 		 * If traffic is properly shaped, this won't generate nasty
 		 * little bursts. Otherwise, it may ... (but that's okay)
 		 */
-		while ((skb = flow->q->dequeue(flow->q))) {
-			if (!atm_may_send(flow->vcc, skb->truesize)) {
-				(void)flow->q->ops->requeue(skb, flow->q);
+		while ((skb = flow->q->ops->peek(flow->q))) {
+			if (!atm_may_send(flow->vcc, skb->truesize))
 				break;
-			}
+
+			skb = qdisc_dequeue_peeked(flow->q);
+			if (unlikely(!skb))
+				break;
+
 			pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
 			/* remove any LL header somebody else has attached */
 			skb_pull(skb, skb_network_offset(skb));
@@ -516,27 +519,19 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
 
 	pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
 	tasklet_schedule(&p->task);
-	skb = p->link.q->dequeue(p->link.q);
+	skb = qdisc_dequeue_peeked(p->link.q);
 	if (skb)
 		sch->q.qlen--;
 	return skb;
 }
 
-static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
-	int ret;
 
-	pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-	ret = p->link.q->ops->requeue(skb, p->link.q);
-	if (!ret) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	} else if (net_xmit_drop_count(ret)) {
-		sch->qstats.drops++;
-		p->link.qstats.drops++;
-	}
-	return ret;
+	pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
+
+	return p->link.q->ops->peek(p->link.q);
 }
 
 static unsigned int atm_tc_drop(struct Qdisc *sch)
@@ -694,7 +689,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
 	.priv_size	= sizeof(struct atm_qdisc_data),
 	.enqueue	= atm_tc_enqueue,
 	.dequeue	= atm_tc_dequeue,
-	.requeue	= atm_tc_requeue,
+	.peek		= atm_tc_peek,
 	.drop		= atm_tc_drop,
 	.init		= atm_tc_init,
 	.reset		= atm_tc_reset,
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 507fb488bc98..094a874b48bc 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
 	.priv_size	= 0,
 	.enqueue	= blackhole_enqueue,
 	.dequeue	= blackhole_dequeue,
+	.peek		= blackhole_dequeue,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 03e389e8d945..a99e37e9e6f1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-static int
-cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl;
-	int ret;
-
-	if ((cl = q->tx_class) == NULL) {
-		kfree_skb(skb);
-		sch->qstats.drops++;
-		return NET_XMIT_CN;
-	}
-	q->tx_class = NULL;
-
-	cbq_mark_toplevel(q, cl);
-
-#ifdef CONFIG_NET_CLS_ACT
-	q->rx_class = cl;
-	cl->q->__parent = sch;
-#endif
-	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		if (!cl->next_alive)
-			cbq_activate_class(cl);
-		return 0;
-	}
-	if (net_xmit_drop_count(ret)) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
-	}
-	return ret;
-}
-
 /* Overlimit actions */
 
 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
@@ -2066,7 +2032,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct cbq_sched_data),
 	.enqueue	=	cbq_enqueue,
 	.dequeue	=	cbq_dequeue,
-	.requeue	=	cbq_requeue,
+	.peek		=	qdisc_peek_dequeued,
 	.drop		=	cbq_drop,
 	.init		=	cbq_init,
 	.reset		=	cbq_reset,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index ba43aab3a851..3f9427a4b757 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -313,24 +313,13 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	return skb;
 }
 
-static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static struct sk_buff *dsmark_peek(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	int err;
-
-	pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-
-	err = p->q->ops->requeue(skb, p->q);
-	if (err != NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(err))
-			sch->qstats.drops++;
-		return err;
-	}
 
-	sch->q.qlen++;
-	sch->qstats.requeues++;
+	pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p);
 
-	return NET_XMIT_SUCCESS;
+	return p->q->ops->peek(p->q);
 }
 
 static unsigned int dsmark_drop(struct Qdisc *sch)
@@ -496,7 +485,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct dsmark_qdisc_data),
 	.enqueue	=	dsmark_enqueue,
 	.dequeue	=	dsmark_dequeue,
-	.requeue	=	dsmark_requeue,
+	.peek		=	dsmark_peek,
 	.drop		=	dsmark_drop,
 	.init		=	dsmark_init,
 	.reset		=	dsmark_reset,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 23d258bfe8ac..92cfc9d7e3b9 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -83,7 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	pfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
-	.requeue	=	qdisc_requeue,
+	.peek		=	qdisc_peek_head,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
 	.reset		=	qdisc_reset_queue,
@@ -98,7 +98,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	bfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
-	.requeue	=	qdisc_requeue,
+	.peek		=	qdisc_peek_head,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
 	.reset		=	qdisc_reset_queue,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 93cd30ce6501..80c8f3dbbea1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -306,21 +306,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
 	return NULL;
 }
 
-static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
-{
-	if (net_ratelimit())
-		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
-		       skb->dev->name);
-	kfree_skb(skb);
-	return NET_XMIT_CN;
-}
-
 struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 	.id		=	"noop",
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
-	.requeue	=	noop_requeue,
+	.peek		=	noop_dequeue,
 	.owner		=	THIS_MODULE,
 };
 
@@ -335,7 +326,6 @@ struct Qdisc noop_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
-	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 };
@@ -346,7 +336,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
-	.requeue	=	noop_requeue,
+	.peek		=	noop_dequeue,
 	.owner		=	THIS_MODULE,
 };
 
@@ -362,7 +352,6 @@ static struct Qdisc noqueue_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noqueue_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
-	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.dev_queue	=	&noqueue_netdev_queue,
 };
@@ -411,10 +400,17 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 	return NULL;
 }
 
-static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
 {
-	qdisc->q.qlen++;
-	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+		if (!skb_queue_empty(list + prio))
+			return skb_peek(list + prio);
+	}
+
+	return NULL;
 }
 
 static void pfifo_fast_reset(struct Qdisc* qdisc)
@@ -457,7 +453,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
 	.enqueue	=	pfifo_fast_enqueue,
 	.dequeue	=	pfifo_fast_dequeue,
-	.requeue	=	pfifo_fast_requeue,
+	.peek		=	pfifo_fast_peek,
 	.init		=	pfifo_fast_init,
 	.reset		=	pfifo_fast_reset,
 	.dump		=	pfifo_fast_dump,
@@ -483,7 +479,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->padded = (char *) sch - (char *) p;
 
 	INIT_LIST_HEAD(&sch->list);
-	skb_queue_head_init(&sch->requeue);
 	skb_queue_head_init(&sch->q);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
@@ -526,6 +521,9 @@ void qdisc_reset(struct Qdisc *qdisc)
 
 	if (ops->reset)
 		ops->reset(qdisc);
+
+	kfree_skb(qdisc->gso_skb);
+	qdisc->gso_skb = NULL;
 }
 EXPORT_SYMBOL(qdisc_reset);
 
@@ -552,8 +550,6 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	dev_put(qdisc_dev(qdisc));
 
 	kfree_skb(qdisc->gso_skb);
-	__skb_queue_purge(&qdisc->requeue);
-
 	kfree((char *) qdisc - qdisc->padded);
 }
 EXPORT_SYMBOL(qdisc_destroy);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c1ad6b8de105..40408d595c08 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -240,26 +240,6 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct gred_sched *t = qdisc_priv(sch);
-	struct gred_sched_data *q;
-	u16 dp = tc_index_to_dp(skb);
-
-	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
-			       "for requeue, screwing up backlog.\n",
-			       tc_index_to_dp(skb));
-	} else {
-		if (red_is_idling(&q->parms))
-			red_end_of_idle_period(&q->parms);
-		q->backlog += qdisc_pkt_len(skb);
-	}
-
-	return qdisc_requeue(skb, sch);
-}
-
 static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 {
 	struct sk_buff *skb;
@@ -602,7 +582,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct gred_sched),
 	.enqueue	=	gred_enqueue,
 	.dequeue	=	gred_dequeue,
-	.requeue	=	gred_requeue,
+	.peek		=	qdisc_peek_head,
 	.drop		=	gred_drop,
 	.init		=	gred_init,
 	.reset		=	gred_reset,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c1e77da8cd09..071c4749a12b 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -184,7 +184,6 @@ struct hfsc_sched
 	struct rb_root eligible;		/* eligible tree */
 	struct list_head droplist;		/* active leaf class list (for
 						   dropping) */
-	struct sk_buff_head requeue;		/* requeued packet */
 	struct qdisc_watchdog watchdog;		/* watchdog timer */
 };
 
@@ -880,28 +879,20 @@ set_passive(struct hfsc_class *cl)
 	 */
 }
 
-/*
- * hack to get length of first packet in queue.
- */
 static unsigned int
 qdisc_peek_len(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	unsigned int len;
 
-	skb = sch->dequeue(sch);
+	skb = sch->ops->peek(sch);
 	if (skb == NULL) {
 		if (net_ratelimit())
 			printk("qdisc_peek_len: non work-conserving qdisc ?\n");
 		return 0;
 	}
 	len = qdisc_pkt_len(skb);
-	if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
-		if (net_ratelimit())
-			printk("qdisc_peek_len: failed to requeue\n");
-		qdisc_tree_decrease_qlen(sch, 1);
-		return 0;
-	}
+
 	return len;
 }
 
@@ -1440,7 +1431,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
-	skb_queue_head_init(&q->requeue);
 
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
@@ -1525,7 +1515,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
 			hfsc_reset_class(cl);
 	}
-	__skb_queue_purge(&q->requeue);
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
 	qdisc_watchdog_cancel(&q->watchdog);
@@ -1550,7 +1539,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 			hfsc_destroy_class(sch, cl);
 	}
 	qdisc_class_hash_destroy(&q->clhash);
-	__skb_queue_purge(&q->requeue);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -1617,8 +1605,6 @@ hfsc_dequeue(struct Qdisc *sch)
 
 	if (sch->q.qlen == 0)
 		return NULL;
-	if ((skb = __skb_dequeue(&q->requeue)))
-		goto out;
 
 	cur_time = psched_get_time();
 
@@ -1642,7 +1628,7 @@ hfsc_dequeue(struct Qdisc *sch)
 		}
 	}
 
-	skb = cl->qdisc->dequeue(cl->qdisc);
+	skb = qdisc_dequeue_peeked(cl->qdisc);
 	if (skb == NULL) {
 		if (net_ratelimit())
 			printk("HFSC: Non-work-conserving qdisc ?\n");
@@ -1667,24 +1653,12 @@ hfsc_dequeue(struct Qdisc *sch)
 		set_passive(cl);
 	}
 
- out:
 	sch->flags &= ~TCQ_F_THROTTLED;
 	sch->q.qlen--;
 
 	return skb;
 }
 
-static int
-hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct hfsc_sched *q = qdisc_priv(sch);
-
-	__skb_queue_head(&q->requeue, skb);
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-	return NET_XMIT_SUCCESS;
-}
-
 static unsigned int
 hfsc_drop(struct Qdisc *sch)
 {
@@ -1735,7 +1709,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
 	.dump		= hfsc_dump_qdisc,
 	.enqueue	= hfsc_enqueue,
 	.dequeue	= hfsc_dequeue,
-	.requeue	= hfsc_requeue,
+	.peek		= qdisc_peek_dequeued,
 	.drop		= hfsc_drop,
 	.cl_ops		= &hfsc_class_ops,
 	.priv_size	= sizeof(struct hfsc_sched),
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d14f02056ae6..83f5e69243c1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -551,7 +551,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	int ret;
+	int uninitialized_var(ret);
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = htb_classify(skb, sch, &ret);
 
@@ -591,47 +591,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	int ret;
-	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = htb_classify(skb, sch, &ret);
-	struct sk_buff *tskb;
-
-	if (cl == HTB_DIRECT) {
-		/* enqueue to helper queue */
-		if (q->direct_queue.qlen < q->direct_qlen) {
-			__skb_queue_head(&q->direct_queue, skb);
-		} else {
-			__skb_queue_head(&q->direct_queue, skb);
-			tskb = __skb_dequeue_tail(&q->direct_queue);
-			kfree_skb(tskb);
-			sch->qstats.drops++;
-			return NET_XMIT_CN;
-		}
-#ifdef CONFIG_NET_CLS_ACT
-	} else if (!cl) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-#endif
-	} else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
-		   NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(ret)) {
-			sch->qstats.drops++;
-			cl->qstats.drops++;
-		}
-		return ret;
-	} else
-		htb_activate(q, cl);
-
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-	return NET_XMIT_SUCCESS;
-}
-
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
  *
@@ -1565,7 +1524,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct htb_sched),
 	.enqueue	=	htb_enqueue,
 	.dequeue	=	htb_dequeue,
-	.requeue	=	htb_requeue,
+	.peek		=	qdisc_peek_dequeued,
 	.drop		=	htb_drop,
 	.init		=	htb_init,
 	.reset		=	htb_reset,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 915f3149dde2..f645ac55a1a1 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-
-static int
-multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct Qdisc *qdisc;
-	struct multiq_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	qdisc = multiq_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
-	if (qdisc == NULL) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-#endif
-
-	ret = qdisc->ops->requeue(skb, qdisc);
-	if (ret == NET_XMIT_SUCCESS) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		if (q->curband)
-			q->curband--;
-		else
-			q->curband = q->bands - 1;
-		return NET_XMIT_SUCCESS;
-	}
-	if (net_xmit_drop_count(ret))
-		sch->qstats.drops++;
-	return ret;
-}
-
-
 static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
@@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 			q->curband = 0;
 
 		/* Check that target subqueue is available before
-		 * pulling an skb to avoid excessive requeues
+		 * pulling an skb to avoid head-of-line blocking.
 		 */
 		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
 			qdisc = q->queues[q->curband];
@@ -155,6 +121,34 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 
 }
 
+static struct sk_buff *multiq_peek(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned int curband = q->curband;
+	struct Qdisc *qdisc;
+	struct sk_buff *skb;
+	int band;
+
+	for (band = 0; band < q->bands; band++) {
+		/* cycle through bands to ensure fairness */
+		curband++;
+		if (curband >= q->bands)
+			curband = 0;
+
+		/* Check that target subqueue is available before
+		 * pulling an skb to avoid head-of-line blocking.
+		 */
+		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+			qdisc = q->queues[curband];
+			skb = qdisc->ops->peek(qdisc);
+			if (skb)
+				return skb;
+		}
+	}
+	return NULL;
+
+}
+
 static unsigned int multiq_drop(struct Qdisc *sch)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
@@ -451,7 +445,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct multiq_sched_data),
 	.enqueue	=	multiq_enqueue,
 	.dequeue	=	multiq_dequeue,
-	.requeue	=	multiq_requeue,
+	.peek		=	multiq_peek,
 	.drop		=	multiq_drop,
 	.init		=	multiq_init,
 	.reset		=	multiq_reset,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a11959908d9a..3cbc3ff7b5bc 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -233,7 +233,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		 */
 		cb->time_to_send = psched_get_time();
 		q->counter = 0;
-		ret = q->qdisc->ops->requeue(skb, q->qdisc);
+
+		__skb_queue_head(&q->qdisc->q, skb);
+		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
+		q->qdisc->qstats.requeues++;
+		ret = NET_XMIT_SUCCESS;
 	}
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -248,20 +252,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-/* Requeue packets but don't change time stamp */
-static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	}
-
-	return ret;
-}
-
 static unsigned int netem_drop(struct Qdisc* sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -283,25 +273,22 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	if (sch->flags & TCQ_F_THROTTLED)
 		return NULL;
 
-	skb = q->qdisc->dequeue(q->qdisc);
+	skb = q->qdisc->ops->peek(q->qdisc);
 	if (skb) {
 		const struct netem_skb_cb *cb = netem_skb_cb(skb);
 		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
 		if (cb->time_to_send <= now) {
+			skb = qdisc_dequeue_peeked(q->qdisc);
+			if (unlikely(!skb))
+				return NULL;
+
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
 			return skb;
 		}
 
-		if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
-			qdisc_tree_decrease_qlen(q->qdisc, 1);
-			sch->qstats.drops++;
-			printk(KERN_ERR "netem: %s could not requeue\n",
-			       q->qdisc->ops->id);
-		}
-
 		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 	}
 
@@ -351,7 +338,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	return 0;
 }
 
-static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_corr *c = nla_data(attr);
@@ -359,27 +346,24 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 	init_crandom(&q->delay_cor, c->delay_corr);
 	init_crandom(&q->loss_cor, c->loss_corr);
 	init_crandom(&q->dup_cor, c->dup_corr);
-	return 0;
 }
 
-static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_reorder *r = nla_data(attr);
 
 	q->reorder = r->probability;
 	init_crandom(&q->reorder_cor, r->correlation);
-	return 0;
 }
 
-static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_corrupt *r = nla_data(attr);
 
 	q->corrupt = r->probability;
 	init_crandom(&q->corrupt_cor, r->correlation);
-	return 0;
 }
 
 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
@@ -438,11 +422,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (q->gap)
 		q->reorder = ~0;
 
-	if (tb[TCA_NETEM_CORR]) {
-		ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
-		if (ret)
-			return ret;
-	}
+	if (tb[TCA_NETEM_CORR])
+		get_correlation(sch, tb[TCA_NETEM_CORR]);
 
 	if (tb[TCA_NETEM_DELAY_DIST]) {
 		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
@@ -450,17 +431,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 			return ret;
 	}
 
-	if (tb[TCA_NETEM_REORDER]) {
-		ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
-		if (ret)
-			return ret;
-	}
+	if (tb[TCA_NETEM_REORDER])
+		get_reorder(sch, tb[TCA_NETEM_REORDER]);
 
-	if (tb[TCA_NETEM_CORRUPT]) {
-		ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
-		if (ret)
-			return ret;
-	}
+	if (tb[TCA_NETEM_CORRUPT])
+		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 
 	return 0;
 }
@@ -541,7 +516,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	tfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
-	.requeue	=	qdisc_requeue,
+	.peek		=	qdisc_peek_head,
 	.drop		=	qdisc_queue_drop,
 	.init		=	tfifo_init,
 	.reset		=	qdisc_reset_queue,
@@ -624,99 +599,12 @@ nla_put_failure:
 	return -1;
 }
 
-static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
-			  struct sk_buff *skb, struct tcmsg *tcm)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-
-	if (cl != 1) 	/* only one class */
-		return -ENOENT;
-
-	tcm->tcm_handle |= TC_H_MIN(1);
-	tcm->tcm_info = q->qdisc->handle;
-
-	return 0;
-}
-
-static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
-		     struct Qdisc **old)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-
-	if (new == NULL)
-		new = &noop_qdisc;
-
-	sch_tree_lock(sch);
-	*old = xchg(&q->qdisc, new);
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
-
-	return 0;
-}
-
-static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	return q->qdisc;
-}
-
-static unsigned long netem_get(struct Qdisc *sch, u32 classid)
-{
-	return 1;
-}
-
-static void netem_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
-static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
-			    struct nlattr **tca, unsigned long *arg)
-{
-	return -ENOSYS;
-}
-
-static int netem_delete(struct Qdisc *sch, unsigned long arg)
-{
-	return -ENOSYS;
-}
-
-static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
-	if (!walker->stop) {
-		if (walker->count >= walker->skip)
-			if (walker->fn(sch, 1, walker) < 0) {
-				walker->stop = 1;
-				return;
-			}
-		walker->count++;
-	}
-}
-
-static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
-{
-	return NULL;
-}
-
-static const struct Qdisc_class_ops netem_class_ops = {
-	.graft		=	netem_graft,
-	.leaf		=	netem_leaf,
-	.get		=	netem_get,
-	.put		=	netem_put,
-	.change		=	netem_change_class,
-	.delete		=	netem_delete,
-	.walk		=	netem_walk,
-	.tcf_chain	=	netem_find_tcf,
-	.dump		=	netem_dump_class,
-};
-
 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.id		=	"netem",
-	.cl_ops		=	&netem_class_ops,
 	.priv_size	=	sizeof(struct netem_sched_data),
 	.enqueue	=	netem_enqueue,
 	.dequeue	=	netem_dequeue,
-	.requeue	=	netem_requeue,
+	.peek		=	qdisc_peek_dequeued,
 	.drop		=	netem_drop,
 	.init		=	netem_init,
 	.reset		=	netem_reset,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 504a78cdb718..ea65a87ec22c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -93,34 +93,20 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-
-static int
-prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
+static struct sk_buff *prio_peek(struct Qdisc *sch)
 {
-	struct Qdisc *qdisc;
-	int ret;
-
-	qdisc = prio_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
-	if (qdisc == NULL) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-#endif
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
 
-	if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		return NET_XMIT_SUCCESS;
+	for (prio = 0; prio < q->bands; prio++) {
+		struct Qdisc *qdisc = q->queues[prio];
+		struct sk_buff *skb = qdisc->ops->peek(qdisc);
+		if (skb)
+			return skb;
 	}
-	if (net_xmit_drop_count(ret))
-		sch->qstats.drops++;
-	return ret;
+	return NULL;
 }
 
-
 static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
@@ -421,7 +407,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct prio_sched_data),
 	.enqueue	=	prio_enqueue,
 	.dequeue	=	prio_dequeue,
-	.requeue	=	prio_requeue,
+	.peek		=	prio_peek,
 	.drop		=	prio_drop,
 	.init		=	prio_init,
 	.reset		=	prio_reset,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 5da05839e225..6a0371c22643 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -108,23 +108,6 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct red_sched_data *q = qdisc_priv(sch);
-	struct Qdisc *child = q->qdisc;
-	int ret;
-
-	if (red_is_idling(&q->parms))
-		red_end_of_idle_period(&q->parms);
-
-	ret = child->ops->requeue(skb, child);
-	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->qstats.requeues++;
-		sch->q.qlen++;
-	}
-	return ret;
-}
-
 static struct sk_buff * red_dequeue(struct Qdisc* sch)
 {
 	struct sk_buff *skb;
@@ -140,6 +123,14 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
 	return skb;
 }
 
+static struct sk_buff * red_peek(struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child = q->qdisc;
+
+	return child->ops->peek(child);
+}
+
 static unsigned int red_drop(struct Qdisc* sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
@@ -361,7 +352,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
 	.cl_ops		=	&red_class_ops,
 	.enqueue	=	red_enqueue,
 	.dequeue	=	red_dequeue,
-	.requeue	=	red_requeue,
+	.peek		=	red_peek,
 	.drop		=	red_drop,
 	.init		=	red_init,
 	.reset		=	red_reset,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index fe1508ef0d3d..ab8cfee3c9ce 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -329,71 +329,20 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_CN;
 }
 
-static int
-sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static struct sk_buff *
+sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned int hash;
-	sfq_index x;
-	int ret;
-
-	hash = sfq_classify(skb, sch, &ret);
-	if (hash == 0) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-	hash--;
-
-	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
-	}
-
-	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_head(&q->qs[x], skb);
-	/* If selected queue has length q->limit+1, this means that
-	 * all another queues are empty and we do simple tail drop.
-	 * This packet is still requeued at head of queue, tail packet
-	 * is dropped.
-	 */
-	if (q->qs[x].qlen > q->limit) {
-		skb = q->qs[x].prev;
-		__skb_unlink(skb, &q->qs[x]);
-		sch->qstats.drops++;
-		sch->qstats.backlog -= qdisc_pkt_len(skb);
-		kfree_skb(skb);
-		return NET_XMIT_CN;
-	}
-
-	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
-			q->next[x] = x;
-			q->allot[x] = q->quantum;
-		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
-			q->tail = x;
-		}
-	}
+	sfq_index a;
 
-	if (++sch->q.qlen <= q->limit) {
-		sch->qstats.requeues++;
-		return 0;
-	}
+	/* No active slots */
+	if (q->tail == SFQ_DEPTH)
+		return NULL;
 
-	sch->qstats.drops++;
-	sfq_drop(sch);
-	return NET_XMIT_CN;
+	a = q->next[q->tail];
+	return skb_peek(&q->qs[a]);
 }
 
-
-
-
 static struct sk_buff *
 sfq_dequeue(struct Qdisc *sch)
 {
@@ -624,7 +573,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct sfq_sched_data),
 	.enqueue	=	sfq_enqueue,
 	.dequeue	=	sfq_dequeue,
-	.requeue	=	sfq_requeue,
+	.peek		=	sfq_peek,
 	.drop		=	sfq_drop,
 	.init		=	sfq_init,
 	.reset		=	sfq_reset,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 94c61598b86a..bb7783d584bb 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return 0;
 }
 
-static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct tbf_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	}
-
-	return ret;
-}
-
 static unsigned int tbf_drop(struct Qdisc* sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -169,7 +156,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 
-	skb = q->qdisc->dequeue(q->qdisc);
+	skb = q->qdisc->ops->peek(q->qdisc);
 
 	if (skb) {
 		psched_time_t now;
@@ -192,6 +179,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		toks -= L2T(q, len);
 
 		if ((toks|ptoks) >= 0) {
+			skb = qdisc_dequeue_peeked(q->qdisc);
+			if (unlikely(!skb))
+				return NULL;
+
 			q->t_c = now;
 			q->tokens = toks;
 			q->ptokens = ptoks;
@@ -214,12 +205,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		   (cf. CSZ, HPFQ, HFSC)
 		 */
 
-		if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
-			/* When requeue fails skb is dropped */
-			qdisc_tree_decrease_qlen(q->qdisc, 1);
-			sch->qstats.drops++;
-		}
-
 		sch->qstats.overlimits++;
 	}
 	return NULL;
@@ -469,7 +454,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct tbf_sched_data),
 	.enqueue	=	tbf_enqueue,
 	.dequeue	=	tbf_dequeue,
-	.requeue	=	tbf_requeue,
+	.peek		=	qdisc_peek_dequeued,
 	.drop		=	tbf_drop,
 	.init		=	tbf_init,
 	.reset		=	tbf_reset,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d35ef059abb1..cfc8e7caba62 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return NET_XMIT_DROP;
 }
 
-static int
-teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct teql_sched_data *q = qdisc_priv(sch);
-
-	__skb_queue_head(&q->q, skb);
-	sch->qstats.requeues++;
-	return 0;
-}
-
 static struct sk_buff *
 teql_dequeue(struct Qdisc* sch)
 {
@@ -123,6 +113,13 @@ teql_dequeue(struct Qdisc* sch)
 	return skb;
 }
 
+static struct sk_buff *
+teql_peek(struct Qdisc* sch)
+{
+	/* teql is meant to be used as root qdisc */
+	return NULL;
+}
+
 static __inline__ void
 teql_neigh_release(struct neighbour *n)
 {
@@ -433,7 +430,7 @@ static __init void teql_master_setup(struct net_device *dev)
 
 	ops->enqueue	=	teql_enqueue;
 	ops->dequeue	=	teql_dequeue;
-	ops->requeue	=	teql_requeue;
+	ops->peek	=	teql_peek;
 	ops->init	=	teql_qdisc_init;
 	ops->reset	=	teql_reset;
 	ops->destroy	=	teql_destroy;