diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-20 17:43:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-20 17:43:29 -0700 |
commit | db6d8c7a4027b48d797b369a53f8470aaeed7063 (patch) | |
tree | e140c104a89abc2154e1f41a7db8ebecbb6fa0b4 /net/sched | |
parent | 3a533374283aea50eab3976d8a6d30532175f009 (diff) | |
parent | fb65a7c091529bfffb1262515252c0d0f6241c5c (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (1232 commits)
iucv: Fix bad merging.
net_sched: Add size table for qdiscs
net_sched: Add accessor function for packet length for qdiscs
net_sched: Add qdisc_enqueue wrapper
highmem: Export totalhigh_pages.
ipv6 mcast: Omit redundant address family checks in ip6_mc_source().
net: Use standard structures for generic socket address structures.
ipv6 netns: Make several "global" sysctl variables namespace aware.
netns: Use net_eq() to compare net-namespaces for optimization.
ipv6: remove unused macros from net/ipv6.h
ipv6: remove unused parameter from ip6_ra_control
tcp: fix kernel panic with listening_get_next
tcp: Remove redundant checks when setting eff_sacks
tcp: options clean up
tcp: Fix MD5 signatures for non-linear skbs
sctp: Update sctp global memory limit allocations.
sctp: remove unnecessary byteshifting, calculate directly in big-endian
sctp: Allow only 1 listening socket with SO_REUSEADDR
sctp: Do not leak memory on multiple listen() calls
sctp: Support ipv6only AF_INET6 sockets.
...
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/act_gact.c | 2 | ||||
-rw-r--r-- | net/sched/act_ipt.c | 2 | ||||
-rw-r--r-- | net/sched/act_mirred.c | 4 | ||||
-rw-r--r-- | net/sched/act_nat.c | 2 | ||||
-rw-r--r-- | net/sched/act_pedit.c | 2 | ||||
-rw-r--r-- | net/sched/act_police.c | 8 | ||||
-rw-r--r-- | net/sched/act_simple.c | 2 | ||||
-rw-r--r-- | net/sched/cls_api.c | 20 | ||||
-rw-r--r-- | net/sched/cls_flow.c | 52 | ||||
-rw-r--r-- | net/sched/cls_route.c | 12 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 18 | ||||
-rw-r--r-- | net/sched/sch_api.c | 613 | ||||
-rw-r--r-- | net/sched/sch_atm.c | 12 | ||||
-rw-r--r-- | net/sched/sch_cbq.c | 155 | ||||
-rw-r--r-- | net/sched/sch_dsmark.c | 10 | ||||
-rw-r--r-- | net/sched/sch_fifo.c | 49 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 465 | ||||
-rw-r--r-- | net/sched/sch_gred.c | 14 | ||||
-rw-r--r-- | net/sched/sch_hfsc.c | 110 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 195 | ||||
-rw-r--r-- | net/sched/sch_ingress.c | 2 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 65 | ||||
-rw-r--r-- | net/sched/sch_prio.c | 143 | ||||
-rw-r--r-- | net/sched/sch_red.c | 37 | ||||
-rw-r--r-- | net/sched/sch_sfq.c | 20 | ||||
-rw-r--r-- | net/sched/sch_tbf.c | 42 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 50 |
27 files changed, 1211 insertions, 895 deletions
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 422872c4f14b..ac04289da5d7 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -139,7 +139,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result #else action = gact->tcf_action; #endif - gact->tcf_bstats.bytes += skb->len; + gact->tcf_bstats.bytes += qdisc_pkt_len(skb); gact->tcf_bstats.packets++; if (action == TC_ACT_SHOT) gact->tcf_qstats.drops++; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index da696fd3e341..d1263b3c96c3 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -205,7 +205,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, spin_lock(&ipt->tcf_lock); ipt->tcf_tm.lastuse = jiffies; - ipt->tcf_bstats.bytes += skb->len; + ipt->tcf_bstats.bytes += qdisc_pkt_len(skb); ipt->tcf_bstats.packets++; /* yes, we have to worry about both in and out dev diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1aff005d95cd..70341c020b6d 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -164,7 +164,7 @@ bad_mirred: if (skb2 != NULL) kfree_skb(skb2); m->tcf_qstats.overlimits++; - m->tcf_bstats.bytes += skb->len; + m->tcf_bstats.bytes += qdisc_pkt_len(skb); m->tcf_bstats.packets++; spin_unlock(&m->tcf_lock); /* should we be asking for packet to be dropped? @@ -184,7 +184,7 @@ bad_mirred: goto bad_mirred; } - m->tcf_bstats.bytes += skb2->len; + m->tcf_bstats.bytes += qdisc_pkt_len(skb2); m->tcf_bstats.packets++; if (!(at & AT_EGRESS)) if (m->tcfm_ok_push) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 0a3c8339767a..7b39ed485bca 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -124,7 +124,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, egress = p->flags & TCA_NAT_FLAG_EGRESS; action = p->tcf_action; - p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); p->tcf_bstats.packets++; spin_unlock(&p->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 3cc4cb9e500e..d5f4e3404864 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -182,7 +182,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, bad: p->tcf_qstats.overlimits++; done: - p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); p->tcf_bstats.packets++; spin_unlock(&p->tcf_lock); return p->tcf_action; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0898120bbcc0..32c3f9d9fb7a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -272,7 +272,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, spin_lock(&police->tcf_lock); - police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.bytes += qdisc_pkt_len(skb); police->tcf_bstats.packets++; if (police->tcfp_ewma_rate && @@ -282,7 +282,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, return police->tcf_action; } - if (skb->len <= police->tcfp_mtu) { + if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { spin_unlock(&police->tcf_lock); return police->tcfp_result; @@ -295,12 +295,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, ptoks = toks + police->tcfp_ptoks; if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); + ptoks -= L2T_P(police, qdisc_pkt_len(skb)); } toks += police->tcfp_toks; if (toks > (long)police->tcfp_burst) toks = police->tcfp_burst; - toks -= L2T(police, skb->len); + toks -= L2T(police, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { police->tcfp_t_c = now; police->tcfp_toks = toks; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 1d421d059caf..e7851ce92cfe 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -41,7 +41,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result spin_lock(&d->tcf_lock); d->tcf_tm.lastuse = jiffies; - d->tcf_bstats.bytes += skb->len; + d->tcf_bstats.bytes += qdisc_pkt_len(skb); d->tcf_bstats.packets++; /* print policy string followed by _ then packet count diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9360fc81e8c7..d2b6f54a6261 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -120,6 +120,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + spinlock_t *root_lock; struct tcmsg *t; u32 protocol; u32 prio; @@ -166,7 +167,8 @@ replay: /* Find qdisc */ if (!parent) { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); @@ -203,6 +205,8 @@ replay: } } + root_lock = qdisc_root_lock(q); + if (tp == NULL) { /* Proto-tcf does not exist, create new one */ @@ -262,10 +266,10 @@ replay: goto errout; } - qdisc_lock_tree(dev); + spin_lock_bh(root_lock); tp->next = *back; *back = tp; - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) goto errout; @@ -274,9 +278,9 @@ replay: if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - qdisc_lock_tree(dev); + spin_lock_bh(root_lock); *back = tp->next; - qdisc_unlock_tree(dev); + spin_lock_bh(root_lock); tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); @@ -334,7 +338,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad1 = 0; - tcm->tcm_ifindex = tp->q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; tcm->tcm_parent = tp->classid; tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind); @@ -390,6 +394,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -408,8 +413,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; + dev_queue = netdev_get_tx_queue(dev, 0); if (!tcm->tcm_parent) - q = dev->qdisc_sleeping; + q = dev_queue->qdisc_sleeping; else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 971b867e0484..8f63a1a94014 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -36,6 +36,8 @@ struct flow_filter { struct list_head list; struct tcf_exts exts; struct tcf_ematch_tree ematches; + struct timer_list perturb_timer; + u32 perturb_period; u32 handle; u32 nkeys; @@ -47,11 +49,9 @@ struct flow_filter { u32 addend; u32 divisor; u32 baseclass; + u32 hashrnd; }; -static u32 flow_hashrnd __read_mostly; -static int flow_hashrnd_initted __read_mostly; - static const struct tcf_ext_map flow_ext_map = { .action = TCA_FLOW_ACT, .police = TCA_FLOW_POLICE, @@ -348,7 +348,7 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, } if (f->mode == FLOW_MODE_HASH) - classid = jhash2(keys, f->nkeys, flow_hashrnd); + classid = jhash2(keys, f->nkeys, f->hashrnd); else { classid = keys[0]; classid = (classid & f->mask) ^ f->xor; @@ -369,6 +369,15 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, return -1; } +static void flow_perturbation(unsigned long arg) +{ + struct flow_filter *f = (struct flow_filter *)arg; + + get_random_bytes(&f->hashrnd, 4); + if (f->perturb_period) + mod_timer(&f->perturb_timer, jiffies + f->perturb_period); +} + static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_KEYS] = { .type = NLA_U32 }, [TCA_FLOW_MODE] = { .type = NLA_U32 }, @@ -381,6 +390,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_ACT] = { .type = NLA_NESTED }, [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, + [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; static int flow_change(struct tcf_proto *tp, unsigned long base, @@ -394,6 +404,7 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, struct tcf_exts e; struct tcf_ematch_tree t; unsigned int nkeys = 0; + unsigned int perturb_period = 0; u32 baseclass = 0; u32 keymask = 0; u32 mode; @@ -442,6 +453,14 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; + + if (mode == FLOW_MODE_HASH) + perturb_period = f->perturb_period; + if (tb[TCA_FLOW_PERTURB]) { + if (mode != FLOW_MODE_HASH) + goto err2; + perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; + } } else { err = -EINVAL; if (!handle) @@ -455,6 +474,12 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; + if (tb[TCA_FLOW_PERTURB]) { + if (mode != FLOW_MODE_HASH) + goto err2; + perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; + } + if (TC_H_MAJ(baseclass) == 0) baseclass = TC_H_MAKE(tp->q->handle, baseclass); if (TC_H_MIN(baseclass) == 0) @@ -467,6 +492,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, f->handle = handle; f->mask = ~0U; + + get_random_bytes(&f->hashrnd, 4); + f->perturb_timer.function = flow_perturbation; + f->perturb_timer.data = (unsigned long)f; + init_timer_deferrable(&f->perturb_timer); } tcf_exts_change(tp, &f->exts, &e); @@ -495,6 +525,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, if (baseclass) f->baseclass = baseclass; + f->perturb_period = perturb_period; + del_timer(&f->perturb_timer); + if (perturb_period) + mod_timer(&f->perturb_timer, jiffies + perturb_period); + if (*arg == 0) list_add_tail(&f->list, &head->filters); @@ -512,6 +547,7 @@ err1: static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f) { + del_timer_sync(&f->perturb_timer); tcf_exts_destroy(tp, &f->exts); tcf_em_tree_destroy(tp, &f->ematches); kfree(f); @@ -532,11 +568,6 @@ static int flow_init(struct tcf_proto *tp) { struct flow_head *head; - if (!flow_hashrnd_initted) { - get_random_bytes(&flow_hashrnd, 4); - flow_hashrnd_initted = 1; - } - head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; @@ -605,6 +636,9 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, if (f->baseclass) NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass); + if (f->perturb_period) + NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ); + if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) goto nla_put_failure; #ifdef CONFIG_NET_EMATCH diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 784dcb870b98..481260a4f10f 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -73,11 +73,13 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif) } static inline -void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id) +void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) { - qdisc_lock_tree(dev); + spinlock_t *root_lock = qdisc_root_lock(q); + + spin_lock_bh(root_lock); memset(head->fastmap, 0, sizeof(head->fastmap)); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } static inline void @@ -302,7 +304,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) *fp = f->next; tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(tp->q, head, f->id); route4_delete_filter(tp, f); /* Strip tree */ @@ -500,7 +502,7 @@ reinsert: } tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q->dev, head, f->id); + route4_reset_fastmap(tp->q, head, f->id); *arg = (unsigned long)f; return 0; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4d755444c449..527db2559dd2 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -75,7 +75,6 @@ struct tc_u_hnode struct tc_u_common { - struct tc_u_common *next; struct tc_u_hnode *hlist; struct Qdisc *q; int refcnt; @@ -87,8 +86,6 @@ static const struct tcf_ext_map u32_ext_map = { .police = TCA_U32_POLICE }; -static struct tc_u_common *u32_list; - static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) { unsigned h = ntohl(key & sel->hmask)>>fshift; @@ -287,9 +284,7 @@ static int u32_init(struct tcf_proto *tp) struct tc_u_hnode *root_ht; struct tc_u_common *tp_c; - for (tp_c = u32_list; tp_c; tp_c = tp_c->next) - if (tp_c->q == tp->q) - break; + tp_c = tp->q->u32_node; root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) @@ -307,8 +302,7 @@ static int u32_init(struct tcf_proto *tp) return -ENOBUFS; } tp_c->q = tp->q; - tp_c->next = u32_list; - u32_list = tp_c; + tp->q->u32_node = tp_c; } tp_c->refcnt++; @@ -402,14 +396,8 @@ static void u32_destroy(struct tcf_proto *tp) if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; - struct tc_u_common **tp_cp; - for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) { - if (*tp_cp == tp_c) { - *tp_cp = tp_c->next; - break; - } - } + tp->q->u32_node = NULL; for (ht = tp_c->hlist; ht; ht = ht->next) { ht->refcnt--; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 10f01ad04380..5219d5f9d754 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -99,7 +99,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, ---requeue requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if dev->tbusy=0. + just buggy devices, which can defer output even if netif_queue_stopped()=0. ---reset @@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - struct Qdisc *q; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + struct Qdisc *q, *txq_root = txq->qdisc; + + if (!(txq_root->flags & TCQ_F_BUILTIN) && + txq_root->handle == handle) + return txq_root; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q->handle == handle) - return q; + list_for_each_entry(q, &txq_root->list, list) { + if (q->handle == handle) + return q; + } } return NULL; } @@ -277,15 +286,137 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) } EXPORT_SYMBOL(qdisc_put_rtab); +static LIST_HEAD(qdisc_stab_list); +static DEFINE_SPINLOCK(qdisc_stab_lock); + +static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { + [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, + [TCA_STAB_DATA] = { .type = NLA_BINARY }, +}; + +static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) +{ + struct nlattr *tb[TCA_STAB_MAX + 1]; + struct qdisc_size_table *stab; + struct tc_sizespec *s; + unsigned int tsize = 0; + u16 *tab = NULL; + int err; + + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + if (err < 0) + return ERR_PTR(err); + if (!tb[TCA_STAB_BASE]) + return ERR_PTR(-EINVAL); + + s = nla_data(tb[TCA_STAB_BASE]); + + if (s->tsize > 0) { + if (!tb[TCA_STAB_DATA]) + return ERR_PTR(-EINVAL); + tab = nla_data(tb[TCA_STAB_DATA]); + tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); + } + + if (!s || tsize != s->tsize || (!tab && tsize > 0)) + return ERR_PTR(-EINVAL); + + spin_lock(&qdisc_stab_lock); + + list_for_each_entry(stab, &qdisc_stab_list, list) { + if (memcmp(&stab->szopts, s, sizeof(*s))) + continue; + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) + continue; + stab->refcnt++; + spin_unlock(&qdisc_stab_lock); + return stab; + } + + spin_unlock(&qdisc_stab_lock); + + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); + if (!stab) + return ERR_PTR(-ENOMEM); + + stab->refcnt = 1; + stab->szopts = *s; + if (tsize > 0) + memcpy(stab->data, tab, tsize * sizeof(u16)); + + spin_lock(&qdisc_stab_lock); + list_add_tail(&stab->list, &qdisc_stab_list); + spin_unlock(&qdisc_stab_lock); + + return stab; +} + +void qdisc_put_stab(struct qdisc_size_table *tab) +{ + if (!tab) + return; + + spin_lock(&qdisc_stab_lock); + + if (--tab->refcnt == 0) { + list_del(&tab->list); + kfree(tab); + } + + spin_unlock(&qdisc_stab_lock); +} +EXPORT_SYMBOL(qdisc_put_stab); + +static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_STAB); + NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); + nla_nest_end(skb, nest); + + return skb->len; + +nla_put_failure: + return -1; +} + +void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +{ + int pkt_len, slot; + + pkt_len = skb->len + stab->szopts.overhead; + if (unlikely(!stab->szopts.tsize)) + goto out; + + slot = pkt_len + stab->szopts.cell_align; + if (unlikely(slot < 0)) + slot = 0; + + slot >>= stab->szopts.cell_log; + if (likely(slot < stab->szopts.tsize)) + pkt_len = stab->data[slot]; + else + pkt_len = stab->data[stab->szopts.tsize - 1] * + (slot / stab->szopts.tsize) + + stab->data[slot % stab->szopts.tsize]; + + pkt_len <<= stab->szopts.size_log; +out: + if (unlikely(pkt_len < 1)) + pkt_len = 1; + qdisc_skb_cb(skb)->pkt_len = pkt_len; +} +EXPORT_SYMBOL(qdisc_calculate_pkt_len); + static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - struct net_device *dev = wd->qdisc->dev; wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - netif_schedule(dev); + __netif_schedule(wd->qdisc); return HRTIMER_NORESTART; } @@ -316,6 +447,110 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) } EXPORT_SYMBOL(qdisc_watchdog_cancel); +struct hlist_head *qdisc_class_hash_alloc(unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head), i; + struct hlist_head *h; + + if (size <= PAGE_SIZE) + h = kmalloc(size, GFP_KERNEL); + else + h = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(size)); + + if (h != NULL) { + for (i = 0; i < n; i++) + INIT_HLIST_HEAD(&h[i]); + } + return h; +} + +static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) +{ + unsigned int size = n * sizeof(struct hlist_head); + + if (size <= PAGE_SIZE) + kfree(h); + else + free_pages((unsigned long)h, get_order(size)); +} + +void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) +{ + struct Qdisc_class_common *cl; + struct hlist_node *n, *next; + struct hlist_head *nhash, *ohash; + unsigned int nsize, nmask, osize; + unsigned int i, h; + + /* Rehash when load factor exceeds 0.75 */ + if (clhash->hashelems * 4 <= clhash->hashsize * 3) + return; + nsize = clhash->hashsize * 2; + nmask = nsize - 1; + nhash = qdisc_class_hash_alloc(nsize); + if (nhash == NULL) + return; + + ohash = clhash->hash; + osize = clhash->hashsize; + + sch_tree_lock(sch); + for (i = 0; i < osize; i++) { + hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + h = qdisc_class_hash(cl->classid, nmask); + hlist_add_head(&cl->hnode, &nhash[h]); + } + } + clhash->hash = nhash; + clhash->hashsize = nsize; + clhash->hashmask = nmask; + sch_tree_unlock(sch); + + qdisc_class_hash_free(ohash, osize); +} +EXPORT_SYMBOL(qdisc_class_hash_grow); + +int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) +{ + unsigned int size = 4; + + clhash->hash = qdisc_class_hash_alloc(size); + if (clhash->hash == NULL) + return -ENOMEM; + clhash->hashsize = size; + clhash->hashmask = size - 1; + clhash->hashelems = 0; + return 0; +} +EXPORT_SYMBOL(qdisc_class_hash_init); + +void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) +{ + qdisc_class_hash_free(clhash->hash, clhash->hashsize); +} +EXPORT_SYMBOL(qdisc_class_hash_destroy); + +void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + unsigned int h; + + INIT_HLIST_NODE(&cl->hnode); + h = qdisc_class_hash(cl->classid, clhash->hashmask); + hlist_add_head(&cl->hnode, &clhash->hash[h]); + clhash->hashelems++; +} +EXPORT_SYMBOL(qdisc_class_hash_insert); + +void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, + struct Qdisc_class_common *cl) +{ + hlist_del(&cl->hnode); + clhash->hashelems--; +} +EXPORT_SYMBOL(qdisc_class_hash_remove); + /* Allocate an unique handle from space managed by kernel */ static u32 qdisc_alloc_handle(struct net_device *dev) @@ -332,32 +567,39 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device dev */ +/* Attach toplevel qdisc to device queue. */ -static struct Qdisc * -dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) +static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc) { + spinlock_t *root_lock; struct Qdisc *oqdisc; + int ingress; + + ingress = 0; + if (qdisc && qdisc->flags&TCQ_F_INGRESS) + ingress = 1; + + if (ingress) { + oqdisc = dev_queue->qdisc; + } else { + oqdisc = dev_queue->qdisc_sleeping; + } - if (dev->flags & IFF_UP) - dev_deactivate(dev); + root_lock = qdisc_root_lock(oqdisc); + spin_lock_bh(root_lock); - qdisc_lock_tree(dev); - if (qdisc && qdisc->flags&TCQ_F_INGRESS) { - oqdisc = dev->qdisc_ingress; + if (ingress) { /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) { /* delete */ qdisc_reset(oqdisc); - dev->qdisc_ingress = NULL; + dev_queue->qdisc = NULL; } else { /* new */ - dev->qdisc_ingress = qdisc; + dev_queue->qdisc = qdisc; } } else { - - oqdisc = dev->qdisc_sleeping; - /* Prune old scheduler */ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) qdisc_reset(oqdisc); @@ -365,14 +607,11 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) /* ... and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; - dev->qdisc_sleeping = qdisc; - dev->qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + dev_queue->qdisc = &noop_qdisc; } - qdisc_unlock_tree(dev); - - if (dev->flags & IFF_UP) - dev_activate(dev); + spin_unlock_bh(root_lock); return oqdisc; } @@ -389,7 +628,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) return; - sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON(parentid != TC_H_ROOT); return; @@ -405,26 +644,66 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -/* Graft qdisc "new" to class "classid" of qdisc "parent" or - to device "dev". +static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + if (new || old) + qdisc_notify(skb, n, clid, old, new); + + if (old) { + spin_lock_bh(&old->q.lock); + qdisc_destroy(old); + spin_unlock_bh(&old->q.lock); + } +} - Old qdisc is not destroyed but returned in *old. +/* Graft qdisc "new" to class "classid" of qdisc "parent" or + * to device "dev". + * + * When appropriate send a netlink notification using 'skb' + * and "n". + * + * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, - u32 classid, - struct Qdisc *new, struct Qdisc **old) + struct sk_buff *skb, struct nlmsghdr *n, u32 classid, + struct Qdisc *new, struct Qdisc *old) { + struct Qdisc *q = old; int err = 0; - struct Qdisc *q = *old; - if (parent == NULL) { - if (q && q->flags&TCQ_F_INGRESS) { - *old = dev_graft_qdisc(dev, q); - } else { - *old = dev_graft_qdisc(dev, new); + unsigned int i, num_q, ingress; + + ingress = 0; + num_q = dev->num_tx_queues; + if (q && q->flags & TCQ_F_INGRESS) { + num_q = 1; + ingress = 1; + } + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + for (i = 0; i < num_q; i++) { + struct netdev_queue *dev_queue = &dev->rx_queue; + + if (!ingress) + dev_queue = netdev_get_tx_queue(dev, i); + + if (ingress) { + old = dev_graft_qdisc(dev_queue, q); + } else { + old = dev_graft_qdisc(dev_queue, new); + if (new && i > 0) + atomic_inc(&new->refcnt); + } + notify_and_destroy(skb, n, classid, old, new); } + + if (dev->flags & IFF_UP) + dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; @@ -433,10 +712,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (cops) { unsigned long cl = cops->get(parent, classid); if (cl) { - err = cops->graft(parent, cl, new, old); + err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); } } + if (!err) + notify_and_destroy(skb, n, classid, old, new); } return err; } @@ -448,13 +729,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, */ static struct Qdisc * -qdisc_create(struct net_device *dev, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, + u32 parent, u32 handle, struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; + struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); #ifdef CONFIG_KMOD @@ -489,7 +771,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, if (ops == NULL) goto err_out; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; @@ -499,10 +781,8 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; - sch->stats_lock = &dev->ingress_lock; handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { - sch->stats_lock = &dev->queue_lock; if (handle == 0) { handle = qdisc_alloc_handle(dev); err = -ENOMEM; @@ -514,9 +794,17 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) { + err = PTR_ERR(stab); + goto err_out3; + } + sch->stab = stab; + } if (tca[TCA_RATE]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); if (err) { /* @@ -529,13 +817,13 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle, goto err_out3; } } - qdisc_lock_tree(dev); - list_add_tail(&sch->list, &dev->qdisc_list); - qdisc_unlock_tree(dev); + if (parent) + list_add_tail(&sch->list, &dev_queue->qdisc->list); return sch; } err_out3: + qdisc_put_stab(sch->stab); dev_put(dev); kfree((char *) sch - sch->padded); err_out2: @@ -547,18 +835,29 @@ err_out: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - if (tca[TCA_OPTIONS]) { - int err; + struct qdisc_size_table *stab = NULL; + int err = 0; + if (tca[TCA_OPTIONS]) { if (sch->ops->change == NULL) return -EINVAL; err = sch->ops->change(sch, tca[TCA_OPTIONS]); if (err) return err; } + + if (tca[TCA_STAB]) { + stab = qdisc_get_stab(tca[TCA_STAB]); + if (IS_ERR(stab)) + return PTR_ERR(stab); + } + + qdisc_put_stab(sch->stab); + sch->stab = stab; + if (tca[TCA_RATE]) gen_replace_estimator(&sch->bstats, &sch->rate_est, - sch->stats_lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -634,10 +933,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -ENOENT; q = qdisc_leaf(p, clid); } else { /* ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; } if (!q) return -ENOENT; @@ -657,14 +958,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; if (q->handle == 0) return -ENOENT; - if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0) + if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; - if (q) { - qdisc_notify(skb, n, clid, q, NULL); - qdisc_lock_tree(dev); - qdisc_destroy(q); - qdisc_unlock_tree(dev); - } } else { qdisc_notify(skb, n, clid, NULL, q); } @@ -708,10 +1003,12 @@ replay: return -ENOENT; q = qdisc_leaf(p, clid); } else { /*ingress */ - q = dev->qdisc_ingress; + q = dev->rx_queue.qdisc; } } else { - q = dev->qdisc_sleeping; + struct netdev_queue *dev_queue; + dev_queue = netdev_get_tx_queue(dev, 0); + q = dev_queue->qdisc_sleeping; } /* It may be default qdisc, ignore it */ @@ -788,10 +1085,12 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent, + q = qdisc_create(dev, &dev->rx_queue, + tcm->tcm_parent, tcm->tcm_parent, tca, &err); else - q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle, + q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), + tcm->tcm_parent, tcm->tcm_handle, tca, &err); if (q == NULL) { if (err == -EAGAIN) @@ -801,22 +1100,18 @@ create_n_graft: graft: if (1) { - struct Qdisc *old_q = NULL; - err = qdisc_graft(dev, p, clid, q, &old_q); + spinlock_t *root_lock; + + err = qdisc_graft(dev, p, skb, n, clid, q, NULL); if (err) { if (q) { - qdisc_lock_tree(dev); + root_lock = qdisc_root_lock(q); + spin_lock_bh(root_lock); qdisc_destroy(q); - qdisc_unlock_tree(dev); + spin_unlock_bh(root_lock); } return err; } - qdisc_notify(skb, n, clid, old_q, q); - if (old_q) { - qdisc_lock_tree(dev); - qdisc_destroy(old_q); - qdisc_unlock_tree(dev); - } } return 0; } @@ -834,7 +1129,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; tcm->tcm_info = atomic_read(&q->refcnt); @@ -843,8 +1138,11 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; + if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + goto nla_put_failure; + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -894,13 +1192,57 @@ err_out: return -EINVAL; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + +static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, + struct netlink_callback *cb, + int *q_idx_p, int s_q_idx) +{ + int ret = 0, q_idx = *q_idx_p; + struct Qdisc *q; + + if (!root) + return 0; + + q = root; + if (q_idx < s_q_idx) { + q_idx++; + } else { + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + list_for_each_entry(q, &root->list, list) { + if (q_idx < s_q_idx) { + q_idx++; + continue; + } + if (!tc_qdisc_dump_ignore(q) && + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + goto done; + q_idx++; + } + +out: + *q_idx_p = q_idx; + return ret; +done: + ret = -1; + goto out; +} + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; - struct Qdisc *q; if (net != &init_net) return 0; @@ -910,21 +1252,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { + struct netdev_queue *dev_queue; + if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (q_idx < s_q_idx) { - q_idx++; - continue; - } - if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) - goto done; - q_idx++; - } + + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + + dev_queue = &dev->rx_queue; + if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + goto done; + cont: idx++; } @@ -949,6 +1292,7 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); + struct netdev_queue *dev_queue; struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -986,6 +1330,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ + dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -996,7 +1341,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. @@ -1007,7 +1352,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev->qdisc_sleeping->handle; + qid = dev_queue->qdisc_sleeping->handle; } /* OK. Locate qdisc */ @@ -1080,7 +1425,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; - tcm->tcm_ifindex = q->dev->ifindex; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; tcm->tcm_info = 0; @@ -1089,7 +1434,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, q->stats_lock, &d) < 0) + TCA_XSTATS, qdisc_root_lock(q), &d) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) @@ -1140,15 +1485,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } +static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct qdisc_dump_args arg; + + if (tc_qdisc_dump_ignore(q) || + *t_p < s_t || !q->ops->cl_ops || + (tcm->tcm_parent && + TC_H_MAJ(tcm->tcm_parent) != q->handle)) { + (*t_p)++; + return 0; + } + if (*t_p > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + arg.w.fn = qdisc_class_dump; + arg.skb = skb; + arg.cb = cb; + arg.w.stop = 0; + arg.w.skip = cb->args[1]; + arg.w.count = 0; + q->ops->cl_ops->walk(q, &arg.w); + cb->args[1] = arg.w.count; + if (arg.w.stop) + return -1; + (*t_p)++; + return 0; +} + +static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, + struct tcmsg *tcm, struct netlink_callback *cb, + int *t_p, int s_t) +{ + struct Qdisc *q; + + if (!root) + return 0; + + if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) + return -1; + + list_for_each_entry(q, &root->list, list) { + if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + } + + return 0; +} + static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct net *net = sock_net(skb->sk); - int t; - int s_t; + struct netdev_queue *dev_queue; struct net_device *dev; - struct Qdisc *q; - struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); - struct qdisc_dump_args arg; + int t, s_t; if (net != &init_net) return 0; @@ -1161,28 +1553,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - list_for_each_entry(q, &dev->qdisc_list, list) { - if (t < s_t || !q->ops->cl_ops || - (tcm->tcm_parent && - TC_H_MAJ(tcm->tcm_parent) != q->handle)) { - t++; - continue; - } - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - arg.w.fn = qdisc_class_dump; - arg.skb = skb; - arg.cb = cb; - arg.w.stop = 0; - arg.w.skip = cb->args[1]; - arg.w.count = 0; - q->ops->cl_ops->walk(q, &arg.w); - cb->args[1] = arg.w.count; - if (arg.w.stop) - break; - t++; - } + dev_queue = netdev_get_tx_queue(dev, 0); + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + dev_queue = &dev->rx_queue; + if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0) + goto done; + +done: cb->args[0] = t; dev_put(dev); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index db0e23ae85f8..04faa835be17 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -296,7 +296,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } flow->filter_list = NULL; - flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; pr_debug("atm_tc_change: qdisc %p\n", flow->q); @@ -428,7 +429,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) #endif } - ret = flow->q->enqueue(skb, flow->q); + ret = qdisc_enqueue(skb, flow->q); if (ret != 0) { drop: __maybe_unused sch->qstats.drops++; @@ -436,9 +437,9 @@ drop: __maybe_unused flow->qstats.drops++; return ret; } - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - flow->bstats.bytes += skb->len; + flow->bstats.bytes += qdisc_pkt_len(skb); flow->bstats.packets++; /* * Okay, this may seem weird. We pretend we've dropped the packet if @@ -555,7 +556,8 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 2a3c97f7dc63..f1d2f8ec8b4c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -73,11 +73,10 @@ struct cbq_sched_data; struct cbq_class { - struct cbq_class *next; /* hash table link */ + struct Qdisc_class_common common; struct cbq_class *next_alive; /* next class with backlog in this priority band */ /* Parameters */ - u32 classid; unsigned char priority; /* class priority */ unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ @@ -144,7 +143,7 @@ struct cbq_class struct cbq_sched_data { - struct cbq_class *classes[16]; /* Hash table of all classes */ + struct Qdisc_class_hash clhash; /* Hash table of all classes */ int nclasses[TC_CBQ_MAXPRIO+1]; unsigned quanta[TC_CBQ_MAXPRIO+1]; @@ -177,23 +176,15 @@ struct cbq_sched_data #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) - -static __inline__ unsigned cbq_hash(u32 h) -{ - h ^= h>>8; - h ^= h>>4; - return h&0xF; -} - static __inline__ struct cbq_class * cbq_class_lookup(struct cbq_sched_data *q, u32 classid) { - struct cbq_class *cl; + struct Qdisc_class_common *clc; - for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next) - if (cl->classid == classid) - return cl; - return NULL; + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct cbq_class, common); } #ifdef CONFIG_NET_CLS_ACT @@ -379,7 +370,6 @@ static int cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); - int len = skb->len; int uninitialized_var(ret); struct cbq_class *cl = cbq_classify(skb, sch, &ret); @@ -396,10 +386,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) #ifdef CONFIG_NET_CLS_ACT cl->q->__parent = sch; #endif - if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { + ret = qdisc_enqueue(skb, cl->q); + if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->bstats.packets++; - sch->bstats.bytes+=len; + sch->bstats.bytes += qdisc_pkt_len(skb); cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); @@ -659,14 +650,13 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); + __netif_schedule(sch); return HRTIMER_NORESTART; } #ifdef CONFIG_NET_CLS_ACT static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { - int len = skb->len; struct Qdisc *sch = child->__parent; struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = q->rx_class; @@ -680,10 +670,10 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) q->rx_class = cl; cl->q->__parent = sch; - if (cl->q->enqueue(skb, cl->q) == 0) { + if (qdisc_enqueue(skb, cl->q) == 0) { sch->q.qlen++; sch->bstats.packets++; - sch->bstats.bytes+=len; + sch->bstats.bytes += qdisc_pkt_len(skb); if (!cl->next_alive) cbq_activate_class(cl); return 0; @@ -889,7 +879,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) if (skb == NULL) goto skip_class; - cl->deficit -= skb->len; + cl->deficit -= qdisc_pkt_len(skb); q->tx_class = cl; q->tx_borrowed = borrow; if (borrow != cl) { @@ -897,11 +887,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) borrow->xstats.borrows++; cl->xstats.borrows++; #else - borrow->xstats.borrows += skb->len; - cl->xstats.borrows += skb->len; + borrow->xstats.borrows += qdisc_pkt_len(skb); + cl->xstats.borrows += qdisc_pkt_len(skb); #endif } - q->tx_len = skb->len; + q->tx_len = qdisc_pkt_len(skb); if (cl->deficit <= 0) { q->active[prio] = cl; @@ -1071,13 +1061,14 @@ static void cbq_adjust_levels(struct cbq_class *this) static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; - unsigned h; + struct hlist_node *n; + unsigned int h; if (q->quanta[prio] == 0) return; - for (h=0; h<16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of arithmetic overflows! */ @@ -1085,9 +1076,9 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ q->quanta[prio]; } - if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { - printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum); - cl->quantum = cl->qdisc->dev->mtu/2 + 1; + if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { + printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); + cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } } @@ -1114,10 +1105,12 @@ static void cbq_sync_defmap(struct cbq_class *cl) if (split->defaults[i]) continue; - for (h=0; h<16; h++) { + for (h = 0; h < q->clhash.hashsize; h++) { + struct hlist_node *n; struct cbq_class *c; - for (c = q->classes[h]; c; c = c->next) { + hlist_for_each_entry(c, n, &q->clhash.hash[h], + common.hnode) { if (c->split == split && c->level < level && c->defmap&(1<<i)) { split->defaults[i] = c; @@ -1135,12 +1128,12 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma if (splitid == 0) { if ((split = cl->split) == NULL) return; - splitid = split->classid; + splitid = split->common.classid; } - if (split == NULL || split->classid != splitid) { + if (split == NULL || split->common.classid != splitid) { for (split = cl->tparent; split; split = split->tparent) - if (split->classid == splitid) + if (split->common.classid == splitid) break; } @@ -1163,13 +1156,7 @@ static void cbq_unlink_class(struct cbq_class *this) struct cbq_class *cl, **clp; struct cbq_sched_data *q = qdisc_priv(this->qdisc); - for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) { - if (cl == this) { - *clp = cl->next; - cl->next = NULL; - break; - } - } + qdisc_class_hash_remove(&q->clhash, &this->common); if (this->tparent) { clp=&this->sibling; @@ -1195,12 +1182,10 @@ static void cbq_unlink_class(struct cbq_class *this) static void cbq_link_class(struct cbq_class *this) { struct cbq_sched_data *q = qdisc_priv(this->qdisc); - unsigned h = cbq_hash(this->classid); struct cbq_class *parent = this->tparent; this->sibling = this; - this->next = q->classes[h]; - q->classes[h] = this; + qdisc_class_hash_insert(&q->clhash, &this->common); if (parent == NULL) return; @@ -1242,6 +1227,7 @@ cbq_reset(struct Qdisc* sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; + struct hlist_node *n; int prio; unsigned h; @@ -1258,8 +1244,8 @@ cbq_reset(struct Qdisc* sch) for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) q->active[prio] = NULL; - for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { qdisc_reset(cl->q); cl->next_alive = NULL; @@ -1406,11 +1392,16 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) return -EINVAL; + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + goto put_rtab; + q->link.refcnt = 1; q->link.sibling = &q->link; - q->link.classid = sch->handle; + q->link.common.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle))) q->link.q = &noop_qdisc; @@ -1419,7 +1410,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.cpriority = TC_CBQ_MAXPRIO-1; q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; q->link.overlimit = cbq_ovl_classic; - q->link.allot = psched_mtu(sch->dev); + q->link.allot = psched_mtu(qdisc_dev(sch)); q->link.quantum = q->link.allot; q->link.weight = q->link.R_tab->rate.rate; @@ -1441,6 +1432,10 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) cbq_addprio(q, &q->link); return 0; + +put_rtab: + qdisc_put_rtab(q->link.R_tab); + return err; } static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) @@ -1521,7 +1516,7 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) struct tc_cbq_fopt opt; if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->classid : 0; + opt.split = cl->split ? cl->split->common.classid : 0; opt.defmap = cl->defmap; opt.defchange = ~0; NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt); @@ -1602,10 +1597,10 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, struct nlattr *nest; if (cl->tparent) - tcm->tcm_parent = cl->tparent->classid; + tcm->tcm_parent = cl->tparent->common.classid; else tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->classid; + tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->q->handle; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -1650,8 +1645,10 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid)) == NULL) + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->common.classid); + if (new == NULL) return -ENOBUFS; } else { #ifdef CONFIG_NET_CLS_ACT @@ -1716,6 +1713,7 @@ static void cbq_destroy(struct Qdisc* sch) { struct cbq_sched_data *q = qdisc_priv(sch); + struct hlist_node *n, *next; struct cbq_class *cl; unsigned h; @@ -1727,18 +1725,16 @@ cbq_destroy(struct Qdisc* sch) * classes from root to leafs which means that filters can still * be bound to classes which have been destroyed already. --TGR '04 */ - for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) tcf_destroy_chain(&cl->filter_list); } - for (h = 0; h < 16; h++) { - struct cbq_class *next; - - for (cl = q->classes[h]; cl; cl = next) { - next = cl->next; + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], + common.hnode) cbq_destroy_class(sch, cl); - } } + qdisc_class_hash_destroy(&q->clhash); } static void cbq_put(struct Qdisc *sch, unsigned long arg) @@ -1747,12 +1743,13 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) if (--cl->refcnt == 0) { #ifdef CONFIG_NET_CLS_ACT + spinlock_t *root_lock = qdisc_root_lock(sch); struct cbq_sched_data *q = qdisc_priv(sch); - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(root_lock); if (q->rx_class == cl) q->rx_class = NULL; - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); #endif cbq_destroy_class(sch, cl); @@ -1781,7 +1778,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl) { /* Check parent */ if (parentid) { - if (cl->tparent && cl->tparent->classid != parentid) + if (cl->tparent && + cl->tparent->common.classid != parentid) return -EINVAL; if (!cl->tparent && parentid != TC_H_ROOT) return -EINVAL; @@ -1830,7 +1828,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -1881,9 +1879,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) + if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; - cl->classid = classid; + cl->common.classid = classid; cl->tparent = parent; cl->qdisc = sch; cl->allot = parent->allot; @@ -1916,9 +1915,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; @@ -2008,15 +2009,15 @@ static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct cbq_sched_data *q = qdisc_priv(sch); + struct cbq_class *cl; + struct hlist_node *n; unsigned h; if (arg->stop) return; - for (h = 0; h < 16; h++) { - struct cbq_class *cl; - - for (cl = q->classes[h]; cl; cl = cl->next) { + for (h = 0; h < q->clhash.hashsize; h++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index c4c1317cd47d..a935676987e2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -60,7 +60,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (new == NULL) new = &noop_qdisc; @@ -251,13 +252,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } - err = p->q->enqueue(skb, p->q); + err = qdisc_enqueue(skb, p->q); if (err != NET_XMIT_SUCCESS) { sch->qstats.drops++; return err; } - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; @@ -390,7 +391,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); + p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 95ed48221652..23d258bfe8ac 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -27,7 +27,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct fifo_sched_data *q = qdisc_priv(sch); - if (likely(sch->qstats.backlog + skb->len <= q->limit)) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_reshape_fail(skb, sch); @@ -48,10 +48,10 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) struct fifo_sched_data *q = qdisc_priv(sch); if (opt == NULL) { - u32 limit = sch->dev->tx_queue_len ? : 1; + u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - limit *= sch->dev->mtu; + limit *= qdisc_dev(sch)->mtu; q->limit = limit; } else { @@ -107,3 +107,46 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; EXPORT_SYMBOL(bfifo_qdisc_ops); + +/* Pass size change message down to embedded FIFO */ +int fifo_set_limit(struct Qdisc *q, unsigned int limit) +{ + struct nlattr *nla; + int ret = -ENOMEM; + + /* Hack to avoid sending change message to non-FIFO */ + if (strncmp(q->ops->id + 1, "fifo", 4) != 0) + return 0; + + nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); + if (nla) { + nla->nla_type = RTM_NEWQDISC; + nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); + ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; + + ret = q->ops->change(q, nla); + kfree(nla); + } + return ret; +} +EXPORT_SYMBOL(fifo_set_limit); + +struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, + unsigned int limit) +{ + struct Qdisc *q; + int err = -ENOMEM; + + q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + ops, TC_H_MAKE(sch->handle, 1)); + if (q) { + err = fifo_set_limit(q, limit); + if (err < 0) { + qdisc_destroy(q); + q = NULL; + } + } + + return q ? : ERR_PTR(err); +} +EXPORT_SYMBOL(fifo_create_dflt); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 13afa7214392..27a51f04db49 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,58 +29,36 @@ /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with - * dev->queue_lock spinlock. + * qdisc_root_lock(qdisc) spinlock. * * The idea is the following: - * - enqueue, dequeue are serialized via top level device - * spinlock dev->queue_lock. - * - ingress filtering is serialized via top level device - * spinlock dev->ingress_lock. + * - enqueue, dequeue are serialized via qdisc root lock + * - ingress filtering is also serialized via qdisc root lock * - updates to tree and tree walking are only done under the rtnl mutex. */ -void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->queue_lock) - __acquires(dev->ingress_lock) -{ - spin_lock_bh(&dev->queue_lock); - spin_lock(&dev->ingress_lock); -} -EXPORT_SYMBOL(qdisc_lock_tree); - -void qdisc_unlock_tree(struct net_device *dev) - __releases(dev->ingress_lock) - __releases(dev->queue_lock) -{ - spin_unlock(&dev->ingress_lock); - spin_unlock_bh(&dev->queue_lock); -} -EXPORT_SYMBOL(qdisc_unlock_tree); - static inline int qdisc_qlen(struct Qdisc *q) { return q->q.qlen; } -static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, - struct Qdisc *q) +static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) - dev->gso_skb = skb; + q->gso_skb = skb; else q->ops->requeue(skb, q); - netif_schedule(dev); + __netif_schedule(q); return 0; } -static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, - struct Qdisc *q) +static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb; - if ((skb = dev->gso_skb)) - dev->gso_skb = NULL; + if ((skb = q->gso_skb)) + q->gso_skb = NULL; else skb = q->dequeue(q); @@ -88,12 +66,12 @@ static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, } static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct net_device *dev, + struct netdev_queue *dev_queue, struct Qdisc *q) { int ret; - if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { /* * Same CPU holding the lock. It may be a transient * configuration error, when hard_start_xmit() recurses. We @@ -103,7 +81,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, kfree_skb(skb); if (net_ratelimit()) printk(KERN_WARNING "Dead loop on netdevice %s, " - "fix it urgently!\n", dev->name); + "fix it urgently!\n", dev_queue->dev->name); ret = qdisc_qlen(q); } else { /* @@ -111,22 +89,22 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, q); } return ret; } /* - * NOTE: Called under dev->queue_lock with locally disabled BH. + * NOTE: Called under qdisc_lock(q) with locally disabled BH. * - * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this - * device at a time. dev->queue_lock serializes queue accesses for - * this device AND dev->qdisc pointer itself. + * __QDISC_STATE_RUNNING guarantees only one CPU can process + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for + * this queue. * * netif_tx_lock serializes accesses to device driver. * - * dev->queue_lock and netif_tx_lock are mutually exclusive, + * qdisc_lock(q) and netif_tx_lock are mutually exclusive, * if one is grabbed, another must be free. * * Note, that this procedure can be called by a watchdog timer @@ -136,27 +114,32 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct net_device *dev) +static inline int qdisc_restart(struct Qdisc *q) { - struct Qdisc *q = dev->qdisc; - struct sk_buff *skb; + struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; + struct net_device *dev; + spinlock_t *root_lock; + struct sk_buff *skb; /* Dequeue packet */ - if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) + if (unlikely((skb = dequeue_skb(q)) == NULL)) return 0; + root_lock = qdisc_root_lock(q); - /* And release queue */ - spin_unlock(&dev->queue_lock); + /* And release qdisc */ + spin_unlock(root_lock); - HARD_TX_LOCK(dev, smp_processor_id()); + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + + HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) - ret = dev_hard_start_xmit(skb, dev); - HARD_TX_UNLOCK(dev); + ret = dev_hard_start_xmit(skb, dev, txq); + HARD_TX_UNLOCK(dev, txq); - spin_lock(&dev->queue_lock); - q = dev->qdisc; + spin_lock(root_lock); switch (ret) { case NETDEV_TX_OK: @@ -166,7 +149,7 @@ static inline int qdisc_restart(struct net_device *dev) case NETDEV_TX_LOCKED: /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, dev, q); + ret = handle_dev_cpu_collision(skb, txq, q); break; default: @@ -175,33 +158,33 @@ static inline int qdisc_restart(struct net_device *dev) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, q); break; } + if (ret && netif_tx_queue_stopped(txq)) + ret = 0; + return ret; } -void __qdisc_run(struct net_device *dev) +void __qdisc_run(struct Qdisc *q) { unsigned long start_time = jiffies; - while (qdisc_restart(dev)) { - if (netif_queue_stopped(dev)) - break; - + while (qdisc_restart(q)) { /* * Postpone processing if * 1. another process needs the CPU; * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule(dev); + __netif_schedule(q); break; } } - clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); + clear_bit(__QDISC_STATE_RUNNING, &q->state); } static void dev_watchdog(unsigned long arg) @@ -209,19 +192,35 @@ static void dev_watchdog(unsigned long arg) struct net_device *dev = (struct net_device *)arg; netif_tx_lock(dev); - if (dev->qdisc != &noop_qdisc) { + if (!qdisc_tx_is_noop(dev)) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { - if (netif_queue_stopped(dev) && - time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) { + int some_queue_stopped = 0; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, i); + if (netif_tx_queue_stopped(txq)) { + some_queue_stopped = 1; + break; + } + } - printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", + if (some_queue_stopped && + time_after(jiffies, (dev->trans_start + + dev->watchdog_timeo))) { + printk(KERN_INFO "NETDEV WATCHDOG: %s: " + "transmit timed out\n", dev->name); dev->tx_timeout(dev); WARN_ON_ONCE(1); } - if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) + if (!mod_timer(&dev->watchdog_timer, + round_jiffies(jiffies + + dev->watchdog_timeo))) dev_hold(dev); } } @@ -317,12 +316,18 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct netdev_queue noop_netdev_queue = { + .qdisc = &noop_qdisc, +}; + struct Qdisc noop_qdisc = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), + .dev_queue = &noop_netdev_queue, }; EXPORT_SYMBOL(noop_qdisc); @@ -335,112 +340,65 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct Qdisc noqueue_qdisc; +static struct netdev_queue noqueue_netdev_queue = { + .qdisc = &noqueue_qdisc, +}; + static struct Qdisc noqueue_qdisc = { .enqueue = NULL, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), + .dev_queue = &noqueue_netdev_queue, }; -static const u8 prio2band[TC_PRIO_MAX+1] = - { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; - -/* 3-band FIFO queue: old style, but should be a bit faster than - generic prio+fifo combination. - */ - -#define PFIFO_FAST_BANDS 3 - -static inline struct sk_buff_head *prio2list(struct sk_buff *skb, - struct Qdisc *qdisc) -{ - struct sk_buff_head *list = qdisc_priv(qdisc); - return list + prio2band[skb->priority & TC_PRIO_MAX]; -} - -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { - struct sk_buff_head *list = prio2list(skb, qdisc); + struct sk_buff_head *list = &qdisc->q; - if (skb_queue_len(list) < qdisc->dev->tx_queue_len) { - qdisc->q.qlen++; + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) return __qdisc_enqueue_tail(skb, qdisc, list); - } return qdisc_drop(skb, qdisc); } -static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) +static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc) { - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); + struct sk_buff_head *list = &qdisc->q; - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { - if (!skb_queue_empty(list + prio)) { - qdisc->q.qlen--; - return __qdisc_dequeue_head(qdisc, list + prio); - } - } + if (!skb_queue_empty(list)) + return __qdisc_dequeue_head(qdisc, list); return NULL; } -static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) { - qdisc->q.qlen++; - return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); + return __qdisc_requeue(skb, qdisc, &qdisc->q); } -static void pfifo_fast_reset(struct Qdisc* qdisc) +static void fifo_fast_reset(struct Qdisc* qdisc) { - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __qdisc_reset_queue(qdisc, list + prio); - + __qdisc_reset_queue(qdisc, &qdisc->q); qdisc->qstats.backlog = 0; - qdisc->q.qlen = 0; -} - -static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) -{ - struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - - memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); - NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); - return skb->len; - -nla_put_failure: - return -1; } -static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) -{ - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - skb_queue_head_init(list + prio); - - return 0; -} - -static struct Qdisc_ops pfifo_fast_ops __read_mostly = { - .id = "pfifo_fast", - .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), - .enqueue = pfifo_fast_enqueue, - .dequeue = pfifo_fast_dequeue, - .requeue = pfifo_fast_requeue, - .init = pfifo_fast_init, - .reset = pfifo_fast_reset, - .dump = pfifo_fast_dump, +static struct Qdisc_ops fifo_fast_ops __read_mostly = { + .id = "fifo_fast", + .priv_size = 0, + .enqueue = fifo_fast_enqueue, + .dequeue = fifo_fast_dequeue, + .requeue = fifo_fast_requeue, + .reset = fifo_fast_reset, .owner = THIS_MODULE, }; -struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, + struct Qdisc_ops *ops) { void *p; struct Qdisc *sch; @@ -462,8 +420,8 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; - sch->dev = dev; - dev_hold(dev); + sch->dev_queue = dev_queue; + dev_hold(qdisc_dev(sch)); atomic_set(&sch->refcnt, 1); return sch; @@ -471,15 +429,16 @@ errout: return ERR_PTR(err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, +struct Qdisc * qdisc_create_dflt(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops, unsigned int parentid) { struct Qdisc *sch; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; - sch->stats_lock = &dev->queue_lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) @@ -491,7 +450,7 @@ errout: } EXPORT_SYMBOL(qdisc_create_dflt); -/* Under dev->queue_lock and BH! */ +/* Under qdisc_root_lock(qdisc) and BH! */ void qdisc_reset(struct Qdisc *qdisc) { @@ -508,86 +467,162 @@ EXPORT_SYMBOL(qdisc_reset); static void __qdisc_destroy(struct rcu_head *head) { struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); + const struct Qdisc_ops *ops = qdisc->ops; + + qdisc_put_stab(qdisc->stab); + gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + + module_put(ops->owner); + dev_put(qdisc_dev(qdisc)); + + kfree_skb(qdisc->gso_skb); + kfree((char *) qdisc - qdisc->padded); } -/* Under dev->queue_lock and BH! */ +/* Under qdisc_root_lock(qdisc) and BH! */ void qdisc_destroy(struct Qdisc *qdisc) { - const struct Qdisc_ops *ops = qdisc->ops; - if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; - list_del(&qdisc->list); - gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); + if (qdisc->parent) + list_del(&qdisc->list); - module_put(ops->owner); - dev_put(qdisc->dev); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } EXPORT_SYMBOL(qdisc_destroy); +static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (txq->qdisc_sleeping != &noop_qdisc) + return false; + } + return true; +} + +static void attach_one_default_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + + if (dev->tx_queue_len) { + qdisc = qdisc_create_dflt(dev, dev_queue, + &fifo_fast_ops, TC_H_ROOT); + if (!qdisc) { + printk(KERN_INFO "%s: activation failed\n", dev->name); + return; + } + } else { + qdisc = &noqueue_qdisc; + } + dev_queue->qdisc_sleeping = qdisc; +} + +static void transition_one_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_need_watchdog) +{ + struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; + int *need_watchdog_p = _need_watchdog; + + rcu_assign_pointer(dev_queue->qdisc, new_qdisc); + if (new_qdisc != &noqueue_qdisc) + *need_watchdog_p = 1; +} + void dev_activate(struct net_device *dev) { + int need_watchdog; + /* No queueing discipline is attached to device; - create default one i.e. pfifo_fast for devices, - which need queueing and noqueue_qdisc for - virtual interfaces + * create default one i.e. fifo_fast for devices, + * which need queueing and noqueue_qdisc for + * virtual interfaces. */ - if (dev->qdisc_sleeping == &noop_qdisc) { - struct Qdisc *qdisc; - if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, - TC_H_ROOT); - if (qdisc == NULL) { - printk(KERN_INFO "%s: activation failed\n", dev->name); - return; - } - list_add_tail(&qdisc->list, &dev->qdisc_list); - } else { - qdisc = &noqueue_qdisc; - } - dev->qdisc_sleeping = qdisc; - } + if (dev_all_qdisc_sleeping_noop(dev)) + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&dev->queue_lock); - rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); - if (dev->qdisc != &noqueue_qdisc) { + need_watchdog = 0; + netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); + + if (need_watchdog) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&dev->queue_lock); } -void dev_deactivate(struct net_device *dev) +static void dev_deactivate_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { + struct Qdisc *qdisc_default = _qdisc_default; + struct sk_buff *skb = NULL; struct Qdisc *qdisc; - struct sk_buff *skb; - int running; - spin_lock_bh(&dev->queue_lock); - qdisc = dev->qdisc; - dev->qdisc = &noop_qdisc; + qdisc = dev_queue->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); - qdisc_reset(qdisc); + dev_queue->qdisc = qdisc_default; + qdisc_reset(qdisc); - skb = dev->gso_skb; - dev->gso_skb = NULL; - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(qdisc_lock(qdisc)); + } kfree_skb(skb); +} + +static bool some_qdisc_is_running(struct net_device *dev, int lock) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + spinlock_t *root_lock; + struct Qdisc *q; + int val; + + dev_queue = netdev_get_tx_queue(dev, i); + q = dev_queue->qdisc; + root_lock = qdisc_root_lock(q); + + if (lock) + spin_lock_bh(root_lock); + + val = test_bit(__QDISC_STATE_RUNNING, &q->state); + + if (lock) + spin_unlock_bh(root_lock); + + if (val) + return true; + } + return false; +} + +void dev_deactivate(struct net_device *dev) +{ + bool running; + + netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -596,16 +631,14 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc_run calls. */ do { - while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + while (some_qdisc_is_running(dev, 0)) yield(); /* * Double-check inside queue lock to ensure that all effects * of the queue run are visible when we return. */ - spin_lock_bh(&dev->queue_lock); - running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); - spin_unlock_bh(&dev->queue_lock); + running = some_qdisc_is_running(dev, 1); /* * The running flag should never be set at this point because @@ -618,32 +651,46 @@ void dev_deactivate(struct net_device *dev) } while (WARN_ON_ONCE(running)); } +static void dev_init_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc) +{ + struct Qdisc *qdisc = _qdisc; + + dev_queue->qdisc = qdisc; + dev_queue->qdisc_sleeping = qdisc; +} + void dev_init_scheduler(struct net_device *dev) { - qdisc_lock_tree(dev); - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - INIT_LIST_HEAD(&dev->qdisc_list); - qdisc_unlock_tree(dev); + netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); + dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -void dev_shutdown(struct net_device *dev) +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { - struct Qdisc *qdisc; + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; + + if (qdisc) { + spinlock_t *root_lock = qdisc_root_lock(qdisc); - qdisc_lock_tree(dev); - qdisc = dev->qdisc_sleeping; - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - qdisc_destroy(qdisc); -#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) - if ((qdisc = dev->qdisc_ingress) != NULL) { - dev->qdisc_ingress = NULL; + dev_queue->qdisc = qdisc_default; + dev_queue->qdisc_sleeping = qdisc_default; + + spin_lock(root_lock); qdisc_destroy(qdisc); + spin_unlock(root_lock); } -#endif +} + +void dev_shutdown(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); + shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); BUG_TRAP(!timer_pending(&dev->watchdog_timer)); - qdisc_unlock_tree(dev); } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c89fba56db56..c1ad6b8de105 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -164,7 +164,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. */ - if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) + if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) return qdisc_enqueue_tail(skb, sch); else goto drop; @@ -188,7 +188,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) } q->packetsin++; - q->bytesin += skb->len; + q->bytesin += qdisc_pkt_len(skb); if (gred_wred_mode(t)) gred_load_wred_set(t, q); @@ -226,8 +226,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) break; } - if (q->backlog + skb->len <= q->limit) { - q->backlog += skb->len; + if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { + q->backlog += qdisc_pkt_len(skb); return qdisc_enqueue_tail(skb, sch); } @@ -254,7 +254,7 @@ static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) } else { if (red_is_idling(&q->parms)) red_end_of_idle_period(&q->parms); - q->backlog += skb->len; + q->backlog += qdisc_pkt_len(skb); } return qdisc_requeue(skb, sch); @@ -277,7 +277,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) "VQ 0x%x after dequeue, screwing up " "backlog.\n", tc_index_to_dp(skb)); } else { - q->backlog -= skb->len; + q->backlog -= qdisc_pkt_len(skb); if (!q->backlog && !gred_wred_mode(t)) red_start_of_idle_period(&q->parms); @@ -299,7 +299,7 @@ static unsigned int gred_drop(struct Qdisc* sch) skb = qdisc_dequeue_tail(sch); if (skb) { - unsigned int len = skb->len; + unsigned int len = qdisc_pkt_len(skb); struct gred_sched_data *q; u16 dp = tc_index_to_dp(skb); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index e817aa00441d..0ae7d19dcba8 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -113,7 +113,7 @@ enum hfsc_class_flags struct hfsc_class { - u32 classid; /* class id */ + struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ struct gnet_stats_basic bstats; @@ -134,7 +134,6 @@ struct hfsc_class struct rb_node vt_node; /* parent's vt_tree member */ struct rb_root cf_tree; /* active children sorted by cl_f */ struct rb_node cf_node; /* parent's cf_heap member */ - struct list_head hlist; /* hash list member */ struct list_head dlist; /* drop list member */ u64 cl_total; /* total work in bytes */ @@ -177,13 +176,11 @@ struct hfsc_class unsigned long cl_nactive; /* number of active children */ }; -#define HFSC_HSIZE 16 - struct hfsc_sched { u16 defcls; /* default class id */ struct hfsc_class root; /* root class */ - struct list_head clhash[HFSC_HSIZE]; /* class hash */ + struct Qdisc_class_hash clhash; /* class hash */ struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ @@ -898,7 +895,7 @@ qdisc_peek_len(struct Qdisc *sch) printk("qdisc_peek_len: non work-conserving qdisc ?\n"); return 0; } - len = skb->len; + len = qdisc_pkt_len(skb); if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { if (net_ratelimit()) printk("qdisc_peek_len: failed to requeue\n"); @@ -933,26 +930,16 @@ hfsc_adjust_levels(struct hfsc_class *cl) } while ((cl = cl->cl_parent) != NULL); } -static inline unsigned int -hfsc_hash(u32 h) -{ - h ^= h >> 8; - h ^= h >> 4; - - return h & (HFSC_HSIZE - 1); -} - static inline struct hfsc_class * hfsc_find_class(u32 classid, struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl; + struct Qdisc_class_common *clc; - list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) { - if (cl->classid == classid) - return cl; - } - return NULL; + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct hfsc_class, cl_common); } static void @@ -1032,7 +1019,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl != NULL) { if (parentid) { - if (cl->cl_parent && cl->cl_parent->classid != parentid) + if (cl->cl_parent && + cl->cl_parent->cl_common.classid != parentid) return -EINVAL; if (cl->cl_parent == NULL && parentid != TC_H_ROOT) return -EINVAL; @@ -1057,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); return 0; } @@ -1091,11 +1079,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (usc != NULL) hfsc_change_usc(cl, usc, 0); + cl->cl_common.classid = classid; cl->refcnt = 1; - cl->classid = classid; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; INIT_LIST_HEAD(&cl->children); @@ -1103,7 +1092,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cf_tree = RB_ROOT; sch_tree_lock(sch); - list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]); + qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) hfsc_purge_queue(sch, parent); @@ -1111,9 +1100,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + if (tca[TCA_RATE]) gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, tca[TCA_RATE]); + qdisc_root_lock(sch), tca[TCA_RATE]); *arg = (unsigned long)cl; return 0; } @@ -1145,7 +1136,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) hfsc_adjust_levels(cl->cl_parent); hfsc_purge_queue(sch, cl); - list_del(&cl->hlist); + qdisc_class_hash_remove(&q->clhash, &cl->cl_common); if (--cl->refcnt == 0) hfsc_destroy_class(sch, cl); @@ -1211,8 +1202,9 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid); + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->cl_common.classid); if (new == NULL) new = &noop_qdisc; } @@ -1345,8 +1337,9 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct hfsc_class *cl = (struct hfsc_class *)arg; struct nlattr *nest; - tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; - tcm->tcm_handle = cl->classid; + tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->cl_common.classid : + TC_H_ROOT; + tcm->tcm_handle = cl->cl_common.classid; if (cl->level == 0) tcm->tcm_info = cl->qdisc->handle; @@ -1390,14 +1383,16 @@ static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); + struct hlist_node *n; struct hfsc_class *cl; unsigned int i; if (arg->stop) return; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry(cl, &q->clhash[i], hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], + cl_common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -1433,23 +1428,25 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) { struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; - unsigned int i; + int err; if (opt == NULL || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); q->defcls = qopt->defcls; - for (i = 0; i < HFSC_HSIZE; i++) - INIT_LIST_HEAD(&q->clhash[i]); + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); skb_queue_head_init(&q->requeue); + q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; - q->root.classid = sch->handle; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; @@ -1457,7 +1454,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; - list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); + qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); + qdisc_class_hash_grow(sch, &q->clhash); qdisc_watchdog_init(&q->watchdog, sch); @@ -1520,10 +1518,11 @@ hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; + struct hlist_node *n; unsigned int i; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry(cl, &q->clhash[i], hlist) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } __skb_queue_purge(&q->requeue); @@ -1537,17 +1536,20 @@ static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl, *next; + struct hlist_node *n, *next; + struct hfsc_class *cl; unsigned int i; - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry(cl, &q->clhash[i], hlist) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) tcf_destroy_chain(&cl->filter_list); } - for (i = 0; i < HFSC_HSIZE; i++) { - list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + cl_common.hnode) hfsc_destroy_class(sch, cl); } + qdisc_class_hash_destroy(&q->clhash); __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1572,7 +1574,6 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hfsc_class *cl; - unsigned int len; int err; cl = hfsc_classify(skb, sch, &err); @@ -1583,8 +1584,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - len = skb->len; - err = cl->qdisc->enqueue(skb, cl->qdisc); + err = qdisc_enqueue(skb, cl->qdisc); if (unlikely(err != NET_XMIT_SUCCESS)) { cl->qstats.drops++; sch->qstats.drops++; @@ -1592,12 +1592,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) } if (cl->qdisc->q.qlen == 1) - set_active(cl, len); + set_active(cl, qdisc_pkt_len(skb)); cl->bstats.packets++; - cl->bstats.bytes += len; + cl->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - sch->bstats.bytes += len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1647,9 +1647,9 @@ hfsc_dequeue(struct Qdisc *sch) return NULL; } - update_vf(cl, skb->len, cur_time); + update_vf(cl, qdisc_pkt_len(skb), cur_time); if (realtime) - cl->cl_cumul += skb->len; + cl->cl_cumul += qdisc_pkt_len(skb); if (cl->qdisc->q.qlen != 0) { if (cl->cl_flags & HFSC_RSC) { diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3fb58f428f72..30c999c61b01 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -24,8 +24,6 @@ * Jiri Fojtasek * fixed requeue routine * and many others. thanks. - * - * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include <linux/module.h> #include <linux/moduleparam.h> @@ -53,7 +51,6 @@ one less than their parent. */ -#define HTB_HSIZE 16 /* classid hash size */ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ @@ -74,8 +71,8 @@ enum htb_cmode { /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { + struct Qdisc_class_common common; /* general class parameters */ - u32 classid; struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; @@ -84,10 +81,8 @@ struct htb_class { /* topology */ int level; /* our level (see above) */ + unsigned int children; struct htb_class *parent; /* parent class */ - struct hlist_node hlist; /* classid hash list item */ - struct list_head sibling; /* sibling list item */ - struct list_head children; /* children list */ union { struct htb_class_leaf { @@ -142,8 +137,7 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, } struct htb_sched { - struct list_head root; /* root classes list */ - struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */ + struct Qdisc_class_hash clhash; struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ /* self list - roots of self generating tree */ @@ -165,7 +159,6 @@ struct htb_sched { /* filters for qdisc itself */ struct tcf_proto *filter_list; - int filter_cnt; int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ @@ -178,32 +171,16 @@ struct htb_sched { long direct_pkts; }; -/* compute hash of size HTB_HSIZE for given handle */ -static inline int htb_hash(u32 h) -{ -#if HTB_HSIZE != 16 -#error "Declare new hash for your HTB_HSIZE" -#endif - h ^= h >> 8; /* stolen from cbq_hash */ - h ^= h >> 4; - return h & 0xf; -} - /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; + struct Qdisc_class_common *clc; - if (TC_H_MAJ(handle) != sch->handle) + clc = qdisc_class_find(&q->clhash, handle); + if (clc == NULL) return NULL; - - hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) { - if (cl->classid == handle) - return cl; - } - return NULL; + return container_of(clc, struct htb_class, common); } /** @@ -284,7 +261,7 @@ static void htb_add_to_id_tree(struct rb_root *root, parent = *p; c = rb_entry(parent, struct htb_class, node[prio]); - if (cl->classid > c->classid) + if (cl->common.classid > c->common.classid) p = &parent->rb_right; else p = &parent->rb_left; @@ -448,7 +425,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) /* we are removing child which is pointed to from parent feed - forget the pointer but remember classid */ - p->un.inner.last_ptr_id[prio] = cl->classid; + p->un.inner.last_ptr_id[prio] = cl->common.classid; p->un.inner.ptr[prio] = NULL; } @@ -595,21 +572,20 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) kfree_skb(skb); return ret; #endif - } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != - NET_XMIT_SUCCESS) { + } else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { sch->qstats.drops++; cl->qstats.drops++; return NET_XMIT_DROP; } else { cl->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - cl->bstats.bytes += skb->len; + cl->bstats.bytes += qdisc_pkt_len(skb); htb_activate(q, cl); } sch->q.qlen++; sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); return NET_XMIT_SUCCESS; } @@ -666,7 +642,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, int level, struct sk_buff *skb) { - int bytes = skb->len; + int bytes = qdisc_pkt_len(skb); long toks, diff; enum htb_cmode old_mode; @@ -753,10 +729,10 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, while (n) { struct htb_class *cl = rb_entry(n, struct htb_class, node[prio]); - if (id == cl->classid) + if (id == cl->common.classid) return n; - if (id > cl->classid) { + if (id > cl->common.classid) { n = n->rb_right; } else { r = n; @@ -866,7 +842,7 @@ next: if (!cl->warned) { printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n", - cl->classid); + cl->common.classid); cl->warned = 1; } q->nwc_hit++; @@ -879,7 +855,8 @@ next: } while (cl != start); if (likely(skb != NULL)) { - if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { + cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); + if (cl->un.leaf.deficit[level] < 0) { cl->un.leaf.deficit[level] += cl->un.leaf.quantum; htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); @@ -977,13 +954,12 @@ static unsigned int htb_drop(struct Qdisc *sch) static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - int i; - - for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; + struct htb_class *cl; + struct hlist_node *n; + unsigned int i; - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1041,16 +1017,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } - INIT_LIST_HEAD(&q->root); - for (i = 0; i < HTB_HSIZE; i++) - INIT_HLIST_HEAD(q->hash + i); + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); qdisc_watchdog_init(&q->watchdog, sch); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = sch->dev->tx_queue_len; + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; @@ -1063,11 +1039,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { + spinlock_t *root_lock = qdisc_root_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(&sch->dev->queue_lock); + spin_lock_bh(root_lock); gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1081,11 +1058,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1094,12 +1071,13 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + spinlock_t *root_lock = qdisc_root_lock(sch); struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(&sch->dev->queue_lock); - tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; - tcm->tcm_handle = cl->classid; + spin_lock_bh(root_lock); + tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; if (!cl->level && cl->un.leaf.q) tcm->tcm_info = cl->un.leaf.q->handle; @@ -1119,11 +1097,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); nla_nest_end(skb, nest); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); return skb->len; nla_put_failure: - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1153,8 +1131,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl && !cl->level) { if (new == NULL && - (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->classid)) + (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->common.classid)) == NULL) return -ENOBUFS; sch_tree_lock(sch); @@ -1195,12 +1174,9 @@ static inline int htb_parent_last_child(struct htb_class *cl) if (!cl->parent) /* the root class */ return 0; - - if (!(cl->parent->children.next == &cl->sibling && - cl->parent->children.prev == &cl->sibling)) + if (cl->parent->children > 1) /* not the last child */ return 0; - return 1; } @@ -1228,8 +1204,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { - struct htb_sched *q = qdisc_priv(sch); - if (!cl->level) { BUG_TRAP(cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); @@ -1239,21 +1213,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_put_rtab(cl->ceil); tcf_destroy_chain(&cl->filter_list); - - while (!list_empty(&cl->children)) - htb_destroy_class(sch, list_entry(cl->children.next, - struct htb_class, sibling)); - - /* note: this delete may happen twice (see htb_delete) */ - hlist_del_init(&cl->hlist); - list_del(&cl->sibling); - - if (cl->prio_activity) - htb_deactivate(q, cl); - - if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); - kfree(cl); } @@ -1261,6 +1220,9 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); + struct hlist_node *n, *next; + struct htb_class *cl; + unsigned int i; qdisc_watchdog_cancel(&q->watchdog); /* This line used to be after htb_destroy_class call below @@ -1269,10 +1231,16 @@ static void htb_destroy(struct Qdisc *sch) unbind_filter on it (without Oops). */ tcf_destroy_chain(&q->filter_list); - while (!list_empty(&q->root)) - htb_destroy_class(sch, list_entry(q->root.next, - struct htb_class, sibling)); - + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + tcf_destroy_chain(&cl->filter_list); + } + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + htb_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); __skb_queue_purge(&q->direct_queue); } @@ -1287,12 +1255,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class // refs so that we can remove children safely there ? - if (!list_empty(&cl->children) || cl->filter_cnt) + if (cl->children || cl->filter_cnt) return -EBUSY; if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, - cl->parent->classid); + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, + cl->parent->common.classid); last_child = 1; } @@ -1305,11 +1274,15 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) } /* delete from hash and active; remainder in destroy_class */ - hlist_del_init(&cl->hlist); + qdisc_class_hash_remove(&q->clhash, &cl->common); + cl->parent->children--; if (cl->prio_activity) htb_deactivate(q, cl); + if (cl->cmode != HTB_CAN_SEND) + htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + if (last_child) htb_parent_to_leaf(q, cl, new_q); @@ -1394,12 +1367,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, goto failure; gen_new_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE] ? : &est.nla); cl->refcnt = 1; - INIT_LIST_HEAD(&cl->sibling); - INIT_HLIST_NODE(&cl->hlist); - INIT_LIST_HEAD(&cl->children); + cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); RB_CLEAR_NODE(&cl->pq_node); @@ -1409,7 +1380,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); + new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { unsigned int qlen = parent->un.leaf.q->q.qlen; @@ -1433,7 +1405,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* leaf (we) needs elementary qdisc */ cl->un.leaf.q = new_q ? new_q : &noop_qdisc; - cl->classid = classid; + cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ @@ -1444,13 +1416,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ - hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); - list_add_tail(&cl->sibling, - parent ? &parent->children : &q->root); + qdisc_class_hash_insert(&q->clhash, &cl->common); + if (parent) + parent->children++; } else { if (tca[TCA_RATE]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - &sch->dev->queue_lock, + qdisc_root_lock(sch), tca[TCA_RATE]); sch_tree_lock(sch); } @@ -1462,13 +1434,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->quantum && cl->un.leaf.quantum < 1000) { printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->classid); + cl->common.classid); cl->un.leaf.quantum = 1000; } if (!hopt->quantum && cl->un.leaf.quantum > 200000) { printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->classid); + cl->common.classid); cl->un.leaf.quantum = 200000; } if (hopt->quantum) @@ -1491,6 +1463,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->ceil = ctab; sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); + *arg = (unsigned long)cl; return 0; @@ -1514,7 +1488,6 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_find(classid, sch); /*if (cl && !cl->level) return 0; @@ -1528,35 +1501,29 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, */ if (cl) cl->filter_cnt++; - else - q->filter_cnt++; return (unsigned long)cl; } static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { - struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; if (cl) cl->filter_cnt--; - else - q->filter_cnt--; } static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct htb_sched *q = qdisc_priv(sch); - int i; + struct htb_class *cl; + struct hlist_node *n; + unsigned int i; if (arg->stop) return; - for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; - - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 956c80ad5965..4a2b77374358 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -77,7 +77,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) result = tc_classify(skb, p->filter_list, &res); sch->bstats.packets++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); switch (result) { case TC_ACT_SHOT: result = TC_ACT_SHOT; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c9c649b26eaa..a59085700678 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -82,6 +82,13 @@ struct netem_skb_cb { psched_time_t time_to_send; }; +static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); + return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; +} + /* init_crandom - initialize correlated random number generator * Use entropy source for initial seed. */ @@ -180,11 +187,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = sch->dev->qdisc; + struct Qdisc *rootq = qdisc_root(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; - rootq->enqueue(skb2, rootq); + qdisc_enqueue_root(skb2, rootq); q->duplicate = dupsave; } @@ -205,7 +212,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); } - cb = (struct netem_skb_cb *)skb->cb; + cb = netem_skb_cb(skb); if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { @@ -218,7 +225,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = psched_get_time(); cb->time_to_send = now + delay; ++q->counter; - ret = q->qdisc->enqueue(skb, q->qdisc); + ret = qdisc_enqueue(skb, q->qdisc); } else { /* * Do re-ordering by putting one out of N packets at the front @@ -231,7 +238,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (likely(ret == NET_XMIT_SUCCESS)) { sch->q.qlen++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; } else sch->qstats.drops++; @@ -277,8 +284,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) skb = q->qdisc->dequeue(q->qdisc); if (skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; + const struct netem_skb_cb *cb = netem_skb_cb(skb); psched_time_t now = psched_get_time(); /* if more time remaining? */ @@ -310,28 +316,6 @@ static void netem_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); } -/* Pass size change message down to embedded FIFO */ -static int set_fifo_limit(struct Qdisc *q, int limit) -{ - struct nlattr *nla; - int ret = -ENOMEM; - - /* Hack to avoid sending change message to non-FIFO */ - if (strncmp(q->ops->id + 1, "fifo", 4) != 0) - return 0; - - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - } - return ret; -} - /* * Distribution data is a variable size payload containing * signed 16 bit values. @@ -341,6 +325,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) struct netem_sched_data *q = qdisc_priv(sch); unsigned long n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); + spinlock_t *root_lock; struct disttable *d; int i; @@ -355,9 +340,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) for (i = 0; i < n; i++) d->table[i] = data[i]; - spin_lock_bh(&sch->dev->queue_lock); + root_lock = qdisc_root_lock(sch); + + spin_lock_bh(root_lock); d = xchg(&q->delay_dist, d); - spin_unlock_bh(&sch->dev->queue_lock); + spin_unlock_bh(root_lock); kfree(d); return 0; @@ -416,7 +403,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (ret < 0) return ret; - ret = set_fifo_limit(q->qdisc, qopt->limit); + ret = fifo_set_limit(q->qdisc, qopt->limit); if (ret) { pr_debug("netem: can't set fifo limit\n"); return ret; @@ -476,7 +463,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct fifo_sched_data *q = qdisc_priv(sch); struct sk_buff_head *list = &sch->q; - psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send; + psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; struct sk_buff *skb; if (likely(skb_queue_len(list) < q->limit)) { @@ -487,8 +474,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) } skb_queue_reverse_walk(list, skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; + const struct netem_skb_cb *cb = netem_skb_cb(skb); if (tnext >= cb->time_to_send) break; @@ -496,8 +482,8 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) __skb_queue_after(list, skb, nskb); - sch->qstats.backlog += nskb->len; - sch->bstats.bytes += nskb->len; + sch->qstats.backlog += qdisc_pkt_len(nskb); + sch->bstats.bytes += qdisc_pkt_len(nskb); sch->bstats.packets++; return NET_XMIT_SUCCESS; @@ -517,7 +503,7 @@ static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; } else - q->limit = max_t(u32, sch->dev->tx_queue_len, 1); + q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); q->oldest = PSCHED_PASTPERFECT; return 0; @@ -558,7 +544,8 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, + q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 5532f1031ab5..f849243eb095 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -24,11 +24,9 @@ struct prio_sched_data { int bands; - int curband; /* for round-robin */ struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; - int mq; }; @@ -55,17 +53,14 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if (!q->filter_list || err < 0) { if (TC_H_MAJ(band)) band = 0; - band = q->prio2band[band&TC_PRIO_MAX]; - goto out; + return q->queues[q->prio2band[band&TC_PRIO_MAX]]; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) - band = q->prio2band[0]; -out: - if (q->mq) - skb_set_queue_mapping(skb, band); + return q->queues[q->prio2band[0]]; + return q->queues[band]; } @@ -86,8 +81,9 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) } #endif - if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->bstats.bytes += skb->len; + ret = qdisc_enqueue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -123,67 +119,23 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) } -static struct sk_buff * -prio_dequeue(struct Qdisc* sch) +static struct sk_buff *prio_dequeue(struct Qdisc* sch) { - struct sk_buff *skb; struct prio_sched_data *q = qdisc_priv(sch); int prio; - struct Qdisc *qdisc; for (prio = 0; prio < q->bands; prio++) { - /* Check if the target subqueue is available before - * pulling an skb. This way we avoid excessive requeues - * for slower queues. - */ - if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { - qdisc = q->queues[prio]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - return skb; - } + struct Qdisc *qdisc = q->queues[prio]; + struct sk_buff *skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; } } return NULL; } -static struct sk_buff *rr_dequeue(struct Qdisc* sch) -{ - struct sk_buff *skb; - struct prio_sched_data *q = qdisc_priv(sch); - struct Qdisc *qdisc; - int bandcount; - - /* Only take one pass through the queues. If nothing is available, - * return nothing. - */ - for (bandcount = 0; bandcount < q->bands; bandcount++) { - /* Check if the target subqueue is available before - * pulling an skb. This way we avoid excessive requeues - * for slower queues. If the queue is stopped, try the - * next queue. - */ - if (!__netif_subqueue_stopped(sch->dev, - (q->mq ? q->curband : 0))) { - qdisc = q->queues[q->curband]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - q->curband++; - if (q->curband >= q->bands) - q->curband = 0; - return skb; - } - } - q->curband++; - if (q->curband >= q->bands) - q->curband = 0; - } - return NULL; -} - static unsigned int prio_drop(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -228,45 +180,22 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt *qopt; - struct nlattr *tb[TCA_PRIO_MAX + 1]; - int err; int i; - err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt, - sizeof(*qopt)); - if (err < 0) - return err; - - q->bands = qopt->bands; - /* If we're multiqueue, make sure the number of incoming bands - * matches the number of queues on the device we're associating with. - * If the number of bands requested is zero, then set q->bands to - * dev->egress_subqueue_count. Also, the root qdisc must be the - * only one that is enabled for multiqueue, since it's the only one - * that interacts with the underlying device. - */ - q->mq = nla_get_flag(tb[TCA_PRIO_MQ]); - if (q->mq) { - if (sch->parent != TC_H_ROOT) - return -EINVAL; - if (netif_is_multiqueue(sch->dev)) { - if (q->bands == 0) - q->bands = sch->dev->egress_subqueue_count; - else if (q->bands != sch->dev->egress_subqueue_count) - return -EINVAL; - } else - return -EOPNOTSUPP; - } + if (nla_len(opt) < sizeof(*qopt)) + return -EINVAL; + qopt = nla_data(opt); - if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) + if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= q->bands) + if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } sch_tree_lock(sch); + q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { @@ -281,7 +210,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; i<q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); @@ -331,10 +261,6 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt); if (nest == NULL) goto nla_put_failure; - if (q->mq) { - if (nla_put_flag(skb, TCA_PRIO_MQ) < 0) - goto nla_put_failure; - } nla_nest_compat_end(skb, nest); return skb->len; @@ -507,44 +433,17 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; -static struct Qdisc_ops rr_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &prio_class_ops, - .id = "rr", - .priv_size = sizeof(struct prio_sched_data), - .enqueue = prio_enqueue, - .dequeue = rr_dequeue, - .requeue = prio_requeue, - .drop = prio_drop, - .init = prio_init, - .reset = prio_reset, - .destroy = prio_destroy, - .change = prio_tune, - .dump = prio_dump, - .owner = THIS_MODULE, -}; - static int __init prio_module_init(void) { - int err; - - err = register_qdisc(&prio_qdisc_ops); - if (err < 0) - return err; - err = register_qdisc(&rr_qdisc_ops); - if (err < 0) - unregister_qdisc(&prio_qdisc_ops); - return err; + return register_qdisc(&prio_qdisc_ops); } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); - unregister_qdisc(&rr_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); -MODULE_ALIAS("sch_rr"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 5c569853b9c0..3f2d1d7f3bbd 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -92,9 +92,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) break; } - ret = child->enqueue(skb, child); + ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; } else { @@ -174,33 +174,6 @@ static void red_destroy(struct Qdisc *sch) qdisc_destroy(q->qdisc); } -static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit) -{ - struct Qdisc *q; - struct nlattr *nla; - int ret; - - q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (q) { - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), - GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - - if (ret == 0) - return q; - } - qdisc_destroy(q); - } - return NULL; -} - static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, @@ -228,9 +201,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) ctl = nla_data(tb[TCA_RED_PARMS]); if (ctl->limit > 0) { - child = red_create_dflt(sch, ctl->limit); - if (child == NULL) - return -ENOMEM; + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); + if (IS_ERR(child)) + return PTR_ERR(child); } sch_tree_lock(sch); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 6a97afbfb952..8589da666568 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -245,7 +245,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) if (d > 1) { sfq_index x = q->dep[d + SFQ_DEPTH].next; skb = q->qs[x].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[x]); kfree_skb(skb); sfq_dec(q, x); @@ -261,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch) q->next[q->tail] = q->next[d]; q->allot[q->next[d]] += q->quantum; skb = q->qs[d].prev; - len = skb->len; + len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[d]); kfree_skb(skb); sfq_dec(q, d); @@ -305,7 +305,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->qs[x].qlen >= q->limit) return qdisc_drop(skb, sch); - sch->qstats.backlog += skb->len; + sch->qstats.backlog += qdisc_pkt_len(skb); __skb_queue_tail(&q->qs[x], skb); sfq_inc(q, x); if (q->qs[x].qlen == 1) { /* The flow is new */ @@ -320,7 +320,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } if (++sch->q.qlen <= q->limit) { - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -352,7 +352,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) q->hash[x] = hash; } - sch->qstats.backlog += skb->len; + sch->qstats.backlog += qdisc_pkt_len(skb); __skb_queue_head(&q->qs[x], skb); /* If selected queue has length q->limit+1, this means that * all another queues are empty and we do simple tail drop. @@ -363,7 +363,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) skb = q->qs[x].prev; __skb_unlink(skb, &q->qs[x]); sch->qstats.drops++; - sch->qstats.backlog -= skb->len; + sch->qstats.backlog -= qdisc_pkt_len(skb); kfree_skb(skb); return NET_XMIT_CN; } @@ -411,7 +411,7 @@ sfq_dequeue(struct Qdisc *sch) skb = __skb_dequeue(&q->qs[a]); sfq_dec(q, a); sch->q.qlen--; - sch->qstats.backlog -= skb->len; + sch->qstats.backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (q->qs[a].qlen == 0) { @@ -423,7 +423,7 @@ sfq_dequeue(struct Qdisc *sch) } q->next[q->tail] = a; q->allot[a] += q->quantum; - } else if ((q->allot[a] -= skb->len) <= 0) { + } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { q->tail = a; a = q->next[a]; q->allot[a] += q->quantum; @@ -461,7 +461,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; sch_tree_lock(sch); - q->quantum = ctl->quantum ? : psched_mtu(sch->dev); + q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); @@ -502,7 +502,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->max_depth = 0; q->tail = SFQ_DEPTH; if (opt == NULL) { - q->quantum = psched_mtu(sch->dev); + q->quantum = psched_mtu(qdisc_dev(sch)); q->perturb_period = 0; q->perturbation = net_random(); } else { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 0b7d78f59d8c..b296672f7632 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -123,7 +123,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (skb->len > q->max_size) { + if (qdisc_pkt_len(skb) > q->max_size) { sch->qstats.drops++; #ifdef CONFIG_NET_CLS_ACT if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) @@ -133,13 +133,14 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_DROP; } - if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) { + ret = qdisc_enqueue(skb, q->qdisc); + if (ret != 0) { sch->qstats.drops++; return ret; } sch->q.qlen++; - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -180,7 +181,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) psched_time_t now; long toks; long ptoks = 0; - unsigned int len = skb->len; + unsigned int len = qdisc_pkt_len(skb); now = psched_get_time(); toks = psched_tdiff_bounded(now, q->t_c, q->buffer); @@ -242,34 +243,6 @@ static void tbf_reset(struct Qdisc* sch) qdisc_watchdog_cancel(&q->watchdog); } -static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) -{ - struct Qdisc *q; - struct nlattr *nla; - int ret; - - q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (q) { - nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), - GFP_KERNEL); - if (nla) { - nla->nla_type = RTM_NEWQDISC; - nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt)); - ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit; - - ret = q->ops->change(q, nla); - kfree(nla); - - if (ret == 0) - return q; - } - qdisc_destroy(q); - } - - return NULL; -} - static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, @@ -322,8 +295,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) goto done; if (qopt->limit > 0) { - if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL) + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); + if (IS_ERR(child)) { + err = PTR_ERR(child); goto done; + } } sch_tree_lock(sch); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 0444fd0f0d22..537223642b6e 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -78,12 +78,12 @@ struct teql_sched_data static int teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); if (q->q.qlen < dev->tx_queue_len) { __skb_queue_tail(&q->q, skb); - sch->bstats.bytes += skb->len; + sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; return 0; } @@ -107,17 +107,19 @@ static struct sk_buff * teql_dequeue(struct Qdisc* sch) { struct teql_sched_data *dat = qdisc_priv(sch); + struct netdev_queue *dat_queue; struct sk_buff *skb; skb = __skb_dequeue(&dat->q); + dat_queue = netdev_get_tx_queue(dat->m->dev, 0); if (skb == NULL) { - struct net_device *m = dat->m->dev->qdisc->dev; + struct net_device *m = qdisc_dev(dat_queue->qdisc); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } - sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; return skb; } @@ -153,10 +155,16 @@ teql_destroy(struct Qdisc* sch) if (q == master->slaves) { master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { + struct netdev_queue *txq; + spinlock_t *root_lock; + + txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - spin_lock_bh(&master->dev->queue_lock); - qdisc_reset(master->dev->qdisc); - spin_unlock_bh(&master->dev->queue_lock); + + root_lock = qdisc_root_lock(txq->qdisc); + spin_lock_bh(root_lock); + qdisc_reset(txq->qdisc); + spin_unlock_bh(root_lock); } } skb_queue_purge(&dat->q); @@ -170,7 +178,7 @@ teql_destroy(struct Qdisc* sch) static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) { - struct net_device *dev = sch->dev; + struct net_device *dev = qdisc_dev(sch); struct teql_master *m = (struct teql_master*)sch->ops; struct teql_sched_data *q = qdisc_priv(sch); @@ -216,7 +224,8 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - struct teql_sched_data *q = qdisc_priv(dev->qdisc); + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); + struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); struct neighbour *mn = skb->dst->neighbour; struct neighbour *n = q->ncache; @@ -252,7 +261,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { - if (dev->qdisc == &noop_qdisc) + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc == &noop_qdisc) return -ENODEV; if (dev->header_ops == NULL || @@ -268,7 +278,6 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) struct Qdisc *start, *q; int busy; int nores; - int len = skb->len; int subq = skb_get_queue_mapping(skb); struct sk_buff *skb_res = NULL; @@ -282,12 +291,13 @@ restart: goto drop; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); + struct netdev_queue *slave_txq; - if (slave->qdisc_sleeping != q) + slave_txq = netdev_get_tx_queue(slave, 0); + if (slave_txq->qdisc_sleeping != q) continue; - if (netif_queue_stopped(slave) || - __netif_subqueue_stopped(slave, subq) || + if (__netif_subqueue_stopped(slave, subq) || !netif_running(slave)) { busy = 1; continue; @@ -296,14 +306,14 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: if (netif_tx_trylock(slave)) { - if (!netif_queue_stopped(slave) && - !__netif_subqueue_stopped(slave, subq) && + if (!__netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += len; + master->stats.tx_bytes += + qdisc_pkt_len(skb); return 0; } netif_tx_unlock(slave); @@ -352,7 +362,7 @@ static int teql_master_open(struct net_device *dev) q = m->slaves; do { - struct net_device *slave = q->dev; + struct net_device *slave = qdisc_dev(q); if (slave == NULL) return -EUNATCH; @@ -403,7 +413,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) q = m->slaves; if (q) { do { - if (new_mtu > q->dev->mtu) + if (new_mtu > qdisc_dev(q)->mtu) return -EINVAL; } while ((q=NEXT_SLAVE(q)) != m->slaves); } |