📄 sch_cbq.c
字号:
/* * net/sched/sch_cbq.c Class-Based Queueing discipline. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * */#include <linux/config.h>#include <linux/module.h>#include <asm/uaccess.h>#include <asm/system.h>#include <asm/bitops.h>#include <linux/types.h>#include <linux/kernel.h>#include <linux/sched.h>#include <linux/string.h>#include <linux/mm.h>#include <linux/socket.h>#include <linux/sockios.h>#include <linux/in.h>#include <linux/errno.h>#include <linux/interrupt.h>#include <linux/if_ether.h>#include <linux/inet.h>#include <linux/netdevice.h>#include <linux/etherdevice.h>#include <linux/notifier.h>#include <net/ip.h>#include <net/route.h>#include <linux/skbuff.h>#include <net/sock.h>#include <net/pkt_sched.h>/* Class-Based Queueing (CBQ) algorithm. ======================================= Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource Management Models for Packet Networks", IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 [2] Sally Floyd, "Notes on CBQ and Guaranted Service", 1995 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting Parameters", 1996 [4] Sally Floyd and Michael Speer, "Experimental Results for Class-Based Queueing", 1998, not published. ----------------------------------------------------------------------- Algorithm skeleton was taken from NS simulator cbq.cc. If someone wants to check this code against the LBL version, he should take into account that ONLY the skeleton was borrowed, the implementation is different. Particularly: --- The WRR algorithm is different. Our version looks more reasonable (I hope) and works when quanta are allowed to be less than MTU, which is always the case when real time classes have small rates. Note, that the statement of [3] is incomplete, delay may actually be estimated even if class per-round allotment is less than MTU. Namely, if per-round allotment is W*r_i, and r_1+...+r_k = r < 1 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B In the worst case we have IntServ estimate with D = W*r+k*MTU and C = MTU*r. The proof (if correct at all) is trivial. --- It seems that cbq-2.0 is not very accurate. At least, I cannot interpret some places, which look like wrong translations from NS. Anyone is advised to find these differences and explain to me, why I am wrong 8). --- Linux has no EOI event, so that we cannot estimate true class idle time. Workaround is to consider the next dequeue event as sign that previous packet is finished. This is wrong because of internal device queueing, but on a permanently loaded link it is true. Moreover, combined with clock integrator, this scheme looks very close to an ideal solution. */struct cbq_sched_data;struct cbq_class{ struct cbq_class *next; /* hash table link */ struct cbq_class *next_alive; /* next class with backlog in this priority band *//* Parameters */ u32 classid; unsigned char priority; /* class priority */ unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ unsigned char ovl_strategy;#ifdef CONFIG_NET_CLS_POLICE unsigned char police;#endif u32 defmap; /* Link-sharing scheduler parameters */ long maxidle; /* Class paramters: see below. */ long offtime; long minidle; u32 avpkt; struct qdisc_rate_table *R_tab; /* Overlimit strategy parameters */ void (*overlimit)(struct cbq_class *cl); long penalty; /* General scheduler (WRR) parameters */ long allot; long quantum; /* Allotment per WRR round */ long weight; /* Relative allotment: see below */ struct Qdisc *qdisc; /* Ptr to CBQ discipline */ struct cbq_class *split; /* Ptr to split node */ struct cbq_class *share; /* Ptr to LS parent in the class tree */ struct cbq_class *tparent; /* Ptr to tree parent in the class tree */ struct cbq_class *borrow; /* NULL if class is bandwidth limited; parent otherwise */ struct cbq_class *sibling; /* Sibling chain */ struct cbq_class *children; /* Pointer to children chain */ struct Qdisc *q; /* Elementary queueing discipline *//* Variables */ unsigned char cpriority; /* Effective priority */ unsigned char delayed; unsigned char level; /* level of the class in hierarchy: 0 for leaf classes, and maximal level of children + 1 for nodes. */ psched_time_t last; /* Last end of service */ psched_time_t undertime; long avgidle; long deficit; /* Saved deficit for WRR */ unsigned long penalized; struct tc_stats stats; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; int refcnt; int filters; struct cbq_class *defaults[TC_PRIO_MAX+1];};struct cbq_sched_data{ struct cbq_class *classes[16]; /* Hash table of all classes */ int nclasses[TC_CBQ_MAXPRIO+1]; unsigned quanta[TC_CBQ_MAXPRIO+1]; struct cbq_class link; unsigned activemask; struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes with backlog */#ifdef CONFIG_NET_CLS_POLICE struct cbq_class *rx_class;#endif struct cbq_class *tx_class; struct cbq_class *tx_borrowed; int tx_len; psched_time_t now; /* Cached timestamp */ psched_time_t now_rt; /* Cached real time */ unsigned pmask; struct timer_list delay_timer; struct timer_list wd_timer; /* Watchdog timer, started when CBQ has backlog, but cannot transmit just now */ long wd_expires; int toplevel; u32 hgenerator;};#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])static __inline__ unsigned cbq_hash(u32 h){ h ^= h>>8; h ^= h>>4; return h&0xF;}static __inline__ struct cbq_class *cbq_class_lookup(struct cbq_sched_data *q, u32 classid){ struct cbq_class *cl; for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next) if (cl->classid == classid) return cl; return NULL;}#ifdef CONFIG_NET_CLS_POLICEstatic struct cbq_class *cbq_reclassify(struct sk_buff *skb, struct cbq_class *this){ struct cbq_class *cl, *new; for (cl = this->tparent; cl; cl = cl->tparent) if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) return new; return NULL;}#endif/* Classify packet. The procedure is pretty complicated, but it allows us to combine link sharing and priority scheduling transparently. Namely, you can put link sharing rules (f.e. route based) at root of CBQ, so that it resolves to split nodes. Then packets are classified by logical priority, or a more specific classifier may be attached to the split node. */static struct cbq_class *cbq_classify(struct sk_buff *skb, struct Qdisc *sch){ struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data; struct cbq_class *head = &q->link; struct cbq_class **defmap; struct cbq_class *cl = NULL; u32 prio = skb->priority; struct tcf_result res; /* * Step 1. If skb->priority points to one of our classes, use it. */ if (TC_H_MAJ(prio^sch->handle) == 0 && (cl = cbq_class_lookup(q, prio)) != NULL) return cl; for (;;) { int result = 0; defmap = head->defaults; /* * Step 2+n. Apply classifier. */ if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0) goto fallback; if ((cl = (void*)res.class) == NULL) { if (TC_H_MAJ(res.classid)) cl = cbq_class_lookup(q, res.classid); else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; if (cl == NULL || cl->level >= head->level) goto fallback; }#ifdef CONFIG_NET_CLS_POLICE switch (result) { case TC_POLICE_RECLASSIFY: return cbq_reclassify(skb, cl); case TC_POLICE_SHOT: return NULL; default: }#endif if (cl->level == 0) return cl; /* * Step 3+n. If classifier selected a link sharing class, * apply agency specific classifier. * Repeat this procdure until we hit a leaf node. */ head = cl; }fallback: cl = head; /* * Step 4. No success... */ if (TC_H_MAJ(prio) == 0 && !(cl = head->defaults[prio&TC_PRIO_MAX]) && !(cl = head->defaults[TC_PRIO_BESTEFFORT])) return head; return cl;}/* A packet has just been enqueued on the empty class. cbq_activate_class adds it to the tail of active class list of its priority band. */static __inline__ void cbq_activate_class(struct cbq_class *cl){ struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data; int prio = cl->cpriority; struct cbq_class *cl_tail; cl_tail = q->active[prio]; q->active[prio] = cl; if (cl_tail != NULL) { cl->next_alive = cl_tail->next_alive; cl_tail->next_alive = cl; } else { cl->next_alive = cl; q->activemask |= (1<<prio); }}/* Unlink class from active chain. Note that this same procedure is done directly in cbq_dequeue* during round-robin procedure. */static void cbq_deactivate_class(struct cbq_class *this){ struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; int prio = this->cpriority; struct cbq_class *cl; struct cbq_class *cl_prev = q->active[prio]; do { cl = cl_prev->next_alive; if (cl == this) { cl_prev->next_alive = cl->next_alive; cl->next_alive = NULL; if (cl == q->active[prio]) { q->active[prio] = cl_prev; if (cl == q->active[prio]) { q->active[prio] = NULL; q->activemask &= ~(1<<prio); return; } } cl = cl_prev->next_alive; return; } } while ((cl_prev = cl) != q->active[prio]);}static voidcbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl){ int toplevel = q->toplevel; if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { psched_time_t now; psched_tdiff_t incr; PSCHED_GET_TIME(now); incr = PSCHED_TDIFF(now, q->now_rt); PSCHED_TADD2(q->now, incr, now); do { if (PSCHED_TLESS(cl->undertime, now)) { q->toplevel = cl->level; return; } } while ((cl=cl->borrow) != NULL && toplevel > cl->level); }}static intcbq_enqueue(struct sk_buff *skb, struct Qdisc *sch){ struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; struct cbq_class *cl = cbq_classify(skb, sch); int len = skb->len; int ret = NET_XMIT_POLICED;#ifdef CONFIG_NET_CLS_POLICE q->rx_class = cl;#endif if (cl) {#ifdef CONFIG_NET_CLS_POLICE cl->q->__parent = sch;#endif if ((ret = cl->q->enqueue(skb, cl->q)) == 0) { sch->q.qlen++; sch->stats.packets++; sch->stats.bytes+=len; cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); return 0; } } sch->stats.drops++; if (cl == NULL) kfree_skb(skb); else { cbq_mark_toplevel(q, cl); cl->stats.drops++; } return ret;}static intcbq_requeue(struct sk_buff *skb, struct Qdisc *sch){ struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; struct cbq_class *cl; int ret; if ((cl = q->tx_class) == NULL) { kfree_skb(skb); sch->stats.drops++; return NET_XMIT_CN; } q->tx_class = NULL; cbq_mark_toplevel(q, cl);#ifdef CONFIG_NET_CLS_POLICE q->rx_class = cl; cl->q->__parent = sch;#endif if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) { sch->q.qlen++; if (!cl->next_alive) cbq_activate_class(cl); return 0; } sch->stats.drops++; cl->stats.drops++; return ret;}/* Overlimit actions *//* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */static void cbq_ovl_classic(struct cbq_class *cl){ struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now); if (!cl->delayed) { delay += cl->offtime; /* Class goes to sleep, so that it will have no chance to work avgidle. Let's forgive it 8) BTW cbq-2.0 has a crap in this place, apparently they forgot to shift it by cl->ewma_log. */ if (cl->avgidle < 0) delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); if (cl->avgidle < cl->minidle) cl->avgidle = cl->minidle; if (delay <= 0) delay = 1; PSCHED_TADD2(q->now, delay, cl->undertime); cl->xstats.overactions++; cl->delayed = 1; } if (q->wd_expires == 0 || q->wd_expires > delay) q->wd_expires = delay; /* Dirty work! We must schedule wakeups based on real available rate, rather than leaf rate, which may be tiny (even zero). */ if (q->toplevel == TC_CBQ_MAXLEVEL) { struct cbq_class *b; psched_tdiff_t base_delay = q->wd_expires; for (b = cl->borrow; b; b = b->borrow) { delay = PSCHED_TDIFF(b->undertime, q->now); if (delay < base_delay) { if (delay <= 0) delay = 1; base_delay = delay; } } q->wd_expires = delay; }}/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when they go overlimit */static void cbq_ovl_rclassic(struct cbq_class *cl){ struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data; struct cbq_class *this = cl; do { if (cl->level > q->toplevel) { cl = NULL; break; } } while ((cl = cl->borrow) != NULL); if (cl == NULL)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -