📄 sch_cbq.c
	cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		unsigned long sched = jiffies;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		if (delay > 0) {
			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static void cbq_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

	if (cl_prev == NULL)
		return now;

	do {
		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);
}

static void cbq_undelay(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;
	struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
	long delay = 0;
	unsigned pmask;

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		long tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->stats.packets++;
			sch->stats.bytes += len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->stats.drops++;
		return 0;
	}
	sch->stats.drops++;
	return -1;
}
#endif

/* It is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->stats.packets++;
		cl->stats.bytes += len;

		/* (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

			idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			   where W = 2^{-ewma_log}. But cl->avgidle is scaled:
			   cl->avgidle == true_avgidle/W,
			   hence:
			 */
			avgidle += idle - (avgidle >> cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/* That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already passed by the
			   moment cbq_update is called.)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);
		} else {
			/* Underlimit */

			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* It is a very suspicious place. Now the overlimit
		   action is generated for non-bounded classes only
		   if the link is completely congested. Though this
		   agrees with the ancestor-only paradigm, it looks
		   very stupid. In particular, it means that this
		   chunk of code will either never be called or will
		   strongly amplify burstiness. Dangerous, silly,
		   and yet no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->stats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));

	cl->delayed = 0;
	return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment per
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   This can occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from the active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	psched_time_t now;
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate the EOS time by adding
		   the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that:

			cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	PSCHED_TADD(q->now, incr);
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible if:
		   1. The scheduler is empty.
		   2. The toplevel cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking an
		   arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);
	}

	/* No packets in the scheduler, or nobody wants to give them
	   to us :-( Sigh... start the watchdog timer in the latter case. */

	if (sch->q.qlen) {
		sch->stats.overlimits++;
		if (q->wd_expires && !netif_queue_stopped(sch->dev)) {
			long delay = PSCHED_US2JIFFIE(q->wd_expires);
			del_timer(&q->wd_timer);
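
A note on the timer bookkeeping in cbq_ovl_lowprio() and cbq_undelay_prio() above: they compare jiffies timestamps with a signed subtraction, (long)(now - cl->penalized) > 0, which stays correct across counter wrap-around as long as the two timestamps are less than half the counter range apart. Below is a minimal stand-alone sketch of that idiom; the helper name ticks_after is invented and the snippet is not part of sch_cbq.c.

#include <stdio.h>

/* Wrap-safe "is a later than b?" for a free-running unsigned tick counter:
 * the same idiom as (long)(now - cl->penalized) > 0 in the listing above. */
static int ticks_after(unsigned long a, unsigned long b)
{
	return (long)(a - b) > 0;
}

int main(void)
{
	unsigned long before_wrap = (unsigned long)-10;	/* 10 ticks before wrap */
	unsigned long after_wrap = 5;			/* 5 ticks after wrap */

	/* A naive comparison gets this wrong; the signed difference does not. */
	printf("naive: %d  wrap-safe: %d\n",
	       after_wrap > before_wrap,
	       ticks_after(after_wrap, before_wrap));
	return 0;
}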
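
A note on the avgidle arithmetic in cbq_update() above: the class stores true_avgidle scaled by 1/W, with W = 2^{-ewma_log}, so both the EWMA update and the extra delay for an overlimit class reduce to additions and shifts, exactly as the in-line comments derive. The following user-space sketch replays just that fixed-point math; the names toy_class and toy_update and the numbers are invented, and it deliberately omits the link-rate correction (idle -= L2T(&q->link, len); idle += L2T(cl, len)) that the kernel applies before setting undertime.

#include <stdio.h>

/* Invented, stripped-down class holding only the EWMA state. */
struct toy_class {
	long avgidle;	/* scaled average idle time: true_avgidle / W */
	long minidle;	/* lower clamp (a negative value) */
	long maxidle;	/* upper clamp */
	int ewma_log;	/* W = 2^-ewma_log */
};

/* The per-packet update cbq_update() applies:
 *	avgidle += idle - (avgidle >> ewma_log)
 * is the scaled form of
 *	true_avgidle = (1 - W) * true_avgidle + W * idle.
 * When the result is <= 0 the class is overlimit; the idle time needed
 * for the average to climb back to zero is
 *	(-avgidle) - ((-avgidle) >> ewma_log),
 * which this sketch returns (0 means the class stays underlimit). */
static long toy_update(struct toy_class *cl, long idle)
{
	long avgidle = cl->avgidle;

	avgidle += idle - (avgidle >> cl->ewma_log);

	if (avgidle <= 0) {
		if (avgidle < cl->minidle)
			avgidle = cl->minidle;
		cl->avgidle = avgidle;
		return (-avgidle) - ((-avgidle) >> cl->ewma_log);
	}

	/* Underlimit: clamp at maxidle, no delay needed. */
	cl->avgidle = avgidle < cl->maxidle ? avgidle : cl->maxidle;
	return 0;
}

int main(void)
{
	struct toy_class cl = {
		.avgidle = 0, .minidle = -4000, .maxidle = 2000, .ewma_log = 5
	};
	/* Back-to-back packets: the measured gap is smaller than the
	 * class's per-packet allotment, so each "idle" sample is negative
	 * and the class drifts into overlimit. */
	long samples[] = { -100, -100, -100, -100 };

	for (unsigned i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
		long delay = toy_update(&cl, samples[i]);
		printf("avgidle=%ld delay=%ld\n", cl.avgidle, delay);
	}
	return 0;
}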
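
A note on the dequeue loop in cbq_dequeue_prio() above: within one priority band it is essentially deficit round robin over the circular next_alive list; a class pays for each packet out of its deficit in bytes and is topped up by quantum once the deficit goes non-positive. The sketch below shows only that accounting idea; the names toy_band_class and toy_dequeue are invented, and the real function additionally handles borrowing via cbq_under_limit(), penalty priorities, and unlinking empty classes, with the quantum top-up interleaved with the round-robin pointer in a slightly different way.

#include <stdio.h>

/* Invented, simplified "class": a queue reduced to a packet count and a
 * fixed packet size, plus the round-robin state (quantum must be > 0). */
struct toy_band_class {
	const char *name;
	int backlog;	/* packets waiting */
	int pktlen;	/* bytes per packet */
	int quantum;	/* bytes credited per round */
	int deficit;	/* bytes it may still send this round */
};

/* One dequeue decision over a band of n classes: a backlogged class with
 * a positive deficit sends one packet and pays for it in bytes; one with
 * an exhausted deficit is recharged and skipped until the next pass. */
static const char *toy_dequeue(struct toy_band_class *cls, int n, int *pos)
{
	int recharged;

	do {
		recharged = 0;
		for (int scanned = 0; scanned < n; scanned++) {
			struct toy_band_class *cl = &cls[*pos];

			if (cl->backlog == 0) {
				*pos = (*pos + 1) % n;
				continue;
			}
			if (cl->deficit <= 0) {
				cl->deficit += cl->quantum;
				recharged = 1;
				*pos = (*pos + 1) % n;
				continue;
			}
			cl->backlog--;
			cl->deficit -= cl->pktlen;
			return cl->name;
		}
	} while (recharged);

	return NULL;	/* band empty */
}

int main(void)
{
	struct toy_band_class cls[] = {
		{ "A", 5, 1000, 1500, 0 },	/* ~1500 bytes per round */
		{ "B", 5, 1000, 3000, 0 },	/* ~3000 bytes per round */
	};
	int pos = 0;
	const char *who;

	/* While both stay backlogged, B sends roughly twice as many
	 * packets per pass as A, matching the 2:1 quantum ratio. */
	while ((who = toy_dequeue(cls, 2, &pos)) != NULL)
		printf("%s ", who);
	printf("\n");
	return 0;
}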