📄 sch_hfsc.c
字号:
/* * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * 2003-10-17 - Ported from altq *//* * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. *//* * H-FSC is described in Proceedings of SIGCOMM'97, * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, * Real-Time and Priority Service" * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. * * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing. * when a class has an upperlimit, the fit-time is computed from the * upperlimit service curve. the link-sharing scheduler does not schedule * a class whose fit-time exceeds the current time. */#include <linux/kernel.h>#include <linux/module.h>#include <linux/types.h>#include <linux/errno.h>#include <linux/compiler.h>#include <linux/spinlock.h>#include <linux/skbuff.h>#include <linux/string.h>#include <linux/slab.h>#include <linux/list.h>#include <linux/rbtree.h>#include <linux/init.h>#include <linux/rtnetlink.h>#include <linux/pkt_sched.h>#include <net/netlink.h>#include <net/pkt_sched.h>#include <net/pkt_cls.h>#include <asm/div64.h>/* * kernel internal service curve representation: * coordinates are given by 64 bit unsigned integers. * x-axis: unit is clock count. * y-axis: unit is byte. * * The service curve parameters are converted to the internal * representation. The slope values are scaled to avoid overflow. * the inverse slope values as well as the y-projection of the 1st * segment are kept in order to to avoid 64-bit divide operations * that are expensive on 32-bit architectures. */struct internal_sc{ u64 sm1; /* scaled slope of the 1st segment */ u64 ism1; /* scaled inverse-slope of the 1st segment */ u64 dx; /* the x-projection of the 1st segment */ u64 dy; /* the y-projection of the 1st segment */ u64 sm2; /* scaled slope of the 2nd segment */ u64 ism2; /* scaled inverse-slope of the 2nd segment */};/* runtime service curve */struct runtime_sc{ u64 x; /* current starting position on x-axis */ u64 y; /* current starting position on y-axis */ u64 sm1; /* scaled slope of the 1st segment */ u64 ism1; /* scaled inverse-slope of the 1st segment */ u64 dx; /* the x-projection of the 1st segment */ u64 dy; /* the y-projection of the 1st segment */ u64 sm2; /* scaled slope of the 2nd segment */ u64 ism2; /* scaled inverse-slope of the 2nd segment */};enum hfsc_class_flags{ HFSC_RSC = 0x1, HFSC_FSC = 0x2, HFSC_USC = 0x4};struct hfsc_class{ u32 classid; /* class id */ unsigned int refcnt; /* usage count */ struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ struct hfsc_sched *sched; /* scheduler data */ struct hfsc_class *cl_parent; /* parent class */ struct list_head siblings; /* sibling classes */ struct list_head children; /* child classes */ struct Qdisc *qdisc; /* leaf qdisc */ struct rb_node el_node; /* qdisc's eligible tree member */ struct rb_root vt_tree; /* active children sorted by cl_vt */ struct rb_node vt_node; /* parent's vt_tree member */ struct rb_root cf_tree; /* active children sorted by cl_f */ struct rb_node cf_node; /* parent's cf_heap member */ struct list_head hlist; /* hash list member */ struct list_head dlist; /* drop list member */ u64 cl_total; /* total work in bytes */ u64 cl_cumul; /* cumulative work in bytes done by real-time criteria */ u64 cl_d; /* deadline*/ u64 cl_e; /* eligible time */ u64 cl_vt; /* virtual time */ u64 cl_f; /* time when this class will fit for link-sharing, max(myf, cfmin) */ u64 cl_myf; /* my fit-time (calculated from this class's own upperlimit curve) */ u64 cl_myfadj; /* my fit-time adjustment (to cancel history dependence) */ u64 cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u64 cl_cvtmin; /* minimal virtual time among the children fit for link-sharing (monotonic within a period) */ u64 cl_vtadj; /* intra-period cumulative vt adjustment */ u64 cl_vtoff; /* inter-period cumulative vt offset */ u64 cl_cvtmax; /* max child's vt in the last period */ u64 cl_cvtoff; /* cumulative cvtmax of all periods */ u64 cl_pcvtoff; /* parent's cvtoff at initialization time */ struct internal_sc cl_rsc; /* internal real-time service curve */ struct internal_sc cl_fsc; /* internal fair service curve */ struct internal_sc cl_usc; /* internal upperlimit service curve */ struct runtime_sc cl_deadline; /* deadline curve */ struct runtime_sc cl_eligible; /* eligible curve */ struct runtime_sc cl_virtual; /* virtual curve */ struct runtime_sc cl_ulimit; /* upperlimit curve */ unsigned long cl_flags; /* which curves are valid */ unsigned long cl_vtperiod; /* vt period sequence number */ unsigned long cl_parentperiod;/* parent's vt period sequence number*/ unsigned long cl_nactive; /* number of active children */};#define HFSC_HSIZE 16struct hfsc_sched{ u16 defcls; /* default class id */ struct hfsc_class root; /* root class */ struct list_head clhash[HFSC_HSIZE]; /* class hash */ struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ struct sk_buff_head requeue; /* requeued packet */ struct qdisc_watchdog watchdog; /* watchdog timer */};#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value *//* * eligible tree holds backlogged classes being sorted by their eligible times. * there is one eligible tree per hfsc instance. */static voideltree_insert(struct hfsc_class *cl){ struct rb_node **p = &cl->sched->eligible.rb_node; struct rb_node *parent = NULL; struct hfsc_class *cl1; while (*p != NULL) { parent = *p; cl1 = rb_entry(parent, struct hfsc_class, el_node); if (cl->cl_e >= cl1->cl_e) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&cl->el_node, parent, p); rb_insert_color(&cl->el_node, &cl->sched->eligible);}static inline voideltree_remove(struct hfsc_class *cl){ rb_erase(&cl->el_node, &cl->sched->eligible);}static inline voideltree_update(struct hfsc_class *cl){ eltree_remove(cl); eltree_insert(cl);}/* find the class with the minimum deadline among the eligible classes */static inline struct hfsc_class *eltree_get_mindl(struct hfsc_sched *q, u64 cur_time){ struct hfsc_class *p, *cl = NULL; struct rb_node *n; for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) { p = rb_entry(n, struct hfsc_class, el_node); if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) cl = p; } return cl;}/* find the class with minimum eligible time among the eligible classes */static inline struct hfsc_class *eltree_get_minel(struct hfsc_sched *q){ struct rb_node *n; n = rb_first(&q->eligible); if (n == NULL) return NULL; return rb_entry(n, struct hfsc_class, el_node);}/* * vttree holds holds backlogged child classes being sorted by their virtual * time. each intermediate class has one vttree. */static voidvttree_insert(struct hfsc_class *cl){ struct rb_node **p = &cl->cl_parent->vt_tree.rb_node; struct rb_node *parent = NULL; struct hfsc_class *cl1; while (*p != NULL) { parent = *p; cl1 = rb_entry(parent, struct hfsc_class, vt_node); if (cl->cl_vt >= cl1->cl_vt) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&cl->vt_node, parent, p); rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree);}static inline voidvttree_remove(struct hfsc_class *cl){ rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree);}static inline voidvttree_update(struct hfsc_class *cl){ vttree_remove(cl); vttree_insert(cl);}static inline struct hfsc_class *vttree_firstfit(struct hfsc_class *cl, u64 cur_time){ struct hfsc_class *p; struct rb_node *n; for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) { p = rb_entry(n, struct hfsc_class, vt_node); if (p->cl_f <= cur_time) return p; } return NULL;}/* * get the leaf class with the minimum vt in the hierarchy */static struct hfsc_class *vttree_get_minvt(struct hfsc_class *cl, u64 cur_time){ /* if root-class's cfmin is bigger than cur_time nothing to do */ if (cl->cl_cfmin > cur_time) return NULL; while (cl->level > 0) { cl = vttree_firstfit(cl, cur_time); if (cl == NULL) return NULL; /* * update parent's cl_cvtmin. */ if (cl->cl_parent->cl_cvtmin < cl->cl_vt) cl->cl_parent->cl_cvtmin = cl->cl_vt; } return cl;}static voidcftree_insert(struct hfsc_class *cl){ struct rb_node **p = &cl->cl_parent->cf_tree.rb_node; struct rb_node *parent = NULL; struct hfsc_class *cl1; while (*p != NULL) { parent = *p; cl1 = rb_entry(parent, struct hfsc_class, cf_node); if (cl->cl_f >= cl1->cl_f) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&cl->cf_node, parent, p); rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree);}static inline voidcftree_remove(struct hfsc_class *cl){ rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree);}static inline voidcftree_update(struct hfsc_class *cl){ cftree_remove(cl); cftree_insert(cl);}/* * service curve support functions * * external service curve parameters * m: bps * d: us * internal service curve parameters * sm: (bytes/psched_us) << SM_SHIFT * ism: (psched_us/byte) << ISM_SHIFT * dx: psched_us * * The clock source resolution with ktime is 1.024us. * * sm and ism are scaled in order to keep effective digits. * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective * digits in decimal using the following table. * * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps * ------------+------------------------------------------------------- * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3 * * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125 */#define SM_SHIFT 20#define ISM_SHIFT 18#define SM_MASK ((1ULL << SM_SHIFT) - 1)#define ISM_MASK ((1ULL << ISM_SHIFT) - 1)static inline u64seg_x2y(u64 x, u64 sm){ u64 y; /* * compute * y = x * sm >> SM_SHIFT * but divide it for the upper and lower bits to avoid overflow */ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); return y;}static inline u64seg_y2x(u64 y, u64 ism){ u64 x; if (y == 0) x = 0; else if (ism == HT_INFINITY) x = HT_INFINITY; else { x = (y >> ISM_SHIFT) * ism + (((y & ISM_MASK) * ism) >> ISM_SHIFT); } return x;}/* Convert m (bps) into sm (bytes/psched us) */static u64m2sm(u32 m){ u64 sm; sm = ((u64)m << SM_SHIFT); sm += PSCHED_TICKS_PER_SEC - 1; do_div(sm, PSCHED_TICKS_PER_SEC); return sm;}/* convert m (bps) into ism (psched us/byte) */static u64m2ism(u32 m){ u64 ism; if (m == 0) ism = HT_INFINITY; else { ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT); ism += m - 1; do_div(ism, m); } return ism;}/* convert d (us) into dx (psched us) */static u64d2dx(u32 d){ u64 dx; dx = ((u64)d * PSCHED_TICKS_PER_SEC); dx += USEC_PER_SEC - 1; do_div(dx, USEC_PER_SEC); return dx;}/* convert sm (bytes/psched us) into m (bps) */static u32sm2m(u64 sm){ u64 m; m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT; return (u32)m;}/* convert dx (psched us) into d (us) */static u32dx2d(u64 dx){ u64 d; d = dx * USEC_PER_SEC; do_div(d, PSCHED_TICKS_PER_SEC); return (u32)d;}static voidsc2isc(struct tc_service_curve *sc, struct internal_sc *isc){ isc->sm1 = m2sm(sc->m1); isc->ism1 = m2ism(sc->m1); isc->dx = d2dx(sc->d); isc->dy = seg_x2y(isc->dx, isc->sm1); isc->sm2 = m2sm(sc->m2); isc->ism2 = m2ism(sc->m2);}/* * initialize the runtime service curve with the given internal * service curve starting at (x, y). */static voidrtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y){ rtsc->x = x; rtsc->y = y; rtsc->sm1 = isc->sm1; rtsc->ism1 = isc->ism1; rtsc->dx = isc->dx; rtsc->dy = isc->dy; rtsc->sm2 = isc->sm2; rtsc->ism2 = isc->ism2;}/* * calculate the y-projection of the runtime service curve by the * given x-projection value */static u64rtsc_y2x(struct runtime_sc *rtsc, u64 y){ u64 x; if (y < rtsc->y) x = rtsc->x; else if (y <= rtsc->y + rtsc->dy) { /* x belongs to the 1st segment */ if (rtsc->dy == 0) x = rtsc->x + rtsc->dx; else x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); } else { /* x belongs to the 2nd segment */ x = rtsc->x + rtsc->dx + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); } return x;}static u64rtsc_x2y(struct runtime_sc *rtsc, u64 x){ u64 y; if (x <= rtsc->x) y = rtsc->y; else if (x <= rtsc->x + rtsc->dx) /* y belongs to the 1st segment */ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); else /* y belongs to the 2nd segment */ y = rtsc->y + rtsc->dy + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); return y;}/* * update the runtime service curve by taking the minimum of the current * runtime service curve and the service curve starting at (x, y). */static voidrtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y){ u64 y1, y2, dx, dy; u32 dsm; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ y1 = rtsc_x2y(rtsc, x); if (y1 < y) /* the current rtsc is smaller */ return; rtsc->x = x; rtsc->y = y; return; } /* * service curve is concave * compute the two y values of the current rtsc * y1: at x * y2: at (x + dx)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -