📄 200-sched_esfq.patch
字号:
Index: linux-2.6.21.7/include/linux/pkt_sched.h
===================================================================
--- linux-2.6.21.7.orig/include/linux/pkt_sched.h
+++ linux-2.6.21.7/include/linux/pkt_sched.h
@@ -146,8 +146,40 @@ struct tc_sfq_qopt
  *
  *	The only reason for this is efficiency, it is possible
  *	to change these parameters in compile time.
+ *
+ *	If you need to play with these values use esfq instead.
  */
 
+/* ESFQ section */
+
+enum
+{
+	/* traditional */
+	TCA_SFQ_HASH_CLASSIC,	/* dst, src and protocol/port word, as in sch_sfq */
+	TCA_SFQ_HASH_DST,	/* destination IP address */
+	TCA_SFQ_HASH_SRC,	/* source IP address */
+	TCA_SFQ_HASH_FWMARK,	/* netfilter mark value */
+	/* direct */
+	TCA_SFQ_HASH_DSTDIR,	/* destination IP address, direct hashing */
+	TCA_SFQ_HASH_SRCDIR,	/* source IP address, direct hashing */
+	TCA_SFQ_HASH_FWMARKDIR,	/* netfilter mark value, direct hashing */
+	/* conntrack */
+	TCA_SFQ_HASH_CTORIGDST,	/* conntrack original destination IP address */
+	TCA_SFQ_HASH_CTORIGSRC,	/* conntrack original source IP address */
+	TCA_SFQ_HASH_CTREPLDST,	/* conntrack reply destination IP address */
+	TCA_SFQ_HASH_CTREPLSRC,	/* conntrack reply source IP address */
+};
+
+struct tc_esfq_qopt
+{
+	unsigned	quantum;	/* Bytes per round allocated to flow */
+	int		perturb_period;	/* Period of hash perturbation */
+	__u32		limit;		/* Maximal packets in queue */
+	unsigned	divisor;	/* Hash divisor */
+	unsigned	flows;		/* Maximal number of flows */
+	unsigned	hash_kind;	/* Hash function to use for flow identification */
+};
+
 /* RED section */
 
 enum
Index: linux-2.6.21.7/net/sched/Kconfig
===================================================================
--- linux-2.6.21.7.orig/net/sched/Kconfig
+++ linux-2.6.21.7/net/sched/Kconfig
@@ -189,6 +189,26 @@ config NET_SCH_SFQ
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_sfq.
 
+config NET_SCH_ESFQ
+	tristate "Enhanced Stochastic Fairness Queueing (ESFQ)"
+	---help---
+	  Say Y here if you want to use the Enhanced Stochastic Fairness
+	  Queueing (ESFQ) packet scheduling algorithm for some of your network
+	  devices or as a leaf discipline for a classful qdisc such as HTB or
+	  CBQ (see the top of <file:net/sched/sch_esfq.c> for details and
+	  references to the SFQ algorithm).
+
+	  This is an enhanced SFQ version which allows you to control some
+	  hardcoded values in the SFQ scheduler.
+
+	  ESFQ also adds control of the hash function used to identify packet
+	  flows. The original SFQ discipline hashes by connection; ESFQ adds
+	  several other hashing methods, such as by src IP or by dst IP, which
+	  can be more fair to users in some networking situations.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_esfq.
+
 config NET_SCH_TEQL
 	tristate "True Link Equalizer (TEQL)"
 	---help---
Index: linux-2.6.21.7/net/sched/Makefile
===================================================================
--- linux-2.6.21.7.orig/net/sched/Makefile
+++ linux-2.6.21.7/net/sched/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_NET_SCH_GRED)	+= sch_gred.o
 obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o
 obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
 obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
+obj-$(CONFIG_NET_SCH_ESFQ)	+= sch_esfq.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
Index: linux-2.6.21.7/net/sched/sch_esfq.c
===================================================================
--- /dev/null
+++ linux-2.6.21.7/net/sched/sch_esfq.c
@@ -0,0 +1,728 @@
+/*
+ * net/sched/sch_esfq.c	Extended Stochastic Fairness Queueing discipline.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:	Alexander Atanasov, <alex@ssi.bg>
+ *		Added dynamic depth,limit,divisor,hash_kind options.
+ *		Added dst and src hashes.
+ *
+ *		Alexander Clouter, <alex@digriz.org.uk>
+ *		Ported ESFQ to Linux 2.6.
+ *
+ *		Corey Hickey, <bugfood-c@fatooh.org>
+ *		Maintenance of the Linux 2.6 port.
+ *		Added fwmark hash (thanks to Robert Kurjata).
+ *		Added direct hashing for src, dst, and fwmark.
+ *		Added usage of jhash.
+ *
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/div64.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/jhash.h>
+
+#ifdef CONFIG_NF_CONNTRACK_ENABLED
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/*	Stochastic Fairness Queuing algorithm.
+	For more comments look at sch_sfq.c.
+	The difference is that you can change limit, depth,
+	hash table size and choose alternate hash types.
+
+	classic:	same as in sch_sfq.c
+	dst:		destination IP address
+	src:		source IP address
+	fwmark:		netfilter mark value
+	dst_direct:
+	src_direct:
+	fwmark_direct:	direct hashing of the above sources
+	ctorigdst:	original destination IP address
+	ctorigsrc:	original source IP address
+	ctrepldst:	reply destination IP address
+	ctreplsrc:	reply source IP address
+
+*/
+
+
+/* This type should contain at least depth*2 values (dep[] is indexed up to qlen + depth) */
+typedef unsigned int esfq_index;
+
+struct esfq_head
+{
+	esfq_index	next;
+	esfq_index	prev;
+};
+
+struct esfq_sched_data
+{
+/* Parameters */
+	int		perturb_period;
+	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	int		limit;
+	unsigned	depth;
+	unsigned	hash_divisor;
+	unsigned	hash_kind;
+/* Variables */
+	struct timer_list perturb_timer;
+	int		perturbation;
+	esfq_index	tail;		/* Index of current slot in round */
+	esfq_index	max_depth;	/* Maximal depth */
+
+	esfq_index	*ht;			/* Hash table */
+	esfq_index	*next;			/* Active slots link */
+	short		*allot;			/* Current allotment per slot */
+	unsigned short	*hash;			/* Hash value indexed by slots */
+	struct sk_buff_head	*qs;		/* Slot queue */
+	struct esfq_head	*dep;		/* Linked list of slots, indexed by depth */
+	unsigned	dyn_min;	/* For dynamic divisor adjustment; minimum value seen */
+	unsigned	dyn_max;	/* maximum value seen */
+	unsigned	dyn_range;	/* saved range */
+};
+
+/* This contains the info we will hash. */
+struct esfq_packet_info
+{
+	u32	proto;		/* protocol or port */
+	u32	src;		/* source from packet header */
+	u32	dst;		/* destination from packet header */
+	u32	ctorigsrc;	/* original source from conntrack */
+	u32	ctorigdst;	/* original destination from conntrack */
+	u32	ctreplsrc;	/* reply source from conntrack */
+	u32	ctrepldst;	/* reply destination from conntrack */
+	u32	mark;		/* netfilter mark (fwmark) */
+};
+
+/* Hash input values directly into the "nearest" slot, taking into account the
+ * range of input values seen. This is most useful when the hash table is at
+ * least as large as the range of possible values.
+ * Note: this functionality was added before the change to using jhash, and may
+ * no longer be useful.
+ *
+ * Side effect: widens q->dyn_min/dyn_max (and refreshes q->dyn_range) when h
+ * falls outside the range seen so far.  Returns (h - dyn_min) scaled into
+ * [0, hash_divisor - 1], or 0 while only a single value has been seen. */
+static __inline__ unsigned esfq_hash_direct(struct esfq_sched_data *q, u32 h)
+{
+	u64 scaled;
+
+	/* adjust minimum and maximum */
+	if (h < q->dyn_min || h > q->dyn_max) {
+		q->dyn_min = h < q->dyn_min ? h : q->dyn_min;
+		q->dyn_max = h > q->dyn_max ? h : q->dyn_max;
+
+		/* find new range */
+		if ((q->dyn_range = q->dyn_max - q->dyn_min) >= q->hash_divisor &&
+		    net_ratelimit())
+			printk(KERN_WARNING "ESFQ: (direct hash) Input range %u is larger than hash "
+					"table. See ESFQ README for details.\n", q->dyn_range);
+	}
+
+	/* hash input values into slot numbers */
+	if (q->dyn_min == q->dyn_max)
+		return 0; /* only one value seen; avoid division by 0 */
+
+	/* Scale in 64 bits: (h - dyn_min) * (hash_divisor - 1) does not fit in
+	 * 32 bits once the input range is much larger than the table. */
+	scaled = (u64)(h - q->dyn_min) * (q->hash_divisor - 1);
+	do_div(scaled, q->dyn_range);
+	return (unsigned)scaled;
+}
+
+/* Perturbed Jenkins hashes of one, two or three words, masked to a slot number.
+ * The mask only covers the whole table when hash_divisor is a power of two. */
+static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a)
+{
+	return jhash_1word(a, q->perturbation) & (q->hash_divisor-1);
+}
+
+static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b)
+{
+	return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1);
+}
+
+static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c)
+{
+	return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1);
+}
+
+
+/* Map skb to a hash slot according to q->hash_kind.
+ * Addresses and the protocol/port word are taken from the IPv4 or IPv6 header
+ * (other protocols fall back to the skb->dst and skb->sk pointer values);
+ * unknown hash kinds are reported and hashed like TCA_SFQ_HASH_CLASSIC. */
+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
+{
+	struct esfq_packet_info info;
+#ifdef CONFIG_NF_CONNTRACK_ENABLED
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+#endif
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+	{
+		struct iphdr *iph = skb->nh.iph;
+		info.dst = iph->daddr;
+		info.src = iph->saddr;
+		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+		    (iph->protocol == IPPROTO_TCP ||
+		     iph->protocol == IPPROTO_UDP ||
+		     iph->protocol == IPPROTO_SCTP ||
+		     iph->protocol == IPPROTO_DCCP ||
+		     iph->protocol == IPPROTO_ESP))
+			info.proto = *(((u32*)iph) + iph->ihl);
+		else
+			info.proto = iph->protocol;
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6):
+	{
+		struct ipv6hdr *iph = skb->nh.ipv6h;
+		/* Hash ipv6 addresses into a u32. This isn't ideal,
+		 * but the code is simple. */
+		info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation);
+		info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation);
+		if (iph->nexthdr == IPPROTO_TCP ||
+		    iph->nexthdr == IPPROTO_UDP ||
+		    iph->nexthdr == IPPROTO_SCTP ||
+		    iph->nexthdr == IPPROTO_DCCP ||
+		    iph->nexthdr == IPPROTO_ESP)
+			info.proto = *(u32*)&iph[1];
+		else
+			info.proto = iph->nexthdr;
+		break;
+	}
+	default:
+		info.dst = (u32)(unsigned long)skb->dst;
+		info.src = (u32)(unsigned long)skb->sk;
+		info.proto = skb->protocol;
+	}
+
+	info.mark = skb->mark;
+
+#ifdef CONFIG_NF_CONNTRACK_ENABLED
+	/* defaults if there is no conntrack info */
+	info.ctorigsrc = info.src;
+	info.ctorigdst = info.dst;
+	info.ctreplsrc = info.dst;
+	info.ctrepldst = info.src;
+	/* collect conntrack info */
+	if (ct && ct != &nf_conntrack_untracked) {
+		if (skb->protocol == __constant_htons(ETH_P_IP)) {
+			info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+			info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
+			info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
+			info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
+		}
+		else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+			/* Again, hash ipv6 addresses into a single u32. */
+			info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation);
+			info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation);
+			info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation);
+			info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation);
+		}
+
+	}
+#endif
+
+	switch(q->hash_kind)
+	{
+	case TCA_SFQ_HASH_CLASSIC:
+		return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+	case TCA_SFQ_HASH_DST:
+		return esfq_jhash_1word(q, info.dst);
+	case TCA_SFQ_HASH_DSTDIR:
+		return esfq_hash_direct(q, ntohl(info.dst));
+	case TCA_SFQ_HASH_SRC:
+		return esfq_jhash_1word(q, info.src);
+	case TCA_SFQ_HASH_SRCDIR:
+		return esfq_hash_direct(q, ntohl(info.src));
+	case TCA_SFQ_HASH_FWMARK:
+		return esfq_jhash_1word(q, info.mark);
+	case TCA_SFQ_HASH_FWMARKDIR:
+		return esfq_hash_direct(q, info.mark);
+#ifdef CONFIG_NF_CONNTRACK_ENABLED
+	case TCA_SFQ_HASH_CTORIGDST:
+		return esfq_jhash_1word(q, info.ctorigdst);
+	case TCA_SFQ_HASH_CTORIGSRC:
+		return esfq_jhash_1word(q, info.ctorigsrc);
+	case TCA_SFQ_HASH_CTREPLDST:
+		return esfq_jhash_1word(q, info.ctrepldst);
+	case TCA_SFQ_HASH_CTREPLSRC:
+		return esfq_jhash_1word(q, info.ctreplsrc);
+#endif
+	default:
+		if (net_ratelimit())
+			printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n");
+	}
+	return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+}
+
+/* Insert slot x right behind the list head dep[qlen + depth], i.e. into the
+ * list of slots that currently hold qlen packets. */
+static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+	int d = q->qs[x].qlen + q->depth;
+
+	p = d;
+	n = q->dep[d].next;
+	q->dep[x].next = n;
+	q->dep[x].prev = p;
+	q->dep[p].next = q->dep[n].prev = x;
+}
+
+/* Unlink slot x and re-file it under its current queue length; max_depth is
+ * lowered when x was alone on the list for length max_depth.
+ * NOTE(review): presumably called after qs[x].qlen was decremented - confirm
+ * against the callers. */
+static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+
+	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+		q->max_depth--;
+
+	esfq_link(q, x);
+}
+
+static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+	int d;
+
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -