
📄 sch_red.c

📁 Linux kernel source code, distributed as a compressed archive; this is the source code accompanying the book «Linux内核» (Linux Kernel).
💻 C
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim <hadi@nortel.com> 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim <hadi@nortelnetworks.com> 980816:  ECN support
 */

#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define RED_ECN_ECT  0x02
#define RED_ECN_CE   0x01

/*	Random Early Detection (RED) algorithm.
	=======================================

	Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
	for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.

	This file codes a "divisionless" version of the RED algorithm
	as written down in Fig.17 of the paper.

Short description.
------------------

	When a new packet arrives we calculate the average queue length:

	avg = (1-W)*avg + W*current_queue_len,

	W is the filter time constant (chosen as 2^(-Wlog)), it controls
	the inertia of the algorithm. To allow larger bursts, W should be
	decreased.

	if (avg > th_max) -> packet marked (dropped).
	if (avg < th_min) -> packet passes.
	if (th_min < avg < th_max) we calculate probability:

	Pb = max_P * (avg - th_min)/(th_max-th_min)

	and mark (drop) packet with this probability.
	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
	max_P should be small (not 1), usually 0.01..0.02 is a good value.

	max_P is chosen as a number, so that max_P/(th_max-th_min)
	is a negative power of two, in order for the arithmetic to contain
	only shifts.

	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.

	qth_min		- bytes (should be < qth_max/2)
	qth_max		- bytes (should be at least 2*qth_min and less than limit)
	Wlog	       	- bits (<32) log(1/W).
	Plog	       	- bits (<32)

	Plog is related to max_P by the formula:

	max_P = (qth_max-qth_min)/2^Plog;

	F.e. if qth_max=128K and qth_min=32K, then Plog=22
	corresponds to max_P=0.02

	Scell_log
	Stab

	Lookup table for log((1-W)^(t/t_ave)).

NOTES:

Upper bound on W.
-----------------
	If you want to allow bursts of L packets of size S,
	you should choose W:

	L + 1 - th_min/S < (1-(1-W)^L)/W

	th_min/S = 32         th_min/S = 4

	log(W)	L
	-1	33
	-2	35
	-3	39
	-4	46
	-5	57
	-6	75
	-7	101
	-8	135
	-9	190
	etc.
 */

struct red_sched_data
{
/* Parameters */
	u32		limit;		/* HARD maximal queue length	*/
	u32		qth_min;	/* Min average length threshold: A scaled */
	u32		qth_max;	/* Max average length threshold: A scaled */
	u32		Rmask;
	u32		Scell_max;
	unsigned char	flags;
	char		Wlog;		/* log(W)		*/
	char		Plog;		/* random number bits	*/
	char		Scell_log;
	u8		Stab[256];

/* Variables */
	unsigned long	qave;		/* Average queue length: A scaled */
	int		qcount;		/* Packets since last random number generation */
	u32		qR;		/* Cached random number */

	psched_time_t	qidlestart;	/* Start of idle period		*/
	struct tc_red_xstats st;
};

static int red_ecn_mark(struct sk_buff *skb)
{
	if (skb->nh.raw + 20 > skb->tail)
		return 0;

	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
	{
		u8 tos = skb->nh.iph->tos;

		if (!(tos & RED_ECN_ECT))
			return 0;

		if (!(tos & RED_ECN_CE))
			IP_ECN_set_ce(skb->nh.iph);
		return 1;
	}

	case __constant_htons(ETH_P_IPV6):
	{
		u32 label = *(u32*)skb->nh.raw;

		if (!(label & __constant_htonl(RED_ECN_ECT<<20)))
			return 0;
		label |= __constant_htonl(RED_ECN_CE<<20);
		return 1;
	}

	default:
		return 0;
	}
}

static int
red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct red_sched_data *q = (struct red_sched_data *)sch->data;
	psched_time_t now;

	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
		long us_idle;
		int  shift;

		PSCHED_GET_TIME(now);
		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0);
		PSCHED_SET_PASTPERFECT(q->qidlestart);

/*
   The problem: ideally, the average queue length recalculation should
   be done over constant clock intervals. This is too expensive, so that
   the calculation is driven by outgoing packets.
   When the queue is idle we have to model this clock by hand.

   SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
   dummy packets as a burst after idle time, i.e.

          q->qave *= (1-W)^m

   This is an apparently overcomplicated solution (f.e. we have to precompute
   a table to make this calculation in reasonable time)
   I believe that a simpler model may be used here,
   but it is field for experiments.
*/
		shift = q->Stab[us_idle>>q->Scell_log];

		if (shift) {
			q->qave >>= shift;
		} else {
			/* Approximate initial part of exponent
			   with linear function:
			   (1-W)^m ~= 1-mW + ...

			   Seems, it is the best solution to the
			   problem of too coarse exponent tabulation.
			 */
			us_idle = (q->qave * us_idle)>>q->Scell_log;
			if (us_idle < q->qave/2)
				q->qave -= us_idle;
			else
				q->qave >>= 1;
		}
	} else {
		q->qave += sch->stats.backlog - (q->qave >> q->Wlog);
		/* NOTE:
		   q->qave is a fixed point number with point at Wlog.
		   The formula above is equivalent to the floating point
		   version:

		   qave = qave*(1-W) + sch->stats.backlog*W;
		                                           --ANK (980924)
		 */
	}

	if (q->qave < q->qth_min) {
		q->qcount = -1;
enqueue:
		if (sch->stats.backlog <= q->limit) {
			__skb_queue_tail(&sch->q, skb);
			sch->stats.backlog += skb->len;
			sch->stats.bytes += skb->len;
			sch->stats.packets++;
			return NET_XMIT_SUCCESS;
		} else {
			q->st.pdrop++;
		}
		kfree_skb(skb);
		sch->stats.drops++;
		return NET_XMIT_DROP;
	}
	if (q->qave >= q->qth_max) {
		q->qcount = -1;
		sch->stats.overlimits++;
mark:
		if (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) {
			q->st.early++;
			goto drop;
		}
		q->st.marked++;
		goto enqueue;
	}

	if (++q->qcount) {
		/* The formula used below causes questions.

		   OK. qR is a random number in the interval 0..Rmask
		   i.e. 0..(2^Plog). If we used floating point
		   arithmetics, it would be: (2^Plog)*rnd_num,
		   where rnd_num is less than 1.

		   Taking into account that qave has its fixed
		   point at Wlog, and Plog is related to max_P by
		   max_P = (qth_max-qth_min)/2^Plog; the two lines
		   below have the following floating point equivalent:

		   max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount

		   Any questions? --ANK (980924)
		 */
		if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
			goto enqueue;
		q->qcount = 0;
		q->qR = net_random()&q->Rmask;
		sch->stats.overlimits++;
		goto mark;
	}
	q->qR = net_random()&q->Rmask;
	goto enqueue;

drop:
	kfree_skb(skb);
	sch->stats.drops++;
	return NET_XMIT_CN;
}

static int
red_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct red_sched_data *q = (struct red_sched_data *)sch->data;

	PSCHED_SET_PASTPERFECT(q->qidlestart);

	__skb_queue_head(&sch->q, skb);
	sch->stats.backlog += skb->len;
	return 0;
}

static struct sk_buff *
red_dequeue(struct Qdisc* sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = (struct red_sched_data *)sch->data;

	skb = __skb_dequeue(&sch->q);
	if (skb) {
		sch->stats.backlog -= skb->len;
		return skb;
	}
	PSCHED_GET_TIME(q->qidlestart);
	return NULL;
}

static int
red_drop(struct Qdisc* sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = (struct red_sched_data *)sch->data;

	skb = __skb_dequeue_tail(&sch->q);
	if (skb) {
		sch->stats.backlog -= skb->len;
		sch->stats.drops++;
		q->st.other++;
		kfree_skb(skb);
		return 1;
	}
	PSCHED_GET_TIME(q->qidlestart);
	return 0;
}

static void red_reset(struct Qdisc* sch)
{
	struct red_sched_data *q = (struct red_sched_data *)sch->data;
	struct sk_buff *skb;

	while((skb=__skb_dequeue(&sch->q))!=NULL)
		kfree_skb(skb);
	sch->stats.backlog = 0;
	PSCHED_SET_PASTPERFECT(q->qidlestart);
	q->qave = 0;
	q->qcount = -1;
}

static int red_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct red_sched_data *q = (struct red_sched_data *)sch->data;
	struct rtattr *tb[TCA_RED_STAB];
	struct tc_red_qopt *ctl;

	if (opt == NULL ||
	    rtattr_parse(tb, TCA_RED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
	    tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 ||
	    RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
	    RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256)
		return -EINVAL;

	ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->Wlog = ctl->Wlog;
	q->Plog = ctl->Plog;
	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
	q->Scell_log = ctl->Scell_log;
	q->Scell_max = (255<<q->Scell_log);
	q->qth_min = ctl->qth_min<<ctl->Wlog;
	q->qth_max = ctl->qth_max<<ctl->Wlog;
	q->limit = ctl->limit;
	memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);

	q->qcount = -1;
	if (skb_queue_len(&sch->q) == 0)
		PSCHED_SET_PASTPERFECT(q->qidlestart);
	sch_tree_unlock(sch);
	return 0;
}

static int red_init(struct Qdisc* sch, struct rtattr *opt)
{
	int err;

	MOD_INC_USE_COUNT;

	if ((err = red_change(sch, opt)) != 0) {
		MOD_DEC_USE_COUNT;
	}
	return err;
}

#ifdef CONFIG_RTNETLINK
int red_copy_xstats(struct sk_buff *skb, struct tc_red_xstats *st)
{
	RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
	return 0;

rtattr_failure:
	return 1;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = (struct red_sched_data *)sch->data;
	unsigned char	 *b = skb->tail;
	struct rtattr *rta;
	struct tc_red_qopt opt;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	opt.limit = q->limit;
	opt.qth_min = q->qth_min>>q->Wlog;
	opt.qth_max = q->qth_max>>q->Wlog;
	opt.Wlog = q->Wlog;
	opt.Plog = q->Plog;
	opt.Scell_log = q->Scell_log;
	opt.flags = q->flags;
	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
	rta->rta_len = skb->tail - b;

	if (red_copy_xstats(skb, &q->st))
		goto rtattr_failure;

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#endif

static void red_destroy(struct Qdisc *sch)
{
	MOD_DEC_USE_COUNT;
}

struct Qdisc_ops red_qdisc_ops =
{
	NULL,
	NULL,
	"red",
	sizeof(struct red_sched_data),

	red_enqueue,
	red_dequeue,
	red_requeue,
	red_drop,

	red_init,
	red_reset,
	red_destroy,
	red_change,

#ifdef CONFIG_RTNETLINK
	red_dump,
#endif
};

#ifdef MODULE
int init_module(void)
{
	return register_qdisc(&red_qdisc_ops);
}

void cleanup_module(void)
{
	unregister_qdisc(&red_qdisc_ops);
}
#endif
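
For readers working through the fixed-point arithmetic in red_enqueue(), the sketch below is a small, stand-alone user-space C program (not part of the kernel file) that models the same computations in plain floating point, using exactly the formulas stated in the comments above: the average avg = (1-W)*avg + W*backlog with W = 2^(-Wlog), and the marking probability Pb = max_P*(avg - qth_min)/(qth_max - qth_min) with max_P = (qth_max - qth_min)/2^Plog. The parameter values (qth_min=32K, qth_max=128K, Wlog=9, Plog=22) are illustrative examples chosen to match the comment block, not kernel defaults.

/* red_model.c -- illustrative sketch only, NOT part of net/sched/sch_red.c.
 * Models the fixed-point math of red_enqueue() in floating point. */
#include <stdio.h>

int main(void)
{
	/* Hypothetical example parameters, matching the header comment:
	 * qth_min=32K, qth_max=128K, Wlog=9, Plog=22. */
	double qth_min = 32.0 * 1024, qth_max = 128.0 * 1024;
	int Wlog = 9, Plog = 22;
	double W = 1.0 / (double)(1 << Wlog);			/* W = 2^(-Wlog) */
	double max_P = (qth_max - qth_min) / (double)(1 << Plog); /* ~0.023 */
	double avg = 0.0;
	int i;

	for (i = 1; i <= 2000; i++) {
		double backlog = 100000.0;	/* constant synthetic backlog, bytes */
		double Pb = 0.0;

		/* EWMA of queue length; floating point equivalent of
		 * q->qave += backlog - (q->qave >> q->Wlog). */
		avg = (1.0 - W) * avg + W * backlog;

		if (avg >= qth_max)
			Pb = 1.0;		/* above qth_max: always mark/drop */
		else if (avg > qth_min)		/* between thresholds: linear ramp */
			Pb = max_P * (avg - qth_min) / (qth_max - qth_min);

		if (i % 400 == 0)
			printf("step=%5d  avg=%8.0f  Pb=%.5f\n", i, avg, Pb);
	}
	return 0;
}

Running the sketch shows avg converging slowly toward the instantaneous backlog (the inertia set by Wlog) while Pb ramps linearly from 0 once avg crosses qth_min. In practice this qdisc is configured from user space with the tc utility, whose red parameters (limit, min, max, avpkt, burst, probability, bandwidth) are translated into the tc_red_qopt values and the Stab lookup table consumed by red_change(); consult tc-red(8) for the exact syntax of your iproute2 version rather than relying on the numbers above.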
