tcp_minisocks.c
                tw->tw_ts_recent_stamp = tp->ts_recent_stamp;
                tw_dead_node_init(tw);

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                if (tw->tw_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr);
                        ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr);
                        tw->tw_v6_ipv6only = np->ipv6only;
                } else {
                        memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr));
                        memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr));
                        tw->tw_v6_ipv6only = 0;
                }
#endif
                /* Linkage updates. */
                __tcp_tw_hashdance(sk, tw);

                /* Get the TIME_WAIT timeout firing. */
                if (timeo < rto)
                        timeo = rto;

                if (recycle_ok) {
                        tw->tw_timeout = rto;
                } else {
                        tw->tw_timeout = TCP_TIMEWAIT_LEN;
                        if (state == TCP_TIME_WAIT)
                                timeo = TCP_TIMEWAIT_LEN;
                }

                tcp_tw_schedule(tw, timeo);
                tcp_tw_put(tw);
        } else {
                /* Sorry, if we're out of memory, just CLOSE this
                 * socket up.  We've got bigger problems than
                 * non-graceful socket closings.
                 */
                if (net_ratelimit())
                        printk(KERN_INFO "TCP: time wait bucket table overflow\n");
        }

        tcp_update_metrics(sk);
        tcp_done(sk);
}

/* Kill off TIME_WAIT sockets once their lifetime has expired. */
static int tcp_tw_death_row_slot;
static void tcp_twkill(unsigned long);

/* TIME_WAIT reaping mechanism. */
#define TCP_TWKILL_SLOTS        8       /* Please keep this a power of 2. */
#define TCP_TWKILL_PERIOD       (TCP_TIMEWAIT_LEN / TCP_TWKILL_SLOTS)
#define TCP_TWKILL_QUOTA        100

static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS];
static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED;
static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0);
static void twkill_work(void *);
static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL);
static u32 twkill_thread_slots;

/* Returns non-zero if quota exceeded. */
static int tcp_do_twkill_work(int slot, unsigned int quota)
{
        struct tcp_tw_bucket *tw;
        struct hlist_node *node;
        unsigned int killed;
        int ret;

        /* NOTE: compare this to the previous version, where the lock was
         * released after detaching the chain.  That was racy, because tw
         * buckets are scheduled in a non-serialized context in 2.3 (with
         * netfilter), and with softnet it is common, because soft irqs
         * are not sequenced.
         */
        killed = 0;
        ret = 0;
rescan:
        tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) {
                __tw_del_dead_node(tw);
                spin_unlock(&tw_death_lock);
                tcp_timewait_kill(tw);
                tcp_tw_put(tw);
                killed++;
                spin_lock(&tw_death_lock);
                if (killed > quota) {
                        ret = 1;
                        break;
                }

                /* While we dropped tw_death_lock, another cpu may have
                 * killed off the next TW bucket in the list, therefore
                 * do a fresh re-read of the hlist head node with the
                 * lock reacquired.  We still use the hlist traversal
                 * macro in order to get the prefetches.
                 */
                goto rescan;
        }

        tcp_tw_count -= killed;
        NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);

        return ret;
}
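/* Illustrative walk-through of the quota logic above: with
 * TCP_TWKILL_QUOTA == 100, a death-row slot holding 250 buckets is
 * drained over three calls.  The first two calls each kill 101 buckets
 * (the check fires only once the quota has already been exceeded) and
 * return non-zero, so the caller has to come back later; the third call
 * kills the remaining 48 and returns 0.  Also note that tw_death_lock is
 * dropped around every tcp_timewait_kill(), which is why the loop jumps
 * back to "rescan" and re-reads the list head after each kill rather
 * than trusting a cached next pointer.
 */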
static void tcp_twkill(unsigned long dummy)
{
        int need_timer, ret;

        spin_lock(&tw_death_lock);

        if (tcp_tw_count == 0)
                goto out;

        need_timer = 0;
        ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA);
        if (ret) {
                twkill_thread_slots |= (1 << tcp_tw_death_row_slot);
                mb();
                schedule_work(&tcp_twkill_work);
                need_timer = 1;
        } else {
                /* We purged the entire slot, anything left? */
                if (tcp_tw_count)
                        need_timer = 1;
        }
        tcp_tw_death_row_slot =
                ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
        if (need_timer)
                mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD);
out:
        spin_unlock(&tw_death_lock);
}

extern void twkill_slots_invalid(void);

static void twkill_work(void *dummy)
{
        int i;

        if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8))
                twkill_slots_invalid();

        while (twkill_thread_slots) {
                spin_lock_bh(&tw_death_lock);
                for (i = 0; i < TCP_TWKILL_SLOTS; i++) {
                        if (!(twkill_thread_slots & (1 << i)))
                                continue;

                        while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) {
                                if (need_resched()) {
                                        spin_unlock_bh(&tw_death_lock);
                                        schedule();
                                        spin_lock_bh(&tw_death_lock);
                                }
                        }

                        twkill_thread_slots &= ~(1 << i);
                }
                spin_unlock_bh(&tw_death_lock);
        }
}

/* These are always called from BH context.  See callers in
 * tcp_input.c to verify this.
 */

/* This is for handling early-kills of TIME_WAIT sockets. */
void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
{
        spin_lock(&tw_death_lock);
        if (tw_del_dead_node(tw)) {
                tcp_tw_put(tw);
                if (--tcp_tw_count == 0)
                        del_timer(&tcp_tw_timer);
        }
        spin_unlock(&tw_death_lock);
        tcp_timewait_kill(tw);
}
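/* The declarations below add a second, finer-grained timer wheel (the
 * "short-time timewait calendar") next to the slow tcp_tw_death_row
 * wheel above.  Roughly, as the constants in this file imply:
 *
 *   slow wheel:  TCP_TWKILL_SLOTS (8) rows, ticking every
 *                TCP_TWKILL_PERIOD = TCP_TIMEWAIT_LEN / 8 jiffies;
 *                used when the timeout is too long for the calendar,
 *                typically a full-length TIME_WAIT.
 *   calendar:    TCP_TW_RECYCLE_SLOTS rows, ticking every
 *                2^TCP_TW_RECYCLE_TICK jiffies; used when PAWS lets
 *                tcp_tw_schedule() expire the bucket after ~3.5*RTO.
 *
 * tcp_tw_schedule() chooses between the two purely by whether the
 * requested timeout fits within the calendar's range.
 */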
/* Short-time timewait calendar */

static int tcp_twcal_hand = -1;
static int tcp_twcal_jiffie;
static void tcp_twcal_tick(unsigned long);
static struct timer_list tcp_twcal_timer = TIMER_INITIALIZER(tcp_twcal_tick, 0, 0);
static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];

void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
{
        struct hlist_head *list;
        int slot;

        /* timeout := RTO * 3.5
         *
         * 3.5 = 1+2+0.5 to wait for two retransmits.
         *
         * RATIONALE: if a FIN arrived and we entered TIME-WAIT state,
         * our ACK acking that FIN can be lost.  If N subsequent
         * retransmitted FINs (or previous segments) are lost, the
         * probability of such an event is p^(N+1), where p is the
         * probability of losing a single packet, and the time to detect
         * the loss is about RTO*(2^N - 1) with exponential backoff.
         * The normal timewait length is calculated so that we wait at
         * least for one retransmitted FIN (maximal RTO is 120 sec).
         * [ BTW Linux, following BSD, violates this requirement by
         *   waiting only for 60 sec; we should wait at least 240 secs.
         *   Well, 240 consumes too many resources 8)
         * ]
         * This interval is not reduced to catch old duplicates and
         * responses to our wandering segments living for two MSLs.
         * However, if we use PAWS to detect old duplicates, we can
         * reduce the interval to the bounds required by RTO, rather
         * than MSL.  So, if the peer understands PAWS, we kill the tw
         * bucket after 3.5*RTO (it is important that this number is
         * greater than the TS tick!) and detect old duplicates with
         * the help of PAWS.
         */
        slot = (timeo + (1 << TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;

        spin_lock(&tw_death_lock);

        /* Unlink it, if it was scheduled */
        if (tw_del_dead_node(tw))
                tcp_tw_count--;
        else
                atomic_inc(&tw->tw_refcnt);

        if (slot >= TCP_TW_RECYCLE_SLOTS) {
                /* Schedule to slow timer */
                if (timeo >= TCP_TIMEWAIT_LEN) {
                        slot = TCP_TWKILL_SLOTS - 1;
                } else {
                        slot = (timeo + TCP_TWKILL_PERIOD - 1) / TCP_TWKILL_PERIOD;
                        if (slot >= TCP_TWKILL_SLOTS)
                                slot = TCP_TWKILL_SLOTS - 1;
                }
                tw->tw_ttd = jiffies + timeo;
                slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
                list = &tcp_tw_death_row[slot];
        } else {
                tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK);

                if (tcp_twcal_hand < 0) {
                        tcp_twcal_hand = 0;
                        tcp_twcal_jiffie = jiffies;
                        tcp_twcal_timer.expires = tcp_twcal_jiffie +
                                        (slot << TCP_TW_RECYCLE_TICK);
                        add_timer(&tcp_twcal_timer);
                } else {
                        if (time_after(tcp_twcal_timer.expires,
                                       jiffies + (slot << TCP_TW_RECYCLE_TICK)))
                                mod_timer(&tcp_twcal_timer,
                                          jiffies + (slot << TCP_TW_RECYCLE_TICK));
                        slot = (tcp_twcal_hand + slot) & (TCP_TW_RECYCLE_SLOTS - 1);
                }
                list = &tcp_twcal_row[slot];
        }

        hlist_add_head(&tw->tw_death_node, list);

        if (tcp_tw_count++ == 0)
                mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD);
        spin_unlock(&tw_death_lock);
}
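/* Worked example of the slot arithmetic in tcp_tw_schedule() above.
 * The ceiling division
 *
 *      slot = (timeo + (1 << TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
 *
 * converts a timeout in jiffies into calendar ticks of
 * 2^TCP_TW_RECYCLE_TICK jiffies each.  Assuming, purely for illustration,
 * a 128-jiffy tick and 32 calendar slots (the real tick width depends on
 * HZ): a recycled bucket with RTO = 200 jiffies gets timeo = 3.5 * RTO =
 * 700 jiffies, which rounds up to 6 ticks and lands on the calendar,
 * while a full TCP_TIMEWAIT_LEN timeout needs far more than 32 ticks and
 * falls back to the slow tcp_tw_death_row wheel with TCP_TWKILL_PERIOD
 * granularity.
 */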
void tcp_twcal_tick(unsigned long dummy)
{
        int n, slot;
        unsigned long j;
        unsigned long now = jiffies;
        int killed = 0;
        int adv = 0;

        spin_lock(&tw_death_lock);
        if (tcp_twcal_hand < 0)
                goto out;

        slot = tcp_twcal_hand;
        j = tcp_twcal_jiffie;

        for (n = 0; n < TCP_TW_RECYCLE_SLOTS; n++) {
                if (time_before_eq(j, now)) {
                        struct hlist_node *node, *safe;
                        struct tcp_tw_bucket *tw;

                        tw_for_each_inmate_safe(tw, node, safe,
                                                &tcp_twcal_row[slot]) {
                                __tw_del_dead_node(tw);
                                tcp_timewait_kill(tw);
                                tcp_tw_put(tw);
                                killed++;
                        }
                } else {
                        if (!adv) {
                                adv = 1;
                                tcp_twcal_jiffie = j;
                                tcp_twcal_hand = slot;
                        }

                        if (!hlist_empty(&tcp_twcal_row[slot])) {
                                mod_timer(&tcp_twcal_timer, j);
                                goto out;
                        }
                }
                j += (1 << TCP_TW_RECYCLE_TICK);
                slot = (slot + 1) & (TCP_TW_RECYCLE_SLOTS - 1);
        }
        tcp_twcal_hand = -1;

out:
        if ((tcp_tw_count -= killed) == 0)
                del_timer(&tcp_tw_timer);
        NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
        spin_unlock(&tw_death_lock);
}

/* This is not only more efficient than what we used to do, it eliminates
 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
 *
 * Actually, we could avoid lots of memory writes here.  tp of the
 * listening socket contains all the necessary default parameters.
 */
struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
                                      struct sk_buff *skb)
{
        /* allocate the newsk from the same slab of the master sock,
         * if not, at sk_free time we'll try to free it from the wrong
         * slabcache (i.e. is it TCPv4 or v6?) -acme
         */
        struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0, sk->sk_prot->slab);

        if (newsk != NULL) {
                struct tcp_opt *newtp;
                struct sk_filter *filter;

                memcpy(newsk, sk, sizeof(struct tcp_sock));
                newsk->sk_state = TCP_SYN_RECV;

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                tcp_sk(newsk)->bind_hash = NULL;

                /* Clone the TCP header template */
                inet_sk(newsk)->dport = req->rmt_port;

                sock_lock_init(newsk);
                bh_lock_sock(newsk);

                newsk->sk_dst_lock = RW_LOCK_UNLOCKED;
                atomic_set(&newsk->sk_rmem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                skb_queue_head_init(&newsk->sk_write_queue);
                atomic_set(&newsk->sk_omem_alloc, 0);
                newsk->sk_wmem_queued = 0;
                newsk->sk_forward_alloc = 0;

                sock_reset_flag(newsk, SOCK_DONE);
                newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
                newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
                newsk->sk_send_head = NULL;
                newsk->sk_callback_lock = RW_LOCK_UNLOCKED;