
tcp_minisocks.c

Collection: Linux内核源代码 (Linux kernel source code), a compressed archive of the source code that accompanies the book <<Linux内核>>.
Language: C
	}
	spin_unlock(&tw_death_lock);
}

/* Short-time timewait calendar */

static int tcp_twcal_hand = -1;
static int tcp_twcal_jiffie;
static void tcp_twcal_tick(unsigned long);
static struct timer_list tcp_twcal_timer = {function: tcp_twcal_tick};
static struct tcp_tw_bucket *tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];

void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
{
	struct tcp_tw_bucket **tpp;
	int slot;

	/* timeout := RTO * 3.5
	 *
	 * 3.5 = 1+2+0.5 to wait for two retransmits.
	 *
	 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
	 * our ACK acking that FIN can be lost. If N subsequent retransmitted
	 * FINs (or previous segments) are lost (probability of such event
	 * is p^(N+1), where p is probability to lose single packet and
	 * time to detect the loss is about RTO*(2^N - 1) with exponential
	 * backoff). Normal timewait length is calculated so that we
	 * wait at least for one retransmitted FIN (maximal RTO is 120sec).
	 * [ BTW Linux, following BSD, violates this requirement waiting
	 *   only for 60sec, we should wait at least for 240 secs.
	 *   Well, 240 consumes too much of resources 8)
	 * ]
	 * This interval is not reduced to catch old duplicates and
	 * responses to our wandering segments living for two MSLs.
	 * However, if we use PAWS to detect
	 * old duplicates, we can reduce the interval to bounds required
	 * by RTO, rather than MSL. So, if peer understands PAWS, we
	 * kill tw bucket after 3.5*RTO (it is important that this number
	 * is greater than TS tick!) and detect old duplicates with help
	 * of PAWS.
	 */
	slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;

	spin_lock(&tw_death_lock);

	/* Unlink it, if it was scheduled */
	if (tw->pprev_death) {
		if(tw->next_death)
			tw->next_death->pprev_death = tw->pprev_death;
		*tw->pprev_death = tw->next_death;
		tw->pprev_death = NULL;
		tcp_tw_count--;
	} else
		atomic_inc(&tw->refcnt);

	if (slot >= TCP_TW_RECYCLE_SLOTS) {
		/* Schedule to slow timer */
		if (timeo >= TCP_TIMEWAIT_LEN) {
			slot = TCP_TWKILL_SLOTS-1;
		} else {
			slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD;
			if (slot >= TCP_TWKILL_SLOTS)
				slot = TCP_TWKILL_SLOTS-1;
		}
		tw->ttd = jiffies + timeo;
		slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
		tpp = &tcp_tw_death_row[slot];
	} else {
		tw->ttd = jiffies + (slot<<TCP_TW_RECYCLE_TICK);

		if (tcp_twcal_hand < 0) {
			tcp_twcal_hand = 0;
			tcp_twcal_jiffie = jiffies;
			tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK);
			add_timer(&tcp_twcal_timer);
		} else {
			if ((long)(tcp_twcal_timer.expires - jiffies) > (slot<<TCP_TW_RECYCLE_TICK))
				mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK));
			slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1);
		}
		tpp = &tcp_twcal_row[slot];
	}

	if((tw->next_death = *tpp) != NULL)
		(*tpp)->pprev_death = &tw->next_death;
	*tpp = tw;
	tw->pprev_death = tpp;

	if (tcp_tw_count++ == 0)
		mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
	spin_unlock(&tw_death_lock);
}

void SMP_TIMER_NAME(tcp_twcal_tick)(unsigned long dummy)
{
	int n, slot;
	unsigned long j;
	unsigned long now = jiffies;
	int killed = 0;
	int adv = 0;

	spin_lock(&tw_death_lock);
	if (tcp_twcal_hand < 0)
		goto out;

	slot = tcp_twcal_hand;
	j = tcp_twcal_jiffie;

	for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) {
		if ((long)(j - now) <= 0) {
			struct tcp_tw_bucket *tw;

			while((tw = tcp_twcal_row[slot]) != NULL) {
				tcp_twcal_row[slot] = tw->next_death;
				tw->pprev_death = NULL;
				tcp_timewait_kill(tw);
				tcp_tw_put(tw);
				killed++;
			}
		} else {
			if (!adv) {
				adv = 1;
				tcp_twcal_jiffie = j;
				tcp_twcal_hand = slot;
			}
			if (tcp_twcal_row[slot] != NULL) {
				mod_timer(&tcp_twcal_timer, j);
				goto out;
			}
		}
		j += (1<<TCP_TW_RECYCLE_TICK);
		slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1);
	}
	tcp_twcal_hand = -1;

out:
	if ((tcp_tw_count -= killed) == 0)
		del_timer(&tcp_tw_timer);
	net_statistics[smp_processor_id()*2].TimeWaitKilled += killed;
	spin_unlock(&tw_death_lock);
}

SMP_TIMER_DEFINE(tcp_twcal_tick, tcp_twcal_tasklet);

/* This is not only more efficient than what we used to do, it eliminates
 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
 *
 * Actually, we could avoid lots of memory writes here. tp of listening
 * socket contains all necessary default parameters.
 */
struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb)
{
	struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0);

	if(newsk != NULL) {
		struct tcp_opt *newtp;
#ifdef CONFIG_FILTER
		struct sk_filter *filter;
#endif

		memcpy(newsk, sk, sizeof(*newsk));
		newsk->state = TCP_SYN_RECV;

		/* SANITY */
		newsk->pprev = NULL;
		newsk->prev = NULL;

		/* Clone the TCP header template */
		newsk->dport = req->rmt_port;

		sock_lock_init(newsk);
		bh_lock_sock(newsk);

		newsk->dst_lock = RW_LOCK_UNLOCKED;
		atomic_set(&newsk->rmem_alloc, 0);
		skb_queue_head_init(&newsk->receive_queue);
		atomic_set(&newsk->wmem_alloc, 0);
		skb_queue_head_init(&newsk->write_queue);
		atomic_set(&newsk->omem_alloc, 0);
		newsk->wmem_queued = 0;
		newsk->forward_alloc = 0;

		newsk->done = 0;
		newsk->userlocks = sk->userlocks & ~SOCK_BINDPORT_LOCK;
		newsk->proc = 0;
		newsk->backlog.head = newsk->backlog.tail = NULL;
		newsk->callback_lock = RW_LOCK_UNLOCKED;
		skb_queue_head_init(&newsk->error_queue);
		newsk->write_space = tcp_write_space;

#ifdef CONFIG_FILTER
		if ((filter = newsk->filter) != NULL)
			sk_filter_charge(newsk, filter);
#endif

		/* Now setup tcp_opt */
		newtp = &(newsk->tp_pinfo.af_tcp);
		newtp->pred_flags = 0;
		newtp->rcv_nxt = req->rcv_isn + 1;
		newtp->snd_nxt = req->snt_isn + 1;
		newtp->snd_una = req->snt_isn + 1;
		newtp->snd_sml = req->snt_isn + 1;

		tcp_delack_init(newtp);

		tcp_prequeue_init(newtp);

		tcp_init_wl(newtp, req->snt_isn, req->rcv_isn);

		newtp->retransmits = 0;
		newtp->backoff = 0;
		newtp->srtt = 0;
		newtp->mdev = TCP_TIMEOUT_INIT;
		newtp->rto = TCP_TIMEOUT_INIT;

		newtp->packets_out = 0;
		newtp->left_out = 0;
		newtp->retrans_out = 0;
		newtp->sacked_out = 0;
		newtp->fackets_out = 0;
		newtp->snd_ssthresh = 0x7fffffff;

		/* So many TCP implementations out there (incorrectly) count the
		 * initial SYN frame in their delayed-ACK and congestion control
		 * algorithms that we must have the following bandaid to talk
		 * efficiently to them.  -DaveM
		 */
		newtp->snd_cwnd = 2;
		newtp->snd_cwnd_cnt = 0;

		newtp->ca_state = TCP_CA_Open;
		tcp_init_xmit_timers(newsk);
		skb_queue_head_init(&newtp->out_of_order_queue);
		newtp->send_head = NULL;
		newtp->rcv_wup = req->rcv_isn + 1;
		newtp->write_seq = req->snt_isn + 1;
		newtp->pushed_seq = newtp->write_seq;
		newtp->copied_seq = req->rcv_isn + 1;

		newtp->saw_tstamp = 0;

		newtp->dsack = 0;
		newtp->eff_sacks = 0;

		newtp->probes_out = 0;
		newtp->num_sacks = 0;
		newtp->syn_seq = req->rcv_isn;
		newtp->fin_seq = req->rcv_isn;
		newtp->urg_data = 0;
		newtp->listen_opt = NULL;
		newtp->accept_queue = newtp->accept_queue_tail = NULL;
		/* Deinitialize syn_wait_lock to trap illegal accesses. */
		memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));

		/* Back to base struct sock members. */
		newsk->err = 0;
		newsk->priority = 0;
		atomic_set(&newsk->refcnt, 2);
#ifdef INET_REFCNT_DEBUG
		atomic_inc(&inet_sock_nr);
#endif
		atomic_inc(&tcp_sockets_allocated);

		if (newsk->keepopen)
			tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp));
		newsk->socket = NULL;
		newsk->sleep = NULL;

		newtp->tstamp_ok = req->tstamp_ok;
		if((newtp->sack_ok = req->sack_ok) != 0) {
			if (sysctl_tcp_fack)
				newtp->sack_ok |= 2;
		}
		newtp->window_clamp = req->window_clamp;
		newtp->rcv_ssthresh = req->rcv_wnd;
		newtp->rcv_wnd = req->rcv_wnd;
		newtp->wscale_ok = req->wscale_ok;
		if (newtp->wscale_ok) {
			newtp->snd_wscale = req->snd_wscale;
			newtp->rcv_wscale = req->rcv_wscale;
		} else {
			newtp->snd_wscale = newtp->rcv_wscale = 0;
			newtp->window_clamp = min(newtp->window_clamp,65535);
		}
		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale;
		newtp->max_window = newtp->snd_wnd;

		if (newtp->tstamp_ok) {
			newtp->ts_recent = req->ts_recent;
			newtp->ts_recent_stamp = xtime.tv_sec;
			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			newtp->ts_recent_stamp = 0;
			newtp->tcp_header_len = sizeof(struct tcphdr);
		}
		if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
			newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
		newtp->mss_clamp = req->mss;
		TCP_ECN_openreq_child(newtp, req);
	}
	return newsk;
}

/*
 *	Process an incoming packet for SYN_RECV sockets represented
 *	as an open_request.
 */

struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct open_request *req,
			   struct open_request **prev)
{
	struct tcphdr *th = skb->h.th;
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
	int paws_reject = 0;
	struct tcp_opt ttp;
	struct sock *child;

	ttp.saw_tstamp = 0;
	if (th->doff > (sizeof(struct tcphdr)>>2)) {
		tcp_parse_options(skb, &ttp, 0);

		if (ttp.saw_tstamp) {
			ttp.ts_recent = req->ts_recent;
			/* We do not store the true stamp, but it is not required,
			 * it can be estimated (approximately)
			 * from other data.
			 */
			ttp.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
			paws_reject = tcp_paws_check(&ttp, th->rst);
		}
	}

	/* Check for pure retransmitted SYN. */
	if (TCP_SKB_CB(skb)->seq == req->rcv_isn &&
	    flg == TCP_FLAG_SYN &&
	    !paws_reject) {
		/*
		 * RFC793 draws (Incorrectly! It was fixed in RFC1122)
		 * this case on figure 6 and figure 8, but formal
		 * protocol description says NOTHING.
		 * To be more exact, it says that we should send ACK,
		 * because this segment (at least, if it has no data)
		 * is out of window.
		 *
		 *  CONCLUSION: RFC793 (even with RFC1122) DOES NOT
		 *  describe SYN-RECV state. All the description
		 *  is wrong, we cannot believe it and should
		 *  rely only on common sense and implementation
		 *  experience.
		 *
		 * Enforce "SYN-ACK" according to figure 8, figure 6
		 * of RFC793, fixed by RFC1122.
		 */
		req->class->rtx_syn_ack(sk, req, NULL);
		return NULL;
	}

	/* Further reproduces section "SEGMENT ARRIVES"
	   for state SYN-RECEIVED of RFC793.
	   It is broken; however, it fails only
	   when SYNs are crossed, which is impossible in our
	   case.

	   But generally, we should (RFC lies!) accept ACK
	   from SYNACK both here and in tcp_rcv_state_process().
	   tcp_rcv_state_process() does not, hence, we do not too.

	   Note that the case is absolutely generic:
	   we cannot optimize anything here without
	   violating protocol. All the checks must be made
	   before attempting to create the socket.
	 */

	/* RFC793: "first check sequence number". */

	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
					  req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) {
		/* Out of window: send ACK and drop. */
		if (!(flg & TCP_FLAG_RST))
			req->class->send_ack(skb, req);
		if (paws_reject)
			NET_INC_STATS_BH(PAWSEstabRejected);
		return NULL;
	}

	/* In sequence, PAWS is OK. */

	if (ttp.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
		req->ts_recent = ttp.rcv_tsval;

	if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
		/* Truncate SYN, it is out of window starting
		   at req->rcv_isn+1. */
		flg &= ~TCP_FLAG_SYN;
	}

	/* RFC793: "second check the RST bit" and
	 *	   "fourth, check the SYN bit"
	 */
	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
		goto embryonic_reset;

	/* RFC793: "fifth check the ACK field" */

	if (!(flg & TCP_FLAG_ACK))
		return NULL;

	/* Invalid ACK: reset will be sent by listening socket */
	if (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)
		return sk;

	/* Also, it would not be a bad idea to check rcv_tsecr, which
	 * is essentially an ACK extension; too early or too late values
	 * should cause a reset in unsynchronized states.
	 */

	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
	if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
		req->acked = 1;
		return NULL;
	}

	/* OK, ACK is valid, create big socket and
	 * feed this segment to it. It will repeat all
	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
	 * ESTABLISHED STATE. If it is dropped after the
	 * socket is created, expect troubles.
	 */
	child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
	if (child == NULL)
		goto listen_overflow;

	tcp_synq_unlink(tp, req, prev);
	tcp_synq_removed(sk, req);

	tcp_acceptq_queue(sk, req, child);
	return child;

listen_overflow:
	if (!sysctl_tcp_abort_on_overflow) {
		req->acked = 1;
		return NULL;
	}

embryonic_reset:
	NET_INC_STATS_BH(EmbryonicRsts);
	if (!(flg & TCP_FLAG_RST))
		req->class->send_reset(skb);

	tcp_synq_drop(sk, req, prev);
	return NULL;
}

/*
 * Queue the segment on the new socket if the new socket is active,
 * otherwise we just short-circuit this and continue with
 * the new socket.
 */
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb)
{
	int ret = 0;
	int state = child->state;

	if (child->lock.users == 0) {
		ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);

		/* Wakeup parent, send SIGIO */
		if (state == TCP_SYN_RECV && child->state != state)
			parent->data_ready(parent, 0);
	} else {
		/* Alas, it is possible again, because we do lookup
		 * in the main socket hash table and the lock on the listening
		 * socket does not protect us anymore.
		 */
		sk_add_backlog(child, skb);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return ret;
}
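The slot arithmetic in tcp_tw_schedule() above is easiest to follow with concrete numbers. The sketch below is a minimal, stand-alone user-space program that mirrors only the rounding and clamping logic of that function; the constant values (HZ, TCP_TIMEWAIT_LEN, TCP_TWKILL_SLOTS, TCP_TWKILL_PERIOD, TCP_TW_RECYCLE_TICK, TCP_TW_RECYCLE_SLOTS) are illustrative assumptions, not definitions taken from this file.

/* Minimal user-space sketch of the slot selection in tcp_tw_schedule().
 * All constants below are assumed values for illustration only; the real
 * definitions live in the kernel headers, not in this listing.
 */
#include <stdio.h>

#define HZ			100			/* assumed ticks per second */
#define TCP_TIMEWAIT_LEN	(60 * HZ)		/* assumed 60 s TIME-WAIT */
#define TCP_TWKILL_SLOTS	8			/* assumed slow-timer slots */
#define TCP_TWKILL_PERIOD	(TCP_TIMEWAIT_LEN / TCP_TWKILL_SLOTS)
#define TCP_TW_RECYCLE_TICK	5			/* assumed: 1 tick = 32 jiffies */
#define TCP_TW_RECYCLE_SLOTS	32			/* assumed fast-calendar slots */

static void show_slot(int timeo)
{
	/* Round the timeout up to a whole number of recycle ticks. */
	int slot = (timeo + (1 << TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;

	if (slot >= TCP_TW_RECYCLE_SLOTS) {
		/* Too long for the fast calendar: fall back to the slow timer,
		 * rounding up to TCP_TWKILL_PERIOD and clamping to the last slot.
		 */
		if (timeo >= TCP_TIMEWAIT_LEN)
			slot = TCP_TWKILL_SLOTS - 1;
		else {
			slot = (timeo + TCP_TWKILL_PERIOD - 1) / TCP_TWKILL_PERIOD;
			if (slot >= TCP_TWKILL_SLOTS)
				slot = TCP_TWKILL_SLOTS - 1;
		}
		printf("timeo=%4d jiffies -> slow timer, slot offset %d\n", timeo, slot);
	} else {
		printf("timeo=%4d jiffies -> recycle calendar, slot offset %d (fires after %d jiffies)\n",
		       timeo, slot, slot << TCP_TW_RECYCLE_TICK);
	}
}

int main(void)
{
	show_slot(70);			/* roughly 3.5 * a 200 ms RTO at HZ=100 */
	show_slot(TCP_TIMEWAIT_LEN);	/* the full 60 s TIME-WAIT */
	return 0;
}

With these assumed values, a short PAWS-assisted timeout of about 3.5*RTO lands in the fine-grained recycle calendar (one tick = 1<<TCP_TW_RECYCLE_TICK jiffies), while the full 60 second TIME-WAIT overflows it and is rounded to one of the coarse TCP_TWKILL_PERIOD slow-timer slots, which is exactly the branch structure of tcp_tw_schedule() above.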
