tcp_input.c

来自「基于组件方式开发操作系统的OSKIT源代码」· C语言 代码 · 共 2,144 行 · 第 1/4 页

C
2,144
字号
/* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 *	The Regents of the University of California.  All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in the *    documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software *    must display the following acknowledgement: *	This product includes software developed by the University of *	California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors *    may be used to endorse or promote products derived from this software *    without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * *	From: @(#)tcp_input.c	8.5 (Berkeley) 4/10/94 */#ifndef TUBA_INCLUDE#include <sys/param.h>#include <sys/systm.h>#include <sys/malloc.h>#include <sys/mbuf.h>#include <sys/protosw.h>#include <sys/socket.h>#include <sys/socketvar.h>#include <sys/errno.h>#include <sys/queue.h>#include <sys/kernel.h>#include <net/if.h>#include <net/route.h>#include <netinet/in.h>#include <netinet/in_systm.h>#include <netinet/ip.h>#include <netinet/in_pcb.h>#include <netinet/ip_var.h>#include <netinet/tcp.h>#include <netinet/tcp_fsm.h>#include <netinet/tcp_seq.h>#include <netinet/tcp_timer.h>#include <netinet/tcp_var.h>#include <netinet/tcpip.h>#ifdef TCPDEBUG#include <netinet/tcp_debug.h>struct	tcpiphdr tcp_saveti;#endifint	tcprexmtthresh = 3;tcp_seq	tcp_iss;tcp_cc	tcp_ccgen;struct	tcpstat tcpstat;u_long	tcp_now;struct inpcbhead tcb;struct inpcbinfo tcbinfo;#endif /* TUBA_INCLUDE *//* * Insert segment ti into reassembly queue of tcp with * control block tp.  Return TH_FIN if reassembly now includes * a segment with FIN.  The macro form does the common case inline * (segment is the next to be received on an established connection, * and the queue is empty), avoiding linkage into and removal * from the queue and repetition of various conversions. * Set DELACK for segments received in order, but ack immediately * when segments are out of order (so fast retransmit can work). */#ifdef TCP_ACK_HACK#define	TCP_REASS(tp, ti, m, so, flags) { \	if ((ti)->ti_seq == (tp)->rcv_nxt && \	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \	    (tp)->t_state == TCPS_ESTABLISHED) { \		if (ti->ti_flags & TH_PUSH) \			tp->t_flags |= TF_ACKNOW; \		else \			tp->t_flags |= TF_DELACK; \		(tp)->rcv_nxt += (ti)->ti_len; \		flags = (ti)->ti_flags & TH_FIN; \		tcpstat.tcps_rcvpack++;\		tcpstat.tcps_rcvbyte += (ti)->ti_len;\		sbappend(&(so)->so_rcv, (m)); \		sorwakeup(so); \	} else { \		(flags) = tcp_reass((tp), (ti), (m)); \		tp->t_flags |= TF_ACKNOW; \	} \}#else#define	TCP_REASS(tp, ti, m, so, flags) { \	if ((ti)->ti_seq == (tp)->rcv_nxt && \	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \	    (tp)->t_state == TCPS_ESTABLISHED) { \		tp->t_flags |= TF_DELACK; \		(tp)->rcv_nxt += (ti)->ti_len; \		flags = (ti)->ti_flags & TH_FIN; \		tcpstat.tcps_rcvpack++;\		tcpstat.tcps_rcvbyte += (ti)->ti_len;\		sbappend(&(so)->so_rcv, (m)); \		sorwakeup(so); \	} else { \		(flags) = tcp_reass((tp), (ti), (m)); \		tp->t_flags |= TF_ACKNOW; \	} \}#endif#ifndef TUBA_INCLUDEinttcp_reass(tp, ti, m)	register struct tcpcb *tp;	register struct tcpiphdr *ti;	struct mbuf *m;{	register struct tcpiphdr *q;	struct socket *so = tp->t_inpcb->inp_socket;	int flags;	/*	 * Call with ti==0 after become established to	 * force pre-ESTABLISHED data up to user socket.	 */	if (ti == 0)		goto present;	/*	 * Find a segment which begins after this one does.	 */	for (q = tp->seg_next; q != (struct tcpiphdr *)tp;	    q = (struct tcpiphdr *)q->ti_next)		if (SEQ_GT(q->ti_seq, ti->ti_seq))			break;	/*	 * If there is a preceding segment, it may provide some of	 * our data already.  If so, drop the data from the incoming	 * segment.  If it provides all of our data, drop us.	 */	if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {		register int i;		q = (struct tcpiphdr *)q->ti_prev;		/* conversion to int (in i) handles seq wraparound */		i = q->ti_seq + q->ti_len - ti->ti_seq;		if (i > 0) {			if (i >= ti->ti_len) {				tcpstat.tcps_rcvduppack++;				tcpstat.tcps_rcvdupbyte += ti->ti_len;				m_freem(m);				/*				 * Try to present any queued data				 * at the left window edge to the user.				 * This is needed after the 3-WHS				 * completes.				 */				goto present;	/* ??? */			}			m_adj(m, i);			ti->ti_len -= i;			ti->ti_seq += i;		}		q = (struct tcpiphdr *)(q->ti_next);	}	tcpstat.tcps_rcvoopack++;	tcpstat.tcps_rcvoobyte += ti->ti_len;	REASS_MBUF(ti) = m;		/* XXX */	/*	 * While we overlap succeeding segments trim them or,	 * if they are completely covered, dequeue them.	 */	while (q != (struct tcpiphdr *)tp) {		register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;		if (i <= 0)			break;		if (i < q->ti_len) {			q->ti_seq += i;			q->ti_len -= i;			m_adj(REASS_MBUF(q), i);			break;		}		q = (struct tcpiphdr *)q->ti_next;		m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);		remque(q->ti_prev);		m_freem(m);	}	/*	 * Stick new segment in its place.	 */	insque(ti, q->ti_prev);present:	/*	 * Present data to user, advancing rcv_nxt through	 * completed sequence space.	 */	if (!TCPS_HAVEESTABLISHED(tp->t_state))		return (0);	ti = tp->seg_next;	if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)		return (0);	do {		tp->rcv_nxt += ti->ti_len;		flags = ti->ti_flags & TH_FIN;		remque(ti);		m = REASS_MBUF(ti);		ti = (struct tcpiphdr *)ti->ti_next;		if (so->so_state & SS_CANTRCVMORE)			m_freem(m);		else			sbappend(&so->so_rcv, m);	} while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);	sorwakeup(so);	return (flags);}/* * TCP input routine, follows pages 65-76 of the * protocol specification dated September, 1981 very closely. */voidtcp_input(m, iphlen)	register struct mbuf *m;	int iphlen;{	register struct tcpiphdr *ti;	register struct inpcb *inp;	caddr_t optp = NULL;	int optlen = 0;	int len, tlen, off;	register struct tcpcb *tp = 0;	register int tiflags;	struct socket *so = 0;	int todrop, acked, ourfinisacked, needoutput = 0;	struct in_addr laddr;	int dropsocket = 0;	int iss = 0;	u_long tiwin;	struct tcpopt to;		/* options in this segment */	struct rmxp_tao *taop;		/* pointer to our TAO cache entry */	struct rmxp_tao	tao_noncached;	/* in case there's no cached entry */#ifdef TCPDEBUG	short ostate = 0;#endif	bzero((char *)&to, sizeof(to));	tcpstat.tcps_rcvtotal++;	/*	 * Get IP and TCP header together in first mbuf.	 * Note: IP leaves IP header in first mbuf.	 */	ti = mtod(m, struct tcpiphdr *);	if (iphlen > sizeof (struct ip))		ip_stripoptions(m, (struct mbuf *)0);	if (m->m_len < sizeof (struct tcpiphdr)) {		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {			tcpstat.tcps_rcvshort++;			return;		}		ti = mtod(m, struct tcpiphdr *);	}	/*	 * Checksum extended TCP header and data.	 */	tlen = ((struct ip *)ti)->ip_len;	len = sizeof (struct ip) + tlen;	ti->ti_next = ti->ti_prev = 0;	ti->ti_x1 = 0;	ti->ti_len = (u_short)tlen;	HTONS(ti->ti_len);	ti->ti_sum = in_cksum(m, len);	if (ti->ti_sum) {		tcpstat.tcps_rcvbadsum++;		goto drop;	}#endif /* TUBA_INCLUDE */	/*	 * Check that TCP offset makes sense,	 * pull out TCP options and adjust length.		XXX	 */	off = ti->ti_off << 2;	if (off < sizeof (struct tcphdr) || off > tlen) {		tcpstat.tcps_rcvbadoff++;		goto drop;	}	tlen -= off;	ti->ti_len = tlen;	if (off > sizeof (struct tcphdr)) {		if (m->m_len < sizeof(struct ip) + off) {			if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {				tcpstat.tcps_rcvshort++;				return;			}			ti = mtod(m, struct tcpiphdr *);		}		optlen = off - sizeof (struct tcphdr);		optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);	}	tiflags = ti->ti_flags;	/*	 * Convert TCP protocol specific fields to host format.	 */	NTOHL(ti->ti_seq);	NTOHL(ti->ti_ack);	NTOHS(ti->ti_win);	NTOHS(ti->ti_urp);	/*	 * Drop TCP, IP headers and TCP options.	 */	m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);	m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);	/*	 * Locate pcb for segment.	 */findpcb:	/*	 * First look for an exact match.	 */	inp = in_pcblookuphash(&tcbinfo, ti->ti_src, ti->ti_sport,	    ti->ti_dst, ti->ti_dport);	/*	 * ...and if that fails, do a wildcard search.	 */	if (inp == NULL) {		inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,		    ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);	}	/*	 * If the state is CLOSED (i.e., TCB does not exist) then	 * all data in the incoming segment is discarded.	 * If the TCB exists but is in CLOSED state, it is embryonic,	 * but should either do a listen or a connect soon.	 */	if (inp == NULL)		goto dropwithreset;	tp = intotcpcb(inp);	if (tp == 0)		goto dropwithreset;	if (tp->t_state == TCPS_CLOSED)		goto drop;	/* Unscale the window into a 32-bit value. */	if ((tiflags & TH_SYN) == 0)		tiwin = ti->ti_win << tp->snd_scale;	else		tiwin = ti->ti_win;	so = inp->inp_socket;	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {#ifdef TCPDEBUG		if (so->so_options & SO_DEBUG) {			ostate = tp->t_state;			tcp_saveti = *ti;		}#endif		if (so->so_options & SO_ACCEPTCONN) {			register struct tcpcb *tp0 = tp;			struct socket *so2;			if ((tiflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {				/*				 * Note: dropwithreset makes sure we don't				 * send a RST in response to a RST.				 */				if (tiflags & TH_ACK) {					tcpstat.tcps_badsyn++;					goto dropwithreset;				}				goto drop;			}			so2 = sonewconn(so, 0);			if (so2 == 0) {				unsigned int i, j, qlen;				static int rnd;				static long old_mono_secs;				static unsigned int cur_cnt, old_cnt;				tcpstat.tcps_listendrop++;				/*				 * Keep a decaying average of the number				 * of overruns we've been getting.				 */				if ((i = (mono_time.tv_sec -					  old_mono_secs)) != 0) {					old_mono_secs = mono_time.tv_sec;					old_cnt = cur_cnt / i;					cur_cnt = 0;				}				so2 = so->so_q0;				if (so2 == 0)					goto drop;				/*				 * If we've been getting a lot of hits,				 * random drop an incomplete connection				 * from the queue, otherwise, fall through				 * so we head-drop from the queue.				 */				qlen = so->so_q0len;				if (++cur_cnt > qlen || old_cnt > qlen) {					rnd = (314159 * rnd + 66329) & 0xffff;					j = ((qlen + 1) * rnd) >> 16;					while (j-- && so2)						so2 = so2->so_q0;				}				if (so2) {					tcp_drop(sototcpcb(so2), ETIMEDOUT);					so2 = sonewconn(so, 0);				}				if (!so2)					goto drop;			}			so = so2;			/*			 * This is ugly, but ....			 *			 * Mark socket as temporary until we're			 * committed to keeping it.  The code at			 * ``drop'' and ``dropwithreset'' check the			 * flag dropsocket to see if the temporary			 * socket created here should be discarded.			 * We mark the socket as discardable until			 * we're committed to it below in TCPS_LISTEN.			 */			dropsocket++;			inp = (struct inpcb *)so->so_pcb;			inp->inp_laddr = ti->ti_dst;			inp->inp_lport = ti->ti_dport;			in_pcbrehash(inp);#if BSD>=43			inp->inp_options = ip_srcroute();#endif			tp = intotcpcb(inp);			tp->t_state = TCPS_LISTEN;			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);			/* Compute proper scaling value from buffer space */			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&			   TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)				tp->request_r_scale++;		}	}	/*	 * Segment received on connection.	 * Reset idle time and keep-alive timer.	 */	tp->t_idle = 0;	if (TCPS_HAVEESTABLISHED(tp->t_state))		tp->t_timer[TCPT_KEEP] = tcp_keepidle;	/*	 * Process options if not in LISTEN state,	 * else do it below (after getting remote address).	 */	if (tp->t_state != TCPS_LISTEN)		tcp_dooptions(tp, optp, optlen, ti, &to);	/*	 * Header prediction: check for the two common cases	 * of a uni-directional data xfer.  If the packet has	 * no control flags, is in-sequence, the window didn't	 * change and we're not retransmitting, it's a	 * candidate.  If the length is zero and the ack moved	 * forward, we're the sender side of the xfer.  Just	 * free the data acked & wake any higher level process	 * that was blocked waiting for space.  If the length	 * is non-zero and the ack didn't move, we're the	 * receiver side.  If we're getting packets in-order	 * (the reassembly queue is empty), add the data to	 * the socket buffer and note that we need a delayed ack.	 * Make sure that the hidden state-flags are also off.	 * Since we check for TCPS_ESTABLISHED above, it can only	 * be TH_NEEDSYN.	 */	if (tp->t_state == TCPS_ESTABLISHED &&	    (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&	    ((to.to_flag & TOF_TS) == 0 ||	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&	    /*	     * Using the CC option is compulsory if once started:	     *   the segment is OK if no T/TCP was negotiated or	     *   if the segment has a CC option equal to CCrecv	     */	    ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||	     (to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv) &&	    ti->ti_seq == tp->rcv_nxt &&	    tiwin && tiwin == tp->snd_wnd &&	    tp->snd_nxt == tp->snd_max) {		/*		 * If last ACK falls within this segment's sequence numbers,		 * record the timestamp.		 * NOTE that the test is modified according to the latest		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).		 */		if ((to.to_flag & TOF_TS) != 0 &&		   SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {			tp->ts_recent_age = tcp_now;			tp->ts_recent = to.to_tsval;		}		if (ti->ti_len == 0) {			if (SEQ_GT(ti->ti_ack, tp->snd_una) &&			    SEQ_LEQ(ti->ti_ack, tp->snd_max) &&			    tp->snd_cwnd >= tp->snd_wnd) {				/*

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?