tcp_input.c

来自「eCos操作系统源码」· C语言 代码 · 共 2,238 行 · 第 1/5 页

C
2,238
字号
	 * of a uni-directional data xfer.  If the packet has	 * no control flags, is in-sequence, the window didn't	 * change and we're not retransmitting, it's a	 * candidate.  If the length is zero and the ack moved	 * forward, we're the sender side of the xfer.  Just	 * free the data acked & wake any higher level process	 * that was blocked waiting for space.  If the length	 * is non-zero and the ack didn't move, we're the	 * receiver side.  If we're getting packets in-order	 * (the reassembly queue is empty), add the data to	 * the socket buffer and note that we need a delayed ack.	 * Make sure that the hidden state-flags are also off.	 * Since we check for TCPS_ESTABLISHED above, it can only	 * be TH_NEEDSYN.	 */	if (tp->t_state == TCPS_ESTABLISHED &&	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&	    ((to.to_flag & TOF_TS) == 0 ||	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&	    /*	     * Using the CC option is compulsory if once started:	     *   the segment is OK if no T/TCP was negotiated or	     *   if the segment has a CC option equal to CCrecv	     */	    ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||	     ((to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv)) &&	    th->th_seq == tp->rcv_nxt &&	    tiwin && tiwin == tp->snd_wnd &&	    tp->snd_nxt == tp->snd_max) {		/*		 * If last ACK falls within this segment's sequence numbers,		 * record the timestamp.		 * NOTE that the test is modified according to the latest		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).		 */		if ((to.to_flag & TOF_TS) != 0 &&		   SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {			tp->ts_recent_age = ticks;			tp->ts_recent = to.to_tsval;		}		if (tlen == 0) {			if (SEQ_GT(th->th_ack, tp->snd_una) &&			    SEQ_LEQ(th->th_ack, tp->snd_max) &&			    tp->snd_cwnd >= tp->snd_wnd &&			    tp->t_dupacks < tcprexmtthresh) {				/*				 * this is a pure ack for outstanding data.				 */				++tcpstat.tcps_predack;				/*				 * "bad retransmit" recovery				 */				if (tp->t_rxtshift == 1 &&				    ticks < tp->t_badrxtwin) {					tp->snd_cwnd = tp->snd_cwnd_prev;					tp->snd_ssthresh =					    tp->snd_ssthresh_prev;					tp->snd_nxt = tp->snd_max;					tp->t_badrxtwin = 0;				}				if ((to.to_flag & TOF_TS) != 0)					tcp_xmit_timer(tp,					    ticks - to.to_tsecr + 1);				else if (tp->t_rtttime &&					    SEQ_GT(th->th_ack, tp->t_rtseq))					tcp_xmit_timer(tp, ticks - tp->t_rtttime);				acked = th->th_ack - tp->snd_una;				tcpstat.tcps_rcvackpack++;				tcpstat.tcps_rcvackbyte += acked;				sbdrop(&so->so_snd, acked);				tp->snd_una = th->th_ack;				m_freem(m);				ND6_HINT(tp); /* some progress has been done */				/*				 * If all outstanding data are acked, stop				 * retransmit timer, otherwise restart timer				 * using current (possibly backed-off) value.				 * If process is waiting for space,				 * wakeup/selwakeup/signal.  If data				 * are ready to send, let tcp_output				 * decide between more output or persist.				 */				if (tp->snd_una == tp->snd_max)					callout_stop(tp->tt_rexmt);				else if (!callout_active(tp->tt_persist))					callout_reset(tp->tt_rexmt, 						      tp->t_rxtcur,						      tcp_timer_rexmt, tp);				sowwakeup(so);				if (so->so_snd.sb_cc)					(void) tcp_output(tp);				return;			}		} else if (th->th_ack == tp->snd_una &&		    LIST_EMPTY(&tp->t_segq) &&		    tlen <= sbspace(&so->so_rcv)) {			/*			 * this is a pure, in-sequence data packet			 * with nothing on the reassembly queue and			 * we have enough buffer space to take it.			 */			++tcpstat.tcps_preddat;			tp->rcv_nxt += tlen;			tcpstat.tcps_rcvpack++;			tcpstat.tcps_rcvbyte += tlen;			ND6_HINT(tp);	/* some progress has been done */			/*			 * Add data to socket buffer.			 */			m_adj(m, drop_hdrlen);	/* delayed header drop */			sbappend(&so->so_rcv, m);			sorwakeup(so);			if (DELAY_ACK(tp)) {	                        callout_reset(tp->tt_delack, tcp_delacktime,	                            tcp_timer_delack, tp);			} else {				tp->t_flags |= TF_ACKNOW;				tcp_output(tp);			}			return;		}	}	/*	 * Calculate amount of space in receive window,	 * and then do TCP input processing.	 * Receive window is amount of space in rcv queue,	 * but not less than advertised window.	 */	{ int win;	win = sbspace(&so->so_rcv);	if (win < 0)		win = 0;	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));	}	switch (tp->t_state) {	/*	 * If the state is LISTEN then ignore segment if it contains an RST.	 * If the segment contains an ACK then it is bad and send a RST.	 * If it does not contain a SYN then it is not interesting; drop it.	 * If it is from this socket, drop it, it must be forged.	 * Don't bother responding if the destination was a broadcast.	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial	 * tp->iss, and send a segment:	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.	 * Fill in remote peer address fields if not previously specified.	 * Enter SYN_RECEIVED state, and process any other fields of this	 * segment in this state.	 */	case TCPS_LISTEN: {		register struct sockaddr_in *sin;#ifdef INET6		register struct sockaddr_in6 *sin6;#endif		if (thflags & TH_RST)			goto drop;		if (thflags & TH_ACK) {			rstreason = BANDLIM_RST_OPENPORT;			goto dropwithreset;		}		if ((thflags & TH_SYN) == 0)			goto drop;		if (th->th_dport == th->th_sport) {#ifdef INET6			if (isipv6) {				if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,						       &ip6->ip6_src))					goto drop;			} else#endif /* INET6 */			if (ip->ip_dst.s_addr == ip->ip_src.s_addr)				goto drop;		}		/*		 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN		 * in_broadcast() should never return true on a received		 * packet with M_BCAST not set. 		 * 		 * Packets with a multicast source address should also 		 * be discarded.		 */		if (m->m_flags & (M_BCAST|M_MCAST))			goto drop;#ifdef INET6		if (isipv6) {			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||			    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))				goto drop;		} else#endif		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||		    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||		    ip->ip_src.s_addr == htonl(INADDR_BROADCAST))			goto drop;#ifdef INET6		if (isipv6) {			MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,			       M_SONAME, M_NOWAIT);			if (sin6 == NULL)				goto drop;			bzero(sin6, sizeof(*sin6));			sin6->sin6_family = AF_INET6;			sin6->sin6_len = sizeof(*sin6);			sin6->sin6_addr = ip6->ip6_src;			sin6->sin6_port = th->th_sport;			laddr6 = inp->in6p_laddr;			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))				inp->in6p_laddr = ip6->ip6_dst;			if (in6_pcbconnect(inp, (struct sockaddr *)sin6,					   (struct proc *)&proc0)) {				inp->in6p_laddr = laddr6;				FREE(sin6, M_SONAME);				goto drop;			}			FREE(sin6, M_SONAME);		} else#endif	      {		MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,		       M_NOWAIT);		if (sin == NULL)			goto drop;		sin->sin_family = AF_INET;		sin->sin_len = sizeof(*sin);		sin->sin_addr = ip->ip_src;		sin->sin_port = th->th_sport;		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));		laddr = inp->inp_laddr;		if (inp->inp_laddr.s_addr == INADDR_ANY)			inp->inp_laddr = ip->ip_dst;		if (in_pcbconnect(inp, (struct sockaddr *)sin, (struct proc *)&proc0)) {			inp->inp_laddr = laddr;			FREE(sin, M_SONAME);			goto drop;		}		FREE(sin, M_SONAME);	      }		if ((taop = tcp_gettaocache(inp)) == NULL) {			taop = &tao_noncached;			bzero(taop, sizeof(*taop));		}		tcp_dooptions(tp, optp, optlen, th, &to);		if (iss)			tp->iss = iss;		else {			tp->iss = tcp_new_isn(tp); 		}		tp->irs = th->th_seq;		tcp_sendseqinit(tp);		tcp_rcvseqinit(tp);		tp->snd_recover = tp->snd_una;		/*		 * Initialization of the tcpcb for transaction;		 *   set SND.WND = SEG.WND,		 *   initialize CCsend and CCrecv.		 */		tp->snd_wnd = tiwin;	/* initial send-window */		tp->cc_send = CC_INC(tcp_ccgen);		tp->cc_recv = to.to_cc;		/*		 * Perform TAO test on incoming CC (SEG.CC) option, if any.		 * - compare SEG.CC against cached CC from the same host,		 *	if any.		 * - if SEG.CC > chached value, SYN must be new and is accepted		 *	immediately: save new CC in the cache, mark the socket		 *	connected, enter ESTABLISHED state, turn on flag to		 *	send a SYN in the next segment.		 *	A virtual advertised window is set in rcv_adv to		 *	initialize SWS prevention.  Then enter normal segment		 *	processing: drop SYN, process data and FIN.		 * - otherwise do a normal 3-way handshake.		 */		if ((to.to_flag & TOF_CC) != 0) {		    if (((tp->t_flags & TF_NOPUSH) != 0) &&			taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {			taop->tao_cc = to.to_cc;			tp->t_starttime = ticks;			tp->t_state = TCPS_ESTABLISHED;			/*			 * If there is a FIN, or if there is data and the			 * connection is local, then delay SYN,ACK(SYN) in			 * the hope of piggy-backing it on a response			 * segment.  Otherwise must send ACK now in case			 * the other side is slow starting.			 */			if (DELAY_ACK(tp) && ((thflags & TH_FIN) ||			    (tlen != 0 &&#ifdef INET6			      ((isipv6 && in6_localaddr(&inp->in6p_faddr))			      ||			      (!isipv6 &&#endif			    in_localaddr(inp->inp_faddr)#ifdef INET6			       ))#endif			     ))) {                                callout_reset(tp->tt_delack, tcp_delacktime,                                      tcp_timer_delack, tp);  				tp->t_flags |= TF_NEEDSYN;			} else 				tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);			/*			 * Limit the `virtual advertised window' to TCP_MAXWIN			 * here.  Even if we requested window scaling, it will			 * become effective only later when our SYN is acked.			 */			tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);			tcpstat.tcps_connects++;			soisconnected(so);			callout_reset(tp->tt_keep, tcp_keepinit,				      tcp_timer_keep, tp);			dropsocket = 0;		/* committed to socket */			tcpstat.tcps_accepts++;			goto trimthenstep6;		    }		/* else do standard 3-way handshake */		} else {		    /*		     * No CC option, but maybe CC.NEW:		     *   invalidate cached value.		     */		     taop->tao_cc = 0;		}		/*		 * TAO test failed or there was no CC option,		 *    do a standard 3-way handshake.		 */		tp->t_flags |= TF_ACKNOW;		tp->t_state = TCPS_SYN_RECEIVED;		callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);		dropsocket = 0;		/* committed to socket */		tcpstat.tcps_accepts++;		goto trimthenstep6;		}	/*	 * If the state is SYN_RECEIVED:	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.	 */	case TCPS_SYN_RECEIVED:		if ((thflags & TH_ACK) &&		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||		     SEQ_GT(th->th_ack, tp->snd_max))) {				rstreason = BANDLIM_RST_OPENPORT;				goto dropwithreset;		}		break;	/*	 * If the state is SYN_SENT:	 *	if seg contains an ACK, but not for our SYN, drop the input.	 *	if seg contains a RST, then drop the connection.	 *	if seg does not contain SYN, then drop it.	 * Otherwise this is an acceptable SYN segment	 *	initialize tp->rcv_nxt and tp->irs	 *	if seg contains ack then advance tp->snd_una	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state	 *	arrange for segment to be acked (eventually)	 *	continue processing rest of data/controls, beginning with URG	 */	case TCPS_SYN_SENT:		if ((taop = tcp_gettaocache(inp)) == NULL) {			taop = &tao_noncached;			bzero(taop, sizeof(*taop));		}		if ((thflags & TH_ACK) &&		    (SEQ_LEQ(th->th_ack, tp->iss) ||		     SEQ_GT(th->th_ack, tp->snd_max))) {			/*			 * If we have a cached CCsent for the remote host,			 * hence we haven't just crashed and restarted,			 * do not send a RST.  This may be a retransmission			 * from the other side after our earlier ACK was lost.			 * Our new SYN, when it arrives, will serve as the			 * needed ACK.			 */			if (taop->tao_ccsent != 0)				goto drop;			else {				rstreason = BANDLIM_UNLIMITED;				goto dropwithreset;			}		}		if (thflags & TH_RST) {			if (thflags & TH_ACK)				tp = tcp_drop(tp, ECONNREFUSED);			goto drop;		}		if ((thflags & TH_SYN) == 0)			goto drop;		tp->snd_wnd = th->th_win;	/* initial send window */		tp->cc_recv = to.to_cc;		/* foreign CC */		tp->irs = th->th_seq;		tcp_rcvseqinit(tp);		if (thflags & TH_ACK) {			/*			 * Our SYN was acked.  If segment contains CC.ECHO			 * option, check it to make sure this segment really			 * matches our SYN.  If not, just drop it as old			 * duplicate, but send an RST if we're still playing			 * by the old rules.  If no CC.ECHO option, make sure			 * we don't get fooled into using T/TCP.			 */			if (to.to_flag & TOF_CCECHO) {				if (tp->cc_send != to.to_ccecho) {					if (taop->tao_ccsent != 0)						goto drop;					else {						rstreason = BANDLIM_UNLIMITED;						goto dropwithreset;					}				}			} else				tp->t_flags &= ~TF_RCVD_CC;			tcpstat.tcps_connects++;			soisconnected(so);			/* Do window scaling on this connection? */			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==				(TF_RCVD_SCALE|TF_REQ_SCALE)) {				tp->snd_scale = tp->requested_s_scale;				tp->rcv_scale = tp->request_r_scale;			}			/* Segment is acceptable, update cache if undefined. */			if (taop->tao_ccsent == 0)				taop->tao_ccsent = to.to_ccecho;			tp->rcv_adv += tp->rcv_wnd;			tp->snd_una++;		/* SYN is acked */			/*			 * If there's data, delay ACK; if there's also a FIN			 * ACKNOW will be turned on later.			 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?