📄 tcp_input.c
字号:
* we have enough buffer space to take it.
*/
#ifdef TCP_SACK
/* Clean receiver SACK report if present */
if (!tp->sack_disable && tp->rcv_numsacks)
tcp_clean_sackreport(tp);
#endif /* TCP_SACK */
++tcpstat.tcps_preddat;
tp->rcv_nxt += tlen;
tcpstat.tcps_rcvpack++;
tcpstat.tcps_rcvbyte += tlen;
ND6_HINT(tp);
/*
* Drop TCP, IP headers and TCP options then add data
* to socket buffer.
*/
m_adj(m, iphlen + off);
sbappend(&so->so_rcv, m);
sorwakeup(so);
if (th->th_flags & TH_PUSH)
tp->t_flags |= TF_ACKNOW;
else
tp->t_flags |= TF_DELACK;
return;
}
}
/*
* Compute mbuf offset to TCP data segment.
*/
hdroptlen = iphlen + off;
/*
* Calculate amount of space in receive window,
* and then do TCP input processing.
* Receive window is amount of space in rcv queue,
* but not less than advertised window.
*/
{ int win;
win = sbspace(&so->so_rcv);
if (win < 0)
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
}
switch (tp->t_state) {
/*
* If the state is LISTEN then ignore segment if it contains an RST.
* If the segment contains an ACK then it is bad and send a RST.
* If it does not contain a SYN then it is not interesting; drop it.
* If it is from this socket, drop it, it must be forged.
* Don't bother responding if the destination was a broadcast.
* Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
* tp->iss, and send a segment:
* <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
* Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
* Fill in remote peer address fields if not previously specified.
* Enter SYN_RECEIVED state, and process any other fields of this
* segment in this state.
*/
case TCPS_LISTEN: {
struct mbuf *am;
register struct sockaddr_in *sin;
#ifdef INET6
register struct sockaddr_in6 *sin6;
#endif /* INET6 */
if (tiflags & TH_RST)
goto drop;
if (tiflags & TH_ACK)
goto dropwithreset;
if ((tiflags & TH_SYN) == 0)
goto drop;
if (th->th_dport == th->th_sport) {
#ifdef INET6
if (is_ipv6) {
if (IN6_ARE_ADDR_EQUAL(&ipv6->ip6_src, &ipv6->ip6_dst))
goto drop;
} else {
#endif /* INET6 */
if (ti->ti_dst.s_addr == ti->ti_src.s_addr)
goto drop;
#ifdef INET6
}
#endif /* INET6 */
}
/*
* RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
* in_broadcast() should never return true on a received
* packet with M_BCAST not set.
*/
if (m->m_flags & (M_BCAST|M_MCAST))
goto drop;
#ifdef INET6
if (is_ipv6) {
/* XXX What about IPv6 Anycasting ?? :-( rja */
if (IN6_IS_ADDR_MULTICAST(&ipv6->ip6_dst))
goto drop;
} else
#endif /* INET6 */
if (IN_MULTICAST(ti->ti_dst.s_addr))
goto drop;
am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */
if (am == NULL)
goto drop;
#ifdef INET6
if (is_ipv6) {
/*
* This is probably the place to set the tp->pf value.
* (Don't forget to do it in the v4 code as well!)
*
* Also, remember to blank out things like flowlabel, or
* set flowlabel for accepted sockets in v6.
*
* FURTHERMORE, this is PROBABLY the place where the whole
* business of key munging is set up for passive
* connections.
*/
am->m_len = sizeof(struct sockaddr_in6);
sin6 = mtod(am, struct sockaddr_in6 *);
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(struct sockaddr_in6);
sin6->sin6_addr = ipv6->ip6_src;
sin6->sin6_port = th->th_sport;
sin6->sin6_flowinfo = htonl(0x0fffffff) &
inp->inp_ipv6.ip6_flow;
laddr6 = inp->inp_laddr6;
if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6))
inp->inp_laddr6 = ipv6->ip6_dst;
/* This is a good optimization. */
if (in6_pcbconnect(inp, am)) {
inp->inp_laddr6 = laddr6;
(void) m_free(am);
goto drop;
} /* endif in6_pcbconnect() */
tp->pf = PF_INET6;
} else {
/*
* Letting incoming v4 datagrams reach valid
* PF_INET6 sockets causes some overhead here.
*/
if (inp->inp_flags & INP_IPV6) {
if (!(inp->inp_flags & (INP_IPV6_UNDEC|INP_IPV6_MAPPED))) {
(void) m_free(am);
goto drop;
}
am->m_len = sizeof(struct sockaddr_in6);
sin6 = mtod(am, struct sockaddr_in6 *);
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
CREATE_IPV6_MAPPED(sin6->sin6_addr, ti->ti_src.s_addr);
sin6->sin6_port = th->th_sport;
sin6->sin6_flowinfo = 0;
laddr6 = inp->inp_laddr6;
if (inp->inp_laddr.s_addr == INADDR_ANY)
CREATE_IPV6_MAPPED(inp->inp_laddr6, ti->ti_dst.s_addr);
/*
* The pcb initially has the v6 default hoplimit
* set. We're sending v4 packets so we need to set
* the v4 ttl and tos.
*/
inp->inp_ip.ip_ttl = ip_defttl;
inp->inp_ip.ip_tos = 0;
if (in6_pcbconnect(inp, am)) {
inp->inp_laddr6 = laddr6;
(void) m_freem(am);
goto drop;
}
tp->pf = PF_INET;
} else {
#endif /* INET6 */
am->m_len = sizeof (struct sockaddr_in);
sin = mtod(am, struct sockaddr_in *);
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_addr = ti->ti_src;
sin->sin_port = ti->ti_sport;
bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
laddr = inp->inp_laddr;
if (inp->inp_laddr.s_addr == INADDR_ANY)
inp->inp_laddr = ti->ti_dst;
if (in_pcbconnect(inp, am)) {
inp->inp_laddr = laddr;
(void) m_free(am);
goto drop;
}
(void) m_free(am);
tp->pf = PF_INET;
#ifdef INET6
} /* if (inp->inp_flags & INP_IPV6) */
} /* if (is_ipv6) */
#endif /* INET6 */
tp->t_template = tcp_template(tp);
if (tp->t_template == 0) {
tp = tcp_drop(tp, ENOBUFS);
dropsocket = 0; /* socket is already gone */
goto drop;
}
if (optp)
tcp_dooptions(tp, optp, optlen, th,
&ts_present, &ts_val, &ts_ecr);
#ifdef TCP_SACK
/*
* If peer did not send a SACK_PERMITTED option (i.e., if
* tcp_dooptions() did not set TF_SACK_PERMIT), set
* sack_disable to 1 if it is currently 0.
*/
if (!tp->sack_disable)
if ((tp->t_flags & TF_SACK_PERMIT) == 0)
tp->sack_disable = 1;
#endif
if (iss)
tp->iss = iss;
else
tp->iss = tcp_iss;
#ifdef TCP_COMPAT_42
tcp_iss += TCP_ISSINCR/2;
#else /* TCP_COMPAT_42 */
tcp_iss += arc4random() % TCP_ISSINCR + 1;
#endif /* !TCP_COMPAT_42 */
tp->irs = th->th_seq;
tcp_sendseqinit(tp);
#if defined (TCP_SACK) || defined (TCP_NEWRENO)
tp->snd_last = tp->snd_una;
#endif /* TCP_SACK || TCP_NEWRENO */
#if defined(TCP_SACK) && defined(TCP_FACK)
tp->snd_fack = tp->snd_una;
tp->retran_data = 0;
tp->snd_awnd = 0;
#endif /* TCP_FACK */
tcp_rcvseqinit(tp);
tp->t_flags |= TF_ACKNOW;
tp->t_state = TCPS_SYN_RECEIVED;
tp->t_timer[TCPT_KEEP] = tcptv_keep_init;
dropsocket = 0; /* committed to socket */
tcpstat.tcps_accepts++;
goto trimthenstep6;
}
/*
* If the state is SYN_RECEIVED:
* if seg contains SYN/ACK, send an RST.
* if seg contains an ACK, but not for our SYN/ACK, send an RST
*/
case TCPS_SYN_RECEIVED:
if (tiflags & TH_ACK) {
if (tiflags & TH_SYN) {
tcpstat.tcps_badsyn++;
goto dropwithreset;
}
if (SEQ_LEQ(th->th_ack, tp->snd_una) ||
SEQ_GT(th->th_ack, tp->snd_max))
goto dropwithreset;
}
break;
/*
* If the state is SYN_SENT:
* if seg contains an ACK, but not for our SYN, drop it with a RST.
* if seg contains a RST, drop it (and the connection, if it has an ACK).
* if seg does not contain SYN, then drop it.
* Otherwise this is an acceptable SYN segment
* initialize tp->rcv_nxt and tp->irs
* if seg contains ack then advance tp->snd_una
* if SYN has been acked change to ESTABLISHED else SYN_RCVD state
* arrange for segment to be acked (eventually)
* continue processing rest of data/controls, beginning with URG
*/
case TCPS_SYN_SENT:
if ((tiflags & TH_ACK) &&
(SEQ_LEQ(th->th_ack, tp->iss) ||
SEQ_GT(th->th_ack, tp->snd_max)))
goto dropwithreset;
if (tiflags & TH_RST) {
if (tiflags & TH_ACK)
tp = tcp_drop(tp, ECONNREFUSED);
goto drop;
}
if ((tiflags & TH_SYN) == 0)
goto drop;
if (tiflags & TH_ACK) {
tp->snd_una = th->th_ack;
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
tp->snd_nxt = tp->snd_una;
}
tp->t_timer[TCPT_REXMT] = 0;
tp->irs = th->th_seq;
tcp_rcvseqinit(tp);
tp->t_flags |= TF_ACKNOW;
#ifdef TCP_SACK
/*
* If we've sent a SACK_PERMITTED option, and the peer
* also replied with one, then TF_SACK_PERMIT should have
* been set in tcp_dooptions(). If it was not, disable SACKs.
*/
if (!tp->sack_disable)
if ((tp->t_flags & TF_SACK_PERMIT) == 0)
tp->sack_disable = 1;
#endif
if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
tcpstat.tcps_connects++;
soisconnected(so);
tp->t_state = TCPS_ESTABLISHED;
/* Do window scaling on this connection? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
(void) tcp_reass(tp, (struct tcphdr *)0,
(struct mbuf *)0, &tlen);
/*
* if we didn't have to retransmit the SYN,
* use its rtt as our initial srtt & rtt var.
*/
if (tp->t_rtt)
tcp_xmit_timer(tp, tp->t_rtt);
/*
* Since new data was acked (the SYN), open the
* congestion window by one MSS. We do this
* here, because we won't go through the normal
* ACK processing below. And since this is the
* start of the connection, we know we are in
* the exponential phase of slow-start.
*/
tp->snd_cwnd += tp->t_maxseg;
} else
tp->t_state = TCPS_SYN_RECEIVED;
trimthenstep6:
/*
* Advance th->th_seq to correspond to first data byte.
* If data, trim to stay within window,
* dropping FIN if necessary.
*/
th->th_seq++;
if (tlen > tp->rcv_wnd) {
todrop = tlen - tp->rcv_wnd;
m_adj(m, -todrop);
tlen = tp->rcv_wnd;
tiflags &= ~TH_FIN;
tcpstat.tcps_rcvpackafterwin++;
tcpstat.tcps_rcvbyteafterwin += todrop;
}
tp->snd_wl1 = th->th_seq - 1;
tp->rcv_up = th->th_seq;
goto step6;
}
/*
* States other than LISTEN or SYN_SENT.
* First check timestamp, if present.
* Then check that at least some bytes of segment are within
* receive window. If segment begins before rcv_nxt,
* drop leading data (and SYN); if nothing left, just ack.
*
* RFC 1323 PAWS: If this segment carries a timestamp (and is not a RST)
* and its value (ts_val) is less than ts_recent, drop it.
*/
if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
TSTMP_LT(ts_val, tp->ts_recent)) {
/* Check to see if ts_recent is over 24 days old. */
if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
/*
* Invalidate ts_recent. If this segment updates
* ts_recent, the age will be reset later and ts_recent
* will get a valid value. If it does not, setting
* ts_recent to zero will at least satisfy the
* requirement that zero be placed in the timestamp
* echo reply when ts_recent isn't valid. The
* age isn't reset until we get a valid ts_recent
* because we don't want out-of-order segments to be
* dropped when ts_recent is old.
*/
tp->ts_recent = 0;
} else {
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += tlen;
tcpstat.tcps_pawsdrop++;
goto dropafterack;
}
}
todrop = tp->rcv_nxt - th->th_seq;
if (todrop > 0) {
if (tiflags & TH_SYN) {
tiflags &= ~TH_SYN;
th->th_seq++;
if (th->th_urp > 1)
th->th_urp--;
else
tiflags &= ~TH_URG;
todrop--;
}
if (todrop >= tlen ||
(todrop == tlen && (tiflags & TH_FIN) == 0)) {
/*
* Any valid FIN must be to the left of the
* window. At this point, FIN must be a
* duplicate or out-of-sequence, so drop it.
*/
tiflags &= ~TH_FIN;
/*
* Send ACK to resynchronize, and drop any data,
* but keep on processing for RST or ACK.
*/
tp->t_flags |= TF_ACKNOW;
tcpstat.tcps_rcvdupbyte += todrop = tlen;
tcpstat.tcps_rcvduppack++;
} else {
tcpstat.tcps_rcvpartduppack++;
tcpstat.tcps_rcvpartdupbyte += todrop;
}
hdroptlen += todrop; /* drop from head afterwards */
th->th_seq += todrop;
tlen -= todrop;
if (th->th_urp > todrop)
th->th_urp -= todrop;
else {
tiflags &= ~TH_URG;
th->th_urp = 0;
}
}
/*
* If new data are received on a connection after the
* user processes are gone, then RST the other end.
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
tp = tcp_close(tp);
tcpstat.tcps_rcvafterclose++;
goto dropwithreset;
}
/*
* If segment ends after window, drop trailing data
* (and PUSH and FIN); if nothing left, just ACK.
*/
todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd);
if (todrop > 0) {
tcpstat.tcps_rcvpackafterwin++;
if (todrop >= tlen) {
tcpstat.tcps_rcvbyteafterwin += tlen;
/*
* If a new connection request is received
* while in TIME_WAIT, drop the old connection
* and start over if the sequence numbers
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -