📄 tcp.c
字号:
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "ip.h"

/*
 * Constants used throughout this file: queue limit, header sizes,
 * timer states, header flag bits, option codes, defaults and the
 * connection states.
 */
enum
{
	QMAX = 64*1024-1,	/* first argument to qopen() in tcpcreate(); presumably the queue limit in bytes */
	IP_TCPPROTO = 6,

	/*
	 * Sizes that match the Tcp4hdr layout below, counting one byte per uchar:
	 *	TCP4_IPLEN	vihl through frag (8)
	 *	TCP4_PHDRSIZE	Unused through tcpdst (12)
	 *	TCP4_HDRSIZE	tcpsport through tcpurg (20)
	 * TCP4_TCBPHDRSZ equals TCP4_PKT + TCP4_HDRSIZE.
	 */
	TCP4_IPLEN = 8,
	TCP4_PHDRSIZE = 12,
	TCP4_HDRSIZE = 20,
	TCP4_TCBPHDRSZ = 40,
	TCP4_PKT = TCP4_IPLEN+TCP4_PHDRSIZE,

	/*
	 * Same idea for Tcp6hdr.  TCP6_PHDRSIZE covers vcf through tcpdst,
	 * which is 40 if IPaddrlen is 16 (defined elsewhere -- confirm).
	 * TCP6_TCBPHDRSZ equals TCP6_PKT + TCP6_HDRSIZE.
	 */
	TCP6_IPLEN = 0,
	TCP6_PHDRSIZE = 40,
	TCP6_HDRSIZE = 20,
	TCP6_TCBPHDRSZ = 60,
	TCP6_PKT = TCP6_IPLEN+TCP6_PHDRSIZE,

	/* values of Tcptimer.state; timerstate() chains a timer only while it is TcptimerON */
	TcptimerOFF = 0,
	TcptimerON = 1,
	TcptimerDONE = 2,

	MAX_TIME = (1<<20),	/* Forever */
	TCP_ACK = 50,		/* Timed ack sequence in ms */
	MAXBACKMS = 9*60*1000,	/* longest backoff time (ms) before hangup */

	/* bits of the TCP header flags */
	URG = 0x20,		/* Data marked urgent */
	ACK = 0x10,		/* Acknowledge is valid */
	PSH = 0x08,		/* Whole data pipe is pushed */
	RST = 0x04,		/* Reset connection */
	SYN = 0x02,		/* Packet is a synchronise */
	FIN = 0x01,		/* Start close down */

	/* TCP options */
	EOLOPT = 0,
	NOOPOPT = 1,
	MSSOPT = 2,
	MSS_LENGTH = 4,		/* Maximum segment size option; the value is presumably the option length in bytes -- confirm */
	WSOPT = 3,
	WS_LENGTH = 3,		/* Window scale option (bits to scale window size by); the value is presumably the option length in bytes -- confirm */

	MSL2 = 10,
	MSPTICK = 50,		/* Milliseconds per timer tick */
	DEF_MSS = 1460,		/* Default maximum segment size (initial value of tcp_mss) */
	DEF_MSS6 = 1280,	/* Default maximum segment size (min) for v6 */
	DEF_RTT = 500,		/* Default round trip (initial value of tcp_irtt) */
	DEF_KAT = 120000,	/* Default time (ms) between keep alives */

	/* second argument of tcpstart() */
	TCP_LISTEN = 0,		/* Listen connection */
	TCP_CONNECT = 1,	/* Outgoing connection */

	SYNACK_RXTIMER = 250,	/* ms between SYNACK retransmits */
	TCPREXMTTHRESH = 3,	/* dupack threshold for rxt */

	FORCE = 1,		/* or'ed into Tcpctl.flags by tcpacktimer() before calling tcpoutput() */
	CLONE = 2,
	RETRAN = 4,
	ACTIVE = 8,
	SYNACK = 16,

	LOGAGAIN = 3,
	LOGDGAIN = 2,

	/*
	 * Connection states, stored in Tcpctl.state.
	 * The order must stay in step with tcpstates[] below.
	 */
	Closed = 0,
	Listen,
	Syn_sent,
	Syn_received,
	Established,
	Finwait1,
	Finwait2,
	Close_wait,
	Closing,
	Last_ack,
	Time_wait,

	Maxlimbo = 1000,	/* maximum procs waiting for response to SYN ACK */
	NLHT = 256,		/* hash table size, must be a power of 2 */
	LHTMASK = NLHT-1,	/* all ones below NLHT, given that NLHT is a power of 2 */

	HaveWS = 1<<8,
};

/*
 * Printable names of the connection states; tcpstate() indexes this
 * array with Tcpctl.state.  Must correspond to the enumeration above.
 */
char *tcpstates[] =
{
	"Closed",
	"Listen",
	"Syn_sent",
	"Syn_received",
	"Established",
	"Finwait1",
	"Finwait2",
	"Close_wait",
	"Closing",
	"Last_ack",
	"Time_wait"
};

/*
 * One timer.  timerstate() links timers that are TcptimerON into the
 * doubly linked list headed by Tcppriv.timers.
 */
typedef struct Tcptimer Tcptimer;
struct Tcptimer
{
	Tcptimer *next;		/* list links maintained by timerstate(); both nil while unchained */
	Tcptimer *prev;
	Tcptimer *readynext;
	int state;		/* TcptimerOFF, TcptimerON or TcptimerDONE */
	int start;		/* reported by tcpstate() as timer.start */
	int count;		/* reported by tcpstate() as timer.count */
	void (*func)(void*);	/* presumably called with arg when the timer expires -- confirm */
	void *arg;
};

/*
 *  v4 and v6 pseudo headers used for
 *  checksumming tcp
 */
typedef struct Tcp4hdr Tcp4hdr;
struct Tcp4hdr
{
	uchar vihl;		/* Version and header length */
	uchar tos;		/* Type of service */
	uchar length[2];	/* packet length */
	uchar id[2];		/* Identification */
	uchar frag[2];		/* Fragment information */
	uchar Unused;
	uchar proto;
	uchar tcplen[2];
	uchar tcpsrc[4];
	uchar tcpdst[4];
	uchar tcpsport[2];
	uchar tcpdport[2];
	uchar tcpseq[4];
	uchar tcpack[4];
	uchar tcpflag[2];
	uchar tcpwin[2];
	uchar tcpcksum[2];
	uchar tcpurg[2];
	/* Options segment */
	uchar tcpopt[1];
};

typedef struct Tcp6hdr Tcp6hdr;
struct Tcp6hdr
{
	uchar vcf[4];
	uchar ploadlen[2];
	uchar proto;
	uchar ttl;
	uchar tcpsrc[IPaddrlen];
	uchar tcpdst[IPaddrlen];
	uchar tcpsport[2];
	uchar tcpdport[2];
	uchar tcpseq[4];
	uchar tcpack[4];
	uchar tcpflag[2];
	uchar tcpwin[2];
	uchar tcpcksum[2];
	uchar tcpurg[2];
	/* Options segment */
	uchar tcpopt[1];
};

/*
 *  this represents the control info
 *  for a single packet. It is derived from
 *  a packet in ntohtcp{4,6}() and stuck into
 *  a packet in htontcp{4,6}().
*/
typedef struct Tcp Tcp;
struct Tcp
{
	ushort source;
	ushort dest;
	ulong seq;
	ulong ack;
	uchar flags;
	ushort ws;	/* window scale option (if not zero) */
	ulong wnd;
	ushort urg;
	ushort mss;	/* max segment size option (if not zero) */
	ushort len;	/* size of data */
};

/*
 *  this header is malloc'd to thread together fragments
 *  waiting to be coalesced
 */
typedef struct Reseq Reseq;
struct Reseq
{
	Reseq *next;
	Tcp seg;
	Block *bp;
	ushort length;
};

/*
 *  Per-conversation TCP control block, reached through Conv.ptcl.
 *  the qlock in the Conv locks this structure
 */
typedef struct Tcpctl Tcpctl;
struct Tcpctl
{
	uchar state;		/* Connection state (Closed ... Time_wait); changed through tcpsetstate() */
	uchar type;		/* Listening or active connection */
	uchar code;		/* Icmp code */

	/* send side */
	struct {
		ulong una;	/* Unacked data pointer */
		ulong nxt;	/* Next sequence expected; tcpclose() advances it by one when it queues a close */
		ulong ptr;	/* Data pointer */
		ulong wnd;	/* Tcp send window */
		ulong urg;	/* Urgent data pointer */
		ulong wl2;
		int scale;	/* how much to right shift window in xmitted packets */
		/* to implement tahoe and reno TCP */
		ulong dupacks;	/* number of duplicate acks rcvd */
		int recovery;	/* loss recovery flag */
		ulong rxt;	/* right window marker for recovery */
	} snd;

	/* receive side */
	struct {
		ulong nxt;	/* Receive pointer to next uchar slot */
		ulong wnd;	/* Receive window incoming; recomputed by tcprcvwin() */
		ulong urg;	/* Urgent pointer */
		int blocked;	/* set to 1 by tcprcvwin() when the computed window is 0 */
		int una;	/* unacked data segs */
		int scale;	/* how much to left shift window in rcved packets */
	} rcv;

	ulong iss;		/* Initial sequence number */
	int sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
	ulong cwind;		/* Congestion window */
	int scale;		/* desired snd.scale */
	ushort ssthresh;	/* Slow start threshold; NOTE(review): ushort while cwind is ulong */
	int resent;		/* Bytes just resent */
	int irs;		/* Initial received sequence; NOTE(review): int here, ulong in Limbo.irs */
	ushort mss;		/* Maximum segment size */
	int rerecv;		/* Overlap of data re-received */
	ulong window;		/* Receive window; tcprcvwin() subtracts qlen(rq) from it to get rcv.wnd */
	uchar backoff;		/* Exponential backoff counter */
	int backedoff;		/* ms we've backed off for rexmits */
	uchar flags;		/* State flags (tcpacktimer() sets FORCE here) */
	Reseq *reseq;		/* Resequencing queue */
	Tcptimer timer;		/* Activity timer */
	Tcptimer acktimer;	/* Acknowledge timer */
	Tcptimer rtt_timer;	/* Round trip timer */
	Tcptimer katimer;	/* keep alive timer */
	ulong rttseq;		/* Round trip sequence */
	int srtt;		/* Shortened round trip */
	int mdev;		/* Mean deviation of round trip */
	int kacounter;		/* count down for keep alive */
	uint sndsyntime;	/* time syn sent */
	ulong time;		/* time Finwait2 or Syn_received was sent */
	int nochecksum;		/* non-zero means don't send checksums */
	int flgcnt;		/* number of flags in the sequence (FIN,SEQ); NOTE(review): SEQ presumably means SYN -- confirm */

	union {
		Tcp4hdr tcp4hdr;
		Tcp6hdr tcp6hdr;
	} protohdr;		/* prototype header */
};

/*
 *  New calls are put in limbo rather than having a conversation structure
 *  allocated. Thus, a SYN attack results in lots of limbo'd calls but not
 *  any real Conv structures mucking things up. Calls in limbo rexmit their
 *  SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
 *
 *  In particular they aren't on a listener's queue so that they don't figure
 *  in the input queue limit.
 *
 *  If 1/2 of a T3 was attacking SYN packets, we'ld have a permanent queue
 *  of 70000 limbo'd calls. Not great for a linear list but doable. Therefore
 *  there is no hashing of this list.
*/
typedef struct Limbo Limbo;
struct Limbo
{
	Limbo *next;		/* next entry on the same list */
	uchar laddr[IPaddrlen];
	uchar raddr[IPaddrlen];
	ushort lport;
	ushort rport;
	ulong irs;		/* initial received sequence */
	ulong iss;		/* initial sent sequence */
	ushort mss;		/* mss from the other end */
	ushort rcvscale;	/* how much to scale rcvd windows */
	ushort sndscale;	/* how much to scale sent windows */
	ulong lastsend;		/* last time we sent a synack */
	uchar version;		/* v4 or v6 */
	uchar rexmits;		/* number of retransmissions */
};

int tcp_irtt = DEF_RTT;		/* Initial guess at round trip time */
ushort tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */

/*
 * Indices into Tcppriv.stats[] and statnames[].
 * Nstats must stay last: it sizes the stats[] array.
 */
enum {
	/* MIB stats */
	MaxConn,
	ActiveOpens,
	PassiveOpens,
	EstabResets,
	CurrEstab,		/* kept up to date by tcpsetstate() */
	InSegs,
	OutSegs,
	RetransSegs,
	RetransTimeouts,
	InErrs,
	OutRsts,

	/* non-MIB stats */
	CsumErrs,
	HlenErrs,
	LenErrs,
	OutOfOrder,

	Nstats
};

/*
 * Printable name of each statistic, placed by index with the
 * "[index] value" initializer form (no '='), so the order of the
 * lines here does not matter.
 */
static char *statnames[] =
{
[MaxConn]	"MaxConn",
[ActiveOpens]	"ActiveOpens",
[PassiveOpens]	"PassiveOpens",
[EstabResets]	"EstabResets",
[CurrEstab]	"CurrEstab",
[InSegs]	"InSegs",
[OutSegs]	"OutSegs",
[RetransSegs]	"RetransSegs",
[RetransTimeouts]	"RetransTimeouts",
[InErrs]	"InErrs",
[OutRsts]	"OutRsts",
[CsumErrs]	"CsumErrs",
[HlenErrs]	"HlenErrs",
[LenErrs]	"LenErrs",
[OutOfOrder]	"OutOfOrder",
};

/*
 * Per-protocol private state, reached through the priv field of the
 * Proto (tcpsetstate() reads it as s->p->priv).
 */
typedef struct Tcppriv Tcppriv;
struct Tcppriv
{
	/* List of active timers */
	QLock tl;
	Tcptimer *timers;	/* list head; timerstate() pushes and unlinks entries */

	/* hash table for matching conversations */
	Ipht ht;

	/* calls in limbo waiting for an ACK to our SYN ACK */
	int nlimbo;
	Limbo *lht[NLHT];

	/* for keeping track of tcpackproc */
	QLock apl;
	int ackprocstarted;

	ulong stats[Nstats];	/* counters indexed by the stats enumeration above */
};

/*
 *  Setting tcpporthogdefense to non-zero enables Dong Lin's
 *  solution to hijacked systems staking out port's as a form
 *  of DoS attack.
 *
 *  To avoid stateless Conv hogs, we pick a sequence number at random. If
 *  it that number gets acked by the other end, we shut down the connection.
 *  Look for tcpporthogedefense in the code.
*/int tcpporthogdefense = 0;int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);void getreseq(Tcpctl*, Tcp*, Block**, ushort*);void localclose(Conv*, char*);void procsyn(Conv*, Tcp*);void tcpiput(Proto*, Ipifc*, Block*);void tcpoutput(Conv*);int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);void tcpstart(Conv*, int);void tcptimeout(void*);void tcpsndsyn(Conv*, Tcpctl*);void tcprcvwin(Conv*);void tcpacktimer(void*);void tcpkeepalive(void*);void tcpsetkacounter(Tcpctl*);void tcprxmit(Conv*);void tcpsettimer(Tcpctl*);void tcpsynackrtt(Conv*);void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);static void limborexmit(Proto*);static void limbo(Conv*, uchar*, uchar*, Tcp*, int);voidtcpsetstate(Conv *s, uchar newstate){ Tcpctl *tcb; uchar oldstate; Tcppriv *tpriv; tpriv = s->p->priv; tcb = (Tcpctl*)s->ptcl; oldstate = tcb->state; if(oldstate == newstate) return; if(oldstate == Established) tpriv->stats[CurrEstab]--; if(newstate == Established) tpriv->stats[CurrEstab]++; /** print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport, tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab ); **/ switch(newstate) { case Closed: qclose(s->rq); qclose(s->wq); qclose(s->eq); break; case Close_wait: /* Remote closes */ qhangup(s->rq, nil); break; } tcb->state = newstate; if(oldstate == Syn_sent && newstate != Closed) Fsconnected(s, nil);}static char*tcpconnect(Conv *c, char **argv, int argc){ char *e; e = Fsstdconnect(c, argv, argc); if(e != nil) return e; tcpstart(c, TCP_CONNECT); return nil;}static inttcpstate(Conv *c, char *state, int n){ Tcpctl *s; s = (Tcpctl*)(c->ptcl); return snprint(state, n, "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", tcpstates[s->state], c->rq ? qlen(c->rq) : 0, c->wq ? 
qlen(c->wq) : 0, s->srtt, s->mdev, s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, s->timer.start, s->timer.count, s->rerecv, s->katimer.start, s->katimer.count);}static inttcpinuse(Conv *c){ Tcpctl *s; s = (Tcpctl*)(c->ptcl); return s->state != Closed;}static char*tcpannounce(Conv *c, char **argv, int argc){ char *e; e = Fsstdannounce(c, argv, argc); if(e != nil) return e; tcpstart(c, TCP_LISTEN); Fsconnected(c, nil); return nil;}/* * tcpclose is always called with the q locked */static voidtcpclose(Conv *c){ Tcpctl *tcb; tcb = (Tcpctl*)c->ptcl; qhangup(c->rq, nil); qhangup(c->wq, nil); qhangup(c->eq, nil); qflush(c->rq); switch(tcb->state) { case Listen: /* * reset any incoming calls to this listener */ Fsconnected(c, "Hangup"); localclose(c, nil); break; case Closed: case Syn_sent: localclose(c, nil); break; case Syn_received: case Established: tcb->flgcnt++; tcb->snd.nxt++; tcpsetstate(c, Finwait1); tcpoutput(c); break; case Close_wait: tcb->flgcnt++; tcb->snd.nxt++; tcpsetstate(c, Last_ack); tcpoutput(c); break; }}voidtcpkick(void *x){ Conv *s = x; Tcpctl *tcb; tcb = (Tcpctl*)s->ptcl; if(waserror()){ qunlock(s); nexterror(); } qlock(s); switch(tcb->state) { case Syn_sent: case Syn_received: case Established: case Close_wait: /* * Push data */ tcprcvwin(s); tcpoutput(s); break; default: localclose(s, "Hangup"); break; } qunlock(s); poperror();}voidtcprcvwin(Conv *s) /* Call with tcb locked */{ int w; Tcpctl *tcb; tcb = (Tcpctl*)s->ptcl; w = tcb->window - qlen(s->rq); if(w < 0) w = 0; tcb->rcv.wnd = w; if(w == 0) tcb->rcv.blocked = 1;}voidtcpacktimer(void *v){ Tcpctl *tcb; Conv *s; s = v; tcb = (Tcpctl*)s->ptcl; if(waserror()){ qunlock(s); nexterror(); } qlock(s); if(tcb->state != Closed){ tcb->flags |= FORCE; tcprcvwin(s); tcpoutput(s); } qunlock(s); poperror();}static voidtcpcreate(Conv *c){ c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c); c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);}static voidtimerstate(Tcppriv *priv, Tcptimer *t, int 
newstate){ if(newstate != TcptimerON){ if(t->state == TcptimerON){ // unchain if(priv->timers == t){ priv->timers = t->next; if(t->prev != nil) panic("timerstate1"); } if(t->next) t->next->prev = t->prev; if(t->prev) t->prev->next = t->next; t->next = t->prev = nil; } } else { if(t->state != TcptimerON){ // chain if(t->prev != nil || t->next != nil) panic("timerstate2"); t->prev = nil; t->next = priv->timers; if(t->next) t->next->prev = t; priv->timers = t; } } t->state = newstate;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -