📄 tcp.c
字号:
/*
 * NOTE(review): this span is the tail of tcp_close(sk, timeout); the
 * function header and the declarations of need_reset, prot, th, buff,
 * t1, tmp and dev sit above the visible region.  It flushes the receive
 * queue, pushes out any half-built packet, then either enters a timed
 * close state or builds and queues a FIN segment, depending on sk->state.
 */
DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
/* Mark the socket busy (pre-SMP "inuse" lock) and shut both directions. */
sk->inuse = 1;
sk->keepopen = 1;
sk->shutdown = SHUTDOWN_MASK;
/* Wake anyone sleeping on a state change, unless the socket is orphaned. */
if (!sk->dead)
sk->state_change(sk);
/* We need to flush the recv. buffs. */
if (skb_peek(&sk->rqueue) != NULL)
{
struct sk_buff *skb;
if(sk->debug)
printk("Clean rcv queue\n");
while((skb=skb_dequeue(&sk->rqueue))!=NULL)
{
/* Unread data is being thrown away: if any of it lies beyond what
 * the user actually copied out, remember to send a reset so the
 * peer learns the data was lost (presumably why need_reset exists
 * -- its declaration is above the visible region). */
if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
need_reset = 1;
kfree_skb(skb, FREE_READ);
}
if(sk->debug)
printk("Cleaned.\n");
}
sk->rqueue = NULL;
/* Get rid off any half-completed packets. */
if (sk->partial) {
tcp_send_partial(sk);
}
switch(sk->state) {
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
case TCP_LAST_ACK:
/* Our FIN is already out; just (re)start the close timer. */
/* original code was 4 * sk->rtt. In converting to the
 * new rtt representation, we can't quite use that.
 * it seems to make most sense to use the backed off value
 */
reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
if (timeout) tcp_time_wait(sk);
release_sock(sk);
return; /* break causes a double release - messy */
case TCP_TIME_WAIT:
/* A hard timeout skips the 2MSL wait and closes immediately. */
if (timeout) {
sk->state = TCP_CLOSE;
}
release_sock(sk);
return;
case TCP_LISTEN:
/* A listening socket never exchanged data; just close it. */
sk->state = TCP_CLOSE;
release_sock(sk);
return;
case TCP_CLOSE:
release_sock(sk);
return;
case TCP_CLOSE_WAIT:
case TCP_ESTABLISHED:
case TCP_SYN_SENT:
case TCP_SYN_RECV:
/* These states still owe the peer a FIN: build one by hand. */
prot =(struct proto *)sk->prot;
th =(struct tcphdr *)&sk->dummy_th;
buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
if (buff == NULL) {
/* This will force it to try again later. */
/* Or it would have if someone released the socket
first. Anyway it might work now */
release_sock(sk);
/* Fall back to an open state and retry the close via a timer. */
if (sk->state != TCP_CLOSE_WAIT)
sk->state = TCP_ESTABLISHED;
reset_timer(sk, TIME_CLOSE, 100);
return;
}
buff->mem_addr = buff;
buff->mem_len = MAX_FIN_SIZE;
buff->sk = sk;
buff->free = 1;
buff->len = sizeof(*t1);
t1 =(struct tcphdr *) buff->data;
/* Put in the IP header and routing stuff. */
tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
IPPROTO_TCP, sk->opt,
sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
if (tmp < 0) {
kfree_skb(buff,FREE_WRITE);
DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
release_sock(sk);
return;
}
/* Advance past the IP header built above; tmp is its length. */
t1 =(struct tcphdr *)((char *)t1 +tmp);
buff->len += tmp;
buff->dev = dev;
/* Start from the socket's template header, then fill in FIN fields. */
memcpy(t1, th, sizeof(*t1));
/* NOTE(review): ntohl() used where htonl() reads more naturally; the
 * two are the same byte swap on every port this code ran on. */
t1->seq = ntohl(sk->write_seq);
/* The FIN consumes one sequence number. */
sk->write_seq++;
buff->h.seq = sk->write_seq;
t1->ack = 1;
/* Ack everything immediately from now on. */
sk->delay_acks = 0;
t1->ack_seq = ntohl(sk->acked_seq);
t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
t1->fin = 1;
/* Piggy-back a RST if unread receive data was discarded above. */
t1->rst = need_reset;
t1->doff = sizeof(*t1)/4;
tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
if (sk->wfront == NULL) {
/* Write queue empty: transmit the FIN right away. */
sk->sent_seq = sk->write_seq;
prot->queue_xmit(sk, dev, buff, 0);
} else {
/* Data still pending: append the FIN to the write queue so it
 * goes out in order behind it. */
reset_timer(sk, TIME_WRITE, sk->rto);
buff->next = NULL;
if (sk->wback == NULL) {
sk->wfront = buff;
} else {
sk->wback->next = buff;
}
sk->wback = buff;
buff->magic = TCP_WRITE_QUEUE_MAGIC;
}
/* NOTE(review): RFC 793 would move a socket closing from CLOSE_WAIT
 * into LAST_ACK, not FIN_WAIT2 -- verify against the ack-handling
 * code before changing; this is what the era's code shipped with. */
if (sk->state == TCP_CLOSE_WAIT) {
sk->state = TCP_FIN_WAIT2;
} else {
sk->state = TCP_FIN_WAIT1;
}
}
release_sock(sk);
}
/*
* This routine takes stuff off of the write queue,
* and puts it in the xmit queue.
*/
/*
 * Move segments from the socket's write queue onto the transmit path,
 * as long as the peer's window, the congestion window and the
 * retransmission state all allow it.  Segments already covered by an
 * incoming ack are simply freed instead of sent.
 */
static void
tcp_write_xmit(struct sock *sk)
{
	struct sk_buff *skb;

	DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));

	/* The bytes will have to remain here. In time closedown will
	   empty the write queue and all will be happy */
	if (sk->zapped)
		return;

	for (;;) {
		skb = sk->wfront;

		/* Nothing queued. */
		if (skb == NULL)
			break;
		/* Head of queue lies beyond the peer's advertised window. */
		if (!before(skb->h.seq, sk->window_seq + 1))
			break;
		/* While the write timer is retransmitting, only let through
		   segments that have already been acknowledged. */
		if (sk->retransmits != 0 && sk->timeout == TIME_WRITE &&
		    !before(skb->h.seq, sk->rcv_ack_seq + 1))
			break;
		/* Honour the congestion window. */
		if (sk->packets_out >= sk->cong_window)
			break;

		IS_SKB(skb);

		/* Detach the segment from the head of the write queue. */
		sk->wfront = skb->next;
		if (sk->wfront == NULL)
			sk->wback = NULL;
		skb->next = NULL;

		/* Corrupted queue entry: dump the whole queue and bail. */
		if (skb->magic != TCP_WRITE_QUEUE_MAGIC) {
			printk("tcp.c skb with bad magic(%X) on write queue. Squashing "
			       "queue\n", skb->magic);
			sk->wfront = NULL;
			sk->wback = NULL;
			return;
		}
		skb->magic = 0;

		DPRINTF((DBG_TCP, "Sending a packet.\n"));

		/* See if we really need to send the packet. */
		if (before(skb->h.seq, sk->rcv_ack_seq + 1)) {
			/* Already acked while it sat in the queue: free it. */
			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead)
				sk->write_space(sk);
		} else {
			sk->sent_seq = skb->h.seq;
			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
		}
	}
}
/*
* This routine sorts the send list, and resets the
* sk->send_head and sk->send_tail pointers.
*/
/*
 * This routine sorts the send (retransmit) list into ascending
 * sequence order and resets the sk->send_head and sk->send_tail
 * pointers.  Simple insertion sort: each skb is taken off the old
 * list (linked through link3) and spliced into its sorted position
 * in a new list.
 */
void
sort_send(struct sock *sk)
{
	struct sk_buff *list = NULL;
	struct sk_buff *skb, *skb2, *skb3;

	for (skb = sk->send_head; skb != NULL; skb = skb2) {
		/* Remember the successor before we relink this node. */
		skb2 = (struct sk_buff *)skb->link3;

		/* BUG FIX: the old code tested before(skb2->h.seq, ...) here,
		 * i.e. the *next* packet's sequence number, and dereferenced
		 * skb2 even when it was NULL (last node of a non-empty list).
		 * The element being inserted is skb, so compare skb->h.seq. */
		if (list == NULL || before(skb->h.seq, list->h.seq)) {
			/* Insert at the head of the sorted list. */
			skb->link3 = list;
			/* BUG FIX: only the very first insertion defines the
			 * tail; prepending to a non-empty list must not move
			 * sk->send_tail off the real last element. */
			if (list == NULL)
				sk->send_tail = skb;
			list = skb;
		} else {
			/* Walk the sorted list to find the insertion point. */
			for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) {
				if (skb3->link3 == NULL ||
				    before(skb->h.seq, skb3->link3->h.seq)) {
					skb->link3 = skb3->link3;
					skb3->link3 = skb;
					if (skb->link3 == NULL)
						sk->send_tail = skb;
					break;
				}
			}
		}
	}
	sk->send_head = list;
}
/* This routine deals with incoming acks, but not outgoing ones. */
static int
tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
{
unsigned long ack;
int flag = 0;
/*
* 1 - there was data in packet as well as ack or new data is sent or
* in shutdown state
* 2 - data from retransmit queue was acked and removed
* 4 - window shrunk or data from retransmit queue was acked and removed
*/
if(sk->zapped)
return(1); /* Dead, cant ack any more so why bother */
ack = ntohl(th->ack_seq);
DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
"sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
if (ntohs(th->window) > sk->max_window) {
sk->max_window = ntohs(th->window);
sk->mss = min(sk->max_window, sk->mtu);
}
if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
sk->retransmits = 0;
/* not quite clear why the +1 and -1 here, and why not +1 in next line */
if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
if (after(ack, sk->sent_seq) ||
(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
return(0);
}
if (sk->keepopen) {
reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
}
return(1);
}
if (len != th->doff*4) flag |= 1;
/* See if our window has been shrunk. */
if (after(sk->window_seq, ack+ntohs(th->window))) {
/*
* We may need to move packets from the send queue
* to the write queue, if the window has been shrunk on us.
* The RFC says you are not allowed to shrink your window
* like this, but if the other end does, you must be able
* to deal with it.
*/
struct sk_buff *skb;
struct sk_buff *skb2;
struct sk_buff *wskb = NULL;
skb2 = sk->send_head;
sk->send_head = NULL;
sk->send_tail = NULL;
flag |= 4;
sk->window_seq = ack + ntohs(th->window);
cli();
while (skb2 != NULL) {
skb = skb2;
skb2 = (struct sk_buff *)skb->link3;
skb->link3 = NULL;
if (after(skb->h.seq, sk->window_seq)) {
if (sk->packets_out > 0) sk->packets_out--;
/* We may need to remove this from the dev send list. */
if (skb->next != NULL) {
skb_unlink(skb);
}
/* Now add it to the write_queue. */
skb->magic = TCP_WRITE_QUEUE_MAGIC;
if (wskb == NULL) {
skb->next = sk->wfront;
sk->wfront = skb;
} else {
skb->next = wskb->next;
wskb->next = skb;
}
if (sk->wback == wskb) sk->wback = skb;
wskb = skb;
} else {
if (sk->send_head == NULL) {
sk->send_head = skb;
sk->send_tail = skb;
} else {
sk->send_tail->link3 = skb;
sk->send_tail = skb;
}
skb->link3 = NULL;
}
}
sti();
}
if (sk->send_tail == NULL || sk->send_head == NULL) {
sk->send_head = NULL;
sk->send_tail = NULL;
sk->packets_out= 0;
}
sk->window_seq = ack + ntohs(th->window);
/* We don't want too many packets out there. */
if (sk->timeout == TIME_WRITE &&
sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
/*
* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328. Because we keep cong_window in integral
* mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a
* counter and increment it once every cwnd times. It's possible
* that this should be done only if sk->retransmits == 0. I'm
* interpreting "new data is acked" as including data that has
* been retransmitted but is just now being acked.
*/
if (sk->cong_window < sk->ssthresh)
/* in "safe" area, increase */
sk->cong_window++;
else {
/* in dangerous area, increase slowly. In theory this is
sk->cong_window += 1 / sk->cong_window
*/
if (sk->cong_count >= sk->cong_window) {
sk->cong_window++;
sk->cong_count = 0;
} else
sk->cong_count++;
}
}
DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
sk->rcv_ack_seq = ack;
/*
* if this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission
*/
if (sk->timeout == TIME_PROBE0) {
if (sk->wfront != NULL && /* should always be non-null */
! before (sk->window_seq, sk->wfront->h.seq)) {
sk->retransmits = 0;
sk->backoff = 0;
/* recompute rto from rtt. this eliminates any backoff */
sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
if (sk->rto > 120*HZ)
sk->rto = 120*HZ;
if (sk->rto < 1*HZ)
sk->rto = 1*HZ;
}
}
/* See if we can take anything off of the retransmit queue. */
while(sk->send_head != NULL) {
/* Check for a bug. */
if (sk->send_head->link3 &&
after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
printk("INET: tcp.c: *** bug send_list out of order.\n");
sort_send(sk);
}
if (before(sk->send_head->h.seq, ack+1)) {
struct sk_buff *oskb;
if (sk->retransmits) {
/* we were retransmitting. don't count this in RTT est */
flag |= 2;
/*
* even though we've gotten an ack, we're still
* retransmitting as long as we're sending from
* the retransmit queue. Keeping retransmits non-zero
* prevents us from getting new data interspersed with
* retransmissions.
*/
if (sk->send_head->link3)
sk->retransmits = 1;
else
sk->retransmits = 0;
}
/*
* Note that we only reset backoff and rto in the
* rtt recomputation code. And that doesn't happen
* if there were retransmissions in effect. So the
* first new packet after the retransmissions is
* sent with the backoff still in effect. Not until
* we get an ack from a non-retransmitted packet do
* we reset the backoff and rto. This allows us to deal
* with a situation where the network delay has increased
* suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.)
*/
/* We have one less packet out there. */
if (sk->packets_out > 0) sk->packets_out --;
DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
sk->send_head, sk->send_head->h.seq, ack));
/* Wake up the process, it can probably write more. */
if (!sk->dead) sk->write_space(sk);
oskb = sk->send_head;
if (!(flag&2)) {
long m;
/* The following amusing code comes from Jacobson's
* article in SIGCOMM '88. Note that rtt and mdev
* are scaled versions of rtt and mean deviation.
* This is designed to be as fast as possible
* m stands for "measurement".
*/
m = jiffies - oskb->when; /* RTT */
m -= (sk->rtt >> 3); /* m is now error in rtt est */
sk->rtt += m; /* rtt = 7/8 rtt + 1/8 new */
if (m < 0)
m = -m; /* m is now abs(error) */
m -= (sk->mdev >> 2); /* similar update on mdev */
sk->mdev += m; /* mdev = 3/4 mdev + 1/4 new */
/* now update timeout. Note that this removes any backoff */
sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
if (sk->rto > 120*HZ)
sk->rto = 120*HZ;
if (sk->rto < 1*HZ)
sk->rto = 1*HZ;
sk->backoff = 0;
}
flag |= (2|4);
cli();
oskb = sk->send_head;
IS_SKB(oskb);
sk->send_head =(struct sk_buff *)oskb->link3;
if (sk->send_head == NULL) {
sk->send_tail = NULL;
}
/* We may need to remove this from the dev send list. */
skb_unlink(oskb); /* Much easier! */
sti();
oskb->magic = 0;
kfree_skb(oskb, FREE_WRITE); /* write. */
if (!sk->dead) sk->write_space(sk);
} else {
break;
}
}
/*
* Maybe we can take some stuff off of the write queue,
* and put it onto the xmit queue.
*/
if (sk->wfront != NULL) {
if (after (sk->window_seq+1, sk->wfront->h.seq) &&
(sk->retransmits == 0 ||
sk->timeout != TIME_WRITE ||
before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
&& sk->packets_out < sk->cong_wi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -