📄 tcp.c
字号:
prot =(struct proto *)sk->prot; th =(struct tcphdr *)&sk->dummy_th; buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC); if (buff == NULL) { /* This will force it to try again later. */ /* Or it would have if someone released the socket first. Anyway it might work now */ release_sock(sk); if (sk->state != TCP_CLOSE_WAIT) sk->state = TCP_ESTABLISHED; reset_timer(sk, TIME_CLOSE, 100); return; } buff->mem_addr = buff; buff->mem_len = MAX_FIN_SIZE; buff->sk = sk; buff->free = 1; buff->len = sizeof(*t1); t1 =(struct tcphdr *) buff->data; /* Put in the IP header and routing stuff. */ tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev, IPPROTO_TCP, sk->opt, sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl); if (tmp < 0) { kfree_skb(buff,FREE_WRITE); DPRINTF((DBG_TCP, "Unable to build header for fin.\n")); release_sock(sk); return; } t1 =(struct tcphdr *)((char *)t1 +tmp); buff->len += tmp; buff->dev = dev; memcpy(t1, th, sizeof(*t1)); t1->seq = ntohl(sk->write_seq); sk->write_seq++; buff->h.seq = sk->write_seq; t1->ack = 1; /* Ack everything immediately from now on. */ sk->delay_acks = 0; t1->ack_seq = ntohl(sk->acked_seq); t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/); t1->fin = 1; t1->rst = need_reset; t1->doff = sizeof(*t1)/4; tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk); if (sk->wfront == NULL) { sk->sent_seq = sk->write_seq; prot->queue_xmit(sk, dev, buff, 0); } else { reset_timer(sk, TIME_WRITE, sk->rto); buff->next = NULL; if (sk->wback == NULL) { sk->wfront = buff; } else { sk->wback->next = buff; } sk->wback = buff; buff->magic = TCP_WRITE_QUEUE_MAGIC; } if (sk->state == TCP_CLOSE_WAIT) { sk->state = TCP_FIN_WAIT2; } else { sk->state = TCP_FIN_WAIT1; } } release_sock(sk);}/* * This routine takes stuff off of the write queue, * and puts it in the xmit queue. */static voidtcp_write_xmit(struct sock *sk){ struct sk_buff *skb; DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk)); /* The bytes will have to remain here. In time closedown will empty the write queue and all will be happy */ if(sk->zapped) return; while(sk->wfront != NULL && before(sk->wfront->h.seq, sk->window_seq +1) && (sk->retransmits == 0 || sk->timeout != TIME_WRITE || before(sk->wfront->h.seq, sk->rcv_ack_seq +1)) && sk->packets_out < sk->cong_window) { skb = sk->wfront; IS_SKB(skb); sk->wfront = skb->next; if (sk->wfront == NULL) sk->wback = NULL; skb->next = NULL; if (skb->magic != TCP_WRITE_QUEUE_MAGIC) { printk("tcp.c skb with bad magic(%X) on write queue. Squashing " "queue\n", skb->magic); sk->wfront = NULL; sk->wback = NULL; return; } skb->magic = 0; DPRINTF((DBG_TCP, "Sending a packet.\n")); /* See if we really need to send the packet. */ if (before(skb->h.seq, sk->rcv_ack_seq +1)) { sk->retransmits = 0; kfree_skb(skb, FREE_WRITE); if (!sk->dead) sk->write_space(sk); } else { sk->sent_seq = skb->h.seq; sk->prot->queue_xmit(sk, skb->dev, skb, skb->free); } }}/* * This routine sorts the send list, and resets the * sk->send_head and sk->send_tail pointers. */voidsort_send(struct sock *sk){ struct sk_buff *list = NULL; struct sk_buff *skb,*skb2,*skb3; for (skb = sk->send_head; skb != NULL; skb = skb2) { skb2 = (struct sk_buff *)skb->link3; if (list == NULL || before (skb2->h.seq, list->h.seq)) { skb->link3 = list; sk->send_tail = skb; list = skb; } else { for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) { if (skb3->link3 == NULL || before(skb->h.seq, skb3->link3->h.seq)) { skb->link3 = skb3->link3; skb3->link3 = skb; if (skb->link3 == NULL) sk->send_tail = skb; break; } } } } sk->send_head = list;} /* This routine deals with incoming acks, but not outgoing ones. */static inttcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len){ unsigned long ack; int flag = 0; /* * 1 - there was data in packet as well as ack or new data is sent or * in shutdown state * 2 - data from retransmit queue was acked and removed * 4 - window shrunk or data from retransmit queue was acked and removed */ if(sk->zapped) return(1); /* Dead, cant ack any more so why bother */ ack = ntohl(th->ack_seq); DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, " "sk->rcv_ack_seq=%d, sk->window_seq = %d\n", ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq)); if (ntohs(th->window) > sk->max_window) { sk->max_window = ntohs(th->window); sk->mss = min(sk->max_window, sk->mtu); } if (sk->retransmits && sk->timeout == TIME_KEEPOPEN) sk->retransmits = 0;/* not quite clear why the +1 and -1 here, and why not +1 in next line */ if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) { if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) { return(0); } if (sk->keepopen) { reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); } return(1); } if (len != th->doff*4) flag |= 1; /* See if our window has been shrunk. */ if (after(sk->window_seq, ack+ntohs(th->window))) { /* * We may need to move packets from the send queue * to the write queue, if the window has been shrunk on us. * The RFC says you are not allowed to shrink your window * like this, but if the other end does, you must be able * to deal with it. */ struct sk_buff *skb; struct sk_buff *skb2; struct sk_buff *wskb = NULL; skb2 = sk->send_head; sk->send_head = NULL; sk->send_tail = NULL; flag |= 4; sk->window_seq = ack + ntohs(th->window); cli(); while (skb2 != NULL) { skb = skb2; skb2 = (struct sk_buff *)skb->link3; skb->link3 = NULL; if (after(skb->h.seq, sk->window_seq)) { if (sk->packets_out > 0) sk->packets_out--; /* We may need to remove this from the dev send list. */ if (skb->next != NULL) { skb_unlink(skb); } /* Now add it to the write_queue. */ skb->magic = TCP_WRITE_QUEUE_MAGIC; if (wskb == NULL) { skb->next = sk->wfront; sk->wfront = skb; } else { skb->next = wskb->next; wskb->next = skb; } if (sk->wback == wskb) sk->wback = skb; wskb = skb; } else { if (sk->send_head == NULL) { sk->send_head = skb; sk->send_tail = skb; } else { sk->send_tail->link3 = skb; sk->send_tail = skb; } skb->link3 = NULL; } } sti(); } if (sk->send_tail == NULL || sk->send_head == NULL) { sk->send_head = NULL; sk->send_tail = NULL; sk->packets_out= 0; } sk->window_seq = ack + ntohs(th->window); /* We don't want too many packets out there. */ if (sk->timeout == TIME_WRITE && sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {/* * This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. Because we keep cong_window in integral * mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a * counter and increment it once every cwnd times. It's possible * that this should be done only if sk->retransmits == 0. I'm * interpreting "new data is acked" as including data that has * been retransmitted but is just now being acked. */ if (sk->cong_window < sk->ssthresh) /* in "safe" area, increase */ sk->cong_window++; else { /* in dangerous area, increase slowly. In theory this is sk->cong_window += 1 / sk->cong_window */ if (sk->cong_count >= sk->cong_window) { sk->cong_window++; sk->cong_count = 0; } else sk->cong_count++; } } DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n")); sk->rcv_ack_seq = ack; /* * if this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission */ if (sk->timeout == TIME_PROBE0) { if (sk->wfront != NULL && /* should always be non-null */ ! before (sk->window_seq, sk->wfront->h.seq)) { sk->retransmits = 0; sk->backoff = 0; /* recompute rto from rtt. this eliminates any backoff */ sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; if (sk->rto > 120*HZ) sk->rto = 120*HZ; if (sk->rto < 1*HZ) sk->rto = 1*HZ; } } /* See if we can take anything off of the retransmit queue. */ while(sk->send_head != NULL) { /* Check for a bug. */ if (sk->send_head->link3 && after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) { printk("INET: tcp.c: *** bug send_list out of order.\n"); sort_send(sk); } if (before(sk->send_head->h.seq, ack+1)) { struct sk_buff *oskb; if (sk->retransmits) { /* we were retransmitting. don't count this in RTT est */ flag |= 2; /* * even though we've gotten an ack, we're still * retransmitting as long as we're sending from * the retransmit queue. Keeping retransmits non-zero * prevents us from getting new data interspersed with * retransmissions. */ if (sk->send_head->link3) sk->retransmits = 1; else sk->retransmits = 0; } /* * Note that we only reset backoff and rto in the * rtt recomputation code. And that doesn't happen * if there were retransmissions in effect. So the * first new packet after the retransmissions is * sent with the backoff still in effect. Not until * we get an ack from a non-retransmitted packet do * we reset the backoff and rto. This allows us to deal * with a situation where the network delay has increased * suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.) */ /* We have one less packet out there. */ if (sk->packets_out > 0) sk->packets_out --; DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n", sk->send_head, sk->send_head->h.seq, ack)); /* Wake up the process, it can probably write more. */ if (!sk->dead) sk->write_space(sk); oskb = sk->send_head; if (!(flag&2)) { long m; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev * are scaled versions of rtt and mean deviation. * This is designed to be as fast as possible * m stands for "measurement". */ m = jiffies - oskb->when; /* RTT */ m -= (sk->rtt >> 3); /* m is now error in rtt est */ sk->rtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) m = -m; /* m is now abs(error) */ m -= (sk->mdev >> 2); /* similar update on mdev */ sk->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ /* now update timeout. Note that this removes any backoff */ sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1; if (sk->rto > 120*HZ) sk->rto = 120*HZ; if (sk->rto < 1*HZ) sk->rto = 1*HZ; sk->backoff = 0; } flag |= (2|4); cli(); oskb = sk->send_head; IS_SKB(oskb); sk->send_head =(struct sk_buff *)oskb->link3; if (sk->send_head == NULL) { sk->send_tail = NULL; } /* We may need to remove this from the dev send list. */ skb_unlink(oskb); /* Much easier! */ sti(); oskb->magic = 0; kfree_skb(oskb, FREE_WRITE); /* write. */ if (!sk->dead) sk->write_space(sk); } else { break; } } /* * Maybe we can take some stuff off of the write queue, * and put it onto the xmit queue. */ if (sk->wfront != NULL) { if (after (sk->window_seq+1, sk->wfront->h.seq) && (sk->retransmits == 0 || sk->timeout != TIME_WRITE || before(sk->wfront->h.seq, sk->rcv_ack_seq +1)) && sk->packets_out < sk->cong_window) { flag |= 1; tcp_write_xmit(sk); } else if (before(sk->window_seq, sk->wfront->h.seq) && sk->send_head == NULL && sk->ack_backlog == 0 && sk->state != TCP_TIME_WAIT) { reset_timer(sk, TIME_PROBE0, sk->rto); } } else { if (sk->send_head == NULL && sk->ack_backlog == 0 && sk->state != TCP_TIME_WAIT && !sk->keepopen) { DPRINTF((DBG_TCP, "Nothing to do, going to sleep.\n")); if (!sk->dead) sk->write_space(sk); if (sk->keepopen) reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); else delete_timer(sk); } else { if (sk->state != (unsigned char) sk->keepopen) { reset_timer(sk, TIME_WRITE, sk->rto); } if (sk->state == TCP_TIME_WAIT) { reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); } } } if (sk->packets_out == 0 && sk->partial != NULL && sk->wfront == NULL && sk->send_head == NULL) { flag |= 1; tcp_send_partial(sk); } /* See if we are done. */ if (sk->state == TCP_TIME_WAIT) { if (!sk->dead) sk->state_change(sk); if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) { flag |= 1; sk->state = TCP_CLOSE; sk->shutdown = SHUTDOWN_MASK; } } if (sk->state == TCP_LAST_ACK || sk->state == TCP_FIN_WAIT2) { if (!sk->dead) sk->state_change(sk); if (sk->rcv_ack_seq == sk->write_seq) { flag |= 1; if (sk->acked_seq != sk->fin_seq) { tcp_time_wait(sk); } else { DPRINTF((DBG_TCP, "tcp_ack closing socket - %X\n", sk)); tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, sk->daddr); sk->shutdown = SHUTDOWN_MASK; sk->state = TCP_CLOSE; } } }/* * I make no guarantees about the first clause in the following * test, i.e. "(!flag) || (flag&4)". I'm not entirely sure under * what conditions "!flag" would be true. However I think the rest * of the conditions would prevent that from causing any * unnecessary retransmission. * Clearly if the first packet has expired it should be * retransmitted. The other alternative, "flag&2 && retransmits", is * harder to explain: You have to look carefully at how and when the * timer is set and with what timeout. The most recent transmission always * sets the timer. So in general if the most recent thing has timed * out, everything before it has as well. So we want to go ahead and * retransmit some more. If we didn't explicitly test for this * condition with "flag&2 && retransmits", chances
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -