📄 tcp.c
字号:
/*
 * NOTE(review): this span is the tail of tcp_close(sk, timeout); the
 * function header and the declarations of need_reset, prot, th, buff,
 * t1, tmp and dev sit above the visible region.  It flushes the receive
 * queue, pushes out any half-built packet, then either enters a timed
 * close state or builds and queues a FIN segment, depending on sk->state.
 */
DPRINTF((DBG_TCP, "tcp_close((struct sock *)%X, %d)\n",sk, timeout));
/* Mark the socket busy (pre-SMP "inuse" lock) and shut both directions. */
sk->inuse = 1;
sk->keepopen = 1;
sk->shutdown = SHUTDOWN_MASK;
/* Wake anyone sleeping on a state change, unless the socket is orphaned. */
if (!sk->dead)
sk->state_change(sk);
/* We need to flush the recv. buffs. */
if (skb_peek(&sk->rqueue) != NULL)
{
struct sk_buff *skb;
if(sk->debug)
printk("Clean rcv queue\n");
while((skb=skb_dequeue(&sk->rqueue))!=NULL)
{
/* Unread data is being thrown away: if any of it lies beyond what
 * the user actually copied out, remember to send a reset so the
 * peer learns the data was lost (presumably why need_reset exists
 * -- its declaration is above the visible region). */
if(skb->len > 0 && after(skb->h.th->seq + skb->len + 1 , sk->copied_seq))
need_reset = 1;
kfree_skb(skb, FREE_READ);
}
if(sk->debug)
printk("Cleaned.\n");
}
sk->rqueue = NULL;
/* Get rid off any half-completed packets. */
if (sk->partial) {
tcp_send_partial(sk);
}
switch(sk->state) {
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
case TCP_LAST_ACK:
/* Our FIN is already out; just (re)start the close timer. */
/* original code was 4 * sk->rtt. In converting to the
 * new rtt representation, we can't quite use that.
 * it seems to make most sense to use the backed off value
 */
reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
if (timeout) tcp_time_wait(sk);
release_sock(sk);
return; /* break causes a double release - messy */
case TCP_TIME_WAIT:
/* A hard timeout skips the 2MSL wait and closes immediately. */
if (timeout) {
sk->state = TCP_CLOSE;
}
release_sock(sk);
return;
case TCP_LISTEN:
/* A listening socket never exchanged data; just close it. */
sk->state = TCP_CLOSE;
release_sock(sk);
return;
case TCP_CLOSE:
release_sock(sk);
return;
case TCP_CLOSE_WAIT:
case TCP_ESTABLISHED:
case TCP_SYN_SENT:
case TCP_SYN_RECV:
/* These states still owe the peer a FIN: build one by hand. */
prot =(struct proto *)sk->prot;
th =(struct tcphdr *)&sk->dummy_th;
buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
if (buff == NULL) {
/* This will force it to try again later. */
/* Or it would have if someone released the socket
first. Anyway it might work now */
release_sock(sk);
/* Fall back to an open state and retry the close via a timer. */
if (sk->state != TCP_CLOSE_WAIT)
sk->state = TCP_ESTABLISHED;
reset_timer(sk, TIME_CLOSE, 100);
return;
}
buff->mem_addr = buff;
buff->mem_len = MAX_FIN_SIZE;
buff->sk = sk;
buff->free = 1;
buff->len = sizeof(*t1);
t1 =(struct tcphdr *) buff->data;
/* Put in the IP header and routing stuff. */
tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
IPPROTO_TCP, sk->opt,
sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
if (tmp < 0) {
kfree_skb(buff,FREE_WRITE);
DPRINTF((DBG_TCP, "Unable to build header for fin.\n"));
release_sock(sk);
return;
}
/* Advance past the IP header built above; tmp is its length. */
t1 =(struct tcphdr *)((char *)t1 +tmp);
buff->len += tmp;
buff->dev = dev;
/* Start from the socket's template header, then fill in FIN fields. */
memcpy(t1, th, sizeof(*t1));
/* NOTE(review): ntohl() used where htonl() reads more naturally; the
 * two are the same byte swap on every port this code ran on. */
t1->seq = ntohl(sk->write_seq);
/* The FIN consumes one sequence number. */
sk->write_seq++;
buff->h.seq = sk->write_seq;
t1->ack = 1;
/* Ack everything immediately from now on. */
sk->delay_acks = 0;
t1->ack_seq = ntohl(sk->acked_seq);
t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
t1->fin = 1;
/* Piggy-back a RST if unread receive data was discarded above. */
t1->rst = need_reset;
t1->doff = sizeof(*t1)/4;
tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
if (sk->wfront == NULL) {
/* Write queue empty: transmit the FIN right away. */
sk->sent_seq = sk->write_seq;
prot->queue_xmit(sk, dev, buff, 0);
} else {
/* Data still pending: append the FIN to the write queue so it
 * goes out in order behind it. */
reset_timer(sk, TIME_WRITE, sk->rto);
buff->next = NULL;
if (sk->wback == NULL) {
sk->wfront = buff;
} else {
sk->wback->next = buff;
}
sk->wback = buff;
buff->magic = TCP_WRITE_QUEUE_MAGIC;
}
/* NOTE(review): RFC 793 would move a socket closing from CLOSE_WAIT
 * into LAST_ACK, not FIN_WAIT2 -- verify against the ack-handling
 * code before changing; this is what the era's code shipped with. */
if (sk->state == TCP_CLOSE_WAIT) {
sk->state = TCP_FIN_WAIT2;
} else {
sk->state = TCP_FIN_WAIT1;
}
}
release_sock(sk);
}
/*
* This routine takes stuff off of the write queue,
* and puts it in the xmit queue.
*/
/*
 * Move segments from the socket's write queue onto the transmit path,
 * as long as the peer's window, the congestion window and the
 * retransmission state all allow it.  Segments already covered by an
 * incoming ack are simply freed instead of sent.
 */
static void
tcp_write_xmit(struct sock *sk)
{
	struct sk_buff *skb;

	DPRINTF((DBG_TCP, "tcp_write_xmit(sk=%X)\n", sk));

	/* The bytes will have to remain here. In time closedown will
	   empty the write queue and all will be happy */
	if (sk->zapped)
		return;

	for (;;) {
		skb = sk->wfront;

		/* Nothing queued. */
		if (skb == NULL)
			break;
		/* Head of queue lies beyond the peer's advertised window. */
		if (!before(skb->h.seq, sk->window_seq + 1))
			break;
		/* While the write timer is retransmitting, only let through
		   segments that have already been acknowledged. */
		if (sk->retransmits != 0 && sk->timeout == TIME_WRITE &&
		    !before(skb->h.seq, sk->rcv_ack_seq + 1))
			break;
		/* Honour the congestion window. */
		if (sk->packets_out >= sk->cong_window)
			break;

		IS_SKB(skb);

		/* Detach the segment from the head of the write queue. */
		sk->wfront = skb->next;
		if (sk->wfront == NULL)
			sk->wback = NULL;
		skb->next = NULL;

		/* Corrupted queue entry: dump the whole queue and bail. */
		if (skb->magic != TCP_WRITE_QUEUE_MAGIC) {
			printk("tcp.c skb with bad magic(%X) on write queue. Squashing "
			       "queue\n", skb->magic);
			sk->wfront = NULL;
			sk->wback = NULL;
			return;
		}
		skb->magic = 0;

		DPRINTF((DBG_TCP, "Sending a packet.\n"));

		/* See if we really need to send the packet. */
		if (before(skb->h.seq, sk->rcv_ack_seq + 1)) {
			/* Already acked while it sat in the queue: free it. */
			sk->retransmits = 0;
			kfree_skb(skb, FREE_WRITE);
			if (!sk->dead)
				sk->write_space(sk);
		} else {
			sk->sent_seq = skb->h.seq;
			sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
		}
	}
}
/*
* This routine sorts the send list, and resets the
* sk->send_head and sk->send_tail pointers.
*/
/*
 * This routine sorts the send (retransmit) list into ascending
 * sequence order and resets the sk->send_head and sk->send_tail
 * pointers.  Simple insertion sort: each skb is taken off the old
 * list (linked through link3) and spliced into its sorted position
 * in a new list.
 */
void
sort_send(struct sock *sk)
{
	struct sk_buff *list = NULL;
	struct sk_buff *skb, *skb2, *skb3;

	for (skb = sk->send_head; skb != NULL; skb = skb2) {
		/* Remember the successor before we relink this node. */
		skb2 = (struct sk_buff *)skb->link3;

		/* BUG FIX: the old code tested before(skb2->h.seq, ...) here,
		 * i.e. the *next* packet's sequence number, and dereferenced
		 * skb2 even when it was NULL (last node of a non-empty list).
		 * The element being inserted is skb, so compare skb->h.seq. */
		if (list == NULL || before(skb->h.seq, list->h.seq)) {
			/* Insert at the head of the sorted list. */
			skb->link3 = list;
			/* BUG FIX: only the very first insertion defines the
			 * tail; prepending to a non-empty list must not move
			 * sk->send_tail off the real last element. */
			if (list == NULL)
				sk->send_tail = skb;
			list = skb;
		} else {
			/* Walk the sorted list to find the insertion point. */
			for (skb3 = list; ; skb3 = (struct sk_buff *)skb3->link3) {
				if (skb3->link3 == NULL ||
				    before(skb->h.seq, skb3->link3->h.seq)) {
					skb->link3 = skb3->link3;
					skb3->link3 = skb;
					if (skb->link3 == NULL)
						sk->send_tail = skb;
					break;
				}
			}
		}
	}
	sk->send_head = list;
}
/* This routine deals with incoming acks, but not outgoing ones. */
static int
tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
{
unsigned long ack;
int flag = 0;
/*
* 1 - there was data in packet as well as ack or new data is sent or
* in shutdown state
* 2 - data from retransmit queue was acked and removed
* 4 - window shrunk or data from retransmit queue was acked and removed
*/
if(sk->zapped)
return(1); /* Dead, cant ack any more so why bother */
ack = ntohl(th->ack_seq);
DPRINTF((DBG_TCP, "tcp_ack ack=%d, window=%d, "
"sk->rcv_ack_seq=%d, sk->window_seq = %d\n",
ack, ntohs(th->window), sk->rcv_ack_seq, sk->window_seq));
if (ntohs(th->window) > sk->max_window) {
sk->max_window = ntohs(th->window);
sk->mss = min(sk->max_window, sk->mtu);
}
if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
sk->retransmits = 0;
/* not quite clear why the +1 and -1 here, and why not +1 in next line */
if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) {
if (after(ack, sk->sent_seq) ||
(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) {
return(0);
}
if (sk->keepopen) {
reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
}
return(1);
}
if (len != th->doff*4) flag |= 1;
/* See if our window has been shrunk. */
if (after(sk->window_seq, ack+ntohs(th->window))) {
/*
* We may need to move packets from the send queue
* to the write queue, if the window has been shrunk on us.
* The RFC says you are not allowed to shrink your window
* like this, but if the other end does, you must be able
* to deal with it.
*/
struct sk_buff *skb;
struct sk_buff *skb2;
struct sk_buff *wskb = NULL;
skb2 = sk->send_head;
sk->send_head = NULL;
sk->send_tail = NULL;
flag |= 4;
sk->window_seq = ack + ntohs(th->window);
cli();
while (skb2 != NULL) {
skb = skb2;
skb2 = (struct sk_buff *)skb->link3;
skb->link3 = NULL;
if (after(skb->h.seq, sk->window_seq)) {
if (sk->packets_out > 0) sk->packets_out--;
/* We may need to remove this from the dev send list. */
if (skb->next != NULL) {
skb_unlink(skb);
}
/* Now add it to the write_queue. */
skb->magic = TCP_WRITE_QUEUE_MAGIC;
if (wskb == NULL) {
skb->next = sk->wfront;
sk->wfront = skb;
} else {
skb->next = wskb->next;
wskb->next = skb;
}
if (sk->wback == wskb) sk->wback = skb;
wskb = skb;
} else {
if (sk->send_head == NULL) {
sk->send_head = skb;
sk->send_tail = skb;
} else {
sk->send_tail->link3 = skb;
sk->send_tail = skb;
}
skb->link3 = NULL;
}
}
sti();
}
if (sk->send_tail == NULL || sk->send_head == NULL) {
sk->send_head = NULL;
sk->send_tail = NULL;
sk->packets_out= 0;
}
sk->window_seq = ack + ntohs(th->window);
/* We don't want too many packets out there. */
if (sk->timeout == TIME_WRITE &&
sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) {
/*
* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328. Because we keep cong_window in integral
* mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a
* counter and increment it once every cwnd times. It's possible
* that this should be done only if sk->retransmits == 0. I'm
* interpreting "new data is acked" as including data that has
* been retransmitted but is just now being acked.
*/
if (sk->cong_window < sk->ssthresh)
/* in "safe" area, increase */
sk->cong_window++;
else {
/* in dangerous area, increase slowly. In theory this is
sk->cong_window += 1 / sk->cong_window
*/
if (sk->cong_count >= sk->cong_window) {
sk->cong_window++;
sk->cong_count = 0;
} else
sk->cong_count++;
}
}
DPRINTF((DBG_TCP, "tcp_ack: Updating rcv ack sequence.\n"));
sk->rcv_ack_seq = ack;
/*
* if this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission
*/
if (sk->timeout == TIME_PROBE0) {
if (sk->wfront != NULL && /* should always be non-null */
! before (sk->window_seq, sk->wfront->h.seq)) {
sk->retransmits = 0;
sk->backoff = 0;
/* recompute rto from rtt. this eliminates any backoff */
sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
if (sk->rto > 120*HZ)
sk->rto = 120*HZ;
if (sk->rto < 1*HZ)
sk->rto = 1*HZ;
}
}
/* See if we can take anything off of the retransmit queue. */
while(sk->send_head != NULL) {
/* Check for a bug. */
if (sk->send_head->link3 &&
after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) {
printk("INET: tcp.c: *** bug send_list out of order.\n");
sort_send(sk);
}
if (before(sk->send_head->h.seq, ack+1)) {
struct sk_buff *oskb;
if (sk->retransmits) {
/* we were retransmitting. don't count this in RTT est */
flag |= 2;
/*
* even though we've gotten an ack, we're still
* retransmitting as long as we're sending from
* the retransmit queue. Keeping retransmits non-zero
* prevents us from getting new data interspersed with
* retransmissions.
*/
if (sk->send_head->link3)
sk->retransmits = 1;
else
sk->retransmits = 0;
}
/*
* Note that we only reset backoff and rto in the
* rtt recomputation code. And that doesn't happen
* if there were retransmissions in effect. So the
* first new packet after the retransmissions is
* sent with the backoff still in effect. Not until
* we get an ack from a non-retransmitted packet do
* we reset the backoff and rto. This allows us to deal
* with a situation where the network delay has increased
* suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.)
*/
/* We have one less packet out there. */
if (sk->packets_out > 0) sk->packets_out --;
DPRINTF((DBG_TCP, "skb=%X skb->h.seq = %d acked ack=%d\n",
sk->send_head, sk->send_head->h.seq, ack));
/* Wake up the process, it can probably write more. */
if (!sk->dead) sk->write_space(sk);
oskb = sk->send_head;
if (!(flag&2)) {
long m;
/* The following amusing code comes from Jacobson's
* article in SIGCOMM '88. Note that rtt and mdev
* are scaled versions of rtt and mean deviation.
* This is designed to be as fast as possible
* m stands for "measurement".
*/
m = jiffies - oskb->when; /* RTT */
m -= (sk->rtt >> 3); /* m is now error in rtt est */
sk->rtt += m; /* rtt = 7/8 rtt + 1/8 new */
if (m < 0)
m = -m; /* m is now abs(error) */
m -= (sk->mdev >> 2); /* similar update on mdev */
sk->mdev += m; /* mdev = 3/4 mdev + 1/4 new */
/* now update timeout. Note that this removes any backoff */
sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
if (sk->rto > 120*HZ)
sk->rto = 120*HZ;
if (sk->rto < 1*HZ)
sk->rto = 1*HZ;
sk->backoff = 0;
}
flag |= (2|4);
cli();
oskb = sk->send_head;
IS_SKB(oskb);
sk->send_head =(struct sk_buff *)oskb->link3;
if (sk->send_head == NULL) {
sk->send_tail = NULL;
}
/* We may need to remove this from the dev send list. */
skb_unlink(oskb); /* Much easier! */
sti();
oskb->magic = 0;
kfree_skb(oskb, FREE_WRITE); /* write. */
if (!sk->dead) sk->write_space(sk);
} else {
break;
}
}
/*
* Maybe we can take some stuff off of the write queue,
* and put it onto the xmit queue.
*/
if (sk->wfront != NULL) {
if (after (sk->window_seq+1, sk->wfront->h.seq) &&
(sk->retransmits == 0 ||
sk->timeout != TIME_WRITE ||
before(sk->wfront->h.seq, sk->rcv_ack_seq +1))
&& sk->packets_out < sk->cong_wi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -