📄 tcp_output.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
		return;

	tcp_verify_left_out(tp);

	/* Don't muck with the congestion window here.
	 * Reason is that we do not increase amount of _data_
	 * in network, but units changed and effective
	 * cwnd/ssthresh really reduced now.
	 */
	if (icsk->icsk_ca_state != TCP_CA_Loss) {
		tp->high_seq = tp->snd_nxt;
		tp->snd_ssthresh = tcp_current_ssthresh(sk);
		tp->prior_ssthresh = 0;
		tp->undo_marker = 0;
		tcp_set_ca_state(sk, TCP_CA_Loss);
	}
	tcp_xmit_retransmit_queue(sk);
}

/* This retransmits one SKB.  Policy decisions and retransmit queue
 * state updates are done by the caller.
 *
 * @sk:  socket the segment belongs to
 * @skb: segment to retransmit
 *
 * Returns 0 when the segment was handed to tcp_transmit_skb()
 * successfully, or non-zero if an error occurred which prevented the
 * send:
 *   -EAGAIN        sk_wmem_alloc exceeds the allowed budget, or the
 *                  segment starts at/after snd_una + snd_wnd and is not
 *                  the segment at snd_una
 *   -ENOMEM        tcp_trim_head() or tcp_fragment() failed
 *   -EHOSTUNREACH  rebuild_header() failed
 *   otherwise      whatever tcp_transmit_skb() returned
 */
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int cur_mss = tcp_current_mss(sk, 0);
	int err;

	/* Inconclusive MTU probe: forget the probe size. */
	if (icsk->icsk_mtup.probe_size) {
		icsk->icsk_mtup.probe_size = 0;
	}

	/* Do not send more than we queued. 1/4 is reserved for possible
	 * copying overhead: fragmentation, tunneling, mangling etc.
	 * The budget is additionally capped by sk_sndbuf.
	 */
	if (atomic_read(&sk->sk_wmem_alloc) >
	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
		return -EAGAIN;

	/* The skb starts below snd_una: drop the leading part that lies
	 * before snd_una.  An skb whose end is also below snd_una is
	 * treated as a fatal inconsistency.
	 */
	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
			BUG();
		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
			return -ENOMEM;
	}

	/* If receiver has shrunk his window, and skb is out of
	 * new window, do not retransmit it. The exception is the
	 * case, when window is shrunk to zero. In this case
	 * our retransmit serves as a zero window probe.
	 */
	if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
	    && TCP_SKB_CB(skb)->seq != tp->snd_una)
		return -EAGAIN;

	/* Split the skb at cur_mss when it is longer than the current MSS. */
	if (skb->len > cur_mss) {
		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
			return -ENOMEM; /* We'll try again later. */
	}

	/* Collapse two adjacent packets if worthwhile and we can.
	 * Requirements checked here: not a SYN, shorter than half an MSS,
	 * the next skb exists and is not the send head, neither skb has
	 * page fragments, both count as exactly one packet, and the
	 * tcp_retrans_collapse sysctl is non-zero.
	 */
	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
	    (skb->len < (cur_mss >> 1)) &&
	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
	    (!tcp_skb_is_last(sk, skb)) &&
	    (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
	    (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
	    (sysctl_tcp_retrans_collapse != 0))
		tcp_retrans_try_collapse(sk, skb, cur_mss);

	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
		return -EHOSTUNREACH; /* Routing failure or similar. */

	/* Some Solaris stacks overoptimize and ignore the FIN on a
	 * retransmit when old data is attached.  So strip it off
	 * since it is cheap to do so and saves bytes on the network.
	 *
	 * Applies only when everything but the FIN's own sequence byte
	 * has been acknowledged (snd_una == end_seq - 1).  If the trim
	 * call returns zero, the skb is turned into a bare FIN: seq is
	 * moved to the FIN byte and the GSO and checksum fields are reset.
	 */
	if (skb->len > 0 &&
	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
		if (!pskb_trim(skb, 0)) {
			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
			skb_shinfo(skb)->gso_segs = 1;
			skb_shinfo(skb)->gso_size = 0;
			skb_shinfo(skb)->gso_type = 0;
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
		}
	}

	/* Make a copy, if the first transmission SKB clone we made
	 * is still in somebody's hands, else make a clone.
	 */
	TCP_SKB_CB(skb)->when = tcp_time_stamp;

	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);

	if (err == 0) {
		/* Update global TCP statistics. */
		TCP_INC_STATS(TCP_MIB_RETRANSSEGS);

		tp->total_retrans++;

#if FASTRETRANS_DEBUG > 0
		/* Debug aid: the skb was already marked as retransmitted. */
		if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
			if (net_ratelimit())
				printk(KERN_DEBUG "retrans_out leaked.\n");
		}
#endif
		/* First outstanding retransmission: remember snd_nxt. */
		if (!tp->retrans_out)
			tp->lost_retrans_low = tp->snd_nxt;
		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
		tp->retrans_out += tcp_skb_pcount(skb);

		/* Save stamp of the first retransmit. */
		if (!tp->retrans_stamp)
			tp->retrans_stamp = TCP_SKB_CB(skb)->when;

		tp->undo_retrans++;

		/* snd_nxt is stored to detect loss of retransmitted segment,
		 * see tcp_input.c tcp_sacktag_write_queue().
		 */
		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
	}
	return err;
}

/* This gets called after a retransmit timeout, and the initially
 * retransmitted data is acknowledged.  It tries to continue
 * resending the rest of the retransmit queue, until either
 * we've sent it all or the congestion window limit is reached.
 * If doing SACK, the first ACK which comes back for a timeout
 * based retransmit packet might feed us FACK information again.
 * If so, we use it to avoid unnecessary retransmissions.
 *
 * Works in two passes over the write queue, both stopping at the send
 * head and both bounded by snd_cwnd:
 *   1. retransmit segments tagged TCPCB_LOST that are neither SACKed
 *      nor already retransmitted;
 *   2. "forward" retransmissions of untagged segments up to
 *      highest_sack, only in TCP_CA_Recovery, not for Reno, and only
 *      when tcp_may_send_now() reports nothing new to send.
 *
 * @sk: socket whose retransmit queue is walked
 */
void tcp_xmit_retransmit_queue(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int packet_cnt;

	/* Resume from the cached position if there is one, else start
	 * from the head of the write queue with a zero packet count.
	 */
	if (tp->retransmit_skb_hint) {
		skb = tp->retransmit_skb_hint;
		packet_cnt = tp->retransmit_cnt_hint;
	}else{
		skb = tcp_write_queue_head(sk);
		packet_cnt = 0;
	}

	/* First pass: retransmit lost packets. */
	if (tp->lost_out) {
		tcp_for_write_queue_from(skb, sk) {
			__u8 sacked = TCP_SKB_CB(skb)->sacked;

			if (skb == tcp_send_head(sk))
				break;
			/* we could do better than to assign each time */
			tp->retransmit_skb_hint = skb;
			tp->retransmit_cnt_hint = packet_cnt;

			/* Assume this retransmit will generate
			 * only one packet for congestion window
			 * calculation purposes.  This works because
			 * tcp_retransmit_skb() will chop up the
			 * packet to be MSS sized and all the
			 * packet counting works out.
			 */
			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
				return;

			if (sacked & TCPCB_LOST) {
				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
					/* On failure drop the hint and give up
					 * for this call.
					 */
					if (tcp_retransmit_skb(sk, skb)) {
						tp->retransmit_skb_hint = NULL;
						return;
					}
					if (icsk->icsk_ca_state != TCP_CA_Loss)
						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
					else
						NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);

					/* Re-arm the retransmit timer when the
					 * head of the queue was resent.
					 */
					if (skb == tcp_write_queue_head(sk))
						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
									  inet_csk(sk)->icsk_rto,
									  TCP_RTO_MAX);
				}

				/* Every LOST-tagged skb counts, resent or not;
				 * stop once lost_out packets have been seen.
				 */
				packet_cnt += tcp_skb_pcount(skb);
				if (packet_cnt >= tp->lost_out)
					break;
			}
		}
	}

	/* OK, demanded retransmission is finished. */

	/* Forward retransmissions are possible only during Recovery. */
	if (icsk->icsk_ca_state != TCP_CA_Recovery)
		return;

	/* No forward retransmissions in Reno are possible. */
	if (tcp_is_reno(tp))
		return;

	/* Yeah, we have to make difficult choice between forward transmission
	 * and retransmission... Both ways have their merits...
	 *
	 * For now we do not retransmit anything, while we have some new
	 * segments to send. In the other cases, follow rule 3 for
	 * NextSeg() specified in RFC3517.
	 */

	if (tcp_may_send_now(sk))
		return;

	/* If nothing is SACKed, highest_sack in the loop won't be valid */
	if (!tp->sacked_out)
		return;

	/* Second pass: resume from the forward hint, or from the head. */
	if (tp->forward_skb_hint)
		skb = tp->forward_skb_hint;
	else
		skb = tcp_write_queue_head(sk);

	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		tp->forward_skb_hint = skb;

		/* Nothing beyond the highest SACKed sequence is considered. */
		if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack))
			break;

		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
			break;

		/* Skip segments that already carry any tag bit. */
		if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
			continue;

		/* Ok, retransmit it. */
		if (tcp_retransmit_skb(sk, skb)) {
			tp->forward_skb_hint = NULL;
			break;
		}

		if (skb == tcp_write_queue_head(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  inet_csk(sk)->icsk_rto,
						  TCP_RTO_MAX);

		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
	}
}

/* Send a fin.  The caller locks the socket for us.  This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 *
 * If unsent data is queued, the FIN flag is added to the tail skb;
 * otherwise a dedicated ACK|FIN skb is allocated (retrying until the
 * allocation succeeds) and queued.  Pending frames are then pushed
 * with TCP_NAGLE_OFF.
 *
 * @sk: socket to send the FIN on
 */
void tcp_send_fin(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb = tcp_write_queue_tail(sk);
	int mss_now;

	/* Optimization, tack on the FIN if we have a queue of
	 * unsent frames.  But be careful about outgoing SACKS
	 * and IP options.
	 */
	mss_now = tcp_current_mss(sk, 1);

	if (tcp_send_head(sk) != NULL) {
		/* The FIN consumes one sequence number: extend the tail skb
		 * and write_seq by one.
		 */
		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
		TCP_SKB_CB(skb)->end_seq++;
		tp->write_seq++;
	} else {
		/* Socket is locked, keep trying until memory is available. */
		for (;;) {
			skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
			if (skb)
				break;
			yield();
		}

		/* Reserve space for headers and prepare control bits. */
		skb_reserve(skb, MAX_TCP_HEADER);
		skb->csum = 0;
		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
		TCP_SKB_CB(skb)->sacked = 0;
		skb_shinfo(skb)->gso_segs = 1;
		skb_shinfo(skb)->gso_size = 0;
		skb_shinfo(skb)->gso_type = 0;

		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
		TCP_SKB_CB(skb)->seq = tp->write_seq;
		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
		tcp_queue_skb(sk, skb);
	}
	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}

/* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
 * was unread data in the receive queue.  This behavior is recommended
 * by RFC 2525, section 2.17.  -DaveM
 *
 * Builds a header-only ACK|RST segment and transmits it once.  Both an
 * allocation failure and a transmit failure are only accounted in
 * LINUX_MIB_TCPABORTFAILED; nothing is reported to the caller.
 *
 * @sk:       socket to reset
 * @priority: allocation flags used for the skb and for the transmit
 */
void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
	struct sk_buff *skb;

	/* NOTE: No TCP options attached and we never retransmit this. */
	skb = alloc_skb(MAX_TCP_HEADER, priority);
	if (!skb) {
		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);
	skb->csum = 0;
	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
	TCP_SKB_CB(skb)->sacked = 0;
	skb_shinfo(skb)->gso_segs = 1;
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_type = 0;

	/* Send it off.  seq == end_seq: the segment occupies no sequence space. */
	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	if (tcp_transmit_skb(sk, skb, 0, priority))
		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
}

/* WARNING: This routine must only be called when we have already sent
 * a SYN packet that crossed the incoming SYN that caused this routine
 * to get called. If this assumption fails then the initial rcv_wnd
 * and rcv_wscale values will not be correct.
 *
 * Turns the SYN at the head of the write queue into a SYN-ACK (if it
 * does not carry ACK yet) and transmits it.
 *
 * @sk: socket whose queued SYN is to be answered
 *
 * Returns -EFAULT if the queue head is missing or is not a SYN,
 * -ENOMEM if a cloned head skb could not be copied, otherwise the
 * result of tcp_transmit_skb().
 */
int tcp_send_synack(struct sock *sk)
{
	struct sk_buff* skb;

	skb = tcp_write_queue_head(sk);
	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
		return -EFAULT;
	}
	if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
		/* A cloned skb is replaced by a private copy at the head of
		 * the write queue before its flags are modified; the old skb
		 * is freed and the copy is charged to the socket.
		 */
		if (skb_cloned(skb)) {
			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
			if (nskb == NULL)
				return -ENOMEM;
			tcp_unlink_write_queue(skb, sk);
			skb_header_release(nskb);
			__tcp_add_write_queue_head(sk, nskb);
			sk_stream_free_skb(sk, skb);
			sk_charge_skb(sk, nskb);
			skb = nskb;
		}

		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
		TCP_ECN_send_synack(tcp_sk(sk), skb);
	}
	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}

/*
 * Prepare a SYN-ACK.
 */
struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				 struct request_sock *req)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcphdr *th;
	int tcp_header_size;
	struct sk_buff *skb;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *md5;
	__u8 *md5_hash_location;
#endif

	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_TCP_HEADER);

	skb->dst = dst_clone(dst);

	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
			   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
			   /* SACK_PERM is in the place of NOP NOP of TS */
			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));

#ifdef CONFIG_TCP_MD5SIG
	/* Are we doing MD5 on this segment? If so - make room for it */
	md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
	if (md5)
		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif
	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	th = tcp_hdr(skb);
	memset(th, 0, sizeof(struct tcphdr));
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
	th->source = inet_sk(sk)->sport;
	th->dest = ireq->rmt_port;
	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
	TCP_SKB_CB(skb)->sacked = 0;
	skb_shinfo(skb)->gso_segs = 1;
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_type = 0;
	th->seq = htonl(TCP_SKB_CB(skb)->seq);
	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
		__u8 rcv_wscale;
		/* Set this up on the first call only */
		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
		/* tcp_full_space because it is guaranteed to be the first packet */
		tcp_select_initial_window(tcp_full_space(sk),
			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			ireq->wscale_ok,
			&rcv_wscale);
		ireq->rcv_wscale = rcv_wscale;
	}

	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
	th->window = htons(min(req->rcv_wnd, 65535U));

	TCP_SKB_CB(skb)->when = tcp_time_stamp;
	tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
			      ireq->sack_ok, ireq->wscale_ok, ireq->
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -