/* tcp_output.c */
}

/* This function synchronizes snd mss to the current pmtu/exthdr set.
 *
 * tp->rx_opt.user_mss is the mss set by the user via TCP_MAXSEG.  It does
 * NOT account for TCP options, but covers only the bare TCP header.
 *
 * tp->rx_opt.mss_clamp is the mss negotiated at connection setup.  It is
 * the minimum of user_mss and the mss received with the SYN.  It also does
 * not include TCP options.
 *
 * inet_csk(sk)->icsk_pmtu_cookie is the last pmtu seen by this function.
 *
 * tp->mss_cache is the current effective sending mss, including all TCP
 * options except SACKs.  It is evaluated taking the current pmtu into
 * account, but never exceeds tp->rx_opt.mss_clamp.
 *
 * NOTE1. rfc1122 clearly states that the advertised MSS DOES NOT include
 * either tcp or ip options.
 *
 * NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache are READ ONLY
 * outside this function.
 *
 * --ANK (980731)
 */
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        int mss_now;

        if (icsk->icsk_mtup.search_high > pmtu)
                icsk->icsk_mtup.search_high = pmtu;

        mss_now = tcp_mtu_to_mss(sk, pmtu);

        /* Bound mss with half of window */
        if (tp->max_window && mss_now > (tp->max_window >> 1))
                mss_now = max((tp->max_window >> 1), 68U - tp->tcp_header_len);

        /* And store cached results */
        icsk->icsk_pmtu_cookie = pmtu;
        if (icsk->icsk_mtup.enabled)
                mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low));
        tp->mss_cache = mss_now;

        return mss_now;
}

/* Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 *
 * LARGESEND note: !urg_mode is overkill, only frames up to snd_up
 * cannot be large.  However, taking into account the rare use of URG,
 * this is not a big flaw.
 */
unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct dst_entry *dst = __sk_dst_get(sk);
        u32 mss_now;
        u16 xmit_size_goal;
        int doing_tso = 0;

        mss_now = tp->mss_cache;

        if (large_allowed && sk_can_gso(sk) && !tp->urg_mode)
                doing_tso = 1;

        if (dst) {
                u32 mtu = dst_mtu(dst);
                if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
                        mss_now = tcp_sync_mss(sk, mtu);
        }

        if (tp->rx_opt.eff_sacks)
                mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
                            (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));

#ifdef CONFIG_TCP_MD5SIG
        if (tp->af_specific->md5_lookup(sk, sk))
                mss_now -= TCPOLEN_MD5SIG_ALIGNED;
#endif

        xmit_size_goal = mss_now;

        if (doing_tso) {
                xmit_size_goal = (65535 -
                                  inet_csk(sk)->icsk_af_ops->net_header_len -
                                  inet_csk(sk)->icsk_ext_hdr_len -
                                  tp->tcp_header_len);

                if (tp->max_window &&
                    (xmit_size_goal > (tp->max_window >> 1)))
                        xmit_size_goal = max((tp->max_window >> 1),
                                             68U - tp->tcp_header_len);

                xmit_size_goal -= (xmit_size_goal % mss_now);
        }
        tp->xmit_size_goal = xmit_size_goal;

        return mss_now;
}
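/* Illustrative sketch (not part of tcp_output.c): the per-segment option
 * cost that tcp_current_mss() subtracts above, written out as a standalone
 * helper with a hypothetical name.  With the option sizes used above
 * (TCPOLEN_SACK_BASE_ALIGNED == 4, TCPOLEN_SACK_PERBLOCK == 8), a
 * 1448-byte mss_cache and two active SACK blocks leave
 * 1448 - (4 + 2 * 8) = 1428 bytes of payload per segment.
 */
static inline unsigned int example_mss_after_sack_opts(unsigned int mss_cache,
                                                       int eff_sacks)
{
        if (eff_sacks)
                mss_cache -= TCPOLEN_SACK_BASE_ALIGNED +
                             eff_sacks * TCPOLEN_SACK_PERBLOCK;
        return mss_cache;
}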
/* Congestion window validation. (RFC2861) */

static void tcp_cwnd_validate(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        __u32 packets_out = tp->packets_out;

        if (packets_out >= tp->snd_cwnd) {
                /* Network is fed fully. */
                tp->snd_cwnd_used = 0;
                tp->snd_cwnd_stamp = tcp_time_stamp;
        } else {
                /* Network starves. */
                if (tp->packets_out > tp->snd_cwnd_used)
                        tp->snd_cwnd_used = tp->packets_out;

                if (sysctl_tcp_slow_start_after_idle &&
                    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
                        tcp_cwnd_application_limited(sk);
        }
}

static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb,
                                      unsigned int mss_now, unsigned int cwnd)
{
        u32 window, cwnd_len;

        window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
        cwnd_len = mss_now * cwnd;
        return min(window, cwnd_len);
}

/* Can at least one segment of SKB be sent right now, according to the
 * congestion window rules?  If so, return how many segments are allowed.
 */
static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
{
        u32 in_flight, cwnd;

        /* Don't be strict about the congestion window for the final FIN. */
        if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
            tcp_skb_pcount(skb) == 1)
                return 1;

        in_flight = tcp_packets_in_flight(tp);
        cwnd = tp->snd_cwnd;
        if (in_flight < cwnd)
                return (cwnd - in_flight);

        return 0;
}

/* This must be invoked the first time we consider transmitting
 * SKB onto the wire.
 */
static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
                             unsigned int mss_now)
{
        int tso_segs = tcp_skb_pcount(skb);

        if (!tso_segs ||
            (tso_segs > 1 &&
             tcp_skb_mss(skb) != mss_now)) {
                tcp_set_skb_tso_segs(sk, skb, mss_now);
                tso_segs = tcp_skb_pcount(skb);
        }
        return tso_segs;
}

static inline int tcp_minshall_check(const struct tcp_sock *tp)
{
        return after(tp->snd_sml, tp->snd_una) &&
                !after(tp->snd_sml, tp->snd_nxt);
}

/* Return 0 if the packet can be sent now without violating Nagle's rules:
 * 1. It is full sized.
 * 2. Or it contains a FIN. (already checked by caller)
 * 3. Or TCP_NODELAY was set.
 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
 *    With Minshall's modification: all sent small packets are ACKed.
 */
static inline int tcp_nagle_check(const struct tcp_sock *tp,
                                  const struct sk_buff *skb,
                                  unsigned mss_now, int nonagle)
{
        return (skb->len < mss_now &&
                ((nonagle & TCP_NAGLE_CORK) ||
                 (!nonagle &&
                  tp->packets_out &&
                  tcp_minshall_check(tp))));
}

/* Return non-zero if the Nagle test allows this packet to be
 * sent now.
 */
static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
                                 unsigned int cur_mss, int nonagle)
{
        /* The Nagle rule does not apply to frames that sit in the middle of
         * the write_queue (they have no chance to get new data).
         *
         * This is implemented in the callers, where they modify the 'nonagle'
         * argument based upon the location of SKB in the send queue.
         */
        if (nonagle & TCP_NAGLE_PUSH)
                return 1;

        /* Don't use the Nagle rule for urgent data (or for the final FIN).
         * Nagle can be ignored during F-RTO too (see RFC4138).
         */
        if (tp->urg_mode || (tp->frto_counter == 2) ||
            (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
                return 1;

        if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
                return 1;

        return 0;
}

/* Does at least the first segment of SKB fit into the send window? */
static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
                                   unsigned int cur_mss)
{
        u32 end_seq = TCP_SKB_CB(skb)->end_seq;

        if (skb->len > cur_mss)
                end_seq = TCP_SKB_CB(skb)->seq + cur_mss;

        return !after(end_seq, tp->snd_una + tp->snd_wnd);
}
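/* Illustrative sketch (not part of tcp_output.c): the decision encoded by
 * tcp_nagle_test()/tcp_nagle_check() above, reduced to plain flags.  The
 * helper and its parameter names are hypothetical.  A sub-MSS segment is
 * held back when the socket is corked, or when Nagle is enabled, data is
 * already in flight, and an earlier small segment is still unacknowledged
 * (the Minshall condition checked by tcp_minshall_check()).
 */
static inline int example_nagle_defers_small_write(int corked, int nodelay,
                                                   int packets_out,
                                                   int small_seg_unacked)
{
        return corked ||
               (!nodelay && packets_out && small_seg_unacked);
}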
/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
 * should be put on the wire right now.  If so, it returns the number of
 * packets allowed by the congestion window.
 */
static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
                                 unsigned int cur_mss, int nonagle)
{
        struct tcp_sock *tp = tcp_sk(sk);
        unsigned int cwnd_quota;

        tcp_init_tso_segs(sk, skb, cur_mss);

        if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
                return 0;

        cwnd_quota = tcp_cwnd_test(tp, skb);
        if (cwnd_quota &&
            !tcp_snd_wnd_test(tp, skb, cur_mss))
                cwnd_quota = 0;

        return cwnd_quota;
}

int tcp_may_send_now(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb = tcp_send_head(sk);

        return (skb &&
                tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
                             (tcp_skb_is_last(sk, skb) ?
                              tp->nonagle : TCP_NAGLE_PUSH)));
}

/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
 * which is put after SKB on the list.  It is very much like
 * tcp_fragment() except that it may make several kinds of assumptions
 * in order to speed up the splitting operation.  In particular, we
 * know that all the data is in scatter-gather pages, and that the
 * packet has never been sent out before (and thus is not cloned).
 */
static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
                        unsigned int mss_now)
{
        struct sk_buff *buff;
        int nlen = skb->len - len;
        u16 flags;

        /* All of a TSO frame must be composed of paged data. */
        if (skb->len != skb->data_len)
                return tcp_fragment(sk, skb, len, mss_now);

        buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
        if (unlikely(buff == NULL))
                return -ENOMEM;

        sk_charge_skb(sk, buff);
        buff->truesize += nlen;
        skb->truesize -= nlen;

        /* Correct the sequence numbers. */
        TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
        TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

        /* PSH and FIN should only be set in the second packet. */
        flags = TCP_SKB_CB(skb)->flags;
        TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
        TCP_SKB_CB(buff)->flags = flags;

        /* This packet was never sent out yet, so no SACK bits. */
        TCP_SKB_CB(buff)->sacked = 0;

        buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
        skb_split(skb, buff, len);

        /* Fix up tso_factor for both original and new SKB. */
        tcp_set_skb_tso_segs(sk, skb, mss_now);
        tcp_set_skb_tso_segs(sk, buff, mss_now);

        /* Link BUFF into the send queue. */
        skb_header_release(buff);
        tcp_insert_write_queue_after(skb, buff, sk);

        return 0;
}

/* Try to defer sending, if possible, in order to minimize the amount
 * of TSO splitting we do.  View it as a kind of TSO Nagle test.
 *
 * This algorithm is from John Heffner.
 */
static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 send_win, cong_win, limit, in_flight;

        if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
                goto send_now;

        if (icsk->icsk_ca_state != TCP_CA_Open)
                goto send_now;

        /* Defer for less than two clock ticks. */
        if (!tp->tso_deferred && ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
                goto send_now;

        in_flight = tcp_packets_in_flight(tp);

        BUG_ON(tcp_skb_pcount(skb) <= 1 ||
               (tp->snd_cwnd <= in_flight));

        send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;

        /* From in_flight test above, we know that cwnd > in_flight. */
        cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;

        limit = min(send_win, cong_win);

        /* If a full-sized TSO skb can be sent, do it. */
        if (limit >= 65536)
                goto send_now;

        if (sysctl_tcp_tso_win_divisor) {
                u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);

                /* If at least some fraction of a window is available,
                 * just use it.
                 */
                chunk /= sysctl_tcp_tso_win_divisor;
                if (limit >= chunk)
                        goto send_now;
        } else {
                /* Different approach, try not to defer past a single
                 * ACK.  Receiver should ACK every other full sized
                 * frame, so if we have space for more than 3 frames
                 * then send now.
                 */
                if (limit > tcp_max_burst(tp) * tp->mss_cache)
                        goto send_now;
        }

        /* Ok, it looks like it is advisable to defer. */
        tp->tso_deferred = 1 | (jiffies << 1);

        return 1;

send_now:
        tp->tso_deferred = 0;
        return 0;
}
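/* Illustrative sketch (not part of tcp_output.c): the byte budget that
 * tcp_tso_should_defer() above weighs, under a hypothetical helper name
 * with sample numbers.  Assuming the early-exit checks pass and the send
 * window is not the tighter bound: with mss_cache == 1448, snd_cwnd == 20
 * and in_flight == 15, the free congestion window is 5 * 1448 = 7240
 * bytes -- well below a full 64 KB TSO frame.  With
 * sysctl_tcp_tso_win_divisor == 3, the "some fraction of a window"
 * threshold is (20 * 1448) / 3 = 9653 bytes, so 7240 < 9653 and the
 * segment is deferred.
 */
static inline u32 example_tso_send_budget(u32 snd_una, u32 snd_wnd, u32 seq,
                                          u32 snd_cwnd, u32 in_flight,
                                          u32 mss_cache)
{
        u32 send_win = (snd_una + snd_wnd) - seq;
        u32 cong_win = (snd_cwnd - in_flight) * mss_cache;

        return min(send_win, cong_win);
}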
/* Create a new MTU probe if we are ready.
 * Returns 0 if we should wait to probe (no cwnd available),
 *         1 if a probe was sent,
 *         -1 otherwise
 */
static int tcp_mtu_probe(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct sk_buff *skb, *nskb, *next;
        int len;
        int probe_size;
        int size_needed;
        unsigned int pif;
        int copy;
        int mss_now;

        /* Not currently probing/verifying,
         * not in recovery,
         * have enough cwnd, and
         * not SACKing (the variable headers throw things off)
         */
        if (!icsk->icsk_mtup.enabled ||
            icsk->icsk_mtup.probe_size ||
            inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
            tp->snd_cwnd < 11 ||
            tp->rx_opt.eff_sacks)
                return -1;

        /* Very simple search strategy: just double the MSS. */
        mss_now = tcp_current_mss(sk, 0);
        probe_size = 2 * tp->mss_cache;
        size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
        if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
                /* TODO: set timer for probe_converge_event */
                return -1;
        }

        /* Have enough data in the send queue to probe? */
        if (tp->write_seq - tp->snd_nxt < size_needed)
                return -1;

        if (tp->snd_wnd < size_needed)
                return -1;
        if (after(tp->snd_nxt + size_needed, tp->snd_una + tp->snd_wnd))
                return 0;

        /* Do we need to wait to drain cwnd? */
        pif = tcp_packets_in_flight(tp);
        if (pif + 2 > tp->snd_cwnd) {
                /* With no packets in flight, don't stall. */
                if (pif == 0)
                        return -1;
                else
                        return 0;
        }

        /* We're allowed to probe.  Build it now. */
        if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
                return -1;
        sk_charge_skb(sk, nskb);

        skb = tcp_send_head(sk);
        tcp_insert_write_queue_before(nskb, skb, sk);
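/* Illustrative sketch (not part of tcp_output.c): the probe sizing
 * arithmetic used at the top of tcp_mtu_probe() above.  The struct and
 * helper are hypothetical.  With mss_cache == 1448 and reordering == 3,
 * the probe carries 2 * 1448 = 2896 bytes and the send queue must hold
 * 2896 + (3 + 1) * 1448 = 8688 bytes before a probe is attempted.
 */
struct example_probe_plan {
        int probe_size;         /* payload carried by the probe segment */
        int size_needed;        /* queued data required before probing */
};

static inline struct example_probe_plan example_mtu_probe_plan(int mss_cache,
                                                               int reordering)
{
        struct example_probe_plan p;

        p.probe_size  = 2 * mss_cache;
        p.size_needed = p.probe_size + (reordering + 1) * mss_cache;
        return p;
}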