
tcp_output.c

Linux kernel source code
Language: C
Page 1 of 5
/* We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	int tcp_header_size;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *md5;
	__u8 *md5_hash_location;
#endif
	struct tcphdr *th;
	int sysctl_flags;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));

	/* If congestion control is doing timestamping, we must
	 * take such a timestamp before we potentially clone/copy.
	 */
	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
		__net_timestamp(skb);

	if (likely(clone_it)) {
		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tp = tcp_sk(sk);
	tcb = TCP_SKB_CB(skb);
	tcp_header_size = tp->tcp_header_len;

#define SYSCTL_FLAG_TSTAMPS	0x1
#define SYSCTL_FLAG_WSCALE	0x2
#define SYSCTL_FLAG_SACK	0x4

	sysctl_flags = 0;
	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
		if (sysctl_tcp_timestamps) {
			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
		}
		if (sysctl_tcp_window_scaling) {
			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
			sysctl_flags |= SYSCTL_FLAG_WSCALE;
		}
		if (sysctl_tcp_sack) {
			sysctl_flags |= SYSCTL_FLAG_SACK;
			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
		}
	} else if (unlikely(tp->rx_opt.eff_sacks)) {
		/* A SACK is 2 pad bytes, a 2 byte header, plus
		 * 2 32-bit sequence numbers for each SACK block.
		 */
		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
				    (tp->rx_opt.eff_sacks *
				     TCPOLEN_SACK_PERBLOCK));
	}

	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

#ifdef CONFIG_TCP_MD5SIG
	/*
	 * Are we doing MD5 on this segment? If so - make
	 * room for it.
	 */
	md5 = tp->af_specific->md5_lookup(sk, sk);
	if (md5)
		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);
	skb_set_owner_w(skb, sk);

	/* Build TCP header and checksum it. */
	th = tcp_hdr(skb);
	th->source		= inet->sport;
	th->dest		= inet->dport;
	th->seq			= htonl(tcb->seq);
	th->ack_seq		= htonl(tp->rcv_nxt);
	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
					tcb->flags);

	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window	= htons(min(tp->rcv_wnd, 65535U));
	} else {
		th->window	= htons(tcp_select_window(sk));
	}
	th->check		= 0;
	th->urg_ptr		= 0;

	if (unlikely(tp->urg_mode &&
		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
		th->urg			= 1;
	}

	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
		tcp_syn_build_options((__be32 *)(th + 1),
				      tcp_advertise_mss(sk),
				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
				      (sysctl_flags & SYSCTL_FLAG_SACK),
				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
				      tp->rx_opt.rcv_wscale,
				      tcb->when,
				      tp->rx_opt.ts_recent,
#ifdef CONFIG_TCP_MD5SIG
				      md5 ? &md5_hash_location :
#endif
				      NULL);
	} else {
		tcp_build_and_update_options((__be32 *)(th + 1),
					     tp, tcb->when,
#ifdef CONFIG_TCP_MD5SIG
					     md5 ? &md5_hash_location :
#endif
					     NULL);
		TCP_ECN_send(sk, skb, tcp_header_size);
	}

#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
		tp->af_specific->calc_md5_hash(md5_hash_location,
					       md5,
					       sk, NULL, NULL,
					       tcp_hdr(skb),
					       sk->sk_protocol,
					       skb->len);
	}
#endif

	icsk->icsk_af_ops->send_check(sk, skb->len, skb);

	if (likely(tcb->flags & TCPCB_FLAG_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	if (skb->len != tcp_header_size)
		tcp_event_data_sent(tp, skb, sk);

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_INC_STATS(TCP_MIB_OUTSEGS);

	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
	if (likely(err <= 0))
		return err;

	tcp_enter_cwr(sk, 1);

	return net_xmit_eval(err);

#undef SYSCTL_FLAG_TSTAMPS
#undef SYSCTL_FLAG_WSCALE
#undef SYSCTL_FLAG_SACK
}
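/*
 * Illustrative aside (not part of tcp_output.c): the fused assignment
 * to "*(((__be16 *)th) + 6)" above writes TCP header bytes 12-13 in a
 * single 16-bit store: the 4-bit data offset (header length in 32-bit
 * words) lands in the top nibble and the flag bits in the low byte.
 * A minimal standalone userspace sketch of the same packing; the
 * concrete values are made up for illustration:
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	int tcp_header_size = 32;	/* hypothetical: 20-byte base header + 12 bytes of options */
	uint16_t flags = 0x18;		/* PSH (0x08) | ACK (0x10), wire-format bit values */

	/* Same packing as tcp_transmit_skb: doff in 32-bit words << 12, OR flags. */
	uint16_t word = htons(((tcp_header_size >> 2) << 12) | flags);

	printf("header bytes 12-13: 0x%04x\n", ntohs(word));	/* 0x8018 */
	return 0;
}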
/* This routine just queues the buffer.
 *
 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
 * otherwise socket can stall.
 */
static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Advance write_seq and place onto the write_queue. */
	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk_charge_skb(sk, skb);
}

static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
{
	if (skb->len <= mss_now || !sk_can_gso(sk)) {
		/* Avoid the costly divide in the normal
		 * non-TSO case.
		 */
		skb_shinfo(skb)->gso_segs = 1;
		skb_shinfo(skb)->gso_size = 0;
		skb_shinfo(skb)->gso_type = 0;
	} else {
		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
		skb_shinfo(skb)->gso_size = mss_now;
		skb_shinfo(skb)->gso_type = sk->sk_gso_type;
	}
}

/* When a modification to fackets out becomes necessary, we need to check
 * skb is counted to fackets_out or not. Another important thing is to
 * tweak SACK fastpath hint too as it would overwrite all changes unless
 * hint is also changed.
 */
static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
				   int decr)
{
	if (!tp->sacked_out || tcp_is_reno(tp))
		return;

	if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq))
		tp->fackets_out -= decr;

	/* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
	if (tp->fastpath_skb_hint != NULL &&
	    after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
		tp->fastpath_cnt_hint -= decr;
}
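/*
 * Illustrative aside (not part of tcp_output.c): for a super-packet
 * larger than one MSS, tcp_set_skb_tso_segs() above records how many
 * on-wire segments the offload hardware will emit, computed as
 * DIV_ROUND_UP(skb->len, mss_now). A standalone sketch of that
 * arithmetic; the payload sizes are made up for illustration:
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))	/* same as the kernel macro */

int main(void)
{
	unsigned int mss_now = 1448;	/* hypothetical current MSS */
	unsigned int lens[] = { 1000, 1448, 1449, 65160 };
	int i;

	for (i = 0; i < 4; i++) {
		/* At most one MSS: the kernel skips the divide, gso_segs = 1. */
		unsigned int segs = (lens[i] <= mss_now) ?
				1 : DIV_ROUND_UP(lens[i], mss_now);
		printf("payload %5u bytes -> gso_segs = %u\n", lens[i], segs);
	}
	return 0;	/* prints 1, 1, 2, 45 */
}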
/* Function to create two new TCP segments.  Shrinks the given segment
 * to the specified size and appends a new segment with the rest of the
 * packet to the list.  This won't be called frequently, I hope.
 * Remember, these are still headerless SKBs at this point.
 */
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;
	int nsize, old_factor;
	int nlen;
	u16 flags;

	BUG_ON(len > skb->len);

	tcp_clear_retrans_hints_partial(tp);
	nsize = skb_headlen(skb) - len;
	if (nsize < 0)
		nsize = 0;

	if (skb_cloned(skb) &&
	    skb_is_nonlinear(skb) &&
	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;

	/* Get a new skb... force flag on. */
	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
	if (buff == NULL)
		return -ENOMEM; /* We'll just try again later. */
	sk_charge_skb(sk, buff);

	nlen = skb->len - len - nsize;
	buff->truesize += nlen;
	skb->truesize -= nlen;

	/* Correct the sequence numbers. */
	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;

	if (tcp_is_sack(tp) && tp->sacked_out &&
	    (TCP_SKB_CB(skb)->seq == tp->highest_sack))
		tp->highest_sack = TCP_SKB_CB(buff)->seq;

	/* PSH and FIN should only be set in the second packet. */
	flags = TCP_SKB_CB(skb)->flags;
	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
	TCP_SKB_CB(buff)->flags = flags;
	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;

	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Copy and checksum data tail into the new buffer. */
		buff->csum = csum_partial_copy_nocheck(skb->data + len,
						       skb_put(buff, nsize),
						       nsize, 0);
		skb_trim(skb, len);

		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
	} else {
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb_split(skb, buff, len);
	}

	buff->ip_summed = skb->ip_summed;

	/* Looks stupid, but our code really uses when of
	 * skbs, which it never sent before. --ANK
	 */
	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
	buff->tstamp = skb->tstamp;

	old_factor = tcp_skb_pcount(skb);

	/* Fix up tso_factor for both original and new SKB.  */
	tcp_set_skb_tso_segs(sk, skb, mss_now);
	tcp_set_skb_tso_segs(sk, buff, mss_now);

	/* If this packet has been sent out already, we must
	 * adjust the various packet counters.
	 */
	if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
		int diff = old_factor - tcp_skb_pcount(skb) -
			tcp_skb_pcount(buff);

		tp->packets_out -= diff;

		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
			tp->sacked_out -= diff;
		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
			tp->retrans_out -= diff;
		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
			tp->lost_out -= diff;

		/* Adjust Reno SACK estimate. */
		if (tcp_is_reno(tp) && diff > 0) {
			tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
			tcp_verify_left_out(tp);
		}
		tcp_adjust_fackets_out(tp, skb, diff);
	}

	/* Link BUFF into the send queue. */
	skb_header_release(buff);
	tcp_insert_write_queue_after(skb, buff, sk);

	return 0;
}

/* This is similar to __pskb_pull_head() (it will go to core/skbuff.c
 * eventually). The difference is that pulled data not copied, but
 * immediately discarded.
 */
static void __pskb_trim_head(struct sk_buff *skb, int len)
{
	int i, k, eat;

	eat = len;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	skb_reset_tail_pointer(skb);
	skb->data_len -= len;
	skb->len = skb->data_len;
}

int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
	if (skb_cloned(skb) &&
	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;

	/* If len == headlen, we avoid __skb_pull to preserve alignment. */
	if (unlikely(len < skb_headlen(skb)))
		__skb_pull(skb, len);
	else
		__pskb_trim_head(skb, len - skb_headlen(skb));

	TCP_SKB_CB(skb)->seq += len;
	skb->ip_summed = CHECKSUM_PARTIAL;

	skb->truesize	     -= len;
	sk->sk_wmem_queued   -= len;
	sk->sk_forward_alloc += len;
	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);

	/* Any change of skb->len requires recalculation of tso
	 * factor and mss.
	 */
	if (tcp_skb_pcount(skb) > 1)
		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));

	return 0;
}
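/*
 * Illustrative aside (not part of tcp_output.c): the heart of
 * tcp_fragment() above is the sequence split - the new buffer takes
 * over [seq + len, end_seq) and the original is shortened to end at
 * seq + len, with PSH/FIN moved to the second half. A toy model of
 * just that bookkeeping; struct cb below is a made-up stand-in for
 * the seq fields of struct tcp_skb_cb:
 */
#include <stdio.h>
#include <stdint.h>

struct cb {
	uint32_t seq;		/* first sequence number covered */
	uint32_t end_seq;	/* one past the last byte covered */
};

/* Mirror of tcp_fragment's sequence-number bookkeeping. */
static void split_seq(struct cb *skb, struct cb *buff, uint32_t len)
{
	buff->seq = skb->seq + len;
	buff->end_seq = skb->end_seq;
	skb->end_seq = buff->seq;
}

int main(void)
{
	struct cb skb = { 1000, 3000 };	/* 2000 bytes of payload */
	struct cb buff;

	split_seq(&skb, &buff, 1448);	/* keep one MSS in the first part */
	printf("skb  [%u, %u)\n", skb.seq, skb.end_seq);	/* [1000, 2448) */
	printf("buff [%u, %u)\n", buff.seq, buff.end_seq);	/* [2448, 3000) */
	return 0;
}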
/* Not accounting for SACKs here. */
int tcp_mtu_to_mss(struct sock *sk, int pmtu)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int mss_now;

	/* Calculate base mss without TCP options:
	   It is MMS_S - sizeof(tcphdr) of rfc1122
	 */
	mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);

	/* Clamp it (mss_clamp does not include tcp options) */
	if (mss_now > tp->rx_opt.mss_clamp)
		mss_now = tp->rx_opt.mss_clamp;

	/* Now subtract optional transport overhead */
	mss_now -= icsk->icsk_ext_hdr_len;

	/* Then reserve room for full set of TCP options and 8 bytes of data */
	if (mss_now < 48)
		mss_now = 48;

	/* Now subtract TCP options size, not including SACKs */
	mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);

	return mss_now;
}

/* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int mtu;

	mtu = mss +
	      tp->tcp_header_len +
	      icsk->icsk_ext_hdr_len +
	      icsk->icsk_af_ops->net_header_len;

	return mtu;
}

void tcp_mtup_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
			       icsk->icsk_af_ops->net_header_len;
	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
	icsk->icsk_mtup.probe_size = 0;
}
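/*
 * Illustrative aside (not part of tcp_output.c): tracing the
 * tcp_mtu_to_mss() arithmetic above with concrete IPv4 numbers. All
 * constants are hypothetical (plain IPv4, timestamps enabled, no IP
 * options), not values read from a live socket:
 */
#include <stdio.h>

int main(void)
{
	int pmtu = 1500;		/* typical Ethernet path MTU */
	int net_header_len = 20;	/* IPv4 header */
	int tcphdr_len = 20;		/* sizeof(struct tcphdr) */
	int ext_hdr_len = 0;		/* no IP options / IPsec */
	int mss_clamp = 1460;		/* peer-advertised MSS */
	int tcp_header_len = 20 + 12;	/* base header + aligned timestamp option */

	int mss_now = pmtu - net_header_len - tcphdr_len;	/* 1460 */
	if (mss_now > mss_clamp)
		mss_now = mss_clamp;				/* still 1460 */
	mss_now -= ext_hdr_len;					/* 1460 */
	if (mss_now < 48)					/* floor: options + 8 data bytes */
		mss_now = 48;
	mss_now -= tcp_header_len - tcphdr_len;			/* 1460 - 12 = 1448 */

	printf("mss_now = %d\n", mss_now);	/* 1448 payload bytes per segment */
	return 0;
}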
