tcp.h
		sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
		break;
	default:
		printk(timer_bug_msg);
	};
}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We have no direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate it:
 * overestimating makes us ACK less frequently than needed, while
 * underestimates are easier to detect and fix by tcp_measure_rcv_mss().
 */
static inline void tcp_initialize_rcv_mss(struct sock *sk)
{
	struct tcp_opt *tp = tcp_sk(sk);
	unsigned int hint = min(tp->advmss, tp->mss_cache_std);

	hint = min(hint, tp->rcv_wnd/2);
	hint = min(hint, TCP_MIN_RCVMSS);
	hint = max(hint, TCP_MIN_MSS);

	tp->ack.rcv_mss = hint;
}

static __inline__ void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
}

static __inline__ void tcp_fast_path_on(struct tcp_opt *tp)
{
	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
{
	if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
	    tp->rcv_wnd &&
	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
	    !tp->urg_data)
		tcp_fast_path_on(tp);
}

/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer pushes more data
 * than the offered window.
 */
static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
{
	s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

	if (win < 0)
		win = 0;
	return (u32) win;
}

/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result.  The caller does these things
 * if necessary.  This is a "raw" window selection.
 */
extern u32 __tcp_select_window(struct sock *sk);

/* TCP timestamps are only 32 bits, which causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below.  We decidedly
 * use only the low 32 bits of jiffies and hide the ugly
 * casts with the following macro.
 */
#define tcp_time_stamp		((__u32)(jiffies))
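/* Editor's illustration, not part of the original tcp.h: a minimal,
 * self-contained sketch of the tcp_receive_window() arithmetic above,
 * using plain unsigned integers instead of struct tcp_opt.  The function
 * name and the sample values in the trailing comment are hypothetical.
 */
static inline unsigned int example_receive_window(unsigned int rcv_wup,
						  unsigned int rcv_wnd,
						  unsigned int rcv_nxt)
{
	/* Bytes left in the last advertised window: the right edge
	 * (rcv_wup + rcv_wnd) minus what has already arrived (rcv_nxt),
	 * clamped at zero when the peer pushed past the offered window.
	 */
	int win = (int)(rcv_wup + rcv_wnd - rcv_nxt);

	return win < 0 ? 0 : (unsigned int)win;
}
/* E.g. rcv_wup = 1000, rcv_wnd = 4000, rcv_nxt = 2500 leaves 2500 bytes. */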
/* This is what the send packet queueing engine uses to pass
 * TCP per-packet control information to the transmission
 * code.  We also store the host-order sequence numbers in
 * here too.  This is 36 bytes on 32-bit architectures,
 * 40 bytes on 64-bit machines; if this grows please adjust
 * skbuff.h:skbuff->cb[xxx] size appropriately.
 */
struct tcp_skb_cb {
	union {
		struct inet_skb_parm	h4;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		struct inet6_skb_parm	h6;
#endif
	} header;	/* For incoming frames		*/
	__u32		seq;		/* Starting sequence number	*/
	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
	__u32		when;		/* used to compute rtt's	*/
	__u8		flags;		/* TCP header flags.		*/

	/* NOTE: These must match up to the flags byte in a
	 *       real TCP header.
	 */
#define TCPCB_FLAG_FIN		0x01
#define TCPCB_FLAG_SYN		0x02
#define TCPCB_FLAG_RST		0x04
#define TCPCB_FLAG_PSH		0x08
#define TCPCB_FLAG_ACK		0x10
#define TCPCB_FLAG_URG		0x20
#define TCPCB_FLAG_ECE		0x40
#define TCPCB_FLAG_CWR		0x80

	__u8		sacked;		/* State flags for SACK/FACK.	*/
#define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
#define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
#define TCPCB_LOST		0x04	/* SKB is lost			*/
#define TCPCB_TAGBITS		0x07	/* All tag bits			*/

#define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

#define TCPCB_URG		0x20	/* Urgent pointer advanced here	*/

#define TCPCB_AT_TAIL		(TCPCB_URG)

	__u16		urg_ptr;	/* Valid w/URG flag set		*/
	__u32		ack_seq;	/* Sequence number ACK'd	*/
};

#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

#include <net/tcp_ecn.h>

/* Due to TSO, an SKB can be composed of multiple actual
 * packets.  To keep these tracked properly, we use this.
 */
static inline int tcp_skb_pcount(struct sk_buff *skb)
{
	return skb_shinfo(skb)->tso_segs;
}

/* This is valid iff tcp_skb_pcount() > 1. */
static inline int tcp_skb_mss(struct sk_buff *skb)
{
	return skb_shinfo(skb)->tso_size;
}

static inline void tcp_inc_pcount(tcp_pcount_t *count, struct sk_buff *skb)
{
	count->val += tcp_skb_pcount(skb);
}

static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt)
{
	count->val += amt;
}

static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt)
{
	count->val -= amt;
}

static inline void tcp_dec_pcount(tcp_pcount_t *count, struct sk_buff *skb)
{
	count->val -= tcp_skb_pcount(skb);
}

static inline void tcp_dec_pcount_approx(tcp_pcount_t *count,
					 struct sk_buff *skb)
{
	if (count->val) {
		count->val -= tcp_skb_pcount(skb);
		if ((int)count->val < 0)
			count->val = 0;
	}
}

static inline __u32 tcp_get_pcount(tcp_pcount_t *count)
{
	return count->val;
}

static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val)
{
	count->val = val;
}

static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_opt *tp,
				       struct sk_buff *skb)
{
	int orig = tcp_get_pcount(&tp->packets_out);

	tcp_inc_pcount(&tp->packets_out, skb);
	if (!orig)
		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
}

static inline void tcp_packets_out_dec(struct tcp_opt *tp,
				       struct sk_buff *skb)
{
	tcp_dec_pcount(&tp->packets_out, skb);
}

/* This determines how many packets are "in the network" to the best
 * of our knowledge.  In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control; use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *	"Packets sent once on transmission queue" MINUS
 *	"Packets left network, but not honestly ACKed yet" PLUS
 *	"Packets fast retransmitted"
 */
static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
{
	return (tcp_get_pcount(&tp->packets_out) -
		tcp_get_pcount(&tp->left_out) +
		tcp_get_pcount(&tp->retrans_out));
}

/*
 * Which congestion algorithm is in use on the connection.
 */
#define tcp_is_vegas(__tp)	((__tp)->adv_cong == TCP_VEGAS)
#define tcp_is_westwood(__tp)	((__tp)->adv_cong == TCP_WESTWOOD)
#define tcp_is_bic(__tp)	((__tp)->adv_cong == TCP_BIC)

/* Recalculate snd_ssthresh.  We want to set it to:
 *
 * Reno:
 *	one half the current congestion window, but no
 *	less than two segments.
 *
 * BIC:
 *	behave like Reno until low_window is reached,
 *	then increase the congestion window slowly.
 */
static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
{
	if (tcp_is_bic(tp)) {
		if (sysctl_tcp_bic_fast_convergence &&
		    tp->snd_cwnd < tp->bictcp.last_max_cwnd)
			tp->bictcp.last_max_cwnd =
				(tp->snd_cwnd * (2*BICTCP_1_OVER_BETA-1)) /
				(BICTCP_1_OVER_BETA/2);
		else
			tp->bictcp.last_max_cwnd = tp->snd_cwnd;

		if (tp->snd_cwnd > sysctl_tcp_bic_low_window)
			return max(tp->snd_cwnd - (tp->snd_cwnd/BICTCP_1_OVER_BETA),
				   2U);
	}

	return max(tp->snd_cwnd >> 1U, 2U);
}
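/* Editor's illustration, not part of the original header: the "in flight"
 * equation above, spelled out with plain counters.  The helper name and the
 * numbers in the trailing comment are hypothetical.
 */
static inline unsigned int example_packets_in_flight(unsigned int packets_out,
						     unsigned int left_out,
						     unsigned int retrans_out)
{
	/* "Packets sent once on the transmission queue" MINUS
	 * "packets that left the network but are not honestly ACKed yet"
	 * (SACKed + lost) PLUS "packets fast retransmitted".
	 */
	return packets_out - left_out + retrans_out;
}
/* E.g. packets_out = 10, left_out = 3 (2 SACKed + 1 lost), retrans_out = 1
 * gives 8 segments presumed to still be in the network; this is the value
 * that gets compared against snd_cwnd.
 */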
/* Stop taking Vegas samples for now. */
#define tcp_vegas_disable(__tp)	((__tp)->vegas.doing_vegas_now = 0)

static inline void tcp_vegas_enable(struct tcp_opt *tp)
{
	/* There are several situations when we must "re-start" Vegas:
	 *
	 *  o when a connection is established
	 *  o after an RTO
	 *  o after fast recovery
	 *  o when we send a packet and there is no outstanding
	 *    unacknowledged data (restarting an idle connection)
	 *
	 * In these circumstances we cannot do a Vegas calculation at the
	 * end of the first RTT, because any calculation we do is using
	 * stale info -- both the saved cwnd and congestion feedback are
	 * stale.
	 *
	 * Instead we must wait until the completion of an RTT during
	 * which we actually receive ACKs.
	 */

	/* Begin taking Vegas samples next time we send something. */
	tp->vegas.doing_vegas_now = 1;

	/* Set the beginning of the next send window. */
	tp->vegas.beg_snd_nxt = tp->snd_nxt;

	tp->vegas.cntRTT = 0;
	tp->vegas.minRTT = 0x7fffffff;
}

/* Should we be taking Vegas samples right now? */
#define tcp_vegas_enabled(__tp)	((__tp)->vegas.doing_vegas_now)

extern void tcp_ca_init(struct tcp_opt *tp);

static inline void tcp_set_ca_state(struct tcp_opt *tp, u8 ca_state)
{
	if (tcp_is_vegas(tp)) {
		if (ca_state == TCP_CA_Open)
			tcp_vegas_enable(tp);
		else
			tcp_vegas_disable(tp);
	}

	tp->ca_state = ca_state;
}

/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is the rate-halving phase, when cwnd is decreasing
 * towards ssthresh.
 */
static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
{
	if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
		return tp->snd_ssthresh;
	else
		return max(tp->snd_ssthresh,
			   ((tp->snd_cwnd >> 1) +
			    (tp->snd_cwnd >> 2)));
}

static inline void tcp_sync_left_out(struct tcp_opt *tp)
{
	if (tp->sack_ok &&
	    (tcp_get_pcount(&tp->sacked_out) >=
	     tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out)))
		tcp_set_pcount(&tp->sacked_out,
			       (tcp_get_pcount(&tp->packets_out) -
				tcp_get_pcount(&tp->lost_out)));
	tcp_set_pcount(&tp->left_out,
		       (tcp_get_pcount(&tp->sacked_out) +
			tcp_get_pcount(&tp->lost_out)));
}

extern void tcp_cwnd_application_limited(struct sock *sk);

/* Congestion window validation. (RFC 2861) */

static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
{
	__u32 packets_out = tcp_get_pcount(&tp->packets_out);

	if (packets_out >= tp->snd_cwnd) {
		/* Network is fed fully. */
		tp->snd_cwnd_used = 0;
		tp->snd_cwnd_stamp = tcp_time_stamp;
	} else {
		/* Network starves. */
		if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used)
			tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out);

		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
			tcp_cwnd_application_limited(sk);
	}
}

/* Set the slow start threshold and cwnd without falling back into slow start. */
static inline void __tcp_enter_cwr(struct tcp_opt *tp)
{
	tp->undo_marker = 0;
	tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
	tp->snd_cwnd = min(tp->snd_cwnd,
			   tcp_packets_in_flight(tp) + 1U);
	tp->snd_cwnd_cnt = 0;
	tp->high_seq = tp->snd_nxt;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	TCP_ECN_queue_cwr(tp);
}

static inline void tcp_enter_cwr(struct tcp_opt *tp)
{
	tp->prior_ssthresh = 0;
	if (tp->ca_state < TCP_CA_CWR) {
		__tcp_enter_cwr(tp);
		tcp_set_ca_state(tp, TCP_CA_CWR);
	}
}

extern __u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst);
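/* Editor's illustration, not part of the original header: the
 * tcp_current_ssthresh() rule above in isolation.  Outside of CWR/Recovery
 * the effective threshold is raised to roughly three quarters of the
 * current cwnd.  The helper name and sample values are hypothetical.
 */
static inline unsigned int example_current_ssthresh(unsigned int snd_ssthresh,
						    unsigned int snd_cwnd,
						    int in_cwr_or_recovery)
{
	/* (cwnd >> 1) + (cwnd >> 2) == 3/4 of cwnd, rounded down. */
	unsigned int three_quarters = (snd_cwnd >> 1) + (snd_cwnd >> 2);

	if (in_cwr_or_recovery)
		return snd_ssthresh;	/* rate halving: leave ssthresh alone */

	return snd_ssthresh > three_quarters ? snd_ssthresh : three_quarters;
}
/* E.g. snd_cwnd = 40, snd_ssthresh = 20 in the Open state returns 30. */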
/* Slow start with delack produces a burst of 3 packets, so that
 * it is safe "de facto".
 */
static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp)
{
	return 3;
}

static __inline__ int tcp_minshall_check(struct tcp_opt *tp)
{
	return after(tp->snd_sml, tp->snd_una) &&
		!after(tp->snd_sml, tp->snd_nxt);
}

static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss,
					   struct sk_buff *skb)
{
	if (skb->len < mss)
		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}

/* Return 0 if the packet can be sent now without violating Nagle's rules:
 *  1. It is full sized.
 *  2. Or it contains a FIN.
 *  3. Or TCP_NODELAY was set.
 *  4. Or TCP_CORK is not set, and all sent packets are ACKed.
 *     With Minshall's modification: all sent small packets are ACKed.
 */
static __inline__ int
tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle)
{
	return (skb->len < mss_now &&
		!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
		((nonagle&TCP_NAGLE_CORK) ||
		 (!nonagle &&
		  tcp_get_pcount(&tp->packets_out) &&
		  tcp_minshall_check(tp))));
}

extern void tcp_set_skb_tso_segs(struct sk_buff *, unsigned int);

/* This checks if the data-bearing packet SKB (usually sk->sk_send_head)
 * should be put on the wire right now.
 */
static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
				   unsigned cur_mss, int nonagle)
{
	int pkts = tcp_skb_pcount(skb);

	if (!pkts) {
		tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
		pkts = tcp_skb_pcount(skb);
	}

	/*	RFC 1122 - section 4.2.3.4
	 *
	 *	We must queue if
	 *
	 *	a) The right edge of this frame exceeds the window
	 *	b) There are packets in flight and we have a small segment
	 *	   [SWS avoidance and Nagle algorithm]
	 *	   (part of SWS is done on packetization)
	 *	   Minshall's version reads: there are no _small_
	 *	   segments in flight. (tcp_nagle_check)
	 *	c) We have too many packets 'in flight'
	 *
	 *	Don't use the Nagle rule for urgent data (or
	 *	for the final FIN  -DaveM).
	 *
	 *	Also, the Nagle rule does not apply to frames which sit
	 *	in the middle of the queue (they have no chance to get
	 *	new data) or when the room at the tail of the skb is not
	 *	enough to save anything serious (<32 bytes for now).
	 */

	/* Don't be strict about the congestion window for the
	 * final FIN frame.  -DaveM
	 */
	return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
		 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
		(((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) ||
		 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
		!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
}

static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
{
	if (!tcp_get_pcount(&tp->packets_out) && !tp->pending)
		tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
}

static __inline__ int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb)
{
	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
}

/* Push out any pending frames which were held back due to
 * TCP_CORK or an attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 */
static __inline__ void __tcp_push_pending_frames(struct sock *sk,
						 struct tcp_opt *tp,
						 unsigned cur_mss,
						 int nonagle)
{
	struct sk_buff *skb = sk->sk_send_head;

	if (skb) {
		if (!tcp_skb_is_last(sk, skb))
			nonagle = TCP_NAGLE_PUSH;
		if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
		    tcp_write_xmit(sk, nonagle))
			tcp_check_probe_timer(sk, tp);
	}
	tcp_cwnd_validate(sk, tp);
}

static __inline__ void tcp_push_pending_frames(struct sock *sk,
					       struct tcp_opt *tp)
{
	__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
}

static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
{
	struct sk_buff *skb = sk->sk_send_head;

	return (skb &&
		tcp_snd_test(tp, skb, tcp_current_mss(sk, 1),
			     tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle));
}
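/* Editor's illustration, not part of the original header: the three gates
 * applied by tcp_snd_test() above, reduced to plain integers.  All names are
 * hypothetical, and the FIN / urgent-mode exceptions, TSO segment accounting
 * and sequence-number wraparound handling of the real function are omitted.
 */
static inline int example_may_send(unsigned int end_seq,   /* last byte + 1  */
				   unsigned int snd_una,   /* oldest unacked */
				   unsigned int snd_wnd,   /* peer's window  */
				   unsigned int in_flight,
				   unsigned int snd_cwnd,
				   int nagle_blocks)       /* tcp_nagle_check() said wait */
{
	/* a) the segment must fit inside the receiver's advertised window */
	if (end_seq > snd_una + snd_wnd)
		return 0;
	/* b) Nagle/Minshall: hold small segments while others are unacked */
	if (nagle_blocks)
		return 0;
	/* c) congestion window: no more than snd_cwnd segments in flight */
	if (in_flight >= snd_cwnd)
		return 0;
	return 1;
}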
static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
{
	tp->snd_wl1 = seq;
}

static __inline__ void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
{
	tp->snd_wl1 = seq;
}
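/* Editor's note, illustrative and not from the original header: the send
 * helpers above are driven from the socket's write path with the socket
 * locked.  The caller below is hypothetical; it only shows the shape of
 * such a call after new data has been queued on sk->sk_write_queue.
 */
static inline void tcp_example_push(struct sock *sk, struct tcp_opt *tp)
{
	/* Caller must hold the socket lock (lock_sock()). */
	tcp_push_pending_frames(sk, tp);	/* uses tp->nonagle and the current MSS */
}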