/* tcp.h */
/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);

static inline void tcp_clear_xmit_timers(struct sock *sk)
{
    inet_csk_clear_xmit_timers(sk);
}

extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_current_mss(struct sock *sk, int large);

/* tcp.c */
extern void tcp_get_info(struct sock *, struct tcp_info *);

/* Read 'sendfile()'-style from a TCP socket */
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
                               unsigned int, size_t);
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                         sk_read_actor_t recv_actor);

extern void tcp_initialize_rcv_mss(struct sock *sk);

extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);

static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
    tp->pred_flags = htonl((tp->tcp_header_len << 26) |
                           ntohl(TCP_FLAG_ACK) |
                           snd_wnd);
}

static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
    __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk)
{
    struct tcp_sock *tp = tcp_sk(sk);

    if (skb_queue_empty(&tp->out_of_order_queue) &&
        tp->rcv_wnd &&
        atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
        !tp->urg_data)
        tcp_fast_path_on(tp);
}

/* Compute the actual receive window we are currently advertising.
 * Rcv_nxt can be after the window if our peer pushes more data
 * than the offered window.
 */
static inline u32 tcp_receive_window(const struct tcp_sock *tp)
{
    s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

    if (win < 0)
        win = 0;
    return (u32) win;
}

/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result. The caller does these things
 * if necessary. This is a "raw" window selection.
 */
extern u32 __tcp_select_window(struct sock *sk);

/* TCP timestamps are only 32-bits, this causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below. We decided
 * to use only the low 32-bits of jiffies and hide the ugly
 * casts with the following macro.
 */
#define tcp_time_stamp ((__u32)(jiffies))
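/*
 * Editorial worked example for tcp_receive_window() above (hypothetical
 * numbers, not from the kernel source): with rcv_wup = 1000, rcv_wnd = 4000
 * and rcv_nxt = 1500, the window still available to advertise is
 * 1000 + 4000 - 1500 = 3500 bytes. If the peer has pushed past the offered
 * window (rcv_nxt > rcv_wup + rcv_wnd), the intermediate value goes
 * negative and is clamped to 0.
 */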
/* This is what the send packet queuing engine uses to pass
 * TCP per-packet control information to the transmission
 * code. We also store the host-order sequence numbers in
 * here too. This is 36 bytes on 32-bit architectures,
 * 40 bytes on 64-bit machines, if this grows please adjust
 * skbuff.h:skbuff->cb[xxx] size appropriately.
 */
struct tcp_skb_cb {
    union {
        struct inet_skb_parm h4;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        struct inet6_skb_parm h6;
#endif
    } header;          /* For incoming frames          */
    __u32 seq;         /* Starting sequence number     */
    __u32 end_seq;     /* SEQ + FIN + SYN + datalen    */
    __u32 when;        /* used to compute rtt's        */
    __u8 flags;        /* TCP header flags.            */

    /* NOTE: These must match up to the flags byte in a
     *       real TCP header.
     */
#define TCPCB_FLAG_FIN        0x01
#define TCPCB_FLAG_SYN        0x02
#define TCPCB_FLAG_RST        0x04
#define TCPCB_FLAG_PSH        0x08
#define TCPCB_FLAG_ACK        0x10
#define TCPCB_FLAG_URG        0x20
#define TCPCB_FLAG_ECE        0x40
#define TCPCB_FLAG_CWR        0x80

    __u8 sacked;       /* State flags for SACK/FACK.   */
#define TCPCB_SACKED_ACKED    0x01 /* SKB ACK'd by a SACK block    */
#define TCPCB_SACKED_RETRANS  0x02 /* SKB retransmitted            */
#define TCPCB_LOST            0x04 /* SKB is lost                  */
#define TCPCB_TAGBITS         0x07 /* All tag bits                 */

#define TCPCB_EVER_RETRANS    0x80 /* Ever retransmitted frame     */
#define TCPCB_RETRANS         (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

#define TCPCB_URG             0x20 /* Urgent pointer advanced here */

#define TCPCB_AT_TAIL         (TCPCB_URG)

    __u16 urg_ptr;     /* Valid w/URG flags is set.    */
    __u32 ack_seq;     /* Sequence number ACK'd        */
};

#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))

/* Due to TSO, an SKB can be composed of multiple actual
 * packets. To keep these tracked properly, we use this.
 */
static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
    return skb_shinfo(skb)->gso_segs;
}

/* This is valid iff tcp_skb_pcount() > 1. */
static inline int tcp_skb_mss(const struct sk_buff *skb)
{
    return skb_shinfo(skb)->gso_size;
}

static inline void tcp_dec_pcount_approx_int(__u32 *count, const int decr)
{
    if (*count) {
        *count -= decr;
        if ((int)*count < 0)
            *count = 0;
    }
}

static inline void tcp_dec_pcount_approx(__u32 *count,
                                         const struct sk_buff *skb)
{
    tcp_dec_pcount_approx_int(count, tcp_skb_pcount(skb));
}

/* Events passed to congestion control interface */
enum tcp_ca_event {
    CA_EVENT_TX_START,      /* first transmit when no packets in flight */
    CA_EVENT_CWND_RESTART,  /* congestion window restart */
    CA_EVENT_COMPLETE_CWR,  /* end of congestion recovery */
    CA_EVENT_FRTO,          /* fast recovery timeout */
    CA_EVENT_LOSS,          /* loss timeout */
    CA_EVENT_FAST_ACK,      /* in sequence ack */
    CA_EVENT_SLOW_ACK,      /* other ack */
};

/*
 * Interface for adding new TCP congestion control handlers
 */
#define TCP_CA_NAME_MAX 16
#define TCP_CA_MAX      128
#define TCP_CA_BUF_MAX  (TCP_CA_NAME_MAX*TCP_CA_MAX)

#define TCP_CONG_NON_RESTRICTED 0x1
#define TCP_CONG_RTT_STAMP      0x2

struct tcp_congestion_ops {
    struct list_head list;
    unsigned long flags;

    /* initialize private data (optional) */
    void (*init)(struct sock *sk);
    /* cleanup private data (optional) */
    void (*release)(struct sock *sk);

    /* return slow start threshold (required) */
    u32 (*ssthresh)(struct sock *sk);
    /* lower bound for congestion window (optional) */
    u32 (*min_cwnd)(const struct sock *sk);
    /* do new cwnd calculation (required) */
    void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight, int good_ack);
    /* call before changing ca_state (optional) */
    void (*set_state)(struct sock *sk, u8 new_state);
    /* call when cwnd event occurs (optional) */
    void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
    /* new value of cwnd after loss (optional) */
    u32 (*undo_cwnd)(struct sock *sk);
    /* hook for packet ack accounting (optional) */
    void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
    /* get info for inet_diag (optional) */
    void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);

    char name[TCP_CA_NAME_MAX];
    struct module *owner;
};

extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern void tcp_get_available_congestion_control(char *buf, size_t len);
extern void tcp_get_allowed_congestion_control(char *buf, size_t len);
extern int tcp_set_allowed_congestion_control(char *allowed);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);
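/*
 * Editorial sketch (not part of tcp.h): a minimal, hypothetical congestion
 * control module written against the tcp_congestion_ops interface above.
 * Only .ssthresh and .cong_avoid are required; every other hook may stay
 * NULL. All "example_*" names are invented for illustration, and the module
 * glue assumes <linux/module.h>. Kept inside #if 0 so the excerpt stays inert.
 */
#if 0
static u32 example_ssthresh(struct sock *sk)
{
    const struct tcp_sock *tp = tcp_sk(sk);

    /* Reno-style reaction to loss: halve cwnd, but never below 2. */
    return max(tp->snd_cwnd >> 1U, 2U);
}

static void example_cong_avoid(struct sock *sk, u32 ack, u32 in_flight,
                               int good_ack)
{
    struct tcp_sock *tp = tcp_sk(sk);

    if (tp->snd_cwnd <= tp->snd_ssthresh) {
        /* Slow start: grow cwnd by one segment per ACK. */
        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
            tp->snd_cwnd++;
    } else if (++tp->snd_cwnd_cnt >= tp->snd_cwnd) {
        /* Congestion avoidance: roughly one extra segment per RTT. */
        tp->snd_cwnd_cnt = 0;
        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
            tp->snd_cwnd++;
    }
}

static struct tcp_congestion_ops example_cc = {
    .ssthresh   = example_ssthresh,
    .cong_avoid = example_cong_avoid,
    .owner      = THIS_MODULE,
    .name       = "example_cc",
};

static int __init example_cc_register(void)
{
    return tcp_register_congestion_control(&example_cc);
}

static void __exit example_cc_unregister(void)
{
    tcp_unregister_congestion_control(&example_cc);
}

module_init(example_cc_register);
module_exit(example_cc_unregister);
#endif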
extern void tcp_slow_start(struct tcp_sock *tp);

extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag);
extern u32 tcp_reno_min_cwnd(const struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;

static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
    struct inet_connection_sock *icsk = inet_csk(sk);

    if (icsk->icsk_ca_ops->set_state)
        icsk->icsk_ca_ops->set_state(sk, ca_state);
    icsk->icsk_ca_state = ca_state;
}

static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
    const struct inet_connection_sock *icsk = inet_csk(sk);

    if (icsk->icsk_ca_ops->cwnd_event)
        icsk->icsk_ca_ops->cwnd_event(sk, event);
}

/* These functions determine how the current flow behaves in respect of SACK
 * handling. SACK is negotiated with the peer, and therefore it can vary
 * between different flows.
 *
 * tcp_is_sack - SACK enabled
 * tcp_is_reno - No SACK
 * tcp_is_fack - FACK enabled, implies SACK enabled
 */
static inline int tcp_is_sack(const struct tcp_sock *tp)
{
    return tp->rx_opt.sack_ok;
}

static inline int tcp_is_reno(const struct tcp_sock *tp)
{
    return !tcp_is_sack(tp);
}

static inline int tcp_is_fack(const struct tcp_sock *tp)
{
    return tp->rx_opt.sack_ok & 2;
}

static inline void tcp_enable_fack(struct tcp_sock *tp)
{
    tp->rx_opt.sack_ok |= 2;
}

static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
{
    return tp->sacked_out + tp->lost_out;
}

/* This determines how many packets are "in the network" to the best
 * of our knowledge. In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control, use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *   "Packets sent once on transmission queue" MINUS
 *   "Packets left network, but not honestly ACKed yet" PLUS
 *   "Packets fast retransmitted"
 */
static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
    return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}

/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 * The exception is rate halving phase, when cwnd is decreasing towards
 * ssthresh.
 */
static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
    const struct tcp_sock *tp = tcp_sk(sk);

    if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
        return tp->snd_ssthresh;
    else
        return max(tp->snd_ssthresh,
                   ((tp->snd_cwnd >> 1) +
                    (tp->snd_cwnd >> 2)));
}

/* Use define here intentionally to get WARN_ON location shown at the caller */
#define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)

extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
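/*
 * Editorial worked example (hypothetical numbers, not from the kernel
 * source) for the accounting helpers above: with packets_out = 10,
 * sacked_out = 3, lost_out = 2 and retrans_out = 1,
 * tcp_left_out() = 3 + 2 = 5 and tcp_packets_in_flight() = 10 - 5 + 1 = 6
 * segments presumed still in the network. Similarly, outside CWR/Recovery,
 * with snd_cwnd = 40 and snd_ssthresh = 20, tcp_current_ssthresh() returns
 * max(20, 20 + 10) = 30, i.e. three quarters of the current cwnd.
 */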
/* Slow start with delack produces 3 packets of burst, so that
 * it is safe "de facto".
 */
static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
{
    return 3;
}

/* RFC2861 Check whether we are limited by application or congestion window
 * This is the inverse of cwnd check in tcp_tso_should_defer
 */
static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
{
    const struct tcp_sock *tp = tcp_sk(sk);
    u32 left;

    if (in_flight >= tp->snd_cwnd)
        return 1;

    if (!sk_can_gso(sk))
        return 0;

    left = tp->snd_cwnd - in_flight;
    if (sysctl_tcp_tso_win_divisor)
        return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
    else
        return left <= tcp_max_burst(tp);
}

static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
                                       const struct sk_buff *skb)
{
    if (skb->len < mss)
        tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
}

static inline void tcp_check_probe_timer(struct sock *sk)
{
    struct tcp_sock *tp = tcp_sk(sk);
    const struct inet_connection_sock *icsk = inet_csk(sk);

    if (!tp->packets_out && !icsk->icsk_pending)
        inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
                                  icsk->icsk_rto, TCP_RTO_MAX);
}

static inline void tcp_push_pending_frames(struct sock *sk)
{
    struct tcp_sock *tp = tcp_sk(sk);

    __tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
}

static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
    tp->snd_wl1 = seq;
}

static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
{
    tp->snd_wl1 = seq;
}

/*
 * Calculate(/check) TCP checksum
 */
static inline __sum16 tcp_v4_check(int len, __be32 saddr,
                                   __be32 daddr, __wsum base)
{
    return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
{
    return __skb_checksum_complete(skb);
}

static inline int tcp_checksum_complete(struct sk_buff *skb)
{
    return !skb_csum_unnecessary(skb) &&
           __tcp_checksum_complete(skb);
}

/* Prequeue for VJ style copy to user, combined with checksumming. */

static inline void tcp_prequeue_init(struct tcp_sock *tp)
{
    tp->ucopy.task = NULL;
    tp->ucopy.len = 0;
    tp->ucopy.memory = 0;
    skb_queue_head_init(&tp->ucopy.prequeue);
#ifdef CONFIG_NET_DMA
    tp->ucopy.dma_chan = NULL;
    tp->ucopy.wakeup = 0;
    tp->ucopy.pinned_list = NULL;
    tp->ucopy.dma_cookie = 0;
#endif
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8)                               --ANK
 *
 * NOTE: is this not too big to inline?
 */
static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
    struct tcp_sock *tp = tcp_sk(sk);

    if (!sysctl_tcp_low_latency && tp->ucopy.task) {
        __skb_queue_tail(&tp->ucopy.prequeue, skb);
        tp->ucopy.memory += skb->truesize;
        if (tp->ucopy.memory > sk->sk_rcvbuf) {
            struct sk_buff *skb1;

            BUG_ON(sock_owned_by_user(sk));

            while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
                sk->sk_backlog_rcv(sk, skb1);
                NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED);
            }

            tp->ucopy.memory = 0;
        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
            wake_up_interruptible(sk->sk_sleep);
            if (!inet_csk_ack_scheduled(sk))
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
                                          (3 * TCP_RTO_MIN) / 4,
                                          TCP_RTO_MAX);
        }
        return 1;
    }
    return 0;
}

#undef STATE_TRACE

#ifdef STATE_TRACE
static const char *statename[] = {
    "Unused", "Established", "Syn Sent", "Syn Recv",
    "Fin Wait 1", "Fin Wait 2", "Time Wait", "Close",
    "Close Wait", "Last ACK", "Listen", "Closing"
};
#endif
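/*
 * Editorial sketch (not part of tcp.h): a hypothetical sk_read_actor_t
 * callback showing how tcp_read_sock() is typically driven. The
 * example_* names and the destination-buffer bookkeeping are invented for
 * illustration; the caller is assumed to hold the socket lock. Kept inside
 * #if 0 so the excerpt stays inert.
 */
#if 0
struct example_read_state {
    char  *buf;      /* destination buffer */
    size_t copied;   /* bytes copied so far */
};

static int example_recv_actor(read_descriptor_t *desc, struct sk_buff *skb,
                              unsigned int offset, size_t len)
{
    struct example_read_state *st = desc->arg.data;
    int err;

    /* Consume no more than the caller asked for via desc->count. */
    if (len > desc->count)
        len = desc->count;

    err = skb_copy_bits(skb, offset, st->buf + st->copied, len);
    if (err)
        return err;           /* negative error stops tcp_read_sock() */

    st->copied += len;
    desc->count -= len;
    return len;               /* bytes actually consumed from this skb */
}

/* Typical invocation, with the socket locked:
 *
 *   struct example_read_state st = { .buf = buf, .copied = 0 };
 *   read_descriptor_t desc = { .count = buflen };
 *
 *   desc.arg.data = &st;
 *   tcp_read_sock(sk, &desc, example_recv_actor);
 */
#endif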