📄 tcp.h
字号:
/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the TCP module. * * Version: @(#)tcp.h 1.0.5 05/23/93 * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */#ifndef _TCP_H#define _TCP_H#define TCP_DEBUG 1#define FASTRETRANS_DEBUG 1/* Cancel timers, when they are not required. */#undef TCP_CLEAR_TIMERS#include <linux/config.h>#include <linux/list.h>#include <linux/tcp.h>#include <linux/slab.h>#include <linux/cache.h>#include <linux/percpu.h>#include <net/checksum.h>#include <net/sock.h>#include <net/snmp.h>#include <net/ip.h>#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)#include <linux/ipv6.h>#endif#include <linux/seq_file.h>/* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. */struct tcp_ehash_bucket { rwlock_t lock; struct hlist_head chain;} __attribute__((__aligned__(8)));/* This is for listening sockets, thus all sockets which possess wildcards. */#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. *//* There are a few simple rules, which allow for local port reuse by * an application. In essence: * * 1) Sockets bound to different interfaces may share a local port. * Failing that, goto test 2. * 2) If all sockets have sk->sk_reuse set, and none of them are in * TCP_LISTEN state, the port may be shared. * Failing that, goto test 3. * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local * address, and none of them are the same, the port may be * shared. * Failing this, the port cannot be shared. * * The interesting point, is test #2. This is what an FTP server does * all day. To optimize this case we use a specific flag bit defined * below. As we add sockets to a bind bucket list, we perform a * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) * As long as all sockets added to a bind bucket pass this test, * the flag bit will be set. * The resulting situation is that tcp_v[46]_verify_bind() can just check * for this flag bit, if it is set and the socket trying to bind has * sk->sk_reuse set, we don't even have to walk the owners list at all, * we return that it is ok to bind this socket to the requested local port. * * Sounds like a lot of work, but it is worth it. In a more naive * implementation (ie. current FreeBSD etc.) the entire list of ports * must be walked for each data port opened by an ftp server. Needless * to say, this does not scale at all. With a couple thousand FTP * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. ;-) -DaveM */struct tcp_bind_bucket { unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners;};#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)struct tcp_bind_hashbucket { spinlock_t lock; struct hlist_head chain;};static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head){ return hlist_entry(head->chain.first, struct tcp_bind_bucket, node);}static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head){ return hlist_empty(&head->chain) ? NULL : __tb_head(head);}extern struct tcp_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: * * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE * * First half of the table is for sockets not in TIME_WAIT, second half * is for TIME_WAIT sockets only. */ struct tcp_ehash_bucket *__tcp_ehash; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ struct tcp_bind_hashbucket *__tcp_bhash; int __tcp_bhash_size; int __tcp_ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here * is just local port number. */ struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. * * Now align to a new cache line as all the following members * are often dirty. */ rwlock_t __tcp_lhash_lock ____cacheline_aligned; atomic_t __tcp_lhash_users; wait_queue_head_t __tcp_lhash_wait; spinlock_t __tcp_portalloc_lock;} tcp_hashinfo;#define tcp_ehash (tcp_hashinfo.__tcp_ehash)#define tcp_bhash (tcp_hashinfo.__tcp_bhash)#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)extern kmem_cache_t *tcp_bucket_cachep;extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, unsigned short snum);extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);extern void tcp_bucket_unlock(struct sock *sk);extern int tcp_port_rover;extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);/* These are AF independent. */static __inline__ int tcp_bhashfn(__u16 lport){ return (lport & (tcp_bhash_size - 1));}extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, unsigned short snum);#if (BITS_PER_LONG == 64)#define TCP_ADDRCMP_ALIGN_BYTES 8#else#define TCP_ADDRCMP_ALIGN_BYTES 4#endif/* This is a TIME_WAIT bucket. It works around the memory consumption * problems of sockets in such a state on heavily loaded servers, but * without violating the protocol specification. */struct tcp_tw_bucket { /* * Now struct sock also uses sock_common, so please just * don't add nothing before this first member (__tw_common) --acme */ struct sock_common __tw_common;#define tw_family __tw_common.skc_family#define tw_state __tw_common.skc_state#define tw_reuse __tw_common.skc_reuse#define tw_bound_dev_if __tw_common.skc_bound_dev_if#define tw_node __tw_common.skc_node#define tw_bind_node __tw_common.skc_bind_node#define tw_refcnt __tw_common.skc_refcnt volatile unsigned char tw_substate; unsigned char tw_rcv_wscale; __u16 tw_sport; /* Socket demultiplex comparisons on incoming packets. */ /* these five are in inet_opt */ __u32 tw_daddr __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); __u32 tw_rcv_saddr; __u16 tw_dport; __u16 tw_num; /* And these are ours. */ int tw_hashent; int tw_timeout; __u32 tw_rcv_nxt; __u32 tw_snd_nxt; __u32 tw_rcv_wnd; __u32 tw_ts_recent; long tw_ts_recent_stamp; unsigned long tw_ttd; struct tcp_bind_bucket *tw_tb; struct hlist_node tw_death_node;#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr tw_v6_daddr; struct in6_addr tw_v6_rcv_saddr; int tw_v6_ipv6only;#endif};static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, struct hlist_head *list){ hlist_add_head(&tw->tw_node, list);}static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, struct hlist_head *list){ hlist_add_head(&tw->tw_bind_node, list);}static inline int tw_dead_hashed(struct tcp_tw_bucket *tw){ return tw->tw_death_node.pprev != NULL;}static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw){ tw->tw_death_node.pprev = NULL;}static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw){ __hlist_del(&tw->tw_death_node); tw_dead_node_init(tw);}static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw){ if (tw_dead_hashed(tw)) { __tw_del_dead_node(tw); return 1; } return 0;}#define tw_for_each(tw, node, head) \ hlist_for_each_entry(tw, node, head, tw_node)#define tw_for_each_inmate(tw, node, jail) \ hlist_for_each_entry(tw, node, jail, tw_death_node)#define tw_for_each_inmate_safe(tw, node, safe, jail) \ hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))static inline u32 tcp_v4_rcv_saddr(const struct sock *sk){ return likely(sk->sk_state != TCP_TIME_WAIT) ? inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;}#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk){ return likely(sk->sk_state != TCP_TIME_WAIT) ? &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;}static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk){ return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;}#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only)static inline int tcp_v6_ipv6only(const struct sock *sk){ return likely(sk->sk_state != TCP_TIME_WAIT) ? ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk);}#else# define __tcp_v6_rcv_saddr(__sk) NULL# define tcp_v6_rcv_saddr(__sk) NULL# define tcptw_sk_ipv6only(__sk) 0# define tcp_v6_ipv6only(__sk) 0#endifextern kmem_cache_t *tcp_timewait_cachep;static inline void tcp_tw_put(struct tcp_tw_bucket *tw){ if (atomic_dec_and_test(&tw->tw_refcnt)) {#ifdef INET_REFCNT_DEBUG printk(KERN_DEBUG "tw_bucket %p released\n", tw);#endif kmem_cache_free(tcp_timewait_cachep, tw); }}extern atomic_t tcp_orphan_count;extern int tcp_tw_count;extern void tcp_time_wait(struct sock *sk, int state, int timeo);extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);/* Socket demux engine toys. */#ifdef __BIG_ENDIAN#define TCP_COMBINED_PORTS(__sport, __dport) \ (((__u32)(__sport)<<16) | (__u32)(__dport))#else /* __LITTLE_ENDIAN */#define TCP_COMBINED_PORTS(__sport, __dport) \ (((__u32)(__dport)<<16) | (__u32)(__sport))#endif#if (BITS_PER_LONG == 64)#ifdef __BIG_ENDIAN#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));#else /* __LITTLE_ENDIAN */#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));#endif /* __BIG_ENDIAN */#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))#else /* 32-bit arch */#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ ((inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))#endif /* 64-bit arch */#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ !ipv6_addr_cmp(&inet6_sk(__sk)->daddr, (__saddr)) && \ !ipv6_addr_cmp(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))/* These can have wildcards, don't try too hard. */static __inline__ int tcp_lhashfn(unsigned short num){ return num & (TCP_LHTABLE_SIZE - 1);}static __inline__ int tcp_sk_listen_hashfn(struct sock *sk){ return tcp_lhashfn(inet_sk(sk)->num);}#define MAX_TCP_HEADER (128 + MAX_HEADER)/* * Never offer a window over 32767 without using window scaling. Some * poor stacks do signed 16bit maths! */#define MAX_TCP_WINDOW 32767U/* Minimal accepted MSS. It is (60+60+8) - (20+20). */#define TCP_MIN_MSS 88U/* Minimal RCV_MSS. */#define TCP_MIN_RCVMSS 536U/* After receiving this amount of duplicate ACKs fast retransmit starts. */#define TCP_FASTRETRANS_THRESH 3/* Maximal reordering. */#define TCP_MAX_REORDERING 127/* Maximal number of ACKs sent quickly to accelerate slow-start. */#define TCP_MAX_QUICKACKS 16U/* urg_data states */#define TCP_URG_VALID 0x0100#define TCP_URG_NOTYET 0x0200#define TCP_URG_READ 0x0400#define TCP_RETR1 3 /* * This is how many retries it does before it * tries to figure out if the gateway is * down. Minimal RFC value is 3; it corresponds * to ~3sec-8min depending on RTO. */#define TCP_RETR2 15 /* * This should take at least * 90 minutes to time out. * RFC1122 says that the limit is 100 sec. * 15 is ~13-30min depending on RTO. */#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a * connection: ~180sec is RFC minumum */#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a * connection: ~180sec is RFC minumum */#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned * socket. 7 is ~50sec-16min. */#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN /* BSD style FIN_WAIT2 deadlock breaker. * It used to be 3min, new value is 60sec, * to combine FIN-WAIT-2 timeout with * TIME-WAIT timer. */#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */#if HZ >= 100#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */#define TCP_ATO_MIN ((unsigned)(HZ/25))#else#define TCP_DELACK_MIN 4U#define TCP_ATO_MIN 4U#endif#define TCP_RTO_MAX ((unsigned)(120*HZ))#define TCP_RTO_MIN ((unsigned)(HZ/5))#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */#define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */#define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */#define TCP_KEEPALIVE_INTVL (75*HZ)#define MAX_TCP_KEEPIDLE 32767#define MAX_TCP_KEEPINTVL 32767#define MAX_TCP_KEEPCNT 127#define MAX_TCP_SYNCNT 127#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */#define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)#define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated * after this time. It should be equal * (or greater than) TCP_TIMEWAIT_LEN * to provide reliability equal to one * provided by timewait state. */#define TCP_PAWS_WINDOW 1 /* Replay window for per-host * timestamps. It must be less than * minimal timewait lifetime. */#define TCP_TW_RECYCLE_SLOTS_LOG 5#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)/* If time > 4sec, it is "slow" path, no recycling is required, so that we select tick to get range about 4 seconds. */#if HZ <= 16 || HZ > 4096# error Unsupported: HZ <= 16 or HZ > 4096#elif HZ <= 32# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 64# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 128# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 256# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 512# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 1024# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 2048# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)#else# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)#endif#define BICTCP_1_OVER_BETA 8 /* * Fast recovery * multiplicative decrease factor */#define BICTCP_MAX_INCREMENT 32 /* * Limit on the amount of * increment allowed during
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -