📄 tcp.h
字号:
/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the TCP module. * * Version: @(#)tcp.h 1.0.5 05/23/93 * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */#ifndef _TCP_H#define _TCP_H#define TCP_DEBUG 1#define FASTRETRANS_DEBUG 1/* Cancel timers, when they are not required. */#undef TCP_CLEAR_TIMERS#include <linux/config.h>#include <linux/tcp.h>#include <linux/slab.h>#include <net/checksum.h>#include <net/sock.h>/* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. */struct tcp_ehash_bucket { rwlock_t lock; struct sock *chain;} __attribute__((__aligned__(8)));/* This is for listening sockets, thus all sockets which possess wildcards. */#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. *//* There are a few simple rules, which allow for local port reuse by * an application. In essence: * * 1) Sockets bound to different interfaces may share a local port. * Failing that, goto test 2. * 2) If all sockets have sk->reuse set, and none of them are in * TCP_LISTEN state, the port may be shared. * Failing that, goto test 3. * 3) If all sockets are bound to a specific sk->rcv_saddr local * address, and none of them are the same, the port may be * shared. * Failing this, the port cannot be shared. * * The interesting point, is test #2. This is what an FTP server does * all day. To optimize this case we use a specific flag bit defined * below. As we add sockets to a bind bucket list, we perform a * check of: (newsk->reuse && (newsk->state != TCP_LISTEN)) * As long as all sockets added to a bind bucket pass this test, * the flag bit will be set. * The resulting situation is that tcp_v[46]_verify_bind() can just check * for this flag bit, if it is set and the socket trying to bind has * sk->reuse set, we don't even have to walk the owners list at all, * we return that it is ok to bind this socket to the requested local port. * * Sounds like a lot of work, but it is worth it. In a more naive * implementation (ie. current FreeBSD etc.) the entire list of ports * must be walked for each data port opened by an ftp server. Needless * to say, this does not scale at all. With a couple thousand FTP * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. ;-) -DaveM */struct tcp_bind_bucket { unsigned short port; unsigned short fastreuse; struct tcp_bind_bucket *next; struct sock *owners; struct tcp_bind_bucket **pprev;};struct tcp_bind_hashbucket { spinlock_t lock; struct tcp_bind_bucket *chain;};extern struct tcp_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: * * TCP_ESTABLISHED <= sk->state < TCP_CLOSE * * First half of the table is for sockets not in TIME_WAIT, second half * is for TIME_WAIT sockets only. */ struct tcp_ehash_bucket *__tcp_ehash; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ struct tcp_bind_hashbucket *__tcp_bhash; int __tcp_bhash_size; int __tcp_ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here * is just local port number. */ struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE]; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. * * Now align to a new cache line as all the following members * are often dirty. */ rwlock_t __tcp_lhash_lock __attribute__((__aligned__(SMP_CACHE_BYTES))); atomic_t __tcp_lhash_users; wait_queue_head_t __tcp_lhash_wait; spinlock_t __tcp_portalloc_lock;} tcp_hashinfo;#define tcp_ehash (tcp_hashinfo.__tcp_ehash)#define tcp_bhash (tcp_hashinfo.__tcp_bhash)#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)extern kmem_cache_t *tcp_bucket_cachep;extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, unsigned short snum);extern void tcp_bucket_unlock(struct sock *sk);extern int tcp_port_rover;extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);/* These are AF independent. */static __inline__ int tcp_bhashfn(__u16 lport){ return (lport & (tcp_bhash_size - 1));}/* This is a TIME_WAIT bucket. It works around the memory consumption * problems of sockets in such a state on heavily loaded servers, but * without violating the protocol specification. */struct tcp_tw_bucket { /* These _must_ match the beginning of struct sock precisely. * XXX Yes I know this is gross, but I'd have to edit every single * XXX networking file if I created a "struct sock_header". -DaveM */ __u32 daddr; __u32 rcv_saddr; __u16 dport; unsigned short num; int bound_dev_if; struct sock *next; struct sock **pprev; struct sock *bind_next; struct sock **bind_pprev; unsigned char state, substate; /* "zapped" is replaced with "substate" */ __u16 sport; unsigned short family; unsigned char reuse, rcv_wscale; /* It is also TW bucket specific */ atomic_t refcnt; /* And these are ours. */ int hashent; int timeout; __u32 rcv_nxt; __u32 snd_nxt; __u32 rcv_wnd; __u32 ts_recent; long ts_recent_stamp; unsigned long ttd; struct tcp_bind_bucket *tb; struct tcp_tw_bucket *next_death; struct tcp_tw_bucket **pprev_death;#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr v6_daddr; struct in6_addr v6_rcv_saddr;#endif};extern kmem_cache_t *tcp_timewait_cachep;static inline void tcp_tw_put(struct tcp_tw_bucket *tw){ if (atomic_dec_and_test(&tw->refcnt)) {#ifdef INET_REFCNT_DEBUG printk(KERN_DEBUG "tw_bucket %p released\n", tw);#endif kmem_cache_free(tcp_timewait_cachep, tw); }}extern atomic_t tcp_orphan_count;extern int tcp_tw_count;extern void tcp_time_wait(struct sock *sk, int state, int timeo);extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);/* Socket demux engine toys. */#ifdef __BIG_ENDIAN#define TCP_COMBINED_PORTS(__sport, __dport) \ (((__u32)(__sport)<<16) | (__u32)(__dport))#else /* __LITTLE_ENDIAN */#define TCP_COMBINED_PORTS(__sport, __dport) \ (((__u32)(__dport)<<16) | (__u32)(__sport))#endif#if (BITS_PER_LONG == 64)#ifdef __BIG_ENDIAN#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));#else /* __LITTLE_ENDIAN */#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));#endif /* __BIG_ENDIAN */#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ (((*((__u64 *)&((__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))#else /* 32-bit arch */#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->daddr == (__saddr)) && \ ((__sk)->rcv_saddr == (__daddr)) && \ ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))#endif /* 64-bit arch */#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ (((*((__u32 *)&((__sk)->dport)))== (__ports)) && \ ((__sk)->family == AF_INET6) && \ !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr)) && \ !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) && \ (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))/* These can have wildcards, don't try too hard. */static __inline__ int tcp_lhashfn(unsigned short num){ return num & (TCP_LHTABLE_SIZE - 1);}static __inline__ int tcp_sk_listen_hashfn(struct sock *sk){ return tcp_lhashfn(sk->num);}#define MAX_TCP_HEADER (128 + MAX_HEADER)/* * Never offer a window over 32767 without using window scaling. Some * poor stacks do signed 16bit maths! */#define MAX_TCP_WINDOW 32767U/* Minimal accepted MSS. It is (60+60+8) - (20+20). */#define TCP_MIN_MSS 88U/* Minimal RCV_MSS. */#define TCP_MIN_RCVMSS 536U/* After receiving this amount of duplicate ACKs fast retransmit starts. */#define TCP_FASTRETRANS_THRESH 3/* Maximal reordering. */#define TCP_MAX_REORDERING 127/* Maximal number of ACKs sent quickly to accelerate slow-start. */#define TCP_MAX_QUICKACKS 16U/* urg_data states */#define TCP_URG_VALID 0x0100#define TCP_URG_NOTYET 0x0200#define TCP_URG_READ 0x0400#define TCP_RETR1 3 /* * This is how many retries it does before it * tries to figure out if the gateway is * down. Minimal RFC value is 3; it corresponds * to ~3sec-8min depending on RTO. */#define TCP_RETR2 15 /* * This should take at least * 90 minutes to time out. * RFC1122 says that the limit is 100 sec. * 15 is ~13-30min depending on RTO. */#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a * connection: ~180sec is RFC minumum */#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a * connection: ~180sec is RFC minumum */#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned * socket. 7 is ~50sec-16min. */#define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN /* BSD style FIN_WAIT2 deadlock breaker. * It used to be 3min, new value is 60sec, * to combine FIN-WAIT-2 timeout with * TIME-WAIT timer. */#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */#if HZ >= 100#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */#define TCP_ATO_MIN ((unsigned)(HZ/25))#else#define TCP_DELACK_MIN 4U#define TCP_ATO_MIN 4U#endif#define TCP_RTO_MAX ((unsigned)(120*HZ))#define TCP_RTO_MIN ((unsigned)(HZ/5))#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */#define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */#define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */#define TCP_KEEPALIVE_INTVL (75*HZ)#define MAX_TCP_KEEPIDLE 32767#define MAX_TCP_KEEPINTVL 32767#define MAX_TCP_KEEPCNT 127#define MAX_TCP_SYNCNT 127/* TIME_WAIT reaping mechanism. */#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */#define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */#define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)#define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated * after this time. It should be equal * (or greater than) TCP_TIMEWAIT_LEN * to provide reliability equal to one * provided by timewait state. */#define TCP_PAWS_WINDOW 1 /* Replay window for per-host * timestamps. It must be less than * minimal timewait lifetime. */#define TCP_TW_RECYCLE_SLOTS_LOG 5#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)/* If time > 4sec, it is "slow" path, no recycling is required, so that we select tick to get range about 4 seconds. */#if HZ <= 16 || HZ > 4096# error Unsupported: HZ <= 16 or HZ > 4096#elif HZ <= 32# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 64# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 128# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 256# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 512# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 1024# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)#elif HZ <= 2048# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)#else# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)#endif/* * TCP option */ #define TCPOPT_NOP 1 /* Padding */#define TCPOPT_EOL 0 /* End of options */#define TCPOPT_MSS 2 /* Segment size negotiating */#define TCPOPT_WINDOW 3 /* Window scaling */#define TCPOPT_SACK_PERM 4 /* SACK Permitted */#define TCPOPT_SACK 5 /* SACK Block */#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS *//* * TCP option lengths */#define TCPOLEN_MSS 4#define TCPOLEN_WINDOW 3#define TCPOLEN_SACK_PERM 2#define TCPOLEN_TIMESTAMP 10/* But this is what stacks really send out. */#define TCPOLEN_TSTAMP_ALIGNED 12#define TCPOLEN_WSCALE_ALIGNED 4#define TCPOLEN_SACKPERM_ALIGNED 4#define TCPOLEN_SACK_BASE 2#define TCPOLEN_SACK_BASE_ALIGNED 4#define TCPOLEN_SACK_PERBLOCK 8#define TCP_TIME_RETRANS 1 /* Retransmit timer */#define TCP_TIME_DACK 2 /* Delayed ack timer */#define TCP_TIME_PROBE0 3 /* Zero window probe timer */#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer *//* sysctl variables for tcp */extern int sysctl_max_syn_backlog;extern int sysctl_tcp_timestamps;extern int sysctl_tcp_window_scaling;extern int sysctl_tcp_sack;extern int sysctl_tcp_fin_timeout;extern int sysctl_tcp_tw_recycle;extern int sysctl_tcp_keepalive_time;extern int sysctl_tcp_keepalive_probes;extern int sysctl_tcp_keepalive_intvl;extern int sysctl_tcp_syn_retries;extern int sysctl_tcp_synack_retries;extern int sysctl_tcp_retries1;extern int sysctl_tcp_retries2;extern int sysctl_tcp_orphan_retries;extern int sysctl_tcp_syncookies;extern int sysctl_tcp_retrans_collapse;extern int sysctl_tcp_stdurg;extern int sysctl_tcp_rfc1337;extern int sysctl_tcp_tw_recycle;extern int sysctl_tcp_abort_on_overflow;extern int sysctl_tcp_max_orphans;extern int sysctl_tcp_max_tw_buckets;extern int sysctl_tcp_fack;extern int sysctl_tcp_reordering;extern int sysctl_tcp_ecn;extern int sysctl_tcp_dsack;extern int sysctl_tcp_mem[3];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -