
tcp_ipv4.c

Linux kernel source code (compressed archive); this is the source code accompanying the book <<Linux内核>> (The Linux Kernel).
Language: C
Page 1 of 4
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_ipv4.c,v 1.222 2000/12/08 17:15:53 davem Exp $
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					open_request handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/init.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>

#include <linux/inet.h>
#include <linux/stddef.h>
#include <linux/ipsec.h>

extern int sysctl_ip_dynaddr;

/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8

/* Socket used for sending RSTs */
static struct inode tcp_inode;
static struct socket *tcp_socket = &tcp_inode.u.socket_i;

void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb);

/*
 * ALL members must be initialised to prevent gcc-2.7.2.3 miscompilation
 */
struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
	__tcp_ehash:          NULL,
	__tcp_bhash:          NULL,
	__tcp_bhash_size:     0,
	__tcp_ehash_size:     0,
	__tcp_listening_hash: { NULL, },
	__tcp_lhash_lock:     RW_LOCK_UNLOCKED,
	__tcp_lhash_users:    ATOMIC_INIT(0),
	__tcp_lhash_wait:
	  __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
	__tcp_portalloc_lock: SPIN_LOCK_UNLOCKED
};

/*
 * This array holds the first and last local port number.
 * For high-usage systems, use sysctl to change this to
 * 32768-61000
 */
int sysctl_local_port_range[2] = { 1024, 4999 };
int tcp_port_rover = (1024 - 1);

static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
				 __u32 faddr, __u16 fport)
{
	int h = ((laddr ^ lport) ^ (faddr ^ fport));
	h ^= h>>16;
	h ^= h>>8;
	return h & (tcp_ehash_size - 1);
}

static __inline__ int tcp_sk_hashfn(struct sock *sk)
{
	__u32 laddr = sk->rcv_saddr;
	__u16 lport = sk->num;
	__u32 faddr = sk->daddr;
	__u16 fport = sk->dport;

	return tcp_hashfn(laddr, lport, faddr, fport);
}
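/*
 * Note on tcp_hashfn(): the XOR-fold above mixes the high and low
 * halves of the 32-bit XOR of the four address/port words, so that
 * connections differing only in their upper address bytes still
 * spread across buckets.  tcp_ehash_size must be a power of two for
 * the final mask to act as a modulo.  A worked example, assuming a
 * hypothetical tcp_ehash_size of 256:
 *
 *	laddr = 0x7F000001, lport = 8080 (0x1F90)
 *	faddr = 0x0A000002, fport =   80 (0x0050)
 *	h  = 0x7F001F91 ^ 0x0A000052 = 0x75001FC3
 *	h ^= h>>16                   = 0x75006AC3
 *	h ^= h>>8                    = 0x75756AA9
 *	h & 255                      = 0xA9  (bucket 169)
 */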
/* Allocate and initialize a new TCP local port bind bucket.
 * The bindhash mutex for snum's hash chain must be held here.
 */
struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
					  unsigned short snum)
{
	struct tcp_bind_bucket *tb;

	tb = kmem_cache_alloc(tcp_bucket_cachep, SLAB_ATOMIC);
	if(tb != NULL) {
		tb->port = snum;
		tb->fastreuse = 0;
		tb->owners = NULL;
		if((tb->next = head->chain) != NULL)
			tb->next->pprev = &tb->next;
		head->chain = tb;
		tb->pprev = &head->chain;
	}
	return tb;
}

/* Caller must disable local BH processing. */
static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
{
	struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(child->num)];
	struct tcp_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = (struct tcp_bind_bucket *)sk->prev;
	if ((child->bind_next = tb->owners) != NULL)
		tb->owners->bind_pprev = &child->bind_next;
	tb->owners = child;
	child->bind_pprev = &tb->owners;
	child->prev = (struct sock *) tb;
	spin_unlock(&head->lock);
}

__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child)
{
	local_bh_disable();
	__tcp_inherit_port(sk, child);
	local_bh_enable();
}
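/*
 * Note: every bound local port is represented by one tcp_bind_bucket
 * chained off tcp_bhash; tb->owners lists all sockets holding the
 * port.  __tcp_inherit_port() above links a child socket created by
 * accept() onto its listener's bucket, so the port stays reserved as
 * long as any derived connection lives (sk->prev doubles as the
 * back-pointer to the owning bucket).  tcp_v4_get_port() below picks
 * an ephemeral port by walking a global rover cyclically through
 * [sysctl_local_port_range[0], sysctl_local_port_range[1]] until it
 * finds a port with no bind bucket at all.
 */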
/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 */
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		rover = tcp_port_rover;
		do { rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			for (tb = head->chain; tb; tb = tb->next)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);

		/* Exhausted local port range during search? */
		ret = 1;
		if (remaining <= 0)
			goto fail;

		/* OK, here is the one we will use.  HEAD is
		 * non-NULL and we hold its mutex.
		 */
		snum = rover;
		tb = NULL;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		for (tb = head->chain; tb != NULL; tb = tb->next)
			if (tb->port == snum)
				break;
	}
	if (tb != NULL && tb->owners != NULL) {
		if (tb->fastreuse != 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
			goto success;
		} else {
			struct sock *sk2 = tb->owners;
			int sk_reuse = sk->reuse;

			for( ; sk2 != NULL; sk2 = sk2->bind_next) {
				if (sk != sk2 &&
				    sk->bound_dev_if == sk2->bound_dev_if) {
					if (!sk_reuse	||
					    !sk2->reuse	||
					    sk2->state == TCP_LISTEN) {
						if (!sk2->rcv_saddr	||
						    !sk->rcv_saddr	||
						    (sk2->rcv_saddr == sk->rcv_saddr))
							break;
					}
				}
			}
			/* If we found a conflict, fail. */
			ret = 1;
			if (sk2 != NULL)
				goto fail_unlock;
		}
	}
	ret = 1;
	if (tb == NULL &&
	    (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (tb->owners == NULL) {
		if (sk->reuse && sk->state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
		tb->fastreuse = 0;
success:
	sk->num = snum;
	if (sk->prev == NULL) {
		if ((sk->bind_next = tb->owners) != NULL)
			tb->owners->bind_pprev = &sk->bind_next;
		tb->owners = sk;
		sk->bind_pprev = &tb->owners;
		sk->prev = (struct sock *) tb;
	} else {
		BUG_TRAP(sk->prev == (struct sock *) tb);
	}
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}

/* Get rid of any references to a local port held by the
 * given sock.
 */
__inline__ void __tcp_put_port(struct sock *sk)
{
	struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(sk->num)];
	struct tcp_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = (struct tcp_bind_bucket *) sk->prev;
	if (sk->bind_next)
		sk->bind_next->bind_pprev = sk->bind_pprev;
	*(sk->bind_pprev) = sk->bind_next;
	sk->prev = NULL;
	sk->num = 0;
	if (tb->owners == NULL) {
		if (tb->next)
			tb->next->pprev = tb->pprev;
		*(tb->pprev) = tb->next;
		kmem_cache_free(tcp_bucket_cachep, tb);
	}
	spin_unlock(&head->lock);
}

void tcp_put_port(struct sock *sk)
{
	local_bh_disable();
	__tcp_put_port(sk);
	local_bh_enable();
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
 * Look, when several writers sleep and a reader wakes them up, all but one
 * immediately hit the write lock and grab all the cpus.  Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines (wake up on each
 * exclusive lock release).  It should be ifdefed really.
 */
void tcp_listen_wlock(void)
{
	write_lock(&tcp_lhash_lock);

	if (atomic_read(&tcp_lhash_users)) {
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(&tcp_lhash_wait, &wait);
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&tcp_lhash_users) == 0)
				break;
			write_unlock_bh(&tcp_lhash_lock);
			schedule();
			write_lock_bh(&tcp_lhash_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&tcp_lhash_wait, &wait);
	}
}

static __inline__ void __tcp_v4_hash(struct sock *sk)
{
	struct sock **skp;
	rwlock_t *lock;

	BUG_TRAP(sk->pprev==NULL);
	if(sk->state == TCP_LISTEN) {
		skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
		tcp_listen_wlock();
	} else {
		skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))].chain;
		lock = &tcp_ehash[sk->hashent].lock;
		write_lock(lock);
	}
	if((sk->next = *skp) != NULL)
		(*skp)->pprev = &sk->next;
	*skp = sk;
	sk->pprev = skp;
	sock_prot_inc_use(sk->prot);
	write_unlock(lock);
	if (sk->state == TCP_LISTEN)
		wake_up(&tcp_lhash_wait);
}

static void tcp_v4_hash(struct sock *sk)
{
	if (sk->state != TCP_CLOSE) {
		local_bh_disable();
		__tcp_v4_hash(sk);
		local_bh_enable();
	}
}

void tcp_unhash(struct sock *sk)
{
	rwlock_t *lock;

	if (sk->state == TCP_LISTEN) {
		local_bh_disable();
		tcp_listen_wlock();
		lock = &tcp_lhash_lock;
	} else {
		struct tcp_ehash_bucket *head = &tcp_ehash[sk->hashent];
		lock = &head->lock;
		write_lock_bh(&head->lock);
	}

	if(sk->pprev) {
		if(sk->next)
			sk->next->pprev = sk->pprev;
		*sk->pprev = sk->next;
		sk->pprev = NULL;
		sock_prot_dec_use(sk->prot);
	}
	write_unlock_bh(lock);
	if (sk->state == TCP_LISTEN)
		wake_up(&tcp_lhash_wait);
}
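/*
 * Note: TCP keeps two separate lookup tables.  Listening sockets live
 * in tcp_listening_hash, keyed on local port only and protected by the
 * global tcp_lhash_lock; established connections live in tcp_ehash,
 * keyed on the full 4-tuple with one rwlock per bucket.  The second
 * half of tcp_ehash (buckets at index + tcp_ehash_size) holds
 * TIME_WAIT sockets, per the "Change semantics of established hash"
 * entry in the changelog above.
 */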
/* Don't inline this cruft.  Here are some nice properties to
 * exploit here.  The BSD API does not allow a listening TCP
 * to specify the remote port nor the remote address for the
 * connection.  So always assume those are both wildcarded
 * during the search since they can never be otherwise.
 */
static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, unsigned short hnum, int dif)
{
	struct sock *result = NULL;
	int score, hiscore;

	hiscore=0;
	for(; sk; sk = sk->next) {
		if(sk->num == hnum) {
			__u32 rcv_saddr = sk->rcv_saddr;

			score = 1;
			if(rcv_saddr) {
				if (rcv_saddr != daddr)
					continue;
				score++;
			}
			if (sk->bound_dev_if) {
				if (sk->bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3)
				return sk;
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	return result;
}

/* Optimize the common listener case. */
__inline__ struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif)
{
	struct sock *sk;

	read_lock(&tcp_lhash_lock);
	sk = tcp_listening_hash[tcp_lhashfn(hnum)];
	if (sk) {
		if (sk->num == hnum &&
		    sk->next == NULL &&
		    (!sk->rcv_saddr || sk->rcv_saddr == daddr) &&
		    !sk->bound_dev_if)
			goto sherry_cache;
		sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif);
	}
	if (sk) {
sherry_cache:
		sock_hold(sk);
	}
	read_unlock(&tcp_lhash_lock);
	return sk;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * Local BH must be disabled here.
 */
static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
						       u32 daddr, u16 hnum, int dif)
{
	struct tcp_ehash_bucket *head;
	TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyway.
	 */
	hash = tcp_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	for(sk = head->chain; sk; sk = sk->next) {
		if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}

	/* Must check for a TIME_WAIT'er before going to listener hash. */
	for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next)
		if(TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
			goto hit;
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}

static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
					   u32 daddr, u16 hnum, int dif)
{
	struct sock *sk;

	sk = __tcp_v4_lookup_established(saddr, sport, daddr, hnum, dif);
	if (sk)
		return sk;
	return tcp_v4_lookup_listener(daddr, hnum, dif);
}

__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	return secure_tcp_sequence_number(skb->nh.iph->daddr,
					  skb->nh.iph->saddr,
					  skb->h.th->dest,
					  skb->h.th->source);
}
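/*
 * Note on the lookup path: for an incoming segment, demultiplexing
 * first tries the established table on the exact 4-tuple (including
 * the TIME_WAIT half of tcp_ehash), and only then falls back to the
 * listener table.  The listener scan scores each candidate: 1 point
 * for a matching local port, +1 for a matching bound address, +1 for
 * a matching bound device; a perfect score of 3 short-circuits the
 * scan, otherwise the most specific match wins.  So a socket bound to
 * 10.0.0.1:80 beats one bound to *:80 for packets addressed to
 * 10.0.0.1 (addresses here are illustrative).
 */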
static int tcp_v4_check_established(struct sock *sk)
{
	u32 daddr = sk->rcv_saddr;
	u32 saddr = sk->daddr;
	int dif = sk->bound_dev_if;
	TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
	__u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num);
	int hash = tcp_hashfn(daddr, sk->num, saddr, sk->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2, **skp;
	struct tcp_tw_bucket *tw;

	write_lock_bh(&head->lock);

	/* Check TIME-WAIT sockets first. */
	for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp) != NULL;
	    skp = &sk2->next) {
		tw = (struct tcp_tw_bucket*)sk2;

		if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
			struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

			/* With PAWS, it is safe from the viewpoint
			   of data integrity. Even without PAWS it
			   is safe provided sequence spaces do not
			   overlap i.e. at data rates <= 80Mbit/sec.

			   Actually, the idea is close to VJ's one,
			   only timestamp cache is held not per host,
			   but per port pair and TW bucket is used
