📄 tcp.c

📁 Linux Kernel 2.6.9 for OMAP1710
💻 C
📖 第 1 页 / 共 5 页
字号:
				len -= chunk;				copied += chunk;			}			if (tp->rcv_nxt == tp->copied_seq &&			    skb_queue_len(&tp->ucopy.prequeue)) {do_prequeue:				tcp_prequeue_process(sk);				if ((chunk = len - tp->ucopy.len) != 0) {					NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);					len -= chunk;					copied += chunk;				}			}		}		if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {			if (net_ratelimit())				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",				       current->comm, current->pid);			peek_seq = tp->copied_seq;		}		continue;	found_ok_skb:		/* Ok so how much can we use? */		used = skb->len - offset;		if (len < used)			used = len;		/* Do we have urgent data here? */		if (tp->urg_data) {			u32 urg_offset = tp->urg_seq - *seq;			if (urg_offset < used) {				if (!urg_offset) {					if (!sock_flag(sk, SOCK_URGINLINE)) {						++*seq;						offset++;						used--;						if (!used)							goto skip_copy;					}				} else					used = urg_offset;			}		}		if (!(flags & MSG_TRUNC)) {			err = skb_copy_datagram_iovec(skb, offset,						      msg->msg_iov, used);			if (err) {				/* Exception. Bailout! */				if (!copied)					copied = -EFAULT;				break;			}		}		*seq += used;		copied += used;		len -= used;		tcp_rcv_space_adjust(sk);skip_copy:		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {			tp->urg_data = 0;			tcp_fast_path_check(sk, tp);		}		if (used + offset < skb->len)			continue;		if (skb->h.th->fin)			goto found_fin_ok;		if (!(flags & MSG_PEEK))			sk_eat_skb(sk, skb);		continue;	found_fin_ok:		/* Process the FIN. */		++*seq;		if (!(flags & MSG_PEEK))			sk_eat_skb(sk, skb);		break;	} while (len > 0);	if (user_recv) {		if (skb_queue_len(&tp->ucopy.prequeue)) {			int chunk;			tp->ucopy.len = copied > 0 ? 
len : 0;
			tcp_prequeue_process(sk);

			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
				len -= chunk;
				copied += chunk;
			}
		}

		tp->ucopy.task = NULL;
		tp->ucopy.len = 0;
	}

	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket. I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
	cleanup_rbuf(sk, copied);

	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

out:
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;

recv_urg:
	err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
	goto out;
}

/*
 *	State processing on a close. This implements the state shift for
 *	sending our FIN frame. Note that we only send a FIN for some
 *	states. A shutdown() may have already sent the FIN, or we may be
 *	closed.
 *
 *	The table is indexed by the current sk->sk_state. Each entry holds
 *	the state to move to, optionally OR-ed with TCP_ACTION_FIN; the two
 *	parts are separated again in tcp_close_state() using TCP_STATE_MASK
 *	and TCP_ACTION_FIN.
 */
static unsigned char new_state[16] = {
  /* current state:        new state:      action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK  | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

/*
 *	Move the socket to the state that new_state[] prescribes for its
 *	current sk->sk_state.
 *
 *	Returns non-zero (the TCP_ACTION_FIN bit) when the table entry asks
 *	for a FIN to be sent, 0 otherwise. The caller is responsible for
 *	actually sending the FIN.
 */
static int tcp_close_state(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];
	/* Strip the action bit so that only the state number remains. */
	int ns = next & TCP_STATE_MASK;

	tcp_set_state(sk, ns);

	return next & TCP_ACTION_FIN;
}

/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or set_sock_flag(sk, SOCK_DEAD).
 *
 *	@sk:  socket to shut down
 *	@how: shutdown flags; nothing is done unless SEND_SHUTDOWN is set
 */
void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all.  Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 *
 * Sanity-checks the socket (state TCP_CLOSE, SOCK_DEAD set, unhashed,
 * bound if it has a local port number), runs the protocol destroy hook,
 * kills the queues, frees the xfrm policy, decrements tcp_orphan_count
 * and drops one reference with sock_put().
 */
void tcp_destroy_sock(struct sock *sk)
{
	BUG_TRAP(sk->sk_state == TCP_CLOSE);
	BUG_TRAP(sock_flag(sk, SOCK_DEAD));

	/* It cannot be in hash table! */
	BUG_TRAP(sk_unhashed(sk));

	/* If it has not 0 inet_sk(sk)->num, it must be bound */
	BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash);

	sk->sk_prot->destroy(sk);

	sk_stream_kill_queues(sk);

	xfrm_sk_free_policy(sk);

#ifdef INET_REFCNT_DEBUG
	/* Debug aid only: report when a reference count other than 1 is seen. */
	if (atomic_read(&sk->sk_refcnt) != 1) {
		printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
		       sk, atomic_read(&sk->sk_refcnt));
	}
#endif

	atomic_dec(&tcp_orphan_count);
	sock_put(sk);
}

/*
 * Close a TCP socket.
 *
 * @sk:      socket being closed
 * @timeout: passed unchanged to sk_stream_wait_close()
 *
 * Outline of the visible steps:
 *  1. Under lock_sock(): set sk_shutdown to SHUTDOWN_MASK. A listening
 *     socket is moved to TCP_CLOSE, its listen state is torn down with
 *     tcp_listen_stop(), and the rest of step 1 is skipped.
 *  2. Otherwise the receive queue is drained while counting the bytes
 *     that were never read. Then exactly one of the following applies:
 *     unread data -> TCP_CLOSE plus an active reset; SOCK_LINGER with a
 *     zero sk_lingertime -> sk_prot->disconnect(); tcp_close_state()
 *     asking for a FIN -> tcp_send_fin(). sk_stream_wait_close() follows.
 *  3. After release_sock(): with bottom halves disabled and the BH socket
 *     lock held, the socket is orphaned and the FIN_WAIT2 and
 *     orphan/memory limits are applied; a socket that ends up in
 *     TCP_CLOSE is destroyed immediately.
 */
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		tcp_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* Sequence space covered by this skb, minus one if it carries
		 * a FIN, so that only payload counts as unread data.
		 */
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
			  skb->h.th->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_stream_mem_reclaim(sk);

	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
	 * 3.10, we send a RST here because data was lost.  To
	 * witness the awful effects of the old behavior of always
	 * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
	 * a bulk GET in an FTP client, suspend the process, wait
	 * for the client to advertise a zero window, then kill -9
	 * the FTP client, wheee...  Note: timeout is always zero
	 * in such a case.
	 */
	if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, GFP_KERNEL);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible deviations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more small holes.
		 * 						--ANK
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);


	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Extra reference for the remainder of this function; it is dropped
	 * by the sock_put() after the out: label.
	 */
	sock_hold(sk);
	sock_orphan(sk);

	/*	This is a (useful) BSD violation of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */

	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_opt *tp = tcp_sk(sk);
		if (tp->linger2 < 0) {
			/* Negative linger2: abort right away with a reset. */
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
		} else {
			int tmo = tcp_fin_time(tp);

			if (tmo > TCP_TIMEWAIT_LEN) {
				/* FIN timeout exceeds TCP_TIMEWAIT_LEN: arm the
				 * keepalive timer with the FIN timeout instead
				 * of entering time-wait here.
				 */
				tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
			} else {
				/* Hand the socket over to time-wait processing
				 * and skip the orphan accounting further down.
				 */
				atomic_inc(&tcp_orphan_count);
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_stream_mem_reclaim(sk);
		/* Abort with a reset when the orphan count exceeds
		 * sysctl_tcp_max_orphans, or when this socket has more than
		 * SOCK_MIN_SNDBUF queued for write while tcp_memory_allocated
		 * exceeds sysctl_tcp_mem[2].
		 */
		if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
		    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
			if (net_ratelimit())
				printk(KERN_INFO "TCP: too many of orphaned "
				       "sockets\n");
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
		}
	}
	/* Counted as an orphan here; tcp_destroy_sock() decrements the same
	 * counter again.
	 */
	atomic_inc(&tcp_orphan_count);

	if (sk->sk_state == TCP_CLOSE)
		tcp_destroy_sock(sk);
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

/* These states need RST on ABORT according to RFC793.
 *
 * Returns non-zero when @state is one of ESTABLISHED, CLOSE_WAIT,
 * FIN_WAIT1, FIN_WAIT2 or SYN_RECV, and 0 otherwise.
 */

static inline int tcp_need_reset(int state)
{
	return (1 << state) &
	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}

/*
 * Abort the connection on @sk and reset its per-connection state.
 *
 * @sk:    socket to disconnect
 * @flags: not referenced anywhere in this function body
 *
 * The socket is moved to TCP_CLOSE (unless already there), a reset is
 * sent when the previous state requires one, timers and queues are
 * cleared, and the fields assigned below are set back to fixed starting
 * values. sk->sk_error_report() is called at the end.
 *
 * Returns err, which is initialised to 0 and never changed here.
 */
int tcp_disconnect(struct sock *sk, int flags)
{
	struct inet_opt *inet = inet_sk(sk);
	struct tcp_opt *tp = tcp_sk(sk);
	int err = 0;
	int old_state = sk->sk_state;

	if (old_state != TCP_CLOSE)
		tcp_set_state(sk, TCP_CLOSE);

	/* ABORT function of RFC793 */
	if (old_state == TCP_LISTEN) {
		tcp_listen_stop(sk);
	} else if (tcp_need_reset(old_state) ||
		   (tp->snd_nxt != tp->write_seq &&
		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* The last check adjusts for discrepancy of Linux wrt. RFC
		 * states
		 */
		tcp_send_active_reset(sk, gfp_any());
		sk->sk_err = ECONNRESET;
	} else if (old_state == TCP_SYN_SENT)
		sk->sk_err = ECONNRESET;

	tcp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	sk_stream_writequeue_purge(sk);
	__skb_queue_purge(&tp->out_of_order_queue);

	inet->dport = 0;

	/* Reset the source address unless SOCK_BINDADDR_LOCK is set in
	 * sk_userlocks.
	 */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->srtt = 0;
	/* Advance write_seq by max_window + 2; if that lands exactly on 0,
	 * use 1 instead.
	 */
	if ((tp->write_seq += tp->max_window + 2) == 0)
		tp->write_seq = 1;
	tp->backoff = 0;
	tp->snd_cwnd = 2;
	tp->probes_out = 0;
	tcp_set_pcount(&tp->packets_out, 0);
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_cnt = 0;
	tcp_set_ca_state(tp, TCP_CA_Open);
	tcp_clear_retrans(tp);
	tcp_delack_init(tp);
	sk->sk_send_head = NULL;
	tp->saw_tstamp = 0;
	tcp_sack_reset(tp);
	__sk_dst_reset(sk);

	/* A socket that still has a local port number must still be bound. */
	BUG_TRAP(!inet->num || tp->bind_hash);

	sk->sk_error_report(sk);
	return err;
}

/*
 *	Wait for an incoming connection, avoid race
 *	conditions. This must be called with the socket locked.
 */
static int wait_for_connect(struct sock *sk, long timeo)
{
	struct tcp_opt *tp = tcp_sk(sk);
	DEFINE_WAIT(wait);
	int err;

	/*
	 * True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 *
	 * Subtle issue: "add_wait_queue_exclusive()" will be added
	 * after any current non-exclusive waiters, and we know that
	 * it will always _stay_ after any new non-exclusive waiters
	 * because all non-exclusive waiters are added at the
	 * beginning of the wait-queue. As such, it's ok to "drop"
	 * our exclusiveness temporarily when we get woken up without
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
					  TASK_INTERRUPTIBLE);
		release_sock(sk);
		if (!tp->accept_queue)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -