
📄 tcp.c

📁 Linux kernel source code
💻 C
📖 Page 1 of 5
			}
		}

		/* Safe to free early-copied skbs now */
		__skb_queue_purge(&sk->sk_async_wait_queue);
		dma_chan_put(tp->ucopy.dma_chan);
		tp->ucopy.dma_chan = NULL;
	}
	if (tp->ucopy.pinned_list) {
		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
		tp->ucopy.pinned_list = NULL;
	}
#endif

	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket. I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
	tcp_cleanup_rbuf(sk, copied);

	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

out:
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;

recv_urg:
	err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
	goto out;
}

/*
 *	State processing on a close. This implements the state shift for
 *	sending our FIN frame. Note that we only send a FIN for some
 *	states. A shutdown() may have already sent the FIN, or we may be
 *	closed.
 */
static const unsigned char new_state[16] = {
  /* current state:        new state:      action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK  | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

static int tcp_close_state(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];
	int ns = next & TCP_STATE_MASK;

	tcp_set_state(sk, ns);

	return next & TCP_ACTION_FIN;
}

/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or set_sock_flag(sk, SOCK_DEAD).
 */
void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
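From user space, tcp_shutdown() is reached through shutdown(2): SHUT_WR becomes SEND_SHUTDOWN in the kernel, so a FIN is queued while the receive side stays usable. A minimal sketch of that half-close pattern (illustrative user-space code, not part of tcp.c; finish_request is a made-up helper):

/* Half-close: send a FIN but keep reading the peer's reply. */
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

static void finish_request(int fd, const char *req, size_t len)
{
	send(fd, req, len, 0);      /* last bytes of the request */
	shutdown(fd, SHUT_WR);      /* FIN: "no more data from me" */

	char buf[4096];
	ssize_t n;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		;                   /* receive side still works */
	close(fd);
}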
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
			  tcp_hdr(skb)->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_stream_mem_reclaim(sk);

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, GFP_KERNEL);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);

	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */
	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_stream_mem_reclaim(sk);
		if (tcp_too_many_orphans(sk,
				atomic_read(sk->sk_prot->orphan_count))) {
			if (net_ratelimit())
				printk(KERN_INFO "TCP: too many of orphaned "
				       "sockets\n");
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	if (sk->sk_state == TCP_CLOSE)
		inet_csk_destroy_sock(sk);
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
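The zero-linger branch above (SOCK_LINGER set with sk_lingertime == 0) is what user code triggers by setting SO_LINGER with l_linger = 0: the next close() aborts the connection with a RST instead of an orderly FIN. A small sketch of that pattern (abortive_close is a hypothetical helper, not part of tcp.c):

/* Abortive close: peer sees RST, no TIME_WAIT on our side. */
#include <sys/socket.h>
#include <unistd.h>

static void abortive_close(int fd)
{
	struct linger lg = { .l_onoff = 1, .l_linger = 0 };

	/* Zero linger time: close() takes the disconnect branch above. */
	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
	close(fd);
}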
/* These states need RST on ABORT according to RFC793 */

static inline int tcp_need_reset(int state)
{
	return (1 << state) &
	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}

int tcp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;
	int old_state = sk->sk_state;

	if (old_state != TCP_CLOSE)
		tcp_set_state(sk, TCP_CLOSE);

	/* ABORT function of RFC793 */
	if (old_state == TCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (tcp_need_reset(old_state) ||
		   (tp->snd_nxt != tp->write_seq &&
		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* The last check adjusts for discrepancy of Linux wrt. RFC
		 * states
		 */
		tcp_send_active_reset(sk, gfp_any());
		sk->sk_err = ECONNRESET;
	} else if (old_state == TCP_SYN_SENT)
		sk->sk_err = ECONNRESET;

	tcp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	tcp_write_queue_purge(sk);
	__skb_queue_purge(&tp->out_of_order_queue);
#ifdef CONFIG_NET_DMA
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->srtt = 0;
	if ((tp->write_seq += tp->max_window + 2) == 0)
		tp->write_seq = 1;
	icsk->icsk_backoff = 0;
	tp->snd_cwnd = 2;
	icsk->icsk_probes_out = 0;
	tp->packets_out = 0;
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_cnt = 0;
	tp->bytes_acked = 0;
	tcp_set_ca_state(sk, TCP_CA_Open);
	tcp_clear_retrans(tp);
	inet_csk_delack_init(sk);
	tcp_init_send_head(sk);
	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
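One user-visible way to reach tcp_disconnect() is connect(2) with an AF_UNSPEC address, which inet_stream_connect() routes to sk->sk_prot->disconnect. A sketch under that assumption (tcp_abort_association is a made-up name for illustration):

/* Detach a TCP socket from its peer via connect(AF_UNSPEC). */
#include <string.h>
#include <sys/socket.h>

static int tcp_abort_association(int fd)
{
	struct sockaddr sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_family = AF_UNSPEC;

	/* Ends up in tcp_disconnect() above for a TCP socket. */
	return connect(fd, &sa, sizeof(sa));
}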
/*
 *	Socket option code for TCP.
 */
static int do_tcp_setsockopt(struct sock *sk, int level,
		int optname, char __user *optval, int optlen)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int val;
	int err = 0;

	/* This is a string value all the others are int's */
	if (optname == TCP_CONGESTION) {
		char name[TCP_CA_NAME_MAX];

		if (optlen < 1)
			return -EINVAL;

		val = strncpy_from_user(name, optval,
					min(TCP_CA_NAME_MAX-1, optlen));
		if (val < 0)
			return -EFAULT;
		name[val] = 0;

		lock_sock(sk);
		err = tcp_set_congestion_control(sk, name);
		release_sock(sk);
		return err;
	}

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	lock_sock(sk);

	switch (optname) {
	case TCP_MAXSEG:
		/* Values greater than interface MTU won't take effect. However
		 * at the point when this call is done we typically don't yet
		 * know which interface is going to be used */
		if (val < 8 || val > MAX_TCP_WINDOW) {
			err = -EINVAL;
			break;
		}
		tp->rx_opt.user_mss = val;
		break;

	case TCP_NODELAY:
		if (val) {
			/* TCP_NODELAY is weaker than TCP_CORK, so that
			 * this option on corked socket is remembered, but
			 * it is not activated until cork is cleared.
			 *
			 * However, when TCP_NODELAY is set we make
			 * an explicit push, which overrides even TCP_CORK
			 * for currently queued segments.
			 */
			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
			tcp_push_pending_frames(sk);
		} else {
			tp->nonagle &= ~TCP_NAGLE_OFF;
		}
		break;

	case TCP_CORK:
		/* When set indicates to always queue non-full frames.
		 * Later the user clears this option and we transmit
		 * any pending partial frames in the queue.  This is
		 * meant to be used alongside sendfile() to get properly
		 * filled frames when the user (for example) must write
		 * out headers with a write() call first and then use
		 * sendfile to send out the data parts.
		 *
		 * TCP_CORK can be set together with TCP_NODELAY and it is
		 * stronger than TCP_NODELAY.
		 */
		if (val) {
			tp->nonagle |= TCP_NAGLE_CORK;
		} else {
			tp->nonagle &= ~TCP_NAGLE_CORK;
			if (tp->nonagle&TCP_NAGLE_OFF)
				tp->nonagle |= TCP_NAGLE_PUSH;
			tcp_push_pending_frames(sk);
		}
		break;

	case TCP_KEEPIDLE:
		if (val < 1 || val > MAX_TCP_KEEPIDLE)
			err = -EINVAL;
		else {
			tp->keepalive_time = val * HZ;
			if (sock_flag(sk, SOCK_KEEPOPEN) &&
			    !((1 << sk->sk_state) &
			      (TCPF_CLOSE | TCPF_LISTEN))) {
				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
				if (tp->keepalive_time > elapsed)
					elapsed = tp->keepalive_time - elapsed;
				else
					elapsed = 0;
				inet_csk_reset_keepalive_timer(sk, elapsed);
			}
		}
		break;
	case TCP_KEEPINTVL:
		if (val < 1 || val > MAX_TCP_KEEPINTVL)
			err = -EINVAL;
		else
			tp->keepalive_intvl = val * HZ;
		break;
	case TCP_KEEPCNT:
		if (val < 1 || val > MAX_TCP_KEEPCNT)
			err = -EINVAL;
		else
			tp->keepalive_probes = val;
		break;
	case TCP_SYNCNT:
		if (val < 1 || val > MAX_TCP_SYNCNT)
			err = -EINVAL;
		else
			icsk->icsk_syn_retries = val;
		break;

	case TCP_LINGER2:
		if (val < 0)
			tp->linger2 = -1;
		else if (val > sysctl_tcp_fin_timeout / HZ)
			tp->linger2 = 0;
		else
			tp->linger2 = val * HZ;
		break;

	case TCP_DEFER_ACCEPT:
		icsk->icsk_accept_queue.rskq_defer_accept = 0;
		if (val > 0) {
			/* Translate value in seconds to number of
			 * retransmits */
			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
			       val > ((TCP_TIMEOUT_INIT / HZ) <<
				       icsk->icsk_accept_queue.rskq_defer_accept))
				icsk->icsk_accept_queue.rskq_defer_accept++;
			icsk->icsk_accept_queue.rskq_defer_accept++;
		}
		break;

	case TCP_WINDOW_CLAMP:
		if (!val) {
			if (sk->sk_state != TCP_CLOSE) {
				err = -EINVAL;
				break;
			}
			tp->window_clamp = 0;
		} else
			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
						SOCK_MIN_RCVBUF / 2 : val;
		break;

	case TCP_QUICKACK:
		if (!val) {
			icsk->icsk_ack.pingpong = 1;
		} else {
			icsk->icsk_ack.pingpong = 0;
			if ((1 << sk->sk_state) &
			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
			    inet_csk_ack_scheduled(sk)) {
				icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
				tcp_cleanup_rbuf(sk, 1);
				if (!(val & 1))
					icsk->icsk_ack.pingpong = 1;
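The listing breaks off in the TCP_QUICKACK handler; page 1 of 5 ends here. The TCP_CORK comment above describes the intended usage pattern: cork the socket, write the headers, sendfile() the body, then uncork to push the final partial frame. A minimal user-space sketch of that pattern (send_headers_then_file is an illustrative helper; error handling omitted):

/* Cork/uncork around write() + sendfile() for full-sized frames. */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/sendfile.h>
#include <sys/socket.h>
#include <unistd.h>

static void send_headers_then_file(int sock, const char *hdr, size_t hdrlen,
				   int filefd, size_t filelen)
{
	int on = 1, off = 0;

	setsockopt(sock, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
	write(sock, hdr, hdrlen);               /* queued, not yet pushed */
	sendfile(sock, filefd, NULL, filelen);  /* fills out full frames */
	setsockopt(sock, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
	/* clearing TCP_CORK pushes any remaining partial frame */
}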

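For the keepalive knobs (TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_KEEPCNT), the code above stores user-supplied seconds multiplied by HZ and only re-arms the timer when SOCK_KEEPOPEN is already set, so SO_KEEPALIVE must be enabled as well. A sketch of typical usage (enable_keepalive is a hypothetical helper; the values are arbitrary):

/* Probe after 60s idle, every 10s, give up after 5 failed probes. */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void enable_keepalive(int fd)
{
	int on = 1, idle = 60, intvl = 10, cnt = 5;

	setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));
}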