📄 tcp.c
字号:
len -= chunk; copied += chunk; } if (tp->rcv_nxt == tp->copied_seq && skb_queue_len(&tp->ucopy.prequeue)) {do_prequeue: tcp_prequeue_process(sk); if ((chunk = len - tp->ucopy.len) != 0) { NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } } } if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, current->pid); peek_seq = tp->copied_seq; } continue; found_ok_skb: /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) used = len; /* Do we have urgent data here? */ if (tp->urg_data) { u32 urg_offset = tp->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; offset++; used--; if (!used) goto skip_copy; } } else used = urg_offset; } } if (!(flags & MSG_TRUNC)) { err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, used); if (err) { /* Exception. Bailout! */ if (!copied) copied = -EFAULT; break; } } *seq += used; copied += used; len -= used; tcp_rcv_space_adjust(sk);skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { tp->urg_data = 0; tcp_fast_path_check(sk, tp); } if (used + offset < skb->len) continue; if (skb->h.th->fin) goto found_fin_ok; if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); continue; found_fin_ok: /* Process the FIN. */ ++*seq; if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; } while (len > 0); if (user_recv) { if (skb_queue_len(&tp->ucopy.prequeue)) { int chunk; tp->ucopy.len = copied > 0 ? len : 0; tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } } tp->ucopy.task = NULL; tp->ucopy.len = 0; } /* According to UNIX98, msg_name/msg_namelen are ignored * on connected socket. I was just happy when found this 8) --ANK */ /* Clean up data we have read: This will do ACK frames. */ cleanup_rbuf(sk, copied); TCP_CHECK_TIMER(sk); release_sock(sk); return copied;out: TCP_CHECK_TIMER(sk); release_sock(sk); return err;recv_urg: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); goto out;}/* * State processing on a close. This implements the state shift for * sending our FIN frame. Note that we only send a FIN for some * states. A shutdown() may have already sent the FIN, or we may be * closed. */static unsigned char new_state[16] = { /* current state: new state: action: */ /* (Invalid) */ TCP_CLOSE, /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, /* TCP_SYN_SENT */ TCP_CLOSE, /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, /* TCP_TIME_WAIT */ TCP_CLOSE, /* TCP_CLOSE */ TCP_CLOSE, /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, /* TCP_LAST_ACK */ TCP_LAST_ACK, /* TCP_LISTEN */ TCP_CLOSE, /* TCP_CLOSING */ TCP_CLOSING,};static int tcp_close_state(struct sock *sk){ int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; tcp_set_state(sk, ns); return next & TCP_ACTION_FIN;}/* * Shutdown the sending side of a connection. Much like close except * that we don't receive shut down or set_sock_flag(sk, SOCK_DEAD). */void tcp_shutdown(struct sock *sk, int how){ /* We need to grab some memory, and put together a FIN, * and then put it into the queue to be sent. * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92. */ if (!(how & SEND_SHUTDOWN)) return; /* If we've already sent a FIN, or it's a closed state, skip this. */ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_send_fin(sk); }}/* * At this point, there should be no process reference to this * socket, and thus no user references at all. Therefore we * can assume the socket waitqueue is inactive and nobody will * try to jump onto it. */void tcp_destroy_sock(struct sock *sk){ BUG_TRAP(sk->sk_state == TCP_CLOSE); BUG_TRAP(sock_flag(sk, SOCK_DEAD)); /* It cannot be in hash table! */ BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash); sk->sk_prot->destroy(sk); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk);#ifdef INET_REFCNT_DEBUG if (atomic_read(&sk->sk_refcnt) != 1) { printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", sk, atomic_read(&sk->sk_refcnt)); }#endif atomic_dec(&tcp_orphan_count); sock_put(sk);}void tcp_close(struct sock *sk, long timeout){ struct sk_buff *skb; int data_was_unread = 0; lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); /* Special case. */ tcp_listen_stop(sk); goto adjudge_to_death; } /* We need to flush the recv. buffs. We do this only on the * descriptor close, not protocol-sourced closes, because the * reader process may not have drained the data yet! */ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - skb->h.th->fin; data_was_unread += len; __kfree_skb(skb); } sk_stream_mem_reclaim(sk); /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section * 3.10, we send a RST here because data was lost. To * witness the awful effects of the old behavior of always * doing a FIN, run an older 2.1.x kernel or 2.0.x, start * a bulk GET in an FTP client, suspend the process, wait * for the client to advertise a zero window, then kill -9 * the FTP client, wheee... Note: timeout is always zero * in such a case. */ if (data_was_unread) { /* Unread data was tossed, zap the connection. */ NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_KERNEL); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA); } else if (tcp_close_state(sk)) { /* We FIN if the application ate all the data before * zapping the connection. */ /* RED-PEN. Formally speaking, we have broken TCP state * machine. State transitions: * * TCP_ESTABLISHED -> TCP_FIN_WAIT1 * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) * TCP_CLOSE_WAIT -> TCP_LAST_ACK * * are legal only when FIN has been sent (i.e. in window), * rather than queued out of window. Purists blame. * * F.e. "RFC state" is ESTABLISHED, * if Linux state is FIN-WAIT-1, but FIN is still not sent. * * The visible declinations are that sometimes * we enter time-wait state, when it is not required really * (harmless), do not send active resets, when they are * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when * they look as CLOSING or LAST_ACK for Linux) * Probably, I missed some more holelets. * --ANK */ tcp_send_fin(sk); } sk_stream_wait_close(sk, timeout);adjudge_to_death: /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); /* Now socket is owned by kernel and we acquire BH lock to finish close. No need to check for user refs. */ local_bh_disable(); bh_lock_sock(sk); BUG_TRAP(!sock_owned_by_user(sk)); sock_hold(sk); sock_orphan(sk); /* This is a (useful) BSD violating of the RFC. There is a * problem with TCP as specified in that the other end could * keep a socket open forever with no application left this end. * We use a 3 minute timeout (about the same as BSD) then kill * our end. If they send after that then tough - BUT: long enough * that we won't make the old 4*rto = almost no time - whoops * reset mistake. * * Nope, it was not mistake. It is really desired behaviour * f.e. on http servers, when such sockets are useless, but * consume significant resources. Let's do it with special * linger2 option. --ANK */ if (sk->sk_state == TCP_FIN_WAIT2) { struct tcp_opt *tp = tcp_sk(sk); if (tp->linger2 < 0) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); } else { int tmo = tcp_fin_time(tp); if (tmo > TCP_TIMEWAIT_LEN) { tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); } else { atomic_inc(&tcp_orphan_count); tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; } } } if (sk->sk_state != TCP_CLOSE) { sk_stream_mem_reclaim(sk); if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); } } atomic_inc(&tcp_orphan_count); if (sk->sk_state == TCP_CLOSE) tcp_destroy_sock(sk); /* Otherwise, socket is reprieved until protocol close. */out: bh_unlock_sock(sk); local_bh_enable(); sock_put(sk);}/* These states need RST on ABORT according to RFC793 */static inline int tcp_need_reset(int state){ return (1 << state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_SYN_RECV);}int tcp_disconnect(struct sock *sk, int flags){ struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); int err = 0; int old_state = sk->sk_state; if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); /* ABORT function of RFC793 */ if (old_state == TCP_LISTEN) { tcp_listen_stop(sk); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { /* The last check adjusts for discrepance of Linux wrt. RFC * states */ tcp_send_active_reset(sk, gfp_any()); sk->sk_err = ECONNRESET; } else if (old_state == TCP_SYN_SENT) sk->sk_err = ECONNRESET; tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); sk_stream_writequeue_purge(sk); __skb_queue_purge(&tp->out_of_order_queue); inet->dport = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); tp->srtt = 0; if ((tp->write_seq += tp->max_window + 2) == 0) tp->write_seq = 1; tp->backoff = 0; tp->snd_cwnd = 2; tp->probes_out = 0; tcp_set_pcount(&tp->packets_out, 0); tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; tcp_set_ca_state(tp, TCP_CA_Open); tcp_clear_retrans(tp); tcp_delack_init(tp); sk->sk_send_head = NULL; tp->saw_tstamp = 0; tcp_sack_reset(tp); __sk_dst_reset(sk); BUG_TRAP(!inet->num || tp->bind_hash); sk->sk_error_report(sk); return err;}/* * Wait for an incoming connection, avoid race * conditions. This must be called with the socket locked. */static int wait_for_connect(struct sock *sk, long timeo){ struct tcp_opt *tp = tcp_sk(sk); DEFINE_WAIT(wait); int err; /* * True wake-one mechanism for incoming connections: only * one process gets woken up, not the 'whole herd'. * Since we do not 'race & poll' for established sockets * anymore, the common case will execute the loop only once. * * Subtle issue: "add_wait_queue_exclusive()" will be added * after any current non-exclusive waiters, and we know that * it will always _stay_ after any new non-exclusive waiters * because all non-exclusive waiters are added at the * beginning of the wait-queue. As such, it's ok to "drop" * our exclusiveness temporarily when we get woken up without * having to remove and re-insert us on the wait queue. */ for (;;) { prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (!tp->accept_queue)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -