tcp.c
				break;

			if (sk->state == TCP_CLOSE) {
				if (!sk->done) {
					/* This occurs when user tries to read
					 * from never connected socket.
					 */
					copied = -ENOTCONN;
					break;
				}
				break;
			}

			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
		}

		cleanup_rbuf(sk, copied);

		if (tp->ucopy.task == user_recv) {
			/* Install new reader */
			if (user_recv == NULL && !(flags&(MSG_TRUNC|MSG_PEEK))) {
				user_recv = current;
				tp->ucopy.task = user_recv;
				tp->ucopy.iov = msg->msg_iov;
			}

			tp->ucopy.len = len;

			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
				 (flags&(MSG_PEEK|MSG_TRUNC)));

			/* Ugly... If prequeue is not empty, we have to
			 * process it before releasing socket, otherwise
			 * order will be broken at second iteration.
			 * More elegant solution is required!!!
			 *
			 * Look: we have the following (pseudo)queues:
			 *
			 * 1. packets in flight
			 * 2. backlog
			 * 3. prequeue
			 * 4. receive_queue
			 *
			 * Each queue can be processed only if the next ones
			 * are empty. At this point we have empty receive_queue.
			 * But prequeue _can_ be not empty after second iteration,
			 * when we jumped to start of loop because backlog
			 * processing added something to receive_queue.
			 * We cannot release_sock(), because backlog contains
			 * packets arrived _after_ prequeued ones.
			 *
			 * Shortly, algorithm is clear --- to process all
			 * the queues in order. We could make it more directly,
			 * requeueing packets from backlog to prequeue, if
			 * is not empty. It is more elegant, but eats cycles,
			 * unfortunately.
			 */
			if (skb_queue_len(&tp->ucopy.prequeue))
				goto do_prequeue;

			/* __ Set realtime policy in scheduler __ */
		}

		if (copied >= target) {
			/* Do not sleep, just process backlog. */
			release_sock(sk);
			lock_sock(sk);
		} else {
			timeo = tcp_data_wait(sk, timeo);
		}

		if (user_recv) {
			int chunk;

			/* __ Restore normal policy in scheduler __ */

			if ((chunk = len - tp->ucopy.len) != 0) {
				net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromBacklog += chunk;
				len -= chunk;
				copied += chunk;
			}

			if (tp->rcv_nxt == tp->copied_seq &&
			    skb_queue_len(&tp->ucopy.prequeue)) {
do_prequeue:
				tcp_prequeue_process(sk);

				if ((chunk = len - tp->ucopy.len) != 0) {
					net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk;
					len -= chunk;
					copied += chunk;
				}
			}
		}
		continue;

	found_ok_skb:
		/* Ok so how much can we use? */
		used = skb->len - offset;
		if (len < used)
			used = len;

		/* Do we have urgent data here? */
		if (tp->urg_data) {
			u32 urg_offset = tp->urg_seq - *seq;
			if (urg_offset < used) {
				if (!urg_offset) {
					if (!sk->urginline) {
						++*seq;
						offset++;
						used--;
						if (!used)
							goto skip_copy;
					}
				} else
					used = urg_offset;
			}
		}

		if (!(flags&MSG_TRUNC)) {
			err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, used);
			if (err) {
				/* Exception. Bailout! */
				if (!copied)
					copied = -EFAULT;
				break;
			}
		}

		*seq += used;
		copied += used;
		len -= used;

	skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
			tp->urg_data = 0;
			tcp_fast_path_check(sk, tp);
		}
		if (used + offset < skb->len)
			continue;

		if (skb->h.th->fin)
			goto found_fin_ok;
		if (!(flags & MSG_PEEK))
			tcp_eat_skb(sk, skb);
		continue;

	found_fin_ok:
		/* Process the FIN. */
		++*seq;
		if (!(flags & MSG_PEEK))
			tcp_eat_skb(sk, skb);
		break;
	} while (len > 0);

	if (user_recv) {
		if (skb_queue_len(&tp->ucopy.prequeue)) {
			int chunk;

			tp->ucopy.len = copied > 0 ? len : 0;

			tcp_prequeue_process(sk);

			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
				net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk;
				len -= chunk;
				copied += chunk;
			}
		}

		tp->ucopy.task = NULL;
		tp->ucopy.len = 0;
	}

	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket.
	 * I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
	cleanup_rbuf(sk, copied);

	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

out:
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;

recv_urg:
	err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
	goto out;
}

/*
 *	State processing on a close. This implements the state shift for
 *	sending our FIN frame. Note that we only send a FIN for some
 *	states. A shutdown() may have already sent the FIN, or we may be
 *	closed.
 */

static unsigned char new_state[16] = {
  /* current state:     new state:       action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

static int tcp_close_state(struct sock *sk)
{
	int next = (int) new_state[sk->state];
	int ns = (next & TCP_STATE_MASK);

	tcp_set_state(sk, ns);

	return (next & TCP_ACTION_FIN);
}

/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or set sk->dead.
 */

void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *	Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->state) &
	    (TCPF_ESTABLISHED|TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}

/*
 *	Return 1 if we still have things to send in our buffers.
 */

static inline int closing(struct sock * sk)
{
	return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK));
}

static __inline__ void tcp_kill_sk_queues(struct sock *sk)
{
	/* First the read buffer. */
	__skb_queue_purge(&sk->receive_queue);

	/* Next, the error queue. */
	__skb_queue_purge(&sk->error_queue);

	/* Next, the write queue. */
	BUG_TRAP(skb_queue_empty(&sk->write_queue));

	/* Account for returned memory. */
	tcp_mem_reclaim(sk);

	BUG_TRAP(sk->wmem_queued == 0);
	BUG_TRAP(sk->forward_alloc == 0);

	/* It is _impossible_ for the backlog to contain anything
	 * when we get here.  All user references to this socket
	 * have gone away, only the net layer knows can touch it.
	 */
}

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all.  Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void tcp_destroy_sock(struct sock *sk)
{
	BUG_TRAP(sk->state==TCP_CLOSE);
	BUG_TRAP(sk->dead);

	/* It cannot be in hash table!
	 */
	BUG_TRAP(sk->pprev==NULL);

	/* If it has not 0 sk->num, it must be bound */
	BUG_TRAP(!sk->num || sk->prev!=NULL);

#ifdef TCP_DEBUG
	if (sk->zapped) {
		printk("TCP: double destroy sk=%p\n", sk);
		sock_hold(sk);
	}
	sk->zapped = 1;
#endif

	sk->prot->destroy(sk);

	tcp_kill_sk_queues(sk);

#ifdef INET_REFCNT_DEBUG
	if (atomic_read(&sk->refcnt) != 1) {
		printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
		       sk, atomic_read(&sk->refcnt));
	}
#endif

	atomic_dec(&tcp_orphan_count);
	sock_put(sk);
}

void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;

	lock_sock(sk);
	sk->shutdown = SHUTDOWN_MASK;

	if(sk->state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		tcp_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - skb->h.th->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	tcp_mem_reclaim(sk);

	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
	 * 3.10, we send a RST here because data was lost.  To
	 * witness the awful effects of the old behavior of always
	 * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
	 * a bulk GET in an FTP client, suspend the process, wait
	 * for the client to advertise a zero window, then kill -9
	 * the FTP client, wheee...  Note: timeout is always zero
	 * in such a case.
	 */
	if(data_was_unread != 0) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(TCPAbortOnClose);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, GFP_KERNEL);
	} else if (sk->linger && sk->lingertime==0) {
		/* Check zero linger _after_ checking for unread data. */
		sk->prot->disconnect(sk, 0);
		NET_INC_STATS_USER(TCPAbortOnData);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 */
		tcp_send_fin(sk);
	}

	if (timeout) {
		struct task_struct *tsk = current;
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue(sk->sleep, &wait);

		do {
			set_current_state(TASK_INTERRUPTIBLE);
			if (!closing(sk))
				break;
			release_sock(sk);
			timeout = schedule_timeout(timeout);
			lock_sock(sk);
		} while (!signal_pending(tsk) && timeout);

		tsk->state = TASK_RUNNING;
		remove_wait_queue(sk->sleep, &wait);
	}

adjudge_to_death:
	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);

	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(sk->lock.users==0);

	sock_hold(sk);
	sock_orphan(sk);

	/*	This is a (useful) BSD violating of the RFC.
	 *	There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */

	if (sk->state == TCP_FIN_WAIT2) {
		struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(TCPAbortOnLinger);
		} else {
			int tmo = tcp_fin_time(tp);

			if (tmo > TCP_TIMEWAIT_LEN) {
				tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
			} else {
				atomic_inc(&tcp_orphan_count);
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->state != TCP_CLOSE) {
		tcp_mem_reclaim(sk);
		if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
		    (sk->wmem_queued > SOCK_MIN_SNDBUF &&
		     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
			if (net_ratelimit())
				printk(KERN_INFO "TCP: too many of orphaned sockets\n");
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(TCPAbortOnMemory);
		}
	}
	atomic_inc(&tcp_orphan_count);

	if (sk->state == TCP_CLOSE)
		tcp_destroy_sock(sk);
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

/* These states need RST on ABORT according to RFC793 */

extern __inline__ int tcp_need_reset(int state)
{
	return ((1 << state) &