📄 tcp.c
			break;
		} else {
			if (sk->done)
				break;

			if (sk->err) {
				copied = sock_error(sk);
				break;
			}

			if (sk->shutdown & RCV_SHUTDOWN)
				break;

			if (sk->state == TCP_CLOSE) {
				if (!sk->done) {
					/* This occurs when user tries to read
					 * from a never connected socket.
					 */
					copied = -ENOTCONN;
					break;
				}
				break;
			}

			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
		}

		cleanup_rbuf(sk, copied);

		if (tp->ucopy.task == user_recv) {
			/* Install new reader */
			if (user_recv == NULL && !(flags&(MSG_TRUNC|MSG_PEEK))) {
				user_recv = current;
				tp->ucopy.task = user_recv;
				tp->ucopy.iov = msg->msg_iov;
			}

			tp->ucopy.len = len;

			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
				 (flags&(MSG_PEEK|MSG_TRUNC)));

			/* Ugly... If prequeue is not empty, we have to
			 * process it before releasing socket, otherwise
			 * order will be broken at second iteration.
			 * More elegant solution is required!!!
			 *
			 * Look: we have the following (pseudo)queues:
			 *
			 * 1. packets in flight
			 * 2. backlog
			 * 3. prequeue
			 * 4. receive_queue
			 *
			 * Each queue can be processed only if the next ones
			 * are empty. At this point we have empty receive_queue.
			 * But prequeue _can_ be not empty after second iteration,
			 * when we jumped to start of loop because backlog
			 * processing added something to receive_queue.
			 * We cannot release_sock(), because backlog contains
			 * packets arrived _after_ prequeued ones.
			 *
			 * Shortly, algorithm is clear --- to process all
			 * the queues in order. We could make it more directly,
			 * requeueing packets from backlog to prequeue, if it
			 * is not empty. It is more elegant, but eats cycles,
			 * unfortunately.
			 */
			if (skb_queue_len(&tp->ucopy.prequeue))
				goto do_prequeue;

			/* __ Set realtime policy in scheduler __ */
		}

		if (copied >= target) {
			/* Do not sleep, just process backlog. */
			release_sock(sk);
			lock_sock(sk);
		} else {
			timeo = tcp_data_wait(sk, timeo);
		}

		if (user_recv) {
			int chunk;

			/* __ Restore normal policy in scheduler __ */

			if ((chunk = len - tp->ucopy.len) != 0) {
				net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromBacklog += chunk;
				len -= chunk;
				copied += chunk;
			}

			if (tp->rcv_nxt == tp->copied_seq &&
			    skb_queue_len(&tp->ucopy.prequeue)) {
do_prequeue:
				tcp_prequeue_process(sk);

				if ((chunk = len - tp->ucopy.len) != 0) {
					net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk;
					len -= chunk;
					copied += chunk;
				}
			}
		}
		continue;

	found_ok_skb:
		/* Ok so how much can we use? */
		used = skb->len - offset;
		if (len < used)
			used = len;

		/* Do we have urgent data here? */
		if (tp->urg_data) {
			u32 urg_offset = tp->urg_seq - *seq;
			if (urg_offset < used) {
				if (!urg_offset) {
					if (!sk->urginline) {
						++*seq;
						offset++;
						used--;
					}
				} else
					used = urg_offset;
			}
		}

		err = 0;
		if (!(flags&MSG_TRUNC)) {
			err = memcpy_toiovec(msg->msg_iov,
					     ((unsigned char *)skb->h.th) +
					     skb->h.th->doff*4 + offset,
					     used);
			if (err) {
				/* Exception. Bailout! */
				if (!copied)
					copied = -EFAULT;
				break;
			}
		}

		*seq += used;
		copied += used;
		len -= used;

		if (after(tp->copied_seq, tp->urg_seq)) {
			tp->urg_data = 0;
			if (skb_queue_len(&tp->out_of_order_queue) == 0
#ifdef TCP_FORMAL_WINDOW
			    && tcp_receive_window(tp)
#endif
			    ) {
				tcp_fast_path_on(tp);
			}
		}
		if (used + offset < skb->len)
			continue;

		/* Process the FIN. We may also need to handle PSH
		 * here and make it break out of MSG_WAITALL.
		 */
		if (skb->h.th->fin)
			goto found_fin_ok;
		if (flags & MSG_PEEK)
			continue;
		skb->used = 1;
		tcp_eat_skb(sk, skb);
		continue;

	found_fin_ok:
		++*seq;
		if (flags & MSG_PEEK)
			break;

		/* All is done. */
		skb->used = 1;
		break;
	} while (len > 0);

	if (user_recv) {
		if (skb_queue_len(&tp->ucopy.prequeue)) {
			int chunk;

			tp->ucopy.len = copied > 0 ? len : 0;

			tcp_prequeue_process(sk);

			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
				net_statistics[smp_processor_id()*2+1].TCPDirectCopyFromPrequeue += chunk;
				len -= chunk;
				copied += chunk;
			}
		}

		tp->ucopy.task = NULL;
		tp->ucopy.len = 0;
	}

	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket. I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
	cleanup_rbuf(sk, copied);

	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

out:
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;

recv_urg:
	err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
	goto out;
}
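The loop above treats MSG_PEEK specially: a peeked segment is copied out but never handed to tcp_eat_skb(), so the next call finds it again. A minimal userspace sketch of that contract (hypothetical demo, not part of tcp.c; loopback only, error handling omitted):

/* demo_peek.c -- hypothetical sketch of MSG_PEEK vs. consuming recv() */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	socklen_t alen = sizeof(addr);
	char buf[16];
	int lfd, cfd, afd;

	/* Listener on an ephemeral loopback port. */
	lfd = socket(AF_INET, SOCK_STREAM, 0);
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	bind(lfd, (struct sockaddr *)&addr, sizeof(addr));
	listen(lfd, 1);
	getsockname(lfd, (struct sockaddr *)&addr, &alen);

	/* A loopback connect completes without a concurrent accept(). */
	cfd = socket(AF_INET, SOCK_STREAM, 0);
	connect(cfd, (struct sockaddr *)&addr, sizeof(addr));
	afd = accept(lfd, NULL, NULL);

	write(cfd, "hello", 5);
	recv(afd, buf, sizeof(buf), MSG_PEEK); /* copies, skb stays queued */
	recv(afd, buf, sizeof(buf), 0);        /* consumes: tcp_eat_skb() path */
	printf("normal recv after peek still saw: %.5s\n", buf);
	return 0;
}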
/*
 *	State processing on a close. This implements the state shift for
 *	sending our FIN frame. Note that we only send a FIN for some
 *	states. A shutdown() may have already sent the FIN, or we may be
 *	closed.
 */
static unsigned char new_state[16] = {
  /* current state:	   new state:		action:		*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

static int tcp_close_state(struct sock *sk)
{
	int next = (int) new_state[sk->state];
	int ns = (next & TCP_STATE_MASK);

	tcp_set_state(sk, ns);

	return (next & TCP_ACTION_FIN);
}

/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or set sk->dead.
 */
void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->state) &
	    (TCPF_ESTABLISHED|TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}
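tcp_shutdown() is the kernel half of shutdown(fd, SHUT_WR): tcp_close_state() consults new_state[], and when the row carries TCP_ACTION_FIN, tcp_send_fin() emits the FIN while the receive side keeps working. A hedged userspace sketch of the resulting half-close (hypothetical helper, not part of tcp.c; fd is assumed to be a connected, blocking TCP socket):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Send a request, half-close our side, then drain the reply until the
 * peer's FIN. Sketch only: assumes fd is connected and blocking. */
static int request_and_drain(int fd, const char *req)
{
	char buf[4096];
	ssize_t n;

	if (write(fd, req, strlen(req)) < 0)
		return -1;

	/* ESTABLISHED -> FIN_WAIT1 per new_state[]: our FIN goes out,
	 * but the receive direction stays open. */
	if (shutdown(fd, SHUT_WR) < 0)
		return -1;

	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	return n;	/* 0: peer sent its FIN; -1: error */
}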
/*
 *	Return 1 if we still have things to send in our buffers.
 */
static inline int closing(struct sock * sk)
{
	return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK));
}

static __inline__ void tcp_kill_sk_queues(struct sock *sk)
{
	/* First the read buffer. */
	__skb_queue_purge(&sk->receive_queue);

	/* Next, the error queue. */
	__skb_queue_purge(&sk->error_queue);

	/* Next, the write queue. */
	BUG_TRAP(skb_queue_empty(&sk->write_queue));

	/* Account for returned memory. */
	tcp_mem_reclaim(sk);

	BUG_TRAP(sk->wmem_queued == 0);
	BUG_TRAP(sk->forward_alloc == 0);

	/* It is _impossible_ for the backlog to contain anything
	 * when we get here.  All user references to this socket
	 * have gone away, only the net layer can touch it.
	 */
}

/*
 * At this point, there should be no process reference to this
 * socket, and thus no user references at all.  Therefore we
 * can assume the socket waitqueue is inactive and nobody will
 * try to jump onto it.
 */
void tcp_destroy_sock(struct sock *sk)
{
	BUG_TRAP(sk->state==TCP_CLOSE);
	BUG_TRAP(sk->dead);

	/* It cannot be in hash table! */
	BUG_TRAP(sk->pprev==NULL);

	/* If it has a nonzero sk->num, it must be bound. */
	BUG_TRAP(!sk->num || sk->prev!=NULL);

#ifdef TCP_DEBUG
	if (sk->zapped) {
		printk("TCP: double destroy sk=%p\n", sk);
		sock_hold(sk);
	}
	sk->zapped = 1;
#endif

	sk->prot->destroy(sk);

	tcp_kill_sk_queues(sk);

#ifdef INET_REFCNT_DEBUG
	if (atomic_read(&sk->refcnt) != 1) {
		printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
		       sk, atomic_read(&sk->refcnt));
	}
#endif

	atomic_dec(&tcp_orphan_count);
	sock_put(sk);
}
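tcp_close() below picks one of three exits: unread receive data forces an active reset, SO_LINGER with a zero timeout takes the abortive disconnect path, and otherwise tcp_close_state() decides whether a FIN is due. A hedged sketch of deliberately selecting the zero-linger reset from userspace (hypothetical helper, not part of tcp.c; fd is assumed to be a connected TCP socket):

#include <sys/socket.h>
#include <unistd.h>

/* Abortive close: sets sk->linger with sk->lingertime == 0, so the
 * zero-linger branch of tcp_close() resets the connection instead of
 * going through FIN and TIME_WAIT. */
static void abortive_close(int fd)
{
	struct linger lg = { .l_onoff = 1, .l_linger = 0 };

	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
	close(fd);	/* peer sees RST; no TIME_WAIT on our side */
}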
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;

	lock_sock(sk);
	sk->shutdown = SHUTDOWN_MASK;

	if (sk->state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		tcp_listen_stop(sk);

		goto adjudge_to_death;
	}

	/* We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
			  skb->h.th->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	tcp_mem_reclaim(sk);

	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
	 * 3.10, we send a RST here because data was lost.  To
	 * witness the awful effects of the old behavior of always
	 * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
	 * a bulk GET in an FTP client, suspend the process, wait
	 * for the client to advertise a zero window, then kill -9
	 * the FTP client, wheee...  Note: timeout is always zero
	 * in such a case.
	 */
	if (data_was_unread != 0) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(TCPAbortOnClose);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, GFP_KERNEL);
	} else if (sk->linger && sk->lingertime == 0) {
		/* Check zero linger _after_ checking for unread data. */
		sk->prot->disconnect(sk, 0);
		NET_INC_STATS_USER(TCPAbortOnData);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 */
		tcp_send_fin(sk);
	}

	if (timeout) {
		struct task_struct *tsk = current;
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue(sk->sleep, &wait);

		do {
			set_current_state(TASK_INTERRUPTIBLE);
			if (!closing(sk))
				break;
			release_sock(sk);
			timeout = schedule_timeout(timeout);
			lock_sock(sk);
		} while (!signal_pending(tsk) && timeout);

		tsk->state = TASK_RUNNING;

		remove_wait_queue(sk->sleep, &wait);
	}

adjudge_to_death:
	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);

	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(sk->lock.users == 0);

	sock_hold(sk);
	sock_orphan(sk);

	/*	This is a (useful) BSD violation of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2 option.					--ANK
	 */
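	/* From userspace, the linger2 behaviour mentioned above can be
	 * tuned per socket (sketch, assuming the TCP_LINGER2 sockopt):
	 *
	 *	int fin_timeout = 60;
	 *	setsockopt(fd, IPPROTO_TCP, TCP_LINGER2,
	 *		   &fin_timeout, sizeof(fin_timeout));
	 *
	 * which bounds how long an orphaned socket may sit in FIN_WAIT2;
	 * the system-wide default is the tcp_fin_timeout sysctl.
	 */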