tcp.c
				}
				TCP_OFF(sk) = off + copy;
			}

			if (!copied)
				TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;

			tp->write_seq += copy;
			TCP_SKB_CB(skb)->end_seq += copy;
			skb_shinfo(skb)->tso_segs = 0;

			from += copy;
			copied += copy;
			if ((seglen -= copy) == 0 && iovlen == 0)
				goto out;

			if (skb->len != mss_now || (flags & MSG_OOB))
				continue;

			if (forced_push(tp)) {
				tcp_mark_push(tp, skb);
				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
			} else if (skb == sk->sk_send_head)
				tcp_push_one(sk, mss_now);
			continue;

wait_for_sndbuf:
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
			if (copied)
				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
				goto do_error;

			mss_now = tcp_current_mss(sk, !(flags & MSG_OOB));
		}
	}

out:
	if (copied)
		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

do_fault:
	if (!skb->len) {
		if (sk->sk_send_head == skb)
			sk->sk_send_head = NULL;
		__skb_unlink(skb, skb->list);
		sk_stream_free_skb(sk, skb);
	}

do_error:
	if (copied)
		goto out;
out_err:
	err = sk_stream_error(sk, flags, err);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;
}

/*
 *	Handle reading urgent data. BSD has very simple semantics for
 *	this, no blocking and very strange errors 8)
 */

static int tcp_recv_urg(struct sock *sk, long timeo,
			struct msghdr *msg, int len, int flags,
			int *addr_len)
{
	struct tcp_opt *tp = tcp_sk(sk);

	/* No URG data to read. */
	if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
	    tp->urg_data == TCP_URG_READ)
		return -EINVAL;	/* Yes this is right ! */

	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
		return -ENOTCONN;

	if (tp->urg_data & TCP_URG_VALID) {
		int err = 0;
		char c = tp->urg_data;

		if (!(flags & MSG_PEEK))
			tp->urg_data = TCP_URG_READ;

		/* Read urgent data. */
		msg->msg_flags |= MSG_OOB;

		if (len > 0) {
			if (!(flags & MSG_TRUNC))
				err = memcpy_toiovec(msg->msg_iov, &c, 1);
			len = 1;
		} else
			msg->msg_flags |= MSG_TRUNC;

		return err ? -EFAULT : len;
	}

	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
		return 0;

	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
	 * the available implementations agree in this case:
	 * this call should never block, independent of the
	 * blocking state of the socket.
	 * Mike <pall@rz.uni-karlsruhe.de>
	 */
	return -EAGAIN;
}
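/* Illustration only, not part of this file: a hypothetical userspace
 * consumer of the tcp_recv_urg() path above.  It assumes the peer sent
 * one byte with send(fd, "!", 1, MSG_OOB) and that SO_OOBINLINE is off,
 * matching the SOCK_URGINLINE test above.  As the comment above notes,
 * the call never blocks, regardless of the socket's blocking mode.
 */
#include <sys/socket.h>
#include <errno.h>
#include <stdio.h>

static void read_oob_byte(int fd)
{
	char c;
	ssize_t n = recv(fd, &c, 1, MSG_OOB);

	if (n == 1)
		printf("urgent byte: %c\n", c);		/* TCP_URG_VALID path */
	else if (n < 0 && errno == EAGAIN)
		printf("no urgent data pending\n");	/* -EAGAIN path */
	else if (n < 0 && errno == EINVAL)
		printf("inline OOB or already read\n");	/* -EINVAL path */
}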
/* Clean up the receive buffer for full frames taken by the user,
 * then send an ACK if necessary.  COPIED is the number of bytes
 * tcp_recvmsg has given to the user so far, it speeds up the
 * calculation of whether or not we must ACK for the sake of
 * a window update.
 */
static void cleanup_rbuf(struct sock *sk, int copied)
{
	struct tcp_opt *tp = tcp_sk(sk);
	int time_to_ack = 0;

#if TCP_DEBUG
	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
#endif

	if (tcp_ack_scheduled(tp)) {
		/* Delayed ACKs frequently hit locked sockets during bulk
		 * receive. */
		if (tp->ack.blocked ||
		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
		    tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss ||
		    /*
		     * If this read emptied read buffer, we send ACK, if
		     * connection is not bidirectional, user drained
		     * receive buffer and there was a small segment
		     * in queue.
		     */
		    (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) &&
		     !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
			time_to_ack = 1;
	}

	/* We send an ACK if we can now advertise a non-zero window
	 * which has been raised "significantly".
	 *
	 * Even if window raised up to infinity, do not send window open ACK
	 * in states, where we will not receive more. It is useless.
	 */
	if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
		__u32 rcv_window_now = tcp_receive_window(tp);

		/* Optimize, __tcp_select_window() is not cheap. */
		if (2 * rcv_window_now <= tp->window_clamp) {
			__u32 new_window = __tcp_select_window(sk);

			/* Send ACK now, if this read freed lots of space
			 * in our buffer. Certainly, new_window is new window.
			 * We can advertise it now, if it is not less than current one.
			 * "Lots" means "at least twice" here.
			 */
			if (new_window && new_window >= 2 * rcv_window_now)
				time_to_ack = 1;
		}
	}
	if (time_to_ack)
		tcp_send_ack(sk);
}

static void tcp_prequeue_process(struct sock *sk)
{
	struct sk_buff *skb;
	struct tcp_opt *tp = tcp_sk(sk);

	NET_ADD_STATS_USER(LINUX_MIB_TCPPREQUEUED,
			   skb_queue_len(&tp->ucopy.prequeue));

	/* RX process wants to run with disabled BHs, though it is not
	 * necessary */
	local_bh_disable();
	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
		sk->sk_backlog_rcv(sk, skb);
	local_bh_enable();

	/* Clear memory counter. */
	tp->ucopy.memory = 0;
}

static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
	struct sk_buff *skb;
	u32 offset;

	skb_queue_walk(&sk->sk_receive_queue, skb) {
		offset = seq - TCP_SKB_CB(skb)->seq;
		if (skb->h.th->syn)
			offset--;
		if (offset < skb->len || skb->h.th->fin) {
			*off = offset;
			return skb;
		}
	}
	return NULL;
}

/*
 * This routine provides an alternative to tcp_recvmsg() for routines
 * that would like to handle copying from skbuffs directly in 'sendfile'
 * fashion.
 * Note:
 *	- It is assumed that the socket was locked by the caller.
 *	- The routine does not block.
 *	- At present, there is no support for reading OOB data
 *	  or for 'peeking' the socket using this routine
 *	  (although both would be easy to implement).
 */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
		  sk_read_actor_t recv_actor)
{
	struct sk_buff *skb;
	struct tcp_opt *tp = tcp_sk(sk);
	u32 seq = tp->copied_seq;
	u32 offset;
	int copied = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -ENOTCONN;
	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
		if (offset < skb->len) {
			size_t used, len;

			len = skb->len - offset;
			/* Stop reading if we hit a patch of urgent data */
			if (tp->urg_data) {
				u32 urg_offset = tp->urg_seq - seq;
				if (urg_offset < len)
					len = urg_offset;
				if (!len)
					break;
			}
			used = recv_actor(desc, skb, offset, len);
			if (used <= len) {
				seq += used;
				copied += used;
				offset += used;
			}
			if (offset != skb->len)
				break;
		}
		if (skb->h.th->fin) {
			sk_eat_skb(sk, skb);
			++seq;
			break;
		}
		sk_eat_skb(sk, skb);
		if (!desc->count)
			break;
	}
	tp->copied_seq = seq;

	tcp_rcv_space_adjust(sk);

	/* Clean up data we have read: This will do ACK frames. */
	if (copied)
		cleanup_rbuf(sk, copied);
	return copied;
}
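/* Sketch of a recv_actor callback for tcp_read_sock() above; the function
 * name and the buffer-in-desc->arg.data convention are hypothetical, for
 * illustration only.  The contract visible in tcp_read_sock(): the actor
 * runs under the socket lock, is handed an skb plus an offset and length,
 * and returns the number of bytes it consumed; tcp_read_sock() stops early
 * when the actor consumes less than offered or desc->count reaches zero.
 * Only helpers already used by this file (skb_copy_bits) are assumed.
 */
static int example_copy_actor(read_descriptor_t *desc, struct sk_buff *skb,
			      unsigned int offset, size_t len)
{
	char *buf = desc->arg.data;	/* caller-provided kernel buffer */
	size_t want = len;

	if (want > desc->count)		/* honour the remaining byte budget */
		want = desc->count;
	if (skb_copy_bits(skb, offset, buf + desc->written, want))
		return 0;		/* consumed nothing; loop will stop */
	desc->count -= want;
	desc->written += want;
	return want;
}

/* A typical call, with the socket already locked by the caller:
 *
 *	read_descriptor_t desc = { .count = buflen };
 *	desc.arg.data = buf;
 *	tcp_read_sock(sk, &desc, example_copy_actor);
 */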
/*
 *	This routine copies from a sock struct into the user buffer.
 *
 *	Technical note: in 2.3 we work on _locked_ socket, so that
 *	tricks with *seq access order and skb->users are not required.
 *	Probably, code can be easily improved even more.
 */

int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len, int nonblock, int flags, int *addr_len)
{
	struct tcp_opt *tp = tcp_sk(sk);
	int copied = 0;
	u32 peek_seq;
	u32 *seq;
	unsigned long used;
	int err;
	int target;		/* Read at least this many bytes */
	long timeo;
	struct task_struct *user_recv = NULL;

	lock_sock(sk);

	TCP_CHECK_TIMER(sk);

	err = -ENOTCONN;
	if (sk->sk_state == TCP_LISTEN)
		goto out;

	timeo = sock_rcvtimeo(sk, nonblock);

	/* Urgent data needs to be handled specially. */
	if (flags & MSG_OOB)
		goto recv_urg;

	seq = &tp->copied_seq;
	if (flags & MSG_PEEK) {
		peek_seq = tp->copied_seq;
		seq = &peek_seq;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	do {
		struct sk_buff *skb;
		u32 offset;

		/* Are we at urgent data? Stop if we have read anything
		 * or have SIGURG pending. */
		if (tp->urg_data && tp->urg_seq == *seq) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
						 -EAGAIN;
				break;
			}
		}

		/* Next get a buffer. */

		skb = skb_peek(&sk->sk_receive_queue);
		do {
			if (!skb)
				break;

			/* Now that we have two receive queues this
			 * shouldn't happen.
			 */
			if (before(*seq, TCP_SKB_CB(skb)->seq)) {
				printk(KERN_INFO "recvmsg bug: copied %X "
				       "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
				break;
			}
			offset = *seq - TCP_SKB_CB(skb)->seq;
			if (skb->h.th->syn)
				offset--;
			if (offset < skb->len)
				goto found_ok_skb;
			if (skb->h.th->fin)
				goto found_fin_ok;
			BUG_TRAP(flags & MSG_PEEK);
			skb = skb->next;
		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);

		/* Well, if we have backlog, try to process it now. */

		if (copied >= target && !sk->sk_backlog.tail)
			break;

		if (copied) {
			if (sk->sk_err ||
			    sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    !timeo ||
			    signal_pending(current) ||
			    (flags & MSG_PEEK))
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;

			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}

			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;

			if (sk->sk_state == TCP_CLOSE) {
				if (!sock_flag(sk, SOCK_DONE)) {
					/* This occurs when user tries to read
					 * from never connected socket.
					 */
					copied = -ENOTCONN;
					break;
				}
				break;
			}

			if (!timeo) {
				copied = -EAGAIN;
				break;
			}

			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		cleanup_rbuf(sk, copied);

		if (tp->ucopy.task == user_recv) {
			/* Install new reader */
			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
				user_recv = current;
				tp->ucopy.task = user_recv;
				tp->ucopy.iov = msg->msg_iov;
			}

			tp->ucopy.len = len;

			BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
				 (flags & (MSG_PEEK | MSG_TRUNC)));

			/* Ugly... If prequeue is not empty, we have to
			 * process it before releasing socket, otherwise
			 * order will be broken at second iteration.
			 * More elegant solution is required!!!
			 *
			 * Look: we have the following (pseudo)queues:
			 *
			 * 1. packets in flight
			 * 2. backlog
			 * 3. prequeue
			 * 4. receive_queue
			 *
			 * Each queue can be processed only if the next ones
			 * are empty. At this point we have empty receive_queue.
			 * But prequeue _can_ be not empty after 2nd iteration,
			 * when we jumped to start of loop because backlog
			 * processing added something to receive_queue.
			 * We cannot release_sock(), because backlog contains
			 * packets arrived _after_ prequeued ones.
			 *
			 * Shortly, algorithm is clear --- to process all
			 * the queues in order. We could make it more directly,
			 * requeueing packets from backlog to prequeue, if it
			 * is not empty. It is more elegant, but eats cycles,
			 * unfortunately.
			 */
			if (skb_queue_len(&tp->ucopy.prequeue))
				goto do_prequeue;

			/* __ Set realtime policy in scheduler __ */
		}

		if (copied >= target) {
			/* Do not sleep, just process backlog. */
			release_sock(sk);
			lock_sock(sk);
		} else
			sk_wait_data(sk, &timeo);

		if (user_recv) {
			int chunk;

			/* __ Restore normal policy in scheduler __ */

			if ((chunk = len - tp->ucopy.len) != 0) {
				NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
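/* Illustration only, not part of this file: how the `target' computed by
 * sock_rcvlowat(sk, flags & MSG_WAITALL, len) above looks from userspace.
 * With MSG_WAITALL the target is the full request; otherwise it is the
 * SO_RCVLOWAT value, which defaults to 1.  The wrapper names below are
 * hypothetical.
 */
#include <sys/socket.h>
#include <unistd.h>

static ssize_t read_exact(int fd, void *buf, size_t len)
{
	/* target == len: tcp_recvmsg() keeps blocking (and draining the
	 * backlog and prequeue) until the whole request, EOF, an error
	 * or a signal. */
	return recv(fd, buf, len, MSG_WAITALL);
}

static ssize_t read_some(int fd, void *buf, size_t len)
{
	/* target == SO_RCVLOWAT (default 1): returns as soon as
	 * `copied >= target' and the backlog is empty. */
	return recv(fd, buf, len, 0);
}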