📄 tcp.c
字号:
__u32 rcv_window_now = tcp_receive_window(tp); /* Optimize, __tcp_select_window() is not cheap. */ if (2*rcv_window_now <= tp->window_clamp) { __u32 new_window = __tcp_select_window(sk); /* Send ACK now, if this read freed lots of space * in our buffer. Certainly, new_window is new window. * We can advertise it now, if it is not less than current one. * "Lots" means "at least twice" here. */ if (new_window && new_window >= 2 * rcv_window_now) time_to_ack = 1; } } if (time_to_ack) tcp_send_ack(sk);}static void tcp_prequeue_process(struct sock *sk){ struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED); /* RX process wants to run with disabled BHs, though it is not * necessary */ local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk->sk_backlog_rcv(sk, skb); local_bh_enable(); /* Clear memory counter. */ tp->ucopy.memory = 0;}static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off){ struct sk_buff *skb; u32 offset; skb_queue_walk(&sk->sk_receive_queue, skb) { offset = seq - TCP_SKB_CB(skb)->seq; if (tcp_hdr(skb)->syn) offset--; if (offset < skb->len || tcp_hdr(skb)->fin) { *off = offset; return skb; } } return NULL;}/* * This routine provides an alternative to tcp_recvmsg() for routines * that would like to handle copying from skbuffs directly in 'sendfile' * fashion. * Note: * - It is assumed that the socket was locked by the caller. * - The routine does not block. * - At present, there is no support for reading OOB data * or for 'peeking' the socket using this routine * (although both would be easy to implement). 
*/
/*
 * tcp_read_sock - feed queued receive data to a caller-supplied actor.
 * @sk:         socket to read from
 * @desc:       read descriptor passed through to @recv_actor; reading stops
 *              once desc->count becomes zero
 * @recv_actor: callback invoked as recv_actor(desc, skb, offset, len); it
 *              returns the number of bytes it consumed, or a negative
 *              error code
 *
 * Starting at tp->copied_seq, each matching skb found by tcp_recv_skb() is
 * offered to @recv_actor. When urgent data is pending, the length offered
 * is clipped so that reading stops in front of the urgent byte.
 *
 * Returns the total number of bytes consumed, -ENOTCONN for a listening
 * socket, or the actor's negative error code when the actor failed before
 * anything was consumed.
 */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
		  sk_read_actor_t recv_actor)
{
	struct sk_buff *skb;
	struct tcp_sock *tp = tcp_sk(sk);
	u32 seq = tp->copied_seq;
	u32 offset;
	int copied = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -ENOTCONN;

	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
		if (offset < skb->len) {
			/*
			 * 'used' must be signed: the actor reports errors as
			 * negative values, and with an unsigned type the
			 * 'used < 0' check below could never be true, so the
			 * error would be silently dropped.
			 */
			int used;
			size_t len;

			len = skb->len - offset;
			/* Stop reading if we hit a patch of urgent data */
			if (tp->urg_data) {
				u32 urg_offset = tp->urg_seq - seq;

				if (urg_offset < len)
					len = urg_offset;
				if (!len)
					break;
			}

			used = recv_actor(desc, skb, offset, len);
			if (used < 0) {
				/* Report the error only if nothing was read yet. */
				if (!copied)
					copied = used;
				break;
			} else if ((size_t)used <= len) {
				seq += used;
				copied += used;
				offset += used;
			}

			/* The actor did not take the whole skb: stop here. */
			if (offset != skb->len)
				break;
		}

		if (tcp_hdr(skb)->fin) {
			sk_eat_skb(sk, skb, 0);
			/* Step over the FIN as well. */
			++seq;
			break;
		}
		sk_eat_skb(sk, skb, 0);
		if (!desc->count)
			break;
	}
	tp->copied_seq = seq;

	tcp_rcv_space_adjust(sk);

	/* Clean up data we have read: This will do ACK frames. */
	if (copied > 0)
		tcp_cleanup_rbuf(sk, copied);
	return copied;
}

/* * This routine copies from a sock struct into the user buffer. * * Technical note: in 2.3 we work on _locked_ socket, so that * tricks with *seq access order and skb->users are not required. * Probably, code can be easily improved even more. */int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len){ struct tcp_sock *tp = tcp_sk(sk); int copied = 0; u32 peek_seq; u32 *seq; unsigned long used; int err; int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; int copied_early = 0; struct sk_buff *skb; lock_sock(sk); TCP_CHECK_TIMER(sk); err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. 
*/ if (flags & MSG_OOB) goto recv_urg; seq = &tp->copied_seq; if (flags & MSG_PEEK) { peek_seq = tp->copied_seq; seq = &peek_seq; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);#ifdef CONFIG_NET_DMA tp->ucopy.dma_chan = NULL; preempt_disable(); skb = skb_peek_tail(&sk->sk_receive_queue); { int available = 0; if (skb) available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); if ((available < target) && (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else { preempt_enable_no_resched(); } }#endif do { u32 offset; /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ if (tp->urg_data && tp->urg_seq == *seq) { if (copied) break; if (signal_pending(current)) { copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; break; } } /* Next get a buffer. */ skb = skb_peek(&sk->sk_receive_queue); do { if (!skb) break; /* Now that we have two receive queues this * shouldn't happen. */ if (before(*seq, TCP_SKB_CB(skb)->seq)) { printk(KERN_INFO "recvmsg bug: copied %X " "seq %X\n", *seq, TCP_SKB_CB(skb)->seq); break; } offset = *seq - TCP_SKB_CB(skb)->seq; if (tcp_hdr(skb)->syn) offset--; if (offset < skb->len) goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; BUG_TRAP(flags & MSG_PEEK); skb = skb->next; } while (skb != (struct sk_buff *)&sk->sk_receive_queue); /* Well, if we have backlog, try to process it now yet. 
*/ if (copied >= target && !sk->sk_backlog.tail) break; if (copied) { if (sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || signal_pending(current) || (flags & MSG_PEEK)) break; } else { if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { copied = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_state == TCP_CLOSE) { if (!sock_flag(sk, SOCK_DONE)) { /* This occurs when user tries to read * from never connected socket. */ copied = -ENOTCONN; break; } break; } if (!timeo) { copied = -EAGAIN; break; } if (signal_pending(current)) { copied = sock_intr_errno(timeo); break; } } tcp_cleanup_rbuf(sk, copied); if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) { /* Install new reader */ if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { user_recv = current; tp->ucopy.task = user_recv; tp->ucopy.iov = msg->msg_iov; } tp->ucopy.len = len; BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags & (MSG_PEEK | MSG_TRUNC))); /* Ugly... If prequeue is not empty, we have to * process it before releasing socket, otherwise * order will be broken at second iteration. * More elegant solution is required!!! * * Look: we have the following (pseudo)queues: * * 1. packets in flight * 2. backlog * 3. prequeue * 4. receive_queue * * Each queue can be processed only if the next ones * are empty. At this point we have empty receive_queue. * But prequeue _can_ be not empty after 2nd iteration, * when we jumped to start of loop because backlog * processing added something to receive_queue. * We cannot release_sock(), because backlog contains * packets arrived _after_ prequeued ones. * * Shortly, algorithm is clear --- to process all * the queues in order. We could make it more directly, * requeueing packets from backlog to prequeue, if * is not empty. It is more elegant, but eats cycles, * unfortunately. 
*/ if (!skb_queue_empty(&tp->ucopy.prequeue)) goto do_prequeue; /* __ Set realtime policy in scheduler __ */ } if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); } else sk_wait_data(sk, &timeo);#ifdef CONFIG_NET_DMA tp->ucopy.wakeup = 0;#endif if (user_recv) { int chunk; /* __ Restore normal policy in scheduler __ */ if ((chunk = len - tp->ucopy.len) != 0) { NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } if (tp->rcv_nxt == tp->copied_seq && !skb_queue_empty(&tp->ucopy.prequeue)) {do_prequeue: tcp_prequeue_process(sk); if ((chunk = len - tp->ucopy.len) != 0) { NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } } } if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, task_pid_nr(current)); peek_seq = tp->copied_seq; } continue; found_ok_skb: /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) used = len; /* Do we have urgent data here? */ if (tp->urg_data) { u32 urg_offset = tp->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; offset++; used--; if (!used) goto skip_copy; } } else used = urg_offset; } } if (!(flags & MSG_TRUNC)) {#ifdef CONFIG_NET_DMA if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) tp->ucopy.dma_chan = get_softnet_dma(); if (tp->ucopy.dma_chan) { tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( tp->ucopy.dma_chan, skb, offset, msg->msg_iov, used, tp->ucopy.pinned_list); if (tp->ucopy.dma_cookie < 0) { printk(KERN_ALERT "dma_cookie < 0\n"); /* Exception. Bailout! */ if (!copied) copied = -EFAULT; break; } if ((offset + used) == skb->len) copied_early = 1; } else#endif { err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, used); if (err) { /* Exception. Bailout! 
*/ if (!copied) copied = -EFAULT; break; } } } *seq += used; copied += used; len -= used; tcp_rcv_space_adjust(sk);skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { tp->urg_data = 0; tcp_fast_path_check(sk); } if (used + offset < skb->len) continue; if (tcp_hdr(skb)->fin) goto found_fin_ok; if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, copied_early); copied_early = 0; } continue; found_fin_ok: /* Process the FIN. */ ++*seq; if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, copied_early); copied_early = 0; } break; } while (len > 0); if (user_recv) { if (!skb_queue_empty(&tp->ucopy.prequeue)) { int chunk; tp->ucopy.len = copied > 0 ? len : 0; tcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } } tp->ucopy.task = NULL; tp->ucopy.len = 0; }#ifdef CONFIG_NET_DMA if (tp->ucopy.dma_chan) { dma_cookie_t done, used; dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); while (dma_async_memcpy_complete(tp->ucopy.dma_chan, tp->ucopy.dma_cookie, &done, &used) == DMA_IN_PROGRESS) { /* do partial cleanup of sk_async_wait_queue */ while ((skb = skb_peek(&sk->sk_async_wait_queue)) && (dma_async_is_complete(skb->dma_cookie, done, used) == DMA_SUCCESS)) { __skb_dequeue(&sk->sk_async_wait_queue); kfree_skb(skb);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -