/* tcp.c */
sk->sk_max_ack_backlog = 0;
	sk->sk_ack_backlog = 0;
	tp->accept_queue = tp->accept_queue_tail = NULL;
	tp->syn_wait_lock = RW_LOCK_UNLOCKED;
	tcp_delack_init(tp);

	/* The SYN queue state lives in a separately allocated tcp_listen_opt. */
	lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
	if (!lopt)
		return -ENOMEM;

	memset(lopt, 0, sizeof(struct tcp_listen_opt));
	/* Pick the smallest power-of-two SYN queue length (minimum 1<<6)
	 * that covers sysctl_max_syn_backlog. */
	for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
		if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
			break;
	/* Random seed for the SYN-queue hash. */
	get_random_bytes(&lopt->hash_rnd, 4);

	write_lock_bh(&tp->syn_wait_lock);
	tp->listen_opt = lopt;
	write_unlock_bh(&tp->syn_wait_lock);

	/* There is race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters to hash table only
	 * after validation is complete.
	 */
	sk->sk_state = TCP_LISTEN;
	if (!sk->sk_prot->get_port(sk, inet->num)) {
		inet->sport = htons(inet->num);

		sk_dst_reset(sk);
		sk->sk_prot->hash(sk);

		return 0;
	}

	/* get_port() failed: undo the LISTEN transition and free the
	 * SYN queue again. */
	sk->sk_state = TCP_CLOSE;
	write_lock_bh(&tp->syn_wait_lock);
	tp->listen_opt = NULL;
	write_unlock_bh(&tp->syn_wait_lock);
	kfree(lopt);
	return -EADDRINUSE;
}

/*
 *	This routine closes sockets which have been at least partially
 *	opened, but not yet accepted.
 */
static void tcp_listen_stop (struct sock *sk)
{
	struct tcp_opt *tp = tcp_sk(sk);
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *acc_req = tp->accept_queue;
	struct open_request *req;
	int i;

	tcp_delete_keepalive_timer(sk);

	/* make all the listen_opt local to us */
	write_lock_bh(&tp->syn_wait_lock);
	tp->listen_opt = NULL;
	write_unlock_bh(&tp->syn_wait_lock);
	tp->accept_queue = tp->accept_queue_tail = NULL;

	/* First drop every embryonic (SYN_RECV) request still in the
	 * SYN-queue hash. */
	if (lopt->qlen) {
		for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
			while ((req = lopt->syn_table[i]) != NULL) {
				lopt->syn_table[i] = req->dl_next;
				lopt->qlen--;
				tcp_openreq_free(req);

		/* Following specs, it would be better either to send FIN
		 * (and enter FIN-WAIT-1, it is normal close)
		 * or to send active reset (abort).
		 * Certainly, it is pretty dangerous while synflood, but it is
		 * bad justification for our negligence 8)
		 * To be honest, we are not able to make either
		 * of the variants now.			--ANK
		 */
			}
		}
	}
	BUG_TRAP(!lopt->qlen);

	kfree(lopt);

	/* Then abort every fully established but never-accepted child. */
	while ((req = acc_req) != NULL) {
		struct sock *child = req->sk;

		acc_req = req->dl_next;

		local_bh_disable();
		bh_lock_sock(child);
		BUG_TRAP(!sock_owned_by_user(child));
		sock_hold(child);

		tcp_disconnect(child, O_NONBLOCK);

		sock_orphan(child);

		atomic_inc(&tcp_orphan_count);

		tcp_destroy_sock(child);

		bh_unlock_sock(child);
		local_bh_enable();
		sock_put(child);

		sk_acceptq_removed(sk);
		tcp_openreq_fastfree(req);
	}
	BUG_TRAP(!sk->sk_ack_backlog);
}

/* Mark the last queued skb with PSH and remember how far we have pushed. */
static inline void tcp_mark_push(struct tcp_opt *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
	tp->pushed_seq = tp->write_seq;
}

/* Nonzero once more than half the maximum window has gone unpushed. */
static inline int forced_push(struct tcp_opt *tp)
{
	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}

/* Append a freshly allocated skb to the write queue and charge the
 * socket for its memory. */
static inline void skb_entail(struct sock *sk, struct tcp_opt *tp,
			      struct sk_buff *skb)
{
	skb->csum = 0;
	TCP_SKB_CB(skb)->seq = tp->write_seq;
	TCP_SKB_CB(skb)->end_seq = tp->write_seq;
	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
	TCP_SKB_CB(skb)->sacked = 0;
	__skb_queue_tail(&sk->sk_write_queue, skb);
	sk_charge_skb(sk, skb);
	if (!sk->sk_send_head)
		sk->sk_send_head = skb;
	else if (tp->nonagle&TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;
}

/* For MSG_OOB sends: enter urgent mode and tag the skb as urgent. */
static inline void tcp_mark_urg(struct tcp_opt *tp, int flags,
				struct sk_buff *skb)
{
	if (flags & MSG_OOB) {
		tp->urg_mode = 1;
		tp->snd_up = tp->write_seq;
		TCP_SKB_CB(skb)->sacked |= TCPCB_URG;
	}
}

/* Push pending data; MSG_MORE forces corking unless a push is overdue. */
static inline void tcp_push(struct sock *sk, struct tcp_opt *tp, int flags,
			    int mss_now, int nonagle)
{
	if (sk->sk_send_head) {
		struct sk_buff *skb = sk->sk_write_queue.prev;
		if (!(flags & MSG_MORE) || forced_push(tp))
			tcp_mark_push(tp, skb);
		tcp_mark_urg(tp, flags, skb);
		__tcp_push_pending_frames(sk, tp, mss_now,
					  (flags & MSG_MORE) ?
					  TCP_NAGLE_CORK : nonagle);
	}
}

/* Zero-copy sendpage work loop: attach caller pages as skb page
 * fragments on the write queue, pushing as segments fill. */
static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages,
				int poffset, size_t psize, int flags)
{
	struct tcp_opt *tp = tcp_sk(sk);
	int mss_now;
	int err;
	ssize_t copied;
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_err;

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
	copied = 0;

	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

	while (psize > 0) {
		struct sk_buff *skb = sk->sk_write_queue.prev;
		struct page *page = pages[poffset / PAGE_SIZE];
		int copy, i;
		int offset = poffset % PAGE_SIZE;
		int size = min_t(size_t, psize, PAGE_SIZE - offset);

		/* Start a new segment if there is none in flight or the
		 * tail segment is already full. */
		if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) {
new_segment:
			if (!sk_stream_memory_free(sk))
				goto wait_for_sndbuf;

			skb = sk_stream_alloc_pskb(sk, 0, tp->mss_cache,
						   sk->sk_allocation);
			if (!skb)
				goto wait_for_memory;

			skb_entail(sk, tp, skb);
			copy = mss_now;
		}

		if (copy > size)
			copy = size;

		/* Either grow the last fragment, add a new one, or push
		 * and start over when all frag slots are taken. */
		i = skb_shinfo(skb)->nr_frags;
		if (skb_can_coalesce(skb, i, page, offset)) {
			skb_shinfo(skb)->frags[i - 1].size += copy;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, copy);
		} else {
			tcp_mark_push(tp, skb);
			goto new_segment;
		}

		skb->len += copy;
		skb->data_len += copy;
		skb->ip_summed = CHECKSUM_HW;
		tp->write_seq += copy;
		TCP_SKB_CB(skb)->end_seq += copy;
		skb_shinfo(skb)->tso_segs = 0;

		if (!copied)
			TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;

		copied += copy;
		poffset += copy;
		if (!(psize -= copy))
			goto out;

		if (skb->len != mss_now || (flags & MSG_OOB))
			continue;

		if (forced_push(tp)) {
			tcp_mark_push(tp, skb);
			__tcp_push_pending_frames(sk, tp, mss_now,
						  TCP_NAGLE_PUSH);
		} else if (skb == sk->sk_send_head)
			tcp_push_one(sk, mss_now);
		continue;

wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		/* Flush what we have, then sleep for memory/window. */
		if (copied)
			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now,
				 TCP_NAGLE_PUSH);

		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
			goto do_error;

		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
	}

out:
	if (copied)
		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
	return copied;

do_error:
	if (copied)
		goto out;
out_err:
	return sk_stream_error(sk, flags, err);
}

/* sendpage entry point: falls back to a normal copying send unless the
 * route supports SG and some form of hardware checksumming. */
ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
		     size_t size, int flags)
{
	ssize_t res;
	struct sock *sk = sock->sk;

#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)

	if (!(sk->sk_route_caps & NETIF_F_SG) ||
	    !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
		return sock_no_sendpage(sock, page, offset, size, flags);

#undef TCP_ZC_CSUM_FLAGS

	lock_sock(sk);
	TCP_CHECK_TIMER(sk);
	res = do_tcp_sendpages(sk, &page, offset, size, flags);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return res;
}

/* Per-socket cached partially-filled page used by tcp_sendmsg. */
#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
#define TCP_OFF(sk)	(sk->sk_sndmsg_off)

/* Choose the linear-area size for a new skb: for SG-capable routes,
 * clamp to the page break so the head fits one page. */
static inline int select_size(struct sock *sk, struct tcp_opt *tp)
{
	int tmp = tp->mss_cache_std;

	if (sk->sk_route_caps & NETIF_F_SG) {
		int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);

		if (tmp >= pgbreak &&
		    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
			tmp = pgbreak;
	}
	return tmp;
}

int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t size)
{
	struct iovec *iov;
	struct tcp_opt *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int iovlen, flags;
	int mss_now;
	int err, copied;
	long timeo;

	lock_sock(sk);
	TCP_CHECK_TIMER(sk);

	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_err;

	/* This should be in poll */
	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));

	/* Ok commence sending.
	 */
	iovlen = msg->msg_iovlen;
	iov = msg->msg_iov;
	copied = 0;

	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

	/* Outer loop walks the user iovec; inner loop consumes one
	 * segment of it at a time. */
	while (--iovlen >= 0) {
		int seglen = iov->iov_len;
		unsigned char __user *from = iov->iov_base;

		iov++;

		while (seglen > 0) {
			int copy;

			skb = sk->sk_write_queue.prev;

			if (!sk->sk_send_head ||
			    (copy = mss_now - skb->len) <= 0) {

new_segment:
				/* Allocate new segment. If the interface is SG,
				 * allocate skb fitting to single page.
				 */
				if (!sk_stream_memory_free(sk))
					goto wait_for_sndbuf;

				skb = sk_stream_alloc_pskb(sk,
							   select_size(sk, tp),
							   0,
							   sk->sk_allocation);
				if (!skb)
					goto wait_for_memory;

				/*
				 * Check whether we can use HW checksum.
				 */
				if (sk->sk_route_caps &
				    (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
				     NETIF_F_HW_CSUM))
					skb->ip_summed = CHECKSUM_HW;

				skb_entail(sk, tp, skb);
				copy = mss_now;
			}

			/* Try to append data to the end of skb. */
			if (copy > seglen)
				copy = seglen;

			/* Where to copy to? */
			if (skb_tailroom(skb) > 0) {
				/* We have some space in skb head. Superb! */
				if (copy > skb_tailroom(skb))
					copy = skb_tailroom(skb);
				if ((err = skb_add_data(skb, from, copy)) != 0)
					goto do_fault;
			} else {
				int merge = 0;
				int i = skb_shinfo(skb)->nr_frags;
				struct page *page = TCP_PAGE(sk);
				int off = TCP_OFF(sk);

				if (skb_can_coalesce(skb, i, page, off) &&
				    off != PAGE_SIZE) {
					/* We can extend the last page
					 * fragment. */
					merge = 1;
				} else if (i == MAX_SKB_FRAGS ||
					   (!i &&
					   !(sk->sk_route_caps & NETIF_F_SG))) {
					/* Need to add new fragment and cannot
					 * do this because interface is non-SG,
					 * or because all the page slots are
					 * busy. */
					tcp_mark_push(tp, skb);
					goto new_segment;
				} else if (page) {
					/* If page is cached, align
					 * offset to L1 cache boundary
					 */
					off = (off + L1_CACHE_BYTES - 1) &
					      ~(L1_CACHE_BYTES - 1);
					if (off == PAGE_SIZE) {
						put_page(page);
						TCP_PAGE(sk) = page = NULL;
					}
				}

				if (!page) {
					/* Allocate new cache page. */
					if (!(page = sk_stream_alloc_page(sk)))
						goto wait_for_memory;
					off = 0;
				}

				if (copy > PAGE_SIZE - off)
					copy = PAGE_SIZE - off;

				/* Time to copy data. We are close to
				 * the end! */
				err = skb_copy_to_page(sk, from, skb, page,
						       off, copy);
				if (err) {
					/* If this page was new, give it to the
					 * socket so it does not get leaked.
					 */
					if (!TCP_PAGE(sk)) {
						TCP_PAGE(sk) = page;
						TCP_OFF(sk) = 0;
					}
					goto do_error;
				}

				/* Update the skb. */
				if (merge) {
					skb_shinfo(skb)->frags[i - 1].size +=
									copy;
				} else {
					skb_fill_page_desc(skb, i, page, off,
							   copy);
					if (TCP_PAGE(sk)) {
						get_page(page);
					} else if (off + copy < PAGE_SIZE) {
						get_page(page);
						TCP_PAGE(sk) = page;
					}