📄 tcp_output.c
字号:
return; tcp_verify_left_out(tp); /* Don't muck with the congestion window here. * Reason is that we do not increase amount of _data_ * in network, but units changed and effective * cwnd/ssthresh really reduced now. */ if (icsk->icsk_ca_state != TCP_CA_Loss) { tp->high_seq = tp->snd_nxt; tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->prior_ssthresh = 0; tp->undo_marker = 0; tcp_set_ca_state(sk, TCP_CA_Loss); } tcp_xmit_retransmit_queue(sk);}/* This retransmits one SKB. Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. */int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb){ struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); unsigned int cur_mss = tcp_current_mss(sk, 0); int err; /* Inconslusive MTU probe */ if (icsk->icsk_mtup.probe_size) { icsk->icsk_mtup.probe_size = 0; } /* Do not sent more than we queued. 1/4 is reserved for possible * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) BUG(); if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } /* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the * case, when window is shrunk to zero. In this case * our retransmit serves as a zero window probe. */ if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd) && TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; if (skb->len > cur_mss) { if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ } /* Collapse two adjacent packets if worthwhile and we can. */ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (!tcp_skb_is_last(sk, skb)) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off * since it is cheap to do so and saves bytes on the network. */ if (skb->len > 0 && (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; } } /* Make a copy, if the first transmission SKB clone we made * is still in somebody's hands, else make a clone. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (err == 0) { /* Update global TCP statistics. */ TCP_INC_STATS(TCP_MIB_RETRANSSEGS); tp->total_retrans++;#if FASTRETRANS_DEBUG > 0 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { if (net_ratelimit()) printk(KERN_DEBUG "retrans_out leaked.\n"); }#endif if (!tp->retrans_out) tp->lost_retrans_low = tp->snd_nxt; TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; tp->undo_retrans++; /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; } return err;}/* This gets called after a retransmit timeout, and the initially * retransmitted data is acknowledged. It tries to continue * resending the rest of the retransmit queue, until either * we've sent it all or the congestion window limit is reached. * If doing SACK, the first ACK which comes back for a timeout * based retransmit packet might feed us FACK information again. * If so, we use it to avoid unnecessarily retransmissions. */void tcp_xmit_retransmit_queue(struct sock *sk){ const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int packet_cnt; if (tp->retransmit_skb_hint) { skb = tp->retransmit_skb_hint; packet_cnt = tp->retransmit_cnt_hint; }else{ skb = tcp_write_queue_head(sk); packet_cnt = 0; } /* First pass: retransmit lost packets. */ if (tp->lost_out) { tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; if (skb == tcp_send_head(sk)) break; /* we could do better than to assign each time */ tp->retransmit_skb_hint = skb; tp->retransmit_cnt_hint = packet_cnt; /* Assume this retransmit will generate * only one packet for congestion window * calculation purposes. This works because * tcp_retransmit_skb() will chop up the * packet to be MSS sized and all the * packet counting works out. */ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return; if (sacked & TCPCB_LOST) { if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { if (tcp_retransmit_skb(sk, skb)) { tp->retransmit_skb_hint = NULL; return; } if (icsk->icsk_ca_state != TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } packet_cnt += tcp_skb_pcount(skb); if (packet_cnt >= tp->lost_out) break; } } } /* OK, demanded retransmission is finished. */ /* Forward retransmissions are possible only during Recovery. */ if (icsk->icsk_ca_state != TCP_CA_Recovery) return; /* No forward retransmissions in Reno are possible. */ if (tcp_is_reno(tp)) return; /* Yeah, we have to make difficult choice between forward transmission * and retransmission... Both ways have their merits... * * For now we do not retransmit anything, while we have some new * segments to send. In the other cases, follow rule 3 for * NextSeg() specified in RFC3517. */ if (tcp_may_send_now(sk)) return; /* If nothing is SACKed, highest_sack in the loop won't be valid */ if (!tp->sacked_out) return; if (tp->forward_skb_hint) skb = tp->forward_skb_hint; else skb = tcp_write_queue_head(sk); tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) break; tp->forward_skb_hint = skb; if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) break; if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) continue; /* Ok, retransmit it. */ if (tcp_retransmit_skb(sk, skb)) { tp->forward_skb_hint = NULL; break; } if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); }}/* Send a fin. The caller locks the socket for us. This cannot be * allowed to fail queueing a FIN frame under any circumstances. */void tcp_send_fin(struct sock *sk){ struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_write_queue_tail(sk); int mss_now; /* Optimization, tack on the FIN if we have a queue of * unsent frames. But be careful about outgoing SACKS * and IP options. */ mss_now = tcp_current_mss(sk, 1); if (tcp_send_head(sk) != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); if (skb) break; yield(); } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ TCP_SKB_CB(skb)->seq = tp->write_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; tcp_queue_skb(sk, skb); } __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);}/* We get here when a process closes a file descriptor (either due to * an explicit close() or as a byproduct of exit()'ing) and there * was unread data in the receive queue. This behavior is recommended * by RFC 2525, section 2.17. -DaveM */void tcp_send_active_reset(struct sock *sk, gfp_t priority){ struct sk_buff *skb; /* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); return; } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; /* Send it off. */ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk); TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);}/* WARNING: This routine must only be called when we have already sent * a SYN packet that crossed the incoming SYN that caused this routine * to get called. If this assumption fails then the initial rcv_wnd * and rcv_wscale values will not be correct. */int tcp_send_synack(struct sock *sk){ struct sk_buff* skb; skb = tcp_write_queue_head(sk); if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); return -EFAULT; } if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); if (nskb == NULL) return -ENOMEM; tcp_unlink_write_queue(skb, sk); skb_header_release(nskb); __tcp_add_write_queue_head(sk, nskb); sk_stream_free_skb(sk, skb); sk_charge_skb(sk, nskb); skb = nskb; } TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; TCP_ECN_send_synack(tcp_sk(sk), skb); } TCP_SKB_CB(skb)->when = tcp_time_stamp; return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);}/* * Prepare a SYN-ACK. */struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req){ struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; int tcp_header_size; struct sk_buff *skb;#ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *md5; __u8 *md5_hash_location;#endif skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (skb == NULL) return NULL; /* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); skb->dst = dst_clone(dst); tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + /* SACK_PERM is in the place of NOP NOP of TS */ ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));#ifdef CONFIG_TCP_MD5SIG /* Are we doing MD5 on this segment? If so - make room for it */ md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); if (md5) tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;#endif skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); th = tcp_hdr(skb); memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); th->source = inet_sk(sk)->sport; th->dest = ireq->rmt_port; TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ __u8 rcv_wscale; /* Set this up on the first call only */ req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(tcp_full_space(sk), dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, &rcv_wscale); ireq->rcv_wscale = rcv_wscale; } /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, ireq->sack_ok, ireq->wscale_ok, ireq->
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -