/*
 * mytcp_input.c — TCP input processing ("my"-prefixed derivative of
 * Linux tcp_input.c).
 */
int mysysctl_tcp_timestamps = 1;int mysysctl_tcp_sack = 1;int mysysctl_tcp_fack = 1;int mysysctl_tcp_window_scaling = 1;int mysysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;int mysysctl_tcp_max_orphans = NR_FILE;int mysysctl_max_syn_backlog;int mysysctl_tcp_adv_win_scale = 2;int mysysctl_tcp_ecn;int mysysctl_tcp_moderate_rcvbuf = 1;int mysysctl_tcp_abc = 1;int mysysctl_tcp_dsack = 1;extern int mysysctl_tcp_keepalive_time;int mysysctl_tcp_rfc1337;int mysysctl_tcp_frto;#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0)#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)#define FLAG_DATA 0x01#define FLAG_WIN_UPDATE 0x02#define FLAG_DATA_ACKED 0x04#define FLAG_RETRANS_DATA_ACKED 0x08#define FLAG_SYN_ACKED 0x10#define FLAG_DATA_SACKED 0x20#define FLAG_ECE 0x40#define FLAG_DATA_LOST 0x80#define FLAG_SLOWPATH 0x100#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0)#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)void mytcp_initialize_rcv_mss(struct sock *sk){ struct tcp_sock *tp = tcp_sk(sk); unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd/2); hint = min(hint, TCP_MIN_RCVMSS); hint = max(hint, TCP_MIN_MSS); inet_csk(sk)->icsk_ack.rcv_mss = hint;}void mytcp_clear_retrans(struct tcp_sock *tp){ tp->left_out = 0; tp->retrans_out = 0; tp->fackets_out = 0; tp->sacked_out = 0; tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = 0;}void mytcp_enter_cwr(struct sock *sk){ struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; tp->snd_ssthresh = 
inet_csk(sk)->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; tp->high_seq = tp->snd_nxt; tp->snd_cwnd_stamp = tcp_time_stamp; TCP_ECN_queue_cwr(tp); tcp_set_ca_state(sk, TCP_CA_CWR); }}static void mytcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep){ u32 new_sample = tp->rcv_rtt_est.rtt; long m = sample; if (m == 0) m = 1; if (new_sample != 0) { if (!win_dep) { m -= (new_sample >> 3); new_sample += m; } else if (m < new_sample) new_sample = m << 3; }else new_sample = m << 3; if (tp->rcv_rtt_est.rtt != new_sample) tp->rcv_rtt_est.rtt = new_sample;}static inline void mytcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb){ struct tcp_sock *tp = tcp_sk(sk); if (tp->rx_opt.rcv_tsecr && (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) mytcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);}static int __mytcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb){ int result; if (sock_owned_by_user(sk)) { local_bh_enable(); result = __tcp_checksum_complete(skb); local_bh_disable(); } else { result = __tcp_checksum_complete(skb); } return result;}static inline int mytcp_may_update_window(const struct tcp_sock *tp, const u32 ack, const u32 ack_seq, const u32 nwin){ //printk(KERN_INFO "%s:%d: %x, %x, %x, %x, wnd: %d\n", __FUNCTION__, __LINE__, ack, tp->snd_una, // ack_seq, tp->snd_wl1, tp->snd_wnd ); return (after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));}static inline int mytcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb){ return skb->ip_summed != CHECKSUM_UNNECESSARY && __mytcp_checksum_complete_user(sk, skb);}static int mytcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_sock *tp){ if (th->doff == sizeof(struct tcphdr)>>2) { tp->rx_opt.saw_tstamp = 0; return 0; }else if( tp->rx_opt.tstamp_ok && th->doff == (sizeof(struct 
tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { __u32 *ptr = (__u32 *)(th + 1); if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { tp->rx_opt.saw_tstamp = 1; ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; tp->rx_opt.rcv_tsecr = ntohl(*ptr); return 1; } } mytcp_parse_options(skb, &tp->rx_opt, 1); return 1;}static int mytcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb){ struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th = skb->h.th; u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; return ( (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) && ack == tp->snd_una && !mytcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);}static inline int mytcp_paws_discard(const struct sock *sk, const struct sk_buff *skb){ const struct tcp_sock *tp = tcp_sk(sk); return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && !mytcp_disordered_ack(sk, skb));}static void mytcp_incr_quickack(struct sock *sk){ struct inet_connection_sock *icsk = inet_csk(sk); unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks==0) quickacks=2; if (quickacks > icsk->icsk_ack.quick) icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);}void mytcp_enter_quickack_mode(struct sock *sk){ struct inet_connection_sock *icsk = inet_csk(sk); mytcp_incr_quickack(sk); icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN;}static void mytcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq){ if (tp->rx_opt.sack_ok && mysysctl_tcp_dsack) { if (before(seq, tp->rcv_nxt)) MYNET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); else MYNET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; tp->duplicate_sack[0].end_seq = end_seq; 
/* --- continuation of mytcp_dsack_set(), opened in the previous chunk --- */
		/* eff_sacks is capped at 4 blocks, minus one if timestamps
		 * are in use (they consume option space). */
		tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok);
	}
}

/*
 * Send a duplicate ACK for an old / partially-old segment; when D-SACK
 * is enabled, also record the duplicate range (clamped at rcv_nxt).
 */
static void mytcp_send_dupack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		MYNET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
		mytcp_enter_quickack_mode(sk);

		if( tp->rx_opt.sack_ok && mysysctl_tcp_dsack ){
			u32 end_seq = TCP_SKB_CB(skb)->end_seq;

			/* Clamp the D-SACK block to the data we have seen. */
			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
				end_seq = tp->rcv_nxt;
			mytcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
		}
	}
	mytcp_send_ack(sk);
}

void mytcp_done(struct sock *sk);

/*
 * Process an incoming RST: map the socket state to the right errno,
 * notify the user (unless the socket is already dead), and tear down.
 */
static void mytcp_reset(struct sock *sk)
{
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
		sk->sk_err = ECONNREFUSED;
		break;
	case TCP_CLOSE_WAIT:
		sk->sk_err = EPIPE;
		break;
	case TCP_CLOSE:
		return;		/* already closed: nothing to report */
	default:
		sk->sk_err = ECONNRESET;
	}

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);

	mytcp_done(sk);
}

/* In-window check: [seq, end_seq) overlaps the current receive window. */
static inline int mytcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
{
	return !before(end_seq, tp->rcv_wup) &&
		!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
}

/* Remember the peer's timestamp value plus our wall-clock stamp. */
static inline void mytcp_store_ts_recent(struct tcp_sock *tp)
{
	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
	tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
}

/*
 * Update ts_recent from an in-order segment: only when the segment is
 * not beyond rcv_wup and the new tsval is not older than ts_recent
 * (or ts_recent has gone stale, > 24 days).
 */
static inline void mytcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
{
	if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
		   xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
			mytcp_store_ts_recent(tp);
	}
}

/*
 * Receiver buffer auto-tuning: once per receive-RTT, measure how much
 * the application copied out and, if allowed, grow sk_rcvbuf and the
 * window clamp to keep up with the sender.
 */
void mytcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int time;
	int space;

	if (tp->rcvq_space.time == 0)
		goto new_measure;	/* first call: just start the clock */

	/* Wait at least one receive-RTT between adjustments. */
	time = tcp_time_stamp - tp->rcvq_space.time;
	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
		return;

	/* Twice the bytes copied in the last RTT, never shrinking. */
	space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
	space = max(tp->rcvq_space.space, space);

	if (tp->rcvq_space.space != space) {
		int rcvmem;

		tp->rcvq_space.space = space;

		if( mysysctl_tcp_moderate_rcvbuf &&
		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
			int new_clamp = space;

			/* Convert bytes to segments, then to true skb
			 * memory cost (payload + headers + skb overhead). */
			space /= tp->advmss;
			if (!space)
				space = 1;
			rcvmem = (tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff));
			while( mytcp_win_from_space(rcvmem) < tp->advmss)
				rcvmem += 128;
			space *= rcvmem;
			space = min(space, sysctl_tcp_rmem[2]);
			if (space > sk->sk_rcvbuf) {
				sk->sk_rcvbuf = space;
				tp->window_clamp = new_clamp;
			}
		}
	}

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tcp_time_stamp;
}

/*
 * Memory-pressure response: stop quick ACKs, possibly shrink
 * sk_rcvbuf toward actual usage, and lower rcv_ssthresh when the
 * socket has overrun its buffer.
 */
static void mytcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !mytcp_memory_pressure &&
	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
				    sysctl_tcp_rmem[2]);
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
}

/*
 * Collapse queued skbs in [start, end) into fewer, denser buffers to
 * reclaim memory.  First skips / frees fully-consumed skbs, then
 * copies data into freshly allocated skbs inserted in place.
 * NOTE: this function continues past this chunk boundary.
 */
static void mytcp_collapse(struct sock *sk, struct sk_buff_head *list,
			   struct sk_buff *head, struct sk_buff *tail,
			   u32 start, u32 end)
{
	struct sk_buff *skb;

	/* Phase 1: find the first skb worth collapsing. */
	for (skb = head; skb != tail; ) {
		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
			/* Entirely below start: free it. */
			struct sk_buff *next = skb->next;

			__skb_unlink(skb, list);
			__kfree_skb(skb);
			MYNET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
			skb = next;
			continue;
		}
		/* Stop at the first skb that is wasteful, overlapping,
		 * or followed by a sequence gap (never SYN/FIN skbs). */
		if (!skb->h.th->syn && !skb->h.th->fin &&
		    (mytcp_win_from_space(skb->truesize) > skb->len ||
		     before(TCP_SKB_CB(skb)->seq, start) ||
		     (skb->next != tail &&
		      TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
			break;
		start = TCP_SKB_CB(skb)->end_seq;
		skb = skb->next;
	}
	if (skb == tail || skb->h.th->syn || skb->h.th->fin)
		return;

	/* Phase 2: repack [start, end) into new tightly-sized skbs. */
	while (before(start, end)) {
		struct sk_buff *nskb;
		int header = skb_headroom(skb);
		int copy = SKB_MAX_ORDER(header, 0);

		if (copy < 0)
			return;
		if (end-start < copy)
			copy = end-start;
		nskb = alloc_skb(copy+header, GFP_ATOMIC);
		if (!nskb)
			return;
		skb_reserve(nskb, header);
/* --- continuation of mytcp_collapse(), opened in the previous chunk --- */
		/* Clone the header region and pointer offsets into nskb. */
		memcpy(nskb->head, skb->head, header);
		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
		nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
		nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
		__skb_insert(nskb, skb->prev, skb, list);
		sk_stream_set_owner_r(nskb, sk);

		/* Fill nskb from successive old skbs, freeing each old
		 * skb once fully consumed. */
		while (copy > 0) {
			int offset = start - TCP_SKB_CB(skb)->seq;
			int size = TCP_SKB_CB(skb)->end_seq - start;

			BUG_ON(offset < 0);
			if (size > 0) {
				size = min(copy, size);
				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
					BUG();
				TCP_SKB_CB(nskb)->end_seq += size;
				copy -= size;
				start += size;
			}
			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
				struct sk_buff *next = skb->next;

				__skb_unlink(skb, list);
				__kfree_skb(skb);
				MYNET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
				skb = next;
				if (skb == tail || skb->h.th->syn || skb->h.th->fin)
					return;
			}
		}
	}
}

/*
 * Walk the out-of-order queue and collapse each run of contiguous /
 * overlapping skbs via mytcp_collapse().  A gap (or the queue head,
 * reached again) terminates the current run.
 */
static void mytcp_collapse_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
	struct sk_buff *head;
	u32 start, end;

	if (skb == NULL)
		return;

	start = TCP_SKB_CB(skb)->seq;
	end = TCP_SKB_CB(skb)->end_seq;
	head = skb;

	for (;;) {
		skb = skb->next;

		/* End of queue or a sequence gap: collapse the run. */
		if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
		    after(TCP_SKB_CB(skb)->seq, end) ||
		    before(TCP_SKB_CB(skb)->end_seq, start)) {
			mytcp_collapse(sk, &tp->out_of_order_queue,
				       head, skb, start, end);
			head = skb;
			if (skb == (struct sk_buff *)&tp->out_of_order_queue)
				break;
			start = TCP_SKB_CB(skb)->seq;
			end = TCP_SKB_CB(skb)->end_seq;
		}else{
			/* Overlapping skb: widen the current run. */
			if (before(TCP_SKB_CB(skb)->seq, start))
				start = TCP_SKB_CB(skb)->seq;
			if (after(TCP_SKB_CB(skb)->end_seq, end))
				end = TCP_SKB_CB(skb)->end_seq;
		}
	}
}

/*
 * Reduce receive-queue memory when over budget: clamp the window,
 * collapse both queues, and as a last resort purge the out-of-order
 * queue entirely.  Returns 0 when back under sk_rcvbuf, -1 when still
 * over (pred_flags is cleared to force the slow path).
 */
static int mytcp_prune_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	MYNET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		mytcp_clamp_window(sk, tp);
	else if( mytcp_memory_pressure )
		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);

	mytcp_collapse_ofo_queue(sk);
	mytcp_collapse(sk, &sk->sk_receive_queue,
		       sk->sk_receive_queue.next,
		       (struct sk_buff*)&sk->sk_receive_queue,
		       tp->copied_seq, tp->rcv_nxt);
	sk_stream_mem_reclaim(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	/* Still over budget: drop all out-of-order data (it was never
	 * acknowledged, so the peer will retransmit). */
	if (!skb_queue_empty(&tp->out_of_order_queue)) {
		MYNET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
		__skb_queue_purge(&tp->out_of_order_queue);
		/* Reset SACK state: the sacked data is gone. */
		if (tp->rx_opt.sack_ok)
			tcp_sack_reset(&tp->rx_opt);
		sk_stream_mem_reclaim(sk);
	}

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	MYNET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);
	tp->pred_flags = 0;	/* disable header prediction */
	return -1;
}

/* Body continues beyond this chunk. */
static int __mytcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
			       const struct sk_buff *skb)
/*
 * (Scraping residue removed: the original page appended the hosting
 * site's keyboard-shortcut help here — copy: Ctrl+C, search: Ctrl+F,
 * fullscreen: F11, theme: Ctrl+Shift+D, shortcuts: ?, font size:
 * Ctrl+= / Ctrl+-.  It is not part of the source file.)
 */