📄 mytcp_output.c
字号:
extern int mysysctl_tcp_timestamps; extern int mysysctl_tcp_sack;extern int mysysctl_tcp_window_scaling;extern int mysysctl_tcp_rmem[3];extern int mysysctl_tcp_adv_win_scale;extern int mysysctl_tcp_ecn;extern int mytcp_memory_pressure;int mysysctl_tcp_tso_win_divisor = 3;int mysysctl_tcp_retrans_collapse = 1;extern struct tcp_mib *mytcp_statistics[2];#define MYTCP_INC_STATS(field) SNMP_INC_STATS(mytcp_statistics, field)static void myupdate_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb){ sk->sk_send_head = skb->next; if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) sk->sk_send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; tcp_packets_out_inc(sk, tp, skb);}unsigned int mytcp_sync_mss(struct sock *sk, u32 pmtu){ struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr)); if (mss_now > tp->rx_opt.mss_clamp) mss_now = tp->rx_opt.mss_clamp; mss_now -= icsk->icsk_ext_hdr_len; if (mss_now < 48) mss_now = 48; mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); if (tp->max_window && mss_now > (tp->max_window>>1)) mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); icsk->icsk_pmtu_cookie = pmtu; tp->mss_cache = mss_now; return mss_now;}void mytcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale){ unsigned int space = (__space < 0 ? 0 : __space); if (*window_clamp == 0) (*window_clamp) = (65535 << 14); space = min( *window_clamp, space ); if (space > mss) space = (space / mss) * mss; (*rcv_wnd) = min( space, MAX_TCP_WINDOW ); (*rcv_wscale) = 0; printk(KERN_INFO "%s:%d: space: %d, rcv_wnd: %lu\n", __FUNCTION__, __LINE__, space, *rcv_wnd ); if( wscale_ok ){ space = max_t(u32, mysysctl_tcp_rmem[2], sysctl_rmem_max); printk(KERN_INFO "%s:%d: sysctl_rmem_max: %d\n", __FUNCTION__, __LINE__, sysctl_rmem_max ); printk(KERN_INFO "%s:%d: the space: %d\n", __FUNCTION__, __LINE__,space ); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; (*rcv_wscale)++; } } if( mss > (1<<*rcv_wscale) ){ int init_cwnd = 4; if( mss > 1460 * 3 ) init_cwnd = 2; else if( mss > 1460 ) init_cwnd = 3; printk(KERN_INFO "%s:%d: init_cwnd: %d", __FUNCTION__, __LINE__, init_cwnd ); if( *rcv_wnd > init_cwnd * mss ) *rcv_wnd = init_cwnd * mss; } printk(KERN_INFO "%s:%d: rcv_wnd: %d\n", __FUNCTION__, __LINE__, *rcv_wnd ); printk(KERN_INFO "%s:%d: clamp: %lu, scale: %u\n", __FUNCTION__, __LINE__, *window_clamp, *rcv_wscale ); (*window_clamp) = min( 65535U << (*rcv_wscale), *window_clamp );}static inline int mytcp_win_from_space(int space){ return mysysctl_tcp_adv_win_scale <= 0 ? ( space >> (-mysysctl_tcp_adv_win_scale) ) : space - ( space >> mysysctl_tcp_adv_win_scale );}static inline int mytcp_full_space(const struct sock *sk){ return mytcp_win_from_space(sk->sk_rcvbuf); }static void mytcp_connect_init(struct sock *sk){ struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; tp->tcp_header_len = sizeof(struct tcphdr) + (mysysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); if( tp->rx_opt.user_mss ) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; tp->max_window = 0; mytcp_sync_mss(sk, dst_mtu(dst)); if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric(dst, RTAX_ADVMSS); mytcp_initialize_rcv_mss(sk); mytcp_select_initial_window( mytcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, mysysctl_tcp_window_scaling, &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; sk->sk_err = 0; sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, tp->write_seq, 0); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->rcv_nxt = 0; tp->rcv_wup = 0; tp->copied_seq = 0; inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; inet_csk(sk)->icsk_retransmits = 0; mytcp_clear_retrans(tp);}static inline void MYTCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb){ tp->ecn_flags = 0; if( mysysctl_tcp_ecn && !(sk->sk_route_caps & NETIF_F_TSO) ){ TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; tp->ecn_flags = TCP_ECN_OK; sock_set_flag(sk, SOCK_NO_LARGESEND); }}static inline int mytcp_space(const struct sock *sk){ return mytcp_win_from_space(sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc));} u32 __mytcp_select_window(struct sock *sk){ struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int mss = icsk->icsk_ack.rcv_mss; int free_space = mytcp_space(sk); int full_space = min_t(int, tp->window_clamp, mytcp_full_space(sk)); int window; printk(KERN_INFO "%s:%d: mss: %d, free: %d, full: %d, clamp: %d\n", __FUNCTION__, __LINE__,mss, free_space, full_space, tp->window_clamp ); if (mss > full_space) mss = full_space; if (free_space < full_space/2) { icsk->icsk_ack.quick = 0; if( mytcp_memory_pressure ) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); if (free_space < mss) return 0; } printk(KERN_INFO "%s:%d: rcv_ssthresh: %d\n", __FUNCTION__, __LINE__, tp->rcv_ssthresh ); if (free_space > tp->rcv_ssthresh) free_space = tp->rcv_ssthresh; window = tp->rcv_wnd; if (tp->rx_opt.rcv_wscale) { window = free_space; if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window) window = (((window >> tp->rx_opt.rcv_wscale) + 1) << tp->rx_opt.rcv_wscale); } else { if (window <= free_space - mss || window > free_space) window = ( free_space / mss ) * mss; } printk(KERN_INFO "%s:%d: the windows: %d\n", __FUNCTION__, __LINE__, window ); return window;}static u16 mytcp_select_window(struct sock *sk){ struct tcp_sock *tp = tcp_sk(sk); u32 cur_win = tcp_receive_window(tp); u32 new_win = __mytcp_select_window(sk); if(new_win < cur_win) new_win = cur_win; tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; if (!tp->rx_opt.rcv_wscale) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); new_win >>= tp->rx_opt.rcv_wscale; if (new_win == 0) tp->pred_flags = 0; return new_win;}static void mytcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent){ *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); if (ts) { if(sack) *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); else *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); *ptr++ = htonl(tstamp); *ptr++ = htonl(ts_recent); }else if( sack ) *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); if (offer_wscale) *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));}static void mytcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp){ if (tp->rx_opt.tstamp_ok) { *ptr++ = __constant_htonl( (TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP ); *ptr++ = htonl(tstamp); *ptr++ = htonl(tp->rx_opt.ts_recent); } if( tp->rx_opt.eff_sacks ){ struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; int this_sack; *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK << 8) | (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK))); for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { *ptr++ = htonl(sp[this_sack].start_seq); *ptr++ = htonl(sp[this_sack].end_seq); } if (tp->rx_opt.dsack) { tp->rx_opt.dsack = 0; tp->rx_opt.eff_sacks--; } }}static __u16 mytcp_advertise_mss(struct sock *sk){ struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); int mss = tp->advmss; if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) { mss = dst_metric(dst, RTAX_ADVMSS); tp->advmss = mss; } return (__u16)mss;}static inline void mytcp_event_ack_sent(struct sock *sk, unsigned int pkts){ tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);}__u32 mytcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst){ __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); if (!cwnd) { if (tp->mss_cache > 1460) cwnd = 2; else cwnd = (tp->mss_cache > 1095) ? 3 : 4; } return min_t(__u32, cwnd, tp->snd_cwnd_clamp);}static void mytcp_cwnd_restart(struct sock *sk, struct dst_entry *dst){ struct tcp_sock *tp = tcp_sk(sk); s32 delta = tcp_time_stamp - tp->lsndtime; u32 restart_cwnd = mytcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; tcp_ca_event(sk, CA_EVENT_CWND_RESTART); tp->snd_ssthresh = tcp_current_ssthresh(sk); restart_cwnd = min(restart_cwnd, cwnd); while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); tp->snd_cwnd_stamp = tcp_time_stamp; tp->snd_cwnd_used = 0;}static void mytcp_event_data_sent(struct tcp_sock *tp, struct sk_buff *skb, struct sock *sk){ struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto) mytcp_cwnd_restart(sk, __sk_dst_get(sk)); tp->lsndtime = now; if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) icsk->icsk_ack.pingpong = 1;}static int mytcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask){ const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet; struct tcp_sock *tp; struct tcp_skb_cb *tcb; int tcp_header_size; struct tcphdr *th; int sysctl_flags; int err; BUG_ON(!skb || !tcp_skb_pcount(skb)); if (icsk->icsk_ca_ops->rtt_sample) __net_timestamp(skb); if( likely(clone_it) ){ if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else skb = skb_clone(skb, gfp_mask); if (unlikely(!skb)) return -ENOBUFS; } inet = inet_sk(sk); tp = tcp_sk(sk); tcb = TCP_SKB_CB(skb); tcp_header_size = tp->tcp_header_len;#define SYSCTL_FLAG_TSTAMPS 0x1#define SYSCTL_FLAG_WSCALE 0x2#define SYSCTL_FLAG_SACK 0x4 sysctl_flags = 0; if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; if( mysysctl_tcp_timestamps ){ tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; sysctl_flags |= SYSCTL_FLAG_TSTAMPS; } if( mysysctl_tcp_window_scaling ){ tcp_header_size += TCPOLEN_WSCALE_ALIGNED; sysctl_flags |= SYSCTL_FLAG_WSCALE; } if( mysysctl_tcp_sack ){ sysctl_flags |= SYSCTL_FLAG_SACK; if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; } }else if( unlikely(tp->rx_opt.eff_sacks) ){ tcp_header_size += ( TCPOLEN_SACK_BASE_ALIGNED + ( tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK) ); } if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); th = (struct tcphdr *)skb_push( skb, tcp_header_size ); skb->h.th = th; skb_set_owner_w( skb, sk ); th->source = inet->sport; th->dest = inet->dport; th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); *(((__u16 *)th) + 6) = htons( ((tcp_header_size >> 2) << 12) | tcb->flags ); if( unlikely(tcb->flags & TCPCB_FLAG_SYN) ){ th->window = htons( tp->rcv_wnd ); }else{ th->window = htons( mytcp_select_window(sk) ); } th->check = 0; th->urg_ptr = 0; if (unlikely(tp->urg_mode && between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) { th->urg_ptr = htons(tp->snd_up-tcb->seq); th->urg = 1; } if( unlikely(tcb->flags & TCPCB_FLAG_SYN) ){ mytcp_syn_build_options((__u32 *)(th + 1), mytcp_advertise_mss(sk), (sysctl_flags & SYSCTL_FLAG_TSTAMPS), (sysctl_flags & SYSCTL_FLAG_SACK), (sysctl_flags & SYSCTL_FLAG_WSCALE), tp->rx_opt.rcv_wscale, tcb->when, tp->rx_opt.ts_recent); }else{ mytcp_build_and_update_options((__u32 *)(th + 1), tp, tcb->when); TCP_ECN_send(sk, tp, skb, tcp_header_size); } icsk->icsk_af_ops->send_check(sk, skb->len, skb); if (likely(tcb->flags & TCPCB_FLAG_ACK)) mytcp_event_ack_sent(sk, tcp_skb_pcount(skb)); if (skb->len != tcp_header_size) mytcp_event_data_sent(tp, skb, sk); MYTCP_INC_STATS(TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (unlikely(err <= 0)) return err;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -