mytcp.c
来自「一个基于linux的TCP/IP协议栈的实现」· C语言 代码 · 共 916 行 · 第 1/2 页
C
916 行
wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);wait_for_memory: if( copied ) mytcp_push( sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH ); if( (err = sk_stream_wait_memory(sk, &timeo)) != 0 ) goto do_error; mss_now = mytcp_current_mss( sk, !(flags&MSG_OOB) ); size_goal = tp->xmit_size_goal; } }out: if (copied) mytcp_push(sk, tp, flags, mss_now, tp->nonagle); TCP_CHECK_TIMER(sk); release_sock(sk); return copied;do_fault: if (!skb->len) { if (sk->sk_send_head == skb) sk->sk_send_head = NULL; __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); }do_error: if (copied) goto out;out_err: err = sk_stream_error(sk, flags, err); TCP_CHECK_TIMER(sk); release_sock(sk); return err;}static void mycleanup_rbuf(struct sock *sk, int copied){ struct tcp_sock *tp = tcp_sk(sk); int time_to_ack = 0;#if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));#endif if( inet_csk_ack_scheduled(sk) ){ const struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.blocked || tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) { __u32 rcv_window_now = tcp_receive_window(tp); if (2*rcv_window_now <= tp->window_clamp) { __u32 new_window = __mytcp_select_window(sk); if (new_window && new_window >= 2 * rcv_window_now) time_to_ack = 1; } } if( time_to_ack ) mytcp_send_ack(sk);}static void mytcp_prequeue_process(struct sock *sk){ struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); MYNET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED); local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) sk->sk_backlog_rcv(sk, skb); local_bh_enable(); tp->ucopy.memory = 0;}static int mytcp_recv_urg(struct sock *sk, long timeo, struct msghdr *msg, int len, int flags, int *addr_len){ struct tcp_sock *tp = tcp_sk(sk); if( sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || tp->urg_data == TCP_URG_READ ) return -EINVAL; if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE)) return -ENOTCONN; if (tp->urg_data & TCP_URG_VALID) { int err = 0; char c = tp->urg_data; if (!(flags & MSG_PEEK)) tp->urg_data = TCP_URG_READ; msg->msg_flags |= MSG_OOB; if (len > 0) { if (!(flags & MSG_TRUNC)) err = memcpy_toiovec(msg->msg_iov, &c, 1); len = 1; } else msg->msg_flags |= MSG_TRUNC; return err ? -EFAULT : len; } if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN)) return 0; return -EAGAIN;}int mytcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len){ struct tcp_sock *tp = tcp_sk(sk); int copied = 0; u32 peek_seq; u32 *seq; unsigned long used; int err; int target; long timeo; struct task_struct *user_recv = NULL; printk(KERN_INFO "%s:%d: the current: %p\n", __FUNCTION__, __LINE__, current ); lock_sock(sk); TCP_CHECK_TIMER(sk); err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; timeo = sock_rcvtimeo(sk, nonblock); if (flags & MSG_OOB) goto recv_urg; seq = &tp->copied_seq; if (flags & MSG_PEEK) { peek_seq = tp->copied_seq; seq = &peek_seq; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); do{ struct sk_buff *skb; u32 offset; if( tp->urg_data && tp->urg_seq == *seq ){ if( copied ) break; if( signal_pending(current) ){ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; break; } } skb = skb_peek(&sk->sk_receive_queue); do{ if( !skb ) break; if( before(*seq, TCP_SKB_CB(skb)->seq) ){ break; } offset = *seq - TCP_SKB_CB(skb)->seq; if (skb->h.th->syn) offset--; if (offset < skb->len) goto found_ok_skb; if (skb->h.th->fin) goto found_fin_ok; BUG_TRAP(flags & MSG_PEEK); skb = skb->next; }while( skb != (struct sk_buff *)&sk->sk_receive_queue ); if (copied >= target && !sk->sk_backlog.tail) break; if (copied) { if (sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || signal_pending(current) || ( flags & MSG_PEEK) ) break; }else{ if( sock_flag(sk, SOCK_DONE) ) break; if( sk->sk_err ){ copied = sock_error(sk); break; } if( sk->sk_shutdown & RCV_SHUTDOWN ) break; if( sk->sk_state == TCP_CLOSE ){ if( !sock_flag(sk, SOCK_DONE) ){ copied = -ENOTCONN; break; } break; } if (!timeo) { copied = -EAGAIN; break; } if (signal_pending(current)) { copied = sock_intr_errno(timeo); break; } } mycleanup_rbuf(sk, copied); if( !sysctl_tcp_low_latency && tp->ucopy.task == user_recv ){ if( !user_recv && !(flags & (MSG_TRUNC | MSG_PEEK)) ){ user_recv = current; tp->ucopy.task = user_recv; tp->ucopy.iov = msg->msg_iov; } tp->ucopy.len = len; BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags & (MSG_PEEK | MSG_TRUNC))); if (!skb_queue_empty(&tp->ucopy.prequeue)) goto do_prequeue; } if (copied >= target) { release_sock(sk); lock_sock(sk); } else sk_wait_data(sk, &timeo); if (user_recv) { int chunk; if ((chunk = len - tp->ucopy.len) != 0) { MYNET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; } if (tp->rcv_nxt == tp->copied_seq && !skb_queue_empty(&tp->ucopy.prequeue)) {do_prequeue: mytcp_prequeue_process(sk); if ((chunk = len - tp->ucopy.len) != 0) { MYNET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } } } if( (flags & MSG_PEEK) && peek_seq != tp->copied_seq ){ if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, current->pid); peek_seq = tp->copied_seq; } continue; found_ok_skb: used = skb->len - offset; if (len < used) used = len; if (tp->urg_data) { u32 urg_offset = tp->urg_seq - *seq; if (urg_offset < used) { if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; offset++; used--; if (!used) goto skip_copy; } } else used = urg_offset; } } if (!(flags & MSG_TRUNC)) { err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, used); if (err) { if (!copied) copied = -EFAULT; break; } } *seq += used; copied += used; len -= used; mytcp_rcv_space_adjust(sk);skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { tp->urg_data = 0; tcp_fast_path_check(sk, tp); } if (used + offset < skb->len) continue; if (skb->h.th->fin) goto found_fin_ok; if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); continue; found_fin_ok: ++*seq; if (!(flags & MSG_PEEK)) sk_eat_skb(sk, skb); break; } while (len > 0); if( user_recv ){ if (!skb_queue_empty(&tp->ucopy.prequeue)) { int chunk; tp->ucopy.len = copied > 0 ? len : 0; mytcp_prequeue_process(sk); if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { MYNET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; } } tp->ucopy.task = NULL; tp->ucopy.len = 0; } mycleanup_rbuf(sk, copied); TCP_CHECK_TIMER(sk); release_sock(sk); return copied;out: TCP_CHECK_TIMER(sk); release_sock(sk); return err;recv_urg: err = mytcp_recv_urg(sk, timeo, msg, len, flags, addr_len); goto out;}void mytcp_enter_memory_pressure(void){ if (!tcp_memory_pressure) { MYNET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES); mytcp_memory_pressure = 1; }}extern void __skb_cb_too_small_for_tcp(int, int);void __init mytcp_init(void){ struct sk_buff *skb = NULL; int order, i; if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) __skb_cb_too_small_for_tcp( sizeof(struct tcp_skb_cb), sizeof(skb->cb) ); mytcp_hashinfo.bind_bucket_cachep = kmem_cache_create("mytcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if ( !mytcp_hashinfo.bind_bucket_cachep ) panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); mytcp_hashinfo.ehash = (struct inet_ehash_bucket *) myalloc_large_system_hash("MYTCP established", sizeof(struct inet_ehash_bucket), mythash_entries, (num_physpages >= 128 * 1024) ? 13 : 15, HASH_HIGHMEM, &mytcp_hashinfo.ehash_size, NULL, 0, &ehash_order ); mytcp_hashinfo.ehash_size = (1 << mytcp_hashinfo.ehash_size) >> 1; for (i = 0; i < (mytcp_hashinfo.ehash_size << 1); i++) { rwlock_init(&mytcp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&mytcp_hashinfo.ehash[i].chain); } mytcp_hashinfo.bhash = (struct inet_bind_hashbucket *) myalloc_large_system_hash("MYTCP bind", sizeof(struct inet_bind_hashbucket), mytcp_hashinfo.ehash_size, (num_physpages >= 128 * 1024) ? 13 : 15, HASH_HIGHMEM, &mytcp_hashinfo.bhash_size, NULL, 64 * 1024, &bhash_order ); mytcp_hashinfo.bhash_size = 1 << mytcp_hashinfo.bhash_size; for (i = 0; i < mytcp_hashinfo.bhash_size; i++) { spin_lock_init(&mytcp_hashinfo.bhash[i].lock); INIT_HLIST_HEAD(&mytcp_hashinfo.bhash[i].chain); } for( order = 0; ((1 << order) << PAGE_SHIFT) < (mytcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); order++) ; if (order >= 4){ mysysctl_local_port_range[0] = 32768; mysysctl_local_port_range[1] = 61000; mytcp_death_row.sysctl_max_tw_buckets = 180000; mysysctl_tcp_max_orphans = 4096 << (order - 4); mysysctl_max_syn_backlog = 1024; }else if( order < 3 ){ mysysctl_local_port_range[0] = 1024 * (3 - order); mytcp_death_row.sysctl_max_tw_buckets >>= (3 - order); mysysctl_tcp_max_orphans >>= (3 - order); mysysctl_max_syn_backlog = 128; } mysysctl_tcp_mem[0] = 768 << order; mysysctl_tcp_mem[1] = 1024 << order; mysysctl_tcp_mem[2] = 1536 << order; if (order < 3) { mysysctl_tcp_wmem[2] = 64 * 1024; mysysctl_tcp_rmem[0] = PAGE_SIZE; mysysctl_tcp_rmem[1] = 43689; mysysctl_tcp_rmem[2] = 2 * 43689; } printk(KERN_INFO "MYTCP: Hash tables configured " "(established %d bind %d)\n", mytcp_hashinfo.ehash_size << 1, mytcp_hashinfo.bhash_size); mytcp_register_congestion_control(&tcp_reno);}void __exit mytcp_exit(void){ int i; struct hlist_head *head; struct hlist_node *node, *n; struct inet_bind_bucket *tb; mytcp_unregister_congestion_control(&tcp_reno); for( i = 0; i < mytcp_hashinfo.bhash_size; i ++ ){ head = &(mytcp_hashinfo.bhash[i].chain); hlist_for_each_entry_safe(tb, node, n, head, node){ kmem_cache_free( mytcp_hashinfo.bind_bucket_cachep, tb); } } if( mytcp_hashinfo.bhash ) free_pages( (unsigned long)(mytcp_hashinfo.bhash), bhash_order ); if( mytcp_hashinfo.ehash ) free_pages( (unsigned long)(mytcp_hashinfo.ehash), ehash_order ); if( mytcp_hashinfo.bind_bucket_cachep ) kmem_cache_destroy( mytcp_hashinfo.bind_bucket_cachep );}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?