mytcp.c

来自「一个基于linux的TCP/IP协议栈的实现」· C语言 代码 · 共 916 行 · 第 1/2 页

C
916
字号
wait_for_sndbuf:			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);wait_for_memory:			if( copied )				mytcp_push( sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH );			if( (err = sk_stream_wait_memory(sk, &timeo)) != 0 )				goto do_error;			mss_now = mytcp_current_mss( sk, !(flags&MSG_OOB) );			size_goal = tp->xmit_size_goal;		}	}out:	if (copied)		mytcp_push(sk, tp, flags, mss_now, tp->nonagle);	TCP_CHECK_TIMER(sk);	release_sock(sk);	return copied;do_fault:	if (!skb->len) {		if (sk->sk_send_head == skb)			sk->sk_send_head = NULL;		__skb_unlink(skb, &sk->sk_write_queue);		sk_stream_free_skb(sk, skb);	}do_error:	if (copied)		goto out;out_err:	err = sk_stream_error(sk, flags, err);	TCP_CHECK_TIMER(sk);	release_sock(sk);	return err;}static void mycleanup_rbuf(struct sock *sk, int copied){	struct tcp_sock *tp = tcp_sk(sk);	int time_to_ack = 0;#if TCP_DEBUG	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);	BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));#endif	if( inet_csk_ack_scheduled(sk) ){		const struct inet_connection_sock *icsk = inet_csk(sk);		if (icsk->icsk_ack.blocked ||		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||			(copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&			 !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))			time_to_ack = 1;	}	if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {		__u32 rcv_window_now = tcp_receive_window(tp);		if (2*rcv_window_now <= tp->window_clamp) {			__u32 new_window = __mytcp_select_window(sk);			if (new_window && new_window >= 2 * rcv_window_now)				time_to_ack = 1;		}	}	if( time_to_ack )		mytcp_send_ack(sk);}static void mytcp_prequeue_process(struct sock *sk){	struct sk_buff *skb;	struct tcp_sock *tp = tcp_sk(sk);	MYNET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED);	local_bh_disable();	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)		sk->sk_backlog_rcv(sk, skb);	local_bh_enable();	tp->ucopy.memory = 0;}static int mytcp_recv_urg(struct sock *sk, long timeo,			struct msghdr *msg, int len, int flags, int *addr_len){	struct tcp_sock *tp = tcp_sk(sk);	if( sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || tp->urg_data == TCP_URG_READ )		return -EINVAL;	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))		return -ENOTCONN;	if (tp->urg_data & TCP_URG_VALID) {		int err = 0;		char c = tp->urg_data;		if (!(flags & MSG_PEEK))			tp->urg_data = TCP_URG_READ;		msg->msg_flags |= MSG_OOB;		if (len > 0) {			if (!(flags & MSG_TRUNC))				err = memcpy_toiovec(msg->msg_iov, &c, 1);			len = 1;		} else			msg->msg_flags |= MSG_TRUNC;		return err ? -EFAULT : len;	}	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))		return 0;	return -EAGAIN;}int mytcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,						size_t len, int nonblock, int flags, int *addr_len){	struct tcp_sock *tp = tcp_sk(sk);	int copied = 0;	u32 peek_seq;	u32 *seq;	unsigned long used;	int err;	int target;	long timeo;	struct task_struct *user_recv = NULL;	printk(KERN_INFO "%s:%d: the current: %p\n", __FUNCTION__, __LINE__, current );	lock_sock(sk);	TCP_CHECK_TIMER(sk);	err = -ENOTCONN;	if (sk->sk_state == TCP_LISTEN)		goto out;	timeo = sock_rcvtimeo(sk, nonblock);	if (flags & MSG_OOB)		goto recv_urg;	seq = &tp->copied_seq;	if (flags & MSG_PEEK) {		peek_seq = tp->copied_seq;		seq = &peek_seq;	}	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);	do{		struct sk_buff *skb;		u32 offset;		if( tp->urg_data && tp->urg_seq == *seq ){			if( copied )				break;			if( signal_pending(current) ){				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;				break;			}		}		skb = skb_peek(&sk->sk_receive_queue);		do{			if( !skb )				break;			if( before(*seq, TCP_SKB_CB(skb)->seq) ){				break;			}			offset = *seq - TCP_SKB_CB(skb)->seq;			if (skb->h.th->syn)				offset--;			if (offset < skb->len)				goto found_ok_skb;			if (skb->h.th->fin)				goto found_fin_ok;			BUG_TRAP(flags & MSG_PEEK);			skb = skb->next;		}while( skb != (struct sk_buff *)&sk->sk_receive_queue );		if (copied >= target && !sk->sk_backlog.tail)			break;		if (copied) {			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||							(sk->sk_shutdown & RCV_SHUTDOWN) || !timeo ||							signal_pending(current) || ( flags & MSG_PEEK) )				break;		}else{			if( sock_flag(sk, SOCK_DONE) )				break;			if( sk->sk_err ){				copied = sock_error(sk);				break;			}			if( sk->sk_shutdown & RCV_SHUTDOWN )				break;			if( sk->sk_state == TCP_CLOSE ){				if( !sock_flag(sk, SOCK_DONE) ){					copied = -ENOTCONN;					break;				}				break;			}			if (!timeo) {				copied = -EAGAIN;				break;			}			if (signal_pending(current)) {				copied = sock_intr_errno(timeo);				break;			}		}		mycleanup_rbuf(sk, copied);		if( !sysctl_tcp_low_latency && tp->ucopy.task == user_recv ){			if( !user_recv && !(flags & (MSG_TRUNC | MSG_PEEK)) ){				user_recv = current;				tp->ucopy.task = user_recv;				tp->ucopy.iov = msg->msg_iov;			}			tp->ucopy.len = len;			BUG_TRAP(tp->copied_seq == tp->rcv_nxt || (flags & (MSG_PEEK | MSG_TRUNC)));			if (!skb_queue_empty(&tp->ucopy.prequeue))				goto do_prequeue;		}		if (copied >= target) {			release_sock(sk);			lock_sock(sk);		} else			sk_wait_data(sk, &timeo);		if (user_recv) {			int chunk;			if ((chunk = len - tp->ucopy.len) != 0) {				MYNET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);				len -= chunk;				copied += chunk;			}			if (tp->rcv_nxt == tp->copied_seq &&			    !skb_queue_empty(&tp->ucopy.prequeue)) {do_prequeue:				mytcp_prequeue_process(sk);				if ((chunk = len - tp->ucopy.len) != 0) {					MYNET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);					len -= chunk;					copied += chunk;				}			}		}		if( (flags & MSG_PEEK) && peek_seq != tp->copied_seq ){			if (net_ratelimit())				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",				       current->comm, current->pid);			peek_seq = tp->copied_seq;		}		continue;	found_ok_skb:		used = skb->len - offset;		if (len < used)			used = len;		if (tp->urg_data) {			u32 urg_offset = tp->urg_seq - *seq;			if (urg_offset < used) {				if (!urg_offset) {					if (!sock_flag(sk, SOCK_URGINLINE)) {						++*seq;						offset++;						used--;						if (!used)							goto skip_copy;					}				} else					used = urg_offset;			}		}		if (!(flags & MSG_TRUNC)) {			err = skb_copy_datagram_iovec(skb, offset,						      msg->msg_iov, used);			if (err) {				if (!copied)					copied = -EFAULT;				break;			}		}		*seq += used;		copied += used;		len -= used;		mytcp_rcv_space_adjust(sk);skip_copy:		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {			tp->urg_data = 0;			tcp_fast_path_check(sk, tp);		}		if (used + offset < skb->len)			continue;		if (skb->h.th->fin)			goto found_fin_ok;		if (!(flags & MSG_PEEK))			sk_eat_skb(sk, skb);		continue;	found_fin_ok:		++*seq;		if (!(flags & MSG_PEEK))			sk_eat_skb(sk, skb);		break;	} while (len > 0);	if( user_recv ){		if (!skb_queue_empty(&tp->ucopy.prequeue)) {			int chunk;			tp->ucopy.len = copied > 0 ? len : 0;			mytcp_prequeue_process(sk);			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {				MYNET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);				len -= chunk;				copied += chunk;			}		}		tp->ucopy.task = NULL;		tp->ucopy.len = 0;	}	mycleanup_rbuf(sk, copied);	TCP_CHECK_TIMER(sk);	release_sock(sk);	return copied;out:	TCP_CHECK_TIMER(sk);	release_sock(sk);	return err;recv_urg:	err = mytcp_recv_urg(sk, timeo, msg, len, flags, addr_len);	goto out;}void mytcp_enter_memory_pressure(void){	if (!tcp_memory_pressure) {		MYNET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);		mytcp_memory_pressure = 1;	}}extern void __skb_cb_too_small_for_tcp(int, int);void __init mytcp_init(void){	struct sk_buff *skb = NULL;	int order, i;	if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb))		__skb_cb_too_small_for_tcp( sizeof(struct tcp_skb_cb), sizeof(skb->cb) );	mytcp_hashinfo.bind_bucket_cachep = 			kmem_cache_create("mytcp_bind_bucket", sizeof(struct inet_bind_bucket),							0, SLAB_HWCACHE_ALIGN, NULL, NULL);	if ( !mytcp_hashinfo.bind_bucket_cachep )		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");		mytcp_hashinfo.ehash = (struct inet_ehash_bucket *)			myalloc_large_system_hash("MYTCP established",							sizeof(struct inet_ehash_bucket),							mythash_entries,							(num_physpages >= 128 * 1024) ? 13 : 15, 							HASH_HIGHMEM, &mytcp_hashinfo.ehash_size,							NULL, 0, &ehash_order );	mytcp_hashinfo.ehash_size = (1 << mytcp_hashinfo.ehash_size) >> 1;	for (i = 0; i < (mytcp_hashinfo.ehash_size << 1); i++) {		rwlock_init(&mytcp_hashinfo.ehash[i].lock);		INIT_HLIST_HEAD(&mytcp_hashinfo.ehash[i].chain);	}	mytcp_hashinfo.bhash = (struct inet_bind_hashbucket *)			myalloc_large_system_hash("MYTCP bind",							sizeof(struct inet_bind_hashbucket),							mytcp_hashinfo.ehash_size,							(num_physpages >= 128 * 1024) ? 13 : 15,							HASH_HIGHMEM, &mytcp_hashinfo.bhash_size,							NULL, 64 * 1024, &bhash_order );	mytcp_hashinfo.bhash_size = 1 << mytcp_hashinfo.bhash_size;	for (i = 0; i < mytcp_hashinfo.bhash_size; i++) {			spin_lock_init(&mytcp_hashinfo.bhash[i].lock);			INIT_HLIST_HEAD(&mytcp_hashinfo.bhash[i].chain);	}	for( order = 0; ((1 << order) << PAGE_SHIFT) <					(mytcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));					order++)		;		if (order >= 4){		mysysctl_local_port_range[0] = 32768;		mysysctl_local_port_range[1] = 61000;		mytcp_death_row.sysctl_max_tw_buckets = 180000;		mysysctl_tcp_max_orphans = 4096 << (order - 4);		mysysctl_max_syn_backlog = 1024;	}else if( order < 3 ){		mysysctl_local_port_range[0] = 1024 * (3 - order);		mytcp_death_row.sysctl_max_tw_buckets >>= (3 - order);		mysysctl_tcp_max_orphans >>= (3 - order);		mysysctl_max_syn_backlog = 128;	}	mysysctl_tcp_mem[0] =  768 << order;	mysysctl_tcp_mem[1] = 1024 << order;	mysysctl_tcp_mem[2] = 1536 << order;	if (order < 3) {		mysysctl_tcp_wmem[2] = 64 * 1024;		mysysctl_tcp_rmem[0] = PAGE_SIZE;		mysysctl_tcp_rmem[1] = 43689;		mysysctl_tcp_rmem[2] = 2 * 43689;	}	printk(KERN_INFO "MYTCP: Hash tables configured "					"(established %d bind %d)\n",					mytcp_hashinfo.ehash_size << 1, mytcp_hashinfo.bhash_size);	mytcp_register_congestion_control(&tcp_reno);}void __exit mytcp_exit(void){	int i;	struct hlist_head *head;	struct hlist_node *node, *n;	struct inet_bind_bucket *tb; 	mytcp_unregister_congestion_control(&tcp_reno);	for( i = 0; i < mytcp_hashinfo.bhash_size; i ++ ){		head = &(mytcp_hashinfo.bhash[i].chain);		hlist_for_each_entry_safe(tb, node, n, head, node){			kmem_cache_free( mytcp_hashinfo.bind_bucket_cachep, tb);			}		}	if( mytcp_hashinfo.bhash )		free_pages( (unsigned long)(mytcp_hashinfo.bhash), bhash_order );	if( mytcp_hashinfo.ehash )		free_pages( (unsigned long)(mytcp_hashinfo.ehash), ehash_order );	if( mytcp_hashinfo.bind_bucket_cachep )		kmem_cache_destroy( mytcp_hashinfo.bind_bucket_cachep );}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?