
📄 tcp_ipv4.c

📁 Source code for the lab textbook "Embedded System Design and Example Development, Vol. 2": multithreaded application programming, serial port programming, A/D interface lab, CAN bus communication lab, GPS communication lab, Linux kernel porting and compilation lab, IC card read/write lab, SD driver…
💻 C
📖 Page 1 of 4
			   is safe provided sequence spaces do not
			   overlap i.e. at data rates <= 80Mbit/sec.

			   Actually, the idea is close to VJ's one,
			   only timestamp cache is held not per host,
			   but per port pair and TW bucket is used
			   as state holder.

			   If TW bucket has been already destroyed we
			   fall back to VJ's scheme and use initial
			   timestamp retrieved from peer table.
			 */
			if (tw->ts_recent_stamp) {
				if ((tp->write_seq = tw->snd_nxt+65535+2) == 0)
					tp->write_seq = 1;
				tp->ts_recent = tw->ts_recent;
				tp->ts_recent_stamp = tw->ts_recent_stamp;
				sock_hold(sk2);
				skp = &head->chain;
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
		if(TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk->pprev==NULL);
	if ((sk->next = *skp) != NULL)
		(*skp)->pprev = &sk->next;
	*skp = sk;
	sk->pprev = skp;
	sk->hashent = hash;
	sock_prot_inc_use(sk->prot);
	write_unlock_bh(&head->lock);

	if (tw) {
		/* Silly. Should hash-dance instead... */
		local_bh_disable();
		tcp_tw_deschedule(tw);
		tcp_timewait_kill(tw);
		NET_INC_STATS_BH(TimeWaitRecycled);
		local_bh_enable();

		tcp_tw_put(tw);
	}

	return 0;

not_unique:
	write_unlock_bh(&head->lock);
	return -EADDRNOTAVAIL;
}

/* Hash SYN-SENT socket to established hash table after
 * checking that it is unique. Note, that without kernel lock
 * we MUST make these two operations atomically.
 *
 * Optimization: if it is bound and tcp_bind_bucket has the only
 * owner (us), we need not to scan established bucket.
 */
int tcp_v4_hash_connecting(struct sock *sk)
{
	unsigned short snum = sk->num;
	struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(snum)];
	struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev;

	spin_lock_bh(&head->lock);
	if (tb->owners == sk && sk->bind_next == NULL) {
		__tcp_v4_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock_bh(&head->lock);

		/* No definite answer... Walk to established hash table */
		return tcp_v4_check_established(sk);
	}
}
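/* Illustrative aside, not part of tcp_ipv4.c: a minimal userspace sketch of
 * the sequence-number trick in the TIME-WAIT recycling branch above. The new
 * connection's write_seq starts 65535+2 bytes past the old bucket's snd_nxt,
 * i.e. beyond any window the peer could still advertise without window
 * scaling, so leftover segments from the old connection can never fall into
 * the new sequence space. pick_write_seq() is a hypothetical name used only
 * for this sketch. */
#include <stdint.h>
#include <stdio.h>

static uint32_t pick_write_seq(uint32_t old_snd_nxt)
{
	uint32_t seq = old_snd_nxt + 65535 + 2;	/* skip the maximal unscaled window */
	if (seq == 0)				/* 0 means "unset" to the caller */
		seq = 1;
	return seq;
}

int main(void)
{
	printf("%#x\n", (unsigned)pick_write_seq(0x12345678));	/* 0x12355679 */
	printf("%#x\n", (unsigned)pick_write_seq(0xfffeffff));	/* wraps to 0, forced to 1 */
	return 0;
}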
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
	struct sk_buff *buff;
	struct rtable *rt;
	u32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return(-EINVAL);

	if (usin->sin_family != AF_INET)
		return(-EAFNOSUPPORT);

	nexthop = daddr = usin->sin_addr.s_addr;
	if (sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr) {
		if (daddr == 0)
			return -EINVAL;
		nexthop = sk->protinfo.af_inet.opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, sk->saddr,
			       RT_CONN_FLAGS(sk), sk->bound_dev_if);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	__sk_dst_set(sk, &rt->u.dst);
	sk->route_caps = rt->u.dst.dev->features;

	if (!sk->protinfo.af_inet.opt || !sk->protinfo.af_inet.opt->srr)
		daddr = rt->rt_dst;

	err = -ENOBUFS;
	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->allocation);

	if (buff == NULL)
		goto failure;

	if (!sk->saddr)
		sk->saddr = rt->rt_src;
	sk->rcv_saddr = sk->saddr;

	if (tp->ts_recent_stamp && sk->daddr != daddr) {
		/* Reset inherited state */
		tp->ts_recent = 0;
		tp->ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	if (sysctl_tcp_tw_recycle &&
	    !tp->ts_recent_stamp &&
	    rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);

		/* VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state TIME-WAIT
		 * and initialize ts_recent from it, when trying new connection.
		 */
		if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
			tp->ts_recent_stamp = peer->tcp_ts_stamp;
			tp->ts_recent = peer->tcp_ts;
		}
	}

	sk->dport = usin->sin_port;
	sk->daddr = daddr;

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
							   sk->sport, usin->sin_port);

	tp->ext_header_len = 0;
	if (sk->protinfo.af_inet.opt)
		tp->ext_header_len = sk->protinfo.af_inet.opt->optlen;

	sk->protinfo.af_inet.id = tp->write_seq^jiffies;

	tp->mss_clamp = 536;

	err = tcp_connect(sk, buff);
	if (err == 0)
		return 0;

failure:
	__sk_dst_reset(sk);
	sk->route_caps = 0;
	sk->dport = 0;
	return err;
}

static __inline__ int tcp_v4_iif(struct sk_buff *skb)
{
	return ((struct rtable*)skb->dst)->rt_iif;
}

static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport)
{
	unsigned h = raddr ^ rport;
	h ^= h>>16;
	h ^= h>>8;
	return h&(TCP_SYNQ_HSIZE-1);
}

static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
					      struct open_request ***prevp,
					      __u16 rport,
					      __u32 raddr, __u32 laddr)
{
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *req, **prev;

	for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		if (req->rmt_port == rport &&
		    req->af.v4_req.rmt_addr == raddr &&
		    req->af.v4_req.loc_addr == laddr &&
		    TCP_INET_FAMILY(req->class->family)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}

static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct tcp_listen_opt *lopt = tp->listen_opt;
	unsigned h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port);

	req->expires = jiffies + TCP_TIMEOUT_INIT;
	req->retrans = 0;
	req->sk = NULL;
	req->index = h;
	req->dl_next = lopt->syn_table[h];

	write_lock(&tp->syn_wait_lock);
	lopt->syn_table[h] = req;
	write_unlock(&tp->syn_wait_lock);

	tcp_synq_added(sk);
}

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu)
{
	struct dst_entry *dst;
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * send out by Linux are always <576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	ip_rt_update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst->pmtu && ip_dont_fragment(sk, dst))
		sk->err_soft = EMSGSIZE;

	if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT &&
	    tp->pmtu_cookie > dst->pmtu) {
		tcp_sync_mss(sk, dst->pmtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
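/* Illustrative aside, not part of tcp_ipv4.c: tcp_v4_synq_hash() above folds
 * the remote address and port into the low bits by XOR-ing the word with
 * shifted copies of itself, then masks down to the bucket count. A standalone
 * sketch; 512 matches the TCP_SYNQ_HSIZE used by 2.4 kernels, but treat the
 * constant here as an assumption of this demo. */
#include <stdint.h>
#include <stdio.h>

#define SYNQ_HSIZE 512			/* must be a power of two for the mask */

static unsigned synq_hash(uint32_t raddr, uint16_t rport)
{
	unsigned h = raddr ^ rport;
	h ^= h >> 16;			/* fold the high half down... */
	h ^= h >> 8;			/* ...so every byte influences the low bits */
	return h & (SYNQ_HSIZE - 1);
}

int main(void)
{
	/* 192.0.2.1:12345 in host byte order, purely for illustration */
	printf("bucket = %u\n", synq_hash(0xc0000201, 12345));
	return 0;
}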
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */
void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr*)skb->data;
	struct tcphdr *th = (struct tcphdr*)(skb->data+(iph->ihl<<2));
	struct tcp_opt *tp;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	__u32 seq;
	int err;

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(IcmpInErrors);
		return;
	}

	sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, tcp_v4_iif(skb));
	if (sk == NULL) {
		ICMP_INC_STATS_BH(IcmpInErrors);
		return;
	}
	if (sk->state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket*)sk);
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sk->lock.users != 0)
		NET_INC_STATS_BH(LockDroppedIcmps);

	if (sk->state == TCP_CLOSE)
		goto out;

	tp = &sk->tp_pinfo.af_tcp;
	seq = ntohl(th->seq);
	if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS(OutOfWindowIcmps);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* This is deprecated, but if someone generated it,
		 * we have no reasons to ignore it.
		 */
		if (sk->lock.users == 0)
			tcp_enter_cwr(tp);
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (sk->lock.users == 0)
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->state) {
		struct open_request *req, **prev;
	case TCP_LISTEN:
		if (sk->lock.users != 0)
			goto out;

		req = tcp_v4_search_req(tp, &prev,
					th->dest,
					iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != req->snt_isn) {
			NET_INC_STATS_BH(OutOfWindowIcmps);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (sk->lock.users == 0) {
			TCP_INC_STATS_BH(TcpAttemptFails);
			sk->err = err;

			sk->error_report(sk);

			tcp_done(sk);
		} else {
			sk->err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */
	if (sk->lock.users == 0 && sk->protinfo.af_inet.recverr) {
		sk->err = err;
		sk->error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
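/* Illustrative aside, not part of tcp_ipv4.c: tcp_v4_err() above only trusts
 * an ICMP error whose quoted sequence number lies between snd_una and snd_nxt.
 * The between() it uses is (in 2.4) a wrap-safe comparison on the 32-bit
 * sequence space built from unsigned subtraction; a standalone sketch of the
 * same idea: */
#include <stdint.h>
#include <stdio.h>

/* wrap-safe "seq2 <= seq1 <= seq3" on a 32-bit circular sequence space */
static int seq_between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

int main(void)
{
	/* a send window straddling the 2^32 wrap */
	uint32_t una = 0xffffff00, nxt = 0x00000100;

	printf("%d\n", seq_between(0xfffffff0, una, nxt));	/* 1: in window, pre-wrap */
	printf("%d\n", seq_between(0x00000010, una, nxt));	/* 1: in window, post-wrap */
	printf("%d\n", seq_between(0x00001000, una, nxt));	/* 0: beyond snd_nxt */
	return 0;
}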
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~tcp_v4_check(th, len, sk->saddr, sk->daddr, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(th, len, sk->saddr, sk->daddr,
					 csum_partial((char *)th, th->doff<<2, skb->csum));
	}
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */
static void tcp_v4_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct tcphdr rth;
	struct ip_reply_arg arg;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable*)skb->dst)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rth, 0, sizeof(struct tcphdr));
	rth.dest = th->source;
	rth.source = th->dest;
	rth.doff = sizeof(struct tcphdr)/4;
	rth.rst = 1;

	if (th->ack) {
		rth.seq = th->ack_seq;
	} else {
		rth.ack = 1;
		rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	memset(&arg, 0, sizeof arg);
	arg.iov[0].iov_base = (unsigned char *)&rth;
	arg.iov[0].iov_len  = sizeof rth;
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/
				      sizeof(struct tcphdr),
				      IPPROTO_TCP,
				      0);
	arg.n_iov = 1;
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);

	TCP_INC_STATS_BH(TcpOutSegs);
	TCP_INC_STATS_BH(TcpOutRsts);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
		u32 tsopt[3];
	} rep;
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof arg);

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	arg.n_iov = 1;
	if (ts) {
		rep.tsopt[0] = __constant_htonl((TCPOPT_NOP << 24) |
						(TCPOPT_NOP << 16) |
						(TCPOPT_TIMESTAMP << 8) |
						TCPOLEN_TIMESTAMP);
		rep.tsopt[1] = htonl(tcp_time_stamp);
		rep.tsopt[2] = htonl(ts);
		arg.iov[0].iov_len = sizeof(rep);
	}

	/* Swap the send and the receive. */
	rep.th.dest = th->source;
	rep.th.source = th->dest;
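/* Illustrative aside, not part of tcp_ipv4.c: the ACK number that
 * tcp_v4_send_reset() above puts in a RST answering a segment without ACK is
 * the incoming sequence number advanced over everything that segment consumed:
 * one for SYN, one for FIN, plus the payload bytes (total length minus the
 * doff<<2 header bytes). rst_ack_seq() is a hypothetical helper name used only
 * for this sketch. */
#include <stdint.h>
#include <stdio.h>

static uint32_t rst_ack_seq(uint32_t seq, int syn, int fin,
			    uint32_t seg_len, uint32_t hdr_len)
{
	/* seg_len covers the whole TCP segment; hdr_len is doff << 2 */
	return seq + syn + fin + (seg_len - hdr_len);
}

int main(void)
{
	/* a bare SYN (20-byte header, no data) must be ACKed at seq+1 */
	printf("%u\n", rst_ack_seq(1000, 1, 0, 20, 20));	/* 1001 */
	/* 100 data bytes, no SYN/FIN: the ACK covers the data */
	printf("%u\n", rst_ack_seq(1000, 0, 0, 120, 20));	/* 1100 */
	return 0;
}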
