tcp_ipv4.c
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct open_request *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v4_search_req(tp, &prev, th->dest,
					iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != req->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *						--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb)
{
	struct inet_opt *inet = inet_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *	So that we build reply only basing on parameters
 *	arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */
static void tcp_v4_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct tcphdr rth;
	struct ip_reply_arg arg;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rth, 0, sizeof(struct tcphdr));
	rth.dest   = th->source;
	rth.source = th->dest;
	rth.doff   = sizeof(struct tcphdr) / 4;
	rth.rst    = 1;

	if (th->ack) {
		rth.seq = th->ack_seq;
	} else {
		rth.ack = 1;
		rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				    skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof arg);
	arg.iov[0].iov_base = (unsigned char *)&rth;
	arg.iov[0].iov_len  = sizeof rth;
	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/
				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
		u32 tsopt[3];
	} rep;
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof arg);

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				     (TCPOPT_TIMESTAMP << 8) |
				     TCPOLEN_TIMESTAMP);
		rep.tsopt[1] = htonl(tcp_time_stamp);
		rep.tsopt[2] = htonl(ts);
		arg.iov[0].iov_len = sizeof(rep);
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tw->tw_ts_recent);

	tcp_tw_put(tw);
}

static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
	tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent);
}

static struct dst_entry* tcp_v4_route_req(struct sock *sk,
					  struct open_request *req)
{
	struct rtable *rt;
	struct ip_options *opt = req->af.v4_req.opt;
	struct flowi fl = { .oif = sk->sk_bound_dev_if,
			    .nl_u = { .ip4_u =
				      { .daddr = ((opt && opt->srr) ?
						  opt->faddr :
						  req->af.v4_req.rmt_addr),
					.saddr = req->af.v4_req.loc_addr,
					.tos = RT_CONN_FLAGS(sk) } },
			    .proto = IPPROTO_TCP,
			    .uli_u = { .ports =
				       { .sport = inet_sk(sk)->sport,
					 .dport = req->rmt_port } } };

	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return NULL;
	}
	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
		ip_rt_put(rt);
		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return NULL;
	}
	return &rt->u.dst;
}

/*
 *	Send a SYN-ACK after having received an ACK.
 *	This still operates on a open_request only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
			      struct dst_entry *dst)
{
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
		goto out;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v4_check(th, skb->len,
					 req->af.v4_req.loc_addr,
					 req->af.v4_req.rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
					    req->af.v4_req.rmt_addr,
					    req->af.v4_req.opt);
		if (err == NET_XMIT_CN)
			err = 0;
	}

out:
	dst_release(dst);
	return err;
}

/*
 *	IPv4 open_request destructor.
 */
static void tcp_v4_or_free(struct open_request *req)
{
	if (req->af.v4_req.opt)
		kfree(req->af.v4_req.opt);
}

static inline void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(skb->h.th->dest));
	}
}

/*
 * Save and compile IPv4 options into the open_request if needed.
 */
static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
						     struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

/*
 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
 * One SYN_RECV socket costs about 80bytes on a 32bit machine.
 * It would be better to replace it with a global counter for all sockets
 * but then some measure against one socket starving all other sockets
 * would be needed.
 *
 * It was 128 by default. Experiments with real servers show, that
 * it is absolutely not enough even at 100conn/sec. 256 cures most
 * of problems. This value is adjusted to 128 for very small machines
 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
 * Further increasing requires to change hash table size.
 */
int sysctl_max_syn_backlog = 256;

struct or_calltable or_ipv4 = {
	.family		=	PF_INET,
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_or_send_ack,
	.destructor	=	tcp_v4_or_free,
	.send_reset	=	tcp_v4_send_reset,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_opt tp;
	struct open_request *req;
	__u32 saddr = skb->nh.iph->saddr;
	__u32 daddr = skb->nh.iph->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs send to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (tcp_synq_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = tcp_openreq_alloc();
	if (!req)
		goto drop;

	tcp_clear_options(&tp);
	tp.mss_clamp = 536;
	tp.user_mss  = tcp_sk(sk)->user_mss;

	tcp_parse_options(skb, &tp, 0);

	if (want_cookie) {
		tcp_clear_options(&tp);
		tp.saw_tstamp = 0;
	}

	if (tp.saw_tstamp && !tp.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tp.saw_tstamp = 0;
		tp.tstamp_ok  = 0;
	}
	tp.tstamp_ok = tp.saw_tstamp;

	tcp_openreq_init(req, &tp, skb);

	req->af.v4_req.loc_addr = daddr;
	req->af.v4_req.rmt_addr = saddr;
	req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
	req->class = &or_ipv4;
	if (!want_cookie)
		TCP_ECN_create_request(req, skb->h.th);

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tp.saw_tstamp &&
		    sysctl_tcp_tw_recycle &&
		    (dst = tcp_v4_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				dst_release(dst);
				goto drop_and_free;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			NETDEBUG(if (net_ratelimit()) \
					printk(KERN_DEBUG "TCP: drop open "
							  "request from %u.%u."
							  "%u.%u/%u\n", \
					       NIPQUAD(saddr),
					       ntohs(skb->h.th->source)));
			dst_release(dst);
			goto drop_and_free;
		}

		isn = tcp_v4_init_sequence(sk, skb);
	}
	req->snt_isn = isn;

	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	if (want_cookie) {
		tcp_openreq_free(req);
	} else {
		tcp_v4_synq_add(sk, req);
	}
	return 0;

drop_and_free:
	tcp_openreq_free(req);
drop:
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0;
}

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct open_request *req,
				  struct dst_entry *dst)
{
	struct inet_opt *newinet;
	struct tcp_opt *newtp;
	struct sock *newsk;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_dst_cache = dst;
	tcp_v4_setup_caps(newsk, dst);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newinet->daddr = req->af.v4_req.rmt_addr;
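
The ack_seq arithmetic in tcp_v4_send_reset() above is easy to lose among the field swaps: when the offending segment carried no ACK, the RST must acknowledge everything that segment consumed, and SYN and FIN each count for one sequence number on top of the payload length. The following stand-alone user-space sketch just restates that calculation; the helper name rst_ack_seq is hypothetical and not part of tcp_ipv4.c, and all values are in host byte order rather than the network order the kernel uses.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical helper, mirroring the expression
 * htonl(ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2))
 * used by tcp_v4_send_reset() when the incoming segment had no ACK bit. */
static uint32_t rst_ack_seq(uint32_t seq, int syn, int fin,
			    uint32_t seg_len, uint32_t doff_words)
{
	/* Payload length is the total segment length minus the TCP header
	 * (doff counts 32-bit words); SYN and FIN each occupy one sequence
	 * number of their own. */
	return seq + syn + fin + (seg_len - (doff_words << 2));
}

int main(void)
{
	/* A SYN with a 20-byte header and no payload: the RST acknowledges
	 * seq + 1. */
	printf("%u\n", (unsigned)rst_ack_seq(1000, 1, 0, 20, 5)); /* 1001 */
	return 0;
}
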