ip_vs_conn.c

From the Linux 2.4.29 operating-system source code · C code · 1,570 lines total · page 1 of 3

C
1,570
Font size
	/* (continued from vs_tcp_state — commit the TCP state transition
	 * computed above; caller holds cp->lock via ip_vs_set_state) */
	if (new_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
			  "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
			  ip_vs_proto_name(cp->protocol),
			  (state_off==VS_STATE_OUTPUT)?"output ":"input ",
			  th->syn? 'S' : '.',
			  th->fin? 'F' : '.',
			  th->ack? 'A' : '.',
			  th->rst? 'R' : '.',
			  NIPQUAD(cp->daddr), ntohs(cp->dport),
			  NIPQUAD(cp->caddr), ntohs(cp->cport),
			  ip_vs_state_name(cp->state),
			  ip_vs_state_name(new_state),
			  atomic_read(&cp->refcnt));

		/* Keep the real server's active/inactive connection counters
		 * in sync with the connection state: only ESTABLISHED
		 * connections count as active. The IP_VS_CONN_F_INACTIVE
		 * flag records which counter this connection currently
		 * contributes to, so each transition is applied once. */
		if (dest) {
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
			    (new_state != IP_VS_S_ESTABLISHED)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   (new_state == IP_VS_S_ESTABLISHED)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
	}

	/* Install the timeout that goes with the new state. */
	return vs_set_state_timeout(cp, new_state);
}


/*
 *	Handle state transitions.
 *
 *	Dispatch on the IP protocol of the packet: TCP connections go
 *	through the TCP state machine (vs_tcp_state); UDP and ICMP have a
 *	single state whose timeout is simply refreshed. Returns the value
 *	of the per-protocol handler, or -1 for an unsupported protocol.
 *	The connection lock is held across the transition.
 */
int ip_vs_set_state(struct ip_vs_conn *cp,
		    int state_off, struct iphdr *iph, void *tp)
{
	int ret;

	spin_lock(&cp->lock);
	switch (iph->protocol) {
	case IPPROTO_TCP:
		ret = vs_tcp_state(cp, state_off, tp);
		break;
	case IPPROTO_UDP:
		ret = vs_set_state_timeout(cp, IP_VS_S_UDP);
		break;
	case IPPROTO_ICMP:
		ret = vs_set_state_timeout(cp, IP_VS_S_ICMP);
		break;
	default:
		ret = -1;
	}
	spin_unlock(&cp->lock);

	return ret;
}


/*
 *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 *
 *	Returns the newly installed timeout value (cp->timeout).
 *	NOTE(review): unlike ip_vs_set_state, this does not take cp->lock —
 *	presumably safe in its caller's context; confirm against callers.
 */
int ip_vs_conn_listen(struct ip_vs_conn *cp)
{
	vs_set_state_timeout(cp, IP_VS_S_LISTEN);
	return cp->timeout;
}


/*
 *      Bypass transmitter.
 *      Let packets bypass the destination when the destination is not
 *      available, it may be only used in transparent cache cluster.
 */
static int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = skb->nh.iph;
	u8     tos = iph->tos;
	int    mtu;

	EnterFunction(10);

	/* Route by the packet's own destination address (not cp->daddr):
	 * the packet is forwarded unmodified, bypassing the real server. */
	if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(tos), 0)) {
		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
		goto tx_error_icmp;
	}

	/* MTU checking: honour DF by sending FRAG_NEEDED back to the
	 * sender instead of forwarding an over-sized packet. */
	mtu = rt->u.dst.pmtu;
	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
		goto tx_error;
	}

	/* update checksum because skb might be defragmented */
	ip_send_check(iph);

	/* Ensure enough headroom for the output device's link header. */
	if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) {
		if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
			ip_rt_put(rt);
			IP_VS_ERR_RL("ip_vs_bypass_xmit(): no memory\n");
			goto tx_error;
		}
	}

	/* drop old route, attach the one we just resolved */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	/* Mark so IPVS does not process this skb again on later hooks. */
	skb->nfcache |= NFC_IPVS_PROPERTY;
	ip_send(skb);

	LeaveFunction(10);
	/* skb has been consumed (sent or freed) in all paths. */
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	return NF_STOLEN;
}


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT):
 *      used for local-node delivery, where the normal stack handles
 *      the packet.
 */
static int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	return NF_ACCEPT;
}


/*
 *      NAT transmitter (only for outside-to-inside nat forwarding):
 *      rewrites the destination address/port to the real server
 *      (cp->daddr:cp->dport), fixes checksums and sends the packet.
 */
static int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	struct rtable *rt;		/* Route to the other host */
	struct iphdr  *iph;
	union ip_vs_tphdr h;		/* TCP/UDP transport header view */
	int ihl;
	unsigned short size;		/* transport payload length */
	int mtu;

	EnterFunction(10);

	/*
	 * If it has ip_vs_app helper, the helper may change the payload,
	 * so it needs full checksum checking and checksum calculation.
	 * If not, only the header (such as IP address and port number)
	 * will be changed, so it is fast to do incremental checksum update,
	 * and let the destination host do final checksum checking.
	 */
	if (cp->app && skb_is_nonlinear(skb)
	    && skb_linearize(skb, GFP_ATOMIC) != 0)
		return NF_DROP;

	iph = skb->nh.iph;
	ihl = iph->ihl << 2;
	h.raw = (char*) iph + ihl;
	size = ntohs(iph->tot_len) - ihl;

	/* do TCP/UDP checksum checking if it has application helper
	 * (UDP checksum 0 means "no checksum" and is skipped) */
	if (cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
		switch (skb->ip_summed) {
		case CHECKSUM_NONE:
			skb->csum = csum_partial(h.raw, size, 0);
			/* fallthrough: csum now computed, verify it below */
		case CHECKSUM_HW:
			if (csum_tcpudp_magic(iph->saddr, iph->daddr, size,
					      iph->protocol, skb->csum)) {
				IP_VS_DBG_RL("Incoming failed %s checksum "
					     "from %d.%d.%d.%d (size=%d)!\n",
					     ip_vs_proto_name(iph->protocol),
					     NIPQUAD(iph->saddr),
					     size);
				goto tx_error;
			}
			break;
		default:
			/* CHECKSUM_UNNECESSARY */
			break;
		}
	}

	/*
	 *  Check if it is no_cport connection: a template created before
	 *  the client port was known. Fill in the client port from this
	 *  packet and re-hash the connection under the new tuple.
	 */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		if (ip_vs_conn_unhash(cp)) {
			spin_lock(&cp->lock);
			if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
				atomic_dec(&ip_vs_conn_no_cport_cnt);
				cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
				cp->cport = h.portp[0];
				/* NOTE(review): message says "cport" but prints
				 * cp->dport — looks like a typo in the debug
				 * statement; verify against later kernels. */
				IP_VS_DBG(10, "filled cport=%d\n", ntohs(cp->dport));
			}
			spin_unlock(&cp->lock);

			/* hash on new dport */
			ip_vs_conn_hash(cp);
		}
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = rt->u.dst.pmtu;
	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_nat_xmit(): frag needed\n");
		goto tx_error;
	}

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* copy-on-write the packet before mangling it */
	if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len, &iph, &h.raw))
		return NF_DROP;

	/* mangle the packet: rewrite destination address and port to the
	 * real server */
	iph->daddr = cp->daddr;
	h.portp[1] = cp->dport;

	/*
	 *	Attempt ip_vs_app call.
	 *	will fix ip_vs_conn and iph ack_seq stuff
	 */
	if (ip_vs_app_pkt_in(cp, skb) != 0) {
		/* skb data has probably changed, update pointers */
		iph = skb->nh.iph;
		h.raw = (char*) iph + ihl;
		size = skb->len - ihl;
	}

	/*
	 *	Adjust TCP/UDP checksums
	 */
	if (!cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		ip_vs_fast_check_update(&h, cp->vaddr, cp->daddr,
					cp->vport, cp->dport, iph->protocol);
		if (skb->ip_summed == CHECKSUM_HW)
			skb->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation (payload may have changed) */
		switch (iph->protocol) {
		case IPPROTO_TCP:
			h.th->check = 0;
			h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
							size, iph->protocol,
							csum_partial(h.raw, size, 0));
			break;
		case IPPROTO_UDP:
			h.uh->check = 0;
			h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
							size, iph->protocol,
							csum_partial(h.raw, size, 0));
			/* a computed UDP checksum of 0 must be sent as
			 * all-ones, 0 means "no checksum" on the wire */
			if (h.uh->check == 0)
				h.uh->check = 0xFFFF;
			break;
		}
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	ip_send_check(iph);

	IP_VS_DBG(10, "NAT to %u.%u.%u.%u:%d\n",
		  NIPQUAD(iph->daddr), ntohs(h.portp[1]));

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	skb->nfcache |= NFC_IPVS_PROPERTY;
	ip_send(skb);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	return NF_STOLEN;
}


/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server.
 For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 */
static int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = skb->nh.iph;	/* Inner (original) header */
	u8     tos = old_iph->tos;
	u16    df = old_iph->frag_off;
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	/* Only plain IPv4 packets can be IPIP-encapsulated. */
	if (skb->protocol != __constant_htons(ETH_P_IP)) {
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
			     "ETH_P_IP: %d, skb protocol: %d\n",
			     __constant_htons(ETH_P_IP), skb->protocol);
		goto tx_error;
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
		goto tx_error_icmp;

	tdev = rt->u.dst.dev;

	/* Effective MTU shrinks by the outer IP header we will prepend;
	 * 68 is the minimum IPv4 MTU. */
	mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
	if (mtu < 68) {
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
		goto tx_error;
	}
	if (skb->dst && mtu < skb->dst->pmtu)
		skb->dst->pmtu = mtu;

	/* Propagate DF from the inner header to the outer one. */
	df |= (old_iph->frag_off&__constant_htons(IP_DF));

	if ((old_iph->frag_off&__constant_htons(IP_DF))
	    && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
		goto tx_error;
	}

	/* update checksum because skb might be defragmented */
	ip_send_check(old_iph);

	/*
	 * Okay, now see if we can stuff it in the buffer as-is: we need
	 * headroom for the link header plus the new outer IP header, and
	 * a private (uncloned, unshared) skb to prepend into.
	 */
	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
			return NF_DROP;
		}
		kfree_skb(skb);
		skb = new_skb;
		old_iph = skb->nh.iph;
	}

	/* The old network header becomes the transport header; make room
	 * for the new outer IP header in front of it. */
	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	iph->ttl		=	old_iph->ttl;
	iph->tot_len		=	htons(skb->len);
	ip_select_ident(iph, &rt->u.dst, NULL);
	ip_send_check(iph);

	skb->ip_summed = CHECKSUM_NONE;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	skb->nfcache |= NFC_IPVS_PROPERTY;
	ip_send(skb);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	return NF_STOLEN;
}


/*
 *      Direct Routing transmitter: forward the packet unmodified to the
 *      real server's link (the real server owns the VIP on a non-ARPing
 *      interface, so no address rewriting is needed).
 */
static int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = skb->nh.iph;
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = rt->u.dst.pmtu;
	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
		goto tx_error;
	}

	/* update checksum because skb might be defragmented */
	ip_send_check(iph);

	/* Ensure enough headroom for the output device's link header. */
	if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) {
		if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
			ip_rt_put(rt);
			IP_VS_ERR_RL("ip_vs_dr_xmit(): no memory\n");
			goto tx_error;
		}
	}

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	skb->nfcache |= NFC_IPVS_PROPERTY;
	ip_send(skb);

#if 0000
	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		do_ip_send);
#endif
	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	return NF_STOLEN;
}


/*
 *  Bind a connection entry with the corresponding packet_xmit.
 *  Called by ip_vs_conn_new. The forwarding-method bits of cp->flags
 *  select one of the transmitters defined above.
 */
static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
{
	switch (IP_VS_FWD_METHOD(cp)) {
	case IP_VS_CONN_F_MASQ:
		cp->packet_xmit = ip_vs_nat_xmit;
		break;
	case IP_VS_CONN_F_TUNNEL:
		cp->packet_xmit = ip_vs_tunnel_xmit;
		break;
	case IP_VS_CONN_F_DROUTE:
		cp->packet_xmit = ip_vs_dr_xmit;
		break;
	case IP_VS_CONN_F_LOCALNODE:
		cp->packet_xmit = ip_vs_null_xmit;
		break;
	case IP_VS_CONN_F_BYPASS:
		cp->packet_xmit = ip_vs_bypass_xmit;
		break;
	}
}


/*
 *  Bind a connection entry with a virtual service destination
 *  Called just after a new connection entry is created.

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?