ip_vs_core.c

来自「linux 内核源代码」· C语言 代码 · 共 1,153 行 · 第 1/2 页

C
1,153
字号
	struct ip_vs_protocol *pp;	unsigned int offset, ihl, verdict;	*related = 1;	/* reassemble IP fragments */	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))			return NF_STOLEN;	}	iph = ip_hdr(skb);	offset = ihl = iph->ihl * 4;	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);	if (ic == NULL)		return NF_DROP;	IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",		  ic->type, ntohs(icmp_id(ic)),		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));	/*	 * Work through seeing if this is for us.	 * These checks are supposed to be in an order that means easy	 * things are checked first to speed up processing.... however	 * this means that some packets will manage to get a long way	 * down this stack and then be rejected, but that's life.	 */	if ((ic->type != ICMP_DEST_UNREACH) &&	    (ic->type != ICMP_SOURCE_QUENCH) &&	    (ic->type != ICMP_TIME_EXCEEDED)) {		*related = 0;		return NF_ACCEPT;	}	/* Now find the contained IP header */	offset += sizeof(_icmph);	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);	if (cih == NULL)		return NF_ACCEPT; /* The packet looks wrong, ignore */	pp = ip_vs_proto_get(cih->protocol);	if (!pp)		return NF_ACCEPT;	/* Is the embedded protocol header present? */	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&		     pp->dont_defrag))		return NF_ACCEPT;	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");	offset += cih->ihl * 4;	/* The embedded headers contain source and dest in reverse order */	cp = pp->conn_out_get(skb, pp, cih, offset, 1);	if (!cp)		return NF_ACCEPT;	verdict = NF_DROP;	if (IP_VS_FWD_METHOD(cp) != 0) {		IP_VS_ERR("shouldn't reach here, because the box is on the "			  "half connection in the tun/dr module.\n");	}	/* Ensure the checksum is correct */	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {		/* Failed checksum! */		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",			  NIPQUAD(iph->saddr));		goto out;	}	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)		offset += 2 * sizeof(__u16);	if (!skb_make_writable(skb, offset))		goto out;	ip_vs_nat_icmp(skb, pp, cp, 1);	/* do the statistics and put it back */	ip_vs_out_stats(cp, skb);	skb->ipvs_property = 1;	verdict = NF_ACCEPT;  out:	__ip_vs_conn_put(cp);	return verdict;}static inline int is_tcp_reset(const struct sk_buff *skb){	struct tcphdr _tcph, *th;	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);	if (th == NULL)		return 0;	return th->rst;}/* *	It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT. *	Check if outgoing packet belongs to the established ip_vs_conn, *      rewrite addresses of the packet and send it on its way... */static unsigned intip_vs_out(unsigned int hooknum, struct sk_buff *skb,	  const struct net_device *in, const struct net_device *out,	  int (*okfn)(struct sk_buff *)){	struct iphdr	*iph;	struct ip_vs_protocol *pp;	struct ip_vs_conn *cp;	int ihl;	EnterFunction(11);	if (skb->ipvs_property)		return NF_ACCEPT;	iph = ip_hdr(skb);	if (unlikely(iph->protocol == IPPROTO_ICMP)) {		int related, verdict = ip_vs_out_icmp(skb, &related);		if (related)			return verdict;		iph = ip_hdr(skb);	}	pp = ip_vs_proto_get(iph->protocol);	if (unlikely(!pp))		return NF_ACCEPT;	/* reassemble IP fragments */	if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&		     !pp->dont_defrag)) {		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))			return NF_STOLEN;		iph = ip_hdr(skb);	}	ihl = iph->ihl << 2;	/*	 * Check if the packet belongs to an existing entry	 */	cp = pp->conn_out_get(skb, pp, iph, ihl, 0);	if (unlikely(!cp)) {		if (sysctl_ip_vs_nat_icmp_send &&		    (pp->protocol == IPPROTO_TCP ||		     pp->protocol == IPPROTO_UDP)) {			__be16 _ports[2], *pptr;			pptr = skb_header_pointer(skb, ihl,						  sizeof(_ports), _ports);			if (pptr == NULL)				return NF_ACCEPT;	/* Not for me */			if (ip_vs_lookup_real_service(iph->protocol,						      iph->saddr, pptr[0])) {				/*				 * Notify the real server: there is no				 * existing entry if it is not RST				 * packet or not TCP packet.				 */				if (iph->protocol != IPPROTO_TCP				    || !is_tcp_reset(skb)) {					icmp_send(skb,ICMP_DEST_UNREACH,						  ICMP_PORT_UNREACH, 0);					return NF_DROP;				}			}		}		IP_VS_DBG_PKT(12, pp, skb, 0,			      "packet continues traversal as normal");		return NF_ACCEPT;	}	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");	if (!skb_make_writable(skb, ihl))		goto drop;	/* mangle the packet */	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))		goto drop;	ip_hdr(skb)->saddr = cp->vaddr;	ip_send_check(ip_hdr(skb));	/* For policy routing, packets originating from this	 * machine itself may be routed differently to packets	 * passing through.  We want this packet to be routed as	 * if it came from this machine itself.  So re-compute	 * the routing information.	 */	if (ip_route_me_harder(skb, RTN_LOCAL) != 0)		goto drop;	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");	ip_vs_out_stats(cp, skb);	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);	ip_vs_conn_put(cp);	skb->ipvs_property = 1;	LeaveFunction(11);	return NF_ACCEPT;  drop:	ip_vs_conn_put(cp);	kfree_skb(skb);	return NF_STOLEN;}/* *	Handle ICMP messages in the outside-to-inside direction (incoming). *	Find any that might be relevant, check against existing connections, *	forward to the right destination host if relevant. *	Currently handles error types - unreachable, quench, ttl exceeded. */static intip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum){	struct iphdr *iph;	struct icmphdr	_icmph, *ic;	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */	struct ip_vs_conn *cp;	struct ip_vs_protocol *pp;	unsigned int offset, ihl, verdict;	*related = 1;	/* reassemble IP fragments */	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {		if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ?					    IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))			return NF_STOLEN;	}	iph = ip_hdr(skb);	offset = ihl = iph->ihl * 4;	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);	if (ic == NULL)		return NF_DROP;	IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",		  ic->type, ntohs(icmp_id(ic)),		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));	/*	 * Work through seeing if this is for us.	 * These checks are supposed to be in an order that means easy	 * things are checked first to speed up processing.... however	 * this means that some packets will manage to get a long way	 * down this stack and then be rejected, but that's life.	 */	if ((ic->type != ICMP_DEST_UNREACH) &&	    (ic->type != ICMP_SOURCE_QUENCH) &&	    (ic->type != ICMP_TIME_EXCEEDED)) {		*related = 0;		return NF_ACCEPT;	}	/* Now find the contained IP header */	offset += sizeof(_icmph);	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);	if (cih == NULL)		return NF_ACCEPT; /* The packet looks wrong, ignore */	pp = ip_vs_proto_get(cih->protocol);	if (!pp)		return NF_ACCEPT;	/* Is the embedded protocol header present? */	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&		     pp->dont_defrag))		return NF_ACCEPT;	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for");	offset += cih->ihl * 4;	/* The embedded headers contain source and dest in reverse order */	cp = pp->conn_in_get(skb, pp, cih, offset, 1);	if (!cp)		return NF_ACCEPT;	verdict = NF_DROP;	/* Ensure the checksum is correct */	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {		/* Failed checksum! */		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",			  NIPQUAD(iph->saddr));		goto out;	}	/* do the statistics and put it back */	ip_vs_in_stats(cp, skb);	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)		offset += 2 * sizeof(__u16);	verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);	/* do not touch skb anymore */  out:	__ip_vs_conn_put(cp);	return verdict;}/* *	Check if it's for virtual services, look it up, *	and send it on its way... */static unsigned intip_vs_in(unsigned int hooknum, struct sk_buff *skb,	 const struct net_device *in, const struct net_device *out,	 int (*okfn)(struct sk_buff *)){	struct iphdr	*iph;	struct ip_vs_protocol *pp;	struct ip_vs_conn *cp;	int ret, restart;	int ihl;	/*	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)	 *	... don't know why 1st test DOES NOT include 2nd (?)	 */	if (unlikely(skb->pkt_type != PACKET_HOST		     || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",			  skb->pkt_type,			  ip_hdr(skb)->protocol,			  NIPQUAD(ip_hdr(skb)->daddr));		return NF_ACCEPT;	}	iph = ip_hdr(skb);	if (unlikely(iph->protocol == IPPROTO_ICMP)) {		int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);		if (related)			return verdict;		iph = ip_hdr(skb);	}	/* Protocol supported? */	pp = ip_vs_proto_get(iph->protocol);	if (unlikely(!pp))		return NF_ACCEPT;	ihl = iph->ihl << 2;	/*	 * Check if the packet belongs to an existing connection entry	 */	cp = pp->conn_in_get(skb, pp, iph, ihl, 0);	if (unlikely(!cp)) {		int v;		if (!pp->conn_schedule(skb, pp, &v, &cp))			return v;	}	if (unlikely(!cp)) {		/* sorry, all this trouble for a no-hit :) */		IP_VS_DBG_PKT(12, pp, skb, 0,			      "packet continues traversal as normal");		return NF_ACCEPT;	}	IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");	/* Check the server status */	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {		/* the destination server is not available */		if (sysctl_ip_vs_expire_nodest_conn) {			/* try to expire the connection immediately */			ip_vs_conn_expire_now(cp);		}		/* don't restart its timer, and silently		   drop the packet. */		__ip_vs_conn_put(cp);		return NF_DROP;	}	ip_vs_in_stats(cp, skb);	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);	if (cp->packet_xmit)		ret = cp->packet_xmit(skb, cp, pp);		/* do not touch skb anymore */	else {		IP_VS_DBG_RL("warning: packet_xmit is null");		ret = NF_ACCEPT;	}	/* Increase its packet counter and check if it is needed	 * to be synchronized	 *	 * Sync connection if it is about to close to	 * encorage the standby servers to update the connections timeout	 */	atomic_inc(&cp->in_pkts);	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&	    (((cp->protocol != IPPROTO_TCP ||	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&	      (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]	       == sysctl_ip_vs_sync_threshold[0])) ||	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||	       (cp->state == IP_VS_TCP_S_CLOSE)))))		ip_vs_sync_conn(cp);	cp->old_state = cp->state;	ip_vs_conn_put(cp);	return ret;}/* *	It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP *      related packets destined for 0.0.0.0/0. *      When fwmark-based virtual service is used, such as transparent *      cache cluster, TCP packets can be marked and routed to ip_vs_in, *      but ICMP destined for 0.0.0.0/0 cannot not be easily marked and *      sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain *      and send them to ip_vs_in_icmp. */static unsigned intip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,		   const struct net_device *in, const struct net_device *out,		   int (*okfn)(struct sk_buff *)){	int r;	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)		return NF_ACCEPT;	return ip_vs_in_icmp(skb, &r, hooknum);}/* After packet filtering, forward packet through VS/DR, VS/TUN,   or VS/NAT(change destination), so that filtering rules can be   applied to IPVS. */static struct nf_hook_ops ip_vs_in_ops = {	.hook		= ip_vs_in,	.owner		= THIS_MODULE,	.pf		= PF_INET,	.hooknum        = NF_IP_LOCAL_IN,	.priority       = 100,};/* After packet filtering, change source only for VS/NAT */static struct nf_hook_ops ip_vs_out_ops = {	.hook		= ip_vs_out,	.owner		= THIS_MODULE,	.pf		= PF_INET,	.hooknum        = NF_IP_FORWARD,	.priority       = 100,};/* After packet filtering (but before ip_vs_out_icmp), catch icmp   destined for 0.0.0.0/0, which is for incoming IPVS connections */static struct nf_hook_ops ip_vs_forward_icmp_ops = {	.hook		= ip_vs_forward_icmp,	.owner		= THIS_MODULE,	.pf		= PF_INET,	.hooknum        = NF_IP_FORWARD,	.priority       = 99,};/* Before the netfilter connection tracking, exit from POST_ROUTING */static struct nf_hook_ops ip_vs_post_routing_ops = {	.hook		= ip_vs_post_routing,	.owner		= THIS_MODULE,	.pf		= PF_INET,	.hooknum        = NF_IP_POST_ROUTING,	.priority       = NF_IP_PRI_NAT_SRC-1,};/* *	Initialize IP Virtual Server */static int __init ip_vs_init(void){	int ret;	ret = ip_vs_control_init();	if (ret < 0) {		IP_VS_ERR("can't setup control.\n");		goto cleanup_nothing;	}	ip_vs_protocol_init();	ret = ip_vs_app_init();	if (ret < 0) {		IP_VS_ERR("can't setup application helper.\n");		goto cleanup_protocol;	}	ret = ip_vs_conn_init();	if (ret < 0) {		IP_VS_ERR("can't setup connection table.\n");		goto cleanup_app;	}	ret = nf_register_hook(&ip_vs_in_ops);	if (ret < 0) {		IP_VS_ERR("can't register in hook.\n");		goto cleanup_conn;	}	ret = nf_register_hook(&ip_vs_out_ops);	if (ret < 0) {		IP_VS_ERR("can't register out hook.\n");		goto cleanup_inops;	}	ret = nf_register_hook(&ip_vs_post_routing_ops);	if (ret < 0) {		IP_VS_ERR("can't register post_routing hook.\n");		goto cleanup_outops;	}	ret = nf_register_hook(&ip_vs_forward_icmp_ops);	if (ret < 0) {		IP_VS_ERR("can't register forward_icmp hook.\n");		goto cleanup_postroutingops;	}	IP_VS_INFO("ipvs loaded.\n");	return ret;  cleanup_postroutingops:	nf_unregister_hook(&ip_vs_post_routing_ops);  cleanup_outops:	nf_unregister_hook(&ip_vs_out_ops);  cleanup_inops:	nf_unregister_hook(&ip_vs_in_ops);  cleanup_conn:	ip_vs_conn_cleanup();  cleanup_app:	ip_vs_app_cleanup();  cleanup_protocol:	ip_vs_protocol_cleanup();	ip_vs_control_cleanup();  cleanup_nothing:	return ret;}static void __exit ip_vs_cleanup(void){	nf_unregister_hook(&ip_vs_forward_icmp_ops);	nf_unregister_hook(&ip_vs_post_routing_ops);	nf_unregister_hook(&ip_vs_out_ops);	nf_unregister_hook(&ip_vs_in_ops);	ip_vs_conn_cleanup();	ip_vs_app_cleanup();	ip_vs_protocol_cleanup();	ip_vs_control_cleanup();	IP_VS_INFO("ipvs unloaded.\n");}module_init(ip_vs_init);module_exit(ip_vs_cleanup);MODULE_LICENSE("GPL");

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?