ip_vs_core.c
来自「linux 内核源代码」· C语言 代码 · 共 1,153 行 · 第 1/2 页
C
1,153 行
struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; *related = 1; /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; } iph = ip_hdr(skb); offset = ihl = iph->ihl * 4; ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", ic->type, ntohs(icmp_id(ic)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if ((ic->type != ICMP_DEST_UNREACH) && (ic->type != ICMP_SOURCE_QUENCH) && (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ offset += sizeof(_icmph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); offset += cih->ihl * 4; /* The embedded headers contain source and dest in reverse order */ cp = pp->conn_out_get(skb, pp, cih, offset, 1); if (!cp) return NF_ACCEPT; verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { IP_VS_ERR("shouldn't reach here, because the box is on the " "half connection in the tun/dr module.\n"); } /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); goto out; } if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); if (!skb_make_writable(skb, offset)) goto out; ip_vs_nat_icmp(skb, pp, cp, 1); /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); skb->ipvs_property = 1; verdict = NF_ACCEPT; out: __ip_vs_conn_put(cp); return verdict;}static inline int is_tcp_reset(const struct sk_buff *skb){ struct tcphdr _tcph, *th; th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (th == NULL) return 0; return th->rst;}/* * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT. * Check if outgoing packet belongs to the established ip_vs_conn, * rewrite addresses of the packet and send it on its way... */static unsigned intip_vs_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)){ struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; int ihl; EnterFunction(11); if (skb->ipvs_property) return NF_ACCEPT; iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_out_icmp(skb, &related); if (related) return verdict; iph = ip_hdr(skb); } pp = ip_vs_proto_get(iph->protocol); if (unlikely(!pp)) return NF_ACCEPT; /* reassemble IP fragments */ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; iph = ip_hdr(skb); } ihl = iph->ihl << 2; /* * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(skb, pp, iph, ihl, 0); if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP)) { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, ihl, sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_lookup_real_service(iph->protocol, iph->saddr, pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST * packet or not TCP packet. */ if (iph->protocol != IPPROTO_TCP || !is_tcp_reset(skb)) { icmp_send(skb,ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; } } } IP_VS_DBG_PKT(12, pp, skb, 0, "packet continues traversal as normal"); return NF_ACCEPT; } IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); if (!skb_make_writable(skb, ihl)) goto drop; /* mangle the packet */ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) goto drop; ip_hdr(skb)->saddr = cp->vaddr; ip_send_check(ip_hdr(skb)); /* For policy routing, packets originating from this * machine itself may be routed differently to packets * passing through. We want this packet to be routed as * if it came from this machine itself. So re-compute * the routing information. */ if (ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); ip_vs_conn_put(cp); skb->ipvs_property = 1; LeaveFunction(11); return NF_ACCEPT; drop: ip_vs_conn_put(cp); kfree_skb(skb); return NF_STOLEN;}/* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, * forward to the right destination host if relevant. * Currently handles error types - unreachable, quench, ttl exceeded. */static intip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum){ struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; *related = 1; /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ? IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) return NF_STOLEN; } iph = ip_hdr(skb); offset = ihl = iph->ihl * 4; ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", ic->type, ntohs(icmp_id(ic)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if ((ic->type != ICMP_DEST_UNREACH) && (ic->type != ICMP_SOURCE_QUENCH) && (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ offset += sizeof(_icmph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); offset += cih->ihl * 4; /* The embedded headers contain source and dest in reverse order */ cp = pp->conn_in_get(skb, pp, cih, offset, 1); if (!cp) return NF_ACCEPT; verdict = NF_DROP; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); goto out; } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); /* do not touch skb anymore */ out: __ip_vs_conn_put(cp); return verdict;}/* * Check if it's for virtual services, look it up, * and send it on its way... */static unsigned intip_vs_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)){ struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; int ret, restart; int ihl; /* * Big tappo: only PACKET_HOST (neither loopback nor mcasts) * ... don't know why 1st test DOES NOT include 2nd (?) */ if (unlikely(skb->pkt_type != PACKET_HOST || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", skb->pkt_type, ip_hdr(skb)->protocol, NIPQUAD(ip_hdr(skb)->daddr)); return NF_ACCEPT; } iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); if (related) return verdict; iph = ip_hdr(skb); } /* Protocol supported? */ pp = ip_vs_proto_get(iph->protocol); if (unlikely(!pp)) return NF_ACCEPT; ihl = iph->ihl << 2; /* * Check if the packet belongs to an existing connection entry */ cp = pp->conn_in_get(skb, pp, iph, ihl, 0); if (unlikely(!cp)) { int v; if (!pp->conn_schedule(skb, pp, &v, &cp)) return v; } if (unlikely(!cp)) { /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, pp, skb, 0, "packet continues traversal as normal"); return NF_ACCEPT; } IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ if (sysctl_ip_vs_expire_nodest_conn) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } /* don't restart its timer, and silently drop the packet. */ __ip_vs_conn_put(cp); return NF_DROP; } ip_vs_in_stats(cp, skb); restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp, pp); /* do not touch skb anymore */ else { IP_VS_DBG_RL("warning: packet_xmit is null"); ret = NF_ACCEPT; } /* Increase its packet counter and check if it is needed * to be synchronized * * Sync connection if it is about to close to * encorage the standby servers to update the connections timeout */ atomic_inc(&cp->in_pkts); if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] == sysctl_ip_vs_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE))))) ip_vs_sync_conn(cp); cp->old_state = cp->state; ip_vs_conn_put(cp); return ret;}/* * It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP * related packets destined for 0.0.0.0/0. * When fwmark-based virtual service is used, such as transparent * cache cluster, TCP packets can be marked and routed to ip_vs_in, * but ICMP destined for 0.0.0.0/0 cannot not be easily marked and * sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain * and send them to ip_vs_in_icmp. */static unsigned intip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)){ int r; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum);}/* After packet filtering, forward packet through VS/DR, VS/TUN, or VS/NAT(change destination), so that filtering rules can be applied to IPVS. */static struct nf_hook_ops ip_vs_in_ops = { .hook = ip_vs_in, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_LOCAL_IN, .priority = 100,};/* After packet filtering, change source only for VS/NAT */static struct nf_hook_ops ip_vs_out_ops = { .hook = ip_vs_out, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_FORWARD, .priority = 100,};/* After packet filtering (but before ip_vs_out_icmp), catch icmp destined for 0.0.0.0/0, which is for incoming IPVS connections */static struct nf_hook_ops ip_vs_forward_icmp_ops = { .hook = ip_vs_forward_icmp, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_FORWARD, .priority = 99,};/* Before the netfilter connection tracking, exit from POST_ROUTING */static struct nf_hook_ops ip_vs_post_routing_ops = { .hook = ip_vs_post_routing, .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_IP_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC-1,};/* * Initialize IP Virtual Server */static int __init ip_vs_init(void){ int ret; ret = ip_vs_control_init(); if (ret < 0) { IP_VS_ERR("can't setup control.\n"); goto cleanup_nothing; } ip_vs_protocol_init(); ret = ip_vs_app_init(); if (ret < 0) { IP_VS_ERR("can't setup application helper.\n"); goto cleanup_protocol; } ret = ip_vs_conn_init(); if (ret < 0) { IP_VS_ERR("can't setup connection table.\n"); goto cleanup_app; } ret = nf_register_hook(&ip_vs_in_ops); if (ret < 0) { IP_VS_ERR("can't register in hook.\n"); goto cleanup_conn; } ret = nf_register_hook(&ip_vs_out_ops); if (ret < 0) { IP_VS_ERR("can't register out hook.\n"); goto cleanup_inops; } ret = nf_register_hook(&ip_vs_post_routing_ops); if (ret < 0) { IP_VS_ERR("can't register post_routing hook.\n"); goto cleanup_outops; } ret = nf_register_hook(&ip_vs_forward_icmp_ops); if (ret < 0) { IP_VS_ERR("can't register forward_icmp hook.\n"); goto cleanup_postroutingops; } IP_VS_INFO("ipvs loaded.\n"); return ret; cleanup_postroutingops: nf_unregister_hook(&ip_vs_post_routing_ops); cleanup_outops: nf_unregister_hook(&ip_vs_out_ops); cleanup_inops: nf_unregister_hook(&ip_vs_in_ops); cleanup_conn: ip_vs_conn_cleanup(); cleanup_app: ip_vs_app_cleanup(); cleanup_protocol: ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); cleanup_nothing: return ret;}static void __exit ip_vs_cleanup(void){ nf_unregister_hook(&ip_vs_forward_icmp_ops); nf_unregister_hook(&ip_vs_post_routing_ops); nf_unregister_hook(&ip_vs_out_ops); nf_unregister_hook(&ip_vs_in_ops); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); IP_VS_INFO("ipvs unloaded.\n");}module_init(ip_vs_init);module_exit(ip_vs_cleanup);MODULE_LICENSE("GPL");
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?