ip_vs_conn.c
来自「linux-2.4.29操作系统的源码」· C语言 代码 · 共 1,570 行 · 第 1/3 页
C
1,570 行
if (new_state != cp->state) {
	struct ip_vs_dest *dest = cp->dest;

	/* Trace the TCP state transition: protocol, direction, TCP flags,
	 * destination/client endpoints, old->new state and refcount. */
	IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
		  "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n",
		  ip_vs_proto_name(cp->protocol),
		  (state_off==VS_STATE_OUTPUT)?"output ":"input ",
		  th->syn? 'S' : '.',
		  th->fin? 'F' : '.',
		  th->ack? 'A' : '.',
		  th->rst? 'R' : '.',
		  NIPQUAD(cp->daddr), ntohs(cp->dport),
		  NIPQUAD(cp->caddr), ntohs(cp->cport),
		  ip_vs_state_name(cp->state),
		  ip_vs_state_name(new_state),
		  atomic_read(&cp->refcnt));

	/* Keep the real server's active/inactive connection counters in
	 * sync with the connection state: only ESTABLISHED connections
	 * count as active. */
	if (dest) {
		/* Leaving ESTABLISHED: demote from active to inactive. */
		if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
		    (new_state != IP_VS_S_ESTABLISHED)) {
			atomic_dec(&dest->activeconns);
			atomic_inc(&dest->inactconns);
			cp->flags |= IP_VS_CONN_F_INACTIVE;
		/* Entering ESTABLISHED: promote from inactive to active. */
		} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
			   (new_state == IP_VS_S_ESTABLISHED)) {
			atomic_inc(&dest->activeconns);
			atomic_dec(&dest->inactconns);
			cp->flags &= ~IP_VS_CONN_F_INACTIVE;
		}
	}
}

/* Install the timeout that belongs to the new state. */
return vs_set_state_timeout(cp, new_state);
}


/*
 *	Handle state transitions for a connection entry.
 *
 *	Dispatches on the IP protocol: TCP gets full state tracking via
 *	vs_tcp_state(); UDP and ICMP just get their single-state timeout
 *	refreshed.  Returns -1 for unknown protocols.  cp->lock serializes
 *	state/timeout updates against other packet paths.
 */
int ip_vs_set_state(struct ip_vs_conn *cp, int state_off,
		    struct iphdr *iph, void *tp)
{
	int ret;

	spin_lock(&cp->lock);
	switch (iph->protocol) {
	case IPPROTO_TCP:
		ret = vs_tcp_state(cp, state_off, tp);
		break;
	case IPPROTO_UDP:
		ret = vs_set_state_timeout(cp, IP_VS_S_UDP);
		break;
	case IPPROTO_ICMP:
		ret = vs_set_state_timeout(cp, IP_VS_S_ICMP);
		break;
	default:
		ret = -1;
	}
	spin_unlock(&cp->lock);

	return ret;
}


/*
 *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 *	Returns the connection's (new) timeout value.
 */
int ip_vs_conn_listen(struct ip_vs_conn *cp)
{
	vs_set_state_timeout(cp, IP_VS_S_LISTEN);
	return cp->timeout;
}


/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available, it may be only used in transparent cache cluster.
*/static int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp){ struct rtable *rt; /* Route to the other host */ struct iphdr *iph = skb->nh.iph; u8 tos = iph->tos; int mtu; EnterFunction(10); if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(tos), 0)) { IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, " "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); goto tx_error_icmp; } /* MTU checking */ mtu = rt->u.dst.pmtu; if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n"); goto tx_error; } /* update checksum because skb might be defragmented */ ip_send_check(iph); if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) { if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) { ip_rt_put(rt); IP_VS_ERR_RL("ip_vs_bypass_xmit(): no memory\n"); goto tx_error; } } /* drop old route */ dst_release(skb->dst); skb->dst = &rt->u.dst;#ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 1 << NF_IP_LOCAL_OUT;#endif /* CONFIG_NETFILTER_DEBUG */ skb->nfcache |= NFC_IPVS_PROPERTY; ip_send(skb); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); return NF_STOLEN;}/* * NULL transmitter (do nothing except return NF_ACCEPT) */static int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp){ return NF_ACCEPT;}/* * NAT transmitter (only for outside-to-inside nat forwarding) */static int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp){ struct rtable *rt; /* Route to the other host */ struct iphdr *iph; union ip_vs_tphdr h; int ihl; unsigned short size; int mtu; EnterFunction(10); /* * If it has ip_vs_app helper, the helper may change the payload, * so it needs full checksum checking and checksum calculation. 
* If not, only the header (such as IP address and port number) * will be changed, so it is fast to do incremental checksum update, * and let the destination host do final checksum checking. */ if (cp->app && skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) return NF_DROP; iph = skb->nh.iph; ihl = iph->ihl << 2; h.raw = (char*) iph + ihl; size = ntohs(iph->tot_len) - ihl; /* do TCP/UDP checksum checking if it has application helper */ if (cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) { switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = csum_partial(h.raw, size, 0); case CHECKSUM_HW: if (csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, skb->csum)) { IP_VS_DBG_RL("Incoming failed %s checksum " "from %d.%d.%d.%d (size=%d)!\n", ip_vs_proto_name(iph->protocol), NIPQUAD(iph->saddr), size); goto tx_error; } break; default: /* CHECKSUM_UNNECESSARY */ break; } } /* * Check if it is no_cport connection ... */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { if (ip_vs_conn_unhash(cp)) { spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_NO_CPORT) { atomic_dec(&ip_vs_conn_no_cport_cnt); cp->flags &= ~IP_VS_CONN_F_NO_CPORT; cp->cport = h.portp[0]; IP_VS_DBG(10, "filled cport=%d\n", ntohs(cp->dport)); } spin_unlock(&cp->lock); /* hash on new dport */ ip_vs_conn_hash(cp); } } if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) goto tx_error_icmp; /* MTU checking */ mtu = rt->u.dst.pmtu; if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("ip_vs_nat_xmit(): frag needed\n"); goto tx_error; } /* drop old route */ dst_release(skb->dst); skb->dst = &rt->u.dst; /* copy-on-write the packet before mangling it */ if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len, &iph, &h.raw)) return NF_DROP; /* mangle the packet */ iph->daddr = cp->daddr; h.portp[1] = cp->dport; /* * Attempt ip_vs_app call. 
* will fix ip_vs_conn and iph ack_seq stuff */ if (ip_vs_app_pkt_in(cp, skb) != 0) { /* skb data has probably changed, update pointers */ iph = skb->nh.iph; h.raw = (char*) iph + ihl; size = skb->len - ihl; } /* * Adjust TCP/UDP checksums */ if (!cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) { /* Only port and addr are changed, do fast csum update */ ip_vs_fast_check_update(&h, cp->vaddr, cp->daddr, cp->vport, cp->dport, iph->protocol); if (skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ switch (iph->protocol) { case IPPROTO_TCP: h.th->check = 0; h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, csum_partial(h.raw, size, 0)); break; case IPPROTO_UDP: h.uh->check = 0; h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, csum_partial(h.raw, size, 0)); if (h.uh->check == 0) h.uh->check = 0xFFFF; break; } skb->ip_summed = CHECKSUM_UNNECESSARY; } ip_send_check(iph); IP_VS_DBG(10, "NAT to %u.%u.%u.%u:%d\n", NIPQUAD(iph->daddr), ntohs(h.portp[1])); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */#ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 1 << NF_IP_LOCAL_OUT;#endif /* CONFIG_NETFILTER_DEBUG */ skb->nfcache |= NFC_IPVS_PROPERTY; ip_send(skb); LeaveFunction(10); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: kfree_skb(skb); return NF_STOLEN;}/* * IP Tunneling transmitter * * This function encapsulates the packet in a new IP packet, its * destination will be set to cp->daddr. Most code of this function * is taken from ipip.c. * * It is used in VS/TUN cluster. The load balancer selects a real * server from a cluster based on a scheduling algorithm, * encapsulates the request packet and forwards it to the selected * server. For example, all real servers are configured with * "ifconfig tunl0 <Virtual IP Address> up". 
When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 */
static int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = skb->nh.iph;
	u8     tos = old_iph->tos;
	u16    df = old_iph->frag_off;
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	/* Only IPv4 payloads can be IPIP-encapsulated here. */
	if (skb->protocol != __constant_htons(ETH_P_IP)) {
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
			     "ETH_P_IP: %d, skb protocol: %d\n",
			     __constant_htons(ETH_P_IP), skb->protocol);
		goto tx_error;
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
		goto tx_error_icmp;

	tdev = rt->u.dst.dev;

	/* Effective MTU shrinks by the outer IP header we will prepend;
	 * 68 is the minimum IPv4 MTU an interface must support. */
	mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
	if (mtu < 68) {
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
		goto tx_error;
	}
	if (skb->dst && mtu < skb->dst->pmtu)
		skb->dst->pmtu = mtu;

	/* NOTE(review): df was already initialized from old_iph->frag_off
	 * above, so this OR looks redundant — confirm against ipip.c from
	 * which this code was taken. */
	df |= (old_iph->frag_off&__constant_htons(IP_DF));

	/* DF set and packet would not fit: tell the sender. */
	if ((old_iph->frag_off&__constant_htons(IP_DF))
	    && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
		goto tx_error;
	}

	/* update checksum because skb might be defragmented */
	ip_send_check(old_iph);

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 * Reallocate headroom for the outer header if the skb is too
	 * short, cloned or shared.
	 */
	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
			return NF_DROP;
		}
		kfree_skb(skb);
		skb = new_skb;
		old_iph = skb->nh.iph;
	}

	/* The old IP header becomes the inner (transport-level) header. */
	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	iph->ttl		=	old_iph->ttl;
	iph->tot_len		=	htons(skb->len);
	ip_select_ident(iph, &rt->u.dst, NULL);
	ip_send_check(iph);

	skb->ip_summed = CHECKSUM_NONE;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	skb->nfcache |= NFC_IPVS_PROPERTY;
	ip_send(skb);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	return NF_STOLEN;
}


/*
 *      Direct Routing transmitter
 *      Forwards the packet unmodified to the real server, which must
 *      share a link with the director (MAC-level delivery).
 */
static int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = skb->nh.iph;
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = rt->u.dst.pmtu;
	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
		goto tx_error;
	}

	/* update checksum because skb might be defragmented */
	ip_send_check(iph);

	/* Ensure headroom for the output device's link-layer header. */
	if (unlikely(skb_headroom(skb) < rt->u.dst.dev->hard_header_len)) {
		if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) {
			ip_rt_put(rt);
			IP_VS_ERR_RL("ip_vs_dr_xmit(): no memory\n");
			goto tx_error;
		}
	}

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	skb->nfcache |= NFC_IPVS_PROPERTY;
	ip_send(skb);
#if 0000
	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		do_ip_send);
#endif

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	return NF_STOLEN;
}


/*
 *  Bind a connection entry with the corresponding packet_xmit.
 *  Called by ip_vs_conn_new.  Selects the transmitter according to
 *  the connection's forwarding method flags.
 */
static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
{
	switch (IP_VS_FWD_METHOD(cp)) {
	case IP_VS_CONN_F_MASQ:
		cp->packet_xmit = ip_vs_nat_xmit;
		break;

	case IP_VS_CONN_F_TUNNEL:
		cp->packet_xmit = ip_vs_tunnel_xmit;
		break;

	case IP_VS_CONN_F_DROUTE:
		cp->packet_xmit = ip_vs_dr_xmit;
		break;

	case IP_VS_CONN_F_LOCALNODE:
		cp->packet_xmit = ip_vs_null_xmit;
		break;

	case IP_VS_CONN_F_BYPASS:
		cp->packet_xmit = ip_vs_bypass_xmit;
		break;
	}
}


/*
 *  Bind a connection entry with a virtual service destination
 *  Called just after a new connection entry is created.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?