/* route.c -- extracted fragment (web-viewer header removed). */
/*
 * Table of common link MTU "plateaus" (cf. RFC 1191), largest first.
 * guess_mtu() walks it to pick a plausible path MTU when an ICMP
 * FRAG_NEEDED message does not carry the next-hop MTU.
 */
static unsigned short mtu_plateau[] =
{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };

/*
 * Return the largest plateau value strictly smaller than old_mtu,
 * falling back to 68 -- the minimum MTU every IPv4 link must support.
 */
static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
{
	int i;

	for (i = 0; i < sizeof(mtu_plateau) / sizeof(mtu_plateau[0]); i++)
		if (old_mtu > mtu_plateau[i])
			return mtu_plateau[i];
	return 68;
}

/*
 * Handle an incoming ICMP FRAG_NEEDED report for the offending header
 * iph: lower the cached path MTU of every matching route cache entry to
 * new_mtu, or to a guessed plateau value when new_mtu is absent or
 * nonsensical.  Returns the MTU actually recorded, or new_mtu when no
 * cache entry matched (est_mtu stayed 0).
 */
unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
{
	int i;
	unsigned short old_mtu = ntohs(iph->tot_len);
	struct rtable *rth;
	u32 skeys[2] = { iph->saddr, 0, };
	u32 daddr = iph->daddr;
	u8 tos = iph->tos & IPTOS_RT_MASK;
	unsigned short est_mtu = 0;

	/* Path MTU discovery administratively disabled. */
	if (ipv4_config.no_pmtu_disc)
		return 0;

	/* Try the exact source key first, then the wildcard (0) source. */
	for (i = 0; i < 2; i++) {
		unsigned hash = rt_hash_code(daddr, skeys[i], tos);

		read_lock(&rt_hash_table[hash].lock);
		for (rth = rt_hash_table[hash].chain; rth;
		     rth = rth->u.rt_next) {
			if (rth->key.dst == daddr &&
			    rth->key.src == skeys[i] &&
			    rth->rt_dst == daddr &&
			    rth->rt_src == iph->saddr &&
			    rth->key.tos == tos &&
			    rth->key.iif == 0 &&
			    /* skip entries whose MTU metric is locked */
			    !(rth->u.dst.mxlock & (1 << RTAX_MTU))) {
				unsigned short mtu = new_mtu;

				if (new_mtu < 68 || new_mtu >= old_mtu) {
					/* BSD 4.2 compatibility hack :-( */
					if (mtu == 0 &&
					    old_mtu >= rth->u.dst.pmtu &&
					    old_mtu >= 68 + (iph->ihl << 2))
						old_mtu -= iph->ihl << 2;
					/* NB: old_mtu may carry this
					 * adjustment into later loop
					 * iterations -- intentional. */
					mtu = guess_mtu(old_mtu);
				}
				if (mtu <= rth->u.dst.pmtu) {
					if (mtu < rth->u.dst.pmtu) {
						dst_confirm(&rth->u.dst);
						if (mtu < ip_rt_min_pmtu) {
							/* clamp to floor and
							 * lock so it cannot
							 * shrink further */
							mtu = ip_rt_min_pmtu;
							rth->u.dst.mxlock |=
								(1 << RTAX_MTU);
						}
						rth->u.dst.pmtu = mtu;
						dst_set_expires(&rth->u.dst,
							ip_rt_mtu_expires);
					}
					est_mtu = mtu;
				}
			}
		}
		read_unlock(&rt_hash_table[hash].lock);
	}
	/* GCC "?:" extension: est_mtu if non-zero, else new_mtu. */
	return est_mtu ? : new_mtu;
}

/*
 * Shrink the cached path MTU of a single dst entry, honouring the
 * RTAX_MTU lock and the global ip_rt_min_pmtu floor.
 */
void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu)
{
	if (dst->pmtu > mtu && mtu >= 68 &&
	    !(dst->mxlock & (1 << RTAX_MTU))) {
		if (mtu < ip_rt_min_pmtu) {
			mtu = ip_rt_min_pmtu;
			dst->mxlock |= (1 << RTAX_MTU);
		}
		dst->pmtu = mtu;
		dst_set_expires(dst, ip_rt_mtu_expires);
	}
}

/*
 * dst_ops->check: IPv4 route cache entries are never revalidated in
 * place -- drop the reference and force the caller to relookup.
 */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	dst_release(dst);
	return NULL;
}

/* dst_ops->reroute: not supported for IPv4; always fail. */
static struct dst_entry *ipv4_dst_reroute(struct dst_entry *dst,
					  struct sk_buff *skb)
{
	return NULL;
}

/* dst_ops->destroy: release the inet_peer attached to the route, if any. */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;
	struct inet_peer *peer = rt->peer;

	if (peer) {
		rt->peer = NULL;
		inet_putpeer(peer);
	}
}

/*
 * dst_ops->link_failure: report host unreachable back to the sender and
 * expire the route immediately.
 */
static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = (struct rtable *) skb->dst;
	if (rt)
		dst_set_expires(&rt->u.dst, 0);
}

/*
 * Output handler installed on routes that must never be used for
 * output (e.g. multicast input routes): log and drop.
 */
static int ip_rt_bug(struct sk_buff *skb)
{
	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
		NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
		skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	return 0;
}

/* We do not cache source address of outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so that it out of fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */
/*
 * Fill *addr with the preferred source address for replies on route rt.
 * Slow path: only used for the IP RR/TS/SRR options.  "addr" may be
 * unaligned (it points into the option area), hence the memcpy().
 */
void ip_rt_get_source(u8 *addr, struct rtable *rt)
{
	u32 src;
	struct fib_result res;

	if (rt->key.iif == 0)
		/* Locally originated: the cached source is authoritative. */
		src = rt->rt_src;
	else if (fib_lookup(&rt->key, &res) == 0) {
#ifdef CONFIG_IP_ROUTE_NAT
		if (res.type == RTN_NAT)
			src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
					       RT_SCOPE_UNIVERSE);
		else
#endif
			src = FIB_RES_PREFSRC(res);
		fib_res_put(&res);
	} else
		/* FIB lookup failed: fall back to any suitable address
		 * on the output device. */
		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
				       RT_SCOPE_UNIVERSE);
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_NET_CLS_ROUTE
/*
 * Merge routing-classifier tag bits into the dst: the low and high
 * 16-bit halves are filled independently, and only if not already set.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->u.dst.tclassid & 0xFFFF))
		rt->u.dst.tclassid |= tag & 0xFFFF;
	if (!(rt->u.dst.tclassid & 0xFFFF0000))
		rt->u.dst.tclassid |= tag & 0xFFFF0000;
}
#endif

/*
 * Copy next-hop information (gateway, metrics, MTU, advmss, class tag,
 * route type) from the FIB result res into the new cache entry rt,
 * applying the global clamps (IP_MAX_MTU, ip_rt_min_advmss, 65535-40).
 */
static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
{
	struct fib_info *fi = res->fi;

	if (fi) {
		if (FIB_RES_GW(*res) &&
		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = FIB_RES_GW(*res);
		/* Bulk-copy the whole metric array; mxlock is its first
		 * member in the dst. */
		memcpy(&rt->u.dst.mxlock, fi->fib_metrics,
		       sizeof(fi->fib_metrics));
		if (fi->fib_mtu == 0) {
			rt->u.dst.pmtu = rt->u.dst.dev->mtu;
			if (rt->u.dst.mxlock & (1 << RTAX_MTU) &&
			    rt->rt_gateway != rt->rt_dst &&
			    rt->u.dst.pmtu > 576)
				rt->u.dst.pmtu = 576;
		}
#ifdef CONFIG_NET_CLS_ROUTE
		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
	} else
		rt->u.dst.pmtu = rt->u.dst.dev->mtu;

	if (rt->u.dst.pmtu > IP_MAX_MTU)
		rt->u.dst.pmtu = IP_MAX_MTU;
	if (rt->u.dst.advmss == 0)
		rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
					 ip_rt_min_advmss);
	if (rt->u.dst.advmss > 65535 - 40)
		rt->u.dst.advmss = 65535 - 40;

#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, fib_rules_tclass(res));
#endif
	set_class_tag(rt, itag);
#endif
	rt->rt_type = res->type;
}

/*
 * Build and intern a route cache entry for a received multicast packet.
 * "our" is non-zero when this host belongs to the destination group, in
 * which case the packet is also delivered locally.
 * Returns 0 or a negative errno.
 */
static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	unsigned hash;
	struct rtable *rth;
	u32 spec_dst;
	struct in_device *in_dev = in_dev_get(dev);
	u32 itag = 0;

	/* Primary sanity checks.
	 */
	if (in_dev == NULL)
		return -EINVAL;

	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
	    skb->protocol != __constant_htons(ETH_P_IP))
		goto e_inval;

	if (ZERONET(saddr)) {
		/* 0.0.0.0 sources are valid only toward link-local
		 * multicast groups. */
		if (!LOCAL_MCAST(daddr))
			goto e_inval;
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	} else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
				       &itag) < 0)
		goto e_inval;

	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
		goto e_nobufs;

	/* Multicast input routes must never be used for output. */
	rth->u.dst.output= ip_rt_bug;

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	rth->key.dst	= daddr;
	rth->rt_dst	= daddr;
	rth->key.tos	= tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->key.fwmark	= skb->nfmark;
#endif
	rth->key.src	= saddr;
	rth->rt_src	= saddr;
#ifdef CONFIG_IP_ROUTE_NAT
	rth->rt_dst_map	= daddr;
	rth->rt_src_map	= saddr;
#endif
#ifdef CONFIG_NET_CLS_ROUTE
	rth->u.dst.tclassid = itag;
#endif
	rth->rt_iif	=
	rth->key.iif	= dev->ifindex;
	rth->u.dst.dev	= &loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->key.oif	= 0;
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->rt_type	= RTN_MULTICAST;
	rth->rt_flags	= RTCF_MULTICAST;
	if (our) {
		rth->u.dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->u.dst.input = ip_mr_input;
#endif

	rt_cache_stat[smp_processor_id()].in_slow_mc++;

	in_dev_put(in_dev);
	hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
	return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);

e_nobufs:
	in_dev_put(in_dev);
	return -ENOBUFS;
e_inval:
	in_dev_put(in_dev);
	return -EINVAL;
}

/*
 * NOTE. We drop all the packets that has local source
 * addresses, because every properly looped back packet
 * must have correct destination already attached by output routine.
 *
 * Such approach solves two big problems:
 * 1. Not simplex devices are handled properly.
 * 2. IP spoofing attempts are filtered with 100% of guarantee.
 */
/*
 * Slow-path input routing: consult the FIB and build a new route cache
 * entry for a received packet, classifying it as local, broadcast,
 * forwardable unicast, or martian.  On success the entry is interned
 * into the cache and attached to skb->dst.  Returns 0 or negative errno.
 * (NOTE: this function is truncated at the end of the visible fragment.)
 */
int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
			u8 tos, struct net_device *dev)
{
	struct rt_key	key;
	struct fib_result res;
	struct in_device *in_dev = in_dev_get(dev);
	struct in_device *out_dev = NULL;
	unsigned	flags = 0;
	u32		itag = 0;
	struct rtable * rth;
	unsigned	hash;
	u32		spec_dst;
	int		err = -EINVAL;
	int		free_res = 0;

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	key.dst		= daddr;
	key.src		= saddr;
	key.tos		= tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
	key.fwmark	= skb->nfmark;
#endif
	key.iif		= dev->ifindex;
	key.oif		= 0;
	key.scope	= RT_SCOPE_UNIVERSE;

	hash = rt_hash_code(daddr, saddr ^ (key.iif << 5), tos);

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */
	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
		goto martian_source;

	if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ZERONET(saddr))
		goto martian_source;

	if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
		goto martian_destination;

	/*
	 *	Now we are ready to route packet.
	 */
	if ((err = fib_lookup(&key, &res)) != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			goto e_inval;
		goto no_route;
	}
	free_res = 1;

	rt_cache_stat[smp_processor_id()].in_slow_tot++;

#ifdef CONFIG_IP_ROUTE_NAT
	/* Policy is applied before mapping destination,
	   but rerouting after map should be made with old source.
	 */

	if (1) {
		u32 src_map = saddr;
		if (res.r)
			src_map = fib_rules_policy(saddr, &res, &flags);

		if (res.type == RTN_NAT) {
			key.dst = fib_rules_map_destination(daddr, &res);
			fib_res_put(&res);
			free_res = 0;
			/* Re-lookup with the translated destination. */
			if (fib_lookup(&key, &res))
				goto e_inval;
			free_res = 1;
			if (res.type != RTN_UNICAST)
				goto e_inval;
			flags |= RTCF_DNAT;
		}
		key.src = src_map;
	}
#endif

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		int result;
		result = fib_validate_source(saddr, daddr, tos,
					     loopback_dev.ifindex,
					     dev, &spec_dst, &itag);
		if (result < 0)
			goto martian_source;
		if (result)
			flags |= RTCF_DIRECTSRC;
		spec_dst = daddr;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto e_inval;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res.fi->fib_nhs > 1 && key.oif == 0)
		fib_select_multipath(&key, &res);
#endif
	out_dev = in_dev_get(FIB_RES_DEV(res));
	if (out_dev == NULL) {
		if (net_ratelimit())
			printk(KERN_CRIT "Bug in ip_route_input_slow(). "
					 "Please, report\n");
		goto e_inval;
	}

	err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev,
				  &spec_dst, &itag);
	if (err < 0)
		goto martian_source;

	if (err)
		flags |= RTCF_DIRECTSRC;

	/* Suggest a redirect when the packet would leave on the same
	 * (shared-media or on-link) interface it arrived on. */
	if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != __constant_htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 */
		if (out_dev == in_dev && !(flags & RTCF_DNAT))
			goto e_inval;
	}

	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
		goto e_nobufs;

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	rth->key.dst	= daddr;
	rth->rt_dst	= daddr;
	rth->key.tos	= tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->key.fwmark	= skb->nfmark;
#endif
	rth->key.src	= saddr;
	rth->rt_src	= saddr;
	rth->rt_gateway	= daddr;
#ifdef CONFIG_IP_ROUTE_NAT
	rth->rt_src_map	= key.src;
	rth->rt_dst_map	= key.dst;
	if (flags&RTCF_DNAT)
		rth->rt_gateway	= key.dst;
#endif
	rth->rt_iif	=
	rth->key.iif	= dev->ifindex;
	rth->u.dst.dev	= out_dev->dev;
	dev_hold(rth->u.dst.dev);
	rth->key.oif	= 0;
	rth->rt_spec_dst= spec_dst;

	rth->u.dst.input = ip_forward;
	rth->u.dst.output = ip_output;

	rt_set_nexthop(rth, &res, itag);

	rth->rt_flags = flags;

#ifdef CONFIG_NET_FASTROUTE
	if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) {
		struct net_device *odev = rth->u.dst.dev;
		if (odev != dev &&
		    dev->accept_fastpath &&
		    odev->mtu >= dev->mtu &&
		    dev->accept_fastpath(dev, &rth->u.dst) == 0)
			rth->rt_flags |= RTCF_FAST;
	}
#endif

intern:
	err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
done:
	in_dev_put(in_dev);
	if (out_dev)
		in_dev_put(out_dev);
	if (free_res)
		fib_res_put(&res);
out:	return err;

brd_input:
	if (skb->protocol != __constant_htons(ETH_P_IP))
		goto e_inval;

	if (ZERONET(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
/* (End of extracted fragment; web-viewer hotkey help removed.) */