📄 route.c
				if (rt->idev)
					in_dev_hold(rt->idev);
				rt->u.dst.obsolete	= 0;
				rt->u.dst.lastuse	= jiffies;
				rt->u.dst.path		= &rt->u.dst;
				rt->u.dst.neighbour	= NULL;
				rt->u.dst.hh		= NULL;
				rt->u.dst.xfrm		= NULL;

				rt->rt_flags		|= RTCF_REDIRECTED;

				/* Gateway is different ... */
				rt->rt_gateway		= new_gw;

				/* Redirect received -> path was valid */
				dst_confirm(&rth->u.dst);

				if (rt->peer)
					atomic_inc(&rt->peer->refcnt);

				if (arp_bind_neighbour(&rt->u.dst) ||
				    !(rt->u.dst.neighbour->nud_state & NUD_VALID)) {
					if (rt->u.dst.neighbour)
						neigh_event_send(rt->u.dst.neighbour, NULL);
					ip_rt_put(rth);
					rt_drop(rt);
					goto do_next;
				}

				rt_del(hash, rth);
				if (!rt_intern_hash(hash, rt, &rt))
					ip_rt_put(rt);
				goto do_next;
			}
			rcu_read_unlock();
		do_next:
			;
		}
	}
	in_dev_put(in_dev);
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
			"%u.%u.%u.%u ignored.\n"
			"  Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, "
			"tos %02x\n",
		       NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
		       NIPQUAD(saddr), NIPQUAD(daddr), tos);
#endif
	in_dev_put(in_dev);
}

/* negative_advice: drop an obsolete entry, or un-cache a route that was
 * learned from a redirect or carries an expiry. */
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable*)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->u.dst.expires) {
			unsigned hash = rt_hash_code(rt->fl.fl4_dst,
						     rt->fl.fl4_src ^
							(rt->fl.oif << 5),
						     rt->fl.fl4_tos);
#if RT_CACHE_DEBUG >= 1
			printk(KERN_DEBUG "ip_rt_advice: redirect to "
					  "%u.%u.%u.%u/%02x dropped\n",
				NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
#endif
			rt_del(hash, rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = (struct rtable*)skb->dst;
	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);

	if (!in_dev)
		return;

	if (!IN_DEV_TX_REDIRECTS(in_dev))
		goto out;

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
		rt->u.dst.rate_tokens = 0;

	/* Too many ignored redirects; do not send anything.
	 * Set u.dst.rate_last to the last seen redirected packet.
	 */
	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
		rt->u.dst.rate_last = jiffies;
		goto out;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (time_after(jiffies,
		       (rt->u.dst.rate_last +
			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		rt->u.dst.rate_last = jiffies;
		++rt->u.dst.rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (IN_DEV_LOG_MARTIANS(in_dev) &&
		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
		    net_ratelimit())
			printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores "
				"redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
				NIPQUAD(rt->rt_src), rt->rt_iif,
				NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
#endif
	}
out:
	in_dev_put(in_dev);
}

/* Translate a cached route error into a rate-limited ICMP destination
 * unreachable, then drop the packet. */
static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = (struct rtable*)skb->dst;
	unsigned long now;
	int code;

	switch (rt->u.dst.error) {
		case EINVAL:
		default:
			goto out;
		case EHOSTUNREACH:
			code = ICMP_HOST_UNREACH;
			break;
		case ENETUNREACH:
			code = ICMP_NET_UNREACH;
			break;
		case EACCES:
			code = ICMP_PKT_FILTERED;
			break;
	}

	now = jiffies;
	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
		rt->u.dst.rate_tokens = ip_rt_error_burst;
	rt->u.dst.rate_last = now;
	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
		rt->u.dst.rate_tokens -= ip_rt_error_cost;
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
	}

out:	kfree_skb(skb);
	return 0;
}

/*
 *	The last two values are not from the RFC but
 *	are needed for AMPRnet AX.25 paths.
 */

static unsigned short mtu_plateau[] =
	{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };

static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
		if (old_mtu > mtu_plateau[i])
			return mtu_plateau[i];
	return 68;
}

unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
{
	int i;
	unsigned short old_mtu = ntohs(iph->tot_len);
	struct rtable *rth;
	u32  skeys[2] = { iph->saddr, 0, };
	u32  daddr = iph->daddr;
	u8   tos = iph->tos & IPTOS_RT_MASK;
	unsigned short est_mtu = 0;

	if (ipv4_config.no_pmtu_disc)
		return 0;

	for (i = 0; i < 2; i++) {
		unsigned hash = rt_hash_code(daddr, skeys[i], tos);

		rcu_read_lock();
		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
		     rth = rcu_dereference(rth->u.rt_next)) {
			if (rth->fl.fl4_dst == daddr &&
			    rth->fl.fl4_src == skeys[i] &&
			    rth->rt_dst  == daddr &&
			    rth->rt_src  == iph->saddr &&
			    rth->fl.fl4_tos == tos &&
			    rth->fl.iif == 0 &&
			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
				unsigned short mtu = new_mtu;

				if (new_mtu < 68 || new_mtu >= old_mtu) {

					/* BSD 4.2 compatibility hack :-( */
					if (mtu == 0 &&
					    old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
					    old_mtu >= 68 + (iph->ihl << 2))
						old_mtu -= iph->ihl << 2;

					mtu = guess_mtu(old_mtu);
				}
				if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
					if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) {
						dst_confirm(&rth->u.dst);
						if (mtu < ip_rt_min_pmtu) {
							mtu = ip_rt_min_pmtu;
							rth->u.dst.metrics[RTAX_LOCK-1] |=
								(1 << RTAX_MTU);
						}
						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
						dst_set_expires(&rth->u.dst,
							ip_rt_mtu_expires);
					}
					est_mtu = mtu;
				}
			}
		}
		rcu_read_unlock();
	}
	return est_mtu ? : new_mtu;
}

/* update_pmtu: lower the cached path MTU, clamping at ip_rt_min_pmtu
 * and arming the expiry timer. */
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
	    !(dst_metric_locked(dst, RTAX_MTU))) {
		if (mtu < ip_rt_min_pmtu) {
			mtu = ip_rt_min_pmtu;
			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
		}
		dst->metrics[RTAX_MTU-1] = mtu;
		dst_set_expires(dst, ip_rt_mtu_expires);
	}
}

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	dst_release(dst);
	return NULL;
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;
	struct inet_peer *peer = rt->peer;
	struct in_device *idev = rt->idev;

	if (peer) {
		rt->peer = NULL;
		inet_putpeer(peer);
	}

	if (idev) {
		rt->idev = NULL;
		in_dev_put(idev);
	}
}

/* ifdown: the device is going away, so move the route's in_device
 * reference over to loopback. */
static void ipv4_dst_ifdown(struct dst_entry *dst, int how)
{
	struct rtable *rt = (struct rtable *) dst;
	struct in_device *idev = rt->idev;

	if (idev && idev->dev != &loopback_dev) {
		struct in_device *loopback_idev = in_dev_get(&loopback_dev);
		if (loopback_idev) {
			rt->idev = loopback_idev;
			in_dev_put(idev);
		}
	}
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = (struct rtable *) skb->dst;
	if (rt)
		dst_set_expires(&rt->u.dst, 0);
}

static int ip_rt_bug(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;

	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
		NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
		skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	return 0;
}

/*
   We do not cache the source address of the outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be unaligned in IP options!
 */

void ip_rt_get_source(u8 *addr, struct rtable *rt)
{
	u32 src;
	struct fib_result res;

	if (rt->fl.iif == 0)
		src = rt->rt_src;
	else if (fib_lookup(&rt->fl, &res) == 0) {
		src = FIB_RES_PREFSRC(res);
		fib_res_put(&res);
	} else
		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
					RT_SCOPE_UNIVERSE);
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_NET_CLS_ROUTE
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->u.dst.tclassid & 0xFFFF))
		rt->u.dst.tclassid |= tag & 0xFFFF;
	if (!(rt->u.dst.tclassid & 0xFFFF0000))
		rt->u.dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
{
	struct fib_info *fi = res->fi;

	if (fi) {
		if (FIB_RES_GW(*res) &&
		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = FIB_RES_GW(*res);
		memcpy(rt->u.dst.metrics, fi->fib_metrics,
		       sizeof(rt->u.dst.metrics));
		if (fi->fib_mtu == 0) {
			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
			    rt->rt_gateway != rt->rt_dst &&
			    rt->u.dst.dev->mtu > 576)
				rt->u.dst.metrics[RTAX_MTU-1] = 576;
		}
#ifdef CONFIG_NET_CLS_ROUTE
		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
	} else
		rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;

	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
		rt->u.dst.metrics[RTAX_ADVMSS-1] =
			max_t(unsigned int, rt->u.dst.dev->mtu - 40,
			      ip_rt_min_advmss);
	if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;

#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, fib_rules_tclass(res));
#endif
	set_class_tag(rt, itag);
#endif
	rt->rt_type = res->type;
}

/* Build and cache a route for a received multicast packet. */
static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	unsigned hash;
	struct rtable *rth;
	u32 spec_dst;
	struct in_device *in_dev = in_dev_get(dev);
	u32 itag = 0;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ZERONET(saddr)) {
		if (!LOCAL_MCAST(daddr))
			goto e_inval;
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	} else if (fib_validate_source(saddr, 0, tos, 0,
				       dev, &spec_dst, &itag) < 0)
		goto e_inval;

	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
		goto e_nobufs;

	rth->u.dst.output = ip_rt_bug;

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags = DST_HOST;
	if (in_dev->cnf.no_policy)
		rth->u.dst.flags |= DST_NOPOLICY;
	rth->fl.fl4_dst	= daddr;
	rth->rt_dst	= daddr;
	rth->fl.fl4_tos	= tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->fl.fl4_fwmark = skb->nfmark;
#endif
	rth->fl.fl4_src	= saddr;
	rth->rt_src	= saddr;
#ifdef CONFIG_NET_CLS_ROUTE
	rth->u.dst.tclassid = itag;
#endif
	rth->rt_iif	=
	rth->fl.iif	= dev->ifindex;
	rth->u.dst.dev	= &loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->fl.oif	= 0;
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst = spec_dst;
	rth->rt_type	= RTN_MULTICAST;
	rth->rt_flags	= RTCF_MULTICAST;
	if (our) {
		rth->u.dst.input = ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->u.dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	in_dev_put(in_dev);
	hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
	return rt_intern_hash(hash, rth, (struct rtable **)&skb->dst);

e_nobufs:
	in_dev_put(in_dev);
	return -ENOBUFS;

e_inval:
	in_dev_put(in_dev);
	return -EINVAL;
}

/*
 *	NOTE. We drop all packets that have local source
 *	addresses, because every properly looped-back packet
 *	must already have the correct destination attached by the output routine.
 *
 *	This approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 */

static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = in_dev_get(dev);
	struct in_device *out_dev = NULL;
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = daddr,
					.saddr = saddr,
					.tos = tos,
					.scope = RT_SCOPE_UNIVERSE,
#ifdef CONFIG_IP_ROUTE_FWMARK
					.fwmark = skb->nfmark
#endif
				      } },
			    .iif = dev->ifindex };
	unsigned	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	unsigned	hash;
	u32		spec_dst;
	int		err = -EINVAL;
	int		free_res = 0;

	/* IP on this device is disabled. */

	if (!in_dev)