route.c
	/* Calculate number of entries, which we want to expire now. */
	goal = atomic_read(&ipv4_dst_ops.entries) -
		(ip_rt_gc_elasticity << rt_hash_log);
	if (goal <= 0) {
		if (equilibrium < ipv4_dst_ops.gc_thresh)
			equilibrium = ipv4_dst_ops.gc_thresh;
		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
		if (goal > 0) {
			equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
		}
	} else {
		/* We are in dangerous area. Try to reduce cache really
		 * aggressively.
		 */
		goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
	}

	if (now - last_gc >= ip_rt_gc_min_interval)
		last_gc = now;

	if (goal <= 0) {
		equilibrium += goal;
		goto work_done;
	}

	do {
		int i, k;

		for (i = rt_hash_mask, k = rover; i >= 0; i--) {
			unsigned tmo = expire;

			k = (k + 1) & rt_hash_mask;
			rthp = &rt_hash_table[k].chain;
			write_lock_bh(&rt_hash_table[k].lock);
			while ((rth = *rthp) != NULL) {
				if (!rt_may_expire(rth, tmo, expire)) {
					tmo >>= 1;
					rthp = &rth->u.rt_next;
					continue;
				}
				*rthp = rth->u.rt_next;
				rt_free(rth);
				goal--;
			}
			write_unlock_bh(&rt_hash_table[k].lock);
			if (goal <= 0)
				break;
		}
		rover = k;

		if (goal <= 0)
			goto work_done;

		/* Goal is not achieved. We stop process if:

		   - if expire reduced to zero. Otherwise, expire is halved.
		   - if table is not full.
		   - if we are called from interrupt.
		   - jiffies check is just fallback/debug loop breaker.
		     We will not spin here for long time in any case.
		 */

		if (expire == 0)
			break;

		expire >>= 1;
#if RT_CACHE_DEBUG >= 2
		printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
				atomic_read(&ipv4_dst_ops.entries), goal, i);
#endif

		if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
			goto out;
	} while (!in_softirq() && jiffies - now < 1);

	if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
		goto out;
	if (net_ratelimit())
		printk("dst cache overflow\n");
	return 1;

work_done:
	expire += ip_rt_gc_min_interval;
	if (expire > ip_rt_gc_timeout ||
	    atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
		expire = ip_rt_gc_timeout;
#if RT_CACHE_DEBUG >= 2
	printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
			atomic_read(&ipv4_dst_ops.entries), goal, rover);
#endif
out:
	return 0;
}

/* Insert a freshly created route into the hash table, or reuse an
 * identical cached entry if one already exists.  The chosen route is
 * returned via *rp with a reference held.
 */
static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
{
	struct rtable *rth, **rthp;
	unsigned long now = jiffies;
	int attempts = !in_softirq();

restart:
	rthp = &rt_hash_table[hash].chain;

	write_lock_bh(&rt_hash_table[hash].lock);
	while ((rth = *rthp) != NULL) {
		if (memcmp(&rth->key, &rt->key, sizeof(rt->key)) == 0) {
			/* Put it first */
			*rthp = rth->u.rt_next;
			rth->u.rt_next = rt_hash_table[hash].chain;
			rt_hash_table[hash].chain = rth;

			rth->u.dst.__use++;
			dst_hold(&rth->u.dst);
			rth->u.dst.lastuse = now;
			write_unlock_bh(&rt_hash_table[hash].lock);

			rt_drop(rt);
			*rp = rth;
			return 0;
		}

		rthp = &rth->u.rt_next;
	}

	/* Try to bind route to arp only if it is output
	   route or unicast forwarding path.
	 */
	if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0) {
		int err = arp_bind_neighbour(&rt->u.dst);
		if (err) {
			write_unlock_bh(&rt_hash_table[hash].lock);

			if (err != -ENOBUFS) {
				rt_drop(rt);
				return err;
			}

			/* Neighbour tables are full and nothing
			   can be released. Try to shrink route cache,
			   it is most likely it holds some neighbour records.
			 */
			if (attempts-- > 0) {
				int saved_elasticity = ip_rt_gc_elasticity;
				int saved_int = ip_rt_gc_min_interval;
				ip_rt_gc_elasticity = 1;
				ip_rt_gc_min_interval = 0;
				rt_garbage_collect();
				ip_rt_gc_min_interval = saved_int;
				ip_rt_gc_elasticity = saved_elasticity;
				goto restart;
			}

			if (net_ratelimit())
				printk("Neighbour table overflow.\n");
			rt_drop(rt);
			return -ENOBUFS;
		}
	}

	rt->u.rt_next = rt_hash_table[hash].chain;
#if RT_CACHE_DEBUG >= 2
	if (rt->u.rt_next) {
		struct rtable *trt;
		printk("rt_cache @%02x: %u.%u.%u.%u", hash,
		       NIPQUAD(rt->rt_dst));
		for (trt = rt->u.rt_next; trt; trt = trt->u.rt_next)
			printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
		printk("\n");
	}
#endif
	rt_hash_table[hash].chain = rt;
	write_unlock_bh(&rt_hash_table[hash].lock);
	*rp = rt;
	return 0;
}

/* Attach the long-lived inet_peer entry for the destination to this route. */
void rt_bind_peer(struct rtable *rt, int create)
{
	static spinlock_t rt_peer_lock = SPIN_LOCK_UNLOCKED;
	struct inet_peer *peer;

	peer = inet_getpeer(rt->rt_dst, create);

	spin_lock_bh(&rt_peer_lock);
	if (rt->peer == NULL) {
		rt->peer = peer;
		peer = NULL;
	}
	spin_unlock_bh(&rt_peer_lock);

	if (peer)
		inet_putpeer(peer);
}

/*
 * Peer allocation may fail only in serious out-of-memory conditions. However
 * we still can generate some output.
 * Random ID selection looks a bit dangerous because we have no chances to
 * select ID being unique in a reasonable period of time.
 * But broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static spinlock_t ip_fb_id_lock = SPIN_LOCK_UNLOCKED;
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id(ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}

/* Choose the IP ID from the per-destination peer when one is available,
 * falling back to the global counter otherwise.
 */
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (rt) {
		if (rt->peer == NULL)
			rt_bind_peer(rt, 1);

		/* If peer is attached to destination, it is never detached,
		   so that we need not to grab a lock to dereference it.
		 */
		if (rt->peer) {
			iph->id = htons(inet_getid(rt->peer));
			return;
		}
	} else
		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", NET_CALLER(iph));

	ip_select_fb_ident(iph);
}

/* Drop a reference to the route and unlink it from its hash chain. */
static void rt_del(unsigned hash, struct rtable *rt)
{
	struct rtable **rthp;

	write_lock_bh(&rt_hash_table[hash].lock);
	ip_rt_put(rt);
	for (rthp = &rt_hash_table[hash].chain; *rthp;
	     rthp = &(*rthp)->u.rt_next)
		if (*rthp == rt) {
			*rthp = rt->u.rt_next;
			rt_free(rt);
			break;
		}
	write_unlock_bh(&rt_hash_table[hash].lock);
}

/* Process an ICMP redirect: sanity-check the advised gateway and rewrite
 * the matching cache entries to point at it.
 */
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
		    u32 saddr, u8 tos, struct net_device *dev)
{
	int i, k;
	struct in_device *in_dev = in_dev_get(dev);
	struct rtable *rth, **rthp;
	u32 skeys[2] = { saddr, 0 };
	int ikeys[2] = { dev->ifindex, 0 };

	tos &= IPTOS_RT_MASK;

	if (!in_dev)
		return;

	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
	    || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) &&
		    ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	for (i = 0; i < 2; i++) {
		for (k = 0; k < 2; k++) {
			unsigned hash = rt_hash_code(daddr,
						     skeys[i] ^ (ikeys[k] << 5),
						     tos);

			rthp = &rt_hash_table[hash].chain;

			read_lock(&rt_hash_table[hash].lock);
			while ((rth = *rthp) != NULL) {
				struct rtable *rt;

				if (rth->key.dst != daddr ||
				    rth->key.src != skeys[i] ||
				    rth->key.tos != tos ||
				    rth->key.oif != ikeys[k] ||
				    rth->key.iif != 0) {
					rthp = &rth->u.rt_next;
					continue;
				}

				if (rth->rt_dst != daddr ||
				    rth->rt_src != saddr ||
				    rth->u.dst.error ||
				    rth->rt_gateway != old_gw ||
				    rth->u.dst.dev != dev)
					break;

				dst_clone(&rth->u.dst);
				read_unlock(&rt_hash_table[hash].lock);

				rt = dst_alloc(&ipv4_dst_ops);
				if (rt == NULL) {
					ip_rt_put(rth);
					in_dev_put(in_dev);
					return;
				}

				/* Copy all the information. */
				*rt = *rth;
				rt->u.dst.__use = 1;
				atomic_set(&rt->u.dst.__refcnt, 1);
				if (rt->u.dst.dev)
					dev_hold(rt->u.dst.dev);
				rt->u.dst.lastuse = jiffies;
				rt->u.dst.neighbour = NULL;
				rt->u.dst.hh = NULL;
				rt->u.dst.obsolete = 0;
				rt->rt_flags |= RTCF_REDIRECTED;

				/* Gateway is different ...
				 */
				rt->rt_gateway = new_gw;

				/* Redirect received -> path was valid */
				dst_confirm(&rth->u.dst);

				if (rt->peer)
					atomic_inc(&rt->peer->refcnt);

				if (arp_bind_neighbour(&rt->u.dst) ||
				    !(rt->u.dst.neighbour->nud_state &
				      NUD_VALID)) {
					if (rt->u.dst.neighbour)
						neigh_event_send(rt->u.dst.neighbour, NULL);
					ip_rt_put(rth);
					rt_drop(rt);
					goto do_next;
				}

				rt_del(hash, rth);
				if (!rt_intern_hash(hash, rt, &rt))
					ip_rt_put(rt);
				goto do_next;
			}
			read_unlock(&rt_hash_table[hash].lock);
		do_next:
			;
		}
	}
	in_dev_put(in_dev);
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
			"%u.%u.%u.%u ignored.\n"
			" Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, "
			"tos %02x\n",
		       NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
		       NIPQUAD(saddr), NIPQUAD(daddr), tos);
#endif
	in_dev_put(in_dev);
}

/* dst_ops->negative_advice hook: discard obsolete entries and cached
 * redirects when a caller reports problems with this route.
 */
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->u.dst.expires) {
			unsigned hash = rt_hash_code(rt->key.dst,
						     rt->key.src ^
							(rt->key.oif << 5),
						     rt->key.tos);
#if RT_CACHE_DEBUG >= 1
			printk(KERN_DEBUG "ip_rt_advice: redirect to "
					  "%u.%u.%u.%u/%02x dropped\n",
				NIPQUAD(rt->rt_dst), rt->key.tos);
#endif
			rt_del(hash, rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = (struct rtable *)skb->dst;
	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);

	if (!in_dev)
		return;

	if (!IN_DEV_TX_REDIRECTS(in_dev))
		goto out;

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (jiffies - rt->u.dst.rate_last > ip_rt_redirect_silence)
		rt->u.dst.rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set u.dst.rate_last to the last seen redirected packet.
	 */
	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
		rt->u.dst.rate_last = jiffies;
		goto out;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (jiffies - rt->u.dst.rate_last >
	    (ip_rt_redirect_load << rt->u.dst.rate_tokens)) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		rt->u.dst.rate_last = jiffies;
		++rt->u.dst.rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (IN_DEV_LOG_MARTIANS(in_dev) &&
		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
		    net_ratelimit())
			printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores "
				"redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
				NIPQUAD(rt->rt_src), rt->rt_iif,
				NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
#endif
	}
out:
	in_dev_put(in_dev);
}

/* Input handler for error routes: send a rate-limited ICMP
 * destination-unreachable and drop the packet.
 */
static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = (struct rtable *)skb->dst;
	unsigned long now;
	int code;

	switch (rt->u.dst.error) {
		case EINVAL:
		default:
			goto out;
		case EHOSTUNREACH:
			code = ICMP_HOST_UNREACH;
			break;
		case ENETUNREACH:
			code = ICMP_NET_UNREACH;
			break;
		case EACCES:
			code = ICMP_PKT_FILTERED;
			break;
	}

	now = jiffies;
	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
		rt->u.dst.rate_tokens = ip_rt_error_burst;
	rt->u.dst.rate_last = now;
	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
		rt->u.dst.rate_tokens -= ip_rt_error_cost;
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
	}

out:
	kfree_skb(skb);
	return 0;
}

/*
 *	The last two values are not from the RFC but
 *	are needed for AMPRnet AX.25 paths.
 */