📄 route.c
字号:
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

/*
 * With RT6_DEBUG >= 3 both macros expand to printk calls; otherwise
 * RDBG() expands to nothing and RT6_TRACE() to an empty statement.
 */
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

#define CLONE_OFFLINK_ROUTE 0

/*
 * Default values for the ip6_rt_* parameters.  The time-based ones are
 * expressed as multiples of HZ.  Only ip6_rt_gc_interval has external
 * linkage.
 */
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
int ip6_rt_gc_interval = 30*HZ;
static int ip6_rt_gc_elasticity = 9;
static int ip6_rt_mtu_expires = 10*60*HZ;
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

/* Forward declarations for the handlers referenced by the tables below. */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(void);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif

/*
 * dst operations used for regular IPv6 routes; ip6_dst_alloc() below
 * allocates entries against this table.
 */
static struct dst_ops ip6_dst_ops = {
	.family = AF_INET6,
	.protocol = __constant_htons(ETH_P_IPV6),
	.gc = ip6_dst_gc,
	.gc_thresh = 1024,
	.check = ip6_dst_check,
	.destroy = ip6_dst_destroy,
	.ifdown = ip6_dst_ifdown,
	.negative_advice = ip6_negative_advice,
	.link_failure = ip6_link_failure,
	.update_pmtu = ip6_rt_update_pmtu,
	.entry_size = sizeof(struct rt6_info),
};

/* PMTU update handler for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}

/*
 * dst operations for blackhole entries: shares destroy/check with
 * ip6_dst_ops but installs the no-op PMTU handler above and sets no
 * gc, ifdown, negative_advice or link_failure callbacks.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family = AF_INET6,
	.protocol = __constant_htons(ETH_P_IPV6),
	.destroy = ip6_dst_destroy,
	.check = ip6_dst_check,
	.update_pmtu = ip6_rt_blackhole_update_pmtu,
	.entry_size = sizeof(struct rt6_info),
};

/*
 * Statically initialised route whose dst error is -ENETUNREACH and whose
 * input/output handlers are the ip6_pkt_discard pair.  The lookup helpers
 * in this file (rt6_device_match(), rt6_select()) return its address when
 * nothing matches, and BACKTRACK() tests for it.
 */
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
			.obsolete = -1,
			.error = -ENETUNREACH,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.input = ip6_pkt_discard,
			.output = ip6_pkt_discard_out,
			.ops = &ip6_dst_ops,
			.path = (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Same layout as ip6_null_entry, but with dst error -EACCES and the
 * ip6_pkt_prohibit handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
			.obsolete = -1,
			.error = -EACCES,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.input = ip6_pkt_prohibit,
			.output = ip6_pkt_prohibit_out,
			.ops = &ip6_dst_ops,
			.path = (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Same layout as ip6_null_entry, but with dst error -EINVAL; both the
 * input and the output handler are ip6_pkt_blk_hole.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
			.obsolete = -1,
			.error = -EINVAL,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.input = ip6_pkt_blk_hole,
			.output = ip6_pkt_blk_hole,
			.ops = &ip6_dst_ops,
			.path = (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif

/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
{
	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
}

/*
 * .destroy callback: if the route still points at an inet6_dev, clear
 * rt6i_idev and drop that reference with in6_dev_put().
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
}

/*
 * .ifdown callback.  When @dev is not the loopback device and the route's
 * inet6_dev belongs to @dev, re-point rt6i_idev at the loopback device's
 * inet6_dev and release the old one.  Nothing changes if the loopback
 * inet6_dev cannot be obtained.  @how is not used here.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

/*
 * Non-zero when the route carries RTF_EXPIRES and jiffies is already
 * past rt6i_expires.
 */
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & RTF_EXPIRES &&
		time_after(jiffies, rt->rt6i_expires));
}

/*
 * Non-zero when the type of @daddr has the multicast or link-local
 * bit set.
 */
static inline int rt6_need_strict(struct in6_addr *daddr)
{
	return (ipv6_addr_type(daddr) &
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/*
 * Pick the entry of the list starting at @rt that fits interface @oif.
 *
 * With @oif == 0 the list head @rt is returned unchanged.  Otherwise the
 * list is walked via u.dst.rt6_next:
 *  - the first entry whose device ifindex equals @oif is returned;
 *  - entries on a loopback device are remembered in 'local' as a
 *    fallback, subject to the strict / existing-candidate checks below.
 * If the walk ends without an exact match, 'local' is returned when set;
 * failing that, &ip6_null_entry when @strict is non-zero, else @rt.
 */
static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
						    int oif,
						    int strict)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (oif) {
		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
			struct net_device *dev = sprt->rt6i_dev;
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					/* loopback route not bound to oif */
					if (strict && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		}

		if (local)
			return local;

		if (strict)
			return &ip6_null_entry;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Probe the next hop of @rt.  A NULL @rt, a route without a next-hop
 * neighbour, or a neighbour already in a NUD_VALID state is ignored.
 * Otherwise, if more than rtr_probe_interval has passed since
 * neigh->updated, the timestamp is refreshed and a neighbour
 * solicitation for the neighbour's primary key is sent on rt->rt6i_dev
 * to the address computed by addrconf_addr_solict_mult().
 *
 * NOTE(review): neigh->updated is stored while holding only the read
 * side of neigh->lock -- confirm this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	/* re-check the state now that the lock is held */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* the lock is dropped before the solicitation is sent */
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */

/*
 * Device score of @rt for interface @oif:
 *   2 - no interface requested, or the route's device is @oif;
 *   1 - the route is on a loopback device whose inet6_dev belongs to @oif;
 *   0 - no match.
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->rt6i_dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

/*
 * Neighbour score of @rt:
 *   1 - route is RTF_NONEXTHOP or is not a gateway route;
 *   2 - next-hop neighbour is in a NUD_VALID state;
 *   0 - neighbour is NUD_FAILED (only with CONFIG_IPV6_ROUTER_PREF),
 *       or a gateway route has no neighbour entry at all;
 *   1 - any other neighbour state.
 * The neighbour state is read under neigh->lock (read side).
 */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}

/*
 * Score @rt for a lookup on @oif with lookup flags @strict.
 *
 * Returns -1 when the route must be rejected: either the device does not
 * match and RT6_LOOKUP_F_IFACE is set, or the neighbour score is 0 and
 * RT6_LOOKUP_F_REACHABLE is set.  Otherwise returns the device score,
 * with the decoded router preference OR-ed in above bit 1 when
 * CONFIG_IPV6_ROUTER_PREF is enabled.  The neighbour score itself only
 * gates rejection; it is not part of the returned value.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}

/*
 * Compare candidate @rt against the best route found so far.
 *
 * Expired or rejected (negative score) candidates leave @match untouched.
 * A candidate scoring strictly higher than *@mpri becomes the new match
 * and *@mpri is updated.  When RT6_LOOKUP_F_REACHABLE is set, the route
 * that loses the comparison (the previous match, or @rt itself) is passed
 * to rt6_probe().
 *
 * Returns the (possibly updated) best match.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}

/*
 * Find the best route among the entries of @fn that share @metric.
 * The scan starts at @rr_head and runs to the end of the same-metric run,
 * then wraps around from fn->leaf up to (but excluding) @rr_head.
 * Returns NULL when no candidate is acceptable.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->u.dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->u.dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}

/*
 * Select a route from node @fn for interface @oif.
 *
 * The search starts at fn->rr_ptr, which is initialised to fn->leaf when
 * unset.  If nothing matches and RT6_LOOKUP_F_REACHABLE was requested,
 * fn->rr_ptr is advanced to the next entry with the same metric (wrapping
 * to fn->leaf) so that a later call starts from a different entry.
 *
 * Returns the selected route, or &ip6_null_entry when there is none.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __FUNCTION__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->u.dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __FUNCTION__, match);

	return (match ? match : &ip6_null_entry);
}

#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @dev:    device the option arrived on; its ifindex keys the route.
 * @opt:    start of the option, interpreted as struct route_info.
 * @len:    number of bytes available at @opt.
 * @gwaddr: gateway address used to look up / create the route.
 *
 * Returns -EINVAL when @len is too short for struct route_info or when
 * the option's length / prefix_len fields are inconsistent; 0 otherwise.
 *
 * A lifetime of 0 deletes an existing route; a non-zero lifetime creates
 * the route if missing, or refreshes the preference bits of an existing
 * one.  A lifetime of 0xffffffff clears RTF_EXPIRES; any other value sets
 * rt6i_expires to jiffies + HZ * lifetime.  The reference held on the
 * route is released before returning.
 *
 * NOTE(review): @len is a signed int compared against sizeof(); a
 * negative @len would be converted to unsigned for that comparison --
 * confirm callers never pass one.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* an invalid preference value is treated as medium */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	/*
	 * With length == 3 the prefix field is used in place; otherwise
	 * only prefix_len bits are copied into a local buffer.
	 */
	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif

/*
 * BACKTRACK(saddr): when the current result 'rt' is &ip6_null_entry,
 * climb from 'fn' towards the root.  At each step the parent's subtree
 * (if it has one that is not the node just left) is searched with
 * fib6_lookup() using @saddr; otherwise the parent itself becomes 'fn'.
 * Jumps to 'restart' as soon as a node flagged RTN_RTINFO is reached,
 * or to 'out' when 'fn' is flagged RTN_TL_ROOT.
 *
 * The macro relies on the caller providing locals named 'rt' and 'fn'
 * and labels named 'out' and 'restart'.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)

/*
 * Lookup callback handed to fib6_rule_lookup() by rt6_lookup().
 *
 * Under the read side of table->tb6_lock: finds the node for the flow's
 * destination/source, picks an entry with rt6_device_match() and
 * backtracks while the result is &ip6_null_entry.  dst_use() is called
 * on the result before the lock is dropped.
 */
static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(rt, fl->oif, flags);
	BACKTRACK(&fl->fl6_src);
out:
	dst_use(&rt->u.dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

/*
 * Look up a route to @daddr.
 *
 * @daddr:  destination address (copied into the flow key).
 * @saddr:  optional source address; when non-NULL it is copied into the
 *          flow key and RT6_LOOKUP_F_HAS_SADDR is added to the flags.
 * @oif:    outgoing interface index for the flow key.
 * @strict: non-zero requests RT6_LOOKUP_F_IFACE.
 *
 * Returns the route when the resulting dst has error == 0.  Otherwise
 * the dst is released and NULL is returned.
 */
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
			    int oif, int strict)
{
	struct flowi fl = {
		.oif = oif,
		.nl_u = {
			.ip6_u = {
				.daddr = *daddr,
			},
		},
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

/*
 * Insert @rt into its own table (rt->rt6i_table) via fib6_add(), holding
 * the write side of table->tb6_lock for the duration of the call.
 * Returns whatever fib6_add() returns.
 */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

/* Insert @rt with no nl_info; see __ip6_ins_rt(). */
int ip6_ins_rt(struct rt6_info *rt)
{
	return __ip6_ins_rt(rt, NULL);
}

static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
				      struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -