📄 route.c
Font size:
/* route.c * linqianghe@163.com * 2006-10-13
 *
 * IPv4 routing cache: a hash table of struct rtable entries with periodic
 * garbage collection, modelled on the 2.6-era kernel net/ipv4/route.c
 * (all identifiers carry a "my" prefix to avoid clashing with the kernel's).
 */
#include "route.h"
#include "af_inet.h"
#include "log.h"
#include "fib_frontend.h"
#include "ip_fib.h"
#include "devinet.h"
#include "fib_semantics.h"
#include "fib_rules.h"
#include "dst.h"
#include "ip_output.h"
#include "ip_input.h"
#include "ipmr.h"
#include "arp.h"
#include "inetdevice.h"
#include <linux/bootmem.h>
#include <linux/jhash.h>
#include <net/ip_mp_alg.h>
#include <linux/random.h>
#include <linux/in.h>
#include <linux/inetdevice.h>

/* Largest MTU we will ever report in a route's metrics. */
#define IP_MAX_MTU	0xFFF0
/* Base expiry interval for unused cache entries. */
#define RT_GC_TIMEOUT	(300*HZ)

/* Extract the routable TOS bits (plus the ONLINK flag) from a flow key. */
#define RT_FL_TOS(oldflp) \
	((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))

/* Per-CPU cache statistics; bumped lock-free via MYRT_CACHE_STAT_INC. */
static DEFINE_PER_CPU(struct rt_cache_stat, myrt_cache_stat);
#define MYRT_CACHE_STAT_INC(field) \
	(per_cpu(myrt_cache_stat, raw_smp_processor_id()).field++)

/* Tunables (jiffies unless noted).  Several are not referenced in this
 * chunk and are presumably consumed by code elsewhere in the file. */
static int myip_rt_min_delay		= 2 * HZ;
static int myip_rt_max_delay		= 10 * HZ;
static int myip_rt_max_size;			/* hard cap on cached entries */
static int myip_rt_gc_timeout		= RT_GC_TIMEOUT;
static int myip_rt_gc_interval		= 60 * HZ;
static int myip_rt_gc_min_interval	= HZ / 2;	/* min gap between GC runs */
static int myip_rt_redirect_number	= 9;
static int myip_rt_redirect_load	= HZ / 50;
static int myip_rt_redirect_silence	= ((HZ / 50) << (9 + 1));
static int myip_rt_error_cost		= HZ;
static int myip_rt_error_burst		= 5 * HZ;
static int myip_rt_gc_elasticity	= 8;	/* tolerated avg chain length */
static int myip_rt_mtu_expires		= 10 * 60 * HZ;
static int myip_rt_min_pmtu		= 512 + 20 + 20;
static int myip_rt_min_advmss		= 256;
static int myip_rt_secret_interval	= 10 * 60 * HZ;
static unsigned long myrt_deadline;

extern struct net_device myloopback_dev;

/* One hash chain head per bucket. */
struct rt_hash_bucket {
	struct rtable	*chain;
};

/* Bucket locking is compiled out in this build: the lock address is NULL
 * and spin_lock_bh(NULL)/spin_unlock_bh(NULL) are relied on to be no-ops. */
#define myrt_hash_lock_addr(slot) NULL
#define myrt_hash_lock_init()

static struct timer_list myrt_flush_timer;
static struct timer_list myrt_periodic_timer;
static struct timer_list myrt_secret_timer;

static struct rt_hash_bucket	*myrt_hash_table;	/* the cache itself */
static unsigned long		mytable_order;
static unsigned			myrt_hash_mask;		/* buckets - 1 */
static int			myrt_hash_log;		/* log2(buckets) */
static unsigned int		myrt_hash_rnd;		/* jhash seed */

#ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct *myip_rt_acct;
/* Per-CPU slice of the accounting table (256 entries per CPU).
 * NOTE: argument parenthesized so IP_RT_ACCT_CPU(a + b) expands correctly. */
#define IP_RT_ACCT_CPU(i) (myip_rt_acct + (i) * 256)
#endif

/* NOTE(review): the original file repeated the DEFINE_PER_CPU /
 * MYRT_CACHE_STAT_INC pair here verbatim; the duplicate definition of the
 * static per-CPU variable is a redefinition error and has been removed. */

/*
 * dst_ops.link_failure hook: mark the route attached to the skb as expired
 * so the cache drops it.  The ICMP notification is intentionally disabled.
 */
static void myipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	//icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
	rt = (struct rtable *) skb->dst;
	if( rt )
		dst_set_expires( &rt->u.dst, 0 );
}

/* An entry worth keeping longer: redirected/notify routes, or ones with an
 * explicit expiry set. */
static __inline__ int myrt_valuable(struct rtable *rth)
{
	return ( rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
	       rth->u.dst.expires;
}

/* Cheap-to-recreate entry: input broadcast/multicast route that is not the
 * last one on its chain. */
static __inline__ int myrt_fast_clean(struct rtable *rth)
{
	return ( rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST) ) &&
	       rth->fl.iif && rth->u.rt_next;
}

/*
 * Decide whether a cache entry may be reclaimed.
 * tmo1 applies to ordinary entries, tmo2 to "valuable" ones.
 * Returns 0 while the entry is still referenced or too young, 1 otherwise.
 */
static int myrt_may_expire( struct rtable *rth, unsigned long tmo1, unsigned long tmo2 )
{
	unsigned long age;
	int ret = 0;

	if( atomic_read(&rth->u.dst.__refcnt) )	/* in use: never expire */
		goto out;
	ret = 1;
	if( rth->u.dst.expires &&
	    time_after_eq(jiffies, rth->u.dst.expires) )	/* hard expiry hit */
		goto out;
	age = jiffies - rth->u.dst.lastuse;
	ret = 0;
	if( (age <= tmo1 && !myrt_fast_clean(rth)) ||
	    (age <= tmo2 && myrt_valuable(rth)) )
		goto out;
	ret = 1;
out:
	return ret;
}

/* Release an unhashed entry; actual free is deferred to an RCU grace period. */
static __inline__ void myrt_free(struct rtable *rt)
{
	multipath_remove(rt);
	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}

/*
 * Shrink the routing cache toward an equilibrium size.
 *
 * Walks the hash buckets (round-robin from 'rover') evicting entries that
 * myrt_may_expire() allows, halving the per-chain timeout as it scans.
 * Returns 0 on success (or when GC was skipped/completed), 1 only when the
 * cache is still over myip_rt_max_size after a full effort ("dst cache
 * overflow").  Uses function-static state, so it is not reentrant.
 */
static int myrt_garbage_collect(void)
{
	static unsigned long expire = RT_GC_TIMEOUT;
	static unsigned long last_gc;
	static int rover;
	static int equilibrium;
	struct rtable *rth, **rthp;
	unsigned long now = jiffies;
	int goal;

	MYRT_CACHE_STAT_INC(gc_total);

	/* Rate-limit GC while the cache is comfortably under its cap. */
	if( now - last_gc < myip_rt_gc_min_interval &&
	    atomic_read( &myipv4_dst_ops.entries ) < myip_rt_max_size ){
		MYRT_CACHE_STAT_INC(gc_ignored);
		goto out;
	}

	/* How many entries to drop this round. */
	goal = atomic_read( &myipv4_dst_ops.entries ) -
	       (myip_rt_gc_elasticity << myrt_hash_log);
	if (goal <= 0) {
		if( equilibrium < myipv4_dst_ops.gc_thresh )
			equilibrium = myipv4_dst_ops.gc_thresh;
		goal = atomic_read( &myipv4_dst_ops.entries ) - equilibrium;
		if( goal > 0 ){
			equilibrium += min_t( unsigned int, goal / 2, myrt_hash_mask + 1 );
			goal = atomic_read( &myipv4_dst_ops.entries ) - equilibrium;
		}
	}else{
		/* Severely over budget: aim at roughly half the excess. */
		goal = max_t( unsigned int, goal / 2, myrt_hash_mask + 1 );
		equilibrium = atomic_read( &myipv4_dst_ops.entries ) - goal;
	}

	if (now - last_gc >= myip_rt_gc_min_interval)
		last_gc = now;

	if( goal <= 0 ){
		equilibrium += goal;
		goto work_done;
	}

	do{
		int i, k;

		for( i = myrt_hash_mask, k = rover; i >= 0; i--) {
			unsigned long tmo = expire;
			k = (k + 1) & myrt_hash_mask;
			rthp = &myrt_hash_table[k].chain;
			spin_lock_bh( myrt_hash_lock_addr(k) );
			while( (rth = *rthp) != NULL ){
				if( !myrt_may_expire(rth, tmo, expire) ){
					/* Keep it; be stricter further down the chain. */
					tmo >>= 1;
					rthp = &rth->u.rt_next;
					continue;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
				if( rth->u.dst.flags & DST_BALANCED ){
					int r;
					rthp = myrt_remove_balanced_route( &myrt_hash_table[k].chain, rth,&r );
					goal -= r;
					if (!rthp)
						break;
				}else{
					*rthp = rth->u.rt_next;
					myrt_free( rth );
					goal--;
				}
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
				*rthp = rth->u.rt_next;
				myrt_free(rth);
				goal--;
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
			}
			spin_unlock_bh( myrt_hash_lock_addr(k) );
			if( goal <= 0 )
				break;
		}
		rover = k;

		if (goal <= 0)
			goto work_done;

		/* Goal missed: loosen the expiry and try again while time allows. */
		MYRT_CACHE_STAT_INC(gc_goal_miss);
		if (expire == 0)
			break;
		expire >>= 1;
		if( atomic_read( &myipv4_dst_ops.entries ) < myip_rt_max_size )
			goto out;
	}while( !in_softirq() && time_before_eq(jiffies, now) );

	if( atomic_read( &myipv4_dst_ops.entries) < myip_rt_max_size)
		goto out;
	if( net_ratelimit() )
		printk(KERN_WARNING "dst cache overflow\n");
	MYRT_CACHE_STAT_INC(gc_dst_overflow);
	return 1;

work_done:
	/* Relax the expiry back toward the configured timeout. */
	expire += myip_rt_gc_min_interval;
	if( expire > myip_rt_gc_timeout ||
	    atomic_read(&myipv4_dst_ops.entries) < myipv4_dst_ops.gc_thresh)
		expire = myip_rt_gc_timeout;
#if RT_CACHE_DEBUG >= 2
	PR_DEBUG( "expire++ %u %d %d %d\n", expire, atomic_read(&myipv4_dst_ops.entries), goal, rover);
#endif
out:
	return 0;
}

/* dst_ops vtable for this IPv4 cache; most hooks are intentionally unset. */
struct dst_ops myipv4_dst_ops = {
	.family			= MY_AF_INET,
	.protocol		= __constant_htons(ETH_P_IP),
	.gc			= myrt_garbage_collect,
	//.check		= ipv4_dst_check,
	//.destroy		= ipv4_dst_destroy,
	//.ifdown		= ipv4_dst_ifdown,
	//.negative_advice	= ipv4_negative_advice,
	.link_failure		= myipv4_link_failure,
	//.update_pmtu		= ip_rt_update_pmtu,
	.entry_size		= sizeof(struct rtable),
};

/*
 * Eviction score for a cache entry: lower = better eviction candidate.
 * Bit 31 protects "valuable" entries, bit 30 protects output/unicast
 * entries; the low bits favour evicting the least recently used.
 */
static inline u32 myrt_score(struct rtable *rt)
{
	u32 score = jiffies - rt->u.dst.lastuse;

	score = ~score & ~(3<<30);
	if( myrt_valuable(rt) )
		score |= (1<<31);
	if( !rt->fl.iif ||
	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)) )
		score |= (1<<30);
	return score;
}

/* Bucket index for a (daddr, saddr, tos) triple. */
static unsigned int myrt_hash_code( u32 daddr, u32 saddr, u8 tos )
{
	return ( jhash_3words(daddr, saddr, (u32) tos, myrt_hash_rnd) &
		 myrt_hash_mask );
}

/* Two flow keys match when the whole IPv4 sub-key and both ifindexes agree. */
static inline int mycompare_keys(struct flowi *fl1, struct flowi *fl2)
{
	return memcmp( &fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
	       fl1->oif == fl2->oif &&
	       fl1->iif == fl2->iif;
}

/* Drop a caller-held reference and schedule the entry for RCU free. */
static __inline__ void myrt_drop(struct rtable *rt)
{
	multipath_remove(rt);
	ip_rt_put(rt);
	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}

/*
 * Insert 'rt' into bucket 'hash', or reuse an existing equivalent entry.
 *
 * On a key match the existing entry is moved to the chain head and returned
 * through *rp (and 'rt' is dropped).  Otherwise the longest-unused
 * unreferenced entry may be evicted if the chain grew past the elasticity
 * limit, the neighbour is bound for unicast/output routes (with one
 * emergency-GC retry on -ENOBUFS), and 'rt' becomes the new chain head.
 * Returns 0 on success or a negative errno.
 * NOTE(review): the bucket spin_lock_bh calls are commented out here —
 * confirm an outer lock or RCU discipline protects the chains.
 */
static int myrt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
{
	struct rtable *rth, **rthp;
	unsigned long now;
	struct rtable *cand, **candp;
	u32 min_score;
	int chain_length;
	int attempts = !in_softirq();

restart:
	chain_length = 0;
	min_score = ~(u32)0;
	cand = NULL;
	candp = NULL;
	now = jiffies;

	rthp = &myrt_hash_table[hash].chain;
	//spin_lock_bh( myrt_hash_lock_addr(hash) );
	while( (rth = *rthp) != NULL ){
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
		if( !(rth->u.dst.flags & DST_BALANCED) &&
		    mycompare_keys(&rth->fl, &rt->fl) ){
#else
		if( mycompare_keys(&rth->fl, &rt->fl) ){
#endif
			/* Hit: promote to chain head and hand it back. */
			*rthp = rth->u.rt_next;
			rcu_assign_pointer(rth->u.rt_next, myrt_hash_table[hash].chain);
			rcu_assign_pointer( myrt_hash_table[hash].chain, rth );
			rth->u.dst.__use++;
			dst_hold(&rth->u.dst);
			rth->u.dst.lastuse = now;
			//spin_unlock_bh( myrt_hash_lock_addr(hash) );
			myrt_drop( rt );
			*rp = rth;
			return 0;
		}

		/* Track the best eviction candidate among unreferenced entries. */
		if( !atomic_read(&rth->u.dst.__refcnt) ){
			u32 score = myrt_score(rth);
			if (score <= min_score) {
				cand = rth;
				candp = rthp;
				min_score = score;
			}
		}
		chain_length++;
		rthp = &rth->u.rt_next;
	}

	/* Chain too long: evict the worst-scoring idle entry. */
	if( cand ){
		if( chain_length > myip_rt_gc_elasticity ){
			*candp = cand->u.rt_next;
			myrt_free( cand );
		}
	}

	/* Unicast or output routes need a bound neighbour entry. */
	if( rt->rt_type == RTN_UNICAST || rt->fl.iif == 0 ){
		int err = myarp_bind_neighbour( &rt->u.dst );
		if( err ){
			//spin_unlock_bh( myrt_hash_lock_addr(hash) );
			if (err != -ENOBUFS) {
				myrt_drop(rt);
				return err;
			}
			/* Neighbour table full: force an aggressive GC once
			 * (only when not in softirq) and retry. */
			if( attempts-- > 0 ){
				int saved_elasticity = myip_rt_gc_elasticity;
				int saved_int = myip_rt_gc_min_interval;
				myip_rt_gc_elasticity = 1;
				myip_rt_gc_min_interval = 0;
				myrt_garbage_collect();
				myip_rt_gc_min_interval = saved_int;
				myip_rt_gc_elasticity = saved_elasticity;
				goto restart;
			}
			if (net_ratelimit())
				PR_WARN( "Neighbour table overflow.\n" );
			myrt_drop(rt);
			return -ENOBUFS;
		}
	}

	rt->u.rt_next = myrt_hash_table[hash].chain;
#if RT_CACHE_DEBUG >= 2
	if( rt->u.rt_next ){
		struct rtable *trt;
		PR_DEBUG( "rt_cache @%02x: %u.%u.%u.%u", hash, NIPQUAD(rt->rt_dst) );
		for (trt = rt->u.rt_next; trt; trt = trt->u.rt_next)
			printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
		printk("\n");
	}
#endif
	myrt_hash_table[hash].chain = rt;
	//spin_unlock_bh( myrt_hash_lock_addr(hash) );
	*rp = rt;
	return 0;
}

#ifdef CONFIG_NET_CLS_ROUTE
/* Fill whichever halves of the routing classid are still zero from 'tag'. */
static void myset_class_tag( struct rtable *rt, u32 tag )
{
	if( !(rt->u.dst.tclassid & 0xFFFF) )
		rt->u.dst.tclassid |= tag & 0xFFFF;
	if( !(rt->u.dst.tclassid & 0xFFFF0000) )
		rt->u.dst.tclassid |= tag & 0xFFFF0000;
}
#endif

/*
 * Copy next-hop information from a FIB lookup result into the cache entry:
 * gateway, metrics (with MTU / hoplimit / advmss clamping), classid, and
 * finally the route type.
 */
static void myrt_set_nexthop( struct rtable *rt, struct fib_result *res, u32 itag )
{
	struct fib_info *fi = res->fi;

	if( fi ){
		if( FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = FIB_RES_GW(*res);
		memcpy( rt->u.dst.metrics, fi->fib_metrics, sizeof(rt->u.dst.metrics) );
		if( fi->fib_mtu == 0 ){
			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
			/* Locked MTU on a gatewayed route: fall back to 576. */
			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
			    rt->rt_gateway != rt->rt_dst &&
			    rt->u.dst.dev->mtu > 576)
				rt->u.dst.metrics[RTAX_MTU-1] = 576;
		}
#ifdef CONFIG_NET_CLS_ROUTE
		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
	}else
		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;

	if( rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0 )
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = mysysctl_ip_default_ttl;
	if( rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU )
		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
	if( rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 )
		/* mtu - 40: room for the IPv4 + TCP headers. */
		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, myip_rt_min_advmss);
	if( rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40 )
		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;

#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_MULTIPLE_TABLES
	myset_class_tag(rt, myfib_rules_tclass(res));
#endif
	myset_class_tag(rt, itag);
#endif
	rt->rt_type = res->type;
}

/*
 * Build (but do not hash) an output route for the given flow.
 * Classifies the destination (broadcast/multicast/unicast), allocates and
 * fills a struct rtable, and returns it through *result.
 * Returns 0 on success, -EINVAL for invalid source/destination combinations,
 * -ENOBUFS when allocation fails.  Takes and releases a reference on
 * dev_out's in_device; the rtable keeps its own idev reference.
 */
static inline int __mymkroute_output( struct rtable **result,
				      struct fib_result* res,
				      const struct flowi *fl,
				      const struct flowi *oldflp,
				      struct net_device *dev_out,
				      unsigned flags )
{
	struct rtable *rth;
	struct in_device *in_dev;
	u32 tos = RT_FL_TOS(oldflp);
	int err = 0;

	/* A loopback source is only valid on the loopback device. */
	if( LOOPBACK(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK) )
		return -EINVAL;

	if( fl->fl4_dst == 0xFFFFFFFF )
		res->type = RTN_BROADCAST;
	else if( MULTICAST(fl->fl4_dst) )
		res->type = RTN_MULTICAST;
	else if( BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst) )
		return -EINVAL;

	if( dev_out->flags & IFF_LOOPBACK )
		flags |= RTCF_LOCAL;

	in_dev = in_dev_get(dev_out);
	if( !in_dev )
		return -EINVAL;

	if( res->type == RTN_BROADCAST ){
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		if( res->fi ){
			myfib_info_put( res->fi );
			res->fi = NULL;
		}
	}else if( res->type == RTN_MULTICAST ){
		flags |= RTCF_MULTICAST|RTCF_LOCAL;
		//if( !myip_check_mc( in_dev, oldflp->fl4_dst, oldflp->fl4_src, oldflp->proto) )//FIXME
		//	flags &= ~RTCF_LOCAL;
		/* Default multicast route (prefix < /4): no fib_info needed. */
		if( res->fi && res->prefixlen < 4 ){
			myfib_info_put(res->fi);
			res->fi = NULL;
		}
	}

	rth = mydst_alloc( &myipv4_dst_ops );
	if( !rth ){
		err = -ENOBUFS;
		goto cleanup;
	}

	atomic_set( &rth->u.dst.__refcnt, 1 );
	rth->u.dst.flags= DST_HOST;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if( res->fi ){
		rth->rt_multipath_alg = res->fi->fib_mp_alg;
		if( res->fi->fib_nhs > 1 )
			rth->u.dst.flags |= DST_BALANCED;
	}
#endif
	if( in_dev->cnf.no_xfrm )
		rth->u.dst.flags |= DST_NOXFRM;
	if( in_dev->cnf.no_policy )
		rth->u.dst.flags |= DST_NOPOLICY;

	/* Cache key: the caller's original flow. */
	rth->fl.fl4_dst	= oldflp->fl4_dst;
	rth->fl.fl4_tos	= tos;
	rth->fl.fl4_src	= oldflp->fl4_src;
	rth->fl.oif	= oldflp->oif;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
#endif
	rth->rt_dst	= fl->fl4_dst;
	rth->rt_src	= fl->fl4_src;
	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
	rth->u.dst.dev	= dev_out;
	dev_hold( dev_out );
	rth->idev	= in_dev_get(dev_out);
	rth->rt_gateway = fl->fl4_dst;
	rth->rt_spec_dst= fl->fl4_src;

	rth->u.dst.output = myip_output;
	MYRT_CACHE_STAT_INC( out_slow_tot );

	if( flags & RTCF_LOCAL ){
		rth->u.dst.input = myip_local_deliver;
		rth->rt_spec_dst = fl->fl4_dst;
	}
	if( flags & (RTCF_BROADCAST | RTCF_MULTICAST) ){
		rth->rt_spec_dst = fl->fl4_src;
		if( flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK) ){
			rth->u.dst.output = myip_mc_output;
			MYRT_CACHE_STAT_INC( out_slow_mc );
		}
#ifdef CONFIG_IP_MROUTE
		if( res->type == RTN_MULTICAST ){
			if( MYIN_DEV_MFORWARD(in_dev) &&
			    !LOCAL_MCAST(oldflp->fl4_dst) ){
				rth->u.dst.input = myip_mr_input;
				rth->u.dst.output = myip_mc_output;
			}
		}
#endif
	}

	myrt_set_nexthop(rth, res, 0);
	rth->rt_flags = flags;
	*result = rth;

cleanup:
	in_dev_put(in_dev);
	return err;
}

/*
 * Build an output route and insert it into the cache (default, non-multipath
 * path).  Returns 0 on success with *rp set, negative errno otherwise.
 */
static inline int myip_mkroute_output_def( struct rtable **rp,
					   struct fib_result* res,
					   const struct flowi *fl,
					   const struct flowi *oldflp,
					   struct net_device *dev_out,
					   unsigned flags )
{
	struct rtable *rth = NULL;
	int err = __mymkroute_output(&rth, res, fl, oldflp, dev_out, flags);
	unsigned hash;

	if( err == 0 ){
		u32 tos = RT_FL_TOS(oldflp);
		hash = myrt_hash_code(oldflp->fl4_dst,
				      oldflp->fl4_src ^ (oldflp->oif << 5), tos);
		err = myrt_intern_hash( hash, rth, rp );
	}
	return err;
}

static inline int myip_mkroute_output( struct rtable** rp, struct fib_result* res, const struct flowi *fl, const struct flowi *oldflp, struct net_device *dev_out, unsigned
flags ){/*#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -