📄 route.c
字号:
if (res.fi) fib_info_put(res.fi); res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; }#ifdef CONFIG_IP_ROUTE_MULTIPATH if (res.fi->fib_nhs > 1 && key.oif == 0) fib_select_multipath(&key, &res); else#endif if (res.prefixlen==0 && res.type == RTN_UNICAST && key.oif == 0) fib_select_default(&key, &res); if (!key.src) key.src = FIB_RES_PREFSRC(res); if (dev_out) dev_put(dev_out); dev_out = FIB_RES_DEV(res); dev_hold(dev_out); key.oif = dev_out->ifindex;make_route: if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK)) goto e_inval; if (key.dst == 0xFFFFFFFF) res.type = RTN_BROADCAST; else if (MULTICAST(key.dst)) res.type = RTN_MULTICAST; else if (BADCLASS(key.dst) || ZERONET(key.dst)) goto e_inval; if (dev_out->flags&IFF_LOOPBACK) flags |= RTCF_LOCAL; if (res.type == RTN_BROADCAST) { flags |= RTCF_BROADCAST|RTCF_LOCAL; if (res.fi) { fib_info_put(res.fi); res.fi = NULL; } } else if (res.type == RTN_MULTICAST) { flags |= RTCF_MULTICAST|RTCF_LOCAL; read_lock(&inetdev_lock); if (!__in_dev_get(dev_out) || !ip_check_mc(__in_dev_get(dev_out), oldkey->dst)) flags &= ~RTCF_LOCAL; read_unlock(&inetdev_lock); /* If multicast route do not exist use default one, but do not gateway in this case. Yes, it is hack. */ if (res.fi && res.prefixlen < 4) { fib_info_put(res.fi); res.fi = NULL; } } rth = dst_alloc(&ipv4_dst_ops); if (!rth) goto e_nobufs; atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; rth->key.dst = oldkey->dst; rth->key.tos = tos; rth->key.src = oldkey->src; rth->key.iif = 0; rth->key.oif = oldkey->oif;#ifdef CONFIG_IP_ROUTE_FWMARK rth->key.fwmark = oldkey->fwmark;#endif rth->rt_dst = key.dst; rth->rt_src = key.src;#ifdef CONFIG_IP_ROUTE_NAT rth->rt_dst_map = key.dst; rth->rt_src_map = key.src;#endif rth->rt_iif = oldkey->oif ? : dev_out->ifindex; rth->u.dst.dev = dev_out; dev_hold(dev_out); rth->rt_gateway = key.dst; rth->rt_spec_dst= key.src; rth->u.dst.output=ip_output; if (flags&RTCF_LOCAL) { rth->u.dst.input = ip_local_deliver; rth->rt_spec_dst = key.dst; } if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { rth->rt_spec_dst = key.src; if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK)) rth->u.dst.output = ip_mc_output;#ifdef CONFIG_IP_MROUTE if (res.type == RTN_MULTICAST) { struct in_device *in_dev = in_dev_get(dev_out); if (in_dev) { if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(oldkey->dst)) { rth->u.dst.input = ip_mr_input; rth->u.dst.output = ip_mc_output; } in_dev_put(in_dev); } }#endif } rt_set_nexthop(rth, &res, 0); rth->rt_flags = flags; hash = rt_hash_code(oldkey->dst, oldkey->src^(oldkey->oif<<5), tos); err = rt_intern_hash(hash, rth, rp);done: if (free_res) fib_res_put(&res); if (dev_out) dev_put(dev_out); return err;e_inval: err = -EINVAL; goto done;e_nobufs: err = -ENOBUFS; goto done;}int ip_route_output_key(struct rtable **rp, const struct rt_key *key){ unsigned hash; struct rtable *rth; hash = rt_hash_code(key->dst, key->src^(key->oif<<5), key->tos); read_lock_bh(&rt_hash_table[hash].lock); for (rth=rt_hash_table[hash].chain; rth; rth=rth->u.rt_next) { if (rth->key.dst == key->dst && rth->key.src == key->src && rth->key.iif == 0 && rth->key.oif == key->oif &&#ifdef CONFIG_IP_ROUTE_FWMARK rth->key.fwmark == key->fwmark &&#endif !((rth->key.tos^key->tos)&(IPTOS_RT_MASK|RTO_ONLINK)) && ((key->tos&RTO_TPROXY) || !(rth->rt_flags&RTCF_TPROXY)) ) { rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; read_unlock_bh(&rt_hash_table[hash].lock); *rp = rth; return 0; } } read_unlock_bh(&rt_hash_table[hash].lock); return ip_route_output_slow(rp, key);} #ifdef CONFIG_RTNETLINKstatic int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait){ struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct rta_cacheinfo ci;#ifdef CONFIG_IP_MROUTE struct rtattr *eptr;#endif nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); r = NLMSG_DATA(nlh); nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = rt->key.tos; r->rtm_table = RT_TABLE_MAIN; r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); if (rt->key.src) { r->rtm_src_len = 32; RTA_PUT(skb, RTA_SRC, 4, &rt->key.src); } if (rt->u.dst.dev) RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);#ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);#endif if (rt->key.iif) RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); else if (rt->rt_src != rt->key.src) RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); if (rt->rt_dst != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0) goto rtattr_failure; ci.rta_lastuse = jiffies - rt->u.dst.lastuse; ci.rta_used = rt->u.dst.__use; ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); if (rt->u.dst.expires) ci.rta_expires = rt->u.dst.expires - jiffies; else ci.rta_expires = 0; ci.rta_error = rt->u.dst.error; ci.rta_id = 0; ci.rta_ts = 0; ci.rta_tsage = 0; if (rt->peer) { ci.rta_id = rt->peer->ip_id_count; if (rt->peer->tcp_ts_stamp) { ci.rta_ts = rt->peer->tcp_ts; ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; } }#ifdef CONFIG_IP_MROUTE eptr = (struct rtattr*)skb->tail;#endif RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); if (rt->key.iif) {#ifdef CONFIG_IP_MROUTE u32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_devconf.mc_forwarding) { int err = ipmr_get_route(skb, r, nowait); if (err <= 0) { if (!nowait) { if (err == 0) return 0; goto nlmsg_failure; } else { if (err == -EMSGSIZE) goto nlmsg_failure; ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; } } } else#endif { RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->key.iif); } } nlh->nlmsg_len = skb->tail - b; return skb->len;nlmsg_failure:rtattr_failure: skb_trim(skb, b - skb->data); return -1;}int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg){ struct rtattr **rta = arg; struct rtmsg *rtm = NLMSG_DATA(nlh); struct rtable *rt = NULL; u32 dst = 0; u32 src = 0; int iif = 0; int err; struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) return -ENOBUFS; /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. */ skb->mac.raw = skb->data; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); if (rta[RTA_SRC-1]) memcpy(&src, RTA_DATA(rta[RTA_SRC-1]), 4); if (rta[RTA_DST-1]) memcpy(&dst, RTA_DATA(rta[RTA_DST-1]), 4); if (rta[RTA_IIF-1]) memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); if (iif) { struct net_device *dev; dev = __dev_get_by_index(iif); if (!dev) return -ENODEV; skb->protocol = __constant_htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); rt = (struct rtable*)skb->dst; if (!err && rt->u.dst.error) err = -rt->u.dst.error; } else { int oif = 0; if (rta[RTA_OIF-1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif); } if (err) { kfree_skb(skb); return err; } skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0); if (err == 0) return 0; if (err < 0) return -EMSGSIZE; err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err < 0) return err; return 0;}int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb){ struct rtable *rt; int h, s_h; int idx, s_idx; s_h = cb->args[0]; s_idx = idx = cb->args[1]; for (h=0; h <= rt_hash_mask; h++) { if (h < s_h) continue; if (h > s_h) s_idx = 0; read_lock_bh(&rt_hash_table[h].lock); for (rt = rt_hash_table[h].chain, idx = 0; rt; rt = rt->u.rt_next, idx++) { if (idx < s_idx) continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) { dst_release(xchg(&skb->dst, NULL)); read_unlock_bh(&rt_hash_table[h].lock); goto done; } dst_release(xchg(&skb->dst, NULL)); } read_unlock_bh(&rt_hash_table[h].lock); }done: cb->args[0] = h; cb->args[1] = idx; return skb->len;}#endif /* CONFIG_RTNETLINK */void ip_rt_multicast_event(struct in_device *in_dev){ rt_cache_flush(0);}#ifdef CONFIG_SYSCTLstatic int flush_delay;staticint ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp){ if (write) { proc_dointvec(ctl, write, filp, buffer, lenp); rt_cache_flush(flush_delay); return 0; } else return -EINVAL;}static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context){ int delay; if (newlen != sizeof(int)) return -EINVAL; if (get_user(delay,(int *)newval)) return -EFAULT; rt_cache_flush(delay); return 0;}ctl_table ipv4_route_table[] = { {NET_IPV4_ROUTE_FLUSH, "flush", &flush_delay, sizeof(int), 0644, NULL, &ipv4_sysctl_rtcache_flush, &ipv4_sysctl_rtcache_flush_strategy }, {NET_IPV4_ROUTE_MIN_DELAY, "min_delay", &ip_rt_min_delay, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_MAX_DELAY, "max_delay", &ip_rt_max_delay, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_GC_THRESH, "gc_thresh", &ipv4_dst_ops.gc_thresh, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_MAX_SIZE, "max_size", &ip_rt_max_size, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval", &ip_rt_gc_min_interval, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout", &ip_rt_gc_timeout, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval", &ip_rt_gc_interval, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load", &ip_rt_redirect_load, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number", &ip_rt_redirect_number, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence", &ip_rt_redirect_silence, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_ERROR_COST, "error_cost", &ip_rt_error_cost, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_ERROR_BURST, "error_burst", &ip_rt_error_burst, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity", &ip_rt_gc_elasticity, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires", &ip_rt_mtu_expires, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu", &ip_rt_min_pmtu, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss", &ip_rt_min_advmss, sizeof(int), 0644, NULL, &proc_dointvec}, {0}};#endif#ifdef CONFIG_NET_CLS_ROUTEstruct ip_rt_acct *ip_rt_acct;static int ip_rt_acct_read(char *buffer, char **start, off_t offset, int length, int *eof, void *data){ *start=buffer; if ((offset&3) || (length&3)) return -EIO; if (offset + length >= sizeof(struct ip_rt_acct)*256) { length = sizeof(struct ip_rt_acct)*256 - offset; *eof = 1; } if (length > 0) { u32 *dst = (u32*)buffer; u32 *src = (u32*)(((u8*)ip_rt_acct) + offset); memcpy(dst, src, length);#ifdef CONFIG_SMP if (smp_num_cpus > 1 || cpu_logical_map(0) != 0) { int i; int cnt = length/4; for (i=0; i<smp_num_cpus; i++) { int cpu = cpu_logical_map(i); int k; if (cpu == 0) continue; src = (u32*)(((u8*)ip_rt_acct) + offset + cpu*256*sizeof(struct ip_rt_acct)); for (k=0; k<cnt; k++) dst[k] += src[k]; } }#endif return length; } return 0;}#endifvoid __init ip_rt_init(void){ int i, order, goal;#ifdef CONFIG_NET_CLS_ROUTE for (order=0; (PAGE_SIZE<<order) < 256*sizeof(ip_rt_acct)*NR_CPUS; order++) /* NOTHING */; ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); memset(ip_rt_acct, 0, PAGE_SIZE<<order);#endif ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!ipv4_dst_ops.kmem_cachep) panic("IP: failed to allocate ip_dst_cache\n"); goal = num_physpages >> (26 - PAGE_SHIFT); for (order = 0; (1UL << order) < goal; order++) /* NOTHING */; do { rt_hash_mask = (1UL << order) * PAGE_SIZE / sizeof(struct rt_hash_bucket); while (rt_hash_mask & (rt_hash_mask-1)) rt_hash_mask--; rt_hash_table = (struct rt_hash_bucket *) __get_free_pages(GFP_ATOMIC, order); } while (rt_hash_table == NULL && --order > 0); if (!rt_hash_table) panic("Failed to allocate IP route cache hash table\n"); printk("IP: routing cache hash table of %u buckets, %ldKbytes\n", rt_hash_mask, (long) (rt_hash_mask*sizeof(struct rt_hash_bucket))/1024); for (rt_hash_log=0; (1<<rt_hash_log) != rt_hash_mask; rt_hash_log++) /* NOTHING */; rt_hash_mask--; for (i = 0; i <= rt_hash_mask; i++) { rt_hash_table[i].lock = RW_LOCK_UNLOCKED; rt_hash_table[i].chain = NULL; } ipv4_dst_ops.gc_thresh = (rt_hash_mask+1); ip_rt_max_size = (rt_hash_mask+1)*16; devinet_init(); ip_fib_init(); rt_flush_timer.function = rt_run_flush; rt_periodic_timer.function = rt_check_expire; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ rt_periodic_timer.expires = jiffies + net_random()%ip_rt_gc_interval + ip_rt_gc_interval; add_timer(&rt_periodic_timer); proc_net_create ("rt_cache", 0, rt_cache_get_info);#ifdef CONFIG_NET_CLS_ROUTE create_proc_read_entry("net/rt_acct", 0, 0, ip_rt_acct_read, NULL);#endif}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -