📄 route.c
字号:
addresses. When oif is specified, routing tables are looked up with only one purpose: to catch if destination is gatewayed, rather than direct. Moreover, if MSG_DONTROUTE is set, we send packet, ignoring both routing tables and ifaddr state. --ANK We could make it even if oif is unknown, likely IPv6, but we do not. */ if (key.src == 0) key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); res.type = RTN_UNICAST; goto make_route; } return -ENETUNREACH; } if (res.type == RTN_NAT) return -EINVAL; if (res.type == RTN_LOCAL) { if (!key.src) key.src = key.dst; dev_out = &loopback_dev; key.oif = dev_out->ifindex; res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; }#ifdef CONFIG_IP_ROUTE_MULTIPATH if (res.fi->fib_nhs > 1 && key.oif == 0) fib_select_multipath(&key, &res); else#endif if (res.prefixlen==0 && res.type == RTN_UNICAST && key.oif == 0) fib_select_default(&key, &res); if (!key.src) key.src = FIB_RES_PREFSRC(res); dev_out = FIB_RES_DEV(res); key.oif = dev_out->ifindex;make_route: if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK)) return -EINVAL; if (key.dst == 0xFFFFFFFF) res.type = RTN_BROADCAST; else if (MULTICAST(key.dst)) res.type = RTN_MULTICAST; else if (BADCLASS(key.dst) || ZERONET(key.dst)) return -EINVAL; if (dev_out->flags&IFF_LOOPBACK) flags |= RTCF_LOCAL; if (res.type == RTN_BROADCAST) { flags |= RTCF_BROADCAST|RTCF_LOCAL; res.fi = NULL; } else if (res.type == RTN_MULTICAST) { flags |= RTCF_MULTICAST|RTCF_LOCAL; if (!ip_check_mc(dev_out, daddr)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use default one, but do not gateway in this case. Yes, it is hack. */ if (res.fi && res.prefixlen < 4) res.fi = NULL; } rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); if (!rth) return -ENOBUFS; atomic_set(&rth->u.dst.use, 1); rth->key.dst = daddr; rth->key.tos = tos; rth->key.src = saddr; rth->key.iif = 0; rth->key.oif = oif; rth->rt_dst = key.dst; rth->rt_src = key.src;#ifdef CONFIG_IP_ROUTE_NAT rth->rt_dst_map = key.dst; rth->rt_src_map = key.src;#endif rth->rt_iif = oif ? : dev_out->ifindex; rth->u.dst.dev = dev_out; rth->rt_gateway = key.dst; rth->rt_spec_dst= key.src; rth->u.dst.output=ip_output; if (flags&RTCF_LOCAL) { rth->u.dst.input = ip_local_deliver; rth->rt_spec_dst = key.dst; } if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { rth->rt_spec_dst = key.src; if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK)) rth->u.dst.output = ip_mc_output;#ifdef CONFIG_IP_MROUTE if (res.type == RTN_MULTICAST && dev_out->ip_ptr) { struct in_device *in_dev = dev_out->ip_ptr; if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(daddr)) { rth->u.dst.input = ip_mr_input; rth->u.dst.output = ip_mc_output; } }#endif } rt_set_nexthop(rth, &res, 0); rth->rt_flags = flags; hash = rt_hash_code(daddr, saddr^(oif<<5), tos); return rt_intern_hash(hash, rth, rp);}int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u32 tos, int oif){ unsigned hash; struct rtable *rth; hash = rt_hash_code(daddr, saddr^(oif<<5), tos); start_bh_atomic(); for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) { if (rth->key.dst == daddr && rth->key.src == saddr && rth->key.iif == 0 && rth->key.oif == oif &&#ifndef CONFIG_IP_TRANSPARENT_PROXY rth->key.tos == tos#else !((rth->key.tos^tos)&(IPTOS_TOS_MASK|RTO_ONLINK)) && ((tos&RTO_TPROXY) || !(rth->rt_flags&RTCF_TPROXY))#endif ) { rth->u.dst.lastuse = jiffies; atomic_inc(&rth->u.dst.use); atomic_inc(&rth->u.dst.refcnt); end_bh_atomic(); *rp = rth; return 0; } } end_bh_atomic(); return ip_route_output_slow(rp, daddr, saddr, tos, oif);}#ifdef CONFIG_RTNETLINKstatic int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait){ struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct rta_cacheinfo ci;#ifdef CONFIG_IP_MROUTE struct rtattr *eptr;#endif struct rtattr *mx; nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); r = NLMSG_DATA(nlh); nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = rt->key.tos; r->rtm_table = RT_TABLE_MAIN; r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); if (rt->key.src) { r->rtm_src_len = 32; RTA_PUT(skb, RTA_SRC, 4, &rt->key.src); } if (rt->u.dst.dev) RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);#ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);#endif if (rt->key.iif) RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); else if (rt->rt_src != rt->key.src) RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); if (rt->rt_dst != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); mx = (struct rtattr*)skb->tail; RTA_PUT(skb, RTA_METRICS, 0, NULL); if (rt->u.dst.mxlock) RTA_PUT(skb, RTAX_LOCK, sizeof(unsigned), &rt->u.dst.mxlock); if (rt->u.dst.pmtu) RTA_PUT(skb, RTAX_MTU, sizeof(unsigned), &rt->u.dst.pmtu); if (rt->u.dst.window) RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window); if (rt->u.dst.rtt) RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt); mx->rta_len = skb->tail - (u8*)mx; if (mx->rta_len == RTA_LENGTH(0)) skb_trim(skb, (u8*)mx - skb->data); ci.rta_lastuse = jiffies - rt->u.dst.lastuse; ci.rta_used = atomic_read(&rt->u.dst.refcnt); ci.rta_clntref = atomic_read(&rt->u.dst.use); if (rt->u.dst.expires) ci.rta_expires = rt->u.dst.expires - jiffies; else ci.rta_expires = 0; ci.rta_error = rt->u.dst.error;#ifdef CONFIG_IP_MROUTE eptr = (struct rtattr*)skb->tail;#endif RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); if (rt->key.iif) {#ifdef CONFIG_IP_MROUTE u32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_devconf.mc_forwarding) { int err = ipmr_get_route(skb, r, nowait); if (err <= 0) { if (!nowait) { if (err == 0) return 0; goto nlmsg_failure; } else { if (err == -EMSGSIZE) goto nlmsg_failure; ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; } } } else#endif { RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->key.iif); } } nlh->nlmsg_len = skb->tail - b; return skb->len;nlmsg_failure:rtattr_failure: skb_trim(skb, b - skb->data); return -1;}int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg){ struct rtattr **rta = arg; struct rtmsg *rtm = NLMSG_DATA(nlh); struct rtable *rt = NULL; u32 dst = 0; u32 src = 0; int iif = 0; int err; struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) return -ENOBUFS; /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. */ skb->mac.raw = skb->data; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); if (rta[RTA_SRC-1]) memcpy(&src, RTA_DATA(rta[RTA_SRC-1]), 4); if (rta[RTA_DST-1]) memcpy(&dst, RTA_DATA(rta[RTA_DST-1]), 4); if (rta[RTA_IIF-1]) memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); if (iif) { struct device *dev; dev = dev_get_by_index(iif); if (!dev) return -ENODEV; skb->protocol = __constant_htons(ETH_P_IP); skb->dev = dev; start_bh_atomic(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); end_bh_atomic(); rt = (struct rtable*)skb->dst; if (!err && rt->u.dst.error) err = -rt->u.dst.error; } else { int oif = 0; if (rta[RTA_OIF-1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif); } if (err) { kfree_skb(skb); return err; } skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0); if (err == 0) return 0; if (err < 0) return -EMSGSIZE; err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err < 0) return err; return 0;}int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb){ struct rtable *rt; int h, s_h; int idx, s_idx; s_h = cb->args[0]; s_idx = idx = cb->args[1]; for (h=0; h < RT_HASH_DIVISOR; h++) { if (h < s_h) continue; if (h > s_h) s_idx = 0; start_bh_atomic(); for (rt = rt_hash_table[h], idx = 0; rt; rt = rt->u.rt_next, idx++) { if (idx < s_idx) continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) { dst_release(xchg(&skb->dst, NULL)); end_bh_atomic(); goto done; } dst_release(xchg(&skb->dst, NULL)); } end_bh_atomic(); }done: cb->args[0] = h; cb->args[1] = idx; return skb->len;}#endif /* CONFIG_RTNETLINK */void ip_rt_multicast_event(struct in_device *in_dev){ rt_cache_flush(0);}#ifdef CONFIG_SYSCTLstatic int flush_delay;staticint ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp){ if (write) { proc_dointvec(ctl, write, filp, buffer, lenp); rt_cache_flush(flush_delay); return 0; } else return -EINVAL;}static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context){ int delay; if (newlen != sizeof(int)) return -EINVAL; if (get_user(delay,(int *)newval)) return -EFAULT; rt_cache_flush(delay); return 0;}ctl_table ipv4_route_table[] = { {NET_IPV4_ROUTE_FLUSH, "flush", &flush_delay, sizeof(int), 0644, NULL, &ipv4_sysctl_rtcache_flush, &ipv4_sysctl_rtcache_flush_strategy }, {NET_IPV4_ROUTE_MIN_DELAY, "min_delay", &ip_rt_min_delay, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_MAX_DELAY, "max_delay", &ip_rt_max_delay, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_GC_THRESH, "gc_thresh", &ipv4_dst_ops.gc_thresh, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_MAX_SIZE, "max_size", &ip_rt_max_size, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval", &ip_rt_gc_min_interval, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout", &ip_rt_gc_timeout, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval", &ip_rt_gc_interval, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load", &ip_rt_redirect_load, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number", &ip_rt_redirect_number, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence", &ip_rt_redirect_silence, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_ERROR_COST, "error_cost", &ip_rt_error_cost, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_ERROR_BURST, "error_burst", &ip_rt_error_burst, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity", &ip_rt_gc_elasticity, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires", &ip_rt_mtu_expires, sizeof(int), 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies}, {0}};#endif#ifdef CONFIG_NET_CLS_ROUTEstruct ip_rt_acct ip_rt_acct[256];#ifdef CONFIG_PROC_FSstatic int ip_rt_acct_read(char *buffer, char **start, off_t offset, int length, int *eof, void *data){ *start=buffer; if (offset + length > sizeof(ip_rt_acct)) { length = sizeof(ip_rt_acct) - offset; *eof = 1; } if (length > 0) { start_bh_atomic(); memcpy(buffer, ((u8*)&ip_rt_acct)+offset, length); end_bh_atomic(); return length; } return 0;}#endif#endif__initfunc(void ip_rt_init(void)){#ifdef CONFIG_PROC_FS#ifdef CONFIG_NET_CLS_ROUTE struct proc_dir_entry *ent;#endif#endif devinet_init(); ip_fib_init(); rt_periodic_timer.function = rt_check_expire; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ rt_periodic_timer.expires = jiffies + net_random()%ip_rt_gc_interval + ip_rt_gc_interval; add_timer(&rt_periodic_timer);#ifdef CONFIG_PROC_FS proc_net_register(&(struct proc_dir_entry) { PROC_NET_RTCACHE, 8, "rt_cache", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, rt_cache_get_info });#ifdef CONFIG_NET_CLS_ROUTE ent = create_proc_entry("net/rt_acct", 0, 0); ent->read_proc = ip_rt_acct_read;#endif#endif}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -