📄 route.c
字号:
#ifdef CONFIG_IP_MROUTE if (res->type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(oldflp->fl4_dst)) { rth->u.dst.input = ip_mr_input; rth->u.dst.output = ip_mc_output; } }#endif } rt_set_nexthop(rth, res, 0); rth->rt_flags = flags; *result = rth; cleanup: /* release work reference to inet device */ in_dev_put(in_dev); return err;}static inline int ip_mkroute_output(struct rtable **rp, struct fib_result* res, const struct flowi *fl, const struct flowi *oldflp, struct net_device *dev_out, unsigned flags){ struct rtable *rth = NULL; int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); err = rt_intern_hash(hash, rth, rp); } return err;}/* * Major route resolver routine. */static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp){ u32 tos = RT_FL_TOS(oldflp); struct flowi fl = { .nl_u = { .ip4_u = { .daddr = oldflp->fl4_dst, .saddr = oldflp->fl4_src, .tos = tos & IPTOS_RT_MASK, .scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), } }, .mark = oldflp->mark, .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; struct fib_result res; unsigned flags = 0; struct net_device *dev_out = NULL; int free_res = 0; int err; res.fi = NULL;#ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL;#endif if (oldflp->fl4_src) { err = -EINVAL; if (MULTICAST(oldflp->fl4_src) || BADCLASS(oldflp->fl4_src) || ZERONET(oldflp->fl4_src)) goto out; /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = ip_dev_find(oldflp->fl4_src); if (dev_out == NULL) goto out; /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: 1. ip_dev_find(saddr) can return wrong iface, if saddr is assigned to multiple interfaces. 2. Moreover, we are allowed to send packets with saddr of another iface. --ANK */ if (oldflp->oif == 0 && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. This hack is not just for fun, it allows vic,vat and friends to work. They bind socket to loopback, set ttl to zero and expect that it will work. From the viewpoint of routing cache they are broken, because we are not allowed to build multicast path with loopback source addr (look, routing cache cannot know, that ttl is zero, so that packet will not leave this host and route is valid). Luckily, this hack is good workaround. */ fl.oif = dev_out->ifindex; goto make_route; } if (dev_out) dev_put(dev_out); dev_out = NULL; } if (oldflp->oif) { dev_out = dev_get_by_index(&init_net, oldflp->oif); err = -ENODEV; if (dev_out == NULL) goto out; /* RACE: Check return value of inet_select_addr instead. */ if (__in_dev_get_rtnl(dev_out) == NULL) { dev_put(dev_out); goto out; /* Wrong error code */ } if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) { if (!fl.fl4_src) fl.fl4_src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); goto make_route; } if (!fl.fl4_src) { if (MULTICAST(oldflp->fl4_dst)) fl.fl4_src = inet_select_addr(dev_out, 0, fl.fl4_scope); else if (!oldflp->fl4_dst) fl.fl4_src = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } } if (!fl.fl4_dst) { fl.fl4_dst = fl.fl4_src; if (!fl.fl4_dst) fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); fl.oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } if (fib_lookup(&fl, &res)) { res.fi = NULL; if (oldflp->oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. WHY? DW. Because we are allowed to send to iface even if it has NO routes and NO assigned addresses. When oif is specified, routing tables are looked up with only one purpose: to catch if destination is gatewayed, rather than direct. Moreover, if MSG_DONTROUTE is set, we send packet, ignoring both routing tables and ifaddr state. --ANK We could make it even if oif is unknown, likely IPv6, but we do not. */ if (fl.fl4_src == 0) fl.fl4_src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); res.type = RTN_UNICAST; goto make_route; } if (dev_out) dev_put(dev_out); err = -ENETUNREACH; goto out; } free_res = 1; if (res.type == RTN_LOCAL) { if (!fl.fl4_src) fl.fl4_src = fl.fl4_dst; if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) fib_info_put(res.fi); res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; }#ifdef CONFIG_IP_ROUTE_MULTIPATH if (res.fi->fib_nhs > 1 && fl.oif == 0) fib_select_multipath(&fl, &res); else#endif if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) fib_select_default(&fl, &res); if (!fl.fl4_src) fl.fl4_src = FIB_RES_PREFSRC(res); if (dev_out) dev_put(dev_out); dev_out = FIB_RES_DEV(res); dev_hold(dev_out); fl.oif = dev_out->ifindex;make_route: err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); if (free_res) fib_res_put(&res); if (dev_out) dev_put(dev_out);out: return err;}int __ip_route_output_key(struct rtable **rp, const struct flowi *flp){ unsigned hash; struct rtable *rth; hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->u.dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && rth->fl.oif == flp->oif && rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); *rp = rth; return 0; } RT_CACHE_STAT_INC(out_hlist_search); } rcu_read_unlock_bh(); return ip_route_output_slow(rp, flp);}EXPORT_SYMBOL_GPL(__ip_route_output_key);static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu){}static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = __constant_htons(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = ipv4_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .entry_size = sizeof(struct rtable),};static int ipv4_blackhole_output(struct sk_buff *skb){ kfree_skb(skb); return 0;}static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk){ struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) dst_alloc(&ipv4_dst_blackhole_ops); if (rt) { struct dst_entry *new = &rt->u.dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = ipv4_blackhole_output; new->output = ipv4_blackhole_output; memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); new->dev = ort->u.dst.dev; if (new->dev) dev_hold(new->dev); rt->fl = ort->fl; rt->idev = ort->idev; if (rt->idev) in_dev_hold(rt->idev); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; rt->rt_iif = ort->rt_iif; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; rt->peer = ort->peer; if (rt->peer) atomic_inc(&rt->peer->refcnt); dst_free(new); } dst_release(&(*rp)->u.dst); *rp = rt; return (rt ? 0 : -ENOMEM);}int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags){ int err; if ((err = __ip_route_output_key(rp, flp)) != 0) return err; if (flp->proto) { if (!flp->fl4_src) flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); if (err == -EREMOTE) err = ipv4_dst_blackhole(rp, flp, sk); return err; } return 0;}EXPORT_SYMBOL_GPL(ip_route_output_flow);int ip_route_output_key(struct rtable **rp, struct flowi *flp){ return ip_route_output_flow(rp, flp, NULL, 0);}static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags){ struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; struct nlmsghdr *nlh; long expires; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) return -EMSGSIZE; r = nlmsg_data(nlh); r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); if (rt->fl.fl4_src) { r->rtm_src_len = 32; NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } if (rt->u.dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);#ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);#endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) goto nla_put_failure; error = rt->u.dst.error; expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; if (rt->peer) { id = rt->peer->ip_id_count; if (rt->peer->tcp_ts_stamp) { ts = rt->peer->tcp_ts; tsage = get_seconds() - rt->peer->tcp_ts_stamp; } } if (rt->fl.iif) {#ifdef CONFIG_IP_MROUTE __be32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && IPV4_DEVCONF_ALL(MC_FORWARDING)) { int err = ipmr_get_route(skb, r, nowait); if (err <= 0) { if (!nowait) { if (err == 0) return 0; goto nla_put_failure; } else { if (err == -EMSGSIZE) goto nla_put_failure; error = err; } } } else#endif NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, expires, error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh);nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE;}static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg){ struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; __be32 dst = 0; __be32 src = 0; u32 iif; int err; struct sk_buff *skb; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) goto errout; rtm = nlmsg_data(nlh); skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) { err = -ENOBUFS; goto errout; } /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. */ skb_reset_mac_header(skb); skb_reset_network_header(skb); /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ ip_hdr(skb)->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; if (iif) { struct net_device *dev; dev = __dev_get_by_index(&init_net, iif); if (dev == NULL) { err = -ENODEV; goto errout_free; } skb->protocol = htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); rt = (struct rtable*) skb->dst; if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, .saddr = src, .tos = rtm->rtm_tos, }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -