📄 fib_trie.c
字号:
err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; new_fa->fa_info = fi; new_fa->fa_tos = tos; new_fa->fa_type = cfg->fc_type; new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. */ if (!fa_head) { err = 0; fa_head = fib_insert_node(t, &err, key, plen); if (err) goto out_free_new_fa; } list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); rt_cache_flush(-1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0);succeeded: return 0;out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa);out: fib_release_info(fi);err: return err;}/* should be called with rcu_read_lock */static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, struct fib_result *res){ int err, i; __be32 mask; struct leaf_info *li; struct hlist_head *hhead = &l->list; struct hlist_node *node; hlist_for_each_entry_rcu(li, node, hhead, hlist) { i = li->plen; mask = inet_make_mask(i); if (l->key != (key & ntohl(mask))) continue; if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) { *plen = i;#ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_passed++;#endif return err; }#ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_miss++;#endif } return 1;}static intfn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res){ struct trie *t = (struct trie *) tb->tb_data; int plen, ret = 0; struct node *n; struct tnode *pn; int pos, bits; t_key key = ntohl(flp->fl4_dst); int chopped_off; t_key cindex = 0; int current_prefix_length = KEYLENGTH; struct tnode *cn; t_key node_prefix, key_prefix, pref_mismatch; int mp; rcu_read_lock(); n = rcu_dereference(t->trie); if (!n) goto failed;#ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.gets++;#endif /* Just a leaf? */ if (IS_LEAF(n)) { if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; goto failed; } pn = (struct tnode *) n; chopped_off = 0; while (pn) { pos = pn->pos; bits = pn->bits; if (!chopped_off) cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), pos, bits); n = tnode_get_child(pn, cindex); if (n == NULL) {#ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.null_node_hit++;#endif goto backtrace; } if (IS_LEAF(n)) { if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; else goto backtrace; }#define HL_OPTIMIZE#ifdef HL_OPTIMIZE cn = (struct tnode *)n; /* * It's a tnode, and we can do some extra checks here if we * like, to avoid descending into a dead-end branch. * This tnode is in the parent's child array at index * key[p_pos..p_pos+p_bits] but potentially with some bits * chopped off, so in reality the index may be just a * subprefix, padded with zero at the end. * We can also take a look at any skipped bits in this * tnode - everything up to p_pos is supposed to be ok, * and the non-chopped bits of the index (se previous * paragraph) are also guaranteed ok, but the rest is * considered unknown. * * The skipped bits are key[pos+bits..cn->pos]. */ /* If current_prefix_length < pos+bits, we are already doing * actual prefix matching, which means everything from * pos+(bits-chopped_off) onward must be zero along some * branch of this subtree - otherwise there is *no* valid * prefix present. Here we can only check the skipped * bits. Remember, since we have already indexed into the * parent's child array, we know that the bits we chopped of * *are* zero. */ /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ if (current_prefix_length < pos+bits) { if (tkey_extract_bits(cn->key, current_prefix_length, cn->pos - current_prefix_length) != 0 || !(cn->child[0])) goto backtrace; } /* * If chopped_off=0, the index is fully validated and we * only need to look at the skipped bits for this, the new, * tnode. What we actually want to do is to find out if * these skipped bits match our key perfectly, or if we will * have to count on finding a matching prefix further down, * because if we do, we would like to have some way of * verifying the existence of such a prefix at this point. */ /* The only thing we can do at this point is to verify that * any such matching prefix can indeed be a prefix to our * key, and if the bits in the node we are inspecting that * do not match our key are not ZERO, this cannot be true. * Thus, find out where there is a mismatch (before cn->pos) * and verify that all the mismatching bits are zero in the * new tnode's key. */ /* Note: We aren't very concerned about the piece of the key * that precede pn->pos+pn->bits, since these have already been * checked. The bits after cn->pos aren't checked since these are * by definition "unknown" at this point. Thus, what we want to * see is if we are about to enter the "prefix matching" state, * and in that case verify that the skipped bits that will prevail * throughout this subtree are zero, as they have to be if we are * to find a matching prefix. */ node_prefix = mask_pfx(cn->key, cn->pos); key_prefix = mask_pfx(key, cn->pos); pref_mismatch = key_prefix^node_prefix; mp = 0; /* In short: If skipped bits in this node do not match the search * key, enter the "prefix matching" state.directly. */ if (pref_mismatch) { while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { mp++; pref_mismatch = pref_mismatch <<1; } key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); if (key_prefix != 0) goto backtrace; if (current_prefix_length >= cn->pos) current_prefix_length = mp; }#endif pn = (struct tnode *)n; /* Descend */ chopped_off = 0; continue;backtrace: chopped_off++; /* As zero don't change the child key (cindex) */ while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) chopped_off++; /* Decrease current_... with bits chopped off */ if (current_prefix_length > pn->pos + pn->bits - chopped_off) current_prefix_length = pn->pos + pn->bits - chopped_off; /* * Either we do the actual chop off according or if we have * chopped off all bits in this tnode walk up to our parent. */ if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { struct tnode *parent = node_parent((struct node *) pn); if (!parent) goto failed; /* Get Child's index */ cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); pn = parent; chopped_off = 0;#ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.backtrack++;#endif goto backtrace; } }failed: ret = 1;found: rcu_read_unlock(); return ret;}/* only called from updater side */static int trie_leaf_remove(struct trie *t, t_key key){ t_key cindex; struct tnode *tp = NULL; struct node *n = t->trie; struct leaf *l; pr_debug("entering trie_leaf_remove(%p)\n", n); /* Note that in the case skipped bits, those bits are *not* checked! * When we finish this, we will have NULL or a T_LEAF, and the * T_LEAF may or may not match our key. */ while (n != NULL && IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; check_tnode(tn); n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); BUG_ON(n && node_parent(n) != tn); } l = (struct leaf *) n; if (!n || !tkey_equals(l->key, key)) return 0; /* * Key found. * Remove the leaf and rebalance the tree */ t->revision++; t->size--; tp = node_parent(n); tnode_free((struct tnode *) n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); } else rcu_assign_pointer(t->trie, NULL); return 1;}/* * Caller must hold RTNL. */static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg){ struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; int plen = cfg->fc_dst_len; u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; struct leaf_info *li; if (plen > 32) return -EINVAL; key = ntohl(cfg->fc_dst); mask = ntohl(inet_make_mask(plen)); if (key & ~mask) return -EINVAL; key = key & mask; l = fib_find_node(t, key); if (!l) return -ESRCH; fa_head = get_fa_head(l, plen); fa = fib_find_alias(fa_head, tos, 0); if (!fa) return -ESRCH; pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); fa_to_delete = NULL; fa_head = fa->fa_list.prev; list_for_each_entry(fa, fa_head, fa_list) { struct fib_info *fi = fa->fa_info; if (fa->fa_tos != tos) break; if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && (cfg->fc_scope == RT_SCOPE_NOWHERE || fa->fa_scope == cfg->fc_scope) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } } if (!fa_to_delete) return -ESRCH; fa = fa_to_delete; rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); l = fib_find_node(t, key); li = find_leaf_info(l, plen); list_del_rcu(&fa->fa_list); if (list_empty(fa_head)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); } if (hlist_empty(&l->list)) trie_leaf_remove(t, key); if (fa->fa_state & FA_S_ACCESSED) rt_cache_flush(-1); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); return 0;}static int trie_flush_list(struct trie *t, struct list_head *head){ struct fib_alias *fa, *fa_node; int found = 0; list_for_each_entry_safe(fa, fa_node, head, fa_list) { struct fib_info *fi = fa->fa_info; if (fi && (fi->fib_flags & RTNH_F_DEAD)) { list_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); found++; } } return found;}static int trie_flush_leaf(struct trie *t, struct leaf *l){ int found = 0; struct hlist_head *lih = &l->list; struct hlist_node *node, *tmp; struct leaf_info *li = NULL; hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { found += trie_flush_list(t, &li->falh); if (list_empty(&li->falh)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); } } return found;}/* rcu_read_lock needs to be hold by caller from readside */static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf){ struct node *c = (struct node *) thisleaf; struct tnode *p; int idx; struct node *trie = rcu_dereference(t->trie); if (c == NULL) { if (trie == NULL) return NULL; if (IS_LEAF(trie)) /* trie w. just a leaf */ return (struct leaf *) trie; p = (struct tnode*) trie; /* Start */ } else p = node_parent(c); while (p) { int pos, last; /* Find the next child of the parent */ if (c) pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); else pos = 0; last = 1 << p->bits; for (idx = pos; idx < last ; idx++) { c = rcu_dereference(p->child[idx]); if (!c) continue; /* Decend if tnode */ while (IS_TNODE(c)) { p = (struct tnode *) c; idx = 0; /* Rightmost non-NULL branch */ if (p && IS_TNODE(p)) while (!(c = rcu_dereference(p->child[idx])) && idx < (1<<p->bits)) idx++; /* Done with this tnode? */ if (idx >= (1 << p->bits) || !c) goto up; } return (struct leaf *) c; }up: /* No more children go up one step */ c = (struct node *) p; p = node_parent(c); } return NULL; /* Ready. Root of trie */}/* * Caller must hold RTNL. */static int fn_trie_flush(struct fib_table *tb){ struct trie *t = (struct trie *) tb->tb_data; struct leaf *ll = NULL, *l = NULL; int found = 0, h; t->revision++; for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); ll = l; } if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); pr_debug("trie_flush found=%d\n", found); return found;}static int trie_last_dflt = -1;static voidfn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res){ struct trie *t = (struct trie *) tb->tb_data; int order, last_idx; struct fib_info *fi = NULL; struct fib_info *last_resort; struct fib_alias *fa = NULL; struct list_head *fa_head; struct leaf *l; last_idx = -1; last_resort = NULL; order = -1; rcu_read_lock(); l = fib_find_node(t, 0); if (!l) goto out; fa_head = get_fa_head(l, 0); if (!fa_head) goto out; if (list_empty(fa_head)) goto out; list_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; if (fa->fa_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; if (next_fi->fib_priority > res->fi->fib_priority) break; if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; fa->fa_state |= FA_S_ACCESSED; if (fi == NULL) { if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); trie_last_dflt = order; goto out; } fi = next_fi; order++; } if (order <= 0 || fi == NULL) { trie_last_dflt = -1; goto out; } if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); trie_last_dflt = order; goto out; } if (last_idx >= 0) { if (res->fi) fib_info_put(res->fi); res->fi = last_resort; if (last_resort) atomic_inc(&last_resort->fib_clntref); } trie_last_dflt = last_idx; out:; rcu_read_unlock();}static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb){ int i, s_i; struct fib_alias *fa; __be32 xkey = htonl(key); s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ list_for_each_entry_rcu(fa, fah, fa_list) { if (i < s_i) { i++; continue; } BUG_ON(!fa->fa_info); if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, fa->fa_type, fa->fa_scope, xkey, plen, fa->fa_tos, fa->fa_info, 0) < 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -