📄 regcomp.c
字号:
re_node_set_init_empty (dfa->eclosures + i); re_node_set_init_empty (dfa->inveclosures + i); } ret = analyze_tree (dfa, dfa->str_tree); if (BE (ret == REG_NOERROR, 1)) { ret = calc_eclosure (dfa); if (ret == REG_NOERROR) calc_inveclosure (dfa); } return ret;}/* Helper functions for analyze. This function calculate "first", "next", and "edest" for the subtree whose root is NODE. */static reg_errcode_tanalyze_tree (dfa, node) re_dfa_t *dfa; bin_tree_t *node;{ reg_errcode_t ret; if (node->first == -1) calc_first (dfa, node); if (node->next == -1) calc_next (dfa, node); if (node->eclosure.nelem == 0) calc_epsdest (dfa, node); /* Calculate "first" etc. for the left child. */ if (node->left != NULL) { ret = analyze_tree (dfa, node->left); if (BE (ret != REG_NOERROR, 0)) return ret; } /* Calculate "first" etc. for the right child. */ if (node->right != NULL) { ret = analyze_tree (dfa, node->right); if (BE (ret != REG_NOERROR, 0)) return ret; } return REG_NOERROR;}/* Calculate "first" for the node NODE. */static voidcalc_first (dfa, node) re_dfa_t *dfa; bin_tree_t *node;{ int idx, type; idx = node->node_idx; type = (node->type == 0) ? dfa->nodes[idx].type : node->type; switch (type) {#ifdef DEBUG case OP_OPEN_BRACKET: case OP_CLOSE_BRACKET: case OP_OPEN_DUP_NUM: case OP_CLOSE_DUP_NUM: case OP_NON_MATCH_LIST: case OP_OPEN_COLL_ELEM: case OP_CLOSE_COLL_ELEM: case OP_OPEN_EQUIV_CLASS: case OP_CLOSE_EQUIV_CLASS: case OP_OPEN_CHAR_CLASS: case OP_CLOSE_CHAR_CLASS: /* These must not be appeared here. */ assert (0);#endif case END_OF_RE: case CHARACTER: case OP_PERIOD: case OP_DUP_ASTERISK: case OP_DUP_QUESTION:#ifdef RE_ENABLE_I18N case COMPLEX_BRACKET:#endif /* RE_ENABLE_I18N */ case SIMPLE_BRACKET: case OP_BACK_REF: case ANCHOR: case OP_OPEN_SUBEXP: case OP_CLOSE_SUBEXP: node->first = idx; break; case OP_DUP_PLUS:#ifdef DEBUG assert (node->left != NULL);#endif if (node->left->first == -1) calc_first (dfa, node->left); node->first = node->left->first; break; case OP_ALT: node->first = idx; break; /* else fall through */ default:#ifdef DEBUG assert (node->left != NULL);#endif if (node->left->first == -1) calc_first (dfa, node->left); node->first = node->left->first; break; }}/* Calculate "next" for the node NODE. */static voidcalc_next (dfa, node) re_dfa_t *dfa; bin_tree_t *node;{ int idx, type; bin_tree_t *parent = node->parent; if (parent == NULL) { node->next = -1; idx = node->node_idx; if (node->type == 0) dfa->nexts[idx] = node->next; return; } idx = parent->node_idx; type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type; switch (type) { case OP_DUP_ASTERISK: case OP_DUP_PLUS: node->next = idx; break; case CONCAT: if (parent->left == node) { if (parent->right->first == -1) calc_first (dfa, parent->right); node->next = parent->right->first; break; } /* else fall through */ default: if (parent->next == -1) calc_next (dfa, parent); node->next = parent->next; break; } idx = node->node_idx; if (node->type == 0) dfa->nexts[idx] = node->next;}/* Calculate "edest" for the node NODE. */static voidcalc_epsdest (dfa, node) re_dfa_t *dfa; bin_tree_t *node;{ int idx; idx = node->node_idx; if (node->type == 0) { if (dfa->nodes[idx].type == OP_DUP_ASTERISK || dfa->nodes[idx].type == OP_DUP_PLUS || dfa->nodes[idx].type == OP_DUP_QUESTION) { if (node->left->first == -1) calc_first (dfa, node->left); if (node->next == -1) calc_next (dfa, node); re_node_set_init_2 (dfa->edests + idx, node->left->first, node->next); } else if (dfa->nodes[idx].type == OP_ALT) { int left, right; if (node->left != NULL) { if (node->left->first == -1) calc_first (dfa, node->left); left = node->left->first; } else { if (node->next == -1) calc_next (dfa, node); left = node->next; } if (node->right != NULL) { if (node->right->first == -1) calc_first (dfa, node->right); right = node->right->first; } else { if (node->next == -1) calc_next (dfa, node); right = node->next; } re_node_set_init_2 (dfa->edests + idx, left, right); } else if (dfa->nodes[idx].type == ANCHOR || dfa->nodes[idx].type == OP_OPEN_SUBEXP || dfa->nodes[idx].type == OP_CLOSE_SUBEXP || dfa->nodes[idx].type == OP_BACK_REF) re_node_set_init_1 (dfa->edests + idx, node->next); }}/* Duplicate the epsilon closure of the node ROOT_NODE. Note that duplicated nodes have constraint INIT_CONSTRAINT in addition to their own constraint. */static reg_errcode_tduplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, init_constraint) re_dfa_t *dfa; int top_org_node, top_clone_node, root_node; unsigned int init_constraint;{ reg_errcode_t err; int org_node, clone_node, ret; unsigned int constraint = init_constraint; for (org_node = top_org_node, clone_node = top_clone_node;;) { int org_dest, clone_dest; if (dfa->nodes[org_node].type == OP_BACK_REF) { /* If the back reference epsilon-transit, its destination must also have the constraint. Then duplicate the epsilon closure of the destination of the back reference, and store it in edests of the back reference. */ org_dest = dfa->nexts[org_node]; re_node_set_empty (dfa->edests + clone_node); err = duplicate_node (&clone_dest, dfa, org_dest, constraint); if (BE (err != REG_NOERROR, 0)) return err; dfa->nexts[clone_node] = dfa->nexts[org_node]; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; } else if (dfa->edests[org_node].nelem == 0) { /* In case of the node can't epsilon-transit, don't duplicate the destination and store the original destination as the destination of the node. */ dfa->nexts[clone_node] = dfa->nexts[org_node]; break; } else if (dfa->edests[org_node].nelem == 1) { /* In case of the node can epsilon-transit, and it has only one destination. */ org_dest = dfa->edests[org_node].elems[0]; re_node_set_empty (dfa->edests + clone_node); if (dfa->nodes[org_node].type == ANCHOR) { /* In case of the node has another constraint, append it. */ if (org_node == root_node && clone_node != org_node) { /* ...but if the node is root_node itself, it means the epsilon closure have a loop, then tie it to the destination of the root_node. */ ret = re_node_set_insert (dfa->edests + clone_node, org_dest); if (BE (ret < 0, 0)) return REG_ESPACE; break; } constraint |= dfa->nodes[org_node].opr.ctx_type; } err = duplicate_node (&clone_dest, dfa, org_dest, constraint); if (BE (err != REG_NOERROR, 0)) return err; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; } else /* dfa->edests[org_node].nelem == 2 */ { /* In case of the node can epsilon-transit, and it has two destinations. E.g. '|', '*', '+', '?'. */ org_dest = dfa->edests[org_node].elems[0]; re_node_set_empty (dfa->edests + clone_node); /* Search for a duplicated node which satisfies the constraint. */ clone_dest = search_duplicated_node (dfa, org_dest, constraint); if (clone_dest == -1) { /* There are no such a duplicated node, create a new one. */ err = duplicate_node (&clone_dest, dfa, org_dest, constraint); if (BE (err != REG_NOERROR, 0)) return err; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node, constraint); if (BE (err != REG_NOERROR, 0)) return err; } else { /* There are a duplicated node which satisfy the constraint, use it to avoid infinite loop. */ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; } org_dest = dfa->edests[org_node].elems[1]; err = duplicate_node (&clone_dest, dfa, org_dest, constraint); if (BE (err != REG_NOERROR, 0)) return err; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; } org_node = org_dest; clone_node = clone_dest; } return REG_NOERROR;}/* Search for a node which is duplicated from the node ORG_NODE, and satisfies the constraint CONSTRAINT. */static intsearch_duplicated_node (dfa, org_node, constraint) re_dfa_t *dfa; int org_node; unsigned int constraint;{ int idx; for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) { if (org_node == dfa->org_indices[idx] && constraint == dfa->nodes[idx].constraint) return idx; /* Found. */ } return -1; /* Not found. */}/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, otherwise return the error code. */static reg_errcode_tduplicate_node (new_idx, dfa, org_idx, constraint) re_dfa_t *dfa; int *new_idx, org_idx; unsigned int constraint;{ re_token_t dup; int dup_idx; dup = dfa->nodes[org_idx]; dup_idx = re_dfa_add_node (dfa, dup, 1); if (BE (dup_idx == -1, 0)) return REG_ESPACE; dfa->nodes[dup_idx].constraint = constraint; if (dfa->nodes[org_idx].type == ANCHOR) dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; dfa->nodes[dup_idx].duplicated = 1; re_node_set_init_empty (dfa->edests + dup_idx); re_node_set_init_empty (dfa->eclosures + dup_idx); re_node_set_init_empty (dfa->inveclosures + dup_idx); /* Store the index of the original node. */ dfa->org_indices[dup_idx] = org_idx; *new_idx = dup_idx; return REG_NOERROR;}static voidcalc_inveclosure (dfa) re_dfa_t *dfa;{ int src, idx, dest; for (src = 0; src < dfa->nodes_len; ++src) { for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) { dest = dfa->eclosures[src].elems[idx]; re_node_set_insert (dfa->inveclosures + dest, src); } }}/* Calculate "eclosure" for all the node in DFA. */static reg_errcode_tcalc_eclosure (dfa) re_dfa_t *dfa;{ int node_idx, incomplete;#ifdef DEBUG assert (dfa->nodes_len > 0);#endif incomplete = 0; /* For each nodes, calculate epsilon closure. */ for (node_idx = 0; ; ++node_idx) { reg_errcode_t err; re_node_set eclosure_elem; if (node_idx == dfa->nodes_len) { if (!incomplete) break; incomplete = 0; node_idx = 0; }#ifdef DEBUG assert (dfa->eclosures[node_idx].nelem != -1);#endif /* If we have already calculated, skip it. */ if (dfa->eclosures[node_idx].nelem != 0) continue; /* Calculate epsilon closure of `node_idx'. */ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); if (BE (err != REG_NOERROR, 0)) return err; if (dfa->eclosures[node_idx].nelem == 0) { incomplete = 1; re_node_set_free (&eclosure_elem); } } return REG_NOERROR;}/* Calculate epsilon closure of NODE. */static reg_errcode_tcalc_eclosure_iter (new_set, dfa, node, root) re_node_set *new_set; re_dfa_t *dfa; int node, root;{ reg_errcode_t err; unsigned int constraint; int i, incomplete; re_node_set eclosure; incomplete = 0; err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); if (BE (err != REG_NOERROR, 0)) return err; /* This indicates that we are calculating this node now. We reference this value to avoid infinite loop. */ dfa->eclosures[node].nelem = -1; constraint = ((dfa->nodes[node].type == ANCHOR) ? dfa->nodes[node].opr.ctx_type : 0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -