📄 regcomp.c
字号:
default: if (node->left) node->left->next = node->next; if (node->right) node->right->next = node->next; break; } return REG_NOERROR;}/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */static reg_errcode_tlink_nfa_nodes (void *extra, bin_tree_t *node){ re_dfa_t *dfa = (re_dfa_t *) extra; Idx idx = node->node_idx; reg_errcode_t err = REG_NOERROR; switch (node->token.type) { case CONCAT: break; case END_OF_RE: assert (node->next == NULL); break; case OP_DUP_ASTERISK: case OP_ALT: { Idx left, right; dfa->has_plural_match = 1; if (node->left != NULL) left = node->left->first->node_idx; else left = node->next->node_idx; if (node->right != NULL) right = node->right->first->node_idx; else right = node->next->node_idx; assert (REG_VALID_INDEX (left)); assert (REG_VALID_INDEX (right)); err = re_node_set_init_2 (dfa->edests + idx, left, right); } break; case ANCHOR: case OP_OPEN_SUBEXP: case OP_CLOSE_SUBEXP: err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); break; case OP_BACK_REF: dfa->nexts[idx] = node->next->node_idx; if (node->token.type == OP_BACK_REF) re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); break; default: assert (!IS_EPSILON_NODE (node->token.type)); dfa->nexts[idx] = node->next->node_idx; break; } return err;}/* Duplicate the epsilon closure of the node ROOT_NODE. Note that duplicated nodes have constraint INIT_CONSTRAINT in addition to their own constraint. */static reg_errcode_tinternal_functionduplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, Idx root_node, unsigned int init_constraint){ Idx org_node, clone_node; bool ok; unsigned int constraint = init_constraint; for (org_node = top_org_node, clone_node = top_clone_node;;) { Idx org_dest, clone_dest; if (dfa->nodes[org_node].type == OP_BACK_REF) { /* If the back reference epsilon-transit, its destination must also have the constraint. Then duplicate the epsilon closure of the destination of the back reference, and store it in edests of the back reference. */ org_dest = dfa->nexts[org_node]; re_node_set_empty (dfa->edests + clone_node); clone_dest = duplicate_node (dfa, org_dest, constraint); if (BE (clone_dest == REG_MISSING, 0)) return REG_ESPACE; dfa->nexts[clone_node] = dfa->nexts[org_node]; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (! ok, 0)) return REG_ESPACE; } else if (dfa->edests[org_node].nelem == 0) { /* In case of the node can't epsilon-transit, don't duplicate the destination and store the original destination as the destination of the node. */ dfa->nexts[clone_node] = dfa->nexts[org_node]; break; } else if (dfa->edests[org_node].nelem == 1) { /* In case of the node can epsilon-transit, and it has only one destination. */ org_dest = dfa->edests[org_node].elems[0]; re_node_set_empty (dfa->edests + clone_node); if (dfa->nodes[org_node].type == ANCHOR) { /* In case of the node has another constraint, append it. */ if (org_node == root_node && clone_node != org_node) { /* ...but if the node is root_node itself, it means the epsilon closure have a loop, then tie it to the destination of the root_node. */ ok = re_node_set_insert (dfa->edests + clone_node, org_dest); if (BE (! ok, 0)) return REG_ESPACE; break; } constraint |= dfa->nodes[org_node].opr.ctx_type; } clone_dest = duplicate_node (dfa, org_dest, constraint); if (BE (clone_dest == REG_MISSING, 0)) return REG_ESPACE; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (! ok, 0)) return REG_ESPACE; } else /* dfa->edests[org_node].nelem == 2 */ { /* In case of the node can epsilon-transit, and it has two destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ org_dest = dfa->edests[org_node].elems[0]; re_node_set_empty (dfa->edests + clone_node); /* Search for a duplicated node which satisfies the constraint. */ clone_dest = search_duplicated_node (dfa, org_dest, constraint); if (clone_dest == REG_MISSING) { /* There are no such a duplicated node, create a new one. */ reg_errcode_t err; clone_dest = duplicate_node (dfa, org_dest, constraint); if (BE (clone_dest == REG_MISSING, 0)) return REG_ESPACE; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (! ok, 0)) return REG_ESPACE; err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node, constraint); if (BE (err != REG_NOERROR, 0)) return err; } else { /* There are a duplicated node which satisfy the constraint, use it to avoid infinite loop. */ ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (! ok, 0)) return REG_ESPACE; } org_dest = dfa->edests[org_node].elems[1]; clone_dest = duplicate_node (dfa, org_dest, constraint); if (BE (clone_dest == REG_MISSING, 0)) return REG_ESPACE; ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (! ok, 0)) return REG_ESPACE; } org_node = org_dest; clone_node = clone_dest; } return REG_NOERROR;}/* Search for a node which is duplicated from the node ORG_NODE, and satisfies the constraint CONSTRAINT. */static Idxsearch_duplicated_node (const re_dfa_t *dfa, Idx org_node, unsigned int constraint){ Idx idx; for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) { if (org_node == dfa->org_indices[idx] && constraint == dfa->nodes[idx].constraint) return idx; /* Found. */ } return REG_MISSING; /* Not found. */}/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. Return the index of the new node, or REG_MISSING if insufficient storage is available. */static Idxduplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint){ Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); if (BE (dup_idx != REG_MISSING, 1)) { dfa->nodes[dup_idx].constraint = constraint; if (dfa->nodes[org_idx].type == ANCHOR) dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; dfa->nodes[dup_idx].duplicated = 1; /* Store the index of the original node. */ dfa->org_indices[dup_idx] = org_idx; } return dup_idx;}static reg_errcode_tcalc_inveclosure (re_dfa_t *dfa){ Idx src, idx; bool ok; for (idx = 0; idx < dfa->nodes_len; ++idx) re_node_set_init_empty (dfa->inveclosures + idx); for (src = 0; src < dfa->nodes_len; ++src) { Idx *elems = dfa->eclosures[src].elems; for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) { ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); if (BE (! ok, 0)) return REG_ESPACE; } } return REG_NOERROR;}/* Calculate "eclosure" for all the node in DFA. */static reg_errcode_tcalc_eclosure (re_dfa_t *dfa){ Idx node_idx; bool incomplete;#ifdef DEBUG assert (dfa->nodes_len > 0);#endif incomplete = false; /* For each nodes, calculate epsilon closure. */ for (node_idx = 0; ; ++node_idx) { reg_errcode_t err; re_node_set eclosure_elem; if (node_idx == dfa->nodes_len) { if (!incomplete) break; incomplete = false; node_idx = 0; }#ifdef DEBUG assert (dfa->eclosures[node_idx].nelem != REG_MISSING);#endif /* If we have already calculated, skip it. */ if (dfa->eclosures[node_idx].nelem != 0) continue; /* Calculate epsilon closure of `node_idx'. */ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true); if (BE (err != REG_NOERROR, 0)) return err; if (dfa->eclosures[node_idx].nelem == 0) { incomplete = true; re_node_set_free (&eclosure_elem); } } return REG_NOERROR;}/* Calculate epsilon closure of NODE. */static reg_errcode_tcalc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root){ reg_errcode_t err; unsigned int constraint; Idx i; bool incomplete; bool ok; re_node_set eclosure; incomplete = false; err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); if (BE (err != REG_NOERROR, 0)) return err; /* This indicates that we are calculating this node now. We reference this value to avoid infinite loop. */ dfa->eclosures[node].nelem = REG_MISSING; constraint = ((dfa->nodes[node].type == ANCHOR) ? dfa->nodes[node].opr.ctx_type : 0); /* If the current node has constraints, duplicate all nodes. Since they must inherit the constraints. */ if (constraint && dfa->edests[node].nelem && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) { err = duplicate_node_closure (dfa, node, node, node, constraint); if (BE (err != REG_NOERROR, 0)) return err; } /* Expand each epsilon destination nodes. */ if (IS_EPSILON_NODE(dfa->nodes[node].type)) for (i = 0; i < dfa->edests[node].nelem; ++i) { re_node_set eclosure_elem; Idx edest = dfa->edests[node].elems[i]; /* If calculating the epsilon closure of `edest' is in progress, return intermediate result. */ if (dfa->eclosures[edest].nelem == REG_MISSING) { incomplete = true; continue; } /* If we haven't calculated the epsilon closure of `edest' yet, calculate now. Otherwise use calculated epsilon closure. */ if (dfa->eclosures[edest].nelem == 0) { err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false); if (BE (err != REG_NOERROR, 0)) return err; } else eclosure_elem = dfa->eclosures[edest]; /* Merge the epsilon closure of `edest'. */ re_node_set_merge (&eclosure, &eclosure_elem); /* If the epsilon closure of `edest' is incomplete, the epsilon closure of this node is also incomplete. */ if (dfa->eclosures[edest].nelem == 0) { incomplete = true; re_node_set_free (&eclosure_elem); } } /* Epsilon closures include itself. */ ok = re_node_set_insert (&eclosure, node); if (BE (! ok, 0)) return REG_ESPACE; if (incomplete && !root) dfa->eclosures[node].nelem = 0; else dfa->eclosures[node] = eclosure; *new_set = eclosure; return REG_NOERROR;}/* Functions for token which are used in the parser. *//* Fetch a token from INPUT. We must not use this function inside bracket expressions. */static voidinternal_functionfetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax){ re_string_skip_bytes (input, peek_token (result, input, syntax));}/* Peek a token from INPUT, and return the length of the token. We must not use this function inside bracket expressions. */static intinternal_functionpeek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax){ unsigned char c; if (re_string_eoi (input)) { token->type = END_OF_RE; return 0; } c = re_string_peek_byte (input, 0); token->opr.c = c; token->word_char = 0;#ifdef RE_ENABLE_I18N token->mb_partial = 0; if (input->mb_cur_max > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; token->mb_partial = 1; return 1; }#endif if (c == '\\') { unsigned char c2; if (re_string_cur_idx (input) + 1 >= re_string_length (input)) { token->type = BACK_SLASH; return 1; } c2 = re_string_peek_byte_case (input, 1); token->opr.c = c2; token->type = CHARACTER;#ifdef RE_ENABLE_I18N if (input->mb_cur_max > 1) { wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input) + 1); token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; } else#endif token->word_char = IS_WORD_CHAR (c2) != 0; switch (c2) { case '|': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) token->type = OP_ALT; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (!(syntax & RE_NO_BK_REFS)) { token->type = OP_BACK_REF; token->opr.idx = c2 - '1'; } break; case '<': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = WORD_FIRST; } break; case '>': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = WORD_LAST; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -