📄 regcomp.c
字号:
/* If the current node has constraints, duplicate all nodes. Since they must inherit the constraints. */ if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) { int org_node, cur_node; org_node = cur_node = node; err = duplicate_node_closure (dfa, node, node, node, constraint); if (BE (err != REG_NOERROR, 0)) return err; } /* Expand each epsilon destination nodes. */ if (IS_EPSILON_NODE(dfa->nodes[node].type)) for (i = 0; i < dfa->edests[node].nelem; ++i) { re_node_set eclosure_elem; int edest = dfa->edests[node].elems[i]; /* If calculating the epsilon closure of `edest' is in progress, return intermediate result. */ if (dfa->eclosures[edest].nelem == -1) { incomplete = 1; continue; } /* If we haven't calculated the epsilon closure of `edest' yet, calculate now. Otherwise use calculated epsilon closure. */ if (dfa->eclosures[edest].nelem == 0) { err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); if (BE (err != REG_NOERROR, 0)) return err; } else eclosure_elem = dfa->eclosures[edest]; /* Merge the epsilon closure of `edest'. */ re_node_set_merge (&eclosure, &eclosure_elem); /* If the epsilon closure of `edest' is incomplete, the epsilon closure of this node is also incomplete. */ if (dfa->eclosures[edest].nelem == 0) { incomplete = 1; re_node_set_free (&eclosure_elem); } } /* Epsilon closures include itself. */ re_node_set_insert (&eclosure, node); if (incomplete && !root) dfa->eclosures[node].nelem = 0; else dfa->eclosures[node] = eclosure; *new_set = eclosure; return REG_NOERROR;}/* Functions for token which are used in the parser. *//* Fetch a token from INPUT. We must not use this function inside bracket expressions. */static re_token_tfetch_token (input, syntax) re_string_t *input; reg_syntax_t syntax;{ re_token_t token; int consumed_byte; consumed_byte = peek_token (&token, input, syntax); re_string_skip_bytes (input, consumed_byte); return token;}/* Peek a token from INPUT, and return the length of the token. We must not use this function inside bracket expressions. */static intpeek_token (token, input, syntax) re_token_t *token; re_string_t *input; reg_syntax_t syntax;{ unsigned char c; if (re_string_eoi (input)) { token->type = END_OF_RE; return 0; } c = re_string_peek_byte (input, 0); token->opr.c = c;#ifdef RE_ENABLE_I18N token->mb_partial = 0; if (MB_CUR_MAX > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; token->mb_partial = 1; return 1; }#endif if (c == '\\') { unsigned char c2; if (re_string_cur_idx (input) + 1 >= re_string_length (input)) { token->type = BACK_SLASH; return 1; } c2 = re_string_peek_byte_case (input, 1); token->opr.c = c2; token->type = CHARACTER; switch (c2) { case '|': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) token->type = OP_ALT; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (!(syntax & RE_NO_BK_REFS)) { token->type = OP_BACK_REF; token->opr.idx = c2 - '0'; } break; case '<': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.idx = WORD_FIRST; } break; case '>': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.idx = WORD_LAST; } break; case 'b': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.idx = WORD_DELIM; } break; case 'B': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.idx = INSIDE_WORD; } break; case 'w': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_WORD; break; case 'W': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_NOTWORD; break; case '`': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.idx = BUF_FIRST; } break; case '\'': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.idx = BUF_LAST; } break; case '(': if (!(syntax & RE_NO_BK_PARENS)) token->type = OP_OPEN_SUBEXP; break; case ')': if (!(syntax & RE_NO_BK_PARENS)) token->type = OP_CLOSE_SUBEXP; break; case '+': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) token->type = OP_CLOSE_DUP_NUM; break; default: break; } return 2; } token->type = CHARACTER; switch (c) { case '\n': if (syntax & RE_NEWLINE_ALT) token->type = OP_ALT; break; case '|': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) token->type = OP_ALT; break; case '*': token->type = OP_DUP_ASTERISK; break; case '+': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) token->type = OP_CLOSE_DUP_NUM; break; case '(': if (syntax & RE_NO_BK_PARENS) token->type = OP_OPEN_SUBEXP; break; case ')': if (syntax & RE_NO_BK_PARENS) token->type = OP_CLOSE_SUBEXP; break; case '[': token->type = OP_OPEN_BRACKET; break; case '.': token->type = OP_PERIOD; break; case '^': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && re_string_cur_idx (input) != 0) { char prev = re_string_peek_byte (input, -1); if (prev != '|' && prev != '(' && (!(syntax & RE_NEWLINE_ALT) || prev != '\n')) break; } token->type = ANCHOR; token->opr.idx = LINE_FIRST; break; case '$': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && re_string_cur_idx (input) + 1 != re_string_length (input)) { re_token_t next; re_string_skip_bytes (input, 1); peek_token (&next, input, syntax); re_string_skip_bytes (input, -1); if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) break; } token->type = ANCHOR; token->opr.idx = LINE_LAST; break; default: break; } return 1;}/* Peek a token from INPUT, and return the length of the token. We must not use this function out of bracket expressions. */static intpeek_token_bracket (token, input, syntax) re_token_t *token; re_string_t *input; reg_syntax_t syntax;{ unsigned char c; if (re_string_eoi (input)) { token->type = END_OF_RE; return 0; } c = re_string_peek_byte (input, 0); token->opr.c = c;#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; return 1; }#endif /* RE_ENABLE_I18N */ if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)) { /* In this case, '\' escape a character. */ unsigned char c2; re_string_skip_bytes (input, 1); c2 = re_string_peek_byte (input, 0); token->opr.c = c2; token->type = CHARACTER; return 1; } if (c == '[') /* '[' is a special char in a bracket exps. */ { unsigned char c2; int token_len; c2 = re_string_peek_byte (input, 1); token->opr.c = c2; token_len = 2; switch (c2) { case '.': token->type = OP_OPEN_COLL_ELEM; break; case '=': token->type = OP_OPEN_EQUIV_CLASS; break; case ':': if (syntax & RE_CHAR_CLASSES) { token->type = OP_OPEN_CHAR_CLASS; break; } /* else fall through. */ default: token->type = CHARACTER; token->opr.c = c; token_len = 1; break; } return token_len; } switch (c) { case '-': token->type = OP_CHARSET_RANGE; break; case ']': token->type = OP_CLOSE_BRACKET; break; case '^': token->type = OP_NON_MATCH_LIST; break; default: token->type = CHARACTER; } return 1;}/* Functions for parser. *//* Entry point of the parser. Parse the regular expression REGEXP and return the structure tree. If an error is occured, ERR is set by error code, and return NULL. This function build the following tree, from regular expression <reg_exp>: CAT / \ / \ <reg_exp> EOR CAT means concatenation. EOR means end of regular expression. */static bin_tree_t *parse (regexp, preg, syntax, err) re_string_t *regexp; regex_t *preg; reg_syntax_t syntax; reg_errcode_t *err;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *eor, *root; re_token_t current_token; int new_idx; current_token = fetch_token (regexp, syntax); tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; new_idx = re_dfa_add_node (dfa, current_token, 0); eor = create_tree (NULL, NULL, 0, new_idx); if (tree != NULL) root = create_tree (tree, eor, CONCAT, 0); else root = eor; if (BE (new_idx == -1 || eor == NULL || root == NULL, 0)) { *err = REG_ESPACE; return NULL; } return root;}/* This function build the following tree, from regular expression <branch1>|<branch2>: ALT / \ / \ <branch1> <branch2> ALT means alternative, which represents the operator `|'. */static bin_tree_t *parse_reg_exp (regexp, preg, token, syntax, nest, err) re_string_t *regexp; regex_t *preg; re_token_t *token; reg_syntax_t syntax; int nest; reg_errcode_t *err;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *branch = NULL; int new_idx; tree = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; while (token->type == OP_ALT) { re_token_t alt_token = *token; new_idx = re_dfa_add_node (dfa, alt_token, 0); *token = fetch_token (regexp, syntax); if (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { branch = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && branch == NULL, 0)) { free_bin_tree (tree); return NULL; } } else branch = NULL; tree = create_tree (tree, branch, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } dfa->has_plural_match = 1; } return tree;}/* This function build the following tree, from regular expression
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -