📄 regcomp.c
字号:
break; case 'b': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = WORD_DELIM; } break; case 'B': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = NOT_WORD_DELIM; } break; case 'w': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_WORD; break; case 'W': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_NOTWORD; break; case 's': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_SPACE; break; case 'S': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_NOTSPACE; break; case '`': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = BUF_FIRST; } break; case '\'': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = BUF_LAST; } break; case '(': if (!(syntax & RE_NO_BK_PARENS)) token->type = OP_OPEN_SUBEXP; break; case ')': if (!(syntax & RE_NO_BK_PARENS)) token->type = OP_CLOSE_SUBEXP; break; case '+': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) token->type = OP_CLOSE_DUP_NUM; break; default: break; } return 2; } token->type = CHARACTER;#ifdef RE_ENABLE_I18N if (input->mb_cur_max > 1) { wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; } else#endif token->word_char = IS_WORD_CHAR (token->opr.c); switch (c) { case '\n': if (syntax & RE_NEWLINE_ALT) token->type = OP_ALT; break; case '|': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) token->type = OP_ALT; break; case '*': token->type = OP_DUP_ASTERISK; break; case '+': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) token->type = OP_CLOSE_DUP_NUM; break; case '(': if (syntax & RE_NO_BK_PARENS) token->type = OP_OPEN_SUBEXP; break; case ')': if (syntax & RE_NO_BK_PARENS) token->type = OP_CLOSE_SUBEXP; break; case '[': token->type = OP_OPEN_BRACKET; break; case '.': token->type = OP_PERIOD; break; case '^': if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && re_string_cur_idx (input) != 0) { char prev = re_string_peek_byte (input, -1); if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') break; } token->type = ANCHOR; token->opr.ctx_type = LINE_FIRST; break; case '$': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && re_string_cur_idx (input) + 1 != re_string_length (input)) { re_token_t next; re_string_skip_bytes (input, 1); peek_token (&next, input, syntax); re_string_skip_bytes (input, -1); if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) break; } token->type = ANCHOR; token->opr.ctx_type = LINE_LAST; break; default: break; } return 1;}/* Peek a token from INPUT, and return the length of the token. We must not use this function out of bracket expressions. */static intinternal_functionpeek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax){ unsigned char c; if (re_string_eoi (input)) { token->type = END_OF_RE; return 0; } c = re_string_peek_byte (input, 0); token->opr.c = c;#ifdef RE_ENABLE_I18N if (input->mb_cur_max > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; return 1; }#endif /* RE_ENABLE_I18N */ if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && re_string_cur_idx (input) + 1 < re_string_length (input)) { /* In this case, '\' escape a character. */ unsigned char c2; re_string_skip_bytes (input, 1); c2 = re_string_peek_byte (input, 0); token->opr.c = c2; token->type = CHARACTER; return 1; } if (c == '[') /* '[' is a special char in a bracket exps. */ { unsigned char c2; int token_len; if (re_string_cur_idx (input) + 1 < re_string_length (input)) c2 = re_string_peek_byte (input, 1); else c2 = 0; token->opr.c = c2; token_len = 2; switch (c2) { case '.': token->type = OP_OPEN_COLL_ELEM; break; case '=': token->type = OP_OPEN_EQUIV_CLASS; break; case ':': if (syntax & RE_CHAR_CLASSES) { token->type = OP_OPEN_CHAR_CLASS; break; } /* else fall through. */ default: token->type = CHARACTER; token->opr.c = c; token_len = 1; break; } return token_len; } switch (c) { case '-': token->type = OP_CHARSET_RANGE; break; case ']': token->type = OP_CLOSE_BRACKET; break; case '^': token->type = OP_NON_MATCH_LIST; break; default: token->type = CHARACTER; } return 1;}/* Functions for parser. *//* Entry point of the parser. Parse the regular expression REGEXP and return the structure tree. If an error is occured, ERR is set by error code, and return NULL. This function build the following tree, from regular expression <reg_exp>: CAT / \ / \ <reg_exp> EOR CAT means concatenation. EOR means end of regular expression. */static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, reg_errcode_t *err){ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *eor, *root; re_token_t current_token; dfa->syntax = syntax; fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; eor = create_tree (dfa, NULL, NULL, END_OF_RE); if (tree != NULL) root = create_tree (dfa, tree, eor, CONCAT); else root = eor; if (BE (eor == NULL || root == NULL, 0)) { *err = REG_ESPACE; return NULL; } return root;}/* This function build the following tree, from regular expression <branch1>|<branch2>: ALT / \ / \ <branch1> <branch2> ALT means alternative, which represents the operator `|'. */static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err){ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *branch = NULL; tree = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; while (token->type == OP_ALT) { fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); if (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { branch = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && branch == NULL, 0)) return NULL; } else branch = NULL; tree = create_tree (dfa, tree, branch, OP_ALT); if (BE (tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } return tree;}/* This function build the following tree, from regular expression <exp1><exp2>: CAT / \ / \ <exp1> <exp2> CAT means concatenation. */static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err){ bin_tree_t *tree, *expr; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; tree = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; while (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { expr = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && expr == NULL, 0)) { return NULL; } if (tree != NULL && expr != NULL) { tree = create_tree (dfa, tree, expr, CONCAT); if (tree == NULL) { *err = REG_ESPACE; return NULL; } } else if (tree == NULL) tree = expr; /* Otherwise expr == NULL, we don't need to create new tree. */ } return tree;}/* This function build the following tree, from regular expression a*: * | a*/static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err){ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree; switch (token->type) { case CHARACTER: tree = create_token_tree (dfa, NULL, NULL, token); if (BE (tree == NULL, 0)) { *err = REG_ESPACE; return NULL; }#ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) { while (!re_string_eoi (regexp) && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) { bin_tree_t *mbc_remain; fetch_token (token, regexp, syntax); mbc_remain = create_token_tree (dfa, NULL, NULL, token); tree = create_tree (dfa, tree, mbc_remain, CONCAT); if (BE (mbc_remain == NULL || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } }#endif break; case OP_OPEN_SUBEXP: tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_OPEN_BRACKET: tree = parse_bracket_exp (regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_BACK_REF: if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) { *err = REG_ESUBREG; return NULL; } dfa->used_bkref_map |= 1 << token->opr.idx; tree = create_token_tree (dfa, NULL, NULL, token); if (BE (tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } ++dfa->nbackref; dfa->has_mb_node = 1; break; case OP_OPEN_DUP_NUM: if (syntax & RE_CONTEXT_INVALID_DUP) { *err = REG_BADRPT; return NULL; } /* FALLTHROUGH */ case OP_DUP_ASTERISK: case OP_DUP_PLUS: case OP_DUP_QUESTION: if (syntax & RE_CONTEXT_INVALID_OPS) { *err = REG_BADRPT; return NULL; } else if (syntax & RE_CONTEXT_INDEP_OPS) { fetch_token (token, r
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -