regcomp.c
来自「gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件」· C语言 代码 · 共 2,273 行 · 第 1/5 页
C
2,273 行
break; case 'b': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = WORD_DELIM; } break; case 'B': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = NOT_WORD_DELIM; } break; case 'w': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_WORD; break; case 'W': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_NOTWORD; break; case 's': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_SPACE; break; case 'S': if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_NOTSPACE; break; case '`': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = BUF_FIRST; } break; case '\'': if (!(syntax & RE_NO_GNU_OPS)) { token->type = ANCHOR; token->opr.ctx_type = BUF_LAST; } break; case '(': if (!(syntax & RE_NO_BK_PARENS)) token->type = OP_OPEN_SUBEXP; break; case ')': if (!(syntax & RE_NO_BK_PARENS)) token->type = OP_CLOSE_SUBEXP; break; case '+': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) token->type = OP_CLOSE_DUP_NUM; break; default: break; } return 2; } token->type = CHARACTER;#ifdef RE_ENABLE_I18N if (input->mb_cur_max > 1) { wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; } else#endif token->word_char = IS_WORD_CHAR (token->opr.c); switch (c) { case '\n': if (syntax & RE_NEWLINE_ALT) token->type = OP_ALT; break; case '|': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) token->type = OP_ALT; break; case '*': token->type = OP_DUP_ASTERISK; break; case '+': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) token->type = OP_CLOSE_DUP_NUM; break; case '(': if (syntax & RE_NO_BK_PARENS) token->type = OP_OPEN_SUBEXP; break; case ')': if (syntax & RE_NO_BK_PARENS) token->type = OP_CLOSE_SUBEXP; break; case '[': token->type = OP_OPEN_BRACKET; break; case '.': token->type = OP_PERIOD; break; case '^': if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && re_string_cur_idx (input) != 0) { char prev = re_string_peek_byte (input, -1); if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') break; } token->type = ANCHOR; token->opr.ctx_type = LINE_FIRST; break; case '$': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && re_string_cur_idx (input) + 1 != re_string_length (input)) { re_token_t next; re_string_skip_bytes (input, 1); peek_token (&next, input, syntax); re_string_skip_bytes (input, -1); if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) break; } token->type = ANCHOR; token->opr.ctx_type = LINE_LAST; break; default: break; } return 1;}/* Peek a token from INPUT, and return the length of the token. We must not use this function out of bracket expressions. */static intinternal_functionpeek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax){ unsigned char c; if (re_string_eoi (input)) { token->type = END_OF_RE; return 0; } c = re_string_peek_byte (input, 0); token->opr.c = c;#ifdef RE_ENABLE_I18N if (input->mb_cur_max > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) { token->type = CHARACTER; return 1; }#endif /* RE_ENABLE_I18N */ if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && re_string_cur_idx (input) + 1 < re_string_length (input)) { /* In this case, '\' escape a character. */ unsigned char c2; re_string_skip_bytes (input, 1); c2 = re_string_peek_byte (input, 0); token->opr.c = c2; token->type = CHARACTER; return 1; } if (c == '[') /* '[' is a special char in a bracket exps. */ { unsigned char c2; int token_len; if (re_string_cur_idx (input) + 1 < re_string_length (input)) c2 = re_string_peek_byte (input, 1); else c2 = 0; token->opr.c = c2; token_len = 2; switch (c2) { case '.': token->type = OP_OPEN_COLL_ELEM; break; case '=': token->type = OP_OPEN_EQUIV_CLASS; break; case ':': if (syntax & RE_CHAR_CLASSES) { token->type = OP_OPEN_CHAR_CLASS; break; } /* else fall through. */ default: token->type = CHARACTER; token->opr.c = c; token_len = 1; break; } return token_len; } switch (c) { case '-': token->type = OP_CHARSET_RANGE; break; case ']': token->type = OP_CLOSE_BRACKET; break; case '^': token->type = OP_NON_MATCH_LIST; break; default: token->type = CHARACTER; } return 1;}/* Functions for parser. *//* Entry point of the parser. Parse the regular expression REGEXP and return the structure tree. If an error is occured, ERR is set by error code, and return NULL. This function build the following tree, from regular expression <reg_exp>: CAT / \ / \ <reg_exp> EOR CAT means concatenation. EOR means end of regular expression. */static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, reg_errcode_t *err){ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *eor, *root; re_token_t current_token; dfa->syntax = syntax; fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; eor = create_tree (dfa, NULL, NULL, END_OF_RE); if (tree != NULL) root = create_tree (dfa, tree, eor, CONCAT); else root = eor; if (BE (eor == NULL || root == NULL, 0)) { *err = REG_ESPACE; return NULL; } return root;}/* This function build the following tree, from regular expression <branch1>|<branch2>: ALT / \ / \ <branch1> <branch2> ALT means alternative, which represents the operator `|'. */static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err){ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *branch = NULL; tree = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; while (token->type == OP_ALT) { fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); if (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { branch = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && branch == NULL, 0)) return NULL; } else branch = NULL; tree = create_tree (dfa, tree, branch, OP_ALT); if (BE (tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } return tree;}/* This function build the following tree, from regular expression <exp1><exp2>: CAT / \ / \ <exp1> <exp2> CAT means concatenation. */static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err){ bin_tree_t *tree, *expr; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; tree = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; while (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { expr = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && expr == NULL, 0)) { return NULL; } if (tree != NULL && expr != NULL) { tree = create_tree (dfa, tree, expr, CONCAT); if (tree == NULL) { *err = REG_ESPACE; return NULL; } } else if (tree == NULL) tree = expr; /* Otherwise expr == NULL, we don't need to create new tree. */ } return tree;}/* This function build the following tree, from regular expression a*: * | a*/static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err){ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree; switch (token->type) { case CHARACTER: tree = create_token_tree (dfa, NULL, NULL, token); if (BE (tree == NULL, 0)) { *err = REG_ESPACE; return NULL; }#ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) { while (!re_string_eoi (regexp) && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) { bin_tree_t *mbc_remain; fetch_token (token, regexp, syntax); mbc_remain = create_token_tree (dfa, NULL, NULL, token); tree = create_tree (dfa, tree, mbc_remain, CONCAT); if (BE (mbc_remain == NULL || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } }#endif break; case OP_OPEN_SUBEXP: tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_OPEN_BRACKET: tree = parse_bracket_exp (regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_BACK_REF: if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) { *err = REG_ESUBREG; return NULL; } dfa->used_bkref_map |= 1 << token->opr.idx; tree = create_token_tree (dfa, NULL, NULL, token); if (BE (tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } ++dfa->nbackref; dfa->has_mb_node = 1; break; case OP_OPEN_DUP_NUM: if (syntax & RE_CONTEXT_INVALID_DUP) { *err = REG_BADRPT; return NULL; } /* FALLTHROUGH */ case OP_DUP_ASTERISK: case OP_DUP_PLUS: case OP_DUP_QUESTION: if (syntax & RE_CONTEXT_INVALID_OPS) { *err = REG_BADRPT; return NULL; } else if (syntax & RE_CONTEXT_INDEP_OPS) { fetch_token (token, r
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?