📄 regcomp.c
字号:
<exp1><exp2>: CAT / \ / \ <exp1> <exp2> CAT means concatenation. */static bin_tree_t *parse_branch (regexp, preg, token, syntax, nest, err) re_string_t *regexp; regex_t *preg; re_token_t *token; reg_syntax_t syntax; int nest; reg_errcode_t *err;{ bin_tree_t *tree, *exp; tree = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; while (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { exp = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && exp == NULL, 0)) { free_bin_tree (tree); return NULL; } if (tree != NULL && exp != NULL) { tree = create_tree (tree, exp, CONCAT, 0); if (tree == NULL) { *err = REG_ESPACE; return NULL; } } else if (tree == NULL) tree = exp; /* Otherwise exp == NULL, we don't need to create new tree. */ } return tree;}/* This function build the following tree, from regular expression a*: * | a*/static bin_tree_t *parse_expression (regexp, preg, token, syntax, nest, err) re_string_t *regexp; regex_t *preg; re_token_t *token; reg_syntax_t syntax; int nest; reg_errcode_t *err;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree; int new_idx; switch (token->type) { case CHARACTER: new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; }#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { while (!re_string_eoi (regexp) && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) { bin_tree_t *mbc_remain; *token = fetch_token (regexp, syntax); new_idx = re_dfa_add_node (dfa, *token, 0); mbc_remain = create_tree (NULL, NULL, 0, new_idx); tree = create_tree (tree, mbc_remain, CONCAT, 0); if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } }#endif break; case OP_OPEN_SUBEXP: tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_OPEN_BRACKET: tree = parse_bracket_exp (regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_BACK_REF: if (BE (preg->re_nsub < token->opr.idx || dfa->subexps[token->opr.idx - 1].end == -1, 0)) { *err = REG_ESUBREG; return NULL; } dfa->used_bkref_map |= 1 << (token->opr.idx - 1); new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } ++dfa->nbackref; dfa->has_mb_node = 1; break; case OP_DUP_ASTERISK: case OP_DUP_PLUS: case OP_DUP_QUESTION: case OP_OPEN_DUP_NUM: if (syntax & RE_CONTEXT_INVALID_OPS) { *err = REG_BADRPT; return NULL; } else if (syntax & RE_CONTEXT_INDEP_OPS) { *token = fetch_token (regexp, syntax); return parse_expression (regexp, preg, token, syntax, nest, err); } /* else fall through */ case OP_CLOSE_SUBEXP: if ((token->type == OP_CLOSE_SUBEXP) && !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) { *err = REG_ERPAREN; return NULL; } /* else fall through */ case OP_CLOSE_DUP_NUM: /* We treat it as a normal character. */ /* Then we can these characters as normal characters. */ token->type = CHARACTER; new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } break; case ANCHOR: if (dfa->word_char == NULL) { *err = init_word_char (dfa); if (BE (*err != REG_NOERROR, 0)) return NULL; } if (token->opr.ctx_type == WORD_DELIM) { bin_tree_t *tree_first, *tree_last; int idx_first, idx_last; token->opr.ctx_type = WORD_FIRST; idx_first = re_dfa_add_node (dfa, *token, 0); tree_first = create_tree (NULL, NULL, 0, idx_first); token->opr.ctx_type = WORD_LAST; idx_last = re_dfa_add_node (dfa, *token, 0); tree_last = create_tree (NULL, NULL, 0, idx_last); token->type = OP_ALT; new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (tree_first, tree_last, 0, new_idx); if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1 || tree_first == NULL || tree_last == NULL || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } else { new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } /* We must return here, since ANCHORs can't be followed by repetition operators. eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", it must not be "<ANCHOR(^)><REPEAT(*)>". */ *token = fetch_token (regexp, syntax); return tree; case OP_PERIOD: new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } if (MB_CUR_MAX > 1) dfa->has_mb_node = 1; break; case OP_WORD: tree = build_word_op (dfa, 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_NOTWORD: tree = build_word_op (dfa, 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_ALT: case END_OF_RE: return NULL; case BACK_SLASH: *err = REG_EESCAPE; return NULL; default: /* Must not happen? */#ifdef DEBUG assert (0);#endif return NULL; } *token = fetch_token (regexp, syntax); while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) { tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; dfa->has_plural_match = 1; } return tree;}/* This function build the following tree, from regular expression (<reg_exp>): SUBEXP | <reg_exp>*/static bin_tree_t *parse_sub_exp (regexp, preg, token, syntax, nest, err) re_string_t *regexp; regex_t *preg; re_token_t *token; reg_syntax_t syntax; int nest; reg_errcode_t *err;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *left_par, *right_par; size_t cur_nsub; int new_idx; cur_nsub = preg->re_nsub++; if (dfa->subexps_alloc < preg->re_nsub) { re_subexp_t *new_array; dfa->subexps_alloc *= 2; new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc); if (BE (new_array == NULL, 0)) { dfa->subexps_alloc /= 2; *err = REG_ESPACE; return NULL; } dfa->subexps = new_array; } dfa->subexps[cur_nsub].start = dfa->nodes_len; dfa->subexps[cur_nsub].end = -1; new_idx = re_dfa_add_node (dfa, *token, 0); left_par = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || left_par == NULL, 0)) { *err = REG_ESPACE; return NULL; } dfa->nodes[new_idx].opr.idx = cur_nsub; *token = fetch_token (regexp, syntax); /* The subexpression may be a null string. */ if (token->type == OP_CLOSE_SUBEXP) tree = NULL; else { tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; } if (BE (token->type != OP_CLOSE_SUBEXP, 0)) { free_bin_tree (tree); *err = REG_BADPAT; return NULL; } new_idx = re_dfa_add_node (dfa, *token, 0); dfa->subexps[cur_nsub].end = dfa->nodes_len; right_par = create_tree (NULL, NULL, 0, new_idx); tree = ((tree == NULL) ? right_par : create_tree (tree, right_par, CONCAT, 0)); tree = create_tree (left_par, tree, CONCAT, 0); if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } dfa->nodes[new_idx].opr.idx = cur_nsub; return tree;}/* This function parse repetition operators like "*", "+", "{1,3}" etc. */static bin_tree_t *parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) bin_tree_t *dup_elem; re_string_t *regexp; re_dfa_t *dfa; re_token_t *token; reg_syntax_t syntax; reg_errcode_t *err;{ re_token_t dup_token; bin_tree_t *tree = dup_elem, *work_tree; int new_idx, start_idx = re_string_cur_idx (regexp); re_token_t start_token = *token; if (token->type == OP_OPEN_DUP_NUM) { int i; int end = 0; int start = fetch_number (regexp, token, syntax); bin_tree_t *elem; if (start == -1) { if (token->type == CHARACTER && token->opr.c == ',') start = 0; /* We treat "{,m}" as "{0,m}". */ else { *err = REG_BADBR; /* <re>{} is invalid. */ return NULL; } } if (BE (start != -2, 1)) { /* We treat "{n}" as "{n,n}". */ end = ((token->type == OP_CLOSE_DUP_NUM) ? start : ((token->type == CHARACTER && token->opr.c == ',') ? fetch_number (regexp, token, syntax) : -2)); } if (BE (start == -2 || end == -2, 0)) { /* Invalid sequence. */ if (token->type == OP_CLOSE_DUP_NUM) goto parse_dup_op_invalid_interval; else goto parse_dup_op_ebrace; } if (BE (start == 0 && end == 0, 0)) { /* We treat "<re>{0}" and "<re>{0,0}" as null string. */ *token = fetch_token (regexp, syntax); free_bin_tree (dup_elem); return NULL; } /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ elem = tree; for (i = 0; i < start; ++i) if (i != 0) { work_tree = duplicate_tree (elem, dfa); tree = create_tree (tree, work_tree, CONCAT, 0); if (BE (work_tree == NULL || tree == NULL, 0)) goto parse_dup_op_espace; } if (end == -1) { /* We treat "<re>{0,}" as "<re>*". */ dup_token.type = OP_DUP_ASTERISK; if (start > 0) { elem = duplicate_tree (elem, dfa); new_idx = re_dfa_add_node (dfa, dup_token, 0); work_tree = create_tree (elem, NULL, 0, new_idx); tree = create_tree (tree, work_tree, CONCAT, 0); if (BE (elem == NULL || new_idx == -1 || work_tree == NULL || tree == NULL, 0)) goto parse_dup_op_espace; } else { new_idx = re_dfa_add_node (dfa, dup_token, 0); tree = create_tree (elem, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) goto parse_dup_op_espace; } } else if (end - start > 0) { /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?". */ dup_token.type = OP_DUP_QUESTION; if (start > 0) { elem = duplicate_tree (elem, dfa); new_idx = re_dfa_add_node (dfa, dup_token, 0); elem = create_tree (elem, NULL, 0, new_idx); tree = create_tree (tree, elem, CONCAT, 0); if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0)) goto parse_dup_op_espace; } else { new_idx = re_dfa_add_node (dfa, dup_token, 0); tree = elem = create_tree (elem, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) goto parse_dup_op_espace; } for (i = 1; i < end - start; ++i) { work_tree = duplicate_tree (elem, dfa); tree = create_tree (tree, work_tree, CONCAT, 0); if (BE (work_tree == NULL || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } } } else { new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (tree, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) { *err = REG_ESPACE; return NULL; } } *token = fetch_token (regexp, syntax); return tree; parse_dup_op_espac
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -