📄 regcomp.c
字号:
add_opcode(reg, OP_CCLASS); r = add_bitset(reg, cc->bs); } else { if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { if (IS_CCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MB_NOT); else add_opcode(reg, OP_CCLASS_MB); r = add_multi_byte_cclass(cc->mbuf, reg); } else { if (IS_CCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MIX_NOT); else add_opcode(reg, OP_CCLASS_MIX); r = add_bitset(reg, cc->bs); if (r) return r; r = add_multi_byte_cclass(cc->mbuf, reg); } } return r;}static intentry_repeat_range(regex_t* reg, int id, int lower, int upper){#define REPEAT_RANGE_ALLOC 4 OnigRepeatRange* p; if (reg->repeat_range_alloc == 0) { p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); reg->repeat_range = p; reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; } else if (reg->repeat_range_alloc <= id) { int n; n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n); CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); reg->repeat_range = p; reg->repeat_range_alloc = n; } else { p = reg->repeat_range; } p[id].lower = lower; p[id].upper = upper; return 0;}static intcompile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, regex_t* reg){ int r; int num_repeat = reg->num_repeat; r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); if (r) return r; r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ reg->num_repeat++; if (r) return r; r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); if (r) return r; r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); if (r) return r; r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; if (#ifdef USE_SUBEXP_CALL reg->num_call > 0 ||#endif IS_QUALIFIER_IN_REPEAT(qn)) { r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); } else { r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); } if (r) return r; r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ return r;}#define QUALIFIER_EXPAND_LIMIT_SIZE 50static intcompile_length_qualifier_node(QualifierNode* qn, regex_t* reg){ int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); int empty_info = qn->target_empty_info; int tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; /* anychar repeat */ if (NTYPE(qn->target) == N_ANYCHAR) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; else return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; } } if (empty_info != 0) mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); else mod_tlen = tlen; if (infinite && (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { len = SIZE_OP_JUMP; } else { len = tlen * qn->lower; } if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; else if (IS_NOT_NULL(qn->next_head_exact)) len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; else len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; } else len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; } else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ len = SIZE_OP_JUMP + tlen; } else if (!infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE)) { len = tlen * qn->lower; len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); } else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; } else { len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; } return len;}static intis_anychar_star_qualifier(QualifierNode* qn){ if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && NTYPE(qn->target) == N_ANYCHAR) return 1; else return 0;}static intcompile_qualifier_node(QualifierNode* qn, regex_t* reg){ int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); int empty_info = qn->target_empty_info; int tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; if (is_anychar_star_qualifier(qn)) { r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; if (IS_NOT_NULL(qn->next_head_exact)) { if (IS_MULTILINE(reg->options)) r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); } else { if (IS_MULTILINE(reg->options)) return add_opcode(reg, OP_ANYCHAR_ML_STAR); else return add_opcode(reg, OP_ANYCHAR_STAR); } } if (empty_info != 0) mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); else mod_tlen = tlen; if (infinite && (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); else if (IS_NOT_NULL(qn->next_head_exact)) r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); else r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); } else { r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); } if (r) return r; } else { r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; } if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) { r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, mod_tlen + SIZE_OP_JUMP); if (r) return r; add_bytes(reg, NSTRING(qn->head_exact).s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); } else if (IS_NOT_NULL(qn->next_head_exact)) { r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, mod_tlen + SIZE_OP_JUMP); if (r) return r; add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); } else { r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); if (r) return r; r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); } } else { r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); if (r) return r; r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); } } else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ r = add_opcode_rel_addr(reg, OP_JUMP, tlen); if (r) return r; r = compile_tree(qn->target, reg); } else if (!infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE)) { int n = qn->upper - qn->lower; r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; for (i = 0; i < n; i++) { r = add_opcode_rel_addr(reg, OP_PUSH, (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); if (r) return r; r = compile_tree(qn->target, reg); if (r) return r; } } else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, tlen); if (r) return r; r = compile_tree(qn->target, reg); } else { r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); } return r;}static intcompile_length_option_node(EffectNode* node, regex_t* reg){ int tlen; OnigOptionType prev = reg->options; reg->options = node->option; tlen = compile_length_tree(node->target, reg); reg->options = prev; if (tlen < 0) return tlen; if (IS_DYNAMIC_OPTION(prev ^ node->option)) { return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL + tlen + SIZE_OP_SET_OPTION; } else return tlen;}static intcompile_option_node(EffectNode* node, regex_t* reg){ int r; OnigOptionType prev = reg->options; if (IS_DYNAMIC_OPTION(prev ^ node->option)) { r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); if (r) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); if (r) return r; r = add_opcode(reg, OP_FAIL); if (r) return r; } reg->options = node->option; r = compile_tree(node->target, reg); reg->options = prev; if (IS_DYNAMIC_OPTION(prev ^ node->option)) { if (r) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); } return r;}static intcompile_length_effect_node(EffectNode* node, regex_t* reg){ int len; int tlen; if (node->type == EFFECT_OPTION) return compile_length_option_node(node, reg); if (node->target) { tlen = compile_length_tree(node->target, reg); if (tlen < 0) return tlen; } else tlen = 0; switch (node->type) { case EFFECT_MEMORY:#ifdef USE_SUBEXP_CALL if (IS_EFFECT_CALLED(node)) { len = SIZE_OP_MEMORY_START_PUSH + tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) len += (IS_EFFECT_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else len += (IS_EFFECT_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } else#endif { if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) len = SIZE_OP_MEMORY_START_PUSH; else len = SIZE_OP_MEMORY_START; len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); } break; case EFFECT_STOP_BACKTRACK: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { QualifierNode* qn = &NQUALIFIER(node->target); tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; len = tlen * qn->lower + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; } else { len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; } break; default: return ONIGERR_TYPE_BUG; break; } return len;}static int get_char_length_tree(Node* node, regex_t* reg, int* len);static intcompile_effect_node(EffectNode* node, regex_t* reg){ int r, len; if (node->type == EFFECT_OPTION) return compile_option_node(node, reg); switch (node->type) { case EFFECT_MEMORY:#ifdef USE_SUBEXP_CALL if (IS_EFFECT_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r) return r; node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; node->state |= NST_ADDR_FIXED; r = add_abs_addr(reg, (int )node->call_addr); if (r) return r; len = compile_length_tree(node->target, reg); len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) len += (IS_EFFECT_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else len += (IS_EFFECT_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); r = add_opcode_rel_addr(reg, OP_JUMP, len); if (r) return r; }#endif if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) r = add_opcode(reg, OP_MEMORY_START_PUSH); else r = add_opcode(reg, OP_MEMORY_START); if (r) return r; r = add_mem_num(reg, node->regnum); if (r) return r; r = compile_tree(node->target, reg); if (r) return r;#ifdef USE_SUBEXP_CALL if (IS_EFFECT_CALLED(node)) { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) r = add_opcode(reg, (IS_EFFECT_RECURSION(node) ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); else r = add_opcode(reg, (IS_EFFECT_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END)); if (r) return r; r = add_mem_num(reg, node->regnum); if (r) return r; r = add_opcode(reg, OP_RETURN); } else#endif { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) r = add_opcode(reg, OP_MEMORY_END_PUSH); else r = add_opcode(reg, OP_MEMORY_END); if (r) return r; r = add_mem_num(reg, node->regnum); } break; case EFFECT_STOP_BACKTRACK: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { QualifierNode* qn = &NQUALIFIER(node->target); r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; len = compile_length_tree(qn->target, reg); if (len < 0) return len; r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); if (r) return r; r = compile_tree(qn->target, reg); if (r) return r; r = add_opcode(reg, OP_POP); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); } else { r = add_opcode(reg, OP_PUSH_STOP_BT); if (r) return r; r = compile_tree(node->target, reg); if (r) return r; r = add_opcode(reg, OP_POP_STOP_BT); } break; default: return ONIGERR_TYPE_BUG; break; } return r;}static intcompile_length_anchor_node(AnchorNode* node, regex_t* reg){ int len; int tlen = 0; if (node->target) { tlen = compile_length_tree(node->target, reg); if (tlen < 0) return tlen; } switch (node->type) { case ANCHOR_PREC_READ: len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; break; case ANCHOR_PREC_READ_NOT: len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; break; case ANCHOR_LOOK_BEHIND: len = SIZE_OP_LOOK_BEHIND + tlen; break; case ANCHOR_LOOK_BEHIND_NOT: len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; break; default: len = SIZE_OPCODE; break; } return len;}static intcompile_anchor_node(AnchorNode* node, regex_t* reg){ int r, len; switch (node->type) { case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;#ifdef USE_WORD_BEGIN_END case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;#endif case ANCHOR_PREC_READ: r = add_opcode(reg, OP_PUSH_POS); if (r) return r; r = compile_tree(node->target, reg); if (r) return r; r = add_opcode(reg, OP_POP_POS); break; case ANCHOR_PREC_READ_NOT: len = compile_length_tree(node->target, reg); if (len < 0) return len; r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); if (r) return r; r = compile_tree(node->target, reg); if (r) return r; r = add_opcode(reg, OP_FAIL_POS); break; case ANCHOR_LOOK_BEHIND: { int n; r = add_opcode(reg, OP_LOOK_BEHIND); if (r) return r; if (node->char_len < 0) { r = get_char_length_tree(node->target, reg, &n); if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -