📄 regparse.c
字号:
cc->mbuf = tbuf; } CCLASS_CLEAR_NOT(cc); } return 0;}static intand_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc){ int r, not1, not2; BBuf *buf1, *buf2, *pbuf; BitSetRef bsr1, bsr2; BitSet bs1, bs2; not1 = IS_CCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; not2 = IS_CCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; if (not1 != 0) { bitset_invert_to(bsr1, bs1); bsr1 = bs1; } if (not2 != 0) { bitset_invert_to(bsr2, bs2); bsr2 = bs2; } bitset_and(bsr1, bsr2); if (bsr1 != dest->bs) { bitset_copy(dest->bs, bsr1); bsr1 = dest->bs; } if (not1 != 0) { bitset_invert(dest->bs); } if (! ONIGENC_IS_SINGLEBYTE(enc)) { if (not1 != 0 && not2 != 0) { r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); } else { r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); if (r == 0 && not1 != 0) { BBuf *tbuf; r = not_code_range_buf(enc, pbuf, &tbuf); if (r != 0) { bbuf_free(pbuf); return r; } bbuf_free(pbuf); pbuf = tbuf; } } if (r != 0) return r; dest->mbuf = pbuf; bbuf_free(buf1); return r; } return 0;}static intor_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc){ int r, not1, not2; BBuf *buf1, *buf2, *pbuf; BitSetRef bsr1, bsr2; BitSet bs1, bs2; not1 = IS_CCLASS_NOT(dest); bsr1 = dest->bs; buf1 = dest->mbuf; not2 = IS_CCLASS_NOT(cc); bsr2 = cc->bs; buf2 = cc->mbuf; if (not1 != 0) { bitset_invert_to(bsr1, bs1); bsr1 = bs1; } if (not2 != 0) { bitset_invert_to(bsr2, bs2); bsr2 = bs2; } bitset_or(bsr1, bsr2); if (bsr1 != dest->bs) { bitset_copy(dest->bs, bsr1); bsr1 = dest->bs; } if (not1 != 0) { bitset_invert(dest->bs); } if (! ONIGENC_IS_SINGLEBYTE(enc)) { if (not1 != 0 && not2 != 0) { r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); } else { r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); if (r == 0 && not1 != 0) { BBuf *tbuf; r = not_code_range_buf(enc, pbuf, &tbuf); if (r != 0) { bbuf_free(pbuf); return r; } bbuf_free(pbuf); pbuf = tbuf; } } if (r != 0) return r; dest->mbuf = pbuf; bbuf_free(buf1); return r; } else return 0;}static intconv_backslash_value(int c, ScanEnv* env){ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { switch (c) { case 'n': return '\n'; case 't': return '\t'; case 'r': return '\r'; case 'f': return '\f'; case 'a': return '\007'; case 'b': return '\010'; case 'e': return '\033'; case 'v': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) return '\v'; break; default: break; } } return c;}static intis_invalid_qualifier_target(Node* node){ switch (NTYPE(node)) { case N_ANCHOR: return 1; break; case N_EFFECT: if (NEFFECT(node).type == EFFECT_OPTION) return is_invalid_qualifier_target(NEFFECT(node).target); break; case N_LIST: /* ex. (?:\G\A)* */ do { if (! is_invalid_qualifier_target(NCONS(node).left)) return 0; } while (IS_NOT_NULL(node = NCONS(node).right)); return 0; break; case N_ALT: /* ex. (?:abc|\A)* */ do { if (is_invalid_qualifier_target(NCONS(node).left)) return 1; } while (IS_NOT_NULL(node = NCONS(node).right)); break; default: break; } return 0;}/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */static intpopular_qualifier_num(QualifierNode* qf){ if (qf->greedy) { if (qf->lower == 0) { if (qf->upper == 1) return 0; else if (IS_REPEAT_INFINITE(qf->upper)) return 1; } else if (qf->lower == 1) { if (IS_REPEAT_INFINITE(qf->upper)) return 2; } } else { if (qf->lower == 0) { if (qf->upper == 1) return 3; else if (IS_REPEAT_INFINITE(qf->upper)) return 4; } else if (qf->lower == 1) { if (IS_REPEAT_INFINITE(qf->upper)) return 5; } } return -1;}enum ReduceType { RQ_ASIS = 0, /* as is */ RQ_DEL = 1, /* delete parent */ RQ_A, /* to '*' */ RQ_AQ, /* to '*?' */ RQ_QQ, /* to '??' */ RQ_P_QQ, /* to '+)??' */ RQ_PQ_Q, /* to '+?)?' */};static enum ReduceType ReduceTypeTable[6][6] = { {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */};extern voidonig_reduce_nested_qualifier(Node* pnode, Node* cnode){ int pnum, cnum; QualifierNode *p, *c; p = &(NQUALIFIER(pnode)); c = &(NQUALIFIER(cnode)); pnum = popular_qualifier_num(p); cnum = popular_qualifier_num(c); switch(ReduceTypeTable[cnum][pnum]) { case RQ_DEL: *p = *c; break; case RQ_A: p->target = c->target; p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; break; case RQ_AQ: p->target = c->target; p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; break; case RQ_QQ: p->target = c->target; p->lower = 0; p->upper = 1; p->greedy = 0; break; case RQ_P_QQ: p->target = cnode; p->lower = 0; p->upper = 1; p->greedy = 0; c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; return ; break; case RQ_PQ_Q: p->target = cnode; p->lower = 0; p->upper = 1; p->greedy = 1; c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; return ; break; case RQ_ASIS: p->target = cnode; return ; break; } c->target = NULL_NODE; onig_node_free(cnode);}enum TokenSyms { TK_EOT = 0, /* end of token */ TK_RAW_BYTE = 1, TK_CHAR, TK_STRING, TK_CODE_POINT, TK_ANYCHAR, TK_CHAR_TYPE, TK_BACKREF, TK_CALL, TK_ANCHOR, TK_OP_REPEAT, TK_INTERVAL, TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ TK_ALT, TK_SUBEXP_OPEN, TK_SUBEXP_CLOSE, TK_CC_OPEN, TK_QUOTE_OPEN, TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ /* in cc */ TK_CC_CLOSE, TK_CC_RANGE, TK_POSIX_BRACKET_OPEN, TK_CC_AND, /* && */ TK_CC_CC_OPEN /* [ */};typedef struct { enum TokenSyms type; int escaped; int base; /* is number: 8, 16 (used in [....]) */ UChar* backp; union { UChar* s; int c; OnigCodePoint code; int anchor; int subtype; struct { int lower; int upper; int greedy; int possessive; } repeat; struct { int num; int ref1; int* refs; int by_name; } backref; struct { UChar* name; UChar* name_end; } call; struct { int not; } prop; } u;} OnigToken;static intfetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env){ int low, up, syn_allow, non_low = 0; int r = 0; OnigCodePoint c; OnigEncoding enc = env->enc; UChar* p = *src; PFETCH_READY; syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); if (PEND) { if (syn_allow) return 1; /* "....{" : OK! */ else return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ } if (! syn_allow) { c = PPEEK; if (c == ')' || c == '(' || c == '|') { return ONIGERR_END_PATTERN_AT_LEFT_BRACE; } } low = onig_scan_unsigned_number(&p, end, env->enc); if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; if (low > ONIG_MAX_REPEAT_NUM) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; if (p == *src) { /* can't read low */ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { /* allow {,n} as {0,n} */ low = 0; non_low = 1; } else goto invalid; } if (PEND) goto invalid; PFETCH(c); if (c == ',') { UChar* prev = p; up = onig_scan_unsigned_number(&p, end, env->enc); if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; if (up > ONIG_MAX_REPEAT_NUM) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; if (p == prev) { if (non_low != 0) goto invalid; up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ } } else { if (non_low != 0) goto invalid; PUNFETCH; up = low; /* {n} : exact n times */ r = 2; /* fixed */ } if (PEND) goto invalid; PFETCH(c); if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { if (c != MC_ESC(enc)) goto invalid; PFETCH(c); } if (c != '}') goto invalid; if (!IS_REPEAT_INFINITE(up) && low > up) { return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; } tok->type = TK_INTERVAL; tok->u.repeat.lower = low; tok->u.repeat.upper = up; *src = p; return r; /* 0: normal {n,m}, 2: fixed {n} */ invalid: if (syn_allow) return 1; /* OK */ else return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;}/* \M-, \C-, \c, or \... */static intfetch_escaped_value(UChar** src, UChar* end, ScanEnv* env){ int v; OnigCodePoint c; OnigEncoding enc = env->enc; UChar* p = *src; PFETCH_READY; if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; PFETCH(c); switch (c) { case 'M': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH(c); if (c != '-') return ONIGERR_META_CODE_SYNTAX; if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH(c); if (c == MC_ESC(enc)) { v = fetch_escaped_value(&p, end, env); if (v < 0) return v; c = (OnigCodePoint )v; } c = ((c & 0xff) | 0x80); } else goto backslash; break; case 'C': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH(c); if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; goto control; } else goto backslash; case 'c': if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { control: if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH(c); if (c == MC_ESC(enc)) { v = fetch_escaped_value(&p, end, env); if (v < 0) return v; c = (OnigCodePoint )v; } else if (c == '?') c = 0177; else c &= 0x9f; break; } /* fall through */ default: { backslash: c = conv_backslash_value(c, env); } break; } *src = p; return c;}static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);#ifdef USE_NAMED_GROUP/* def: 0 -> define name (don't allow number name) 1 -> reference name (allow number name)*/static intfetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref){ int r, is_num; OnigCodePoint c = 0; OnigCodePoint first_code; OnigEncoding enc = env->enc; UChar *name_end; UChar *p = *src; PFETCH_READY; name_end = end; r = 0; is_num = 0; if (PEND) { return ONIGERR_EMPTY_GROUP_NAME; } else { PFETCH(c); first_code = c; if (c == '>') return ONIGERR_EMPTY_GROUP_NAME; if (ONIGENC_IS_CODE_DIGIT(enc, c)) { if (ref == 1) is_num = 1; else { r = ONIGERR_INVALID_GROUP_NAME; } } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } while (!PEND) { name_end = p; PFETCH(c); if (c == '>' || c == ')') break; if (is_num == 1) { if (! ONIGENC_IS_CODE_DIGIT(enc, c)) { if (!ONIGENC_IS_CODE_WORD(enc, c)) r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; else r = ONIGERR_INVALID_GROUP_NAME; } } else { if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } } if (c != '>') { r = ONIGERR_INVALID_GROUP_NAME; name_end = end; } else { if (ONIGENC_IS_CODE_ASCII(first_code) && ONIGENC_IS_CODE_UPPER(enc, first_code)) r = ONIGERR_INVALID_GROUP_NAME; } if (r == 0) { *rname_end = name_end; *src = p; return 0; } else { onig_scan_env_set_error_string(env, r, *src, name_end); return r; }}#elsestatic intfetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref){ int r, len; OnigCodePoint c = 0; UChar *name_end; OnigEncoding enc = env->enc; UChar *p = *src; PFETCH_READY; r = 0; while (!PEND) { name_end = p; if (enc_len(enc, p) > 1) r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -