📄 regparse.c
字号:
PFETCH(c); if (c == '>' || c == ')') break; if (! ONIGENC_IS_CODE_DIGIT(enc, c)) r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } if (c != '>') { r = ONIGERR_INVALID_GROUP_NAME; name_end = end; } if (r == 0) { *rname_end = name_end; *src = p; return 0; } else { err: onig_scan_env_set_error_string(env, r, *src, name_end); return r; }}#endifstatic voidCC_ESC_WARN(ScanEnv* env, UChar *c){ if (onig_warn == onig_null_warn) return ; if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { char buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, "character class has '%s' without escape", c); (*onig_warn)(buf); }}static voidCCEND_ESC_WARN(ScanEnv* env, UChar* c){ if (onig_warn == onig_null_warn) return ; if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { char buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, (env)->pattern, (env)->pattern_end, "regular expression has '%s' without escape", c); (*onig_warn)(buf); }}static UChar*find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, UChar **next, OnigEncoding enc){ int i; OnigCodePoint x; UChar *q; UChar *p = from; while (p < to) { x = ONIGENC_MBC_TO_CODE(enc, p, to); q = p + enc_len(enc, p); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; q += enc_len(enc, q); } if (i >= n) { if (IS_NOT_NULL(next)) *next = q; return p; } } p = q; } return NULL_UCHARP;}static intstr_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, OnigCodePoint bad, OnigEncoding enc){ int i, in_esc; OnigCodePoint x; UChar *q; UChar *p = from; in_esc = 0; while (p < to) { if (in_esc) { in_esc = 0; p += enc_len(enc, p); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); q = p + enc_len(enc, p); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; q += enc_len(enc, q); } if (i >= n) return 1; p += enc_len(enc, p); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); if (x == bad) return 0; else if (x == MC_ESC(enc)) in_esc = 1; p = q; } } } return 0;}static intfetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env){ int num; OnigCodePoint c, c2; OnigSyntaxType* syn = env->syntax; OnigEncoding enc = env->enc; UChar* prev; UChar* p = *src; PFETCH_READY; if (PEND) { tok->type = TK_EOT; return tok->type; } PFETCH(c); tok->type = TK_CHAR; tok->base = 0; tok->u.c = c; if (c == ']') { tok->type = TK_CC_CLOSE; } else if (c == '-') { tok->type = TK_CC_RANGE; } else if (c == MC_ESC(enc)) { if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) goto end; if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; PFETCH(c); tok->escaped = 1; tok->u.c = c; switch (c) { case 'w': tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_WORD; break; case 'W': tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_WORD; break; case 'd': tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_DIGIT; break; case 'D': tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_DIGIT; break; case 's': tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_WHITE_SPACE; break; case 'S': tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_WHITE_SPACE; break; case 'h': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_XDIGIT; break; case 'H': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_XDIGIT; break; case 'p': case 'P': c2 = PPEEK; if (c2 == '{' && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { PINC; tok->type = TK_CHAR_PROPERTY; tok->u.prop.not = (c == 'P' ? 1 : 0); if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { PFETCH(c2); if (c2 == '^') { tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); } else PUNFETCH; } } break; case 'x': if (PEND) break; prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { c2 = PPEEK; if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) { PINC; tok->type = TK_CODE_POINT; tok->base = 16; tok->u.code = (OnigCodePoint )num; } else { /* can't read nothing or invalid format */ p = prev; } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } tok->type = TK_RAW_BYTE; tok->base = 16; tok->u.c = num; } break; case 'u': if (PEND) break; prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } tok->type = TK_CODE_POINT; tok->base = 16; tok->u.code = (OnigCodePoint )num; } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { PUNFETCH; prev = p; num = scan_unsigned_octal_number(&p, end, 3, enc); if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } tok->type = TK_RAW_BYTE; tok->base = 8; tok->u.c = num; } break; default: PUNFETCH; num = fetch_escaped_value(&p, end, env); if (num < 0) return num; if (tok->u.c != num) { tok->u.code = (OnigCodePoint )num; tok->type = TK_CODE_POINT; } break; } } else if (c == '[') { if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; tok->backp = p; /* point at '[' is readed */ PINC; if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']', enc)) { tok->type = TK_POSIX_BRACKET_OPEN; } else { PUNFETCH; goto cc_in_cc; } } else { cc_in_cc: if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { tok->type = TK_CC_CC_OPEN; } else { CC_ESC_WARN(env, "["); } } } else if (c == '&') { if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && !PEND && (PPEEK_IS('&'))) { PINC; tok->type = TK_CC_AND; } } end: *src = p; return tok->type;}static intfetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env){ int r, num; OnigCodePoint c; OnigEncoding enc = env->enc; OnigSyntaxType* syn = env->syntax; UChar* prev; UChar* p = *src; PFETCH_READY; start: if (PEND) { tok->type = TK_EOT; return tok->type; } tok->type = TK_STRING; tok->base = 0; tok->backp = p; PFETCH(c); if (c == MC_ESC(enc)) { if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; tok->backp = p; PFETCH(c); tok->u.c = c; tok->escaped = 1; switch (c) { case '*': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = REPEAT_INFINITE; goto greedy_check; break; case '+': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 1; tok->u.repeat.upper = REPEAT_INFINITE; goto greedy_check; break; case '?': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = 1; greedy_check: if (!PEND && PPEEK_IS('?') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { PFETCH(c); tok->u.repeat.greedy = 0; tok->u.repeat.possessive = 0; } else { possessive_check: if (!PEND && PPEEK_IS('+') && ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && tok->type != TK_INTERVAL) || (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && tok->type == TK_INTERVAL))) { PFETCH(c); tok->u.repeat.greedy = 1; tok->u.repeat.possessive = 1; } else { tok->u.repeat.greedy = 1; tok->u.repeat.possessive = 0; } } break; case '{': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; r = fetch_range_qualifier(&p, end, tok, env); if (r < 0) return r; /* error */ if (r == 0) goto greedy_check; else if (r == 2) { /* {n} */ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) goto possessive_check; goto greedy_check; } /* r == 1 : normal char */ break; case '|': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; tok->type = TK_ALT; break; case '(': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; tok->type = TK_SUBEXP_OPEN; break; case ')': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; tok->type = TK_SUBEXP_CLOSE; break; case 'w': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_WORD; break; case 'W': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_WORD; break; case 'b': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_WORD_BOUND; break; case 'B': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_NOT_WORD_BOUND; break;#ifdef USE_WORD_BEGIN_END case '<': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_WORD_BEGIN; break; case '>': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; tok->u.anchor = ANCHOR_WORD_END; break;#endif case 's': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_WHITE_SPACE; break; case 'S': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_WHITE_SPACE; break; case 'd': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_DIGIT; break; case 'D': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_DIGIT; break; case 'h': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_XDIGIT; break; case 'H': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; tok->type = TK_CHAR_TYPE; tok->u.subtype = CTYPE_NOT_XDIGIT; break; case 'A': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; begin_buf: tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_BEGIN_BUF; break; case 'Z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_SEMI_END_BUF; break; case 'z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; end_buf: tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_END_BUF; break; case 'G': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = ANCHOR_BEGIN_POSITION; break; case '`': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; goto begin_buf; break; case '\'': if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; goto end_buf; break; case 'x': if (PEND) break; prev = p; if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { PINC; num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) { PINC; tok->type = TK_CODE_POINT; tok->u.code = (OnigCodePoint )num; } else { /* can't read nothing or invalid format */ p = prev; } } else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } tok->type = TK_RAW_BYTE; tok->base = 16; tok->u.c = num; } break; case 'u': if (PEND) break; prev = p; if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); if (num < 0) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. */ num = 0; /* but, it's not error */ } tok->type = TK_CODE_POINT; tok->base = 16; tok->u.code = (OnigCodePoint )num; } break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -