📄 regexec.c
字号:
mb_len = enc_len(encode, s); DATA_ENSURE(mb_len); ss = s; s += mb_len; code = ONIGENC_MBC_TO_CODE(encode, ss, s);#ifdef PLATFORM_UNALIGNED_WORD_ACCESS if (! onig_is_in_code_range(p, code)) goto fail;#else q = p; ALIGNMENT_RIGHT(q); if (! onig_is_in_code_range(q, code)) goto fail;#endif } p += tlen; STAT_OP_OUT; break; case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; goto cclass_mb; } else { if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; GET_LENGTH_INC(tlen, p); p += tlen; s++; } STAT_OP_OUT; break; case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enc_len(encode, s); STAT_OP_OUT; break; case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; GET_LENGTH_INC(tlen, p); p += tlen; goto cc_mb_not_success; } cclass_mb_not: GET_LENGTH_INC(tlen, p); { OnigCodePoint code; UChar *ss; int mb_len = enc_len(encode, s); if (s + mb_len > end) { DATA_ENSURE(1); s = (UChar* )end; p += tlen; goto cc_mb_not_success; } ss = s; s += mb_len; code = ONIGENC_MBC_TO_CODE(encode, ss, s);#ifdef PLATFORM_UNALIGNED_WORD_ACCESS if (onig_is_in_code_range(p, code)) goto fail;#else q = p; ALIGNMENT_RIGHT(q); if (onig_is_in_code_range(q, code)) goto fail;#endif } p += tlen; cc_mb_not_success: STAT_OP_OUT; break; case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; goto cclass_mb_not; } else { if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; GET_LENGTH_INC(tlen, p); p += tlen; s++; } STAT_OP_OUT; break; case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); { OnigCodePoint code; void *node; int mb_len; UChar *ss; DATA_ENSURE(1); GET_POINTER_INC(node, p); mb_len = enc_len(encode, s); ss = s; s += mb_len; code = ONIGENC_MBC_TO_CODE(encode, ss, s); if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail; } STAT_OP_OUT; break; case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enc_len(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; STAT_OP_OUT; break; case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); n = enc_len(encode, s); DATA_ENSURE(n); s += n; STAT_OP_OUT; break; case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); while (s < end) { STACK_PUSH_ALT(p, s, sprev); n = enc_len(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } STAT_OP_OUT; break; case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR); while (s < end) { STACK_PUSH_ALT(p, s, sprev); n = enc_len(encode, s); if (n > 1) { DATA_ENSURE(n); sprev = s; s += n; } else { sprev = s; s++; } } STAT_OP_OUT; break; case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (s < end) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } n = enc_len(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } p++; STAT_OP_OUT; break; case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (s < end) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } n = enc_len(encode, s); if (n >1) { DATA_ENSURE(n); sprev = s; s += n; } else { sprev = s; s++; } } p++; STAT_OP_OUT; break; case OP_WORD: STAT_OP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enc_len(encode, s); STAT_OP_OUT; break; case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enc_len(encode, s); STAT_OP_OUT; break; case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); if (ON_STR_BEGIN(s)) { DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; } else if (ON_STR_END(s)) { if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } else { if (ONIGENC_IS_MBC_WORD(encode, s, end) == ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } STAT_OP_OUT; continue; break; case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); if (ON_STR_BEGIN(s)) { if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; } else if (ON_STR_END(s)) { if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } else { if (ONIGENC_IS_MBC_WORD(encode, s, end) != ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } STAT_OP_OUT; continue; break;#ifdef USE_WORD_BEGIN_END case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) { if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { STAT_OP_OUT; continue; } } goto fail; break; case OP_WORD_END: STAT_OP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { STAT_OP_OUT; continue; } } goto fail; break;#endif case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; STAT_OP_OUT; continue; break; case OP_END_BUF: STAT_OP_IN(OP_END_BUF); if (! ON_STR_END(s)) goto fail; STAT_OP_OUT; continue; break; case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; STAT_OP_OUT; continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { STAT_OP_OUT; continue; } goto fail; break; case OP_END_LINE: STAT_OP_IN(OP_END_LINE); if (ON_STR_END(s)) {#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {#endif if (IS_NOTEOL(msa->options)) goto fail; STAT_OP_OUT; continue;#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE }#endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { STAT_OP_OUT; continue; } goto fail; break; case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) {#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {#endif if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */ STAT_OP_OUT; continue;#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE }#endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enc_len(encode, s))) { STAT_OP_OUT; continue; } goto fail; break; case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION); if (s != msa->start) goto fail; STAT_OP_OUT; continue; break; case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); STAT_OP_OUT; continue; break; case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); mem_start_stk[mem] = (StackIndex )((void* )s); STAT_OP_OUT; continue; break; case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); STAT_OP_OUT; continue; break; case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); STAT_OP_OUT; continue; break;#ifdef USE_SUBEXP_CALL case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); STAT_OP_OUT; continue; break; case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); if (BIT_STATUS_AT(reg->bt_mem_start, mem)) mem_start_stk[mem] = GET_STACK_INDEX(stkp); else mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); STAT_OP_OUT; continue; break;#endif case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1); mem = 1; goto backref; break; case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2); mem = 2; goto backref; break; case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3); mem = 3; goto backref; break; case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); backref: { int len; UChar *pstart, *pend; /* if you want to remove following line, you should check in parse and compile time. */ if (mem > num_mem) goto fail; if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; if (BIT_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; STAT_OP_OUT; continue; } break; case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC); GET_MEMNUM_INC(mem, p); { int len; UChar *pstart, *pend; /* if you want to remove following line, you should check in parse and compile time. */ if (mem > num_mem) goto fail; if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; if (BIT_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; DATA_ENSURE(n); sprev = s; STRING_CMP_IC(ambig_flag, pstart, &s, n); while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; STAT_OP_OUT; continue; } break; case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI); { int len, is_fail; UChar *pstart, *pend, *swork; GET_LENGTH_INC(tlen, p); for (i = 0; i < tlen; i++) { GET_MEMNUM_INC(mem, p); if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; if (BIT_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; DATA_ENSURE(n); sprev = s; swork = s; STRING_CMP_VALUE(pstart, swork, n, is_fail); if (is_fail) continue; s = swork; while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); break; /* success */ } if (i == tlen) goto fail; STAT_OP_OUT; continue; } break; case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC); { int len, is_fail; UChar *pstart, *pend, *swork; GET_LENGTH_INC(tlen, p); for (i = 0; i < tlen; i++) { GET_MEMNUM_INC(mem, p); if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; if (BIT_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -