📄 regexec.c
字号:
if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\} while (0)#define STAT_OP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\} while (0)#ifdef RUBY_PLATFORM/* * :nodoc: */static VALUE onig_stat_print(){ onig_print_statistics(stderr); return Qnil;}#endifextern void onig_statistics_init(){ int i; for (i = 0; i < 256; i++) { OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; } MaxStackDepth = 0;#ifdef RUBY_PLATFORM rb_define_global_function("onig_stat_print", onig_stat_print, 0);#endif}extern voidonig_print_statistics(FILE* f){ int i; fprintf(f, " count prev time\n"); for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { fprintf(f, "%8d: %8d: %10ld: %s\n", OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); } fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);}#define STACK_INC do {\ stk++;\ if (stk - stk_base > MaxStackDepth) \ MaxStackDepth = stk - stk_base;\} while (0)#else#define STACK_INC stk++#define STAT_OP_IN(opcode)#define STAT_OP_OUT#endifextern intonig_is_in_code_range(const UChar* p, OnigCodePoint code){ OnigCodePoint n, *data; OnigCodePoint low, high, x; GET_CODE_POINT(n, p); data = (OnigCodePoint* )p; data++; for (low = 0, high = n; low < high; ) { x = (low + high) >> 1; if (code > data[x * 2 + 1]) low = x + 1; else high = x; } return ((low < n && code >= data[low * 2]) ? 1 : 0);}static intcode_is_in_cclass_node(void* node, OnigCodePoint code, int enclen){ unsigned int in_cc; CClassNode* cc = (CClassNode* )node; if (enclen == 1) { in_cc = BITSET_AT(cc->bs, code); } else { UChar* p = ((BBuf* )(cc->mbuf))->p; in_cc = onig_is_in_code_range(p, code); } if (IS_CCLASS_NOT(cc)) { return (in_cc ? 0 : 1); } else { return (in_cc ? 1 : 0); }}/* matching region of POSIX API */typedef int regoff_t;typedef struct { regoff_t rm_so; regoff_t rm_eo;} posix_regmatch_t;/* match data(str - end) from position (sstart). *//* if sstart == str then set sprev to NULL. */static intmatch_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, UChar* sprev, MatchArg* msa){ static UChar FinishCode[] = { OP_FINISH }; int i, n, num_mem, best_len, pop_level; LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; OnigAmbigType ambig_flag = reg->ambig_flag; UChar *s, *q, *sbegin; UChar *p = reg->p; char *alloca_base; StackType *stk_alloc, *stk_base, *stk, *stk_end; StackType *stkp; /* used as any purpose. */ StackIndex si; StackIndex *repeat_stk; StackIndex *mem_start_stk, *mem_end_stk; n = reg->num_repeat + reg->num_mem * 2; STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); pop_level = reg->stack_pop_level; num_mem = reg->num_mem; repeat_stk = (StackIndex* )alloca_base; mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat); mem_end_stk = mem_start_stk + num_mem; mem_start_stk--; /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */ mem_end_stk--; /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */ for (i = 1; i <= num_mem; i++) { mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; }#ifdef ONIG_DEBUG_MATCH fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", (int )str, (int )end, (int )sstart, (int )sprev); fprintf(stderr, "size: %d, start offset: %d\n", (int )(end - str), (int )(sstart - str));#endif STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; s = (UChar* )sstart; while (1) {#ifdef ONIG_DEBUG_MATCH { UChar *q, *bp, buf[50]; int len; fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { len = enc_len(encode, q); while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } else { xmemcpy(bp, "\"", 1); bp += 1; } *bp = 0; fputs(buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); onig_print_compiled_byte_code(stderr, p, NULL, encode); fprintf(stderr, "\n"); }#endif sbegin = s; switch (*p++) { case OP_END: STAT_OP_IN(OP_END); n = s - sstart; if (n > best_len) { OnigRegion* region = msa->region; best_len = n; if (region) {#ifdef USE_POSIX_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; rmt[0].rm_so = sstart - str; rmt[0].rm_eo = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { if (BIT_STATUS_AT(reg->bt_mem_start, i)) rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; else rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) ? STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )mem_end_stk[i])) - str; } else { rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; } } } else {#endif /* USE_POSIX_REGION_OPTION */ region->beg[0] = sstart - str; region->end[0] = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { if (BIT_STATUS_AT(reg->bt_mem_start, i)) region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; else region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) ? STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )mem_end_stk[i])) - str; } else { region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; } }#ifdef USE_CAPTURE_HISTORY if (reg->capture_history != 0) { int r; OnigCaptureTreeNode* node; if (IS_NULL(region->history_root)) { region->history_root = node = history_node_new(); CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); } else { node = region->history_root; history_tree_clear(node); } node->group = 0; node->beg = sstart - str; node->end = s - str; stkp = stk_base; r = make_capture_history_tree(region->history_root, &stkp, stk, (UChar* )str, reg); if (r < 0) { best_len = r; /* error code */ goto finish; } }#endif /* USE_CAPTURE_HISTORY */#ifdef USE_POSIX_REGION_OPTION } /* else IS_POSIX_REGION() */#endif } /* if (region) */ } /* n > best_len */ STAT_OP_OUT; if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { best_len = ONIG_MISMATCH; goto fail; /* for retry */ } if (IS_FIND_LONGEST(option) && s < end) { goto fail; /* for retry */ } } /* default behavior: return first-matching result. */ goto finish; break; case OP_EXACT1: STAT_OP_IN(OP_EXACT1);#if 0 DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++;#endif if (*p != *s++) goto fail; DATA_ENSURE(0); p++; STAT_OP_OUT; break; case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); { int len; UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; DATA_ENSURE(1); ss = s; sp = p; exact1_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { if (*p != *q) {#if 1 if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; s = ss; p = sp; goto exact1_ic_retry; } else goto fail;#else goto fail;#endif } p++; q++; } } STAT_OP_OUT; break; case OP_EXACT2: STAT_OP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; STAT_OP_OUT; continue; break; case OP_EXACT3: STAT_OP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; STAT_OP_OUT; continue; break; case OP_EXACT4: STAT_OP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; STAT_OP_OUT; continue; break; case OP_EXACT5: STAT_OP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; STAT_OP_OUT; continue; break; case OP_EXACTN: STAT_OP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; STAT_OP_OUT; continue; break; case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); { int len; UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; GET_LENGTH_INC(tlen, p); endp = p + tlen; while (p < endp) { sprev = s; DATA_ENSURE(1); ss = s; sp = p; exactn_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { if (*p != *q) {#if 1 if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; s = ss; p = sp; goto exactn_ic_retry; } else goto fail;#else goto fail;#endif } p++; q++; } } } STAT_OP_OUT; continue; break; case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; STAT_OP_OUT; break; case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; sprev = s; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; STAT_OP_OUT; continue; break; case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3); DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; sprev = s; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; STAT_OP_OUT; continue; break; case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; } sprev = s - 2; STAT_OP_OUT; continue; break; case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; } sprev = s - 3; STAT_OP_OUT; continue; break; case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; DATA_ENSURE(tlen2); while (tlen2-- > 0) { if (*p != *s) goto fail; p++; s++; } sprev = s - tlen; STAT_OP_OUT; continue; break; case OP_CCLASS: STAT_OP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ STAT_OP_OUT; break; case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: GET_LENGTH_INC(tlen, p); { OnigCodePoint code; UChar *ss; int mb_len; DATA_ENSURE(1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -