📄 regex.cc
字号:
while (pfrom != from) *--pto = *--pfrom; store_jump_n (from, op, to, n);}/* Open up space at location THERE, and insert operation OP followed by NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so we know how much data to copy up. If you call this function, you must zero out pending_exact. */static voidinsert_op_2 (char op, char *there, char *current_end, int num_1, int num_2){ register char *pfrom = current_end; /* Copy from here... */ register char *pto = current_end + 5; /* ...to here. */ while (pfrom != there) *--pto = *--pfrom; there[0] = op; STORE_NUMBER (there + 1, num_1); STORE_NUMBER (there + 3, num_2);}/* Given a pattern, compute a fastmap from it. The fastmap records which of the (1 << BYTEWIDTH) possible characters can start a string that matches the pattern. This fastmap is used by re_search to skip quickly over totally implausible text. The caller must supply the address of a (1 << BYTEWIDTH)-byte data area as bufp->fastmap. The other components of bufp describe the pattern to be used. */voidre_compile_fastmap (struct re_pattern_buffer *bufp){ unsigned char *pattern = (unsigned char *) bufp->buffer; int size = bufp->used; register char *fastmap = bufp->fastmap; register unsigned char *p = pattern; register unsigned char *pend = pattern + size; register int j, k; unsigned char *translate = (unsigned char *) bufp->translate; unsigned char *stackb[NFAILURES]; unsigned char **stackp = stackb; unsigned is_a_succeed_n; memset (fastmap, 0, (1 << BYTEWIDTH)); bufp->fastmap_accurate = 1; bufp->can_be_null = 0; while (p) { is_a_succeed_n = 0; if (p == pend) { bufp->can_be_null = 1; break; }#ifdef SWITCH_ENUM_BUG switch ((int) ((enum regexpcode) *p++))#else switch ((enum regexpcode) *p++)#endif { case exactn: if (translate) fastmap[translate[p[1]]] = 1; else fastmap[p[1]] = 1; break; case unused: case begline:#ifdef emacs case before_dot: case at_dot: case after_dot:#endif case begbuf: case endbuf: case wordbound: case notwordbound: case wordbeg: case wordend: continue; case endline: if (translate) fastmap[translate['\n']] = 1; else fastmap['\n'] = 1; if (bufp->can_be_null != 1) bufp->can_be_null = 2; break; case jump_n: case finalize_jump: case maybe_finalize_jump: case jump: case dummy_failure_jump: EXTRACT_NUMBER_AND_INCR (j, p); p += j; if (j > 0) continue; /* Jump backward reached implies we just went through the body of a loop and matched nothing. Opcode jumped to should be an on_failure_jump. Just treat it like an ordinary jump. For a * loop, it has pushed its failure point already; If so, discard that as redundant. */ if ((enum regexpcode) *p != on_failure_jump && (enum regexpcode) *p != succeed_n) continue; p++; EXTRACT_NUMBER_AND_INCR (j, p); p += j; if (stackp != stackb && *stackp == p) stackp--; continue; case on_failure_jump: handle_on_failure_jump: EXTRACT_NUMBER_AND_INCR (j, p); *++stackp = p + j; if (is_a_succeed_n) EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ continue; case succeed_n: is_a_succeed_n = 1; /* Get to the number of times to succeed. */ p += 2; /* Increment p past the n for when k != 0. */ EXTRACT_NUMBER_AND_INCR (k, p); if (k == 0) { p -= 4; goto handle_on_failure_jump; } continue; case set_number_at: p += 4; continue; case start_memory: case stop_memory: p++; continue; case duplicate: bufp->can_be_null = 1; fastmap['\n'] = 1; case anychar: for (j = 0; j < (1 << BYTEWIDTH); j++) if (j != '\n') fastmap[j] = 1; if (bufp->can_be_null) return; /* Don't return; check the alternative paths so we can set can_be_null if appropriate. */ break; case wordchar: for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) == Sword) fastmap[j] = 1; break; case notwordchar: for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) != Sword) fastmap[j] = 1; break;#ifdef emacs case syntaxspec: k = *p++; for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) == (enum syntaxcode) k) fastmap[j] = 1; break; case notsyntaxspec: k = *p++; for (j = 0; j < (1 << BYTEWIDTH); j++) if (SYNTAX (j) != (enum syntaxcode) k) fastmap[j] = 1; break;#endif /* not emacs */ case charset: for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) { if (translate) fastmap[translate[j]] = 1; else fastmap[j] = 1; } break; case charset_not: /* Chars beyond end of map must be allowed */ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) if (translate) fastmap[translate[j]] = 1; else fastmap[j] = 1; for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) { if (translate) fastmap[translate[j]] = 1; else fastmap[j] = 1; } break; } /* Get here means we have successfully found the possible starting characters of one path of the pattern. We need not follow this path any farther. Instead, look at the next alternative remembered in the stack. */ if (stackp != stackb) p = *stackp--; else break; }}/* Like re_search_2, below, but only one string is specified, and doesn't let you say where to stop matching. */intre_search (struct re_pattern_buffer *pbufp, char *string, int size, int startpos, int range, struct re_registers *regs){ return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range, regs, size);}/* Using the compiled pattern in PBUFP->buffer, first tries to match the virtual concatenation of STRING1 and STRING2, starting first at index STARTPOS, then at STARTPOS + 1, and so on. RANGE is the number of places to try before giving up. If RANGE is negative, it searches backwards, i.e., the starting positions tried are STARTPOS, STARTPOS - 1, etc. STRING1 and STRING2 are of SIZE1 and SIZE2, respectively. In REGS, return the indices of the virtual concatenation of STRING1 and STRING2 that matched the entire PBUFP->buffer and its contained subexpressions. Do not consider matching one past the index MSTOP in the virtual concatenation of STRING1 and STRING2. The value returned is the position in the strings at which the match was found, or -1 if no match was found, or -2 if error (such as failure stack overflow). */intre_search_2 (struct re_pattern_buffer *pbufp, char *string1, int size1, char *string2, int size2, int startpos, register int range, struct re_registers *regs, int mstop){ register char *fastmap = pbufp->fastmap; register unsigned char *translate = (unsigned char *) pbufp->translate; int total_size = size1 + size2; int endpos = startpos + range; int val; /* Check for out-of-range starting position. */ if (startpos < 0 || startpos > total_size) return -1; /* Fix up range if it would eventually take startpos outside of the virtual concatenation of string1 and string2. */ if (endpos < -1) range = -1 - startpos; else if (endpos > total_size) range = total_size - startpos; /* Update the fastmap now if not correct already. */ if (fastmap && !pbufp->fastmap_accurate) re_compile_fastmap (pbufp); /* If the search isn't to be a backwards one, don't waste time in a long search for a pattern that says it is anchored. */ if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf && range > 0) { if (startpos > 0) return -1; else range = 1; } while (1) { /* If a fastmap is supplied, skip quickly over characters that cannot possibly be the start of a match. Note, however, that if the pattern can possibly match the null string, we must test it at each starting point so that we take the first null string we get. */ if (fastmap && startpos < total_size && pbufp->can_be_null != 1) { if (range > 0) /* Searching forwards. */ { register int lim = 0; register unsigned char *p; int irange = range; if (startpos < size1 && startpos + range >= size1) lim = range - (size1 - startpos); p = ((unsigned char *) &(startpos >= size1 ? string2 - size1 : string1)[startpos]); while (range > lim && !fastmap[translate ? translate[*p++] : *p++]) range--; startpos += irange - range; } else /* Searching backwards. */ { register unsigned char c; if (string1 == 0 || startpos >= size1) c = string2[startpos - size1]; else c = string1[startpos]; c &= 0xff; if (translate ? !fastmap[translate[c]] : !fastmap[c]) goto advance; } } if (range >= 0 && startpos == total_size && fastmap && pbufp->can_be_null == 0) return -1; val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop); if (val >= 0) return startpos; if (val == -2) return -2;#ifdef C_ALLOCA alloca (0);#endif /* C_ALLOCA */ advance: if (!range) break; else if (range > 0) { range--; startpos++; } else { range++; startpos--; } } return -1;}#ifndef emacs /* emacs never uses this. */intre_match (struct re_pattern_buffer *pbufp, char *string, int size, int pos, struct re_registers *regs){ return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size); }#endif /* not emacs *//* The following are used for re_match_2, defined below: *//* Roughly the maximum number of failure points on the stack. Would be exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed. */ int re_max_failures = 2000;/* Routine used by re_match_2. */static int bcmp_translate (char *, char *, int, unsigned char *);/* Structure and accessing macros used in re_match_2: */struct register_info
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -