📄 regex.cc
字号:
/* Place in pattern (i.e., the {) to which to go back if the interval is invalid. */ const char *beg_interval = 0; /* Stack of information saved by \( and restored by \). Four stack elements are pushed by each \(: First, the value of b. Second, the value of fixup_jump. Third, the value of regnum. Fourth, the value of begalt. */ int stackb[40]; int *stackp = stackb; int *stacke = stackb + 40; int *stackt; /* Counts \('s as they are encountered. Remembered for the matching \), where it becomes the register number to put in the stop_memory command. */ unsigned regnum = 1; bufp->fastmap_accurate = 0;#ifndef emacs#ifndef SYNTAX_TABLE /* Initialize the syntax table. */ init_syntax_once();#endif#endif if (bufp->allocated == 0) { bufp->allocated = INIT_BUF_SIZE; if (bufp->buffer) /* EXTEND_BUFFER loses when bufp->allocated is 0. */ bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE); else /* Caller did not allocate a buffer. Do it for them. */ bufp->buffer = (char *) malloc (INIT_BUF_SIZE); if (!bufp->buffer) goto memory_exhausted; begalt = b = bufp->buffer; } while (p != pend) { PATFETCH (c); switch (c) { case '$': { const char *p1 = p; /* When testing what follows the $, look past the \-constructs that don't consume anything. */ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) while (p1 != pend) { if (*p1 == '\\' && p1 + 1 != pend && (p1[1] == '<' || p1[1] == '>' || p1[1] == '`' || p1[1] == '\''#ifdef emacs || p1[1] == '='#endif || p1[1] == 'b' || p1[1] == 'B')) p1 += 2; else break; } if (obscure_syntax & RE_TIGHT_VBAR) { if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend) goto normal_char; /* Make operand of last vbar end before this `$'. */ if (fixup_jump) store_jump (fixup_jump, jump, b); fixup_jump = 0; BUFPUSH (endline); break; } /* $ means succeed if at end of line, but only in special contexts. If validly in the middle of a pattern, it is a normal character. */ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend) goto invalid_pattern; if (p1 == pend || *p1 == '\n' || (obscure_syntax & RE_CONTEXT_INDEP_OPS) || (obscure_syntax & RE_NO_BK_PARENS ? *p1 == ')' : *p1 == '\\' && p1[1] == ')') || (obscure_syntax & RE_NO_BK_VBAR ? *p1 == '|' : *p1 == '\\' && p1[1] == '|')) { BUFPUSH (endline); break; } goto normal_char; } case '^': /* ^ means succeed if at beg of line, but only if no preceding pattern. */ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart) goto invalid_pattern; if (laststart && p - 2 >= pattern && p[-2] != '\n' && !(obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; if (obscure_syntax & RE_TIGHT_VBAR) { if (p != pattern + 1 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; BUFPUSH (begline); begalt = b; } else BUFPUSH (begline); break; case '+': case '?': if ((obscure_syntax & RE_BK_PLUS_QM) || (obscure_syntax & RE_LIMITED_OPS)) goto normal_char; handle_plus: case '*': /* If there is no previous pattern, char not special. */ if (!laststart) { if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) goto invalid_pattern; else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; } /* If there is a sequence of repetition chars, collapse it down to just one. */ zero_times_ok = 0; many_times_ok = 0; while (1) { zero_times_ok |= c != '+'; many_times_ok |= c != '?'; if (p == pend) break; PATFETCH (c); if (c == '*') ; else if (!(obscure_syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')) ; else if ((obscure_syntax & RE_BK_PLUS_QM) && c == '\\') { int c1; PATFETCH (c1); if (!(c1 == '+' || c1 == '?')) { PATUNFETCH; PATUNFETCH; break; } c = c1; } else { PATUNFETCH; break; } } /* Star, etc. applied to an empty pattern is equivalent to an empty pattern. */ if (!laststart) break; /* Now we know whether or not zero matches is allowed and also whether or not two or more matches is allowed. */ if (many_times_ok) { /* If more than one repetition is allowed, put in at the end a backward relative jump from b to before the next jump we're going to put in below (which jumps from laststart to after this jump). */ GET_BUFFER_SPACE (3); store_jump (b, maybe_finalize_jump, laststart - 3); b += 3; /* Because store_jump put stuff here. */ } /* On failure, jump from laststart to b + 3, which will be the end of the buffer after this jump is inserted. */ GET_BUFFER_SPACE (3); insert_jump (on_failure_jump, laststart, b + 3, b); pending_exact = 0; b += 3; if (!zero_times_ok) { /* At least one repetition is required, so insert a dummy-failure before the initial on-failure-jump instruction of the loop. This effects a skip over that instruction the first time we hit that loop. */ GET_BUFFER_SPACE (6); insert_jump (dummy_failure_jump, laststart, laststart + 6, b); b += 3; } break; case '.': laststart = b; BUFPUSH (anychar); break; case '[': if (p == pend) goto invalid_pattern; while (b - bufp->buffer > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) EXTEND_BUFFER; laststart = b; if (*p == '^') { BUFPUSH (charset_not); p++; } else BUFPUSH (charset); p1 = p; BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); /* Clear the whole map */ memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not) SET_LIST_BIT ('\n'); /* Read in characters and ranges, setting map bits. */ while (1) { /* Don't translate while fetching, in case it's a range bound. When we set the bit for the character, we translate it. */ PATFETCH_RAW (c); /* If set, \ escapes characters when inside [...]. */ if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\') { PATFETCH(c1); SET_LIST_BIT (c1); continue; } if (c == ']') { if (p == p1 + 1) { /* If this is an empty bracket expression. */ if ((obscure_syntax & RE_NO_EMPTY_BRACKETS) && p == pend) goto invalid_pattern; } else /* Stop if this isn't merely a ] inside a bracket expression, but rather the end of a bracket expression. */ break; } /* Get a range. */ if (p[0] == '-' && p[1] != ']') { PATFETCH (c1); /* Don't translate the range bounds while fetching them. */ PATFETCH_RAW (c1); if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1) goto invalid_pattern; if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END) && c1 == '-' && *p != ']') goto invalid_pattern; while (c <= c1) { /* Translate each char that's in the range. */ if (translate) SET_LIST_BIT (translate[c]); else SET_LIST_BIT (c); c++; } } else if ((obscure_syntax & RE_CHAR_CLASSES) && c == '[' && p[0] == ':') { /* Longest valid character class word has six characters. */ char str[CHAR_CLASS_MAX_LENGTH]; PATFETCH (c); c1 = 0; /* If no ] at end. */ if (p == pend) goto invalid_pattern; while (1) { /* Don't translate the ``character class'' characters. */ PATFETCH_RAW (c); if (c == ':' || c == ']' || p == pend || c1 == CHAR_CLASS_MAX_LENGTH) break; str[c1++] = c; } str[c1] = '\0'; if (p == pend || c == ']' /* End of the bracket expression. */ || p[0] != ']' || p + 1 == pend || (strcmp (str, "alpha") != 0 && strcmp (str, "upper") != 0 && strcmp (str, "lower") != 0 && strcmp (str, "digit") != 0 && strcmp (str, "alnum") != 0 && strcmp (str, "xdigit") != 0 && strcmp (str, "space") != 0 && strcmp (str, "print") != 0 && strcmp (str, "punct") != 0 && strcmp (str, "graph") != 0 && strcmp (str, "cntrl") != 0)) { /* Undo the ending character, the letters, and leave the leading : and [ (but set bits for them). */ c1++; while (c1--) PATUNFETCH; SET_LIST_BIT ('['); SET_LIST_BIT (':'); } else { /* The ] at the end of the character class. */ PATFETCH (c); if (c != ']') goto invalid_pattern; for (c = 0; c < (1 << BYTEWIDTH); c++) { if ((strcmp (str, "alpha") == 0 && isalpha (c)) || (strcmp (str, "upper") == 0 && isupper (c)) || (strcmp (str, "lower") == 0 && islower (c)) || (strcmp (str, "digit") == 0 && isdigit (c)) || (strcmp (str, "alnum") == 0 && isalnum (c)) || (strcmp (str, "xdigit") == 0 && isxdigit (c)) || (strcmp (str, "space") == 0 && isspace (c)) || (strcmp (str, "print") == 0 && isprint (c)) || (strcmp (str, "punct") == 0 && ispunct (c)) || (strcmp (str, "graph") == 0 && isgraph (c)) || (strcmp (str, "cntrl") == 0 && iscntrl (c))) SET_LIST_BIT (c); } } } else if (translate) SET_LIST_BIT (translate[c]); else SET_LIST_BIT (c); } /* Discard any character set/class bitmap bytes that are all 0 at the end of the map. Decrement the map-length byte too. */ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; b += b[-1]; break; case '(': if (! (obscure_syntax & RE_NO_BK_PARENS)) goto normal_char; else goto handle_open; case ')': if (! (obscure_syntax & RE_NO_BK_PARENS)) goto normal_char; else goto handle_close; case '\n': if (! (obscure_syntax & RE_NEWLINE_OR)) goto normal_char; else goto handle_bar; case '|': if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && (! laststart || p == pend)) goto invalid_pattern; else if (! (obscure_syntax & RE_NO_BK_VBAR)) goto normal_char; else goto handle_bar; case '{': if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES) && (obscure_syntax & RE_INTERVALS))) goto normal_char; else goto handle_interval; case '\\': if (p == pend) goto invalid_pattern; PATFETCH_RAW (c); switch (c) { case '(': if (obscure_syntax & RE_NO_BK_PARENS) goto normal_backsl; handle_open: if (stackp == stacke) goto nesting_too_deep; /* Laststart should point to the start_memory that we are about to push (unless the pattern has RE_NREGS or more ('s). */ *stackp++ = b - bufp->buffer; if (regnum < RE_NREGS)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -